3 * Copyright (C) 2013 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.taxonx2013
;
13 import java
.io
.FileWriter
;
14 import java
.io
.IOException
;
16 import java
.util
.ArrayList
;
17 import java
.util
.HashMap
;
18 import java
.util
.List
;
21 import java
.util
.regex
.Pattern
;
23 import javax
.xml
.transform
.TransformerException
;
24 import javax
.xml
.transform
.TransformerFactoryConfigurationError
;
26 import org
.apache
.commons
.lang
.StringUtils
;
27 import org
.w3c
.dom
.Node
;
28 import org
.w3c
.dom
.NodeList
;
30 import com
.ibm
.lsid
.MalformedLSIDException
;
32 import eu
.etaxonomy
.cdm
.api
.facade
.DerivedUnitFacade
;
33 import eu
.etaxonomy
.cdm
.api
.facade
.DerivedUnitFacade
.DerivedUnitType
;
34 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
35 import eu
.etaxonomy
.cdm
.model
.common
.DefinedTermBase
;
36 import eu
.etaxonomy
.cdm
.model
.common
.IdentifiableSource
;
37 import eu
.etaxonomy
.cdm
.model
.common
.LSID
;
38 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
39 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
40 import eu
.etaxonomy
.cdm
.model
.description
.IndividualsAssociation
;
41 import eu
.etaxonomy
.cdm
.model
.description
.PolytomousKey
;
42 import eu
.etaxonomy
.cdm
.model
.description
.PolytomousKeyNode
;
43 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
44 import eu
.etaxonomy
.cdm
.model
.description
.TaxonNameDescription
;
45 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
46 import eu
.etaxonomy
.cdm
.model
.name
.BacterialName
;
47 import eu
.etaxonomy
.cdm
.model
.name
.BotanicalName
;
48 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
49 import eu
.etaxonomy
.cdm
.model
.name
.NonViralName
;
50 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
51 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
52 import eu
.etaxonomy
.cdm
.model
.name
.ZoologicalName
;
53 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivedUnitBase
;
54 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
55 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
56 import eu
.etaxonomy
.cdm
.model
.taxon
.Classification
;
57 import eu
.etaxonomy
.cdm
.model
.taxon
.Synonym
;
58 import eu
.etaxonomy
.cdm
.model
.taxon
.SynonymRelationshipType
;
59 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
60 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
61 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
62 import eu
.etaxonomy
.cdm
.strategy
.parser
.INonViralNameParser
;
63 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImpl
;
64 import eu
.etaxonomy
.cdm
.strategy
.parser
.ParserProblem
;
// Treatment extractor for the TaxonX import; specialises TaxonXExtractor.
// NOTE(review): this listing is lossy - the embedded original line numbers skip values,
// so some source lines (declarations, braces, else/try keywords) are not shown here.
76 public class TaxonXTreatmentExtractor
extends TaxonXExtractor
{
// Nomenclatural code handed to the name parser in the parseFullName calls of this class.
78 private final NomenclaturalCode nomenclaturalCode
;
// Classification that parsed taxa are attached to; re-read from the service by reloadClassification().
79 private Classification classification
;
81 private String treatmentMainName
,originalTreatmentName
;
83 private final HashMap
<String
,Map
<String
,String
>> namesMap
= new HashMap
<String
, Map
<String
,String
>>();
// Matches text that starts with one or more digits, or with '-' followed by digits (used in extractKey).
86 private final Pattern keypattern
= Pattern
.compile("^(\\d+.*|-\\d+.*)");
// Matches text of at least two characters whose last character is a digit (used in extractKey).
87 private final Pattern keypatternend
= Pattern
.compile("^.+?\\d$");
// Guards most branches of extractTreatment and its final save; starts out false.
// NOTE(review): the code that sets it to true is not visible in this listing.
89 private boolean maxRankRespected
=false;
92 * @param nomenclaturalCode
93 * @param classification
// Stores the four collaborators on the instance and then calls prepareCollectors with the
// import state and the importer's agent service.
// NOTE(review): importer and configState are not declared among the fields visible above -
// presumably inherited from TaxonXExtractor; confirm.
97 public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode
, Classification classification
, TaxonXImport importer
,
98 TaxonXImportState configState
) {
99 this.nomenclaturalCode
=nomenclaturalCode
;
100 this.classification
= classification
;
101 this.importer
=importer
;
102 this.configState
=configState
;
103 prepareCollectors(configState
, importer
.getAgentService());
107 * Extracts all the treatment information and saves it.
108 * @param treatmentnode: the XML Node
109 * @param tosave: the list of objects to save into the CDM
110 * @param refMods: the reference extracted from the MODS
111 * @param sourceName: the URI of the document
// Walks the direct children of a treatment node and dispatches each one, by node name and
// by the value of its "type" attribute, to the matching extract* helper. Names collected on
// the way are gathered in a local list and saved, together with the classification, at the
// end when maxRankRespected is true.
// NOTE(review): several original lines are missing from this listing (closing braces,
// else keywords, some statements), so the exact nesting of the branches cannot be read off here.
113 @SuppressWarnings({ "rawtypes", "unused" })
114 protected void extractTreatment(Node treatmentnode
, List
<Object
> tosave
, Reference
<?
> refMods
, URI sourceName
) {
115 logger
.info("extractTreatment");
116 List
<TaxonNameBase
> nametosave
= new ArrayList
<TaxonNameBase
>();
117 NodeList children
= treatmentnode
.getChildNodes();
118 Taxon acceptedTaxon
=null;
// defaultTaxon is only ever passed on as null from the code visible here.
119 Taxon defaultTaxon
=null;
120 boolean refgroup
=false;
// First pass: look for a tax:ref_group child.
// NOTE(review): the body of this check is not visible; presumably it sets refgroup - confirm.
122 for (int i
=0;i
<children
.getLength();i
++){
123 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:ref_group")) {
// Second pass: handle every child according to its kind.
128 for (int i
=0;i
<children
.getLength();i
++){
// tax:nomenclature: scan its children for a tax:name, then build the accepted taxon.
130 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:nomenclature")){
131 NodeList nomenclature
= children
.item(i
).getChildNodes();
132 boolean containsName
=false;
133 for(int k
=0;k
<nomenclature
.getLength();k
++){
134 if(nomenclature
.item(k
).getNodeName().equalsIgnoreCase("tax:name")){
140 reloadClassification();
141 //extract "main" the scientific name
142 acceptedTaxon
= extractNomenclature(children
.item(i
),nametosave
,refMods
);
// tax:ref_group is only processed when maxRankRespected is set.
145 else if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:ref_group") && maxRankRespected
){
146 reloadClassification();
147 //extract the References within the document
148 extractReferences(children
.item(i
),nametosave
,acceptedTaxon
,refMods
);
// tax:div type="multiple": the kind of content is resolved through askMultiple below.
// NOTE(review): getNamedItem("type") is dereferenced without a null check here and in the
// following branches; a tax:div without a type attribute would fail - confirm input guarantees.
150 else if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
151 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected
){
// NOTE(review): hard-coded absolute path on a developer machine; the document URI is
// appended to this file (FileWriter opened in append mode).
152 File file
= new File("/home/pkelbert/Bureau/multipleTaxonX.txt");
155 writer
= new FileWriter(file
,true);
156 writer
.write(sourceName
+"\n");
// NOTE(review): a failure to write is only printed to stderr; flushing/closing of the
// writer is not visible in this listing - confirm it is closed on all paths.
159 } catch (IOException e1
) {
160 // TODO Auto-generated catch block
161 e1
.printStackTrace();
// The label returned by askMultiple selects which extractor handles this div.
163 String multiple
= askMultiple(children
.item(i
));
164 if (multiple
.equalsIgnoreCase("synonyms")) {
165 extractSynonyms(children
.item(i
),nametosave
, acceptedTaxon
,refMods
);
168 if(multiple
.equalsIgnoreCase("material examined")){
169 extractMaterials(children
.item(i
),acceptedTaxon
, refMods
, nametosave
);
172 if (multiple
.equalsIgnoreCase("distribution")){
173 extractDistribution(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
);
// Here the askMultiple label itself is used as the feature name.
176 extractSpecificFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
,multiple
);
// The following tax:div branches map a fixed "type" value to a feature or a dedicated extractor.
179 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
180 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected
){
181 extractFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
, nametosave
, refMods
, Feature
.BIOLOGY_ECOLOGY());
183 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
184 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected
){
185 extractFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
, Feature
.DESCRIPTION());
187 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
188 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected
){
189 extractFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
,Feature
.DIAGNOSIS());
191 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
192 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected
){
193 extractFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
, Feature
.DISCUSSION());
196 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
197 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected
){
198 extractDistribution(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
);
200 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
201 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected
){
202 extractFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
,refMods
,Feature
.ETYMOLOGY());
205 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
206 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected
){
207 extractMaterials(children
.item(i
),acceptedTaxon
, refMods
, nametosave
);
// Keys are not parsed into a PolytomousKey for now: the div is stored as text under the
// feature name "Keys - unparsed" (the extractKey call is commented out).
210 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
211 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected
){
212 //TODO IGNORE keys for the moment
213 //extractKey(children.item(i),acceptedTaxon, nametosave,source, refMods);
214 extractSpecificFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
,"Keys - unparsed");
// Logs a node's name and attributes (and its first attribute when it has any);
// presumably the fallback for nodes matched by none of the branches above - confirm.
217 logger
.info("ANOTHER KIND OF NODES: "+children
.item(i
).getNodeName()+", "+children
.item(i
).getAttributes());
218 if (children
.item(i
).getAttributes() !=null) {
219 logger
.info(children
.item(i
).getAttributes().item(0));
// Persist the collected names and the classification, only when maxRankRespected.
223 // logger.info("saveUpdateNames");
224 if (maxRankRespected
){
225 importer
.getNameService().saveOrUpdate(nametosave
);
226 importer
.getClassificationService().saveOrUpdate(classification
);
227 logger
.info("saveUpdateNames-ok");
234 * @param acceptedTaxon: the current acceptedTaxon
235 * @param nametosave: the list of objects to save into the CDM
236 * @param refMods: the current reference extracted from the MODS
// Builds a PolytomousKey from the tax:p paragraphs of a key node: each paragraph's trimmed
// text is concatenated into a key statement, an embedded tax:name is resolved to a Taxon,
// and a PolytomousKeyNode is created per paragraph. Nodes and key are saved at the end.
// The only reference to this method visible in this listing is a commented-out call in extractTreatment.
// NOTE(review): the declarations of key and taxonKey and several braces/else keywords are
// missing from this listing.
238 @SuppressWarnings("rawtypes")
239 private void extractKey(Node keys
, Taxon acceptedTaxon
,List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
) {
240 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
242 NodeList children
= keys
.getChildNodes();
// New key, sourced from refMods and scoped to the accepted taxon.
244 PolytomousKey poly
= PolytomousKey
.NewInstance();
245 poly
.addSource(null,null,refMods
,null);
246 poly
.addTaxonomicScope(acceptedTaxon
);
// NOTE(review): "bloup" looks like a placeholder title - confirm before enabling this method.
247 poly
.setTitleCache("bloup");
248 // poly.addCoveredTaxon(acceptedTaxon);
249 PolytomousKeyNode root
= poly
.getRoot();
// NOTE(review): previous starts as null and no assignment to it is visible in this listing;
// if it is never reassigned, the previous.addChild calls below would throw - confirm.
250 PolytomousKeyNode previous
= null,tmpKey
=null;
// Every created node is also collected here so that all nodes can be saved in one call.
252 List
<PolytomousKeyNode
> polyNodes
= new ArrayList
<PolytomousKeyNode
>();
254 // String fullContent = keys.getTextContent();
255 for (int i
=0;i
<children
.getLength();i
++){
256 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
257 NodeList paragraph
= children
.item(i
).getChildNodes();
// Collect the paragraph's non-blank text into key and resolve a tax:name child, if any.
260 for (int j
=0;j
<paragraph
.getLength();j
++){
261 if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("#text")){
262 if (! paragraph
.item(j
).getTextContent().trim().isEmpty()){
263 key
+=paragraph
.item(j
).getTextContent().trim();
264 // logger.info("KEY: "+j+"--"+key);
267 if(paragraph
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
268 taxonKey
=getTaxonFromXML(paragraph
.item(j
),nametosave
,refMods
);
// Text starting with a number (or '-' plus number): attach under root when there is no
// previous node, otherwise under previous.
271 // logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
272 if (keypattern
.matcher(key
).matches()){
273 tmpKey
= PolytomousKeyNode
.NewInstance(key
);
274 if (taxonKey
!=null) {
275 tmpKey
.setTaxon(taxonKey
);
277 polyNodes
.add(tmpKey
);
278 if (previous
== null) {
279 root
.addChild(tmpKey
);
281 previous
.addChild(tmpKey
);
// Presumably the else-branch for text not matching keypattern (the else keyword is not
// visible): text ending in a digit is attached under root, otherwise under previous.
285 tmpKey
=PolytomousKeyNode
.NewInstance(key
);
286 if (taxonKey
!=null) {
287 tmpKey
.setTaxon(taxonKey
);
289 polyNodes
.add(tmpKey
);
290 if (keypatternend
.matcher(key
).matches()) {
291 root
.addChild(tmpKey
);
294 previous
.addChild(tmpKey
);
// Persist the nodes first, then the key itself.
301 importer
.getPolytomousKeyNodeService().saveOrUpdate(polyNodes
);
302 importer
.getPolytomousKeyService().saveOrUpdate(poly
);
306 * @param taxons: the XML Nodegroup
307 * @param nametosave: the list of objects to save into the CDM
308 * @param acceptedTaxon: the current accepted Taxon
309 * @param refMods: the current reference extracted from the MODS
311 * @return Taxon object built
// Resolves a tax:name XML node to a Taxon: extracts the scientific name parts, parses the
// name with the non-viral name parser, reuses an already known name where possible, looks
// the taxon up by title cache and otherwise builds a new Taxon that is placed in the classification.
// NOTE(review): the declarations of name and original, the try keyword, the condition around
// the new Taxon, and the return statement are missing from this listing.
313 @SuppressWarnings({ "rawtypes", "unchecked" })
314 private Taxon
getTaxonFromXML(Node taxons
, List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
) {
315 // logger.info("getTaxonFromXML");
316 // logger.info("acceptedTaxon: "+acceptedTaxon);
318 TaxonNameBase nameToBeFilled
= null;
321 String
[] enames
= null;
// Rank falls back to UNKNOWN_RANK when it cannot be resolved.
322 Rank rank
= Rank
.UNKNOWN_RANK();
324 String identifier
="";
// extractScientificName returns an array; as used here, index 2 is a rank name and index 3
// an identifier. Index 1 is tested for emptiness (the branch bodies are not visible).
327 enames
= extractScientificName(taxons
);
328 if (enames
[1].isEmpty()) {
334 rank
= Rank
.getRankByName(enames
[2]);
335 identifier
= enames
[3];
336 } catch (TransformerFactoryConfigurationError e1
) {
338 } catch (TransformerException e1
) {
340 } catch (UnknownCdmTypeException e
) {
341 logger
.warn("Rank problem!"+enames
[2]);
342 rank
=Rank
.UNKNOWN_RANK();
344 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
346 nameToBeFilled
= parser
.parseFullName(name
, nomenclaturalCode
, rank
);
// A parse whose only problem is CheckRank is accepted; any other problem set goes to solveNameProblem.
347 if (nameToBeFilled
.hasProblem() &&
348 !((nameToBeFilled
.getParsingProblems().size()==1) && nameToBeFilled
.getParsingProblems().contains(ParserProblem
.CheckRank
)) ) {
349 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
350 nameToBeFilled
=solveNameProblem(original
, name
,parser
);
// Reuse a stored name with the same title cache, or register this one for saving.
353 nameToBeFilled
= getTaxonNameBase(nameToBeFilled
,nametosave
);
355 // importer.getNameService().saveOrUpdate(nametosave);
356 Taxon t
= importer
.getTaxonService().findBestMatchingTaxon(nameToBeFilled
.getTitleCache());
// A new Taxon is built from the name and its nomenclatural reference; presumably only when
// no matching taxon was found (the guarding condition is not visible) - confirm.
358 // logger.info("BestTaxonService not the best or null");
359 t
= new Taxon(nameToBeFilled
,(Reference
<?
>) nameToBeFilled
.getNomenclaturalReference() );//TODO TOFIX reference
360 if (t
.getSec() == null) {
// Unless the original secundum is to be kept, the configured secundum is applied.
363 if(!configState
.getConfig().doKeepOriginalSecundum()) {
364 t
.setSec(configState
.getConfig().getSecundum());
365 logger
.info("SET SECUNDUM "+configState
.getConfig().getSecundum());
367 t
.addSource(null,null,refMods
,null);
// Identifiers of two characters or fewer are not used as an LSID.
369 if (!identifier
.isEmpty() && (identifier
.length()>2)){
370 setLSID(identifier
, t
);
// Find a parent through askParent; while none is found, create one and link it in the classification.
373 Taxon parentTaxon
= askParent(t
, classification
);
374 if (parentTaxon
==null){
375 while (parentTaxon
== null) {
376 parentTaxon
= createParent(t
, refMods
);
377 classification
.addParentChild(parentTaxon
, t
, refMods
, null);
380 classification
.addParentChild(parentTaxon
, t
, refMods
, null);
384 t
= CdmBase
.deproxy(t
, Taxon
.class);
386 if (!configState
.getConfig().doKeepOriginalSecundum()) {
387 t
.setSec(configState
.getConfig().getSecundum());
388 logger
.info("SET SECUNDUM "+configState
.getConfig().getSecundum());
395 * @param taxons: the XML Nodegroup
396 * @param nametosave: the list of objects to save into the CDM
397 * @param acceptedTaxon: the current accepted Taxon
398 * @param refMods: the current reference extracted from the MODS
400 * @return the TaxonNameBase object built
// Resolves a tax:name XML node to a TaxonNameBase: extracts the scientific name parts,
// parses the name, and returns the matching stored name or the newly parsed one.
// Follows the same name-parsing steps as getTaxonFromXML but builds no Taxon.
// NOTE(review): the declarations of name and original and the try keyword are missing from
// this listing; refMods is not used in the code visible here.
402 @SuppressWarnings({ "rawtypes", "unchecked" })
403 private TaxonNameBase
getTaxonNameBaseFromXML(Node taxons
, List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
) {
404 // logger.info("getTaxonFromXML");
405 // logger.info("acceptedTaxon: "+acceptedTaxon);
407 TaxonNameBase nameToBeFilled
= null;
410 String
[] enames
= null;
411 Rank rank
= Rank
.UNKNOWN_RANK();
413 String identifier
="";
// As used here, index 2 of the extracted array is a rank name and index 3 an identifier.
416 enames
= extractScientificName(taxons
);
417 if (enames
[1].isEmpty()) {
423 rank
= Rank
.getRankByName(enames
[2]);
424 identifier
= enames
[3];
425 } catch (TransformerFactoryConfigurationError e1
) {
427 } catch (TransformerException e1
) {
429 } catch (UnknownCdmTypeException e
) {
430 logger
.warn("Rank problem!"+enames
[2]);
431 rank
=Rank
.UNKNOWN_RANK();
433 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
435 nameToBeFilled
= parser
.parseFullName(name
, nomenclaturalCode
, rank
);
// A parse whose only problem is CheckRank is accepted; any other problem set goes to solveNameProblem.
436 if (nameToBeFilled
.hasProblem() &&
437 !((nameToBeFilled
.getParsingProblems().size()==1) && nameToBeFilled
.getParsingProblems().contains(ParserProblem
.CheckRank
)) ) {
438 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
439 nameToBeFilled
=solveNameProblem(original
, name
,parser
);
// Reuse a stored name with the same title cache, or register this one for saving.
442 nameToBeFilled
= getTaxonNameBase(nameToBeFilled
,nametosave
);
443 return nameToBeFilled
;
// Looks for an already stored name whose title cache equals the given name's title cache
// (case-insensitive); when none matches, the given name is appended to nametosave.
// NOTE(review): the return statements are not visible in this listing.
// NOTE(review): every call lists all TaxonNameBase objects through the name service and
// scans them linearly; this may be costly on large databases - consider a targeted lookup.
448 @SuppressWarnings("rawtypes")
449 private TaxonNameBase
getTaxonNameBase (TaxonNameBase name
, List
<TaxonNameBase
> nametosave
){
450 List
<TaxonNameBase
> names
= importer
.getNameService().list(TaxonNameBase
.class, null, null, null, null);
451 for (TaxonNameBase tb
: names
){
452 if (tb
.getTitleCache().equalsIgnoreCase(name
.getTitleCache())) {
453 logger
.info("TaxonNameBase FOUND"+name
.getTitleCache());
// No stored name matched: remember this one so it gets saved later.
457 logger
.info("TaxonNameBase NOT FOUND "+name
.getTitleCache());
458 nametosave
.add(name
);
// Refreshes the classification field from the classification service, looked up by UUID.
// NOTE(review): the lines between the lookup and the saveOrUpdate are missing from this
// listing; presumably the save-and-reload below only runs when the lookup found nothing - confirm.
468 private void reloadClassification() {
469 Classification cl
= importer
.getClassificationService().find(classification
.getUuid());
// Persist the current classification, then re-read it by UUID.
473 importer
.getClassificationService().saveOrUpdate(classification
);
474 classification
= importer
.getClassificationService().find(classification
.getUuid());
480 * Create a Taxon for the current NameBase, based on the current reference
481 * @param taxonNameBase
482 * @param refMods: the current reference extracted from the MODS
// Creates a new Taxon for the given name, sets the configured secundum on it when required,
// and records refMods as its source.
// NOTE(review): the return statement is not visible in this listing.
485 @SuppressWarnings({ "unused", "rawtypes" })
486 private Taxon
getTaxon(TaxonNameBase taxonNameBase
, Reference
<?
> refMods
) {
// The taxon is constructed with a null second argument.
487 Taxon t
= new Taxon(taxonNameBase
,null );
// The configured secundum is applied when the original one is not to be kept, or when the taxon has none.
// NOTE(review): since null was passed above, getSec() is presumably always null here - confirm.
488 if (!configState
.getConfig().doKeepOriginalSecundum() || (t
.getSec() == null)) {
489 t
.setSec(configState
.getConfig().getSecundum());
490 logger
.info("SET SECUNDUM "+configState
.getConfig().getSecundum());
492 t
.addSource(null,null,refMods
,null);
498 * @param distribution: the XML node group
499 * @param acceptedTaxon: the current accepted Taxon
500 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
501 * @param refMods: the current reference extracted from the MODS
// Extracts distribution information from a node: for every tax:p child it accumulates the
// paragraph text (plain text, linked names, specimen descriptions) and the collection events,
// keyed by the paragraph's child index; afterwards specimens become IndividualsAssociations
// and text becomes either a HABITAT description or DISTRIBUTION TextData on the accepted taxon.
// NOTE(review): the declarations of m and descr, the bodies of the two key-set loops and
// several braces/else keywords are missing from this listing.
503 @SuppressWarnings("rawtypes")
504 private void extractDistribution(Node distribution
, Taxon acceptedTaxon
, Taxon defaultTaxon
, List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
) {
505 // logger.info("DISTRIBUTION");
506 // logger.info("acceptedTaxon: "+acceptedTaxon);
507 NodeList children
= distribution
.getChildNodes();
// Both maps are keyed by the index i of the tax:p child within the distribution node.
508 Map
<Integer
,List
<MySpecimenOrObservation
>> specimenOrObservations
= new HashMap
<Integer
, List
<MySpecimenOrObservation
>>();
509 Map
<Integer
,String
> descriptionsFulltext
= new HashMap
<Integer
,String
>();
511 for (int i
=0;i
<children
.getLength();i
++){
512 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
513 NodeList paragraph
= children
.item(i
).getChildNodes();
514 for (int j
=0;j
<paragraph
.getLength();j
++){
// Plain text: appended (space-separated) to the text collected for paragraph i.
515 if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("#text")){
516 if(!paragraph
.item(j
).getTextContent().trim().isEmpty()) {
517 String s
=paragraph
.item(j
).getTextContent().trim();
518 if (descriptionsFulltext
.get(i
) !=null){
519 s
= descriptionsFulltext
.get(i
)+" "+s
;
521 descriptionsFulltext
.put(i
, s
);
// Linked name: the name's string form up to the first "sec" match is appended to the text.
// NOTE(review): String.split takes a regex, so "sec." matches "sec" followed by ANY
// character, not only a literal dot; the other callers in this class split on "sec".
524 else if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
525 String s
=getTaxonNameBaseFromXML(paragraph
.item(j
),nametosave
,refMods
).toString().split("sec.")[0];
526 if (descriptionsFulltext
.get(i
) !=null){
527 s
= descriptionsFulltext
.get(i
)+" "+s
;
529 descriptionsFulltext
.put(i
, s
);
// Collection event: parsed into a specimen/observation, stored per paragraph, and its
// derived unit's string form is appended to the paragraph text as well.
531 else if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("tax:collection_event")){
532 MySpecimenOrObservation specimenOrObservation
= new MySpecimenOrObservation();
533 DerivedUnitBase derivedUnitBase
= null;
534 specimenOrObservation
= extractSpecimenOrObservation(paragraph
.item(j
), derivedUnitBase
, DerivedUnitType
.DerivedUnit
);
535 List
<MySpecimenOrObservation
> speObsList
= specimenOrObservations
.get(i
);
536 if (speObsList
== null) {
537 speObsList
=new ArrayList
<MySpecimenOrObservation
>();
539 speObsList
.add(specimenOrObservation
);
540 specimenOrObservations
.put(i
,speObsList
);
542 String s
= specimenOrObservation
.getDerivedUnitBase().toString();
543 if (descriptionsFulltext
.get(i
) !=null){
544 s
= descriptionsFulltext
.get(i
)+" "+s
;
546 descriptionsFulltext
.put(i
, s
);
// NOTE(review): the bodies of the next two loops are not visible; presumably they compute
// m, the upper bound used by the loop further down, from the map keys - confirm.
554 for (int k
:descriptionsFulltext
.keySet()) {
559 for (int k
:specimenOrObservations
.keySet()) {
// td is the taxon description that receives the distribution text and the described specimens.
566 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
567 Feature currentFeature
= Feature
.DISTRIBUTION();
568 DerivedUnitBase derivedUnitBase
=null;
// Walk all paragraph indices from 0 to m in order.
570 for (int k
=0;k
<=m
;k
++){
571 if(specimenOrObservations
.keySet().contains(k
)){
572 for (MySpecimenOrObservation soo
:specimenOrObservations
.get(k
) ) {
573 derivedUnitBase
= soo
.getDerivedUnitBase();
574 descr
=soo
.getDescr();
576 derivedUnitBase
.addSource(null,null,refMods
,null);
578 importer
.getOccurrenceService().saveOrUpdate(derivedUnitBase
);
580 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
581 acceptedTaxon
.addDescription(taxonDescription
);
// Link the specimen to the taxon through an IndividualsAssociation whose feature comes from makeFeature.
584 IndividualsAssociation indAssociation
= IndividualsAssociation
.NewInstance();
586 Feature feature
=null;
587 feature
= makeFeature(derivedUnitBase
);
// A non-empty description text replaces the unit's title cache.
588 if(!StringUtils
.isEmpty(descr
)) {
589 derivedUnitBase
.setTitleCache(descr
, true);
591 indAssociation
.setAssociatedSpecimenOrObservation(derivedUnitBase
);
592 indAssociation
.setFeature(feature
);
593 indAssociation
.addSource(null, null, refMods
, null);
595 taxonDescription
.addElement(indAssociation
);
596 taxonDescription
.setTaxon(acceptedTaxon
);
597 taxonDescription
.addSource(null,null,refMods
,null);
599 importer
.getDescriptionService().saveOrUpdate(taxonDescription
);
600 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
601 td
.addDescribedSpecimenOrObservation(soo
.getDerivedUnitBase());
// Paragraph text starting with "Hab." or "Habitat" is stored through setParticularDescription
// under the HABITAT feature.
605 if (descriptionsFulltext
.keySet().contains(k
)){
606 if (!descriptionsFulltext
.get(k
).isEmpty() && (descriptionsFulltext
.get(k
).startsWith("Hab.") || descriptionsFulltext
.get(k
).startsWith("Habitat"))){
607 setParticularDescription(descriptionsFulltext
.get(k
),acceptedTaxon
,defaultTaxon
, refMods
, Feature
.HABITAT());
// Text stored as DISTRIBUTION TextData on td; presumably the else-branch of the habitat
// check (the else keyword is not visible) - confirm.
611 TextData textData
= TextData
.NewInstance();
613 textData
.setFeature(currentFeature
);
614 textData
.putText(Language
.UNKNOWN_LANGUAGE(), descriptionsFulltext
.get(k
));
615 textData
.addSource(null, null, refMods
, null);
617 td
.addElement(textData
);
// Whenever paragraph k contributed anything, td is sourced, attached to the taxon and saved.
622 if (descriptionsFulltext
.keySet().contains(k
) || specimenOrObservations
.keySet().contains(k
)){
623 td
.addSource(null,null,refMods
,null);
624 acceptedTaxon
.addDescription(td
);
625 importer
.getDescriptionService().saveOrUpdate(td
);
626 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
633 * @param materials: the XML node group
634 * @param acceptedTaxon: the current accepted Taxon
635 * @param refMods: the current reference extracted from the MODS
// Extracts examined materials from a node: for every tax:p child it accumulates raw text
// (linked names and other non-event content) and turns each tax:collection_event into a
// field observation linked to the accepted taxon through an IndividualsAssociation.
// Raw text is stored as a MATERIALS_EXAMINED association under the !added check below.
// NOTE(review): the declarations of descr and added and several braces/else keywords are
// missing from this listing.
637 @SuppressWarnings("rawtypes")
638 private void extractMaterials(Node materials
, Taxon acceptedTaxon
, Reference
<?
> refMods
,List
<TaxonNameBase
> nametosave
) {
639 // logger.info("EXTRACTMATERIALS");
640 // logger.info("acceptedTaxon: "+acceptedTaxon);
641 NodeList children
= materials
.getChildNodes();
642 NodeList events
= null;
645 DerivedUnitBase derivedUnitBase
=null;
646 MySpecimenOrObservation myspecimenOrObservation
= null;
648 for (int i
=0;i
<children
.getLength();i
++){
// Raw text is collected per top-level child.
649 String rawAssociation
="";
651 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
652 events
= children
.item(i
).getChildNodes();
653 for(int k
=0;k
<events
.getLength();k
++){
// Linked name: the name's string form before the first "sec" is appended to the raw text.
654 if (events
.item(k
).getNodeName().equalsIgnoreCase("tax:name")){
655 String linkedTaxon
= getTaxonNameBaseFromXML(events
.item(k
), nametosave
,refMods
).toString();//TODO NOT IMPLEMENTED IN THE CDM YET
656 rawAssociation
+=linkedTaxon
.split("sec")[0];
// Anything that is neither a name nor a collection event contributes its trimmed text.
658 if (! events
.item(k
).getNodeName().equalsIgnoreCase("tax:name")
659 && !events
.item(k
).getNodeName().equalsIgnoreCase("tax:collection_event")){
660 rawAssociation
+= events
.item(k
).getTextContent().trim();
// Collection event: raw text that fails containsDistinctLetters (semicolons removed first)
// is replaced by the literal "no description text".
662 if(events
.item(k
).getNodeName().equalsIgnoreCase("tax:collection_event")){
663 if (!containsDistinctLetters(rawAssociation
.replaceAll(";",""))) {
664 rawAssociation
="no description text";
// A field-observation facade is created from the raw text and its inner unit is saved.
667 DerivedUnitFacade derivedUnitFacade
= getFacade(rawAssociation
.replaceAll(";",""),DerivedUnitType
.FieldObservation
);
668 derivedUnitBase
= derivedUnitFacade
.innerDerivedUnit();
669 derivedUnitBase
.addSource(null,null,refMods
,null);
670 importer
.getOccurrenceService().saveOrUpdate(derivedUnitBase
);
// The event itself is then parsed; the derived unit is replaced by the one it returns.
672 myspecimenOrObservation
= extractSpecimenOrObservation(events
.item(k
),derivedUnitBase
,DerivedUnitType
.FieldObservation
);
673 derivedUnitBase
= myspecimenOrObservation
.getDerivedUnitBase();
674 descr
=myspecimenOrObservation
.getDescr();
676 derivedUnitBase
.addSource(null,null,refMods
,null);
678 importer
.getOccurrenceService().saveOrUpdate(derivedUnitBase
);
680 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
681 acceptedTaxon
.addDescription(taxonDescription
);
// Link the unit to the taxon; the feature is derived from the unit by makeFeature.
684 IndividualsAssociation indAssociation
= IndividualsAssociation
.NewInstance();
686 Feature feature
= makeFeature(derivedUnitBase
);
687 if(!StringUtils
.isEmpty(descr
)) {
688 derivedUnitBase
.setTitleCache(descr
, true);
690 indAssociation
.setAssociatedSpecimenOrObservation(derivedUnitBase
);
691 indAssociation
.setFeature(feature
);
692 indAssociation
.addSource(null, null, refMods
, null);
694 taxonDescription
.addElement(indAssociation
);
695 taxonDescription
.setTaxon(acceptedTaxon
);
696 taxonDescription
.addSource(null,null,refMods
,null);
698 importer
.getDescriptionService().saveOrUpdate(taxonDescription
);
699 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
// Raw text with added still false is stored as a MATERIALS_EXAMINED association.
// NOTE(review): derivedUnitBase may still be null here if no collection event was seen
// before this point; the setTitleCache call below would then fail - confirm against the
// full source.
701 if (!rawAssociation
.isEmpty() && !added
){
702 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
703 acceptedTaxon
.addDescription(taxonDescription
);
705 IndividualsAssociation indAssociation
= IndividualsAssociation
.NewInstance();
707 Feature feature
= Feature
.MATERIALS_EXAMINED();
708 if(!StringUtils
.isEmpty(rawAssociation
)) {
709 derivedUnitBase
.setTitleCache(rawAssociation
, true);
711 indAssociation
.setAssociatedSpecimenOrObservation(derivedUnitBase
);
712 indAssociation
.setFeature(feature
);
713 indAssociation
.addSource(null, null, refMods
, null);
715 taxonDescription
.addElement(indAssociation
);
716 taxonDescription
.setTaxon(acceptedTaxon
);
717 taxonDescription
.addSource(null,null,refMods
,null);
719 importer
.getDescriptionService().saveOrUpdate(taxonDescription
);
720 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
730 * @param materials: the XML node group
731 * @param acceptedTaxon: the current accepted Taxon
732 * @param refMods: the current reference extracted from the MODS
// Handles a single materials/collection-event node directly: parses it into a field
// observation, saves the derived unit and links it to the accepted taxon through an
// IndividualsAssociation. The event argument selects the feature: "collection"
// (case-insensitive) uses makeFeature; the MATERIALS_EXAMINED assignment below is presumably
// the else-branch (the else keyword is not visible) - confirm.
// NOTE(review): the declaration of descr and some braces/else keywords are missing from this listing.
734 @SuppressWarnings("rawtypes")
735 private void extractMaterialsDirect(Node materials
, Taxon acceptedTaxon
, Reference
<?
> refMods
, String event
) {
736 // logger.info("EXTRACTMATERIALS");
737 // logger.info("acceptedTaxon: "+acceptedTaxon);
740 DerivedUnitBase derivedUnitBase
=null;
741 MySpecimenOrObservation myspecimenOrObservation
= null;
// derivedUnitBase is still null when passed in; the unit actually used is the one returned.
744 myspecimenOrObservation
= extractSpecimenOrObservation(materials
,derivedUnitBase
, DerivedUnitType
.FieldObservation
);
745 derivedUnitBase
= myspecimenOrObservation
.getDerivedUnitBase();
746 descr
=myspecimenOrObservation
.getDescr();
748 derivedUnitBase
.addSource(null,null,refMods
,null);
750 importer
.getOccurrenceService().saveOrUpdate(derivedUnitBase
);
752 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
753 acceptedTaxon
.addDescription(taxonDescription
);
756 IndividualsAssociation indAssociation
= IndividualsAssociation
.NewInstance();
// Choose the feature for the association based on the event kind.
758 Feature feature
=null;
759 if (event
.equalsIgnoreCase("collection")){
760 feature
= makeFeature(derivedUnitBase
);
763 feature
= Feature
.MATERIALS_EXAMINED();
// A non-empty description text replaces the unit's title cache.
765 if(!StringUtils
.isEmpty(descr
)) {
766 derivedUnitBase
.setTitleCache(descr
, true);
768 indAssociation
.setAssociatedSpecimenOrObservation(derivedUnitBase
);
769 indAssociation
.setFeature(feature
);
770 indAssociation
.addSource(null, null, refMods
, null);
772 taxonDescription
.addElement(indAssociation
);
773 taxonDescription
.setTaxon(acceptedTaxon
);
774 taxonDescription
.addSource(null,null,refMods
,null);
776 importer
.getDescriptionService().saveOrUpdate(taxonDescription
);
777 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
784 * @param description: the XML node group
785 * @param acceptedTaxon: the current acceptedTaxon
786 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
787 * @param nametosave: the list of objects to save into the CDM
788 * @param refMods: the current reference extracted from the MODS
789 * @param featureName: the feature name
// Stores the text of a description node under a feature identified by name: for each tax:p
// child the linked names and text fragments are collected, the feature is looked up among
// the existing Feature terms by title cache (case-insensitive) and created when absent, and
// the joined text is stored through setParticularDescription.
// NOTE(review): the declaration of descr and several braces are missing from this listing,
// so the exact nesting of the feature lookup relative to the loops cannot be read off here.
791 private void extractSpecificFeature(Node description
, Taxon acceptedTaxon
, Taxon defaultTaxon
,
792 List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
, String featureName
) {
793 NodeList children
= description
.getChildNodes();
794 NodeList insideNodes
;
796 String localdescr
="";
798 // String fullContent = description.getTextContent();
799 for (int i
=0;i
<children
.getLength();i
++){
// Non-blank text directly under the description node is appended to descr.
801 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text") && !children
.item(i
).getTextContent().trim().isEmpty()){
802 descr
+= children
.item(i
).getTextContent().trim();
804 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
805 insideNodes
=children
.item(i
).getChildNodes();
// Ordered fragments (names and text) of the current paragraph.
806 List
<String
> blabla
= new ArrayList
<String
>();
807 for (int j
=0;j
<insideNodes
.getLength();j
++){
// Linked name: the name's string form before the first "sec" is kept.
808 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
809 String linkedTaxon
= getTaxonNameBaseFromXML(insideNodes
.item(j
), nametosave
,refMods
).toString();//TODO NOT IMPLEMENTED IN THE CDM YET
810 blabla
.add(linkedTaxon
.split("sec")[0]);
812 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("#text")) {
813 if(!insideNodes
.item(j
).getTextContent().trim().isEmpty()){
814 blabla
.add(insideNodes
.item(j
).getTextContent().trim());
815 localdescr
+= insideNodes
.item(j
).getTextContent().trim();
// Only paragraphs that produced content are stored.
819 if (!blabla
.isEmpty()) {
// Look the feature up by title among all Feature terms returned by the term service.
820 List
<DefinedTermBase
> features
= importer
.getTermService().list(Feature
.class, null,null,null,null);
821 Feature currentFeature
=null;
822 for (DefinedTermBase feature
: features
){
823 String tmpF
= ((Feature
)feature
).getTitleCache();
824 if (tmpF
.equalsIgnoreCase(featureName
)) {
825 currentFeature
=(Feature
)feature
;
// Unknown feature name: create the term with the name used for all three arguments and save it.
828 if (currentFeature
== null) {
829 currentFeature
=Feature
.NewInstance(featureName
, featureName
, featureName
);
830 importer
.getTermService().saveOrUpdate(currentFeature
);
// The fragments are joined with single spaces and stored under the resolved feature.
832 setParticularDescription(StringUtils
.join(blabla
," "),acceptedTaxon
,defaultTaxon
, refMods
,currentFeature
);
844 * @param children: the XML node group
845 * @param nametosave: the list of objects to save into the CDM
846 * @param acceptedTaxon: the current acceptedTaxon
847 * @param refMods: the current reference extracted from the MODS
848 * @param fullContent :the parsed XML content
849 * @return a list of description (text)
// Converts the tax:p children of a node into a list of description strings, one per
// paragraph: linked names, text fragments and bibliographic references are collected in
// order and joined with single spaces. Collection events found inside paragraphs are set
// aside and handed to extractMaterialsDirect after the loop.
// NOTE(review): the declaration of descr, the updates of nodeKnown and several braces are
// missing from this listing.
851 @SuppressWarnings("unused")
852 private List
<String
> parseParagraph(List
<TaxonNameBase
> nametosave
, Taxon acceptedTaxon
, Reference
<?
> refMods
, Node paragraph
, Feature feature
){
853 List
<String
> fullDescription
= new ArrayList
<String
>();
854 // String localdescr;
856 NodeList insideNodes
;
// Collection events met inside paragraphs are deferred to the end of the method.
857 boolean collectionEvent
= false;
858 List
<Node
>collectionEvents
= new ArrayList
<Node
>();
860 NodeList children
= paragraph
.getChildNodes();
862 for (int i
=0;i
<children
.getLength();i
++){
// Non-blank text directly under the node is appended to descr.
864 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text") && !children
.item(i
).getTextContent().trim().isEmpty()){
865 descr
+= children
.item(i
).getTextContent().trim();
867 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
868 insideNodes
=children
.item(i
).getChildNodes();
// Ordered fragments of the current paragraph.
869 List
<String
> blabla
= new ArrayList
<String
>();
870 for (int j
=0;j
<insideNodes
.getLength();j
++){
// NOTE(review): presumably set to true by each handled node kind below so that unhandled
// nodes can be logged; those assignments are not visible - confirm.
871 boolean nodeKnown
= false;
// Linked name: the name's string form before the first "sec" is kept.
872 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
873 String linkedTaxon
= getTaxonNameBaseFromXML(insideNodes
.item(j
), nametosave
,refMods
).toString();//TODO NOT IMPLEMENTED IN THE CDM YET
874 blabla
.add(linkedTaxon
.split("sec")[0]);
877 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("#text")) {
878 if(!insideNodes
.item(j
).getTextContent().trim().isEmpty()){
879 blabla
.add(insideNodes
.item(j
).getTextContent().trim());
880 // localdescr += insideNodes.item(j).getTextContent().trim();
// Bibliographic reference: a trailing ';' is replaced by '.', and the text is passed through
// a generic Reference's title cache before being added.
884 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:bibref")) {
885 String ref
= insideNodes
.item(j
).getTextContent().trim();
886 if (ref
.endsWith(";") && ((ref
.length())>1)) {
887 ref
=ref
.substring(0, ref
.length()-1)+".";
889 Reference
<?
> reference
= ReferenceFactory
.newGeneric();
890 reference
.setTitleCache(ref
, true);
891 blabla
.add(reference
.getTitleCache());
894 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:collection_event")) {
895 collectionEvent
=true;
896 collectionEvents
.add(insideNodes
.item(j
));
// NOTE(review): the same message is logged at both info and warn level.
900 logger
.info("Node not handled yet : "+insideNodes
.item(j
).getNodeName());
901 logger
.warn("Node not handled yet : "+insideNodes
.item(j
).getNodeName());
// One description string per paragraph that produced content.
905 if (!blabla
.isEmpty()) {
906 fullDescription
.add(StringUtils
.join(blabla
," "));
// Deferred collection events are imported as materials with the "collection" event kind.
910 if (collectionEvent
) {
911 logger
.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature
.toString());
912 for (Node coll
:collectionEvents
){
913 extractMaterialsDirect(coll
, acceptedTaxon
, refMods
, "collection");
916 return fullDescription
;
921 * @param description: the XML node group
922 * @param acceptedTaxon: the current acceptedTaxon
923 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
924 * @param nametosave: the list of objects to save into the CDM
925 * @param refMods: the current reference extracted from the MODS
926 * @param feature: the feature to link the data with
928 private void extractFeature(Node description
, Taxon acceptedTaxon
, Taxon defaultTaxon
, List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
, Feature feature
){
929 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
930 List
<String
> fullDescription
= parseParagraph( nametosave
, acceptedTaxon
, refMods
, description
,feature
);
932 if (!fullDescription
.isEmpty()) {
933 setParticularDescription(StringUtils
.join(fullDescription
,"<br/>"),acceptedTaxon
,defaultTaxon
, refMods
,feature
);
940 * @param descr: the XML Nodegroup to parse
941 * @param acceptedTaxon: the current acceptedTaxon
942 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
943 * @param refMods: the current reference extracted from the MODS
944 * @param currentFeature: the feature name
947 private void setParticularDescription(String descr
, Taxon acceptedTaxon
, Taxon defaultTaxon
, Reference
<?
> refMods
, Feature currentFeature
) {
948 // logger.info("setParticularDescription "+currentFeature);
949 // logger.info("acceptedTaxon: "+acceptedTaxon);
950 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
952 TextData textData
= TextData
.NewInstance();
953 textData
.setFeature(currentFeature
);
954 textData
.addSource(null,null,refMods
,null);
956 textData
.putText(Language
.UNKNOWN_LANGUAGE(), descr
+"<br/>");
958 if(! descr
.isEmpty() && (acceptedTaxon
!=null)){
959 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
960 td
.addElement(textData
);
961 td
.addSource(null,null,refMods
,null);
962 acceptedTaxon
.addDescription(td
);
963 importer
.getDescriptionService().saveOrUpdate(td
);
964 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
967 if(! descr
.isEmpty() && (acceptedTaxon
== null) && (defaultTaxon
!= null)){
969 Taxon tmp
=(Taxon
) importer
.getTaxonService().find(defaultTaxon
.getUuid());
971 defaultTaxon
=CdmBase
.deproxy(tmp
,Taxon
.class);
973 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
976 logger
.debug("TAXON EXISTS"+defaultTaxon
);
979 TaxonDescription td
=importer
.getTaxonDescription(defaultTaxon
, false, true);
980 defaultTaxon
.addDescription(td
);
981 td
.addElement(textData
);
982 td
.addSource(null,null,refMods
,null);
983 importer
.getDescriptionService().saveOrUpdate(td
);
984 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
991 * @param synonyms: the XML Nodegroup to parse
992 * @param nametosave: the list of objects to save into the CDM
993 * @param acceptedTaxon: the current acceptedTaxon
994 * @param refMods: the current reference extracted from the MODS
996 @SuppressWarnings({ "rawtypes", "unchecked" })
997 private void extractSynonyms(Node synonyms
, List
<TaxonNameBase
> nametosave
,Taxon acceptedTaxon
, Reference
<?
> refMods
) {
998 // logger.info("extractSynonyms: "+acceptedTaxon);
999 Taxon ttmp
= (Taxon
) importer
.getTaxonService().find(acceptedTaxon
.getUuid());
1001 acceptedTaxon
= CdmBase
.deproxy(ttmp
,Taxon
.class);
1004 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1006 NodeList children
= synonyms
.getChildNodes();
1007 TaxonNameBase nameToBeFilled
= null;
1008 List
<String
> names
= new ArrayList
<String
>();
1010 String identifier
="";
1012 for (int i
=0;i
<children
.getLength();i
++){
1013 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1014 NodeList tmp
= children
.item(i
).getChildNodes();
1015 // String fullContent = children.item(i).getTextContent();
1016 for (int j
=0; j
< tmp
.getLength();j
++){
1017 if(tmp
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1020 enames
= extractScientificName(tmp
.item(j
));
1021 if (enames
[1].isEmpty()) {
1022 names
.add(enames
[0]+"---"+enames
[2]+"---"+enames
[3]);
1024 names
.add(enames
[1]+"---"+enames
[2]+"---"+enames
[3]);
1026 } catch (TransformerFactoryConfigurationError e
) {
1028 } catch (TransformerException e
) {
1036 for(String name
:names
){
1037 System
.out
.println("HANDLE NAME "+name
);
1040 rank
= Rank
.getRankByName(name
.split("---")[1]);
1041 } catch (UnknownCdmTypeException e
) {
1042 logger
.warn("Rank problem!");
1046 identifier
= name
.split("---")[2];
1047 }catch(Exception e
){logger
.warn("identifier empty"); identifier
="";}
1048 name
= name
.split("---")[0];
1050 String original
= name
;
1052 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
1053 nameToBeFilled
= parser
.parseFullName(name
, nomenclaturalCode
, rank
);
1054 if (nameToBeFilled
.hasProblem() &&
1055 !((nameToBeFilled
.getParsingProblems().size()==1) && nameToBeFilled
.getParsingProblems().contains(ParserProblem
.CheckRank
)) ) {
1056 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1057 nameToBeFilled
= solveNameProblem(original
, name
, parser
);
1059 nameToBeFilled
= getTaxonNameBase(nameToBeFilled
,nametosave
);
1060 Synonym synonym
= Synonym
.NewInstance(nameToBeFilled
, refMods
);
1063 if (!identifier
.isEmpty() && (identifier
.length()>2)){
1064 setLSID(identifier
, synonym
);
1067 acceptedTaxon
.addSynonym(synonym
, SynonymRelationshipType
.SYNONYM_OF());
1068 System
.out
.println("SYNONYM");
1070 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1080 * @param refgroup: the XML nodes
1081 * @param nametosave: the list of objects to save into the CDM
1082 * @param acceptedTaxon: the current acceptedTaxon
1083 * @param nametosave: the list of objects to save into the CDM
1084 * @param refMods: the current reference extracted from the MODS
1085 * @return the acceptedTaxon that was passed in, deproxied
// Reads the tax:p paragraphs of a reference group. Each paragraph is scanned twice:
// first for tax:bibref children, whose text becomes the nomenclatural reference and a
// CITATION text element of the accepted taxon, then for tax:name and #text children,
// from which either the accepted taxon itself is updated or synonyms are created.
// Returns the deproxied acceptedTaxon.
1087 @SuppressWarnings({ "null", "unused" ,"rawtypes" })
1088 private Taxon
extractReferences(Node refgroup
, List
<TaxonNameBase
> nametosave
, Taxon acceptedTaxon
, Reference
<?
> refMods
) {
1089 // logger.info("extractReferences");
1090 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1092 NodeList children
= refgroup
.getChildNodes();
1093 NonViralName
<?
> nameToBeFilled
= null;
1094 boolean accepted
=true;
// Only tax:p children of the reference group are looked at.
1095 for (int i
=0;i
<children
.getLength();i
++){
1096 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1097 NodeList references
= children
.item(i
).getChildNodes();
1099 boolean foundBibref
=false;
// First pass over the paragraph: tax:bibref children.
1100 for (int j
=0;j
<references
.getLength();j
++){
1101 if(references
.item(j
).getNodeName().equalsIgnoreCase("tax:bibref")){
1103 String ref
= references
.item(j
).getTextContent().trim();
// A trailing ';' is replaced by '.'.
1104 if (ref
.endsWith(";") && ((ref
.length())>1)) {
1105 ref
=ref
.substring(0, ref
.length()-1)+".";
// The treatment's main name is removed from the reference when the reference starts
// with it but does not also end with it.
1107 if (ref
.startsWith(treatmentMainName
) && !ref
.endsWith(treatmentMainName
)) {
1108 ref
=ref
.replace(treatmentMainName
, "");
// NOTE(review): replace() removes every '.' and ',' in the string, not only the
// leading ones that the loop condition tests for - confirm this is intended.
1110 while (ref
.startsWith(".") || ref
.startsWith(",")) {
1111 ref
=ref
.replace(".","").replace(",","").trim();
1115 // logger.info("Current reference :"+nbRef+", "+ref+", "+treatmentMainName+"--"+ref.indexOf(treatmentMainName));
// The cleaned text is used as the title cache of a new generic reference.
1116 Reference
<?
> reference
= ReferenceFactory
.newGeneric();
1117 reference
.setTitleCache(ref
, true);
1119 boolean makeEmpty
= false;
1120 // Rank rank = null;
1121 // logger.info("TREATMENTMAINNAME: "+treatmentMainName);
1122 // logger.info("ref: "+ref);
1129 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
// An empty name of the class matching the nomenclatural code is created.
1130 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICBN
)){
1131 nameToBeFilled
= BotanicalName
.NewInstance(null);
1133 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICZN
)){
1134 nameToBeFilled
= ZoologicalName
.NewInstance(null);
1136 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNB
)){
1137 nameToBeFilled
= BacterialName
.NewInstance(null);
// The reference becomes the nomenclatural reference of the accepted taxon's name and
// of the fresh name; refMods is added as a source of the taxon.
1140 acceptedTaxon
.getName().setNomenclaturalReference(reference
);
1141 nameToBeFilled
.setNomenclaturalReference(reference
);
1142 acceptedTaxon
.addSource(null,null,refMods
,null);
1144 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
1145 acceptedTaxon
.addDescription(td
);
1146 acceptedTaxon
.addSource(null,null,refMods
,null);
// The reference is also recorded as a CITATION text element of the taxon description.
1148 TextData textData
= TextData
.NewInstance(Feature
.CITATION());
1150 textData
.addSource(null, null, reference
, null, acceptedTaxon
.getName(), ref
);
1151 td
.addElement(textData
);
1152 td
.addSource(null,null,refMods
,null);
1154 importer
.getDescriptionService().saveOrUpdate(td
);
1156 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1157 // logger.warn("BWAAHHHH: "+nameToBeFilled.getParsingProblems()+", "+ref);
1162 String refString
="";
1164 String identifier
="";
// Second pass over the same paragraph: tax:name and #text children.
1165 for (int j
=0;j
<references
.getLength();j
++){
1166 //no bibref tag inside
1167 logger
.info("references.item(j).getNodeName()"+references
.item(j
).getNodeName());
// A tax:name child is resolved into "name---rank---identifier"; entry 1 of the
// extracted array is used as the name unless it is empty, then entry 0.
1168 if (references
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1171 enames
= extractScientificName(references
.item(j
));
1172 if (enames
[1].isEmpty()) {
1173 name
=enames
[0]+"---"+enames
[2]+"---"+enames
[3];
1175 name
=enames
[1]+"---"+enames
[2]+"---"+enames
[3];
1177 } catch (TransformerFactoryConfigurationError e
) {
1179 } catch (TransformerException e
) {
// The trimmed content of a text node is kept as the reference string.
1185 if (references
.item(j
).getNodeName().equalsIgnoreCase("#text")){
1186 refString
= references
.item(j
).getTextContent().trim();
// Non-empty text while no name has been read yet: the text itself is parsed as a
// full name and added as a synonym of the accepted taxon.
// NOTE(review): name is empty in this branch, so index 3 of the split cannot exist and
// the catch below always resets identifier to "".
1188 if(references
.item(j
).getNodeName().equalsIgnoreCase("#text") && name
.isEmpty() && !references
.item(j
).getTextContent().trim().isEmpty()){
1190 identifier
= name
.split("---")[3];
1191 }catch(Exception e
){logger
.warn("no identifier");identifier
="";}
1192 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
1193 String fullLineRefName
= references
.item(j
).getTextContent().trim();
1194 TaxonNameBase nameTBF
= parser
.parseFullName(fullLineRefName
, nomenclaturalCode
, Rank
.UNKNOWN_RANK());
// A lone CheckRank problem is tolerated; any other parsing problem goes to solveNameProblem.
1195 if (nameTBF
.hasProblem() &&
1196 !((nameTBF
.getParsingProblems().size()==1) && nameTBF
.getParsingProblems().contains(ParserProblem
.CheckRank
)) ) {
1197 nameTBF
=solveNameProblem(fullLineRefName
, fullLineRefName
,parser
);
1199 nameTBF
= getTaxonNameBase(nameTBF
,nametosave
);
1200 Synonym synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
// NOTE(review): the identifier is set on acceptedTaxon here, not on the synonym that
// was just created - confirm which one is meant.
1203 if (!identifier
.isEmpty() && (identifier
.length()>2)){
1204 setLSID(identifier
, acceptedTaxon
);
1207 acceptedTaxon
.addSynonym(synonym
, SynonymRelationshipType
.SYNONYM_OF());
1208 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
// A name was read from a tax:name child.
1212 if(!name
.isEmpty()){
1213 logger
.info("acceptedTaxon and name: *"+acceptedTaxon
.getTitleCache()+"*, *"+name
+"*");
// The part of the taxon title before "sec" equals the extracted name (ignoring case):
// the identifier and the reference string are applied to the accepted taxon itself.
1214 if (acceptedTaxon
.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(name
.split("---")[0].trim())){
1215 identifier
= name
.split("---")[3];
1216 Reference
<?
> refS
= ReferenceFactory
.newGeneric();
1217 refS
.setTitleCache(refString
, true);
1218 // TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1219 // acceptedTaxon.addDescription(td);
1220 // acceptedTaxon.addSource(refSource);
1222 // TextData textData = TextData.NewInstance(Feature.CITATION());
1224 // textData.addSource(null, null, refS, null);
1225 // td.addElement(textData);
1226 // td.addSource(refSource);
1227 // importer.getDescriptionService().saveOrUpdate(td);
1230 if (!identifier
.isEmpty() && (identifier
.length()>2)){
1231 setLSID(identifier
, acceptedTaxon
);
1235 acceptedTaxon
.getName().setNomenclaturalReference(refS
);
1236 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
// Handling of a name that becomes a synonym: rank from the second "---" part.
1241 rank
= Rank
.getRankByName(name
.split("---")[1]);
1242 } catch (Exception e
) {
1243 logger
.warn("Rank or name problem!");
// NOTE(review): name is overwritten with "<name part><refString>" before the identifier
// is read from it below, so the "---" separated identifier part is gone by then
// (unless refString itself contains "---") - confirm the intended order.
1246 name
= name
.split("---")[0].trim() + refString
;
1247 String original
= name
;
1249 identifier
= name
.split("---")[3];
1252 logger
.warn("no identifier");
1255 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
1256 TaxonNameBase nameTBF
= parser
.parseFullName(name
, nomenclaturalCode
, rank
);
1257 if (nameTBF
.hasProblem() &&
1258 !((nameTBF
.getParsingProblems().size()==1) && nameTBF
.getParsingProblems().contains(ParserProblem
.CheckRank
)) ) {
1259 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1260 nameTBF
=solveNameProblem(original
, name
,parser
);
1262 nameTBF
= getTaxonNameBase(nameTBF
,nametosave
);
1263 Synonym synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
// Identifier handling written out inline: the identifier is "id__source"; an id
// containing "lsid" is set as LSID of the synonym, and an IdentifiableSource with the
// id and a generic reference titled with the source is built as well.
1266 if (!identifier
.isEmpty() && (identifier
.length()>2)){
1267 String id
= identifier
.split("__")[0];
1268 String source
= identifier
.split("__")[1];
1269 if (id
.indexOf("lsid")>-1){
1271 LSID lsid
= new LSID(id
);
1272 synonym
.setLsid(lsid
);
1273 } catch (MalformedLSIDException e
) {
1274 // TODO Auto-generated catch block
1275 e
.printStackTrace();
1280 //TODO ADD ORIGINAL SOURCE ID
1281 IdentifiableSource os
= IdentifiableSource
.NewInstance();
1282 os
.setIdInSource(id
);
1283 Reference
<?
> re
= ReferenceFactory
.newGeneric();
1284 re
.setTitle(source
);
1286 synonym
.addSource(os
);
1290 acceptedTaxon
.addSynonym(synonym
, SynonymRelationshipType
.SYNONYM_OF());
1291 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1297 // importer.getClassificationService().saveOrUpdate(classification);
1298 return acceptedTaxon
;
1304 * @param acceptedTaxon
1306 private void setLSID(String identifier
, TaxonBase
<?
> taxon
) {
1307 boolean lsidok
=false;
1308 String id
= identifier
.split("__")[0];
1309 String source
= identifier
.split("__")[1];
1310 if (id
.indexOf("lsid")>-1){
1312 LSID lsid
= new LSID(id
);
1313 taxon
.setLsid(lsid
);
1315 } catch (MalformedLSIDException e
) {
1316 logger
.warn("Malformed LSID");
1320 if ((id
.indexOf("lsid")<0) || !lsidok
){
1321 //ADD ORIGINAL SOURCE ID
1322 IdentifiableSource os
= IdentifiableSource
.NewInstance();
1323 os
.setIdInSource(id
);
1324 Reference
<?
> re
= ReferenceFactory
.newGeneric();
1325 re
.setTitle(source
);
1327 taxon
.addSource(os
);
1333 * try to solve a parsing problem for a scientific name
1334 * @param original : the name from the OCR document
1335 * @param name : the tagged version
1337 * @return the corrected TaxonNameBase
// Second attempt at a scientific name the parser reported problems for: a rank is
// determined, the name is parsed again with that rank and, while problems other than a
// lone CheckRank remain, it is parsed once more as a referenced name.
1339 @SuppressWarnings({ "unchecked", "rawtypes" })
1340 private TaxonNameBase
<?
,?
> solveNameProblem(String original
, String name
, INonViralNameParser parser
) {
// Atomised parts recorded for the original name string in namesMap (may be absent).
1341 Map
<String
,String
> ato
= namesMap
.get(original
);
1342 Rank rank
=Rank
.UNKNOWN_RANK();
// The rank comes either from askForRank(...) or from getRank(ato).
// NOTE(review): the condition choosing between the two is not visible here; presumably
// it depends on whether ato was found - confirm.
1345 rank
=askForRank(original
, Rank
.UNKNOWN_RANK(), nomenclaturalCode
);
1347 rank
= getRank(ato
);
1349 TaxonNameBase
<?
,?
> nameTBF
= parser
.parseFullName(name
, nomenclaturalCode
, rank
);
1350 // logger.info("RANK: "+rank);
// Loop while the name has problems other than a single CheckRank and retry is below 1.
1352 while (nameTBF
.hasProblem() && (retry
<1) && !((nameTBF
.getParsingProblems().size()==1) && nameTBF
.getParsingProblems().contains(ParserProblem
.CheckRank
))){
1353 String fullname
= getFullReference(name
,nameTBF
.getParsingProblems());
// Start again from an empty name of the class matching the nomenclatural code.
1354 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICBN
)){
1355 nameTBF
= BotanicalName
.NewInstance(null);
1357 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICZN
)){
1358 nameTBF
= ZoologicalName
.NewInstance(null);
1360 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNB
)){
1361 nameTBF
= BacterialName
.NewInstance(null);
// Fill the name from the string returned by getFullReference.
1363 parser
.parseReferencedName(nameTBF
, fullname
, rank
, false);
// The tagged name is set as full title cache of the result.
1367 nameTBF
.setFullTitleCache(name
, true);
1368 // logger.info("FULL TITLE CACHE "+name);
1374 * @param nomenclatureNode: the XML nodes
1375 * @param nametosave: the list of objects to save into the CDM
1376 * @param refMods: the current reference extracted from the MODS
// Reads the nomenclature node of a treatment and returns the accepted taxon for it.
// The treatment's name is taken from the tax:name child; a matching taxon is looked up
// with findBestMatchingTaxon and, when none is found, a new taxon is created, saved and
// placed in the classification below a parent.
1379 @SuppressWarnings({ "rawtypes", "unused" })
1380 private Taxon
extractNomenclature(Node nomenclatureNode
, List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
) {
1381 // logger.info("extractNomenclature");
1382 NodeList children
= nomenclatureNode
.getChildNodes();
1384 TaxonNameBase nameToBeFilled
= null;
1385 Taxon acceptedTaxon
= null;
1386 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
1387 String identifier
="";
1389 Rank rank
= Rank
.UNKNOWN_RANK();
1390 // String fullContent = nomenclatureNode.getTextContent();
// Walk the direct children of the nomenclature node.
1391 for (int i
=0;i
<children
.getLength();i
++){
// Text nodes are kept in freetext (untrimmed).
1392 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text")) {
1393 freetext
=children
.item(i
).getTextContent();
// Collection events are passed on with the current acceptedTaxon, which is still null
// unless a tax:name child was handled earlier in this loop.
1395 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1396 System
.out
.println("COLLECTION EVENT INSIDE NOMENCLATURE");
1397 extractMaterialsDirect(children
.item(i
), acceptedTaxon
, refMods
, "collection");
1399 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:name")){
// Result layout of extractScientificName as used here: [0] original treatment name,
// [1] treatment main name, [2] rank name, [3] identifier.
1402 names
= extractScientificName(children
.item(i
));
1403 treatmentMainName
= names
[1];
1404 originalTreatmentName
= names
[0];
1405 rank
= Rank
.getRankByName(names
[2]);
1406 identifier
=names
[3];
1408 } catch (TransformerFactoryConfigurationError e1
) {
1410 } catch (TransformerException e1
) {
1412 } catch (UnknownCdmTypeException e
) {
// The name is imported when its rank is unknown or lower than the configured maximum rank.
1416 if (rank
.equals(Rank
.UNKNOWN_RANK()) || rank
.isLower(configState
.getConfig().getMaxRank())){
1417 maxRankRespected
=true;
1419 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICBN
)){
1420 nameToBeFilled
= BotanicalName
.NewInstance(null);
1422 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICZN
)){
1423 nameToBeFilled
= ZoologicalName
.NewInstance(null);
1425 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNB
)){
1426 nameToBeFilled
= BacterialName
.NewInstance(null);
// Look for an already stored taxon matching the treatment's main name.
1428 acceptedTaxon
= importer
.getTaxonService().findBestMatchingTaxon(treatmentMainName
);
// No match: parse the name and build a new taxon from it.
1429 if (acceptedTaxon
==null ){
1430 nameToBeFilled
= parser
.parseFullName(treatmentMainName
, nomenclaturalCode
, null);
// A lone CheckRank problem is tolerated; any other parsing problem goes to solveNameProblem.
1431 if (nameToBeFilled
.hasProblem() &&
1432 !((nameToBeFilled
.getParsingProblems().size()==1) && nameToBeFilled
.getParsingProblems().contains(ParserProblem
.CheckRank
)) ) {
1433 nameToBeFilled
= solveNameProblem(originalTreatmentName
,treatmentMainName
,parser
);
1435 nameToBeFilled
= getTaxonNameBase(nameToBeFilled
,nametosave
);
// The original spelling of the name is kept as the title of a name description.
1436 if (!originalTreatmentName
.isEmpty()) {
1437 TaxonNameDescription td
= TaxonNameDescription
.NewInstance();
1438 td
.setTitleCache(originalTreatmentName
);
1439 nameToBeFilled
.addDescription(td
);
1441 nameToBeFilled
.addSource(null,null,refMods
,null);
1442 acceptedTaxon
= new Taxon(nameToBeFilled
,(Reference
<?
>) nameToBeFilled
.getNomenclaturalReference() );//TODO TOFIX reference
// Unless the original secundum is to be kept, the configured secundum is set.
1443 if(!configState
.getConfig().doKeepOriginalSecundum()) {
1444 acceptedTaxon
.setSec(configState
.getConfig().getSecundum());
1445 logger
.info("SET SECUNDUM "+configState
.getConfig().getSecundum());
// Identifier handling written out inline: the identifier is "id__source"; an id
// containing "lsid" is tried as LSID, otherwise (or when lsidok is false) the id is
// kept in an IdentifiableSource.
1449 if (!identifier
.isEmpty() && (identifier
.length()>2)){
1450 boolean lsidok
=false;
1451 String id
= identifier
.split("__")[0];
1452 String source
= identifier
.split("__")[1];
1453 if (id
.indexOf("lsid")>-1){
1455 LSID lsid
= new LSID(id
);
1456 acceptedTaxon
.setLsid(lsid
);
1458 } catch (MalformedLSIDException e
) {
1459 logger
.warn("Malformed LSID");
1463 if ((id
.indexOf("lsid")<0) || !lsidok
){
1464 //TODO ADD ORIGINAL SOURCE ID
1465 IdentifiableSource os
= IdentifiableSource
.NewInstance();
1466 os
.setIdInSource(id
);
1467 Reference
<?
> re
= ReferenceFactory
.newGeneric();
1468 re
.setTitle(source
);
1470 acceptedTaxon
.addSource(os
);
1474 acceptedTaxon
.addSource(null,null,refMods
,null);
1475 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
// Place the new taxon in the classification: ask for a parent, create one when none is
// returned, then register the parent/child relation.
1477 Taxon parentTaxon
= askParent(acceptedTaxon
, classification
);
1478 if (parentTaxon
==null){
1479 while (parentTaxon
== null) {
1480 parentTaxon
= createParent(acceptedTaxon
, refMods
);
1481 classification
.addParentChild(parentTaxon
, acceptedTaxon
, refMods
, null);
1484 classification
.addParentChild(parentTaxon
, acceptedTaxon
, refMods
, null);
// NOTE(review): presumably the branch for a taxon that was found by
// findBestMatchingTaxon - the enclosing else is not visible here, confirm.
1487 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1488 Set
<IdentifiableSource
> sources
= acceptedTaxon
.getSources();
1489 boolean sourcelinked
=false;
// Compares the citation title of every existing source with the title cache of refMods.
1490 for (IdentifiableSource source
:sources
){
1491 if (source
.getCitation().getTitle().equalsIgnoreCase(refMods
.getTitleCache())) {
1495 if (!configState
.getConfig().doKeepOriginalSecundum()) {
1496 acceptedTaxon
.setSec(configState
.getConfig().getSecundum());
1497 logger
.info("SET SECUNDUM "+configState
.getConfig().getSecundum());
1500 acceptedTaxon
.addSource(null, null, refMods
, null);
1502 if (!sourcelinked
|| !configState
.getConfig().doKeepOriginalSecundum()){
1504 if (!identifier
.isEmpty() && (identifier
.length()>2)){
1505 setLSID(identifier
, acceptedTaxon
);
1507 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
// NOTE(review): presumably reached when the rank check above fails - confirm.
1511 maxRankRespected
=false;
1515 // importer.getClassificationService().saveOrUpdate(classification);
1516 return acceptedTaxon
;
1520 * @param acceptedTaxon: the current acceptedTaxon
1521 * @param ref: the current reference extracted from the MODS
1522 * @return the parent for the current accepted taxon
// Builds a parent taxon for acceptedTaxon from answers obtained through the ask*
// helpers and links both in the classification. The method calls itself for the newly
// created parent.
// NOTE(review): the conditions that decide which of the steps below run are not visible
// here; the comments describe the individual statements only.
1524 private Taxon
createParent(Taxon acceptedTaxon
, Reference
<?
> ref
) {
1525 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
// The empty list created here is replaced at once by all Rank terms of the term service.
1527 List
<Rank
> rankList
= new ArrayList
<Rank
>();
1528 rankList
= importer
.getTermService().listByTermClass(Rank
.class, null, null, null, null);
// String form of every rank, later handed to askRank.
1530 List
<String
> rankListStr
= new ArrayList
<String
>();
1531 for (Rank r
:rankList
) {
1532 rankListStr
.add(r
.toString());
1535 String s
= acceptedTaxon
.getTitleCache();
// askAddParent receives the title of the accepted taxon; its int result is logged.
1538 int addTaxon
= askAddParent(s
);
1539 logger
.info("ADD TAXON: "+addTaxon
);
1541 Taxon tmp
= askParent(acceptedTaxon
, classification
);
// s is replaced by the result of askSetParent and a rank string is requested for it.
1543 s
= askSetParent(s
);
1544 r
= askRank(s
,rankListStr
);
// An empty name of the class matching the nomenclatural code gets s as title cache and
// the rank resolved from r.
1546 NonViralName
<?
> nameToBeFilled
= null;
1547 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICBN
)){
1548 nameToBeFilled
= BotanicalName
.NewInstance(null);
1550 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICZN
)){
1551 nameToBeFilled
= ZoologicalName
.NewInstance(null);
1553 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNB
)){
1554 nameToBeFilled
= BacterialName
.NewInstance(null);
1556 nameToBeFilled
.setTitleCache(s
);
1557 nameToBeFilled
.setRank(getRank(r
));
1559 tax
= Taxon
.NewInstance(nameToBeFilled
, ref
);
// Recursive call for the new parent, then the parent/child link is registered.
1565 createParent(tax
, ref
);
1566 // logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
1567 classification
.addParentChild(tax
, acceptedTaxon
, ref
, null);
// Adds acceptedTaxon to the classification with addChildTaxon instead of a parent link.
1570 classification
.addChildTaxon(acceptedTaxon
, ref
, null, null);
1573 // logger.info("RETURN: "+tax );
1582 * @throws TransformerFactoryConfigurationError
1583 * @throws TransformerException
1584 * @return a list of possible names
// Reads a tax:name element. The atomised parts below tax:xmldata and the free text of
// the element are collected, the identifier of a tax:xid child is read, and a
// four-element array is filled: entry 2 is the rank as string, entry 3 the identifier
// ("identifier__source").
// NOTE(review): the assignments of entries 0 and 1 are not visible here; callers in this
// file read entry 0 as the original name and entry 1 as the main name - confirm.
1586 private String
[] extractScientificName(Node name
) throws TransformerFactoryConfigurationError
, TransformerException
{
1587 // System.out.println("extractScientificName");
1588 Rank rank
= Rank
.UNKNOWN_RANK();
1589 NodeList children
= name
.getChildNodes();
1590 String fullName
= "";
1592 String identifier
="";
1593 HashMap
<String
, String
> atomisedMap
= new HashMap
<String
, String
>();
1594 List
<String
> atomisedName
= new ArrayList
<String
>();
1596 String rankStr
= "";
1598 for (int i
=0;i
<children
.getLength();i
++){
// tax:xmldata holds the atomised name parts and the external id.
1599 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:xmldata")){
1600 NodeList atom
= children
.item(i
).getChildNodes();
1601 for (int k
=0;k
<atom
.getLength();k
++){
// tax:xid: the "identifier" attribute, then "__" plus the "source" attribute; a failure
// to read either attribute is only printed.
1602 if (atom
.item(k
).getNodeName().equalsIgnoreCase("tax:xid")){
1604 identifier
= atom
.item(k
).getAttributes().getNamedItem("identifier").getNodeValue();
1605 }catch(Exception e
){
1606 System
.out
.println("pb with identifier, maybe empty");
1609 identifier
+="__"+atom
.item(k
).getAttributes().getNamedItem("source").getNodeValue();
1610 }catch(Exception e
){
1611 System
.out
.println("pb with identifier, maybe empty");
// The lower-cased node name is used as key of the atomised part; for a dwc:taxonRank
// node the key is replaced by the node's text, which is also resolved with getRank.
1615 rankStr
= atom
.item(k
).getNodeName().toLowerCase();
1616 // logger.info("RANKSTR:*"+rankStr+"*");
1617 if (rankStr
.equalsIgnoreCase("dwc:taxonRank")) {
1618 rankStr
=atom
.item(k
).getTextContent().trim();
1619 tmpRank
= getRank(rankStr
);
1621 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
1622 if (tmpRank
!= null){
// Every part is stored both by key and in document order.
1626 atomisedMap
.put(rankStr
.toLowerCase(),atom
.item(k
).getTextContent().trim());
1627 atomisedName
.add(atom
.item(k
).getTextContent().trim());
// A non-blank text child of tax:name is the name as written in the document.
1630 if(children
.item(i
).getNodeName().equalsIgnoreCase("#text") && !StringUtils
.isBlank(children
.item(i
).getTextContent())){
1631 // logger.info("name non atomised: "+children.item(i).getTextContent());
1632 fullName
= children
.item(i
).getTextContent().trim();
1633 // logger.info("fullname: "+fullName);
// Remove the blank after "(" and before ")".
1636 if (fullName
!= null){
1637 fullName
= fullName
.replace("( ", "(");
1638 fullName
= fullName
.replace(" )",")");
// Without free text the name is the atomised parts joined with blanks.
1641 if (fullName
.trim().isEmpty()){
1642 fullName
=StringUtils
.join(atomisedName
," ");
1645 while(fullName
.contains(" ")) {
1646 fullName
=fullName
.replace(" ", " ");
1647 // logger.info("while");
// Remember the atomised parts under the full name in namesMap.
1650 namesMap
.put(fullName
,atomisedMap
);
1651 String atomisedNameStr
= StringUtils
.join(atomisedName
," ");
1652 while(atomisedNameStr
.contains(" ")) {
1653 atomisedNameStr
=atomisedNameStr
.replace(" ", " ");
1654 // logger.info("atomisedNameStr: "+atomisedNameStr);
1656 atomisedNameStr
=atomisedNameStr
.trim();
// When the written name and the atomised name differ (ignoring case), getScientificName
// supplies the name to use.
1658 if (fullName
!= null){
1659 if (!fullName
.equalsIgnoreCase(atomisedNameStr
)) {
1660 newName
=getScientificName(fullName
,atomisedNameStr
,classification
.getTitleCache(),name
);
1665 rank
= askForRank(newName
, rank
, nomenclaturalCode
);
1666 String
[] names
= new String
[4];
1669 names
[2]=rank
.toString();
1670 names
[3]=identifier
;
1676 * @param classification2 the classification that replaces the current one
1678 public void updateClassification(Classification classification2
) {
1679 classification
= classification2
;