2 * Copyright (C) 2013 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
9 package eu
.etaxonomy
.cdm
.io
.taxonx2013
;
11 import java
.io
.BufferedWriter
;
13 import java
.io
.FileWriter
;
14 import java
.io
.IOException
;
16 import java
.util
.ArrayList
;
17 import java
.util
.Arrays
;
18 import java
.util
.HashMap
;
19 import java
.util
.List
;
22 import java
.util
.UUID
;
23 import java
.util
.regex
.Matcher
;
24 import java
.util
.regex
.Pattern
;
26 import javax
.xml
.transform
.TransformerException
;
27 import javax
.xml
.transform
.TransformerFactoryConfigurationError
;
29 import org
.apache
.commons
.lang
.StringUtils
;
30 import org
.apache
.log4j
.Logger
;
31 import org
.w3c
.dom
.Node
;
32 import org
.w3c
.dom
.NodeList
;
34 import com
.ibm
.lsid
.MalformedLSIDException
;
36 import eu
.etaxonomy
.cdm
.api
.facade
.DerivedUnitFacade
;
37 import eu
.etaxonomy
.cdm
.api
.service
.pager
.Pager
;
38 import eu
.etaxonomy
.cdm
.model
.agent
.AgentBase
;
39 import eu
.etaxonomy
.cdm
.model
.agent
.Person
;
40 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
41 import eu
.etaxonomy
.cdm
.model
.common
.IdentifiableSource
;
42 import eu
.etaxonomy
.cdm
.model
.common
.LSID
;
43 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
44 import eu
.etaxonomy
.cdm
.model
.common
.OriginalSourceType
;
45 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
46 import eu
.etaxonomy
.cdm
.model
.description
.FeatureNode
;
47 import eu
.etaxonomy
.cdm
.model
.description
.FeatureTree
;
48 import eu
.etaxonomy
.cdm
.model
.description
.IndividualsAssociation
;
49 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
50 import eu
.etaxonomy
.cdm
.model
.description
.TaxonNameDescription
;
51 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
52 import eu
.etaxonomy
.cdm
.model
.name
.INonViralName
;
53 import eu
.etaxonomy
.cdm
.model
.name
.ITaxonNameBase
;
54 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
55 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatus
;
56 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatusType
;
57 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
58 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
59 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivedUnit
;
60 import eu
.etaxonomy
.cdm
.model
.occurrence
.SpecimenOrObservationType
;
61 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
62 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
63 import eu
.etaxonomy
.cdm
.model
.taxon
.Classification
;
64 import eu
.etaxonomy
.cdm
.model
.taxon
.Synonym
;
65 import eu
.etaxonomy
.cdm
.model
.taxon
.SynonymType
;
66 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
67 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
68 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonNode
;
69 import eu
.etaxonomy
.cdm
.persistence
.dto
.UuidAndTitleCache
;
70 import eu
.etaxonomy
.cdm
.persistence
.query
.MatchMode
;
71 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
72 import eu
.etaxonomy
.cdm
.strategy
.parser
.INonViralNameParser
;
73 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImpl
;
74 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImplRegExBase
;
81 public class TaxonXTreatmentExtractor
extends TaxonXExtractor
{
private static final String PUBLICATION_YEAR = "publicationYear";

private static final Logger logger = Logger.getLogger(TaxonXTreatmentExtractor.class);

/** Feature label passed on for nodes that match none of the explicitly handled element types. */
private static final String notMarkedUp = "Not marked-up";
/** UUID used to look up the feature tree in buildFeatureTree and assigned to a newly created tree there. */
private static final UUID proIbioTreeUUID = UUID.fromString("2c49f506-c7f7-44de-a8b9-2e695de3769c");
private static final UUID OtherUUID = UUID.fromString("6465f8aa-2175-446f-807e-7163994b120f");
private static final UUID NotMarkedUpUUID = UUID.fromString("796fe3a5-2c9c-4a89-b298-7598ca944063");
private static final boolean skippQuestion = true;

/** Set once in the constructor. */
private final NomenclaturalCode nomenclaturalCode;
/** Set in the constructor and re-read from the classification service by reloadClassification. */
private Classification classification;

private String treatmentMainName, originalTreatmentName;

private final HashMap<String, Map<String, String>> namesMap = new HashMap<String, Map<String, String>>();

// Both patterns are constant, so they are compiled once for the class rather than
// once per extractor instance; a compiled Pattern is immutable and safe to share.
/** Text that begins with digits, or with '-' followed by digits. */
private static final Pattern keypattern = Pattern.compile("^(\\d+.*|-\\d+.*)");
/** Text of at least one character followed by a digit at the end of the input. */
private static final Pattern keypatternend = Pattern.compile("^.+?\\d$");

/** Gates most of the node handlers and the final save; starts out false. */
private boolean maxRankRespected = false;
/** Features collected during the import, keyed by the feature's title cache. */
private Map<String, Feature> featuresMap;

private MyName currentMyName;

/** Reference passed to the constructor as urlSource and handed to the source handler. */
private Reference sourceUrlRef;

private String followingText; //text element immediately following a tax:name in tax:nomenclature TODO move to state
private String usedFollowingTextPrefix; //the part of the following text which has been used during taxon name creation

private final TaxonXAddSources sourceHandler = new TaxonXAddSources();
/**
 * Creates an extractor and wires its source handler.
 *
 * @param nomenclaturalCode the nomenclatural code stored on this extractor
 * @param classification the classification stored on this extractor
 * @param importer the importer; also supplies the agent service passed to {@code prepareCollectors}
 * @param configState the import state, stored as {@code state2} and handed to the source handler
 * @param featuresMap the feature map stored on this extractor
 * @param urlSource the reference stored as {@code sourceUrlRef} and handed to the source handler
 */
public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode, Classification classification,
        TaxonXImport importer, TaxonXImportState configState, Map<String, Feature> featuresMap,
        Reference urlSource) {
    // plain state
    this.nomenclaturalCode = nomenclaturalCode;
    this.classification = classification;
    this.importer = importer;
    this.state2 = configState;
    this.featuresMap = featuresMap;
    this.sourceUrlRef = urlSource;

    // collectors are prepared before the source handler is configured, as in the original order
    prepareCollectors(configState, importer.getAgentService());

    // the source handler receives the same reference, importer and state as this extractor
    sourceHandler.setSourceUrlRef(urlSource);
    sourceHandler.setImporter(importer);
    sourceHandler.setConfigState(configState);
}
137 * extracts all the treatment information and saves it
138 * @param treatmentnode: the XML Node
139 * @param tosave: the list of object to save into the CDM
140 * @param refMods: the reference extracted from the MODS
141 * @param sourceName: the URI of the document
143 @SuppressWarnings({ "rawtypes", "unused" })
145 protected void extractTreatment(Node treatmentnode
, Reference refMods
, URI sourceName
) { logger
.info("extractTreatment");
146 List
<TaxonNameBase
> namesToSave
= new ArrayList
<TaxonNameBase
>();
147 NodeList children
= treatmentnode
.getChildNodes();
148 Taxon acceptedTaxon
=null;
149 boolean hasRefgroup
=false;
152 for (int i
=0;i
<children
.getLength();i
++){
153 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:ref_group")) {
158 for (int i
=0;i
<children
.getLength();i
++){
159 Node child
= children
.item(i
);
160 acceptedTaxon
= handleSingleNode(refMods
, sourceName
, namesToSave
, child
, acceptedTaxon
);
162 // logger.info("saveUpdateNames");
163 if (maxRankRespected
){
164 importer
.getNameService().saveOrUpdate(namesToSave
);
165 importer
.getClassificationService().saveOrUpdate(classification
);
166 //logger.info("saveUpdateNames-ok");
172 private Taxon
handleSingleNode(Reference refMods
, URI sourceName
,
173 List
<TaxonNameBase
> namesToSave
, Node child
, Taxon acceptedTaxon
) {
174 Taxon defaultTaxon
=null;
176 String nodeName
= child
.getNodeName();
177 if (nodeName
.equalsIgnoreCase("tax:nomenclature")){
178 NodeList nomenclatureChildren
= child
.getChildNodes();
179 boolean containsName
= false;
180 for(int k
=0; k
<nomenclatureChildren
.getLength(); k
++){
181 if(nomenclatureChildren
.item(k
).getNodeName().equalsIgnoreCase("tax:name")){
187 reloadClassification();
188 //extract "main" the scientific name
190 acceptedTaxon
= extractNomenclature(child
, namesToSave
, refMods
);
191 }catch(ClassCastException e
){
192 //FIXME exception handling
195 // System.out.println("acceptedTaxon : "+acceptedTaxon);
197 }else if (nodeName
.equalsIgnoreCase("tax:ref_group") && maxRankRespected
){
198 reloadClassification();
199 //extract the References within the document
200 extractReferences(child
, namesToSave
,acceptedTaxon
,refMods
);
201 }else if (nodeName
.equalsIgnoreCase("tax:div") &&
202 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected
){
203 File file
= new File(TaxonXImport
.LOG_FOLDER
+ "multipleTaxonX.txt");
206 writer
= new FileWriter(file
,true);
207 writer
.write(sourceName
+"\n");
210 } catch (IOException e1
) {
211 // TODO Auto-generated catch block
212 logger
.error(e1
.getMessage());
214 // String multiple = askMultiple(children.item(i));
215 String multiple
= "Other";
216 if (multiple
.equalsIgnoreCase("other")) {
217 extractSpecificFeatureNotStructured(child
,acceptedTaxon
, defaultTaxon
,namesToSave
, refMods
,multiple
);
218 }else if (multiple
.equalsIgnoreCase("synonyms")) {
220 extractSynonyms(child
,acceptedTaxon
, refMods
, null);
221 }catch(NullPointerException e
){
222 logger
.warn("the accepted taxon is maybe null");
224 }else if(multiple
.equalsIgnoreCase("material examined")){
225 extractMaterials(child
, acceptedTaxon
, refMods
, namesToSave
);
226 }else if (multiple
.equalsIgnoreCase("distribution")){
227 extractDistribution(child
, acceptedTaxon
, defaultTaxon
, namesToSave
, refMods
);
228 }else if (multiple
.equalsIgnoreCase("type status")){
229 extractDescriptionWithReference(child
, acceptedTaxon
, defaultTaxon
,refMods
, "TypeStatus");
230 }else if (multiple
.equalsIgnoreCase("vernacular name")){
231 extractDescriptionWithReference(child
, acceptedTaxon
, defaultTaxon
,refMods
, Feature
.COMMON_NAME().getTitleCache());
233 extractSpecificFeature(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
,multiple
);
236 else if(nodeName
.equalsIgnoreCase("tax:div") &&
237 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected
){
238 extractFeature(child
,acceptedTaxon
,defaultTaxon
, namesToSave
, refMods
, Feature
.BIOLOGY_ECOLOGY());
240 else if(nodeName
.equalsIgnoreCase("tax:div") &&
241 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("vernacularName") && maxRankRespected
){
242 extractDescriptionWithReference(child
, acceptedTaxon
,defaultTaxon
,refMods
, Feature
.COMMON_NAME().getTitleCache());
244 else if(nodeName
.equalsIgnoreCase("tax:div") &&
245 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected
){
246 extractFeature(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
, Feature
.DESCRIPTION());
248 else if(nodeName
.equalsIgnoreCase("tax:div") &&
249 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected
){
250 extractFeature(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
,Feature
.DIAGNOSIS());
252 else if(nodeName
.equalsIgnoreCase("tax:div") &&
253 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected
){
254 extractFeature(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
, Feature
.DISCUSSION());
256 else if(nodeName
.equalsIgnoreCase("tax:div") &&
257 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("note") && maxRankRespected
){
258 extractFeature(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
, Feature
.DESCRIPTION());
260 else if(nodeName
.equalsIgnoreCase("tax:div") &&
261 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected
){
262 extractDistribution(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
);
264 else if(nodeName
.equalsIgnoreCase("tax:div") &&
265 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected
){
266 extractFeature(child
,acceptedTaxon
,defaultTaxon
,namesToSave
,refMods
,Feature
.ETYMOLOGY());
268 else if(nodeName
.equalsIgnoreCase("tax:div") &&
269 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected
){
270 extractMaterials(child
,acceptedTaxon
, refMods
, namesToSave
);
272 else if(nodeName
.equalsIgnoreCase("tax:figure") && maxRankRespected
){
273 extractSpecificFeature(child
,acceptedTaxon
,defaultTaxon
, namesToSave
, refMods
, "Figure");
275 else if(nodeName
.equalsIgnoreCase("tax:div") &&
276 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") && maxRankRespected
){
277 extractSpecificFeature(child
, acceptedTaxon
,defaultTaxon
, namesToSave
, refMods
, "table");
278 }else if(nodeName
.equalsIgnoreCase("tax:div") &&
279 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected
){
280 //TODO IGNORE keys for the moment
281 //extractKey(children.item(i),acceptedTaxon, nameToSave,source, refMods);
282 extractSpecificFeatureNotStructured(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
,"Keys - unparsed");
285 if (! nodeName
.equalsIgnoreCase("tax:pb")){
286 //logger.info("ANOTHER KIND OF NODES: "+children.item(i).getNodeName()+", "+children.item(i).getAttributes());
287 if (child
.getAttributes() !=null) {
288 logger
.info("First Attribute: " + child
.getAttributes().item(0));
290 extractSpecificFeatureNotStructured(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
, notMarkedUp
);
293 logger
.warn("Unhandled");
296 return acceptedTaxon
;
300 protected Map
<String
,Feature
> getFeaturesUsed(){
306 private void buildFeatureTree() {
307 logger
.info("buildFeatureTree");
308 FeatureTree proibiospheretree
= importer
.getFeatureTreeService().find(proIbioTreeUUID
);
309 if (proibiospheretree
== null){
310 List
<FeatureTree
> trees
= importer
.getFeatureTreeService().list(FeatureTree
.class, null, null, null, null);
311 if (trees
.size()==1) {
312 FeatureTree ft
= trees
.get(0);
313 if (featuresMap
==null) {
314 featuresMap
=new HashMap
<String
, Feature
>();
316 for (Feature feature
: ft
.getDistinctFeatures()){
318 featuresMap
.put(feature
.getTitleCache(), feature
);
322 proibiospheretree
= FeatureTree
.NewInstance();
323 proibiospheretree
.setUuid(proIbioTreeUUID
);
325 // FeatureNode root = proibiospheretree.getRoot();
326 FeatureNode root2
= proibiospheretree
.getRoot();
328 int nbChildren
= root2
.getChildCount()-1;
329 while (nbChildren
>-1){
331 root2
.removeChild(nbChildren
);
332 }catch(Exception e
){logger
.warn("Can't remove child from FeatureTree "+e
);}
338 for (Feature feature
:featuresMap
.values()) {
339 root2
.addChild(FeatureNode
.NewInstance(feature
));
341 importer
.getFeatureTreeService().saveOrUpdate(proibiospheretree
);
348 * @param acceptedTaxon: the current acceptedTaxon
349 * @param nametosave: the list of objects to save into the CDM
350 * @param refMods: the current reference extracted from the MODS
352 /* @SuppressWarnings("rawtypes")
353 private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonNameBase> nametosave, Reference refMods) {
354 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
356 NodeList children = keys.getChildNodes();
358 PolytomousKey poly = PolytomousKey.NewInstance();
359 poly.addSource(OriginalSourceType.Import, null,null,refMods,null);
360 poly.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
361 poly.addTaxonomicScope(acceptedTaxon);
362 poly.setTitleCache("bloup", true);
363 // poly.addCoveredTaxon(acceptedTaxon);
364 PolytomousKeyNode root = poly.getRoot();
365 PolytomousKeyNode previous = null,tmpKey=null;
367 List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
369 // String fullContent = keys.getTextContent();
370 for (int i=0;i<children.getLength();i++){
371 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
372 NodeList paragraph = children.item(i).getChildNodes();
375 for (int j=0;j<paragraph.getLength();j++){
376 if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
377 if (! paragraph.item(j).getTextContent().trim().isEmpty()){
378 key+=paragraph.item(j).getTextContent().trim();
379 // logger.info("KEY: "+j+"--"+key);
382 if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
383 taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
386 // logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
387 if (keypattern.matcher(key).matches()){
388 tmpKey = PolytomousKeyNode.NewInstance(key);
389 if (taxonKey!=null) {
390 tmpKey.setTaxon(taxonKey);
392 polyNodes.add(tmpKey);
393 if (previous == null) {
394 root.addChild(tmpKey);
396 previous.addChild(tmpKey);
400 tmpKey=PolytomousKeyNode.NewInstance(key);
401 if (taxonKey!=null) {
402 tmpKey.setTaxon(taxonKey);
404 polyNodes.add(tmpKey);
405 if (keypatternend.matcher(key).matches()) {
406 root.addChild(tmpKey);
409 previous.addChild(tmpKey);
416 importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
417 importer.getPolytomousKeyService().saveOrUpdate(poly);
423 * @param taxons: the XML Nodegroup
424 * @param nametosave: the list of objects to save into the CDM
425 * @param acceptedTaxon: the current accepted Taxon
426 * @param refMods: the current reference extracted from the MODS
428 * @return Taxon object built
430 @SuppressWarnings({ "rawtypes", "unused" })
431 private TaxonNameBase
getTaxonNameBaseFromXML(Node taxons
, List
<TaxonNameBase
> nametosave
, Reference refMods
, boolean isSynonym
) {
432 // logger.info("getTaxonFromXML");
433 // logger.info("acceptedTaxon: "+acceptedTaxon);
434 logger
.info("getTaxonNameBaseFromXML");
435 TaxonNameBase nameToBeFilled
= null;
437 currentMyName
=new MyName(isSynonym
);
439 NomenclaturalStatusType statusType
= null;
441 String followingText
= null; //needs to be checked if following text is possible
442 currentMyName
= extractScientificName(taxons
,refMods
, null);
443 } catch (TransformerFactoryConfigurationError e1
) {
445 } catch (TransformerException e1
) {
448 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
450 nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
451 if (nameToBeFilled.hasProblem() &&
452 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
453 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
454 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
455 nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
458 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
460 nameToBeFilled
= currentMyName
.getTaxonNameBase();
461 return nameToBeFilled
;
469 private void reloadClassification() {
470 logger
.info("reloadClassification");
471 Classification cl
= importer
.getClassificationService().find(classification
.getUuid());
475 importer
.getClassificationService().saveOrUpdate(classification
);
476 classification
= importer
.getClassificationService().find(classification
.getUuid());
481 // * Create a Taxon for the current NameBase, based on the current reference
482 // * @param taxonNameBase
483 // * @param refMods: the current reference extracted from the MODS
486 // @SuppressWarnings({ "unused", "rawtypes" })
487 // private Taxon getTaxon(TaxonNameBase taxonNameBase, Reference refMods) {
488 // Taxon t = new Taxon(taxonNameBase,null );
489 // if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
490 // t.setSec(configState.getConfig().getSecundum());
491 // logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
493 // /*<<<<<<< .courant
494 // boolean sourceExists=false;
495 // Set<IdentifiableSource> sources = t.getSources();
496 // for (IdentifiableSource src : sources){
497 // String micro = src.getCitationMicroReference();
498 // Reference r = src.getCitation();
499 // if (r.equals(refMods) && micro == null) {
500 // sourceExists=true;
503 // if(!sourceExists) {
504 // t.addSource(null,null,refMods,null);
507 // t.addSource(OriginalSourceType.Import,null,null,refMods,null);
508 // t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
512 private void extractDescriptionWithReference(Node typestatus
, Taxon acceptedTaxon
, Taxon defaultTaxon
, Reference refMods
,
513 String featureName
) {
514 // System.out.println("extractDescriptionWithReference !");
515 logger
.info("extractDescriptionWithReference");
516 NodeList children
= typestatus
.getChildNodes();
518 Feature currentFeature
=getFeatureObjectFromString(featureName
);
520 String r
="";String s
="";
521 for (int i
=0;i
<children
.getLength();i
++){
522 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
523 s
+=children
.item(i
).getTextContent().trim();
525 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:bibref")){
526 r
+= children
.item(i
).getTextContent().trim();
528 if (s
.indexOf(r
)>-1) {
533 Reference currentref
= ReferenceFactory
.newGeneric();
535 currentref
.setTitleCache(r
, true);
539 setParticularDescription(s
,acceptedTaxon
,defaultTaxon
, currentref
, refMods
,currentFeature
);
544 * @param distribution: the XML node group
545 * @param acceptedTaxon: the current accepted Taxon
546 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
547 * @param refMods: the current reference extracted from the MODS
549 @SuppressWarnings("rawtypes")
550 private void extractDistribution(Node distribution
, Taxon acceptedTaxon
, Taxon defaultTaxon
, List
<TaxonNameBase
> nametosave
, Reference refMods
) {
551 logger
.info("extractDistribution");
552 // logger.info("acceptedTaxon: "+acceptedTaxon);
553 NodeList children
= distribution
.getChildNodes();
554 Map
<Integer
,List
<MySpecimenOrObservation
>> specimenOrObservations
= new HashMap
<Integer
, List
<MySpecimenOrObservation
>>();
555 Map
<Integer
,String
> descriptionsFulltext
= new HashMap
<Integer
,String
>();
557 for (int i
=0;i
<children
.getLength();i
++){
558 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
559 NodeList paragraph
= children
.item(i
).getChildNodes();
560 for (int j
=0;j
<paragraph
.getLength();j
++){
561 if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("#text")){
562 extractText(descriptionsFulltext
, i
, paragraph
.item(j
));
564 else if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
565 extractInLine(nametosave
, refMods
, descriptionsFulltext
, i
,paragraph
.item(j
));
567 else if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("tax:collection_event")){
568 MySpecimenOrObservation specimenOrObservation
= new MySpecimenOrObservation();
569 DerivedUnit derivedUnitBase
= null;
570 specimenOrObservation
= extractSpecimenOrObservation(paragraph
.item(j
), derivedUnitBase
, SpecimenOrObservationType
.DerivedUnit
, null);
571 extractTextFromSpecimenOrObservation(specimenOrObservations
, descriptionsFulltext
, i
, specimenOrObservation
);
578 for (int k
:descriptionsFulltext
.keySet()) {
583 for (int k
:specimenOrObservations
.keySet()) {
590 if(acceptedTaxon
!=null){
591 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
592 Feature currentFeature
= Feature
.DISTRIBUTION();
593 // DerivedUnit derivedUnitBase=null;
595 for (int k
=0;k
<=m
;k
++){
596 if(specimenOrObservations
.keySet().contains(k
)){
597 for (MySpecimenOrObservation soo
:specimenOrObservations
.get(k
) ) {
598 handleAssociation(acceptedTaxon
, refMods
, td
, soo
);
602 if (descriptionsFulltext
.keySet().contains(k
)){
603 if (!stringIsEmpty(descriptionsFulltext
.get(k
).trim()) && (descriptionsFulltext
.get(k
).startsWith("Hab.") || descriptionsFulltext
.get(k
).startsWith("Habitat"))){
604 setParticularDescription(descriptionsFulltext
.get(k
),acceptedTaxon
,defaultTaxon
, refMods
, Feature
.HABITAT());
608 handleTextData(refMods
, descriptionsFulltext
, td
, currentFeature
, k
);
612 if (descriptionsFulltext
.keySet().contains(k
) || specimenOrObservations
.keySet().contains(k
)){
613 acceptedTaxon
.addDescription(td
);
614 sourceHandler
.addAndSaveSource(refMods
, td
, null);
615 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
623 * @param descriptionsFulltext
625 * @param currentFeature
628 private void handleTextData(Reference refMods
, Map
<Integer
, String
> descriptionsFulltext
, TaxonDescription td
,
629 Feature currentFeature
, int k
) {
630 //logger.info("handleTextData");
631 TextData textData
= TextData
.NewInstance();
632 textData
.setFeature(currentFeature
);
633 textData
.putText(Language
.UNKNOWN_LANGUAGE(), descriptionsFulltext
.get(k
));
634 sourceHandler
.addSource(refMods
, textData
);
635 td
.addElement(textData
);
639 * @param acceptedTaxon
644 private void handleAssociation(Taxon acceptedTaxon
, Reference refMods
, TaxonDescription td
, MySpecimenOrObservation soo
) {
645 logger
.info("handleAssociation");
646 String descr
=soo
.getDescr();
647 DerivedUnit derivedUnitBase
= soo
.getDerivedUnitBase();
649 sourceHandler
.addAndSaveSource(refMods
, derivedUnitBase
);
651 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
653 Feature feature
=null;
654 feature
= makeFeature(derivedUnitBase
);
655 if(!StringUtils
.isEmpty(descr
)) {
656 derivedUnitBase
.setTitleCache(descr
, true);
659 IndividualsAssociation indAssociation
= createIndividualAssociation(refMods
, derivedUnitBase
, feature
);
661 taxonDescription
.addElement(indAssociation
);
662 sourceHandler
.addAndSaveSource(refMods
, taxonDescription
,null);
663 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
664 td
.setDescribedSpecimenOrObservation(soo
.getDerivedUnitBase());
668 * create an individualAssociation
670 * @param derivedUnitBase
674 private IndividualsAssociation
createIndividualAssociation(Reference refMods
, DerivedUnit derivedUnitBase
,
676 logger
.info("createIndividualAssociation");
677 IndividualsAssociation indAssociation
= IndividualsAssociation
.NewInstance();
678 indAssociation
.setAssociatedSpecimenOrObservation(derivedUnitBase
);
679 indAssociation
.setFeature(feature
);
680 indAssociation
= sourceHandler
.addSource(refMods
, indAssociation
);
681 return indAssociation
;
685 * @param specimenOrObservations
686 * @param descriptionsFulltext
688 * @param specimenOrObservation
690 private void extractTextFromSpecimenOrObservation(Map
<Integer
, List
<MySpecimenOrObservation
>> specimenOrObservations
,
691 Map
<Integer
, String
> descriptionsFulltext
, int i
, MySpecimenOrObservation specimenOrObservation
) {
692 logger
.info("extractTextFromSpecimenOrObservation");
693 List
<MySpecimenOrObservation
> speObsList
= specimenOrObservations
.get(i
);
694 if (speObsList
== null) {
695 speObsList
=new ArrayList
<MySpecimenOrObservation
>();
697 speObsList
.add(specimenOrObservation
);
698 specimenOrObservations
.put(i
,speObsList
);
700 String s
= specimenOrObservation
.getDerivedUnitBase().toString();
701 if (descriptionsFulltext
.get(i
) !=null){
702 s
= descriptionsFulltext
.get(i
)+" "+s
;
704 descriptionsFulltext
.put(i
, s
);
708 * Extract the text with the inline link to a taxon
711 * @param descriptionsFulltext
715 @SuppressWarnings("rawtypes")
716 private void extractInLine(List
<TaxonNameBase
> nametosave
, Reference refMods
, Map
<Integer
, String
> descriptionsFulltext
,
717 int i
, Node paragraph
) {
718 //logger.info("extractInLine");
719 String inLine
=getInlineTextForName(nametosave
, refMods
, paragraph
);
720 if (descriptionsFulltext
.get(i
) !=null){
721 inLine
= descriptionsFulltext
.get(i
)+inLine
;
723 descriptionsFulltext
.put(i
, inLine
);
727 * Extract the raw text from a Node
728 * @param descriptionsFulltext
732 private void extractText(Map
<Integer
, String
> descriptionsFulltext
, int i
, Node node
) {
733 //logger.info("extractText");
734 if(!node
.getTextContent().trim().isEmpty()) {
735 String s
=node
.getTextContent().trim();
736 if (descriptionsFulltext
.get(i
) !=null){
737 s
= descriptionsFulltext
.get(i
)+" "+s
;
739 descriptionsFulltext
.put(i
, s
);
745 * @param materials: the XML node group
746 * @param acceptedTaxon: the current accepted Taxon
747 * @param refMods: the current reference extracted from the MODS
749 @SuppressWarnings("rawtypes")
750 private void extractMaterials(Node materials
, Taxon acceptedTaxon
, Reference refMods
,List
<TaxonNameBase
> nametosave
) {
751 logger
.info("EXTRACTMATERIALS");
752 // logger.info("acceptedTaxon: "+acceptedTaxon);
753 NodeList children
= materials
.getChildNodes();
754 NodeList events
= null;
758 for (int i
=0;i
<children
.getLength();i
++){
759 String rawAssociation
="";
761 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
762 events
= children
.item(i
).getChildNodes();
763 for(int k
=0;k
<events
.getLength();k
++){
764 if (events
.item(k
).getNodeName().equalsIgnoreCase("tax:name")){
765 String inLine
= getInlineTextForName(nametosave
, refMods
, events
.item(k
));
766 if(!inLine
.isEmpty()) {
767 rawAssociation
+=inLine
;
770 if (! events
.item(k
).getNodeName().equalsIgnoreCase("tax:name")
771 && !events
.item(k
).getNodeName().equalsIgnoreCase("tax:collection_event")){
772 rawAssociation
+= events
.item(k
).getTextContent().trim();
774 if(events
.item(k
).getNodeName().equalsIgnoreCase("tax:collection_event")){
775 if (!containsDistinctLetters(rawAssociation
.replaceAll(";",""))) {
776 rawAssociation
="no description text";
779 handleDerivedUnitFacadeAndBase(acceptedTaxon
, refMods
, events
.item(k
), rawAssociation
);
781 if (!rawAssociation
.isEmpty() && !added
){
783 Feature feature
= Feature
.MATERIALS_EXAMINED();
784 featuresMap
.put(feature
.getTitleCache(),feature
);
786 TextData textData
= createTextData(rawAssociation
, refMods
, feature
);
788 if(! rawAssociation
.isEmpty() && (acceptedTaxon
!=null)){
789 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
790 td
.addElement(textData
);
791 acceptedTaxon
.addDescription(td
);
792 sourceHandler
.addAndSaveSource(refMods
, td
, null);
794 // DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
795 // derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
797 // TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
798 // acceptedTaxon.addDescription(taxonDescription);
800 // IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
802 // Feature feature = Feature.MATERIALS_EXAMINED();
803 // featuresMap.put(feature.getTitleCache(),feature);
804 // if(!StringUtils.isEmpty(rawAssociation)) {
805 // derivedUnitBase.setTitleCache(rawAssociation, true);
807 // indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
808 // indAssociation.setFeature(feature);
809 // indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
811 // /*boolean sourceExists=false;
812 // Set<DescriptionElementSource> dsources = indAssociation.getSources();
813 // for (DescriptionElementSource src : dsources){
814 // String micro = src.getCitationMicroReference();
815 // Reference r = src.getCitation();
816 // if (r.equals(refMods) && micro == null) {
817 // sourceExists=true;
820 // if(!sourceExists) {
821 // indAssociation.addSource(null, null, refMods, null);
823 // taxonDescription.addElement(indAssociation);
824 // taxonDescription.setTaxon(acceptedTaxon);
825 // taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
827 // /*sourceExists=false;
828 // Set<IdentifiableSource> sources = taxonDescription.getSources();
829 // for (IdentifiableSource src : sources){
830 // String micro = src.getCitationMicroReference();
831 // Reference r = src.getCitation();
832 // if (r.equals(refMods) && micro == null) {
833 // sourceExists=true;
836 // if(!sourceExists) {
837 // taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
840 // importer.getDescriptionService().saveOrUpdate(taxonDescription);
841 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
851 * @param acceptedTaxon
854 * @param rawAssociation
857 private void handleDerivedUnitFacadeAndBase(Taxon acceptedTaxon
, Reference refMods
, Node event
,
858 String rawAssociation
) {
859 logger
.info("handleDerivedUnitFacadeAndBase");
861 DerivedUnit derivedUnitBase
;
862 MySpecimenOrObservation myspecimenOrObservation
;
863 DerivedUnitFacade derivedUnitFacade
= getFacade(rawAssociation
.replaceAll(";",""),SpecimenOrObservationType
.DerivedUnit
);
864 derivedUnitBase
= derivedUnitFacade
.innerDerivedUnit();
866 sourceHandler
.addAndSaveSource(refMods
, derivedUnitBase
);
868 //TODO this may not always be correct, ask user
869 TaxonNameBase
<?
,?
> typifiableName
= acceptedTaxon
!= null ? acceptedTaxon
.getName() : null;
870 myspecimenOrObservation
= extractSpecimenOrObservation(event
,derivedUnitBase
,SpecimenOrObservationType
.DerivedUnit
, typifiableName
);
871 derivedUnitBase
= myspecimenOrObservation
.getDerivedUnitBase();
872 descr
=myspecimenOrObservation
.getDescr();
874 sourceHandler
.addAndSaveSource(refMods
, derivedUnitBase
);
876 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
878 Feature feature
= makeFeature(derivedUnitBase
);
879 featuresMap
.put(feature
.getTitleCache(),feature
);
880 if(!StringUtils
.isEmpty(descr
)) {
881 derivedUnitBase
.setTitleCache(descr
, true);
884 IndividualsAssociation indAssociation
= createIndividualAssociation(refMods
, derivedUnitBase
, feature
);
886 taxonDescription
.addElement(indAssociation
);
887 sourceHandler
.addAndSaveSource(refMods
, taxonDescription
,null);
888 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
895 * @param materials: the XML node group
896 * @param acceptedTaxon: the current accepted Taxon
897 * @param refMods: the current reference extracted from the MODS
899 private String
extractMaterialsDirect(Node materials
, Taxon acceptedTaxon
, Reference refMods
, String event
, TaxonNameBase
<?
,?
> currentName
) {
900 logger
.info("extractMaterialsDirect");
901 // logger.info("acceptedTaxon: "+acceptedTaxon);
904 DerivedUnit derivedUnitBase
=null;
905 MySpecimenOrObservation myspecimenOrObservation
= extractSpecimenOrObservation(materials
,derivedUnitBase
, SpecimenOrObservationType
.DerivedUnit
, currentName
);
906 derivedUnitBase
= myspecimenOrObservation
.getDerivedUnitBase();
908 sourceHandler
.addAndSaveSource(refMods
, derivedUnitBase
);
910 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
912 Feature feature
=null;
913 if (event
.equalsIgnoreCase("collection")){
914 feature
= makeFeature(derivedUnitBase
);
917 feature
= Feature
.MATERIALS_EXAMINED();
919 featuresMap
.put(feature
.getTitleCache(), feature
);
921 descr
=myspecimenOrObservation
.getDescr();
922 if(!StringUtils
.isEmpty(descr
)) {
923 derivedUnitBase
.setTitleCache(descr
, true);
926 IndividualsAssociation indAssociation
= createIndividualAssociation(refMods
, derivedUnitBase
, feature
);
928 taxonDescription
.addElement(indAssociation
);
929 sourceHandler
.addAndSaveSource(refMods
, taxonDescription
,null);
930 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
932 return derivedUnitBase
.getTitleCache();
938 * @param description: the XML node group
939 * @param acceptedTaxon: the current acceptedTaxon
940 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
941 * @param nametosave: the list of objects to save into the CDM
942 * @param refMods: the current reference extracted from the MODS
943 * @param featureName: the feature name
945 @SuppressWarnings({ "rawtypes"})
946 private String
extractSpecificFeature(Node description
, Taxon acceptedTaxon
, Taxon defaultTaxon
,
947 List
<TaxonNameBase
> nametosave
, Reference refMods
, String featureName
) {
948 logger
.info("extractSpecificFeature "+featureName
);
949 // System.out.println("GRUUUUuu");
950 NodeList children
= description
.getChildNodes();
951 NodeList insideNodes
;
954 String localdescr
="";
955 List
<String
> blabla
=null;
956 List
<String
> text
= new ArrayList
<String
>();
958 String table
="<table>";
962 Feature currentFeature
=getFeatureObjectFromString(featureName
);
964 // String fullContent = description.getTextContent();
965 for (int i
=0;i
<children
.getLength();i
++){
967 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text") && !children
.item(i
).getTextContent().trim().isEmpty()){
968 text
.add(children
.item(i
).getTextContent().trim());
970 if (featureName
.equalsIgnoreCase("table")){
971 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
972 children
.item(i
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("thead")){
973 head
= extractTableHead(children
.item(i
));
975 line
= extractTableLine(children
.item(i
));
976 if (!line
.equalsIgnoreCase("<tr></tr>")) {
980 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
981 children
.item(i
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
982 line
= extractTableLineWithColumn(children
.item(i
).getChildNodes());
983 if(!line
.equalsIgnoreCase("<tr></tr>")) {
988 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
989 insideNodes
=children
.item(i
).getChildNodes();
990 blabla
= new ArrayList
<String
>();
991 for (int j
=0;j
<insideNodes
.getLength();j
++){
992 Node insideNode
= insideNodes
.item(j
);
993 if (insideNode
.getNodeName().equalsIgnoreCase("tax:name")){
994 String inlinetext
= getInlineTextForName(nametosave
, refMods
, insideNode
);
995 if (!inlinetext
.isEmpty()) {
996 blabla
.add(inlinetext
);
999 else if (insideNode
.getNodeName().equalsIgnoreCase("#text")) {
1000 if(!insideNode
.getTextContent().trim().isEmpty()){
1001 blabla
.add(insideNode
.getTextContent().trim());
1002 // localdescr += insideNodes.item(j).getTextContent().trim();
1006 if (!blabla
.isEmpty()) {
1007 String blaStr
= StringUtils
.join(blabla
," ").trim();
1008 if(!stringIsEmpty(blaStr
)) {
1009 setParticularDescription(blaStr
,acceptedTaxon
,defaultTaxon
, refMods
,currentFeature
);
1015 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text")){
1016 if(!children
.item(i
).getTextContent().trim().isEmpty()){
1017 localdescr
= children
.item(i
).getTextContent().trim();
1018 if(!stringIsEmpty(localdescr
)) {
1019 setParticularDescription(localdescr
,acceptedTaxon
,defaultTaxon
, refMods
,currentFeature
);
1026 if (!table
.equalsIgnoreCase("<table></table>")){
1027 // System.out.println("TABLE : "+table);
1031 if (text
!=null && !text
.isEmpty()) {
1032 return StringUtils
.join(text
," ");
1044 private String
extractTableLine(Node child
) {
1045 //logger.info("extractTableLine");
1048 if (child
.getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1049 line
= extractTableLineWithColumn(child
.getChildNodes());
1060 private String
extractTableHead(Node child
) {
1061 //logger.info("extractTableHead");
1065 NodeList trNodes
= child
.getChildNodes();
1066 for (int k
=0;k
<trNodes
.getLength();k
++){
1067 if (trNodes
.item(k
).getNodeName().equalsIgnoreCase("tax:div")
1068 && trNodes
.item(k
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1069 line
= extractTableLineWithColumn(trNodes
.item(k
).getChildNodes());
1078 * build a html table line, with td columns
1080 * @return an html coded line
1082 private String
extractTableLineWithColumn(NodeList tdNodes
) {
1083 //logger.info("extractTableLineWithColumn");
1086 for (int l
=0;l
<tdNodes
.getLength();l
++){
1087 if (tdNodes
.item(l
).getNodeName().equalsIgnoreCase("tax:p")){
1088 line
+="<td>"+tdNodes
.item(l
).getTextContent()+"</td>";
1096 * @param description: the XML node group
1097 * @param acceptedTaxon: the current acceptedTaxon
1098 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1099 * @param nametosave: the list of objects to save into the CDM
1100 * @param refMods: the current reference extracted from the MODS
1101 * @param featureName: the feature name
1103 @SuppressWarnings({ "unused", "rawtypes" })
1104 private String
extractSpecificFeatureNotStructured(Node description
, Taxon acceptedTaxon
, Taxon defaultTaxon
,
1105 List
<TaxonNameBase
> nameToSave
, Reference refMods
, String featureName
) {
1106 logger
.info("extractSpecificFeatureNotStructured " + featureName
);
1107 NodeList children
= description
.getChildNodes();
1108 NodeList insideNodes
;
1109 List
<String
> blabla
= new ArrayList
<String
>();
1112 Feature currentFeature
= getFeatureObjectFromString(featureName
);
1114 String fullContent
= description
.getTextContent();
1115 for (int i
=0;i
<children
.getLength();i
++){
1116 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1117 insideNodes
=children
.item(i
).getChildNodes();
1118 for (int j
=0;j
<insideNodes
.getLength();j
++){
1119 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1120 String inlineText
=getInlineTextForName(nameToSave
, refMods
, insideNodes
.item(j
));
1121 if(!inlineText
.isEmpty()) {
1122 blabla
.add(inlineText
);
1125 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("#text")) {
1126 if(!insideNodes
.item(j
).getTextContent().trim().isEmpty()){
1127 blabla
.add(insideNodes
.item(j
).getTextContent().trim());
1132 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text")){
1133 if(!children
.item(i
).getTextContent().trim().isEmpty()){
1134 String localdescr
= children
.item(i
).getTextContent().trim();
1135 if(!localdescr
.isEmpty())
1137 blabla
.add(localdescr
);
1143 if (blabla
!=null && !blabla
.isEmpty()) {
1144 String blaStr
= StringUtils
.join(blabla
," ").trim();
1145 if (! stringIsEmpty(blaStr
)) {
1146 setParticularDescription(blaStr
,acceptedTaxon
,defaultTaxon
, refMods
,currentFeature
);
1161 private boolean stringIsEmpty(String blaStr
) {
1162 if (blaStr
.matches("(\\.|,|;|\\.-)?")){
1172 * @param insideNodes
1176 @SuppressWarnings({ "rawtypes" })
1177 private String
getInlineTextForName(List
<TaxonNameBase
> nametosave
, Reference refMods
, Node insideNode
) {
1179 NodeList children
= insideNode
.getChildNodes();
1181 for (int i
=0;i
<children
.getLength();i
++){
1182 Node nameChild
= children
.item(i
);
1183 if(nameChild
.getNodeName().equalsIgnoreCase("#text")){
1184 result
+= nameChild
.getTextContent();
1189 return result
.replace("\n", "").trim();
1191 TaxonNameBase tnb
= getTaxonNameBaseFromXML(insideNode
, nametosave
,refMods
,false);
1192 // Taxon tax = getTaxonFromTxonNameBase(tnb, refMods);
1193 Taxon tax
= currentMyName
.getTaxon();
1194 if(tnb
!=null && tax
!= null){
1195 String linkedTaxon
= tnb
.getTitleCache().split("sec")[0];//TODO NOT IMPLEMENTED IN THE CDM YET
1196 return "<cdm:taxon uuid='"+tax
.getUuid()+"'>"+linkedTaxon
+"</cdm:taxon>";
1197 }else if (tnb
!= null && tax
== null){
1199 return "<cdm:taxonName uuid='" + tnb
.getUuid() +"'>" + tnb
.getTitleCache().split("sec")[0] +"</cdm:taxonName>";
1201 logger
.warn("Inline text has no content yet");
1208 * @param featureName
1211 @SuppressWarnings("rawtypes")
1212 private Feature
getFeatureObjectFromString(String featureName
) {
1213 logger
.info("getFeatureObjectFromString");
1214 List
<Feature
> features
= importer
.getTermService().list(Feature
.class, null,null,null,null);
1215 Feature currentFeature
=null;
1216 for (Feature feature
: features
){
1217 String tmpF
= feature
.getTitleCache();
1218 if (tmpF
.equalsIgnoreCase(featureName
)) {
1219 currentFeature
=feature
;
1220 // System.out.println("currentFeatureFromList "+currentFeature.getUuid());
1223 if (currentFeature
== null) {
1224 currentFeature
=Feature
.NewInstance(featureName
, featureName
, featureName
);
1225 if(featureName
.equalsIgnoreCase("Other")){
1226 currentFeature
.setUuid(OtherUUID
);
1228 if(featureName
.equalsIgnoreCase(notMarkedUp
)){
1229 currentFeature
.setUuid(NotMarkedUpUUID
);
1231 importer
.getTermService().saveOrUpdate(currentFeature
);
1233 return currentFeature
;
1240 * @param children: the XML node group
1241 * @param nametosave: the list of objects to save into the CDM
1242 * @param acceptedTaxon: the current acceptedTaxon
1243 * @param refMods: the current reference extracted from the MODS
1244 * @param fullContent :the parsed XML content
1245 * @return a list of description (text)
1247 @SuppressWarnings({ "unused", "rawtypes" })
1248 private List
<String
> parseParagraph(List
<TaxonNameBase
> namesToSave
, Taxon acceptedTaxon
, Reference refMods
, Node paragraph
, Feature feature
){
1249 logger
.info("parseParagraph "+feature
.toString());
1250 List
<String
> fullDescription
= new ArrayList
<String
>();
1251 // String localdescr;
1253 NodeList insideNodes
;
1254 boolean collectionEvent
= false;
1255 List
<Node
>collectionEvents
= new ArrayList
<Node
>();
1257 NodeList children
= paragraph
.getChildNodes();
1259 for (int i
=0;i
<children
.getLength();i
++){
1261 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text") && !children
.item(i
).getTextContent().trim().isEmpty()){
1262 descr
+= children
.item(i
).getTextContent().trim();
1264 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1265 insideNodes
=children
.item(i
).getChildNodes();
1266 List
<String
> blabla
= new ArrayList
<String
>();
1267 for (int j
=0;j
<insideNodes
.getLength();j
++){
1268 boolean nodeKnown
= false;
1269 // System.out.println("insideNodes.item(j).getNodeName() : "+insideNodes.item(j).getNodeName());
1270 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1271 String inlineText
= getInlineTextForName(namesToSave
, refMods
, insideNodes
.item(j
));
1272 if (!inlineText
.isEmpty()) {
1273 blabla
.add(inlineText
);
1277 else if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("#text")) {
1278 if(!insideNodes
.item(j
).getTextContent().trim().isEmpty()){
1279 blabla
.add(insideNodes
.item(j
).getTextContent().trim());
1280 // localdescr += insideNodes.item(j).getTextContent().trim();
1284 else if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:bibref")) {
1285 String ref
= insideNodes
.item(j
).getTextContent().trim();
1286 if (ref
.endsWith(";") && ((ref
.length())>1)) {
1287 ref
=ref
.substring(0, ref
.length()-1)+".";
1289 Reference reference
= ReferenceFactory
.newGeneric();
1290 reference
.setTitleCache(ref
, true);
1291 blabla
.add(reference
.getTitleCache());
1294 else if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:figure")){
1295 String figure
= extractSpecificFeature(insideNodes
.item(j
),acceptedTaxon
,acceptedTaxon
, namesToSave
, refMods
, "figure");
1298 else if(insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:div") &&
1299 insideNodes
.item(j
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1300 insideNodes
.item(j
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1301 String table
= extractSpecificFeature(insideNodes
.item(j
),acceptedTaxon
,acceptedTaxon
, namesToSave
, refMods
, "table");
1304 else if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1305 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1306 String titlecache
= extractMaterialsDirect(insideNodes
.item(j
), acceptedTaxon
, refMods
, "collection", null);
1307 blabla
.add(titlecache
);
1308 collectionEvent
=true;
1309 collectionEvents
.add(insideNodes
.item(j
));
1312 logger
.warn("node not handled yet: " + insideNodes
.item(j
).getNodeName());
1316 if (!StringUtils
.isBlank(StringUtils
.join(blabla
," "))) {
1317 fullDescription
.add(StringUtils
.join(blabla
," "));
1320 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:figure")){
1321 String figure
= extractSpecificFeature(children
.item(i
),acceptedTaxon
,acceptedTaxon
, namesToSave
, refMods
, "Figure");
1322 fullDescription
.add(figure
);
1324 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
1325 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1326 children
.item(i
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1327 String table
= extractSpecificFeature(children
.item(i
),acceptedTaxon
,acceptedTaxon
, namesToSave
, refMods
, "table");
1328 fullDescription
.add(table
);
1332 if( !stringIsEmpty(descr
.trim())){
1333 Feature currentFeature
= getNotMarkedUpFeatureObject();
1334 setParticularDescription(descr
.trim(),acceptedTaxon
,acceptedTaxon
, refMods
,currentFeature
);
1336 // if (collectionEvent) {
1337 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1338 // for (Node coll:collectionEvents){
1339 // = extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
1342 return fullDescription
;
1347 * @param description: the XML node group
1348 * @param acceptedTaxon: the current acceptedTaxon
1349 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1350 * @param nametosave: the list of objects to save into the CDM
1351 * @param refMods: the current reference extracted from the MODS
1352 * @param feature: the feature to link the data with
1354 @SuppressWarnings("rawtypes")
1355 private void extractFeature(Node description
, Taxon acceptedTaxon
, Taxon defaultTaxon
, List
<TaxonNameBase
> namesToSave
, Reference refMods
, Feature feature
){
1356 logger
.info("EXTRACT FEATURE "+feature
.toString());
1357 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1358 List
<String
> fullDescription
= parseParagraph( namesToSave
, acceptedTaxon
, refMods
, description
,feature
);
1360 // System.out.println("Feature : "+feature.toString()+", "+fullDescription.toString());
1361 if (!fullDescription
.isEmpty() &&!stringIsEmpty(StringUtils
.join(fullDescription
,"\n").trim())) {
1362 setParticularDescription(StringUtils
.join(fullDescription
,"\n").trim(),acceptedTaxon
,defaultTaxon
, refMods
,feature
);
1369 * @param descr: the XML Nodegroup to parse
1370 * @param acceptedTaxon: the current acceptedTaxon
1371 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1372 * @param refMods: the current reference extracted from the MODS
1373 * @param currentFeature: the feature name
1376 private void setParticularDescription(String descr
, Taxon acceptedTaxon
, Taxon defaultTaxon
, Reference refMods
, Feature currentFeature
) {
1377 logger
.info("setParticularDescription " + currentFeature
.getTitleCache()+", \n blabla : "+descr
);
1379 //remove redundant feature title
1380 String featureStr
= currentFeature
.getTitleCache();
1381 if (!descr
.isEmpty() && descr
.toLowerCase().startsWith(featureStr
.toLowerCase())){
1382 descr
= descr
.replaceAll("(?i)" + featureStr
+ "\\.\\s*", "");
1386 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1387 featuresMap
.put(currentFeature
.getTitleCache(),currentFeature
);
1389 TextData textData
= createTextData(descr
, refMods
, currentFeature
);
1391 if(acceptedTaxon
!=null){
1392 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
1393 td
.addElement(textData
);
1394 acceptedTaxon
.addDescription(td
);
1396 sourceHandler
.addAndSaveSource(refMods
, td
, null);
1397 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1400 if(! descr
.isEmpty() && (acceptedTaxon
== null) && (defaultTaxon
!= null)){
1402 Taxon tmp
=(Taxon
) importer
.getTaxonService().find(defaultTaxon
.getUuid());
1404 defaultTaxon
=CdmBase
.deproxy(tmp
,Taxon
.class);
1406 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
1408 }catch(Exception e
){
1409 logger
.debug("TAXON EXISTS"+defaultTaxon
);
1412 TaxonDescription td
=importer
.getTaxonDescription(defaultTaxon
, false, true);
1413 defaultTaxon
.addDescription(td
);
1414 td
.addElement(textData
);
1415 sourceHandler
.addAndSaveSource(refMods
, td
, null);
1416 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
1423 * @param currentFeature
1426 private TextData
createTextData(String descr
, Reference refMods
, Feature currentFeature
) {
1427 //logger.info("createTextData");
1428 TextData textData
= TextData
.NewInstance();
1429 textData
.setFeature(currentFeature
);
1430 sourceHandler
.addSource(refMods
, textData
);
1432 textData
.putText(Language
.UNKNOWN_LANGUAGE(), descr
);
1439 * @param descr: the XML Nodegroup to parse
1440 * @param acceptedTaxon: the current acceptedTaxon
1441 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1442 * @param refMods: the current reference extracted from the MODS
1443 * @param currentFeature: the feature name
1446 private void setParticularDescription(String descr
, Taxon acceptedTaxon
, Taxon defaultTaxon
,Reference currentRef
, Reference refMods
, Feature currentFeature
) {
1447 // System.out.println("setParticularDescriptionSPecial "+currentFeature);
1448 // logger.info("acceptedTaxon: "+acceptedTaxon);
1449 logger
.info("setParticularDescription");
1450 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1452 featuresMap
.put(currentFeature
.getTitleCache(),currentFeature
);
1453 TextData textData
= createTextData(descr
, refMods
, currentFeature
);
1455 if(! descr
.isEmpty() && (acceptedTaxon
!=null)){
1456 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
1457 td
.addElement(textData
);
1458 acceptedTaxon
.addDescription(td
);
1460 sourceHandler
.addAndSaveSource(refMods
, td
, currentRef
);
1461 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1464 if(! descr
.isEmpty() && (acceptedTaxon
== null) && (defaultTaxon
!= null)){
1466 Taxon tmp
=(Taxon
) importer
.getTaxonService().find(defaultTaxon
.getUuid());
1468 defaultTaxon
=CdmBase
.deproxy(tmp
,Taxon
.class);
1470 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
1472 }catch(Exception e
){
1473 logger
.debug("TAXON EXISTS"+defaultTaxon
);
1476 TaxonDescription td
=importer
.getTaxonDescription(defaultTaxon
, false, true);
1477 defaultTaxon
.addDescription(td
);
1478 td
.addElement(textData
);
1479 sourceHandler
.addAndSaveSource(currentRef
, td
,currentRef
);
1480 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
1487 * @param synonyms: the XML Nodegroup to parse
1488 * @param nametosave: the list of objects to save into the CDM
1489 * @param acceptedTaxon: the current acceptedTaxon
1490 * @param refMods: the current reference extracted from the MODS
1492 @SuppressWarnings({ "rawtypes" })
1493 private void extractSynonyms(Node synonymsNode
, Taxon acceptedTaxon
,Reference refMods
, String followingText
) {
1494 logger
.info("extractSynonyms");
1495 //System.out.println("extractSynonyms for: "+acceptedTaxon);
1496 Taxon ttmp
= (Taxon
) importer
.getTaxonService().find(acceptedTaxon
.getUuid());
1498 acceptedTaxon
= CdmBase
.deproxy(ttmp
,Taxon
.class);
1501 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1503 NodeList children
= synonymsNode
.getChildNodes();
1504 List
<MyName
> names
= new ArrayList
<MyName
>();
1506 if(synonymsNode
.getNodeName().equalsIgnoreCase("tax:name")){
1508 MyName myName
= extractScientificNameSynonym(synonymsNode
, refMods
, followingText
);
1510 } catch (TransformerFactoryConfigurationError e
) {
1512 } catch (TransformerException e
) {
1518 for (int i
=0;i
<children
.getLength();i
++){
1519 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1520 NodeList tmp
= children
.item(i
).getChildNodes();
1521 // String fullContent = children.item(i).getTextContent();
1522 for (int j
=0; j
< tmp
.getLength();j
++){
1523 if(tmp
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1525 MyName myName
= extractScientificNameSynonym(tmp
.item(j
),refMods
, followingText
);
1527 } catch (TransformerFactoryConfigurationError e
) {
1529 } catch (TransformerException e
) {
1535 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:name")){
1537 MyName myName
= extractScientificNameSynonym(children
.item(i
),refMods
, followingText
);
1539 } catch (TransformerFactoryConfigurationError e
) {
1541 } catch (TransformerException e
) {
1548 for(MyName name
:names
){
1549 TaxonNameBase nameToBeFilled
= name
.getTaxonNameBase();
1550 Synonym synonym
= name
.getSyno();
1551 addFollowingTextToName(nameToBeFilled
, followingText
);
1553 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1554 nameToBeFilled = parser.parseFullName(name.getName(), nomenclaturalCode, name.getRank());
1555 if (nameToBeFilled.hasProblem() &&
1556 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1557 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1558 addProblemNameToFile(name.getName(),"",nomenclaturalCode,name.getRank());
1559 nameToBeFilled = solveNameProblem(name.getOriginalName(), name.getName(), parser,name.getAuthor(), name.getRank());
1561 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
1563 if (!name
.getIdentifier().isEmpty() && (name
.getIdentifier().length()>2)){
1564 setLSID(name
.getIdentifier(), synonym
);
1567 Set
<Synonym
> synonymsSet
= acceptedTaxon
.getSynonyms();
1568 boolean synoExist
= false;
1569 for (Synonym syn
: synonymsSet
){
1571 boolean a
=syn
.getName().equals(synonym
.getName());
1572 boolean b
= syn
.getSec().equals(synonym
.getSec());
1577 if (!synonymsSet
.contains(synonym
) && ! (synoExist
)) {
1578 sourceHandler
.addSource(refMods
, synonym
);
1579 acceptedTaxon
.addSynonym(synonym
, SynonymType
.SYNONYM_OF());
1582 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1586 private boolean addFollowingTextToName(ITaxonNameBase nameToBeFilled
, String followingText
) {
1587 if (nameToBeFilled
!= null && StringUtils
.isNotBlank(followingText
)){
1588 if (! followingText
.matches("\\d\\.?")){
1590 if (followingText
.startsWith(",")){
1591 followingText
= followingText
.substring(1).trim();
1593 nameToBeFilled
.setFullTitleCache(nameToBeFilled
.getFullTitleCache()+ "," +followingText
, true);
1602 * @param refgroup: the XML nodes
1603 * @param nametosave: the list of objects to save into the CDM
1604 * @param acceptedTaxon: the current acceptedTaxon
1605 * @param nametosave: the list of objects to save into the CDM
1606 * @param refMods: the current reference extracted from the MODS
1607 * @return the acceptedTaxon (why?)
1608 * handle cases where the bibref are inside <p> and outside
1610 @SuppressWarnings({ "rawtypes" })
1611 private Taxon
extractReferences(Node refgroup
, List
<TaxonNameBase
> nametosave
, Taxon acceptedTaxon
, Reference refMods
) {
1612 logger
.info("extractReferences");
1613 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1615 NodeList children
= refgroup
.getChildNodes();
1616 INonViralName nameToBeFilled
= getNonViralNameAccNomenclature();
1618 ReferenceBuilder refBuild
= new ReferenceBuilder(sourceHandler
);
1619 for (int i
=0;i
<children
.getLength();i
++){
1620 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:bibref")){
1621 String ref
= children
.item(i
).getTextContent().trim();
1622 refBuild
.builReference(ref
, treatmentMainName
, nomenclaturalCode
, acceptedTaxon
, refMods
);
1623 if (!refBuild
.isFoundBibref()){
1624 extractReferenceRawText(children
.item(i
).getChildNodes(), nameToBeFilled
, refMods
, acceptedTaxon
);
1628 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1629 NodeList references
= children
.item(i
).getChildNodes();
1631 for (int j
=0;j
<references
.getLength();j
++){
1632 if(references
.item(j
).getNodeName().equalsIgnoreCase("tax:bibref")){
1633 String ref
= references
.item(j
).getTextContent().trim();
1634 refBuild
.builReference(ref
, treatmentMainName
, nomenclaturalCode
, acceptedTaxon
, refMods
);
1637 if (references
.item(j
).getNodeName().equalsIgnoreCase("#text")
1638 && !references
.item(j
).getTextContent().trim().isEmpty()){
1639 descr
+= references
.item(j
).getTextContent().trim();
1643 if (!refBuild
.isFoundBibref()){
1644 //if it's not tagged, put it as row information.
1645 // extractReferenceRawText(references, nameToBeFilled, nametosave, refMods, acceptedTaxon);
1646 //then put it as a not markup feature if not empty
1647 if (!stringIsEmpty(descr
.trim())){
1648 Feature currentFeature
= getNotMarkedUpFeatureObject();
1649 setParticularDescription(descr
.trim(),acceptedTaxon
,acceptedTaxon
, refMods
,currentFeature
);
1654 // importer.getClassificationService().saveOrUpdate(classification);
1655 return acceptedTaxon
;
1660 * get the non viral name according to the current nomenclature
1664 private INonViralName
getNonViralNameAccNomenclature() {
1665 return nomenclaturalCode
.getNewTaxonNameInstance(null);
1669 * @return the feature object for the category "not marked up"
1671 private Feature
getNotMarkedUpFeatureObject() {
1672 // FIXME use getFeature(uuid ....)
1673 logger
.info("getNotMarkedUpFeatureObject");
1674 Feature currentFeature
= (Feature
)importer
.getTermService().find(NotMarkedUpUUID
);
1675 if (currentFeature
== null) {
1676 currentFeature
=Feature
.NewInstance(notMarkedUp
, notMarkedUp
, notMarkedUp
);
1677 currentFeature
.setUuid(NotMarkedUpUUID
);
1678 //TODO use userDefined Feature Vocabulary
1679 Feature
.DISTRIBUTION().getVocabulary().addTerm(currentFeature
);
1680 // importer.getTermService().saveOrUpdate(currentFeature);
1681 importer
.getVocabularyService().saveOrUpdate(currentFeature
.getVocabulary());
1683 return currentFeature
;
1688 * handle cases where the bibref are inside <p> and outside
1690 @SuppressWarnings("rawtypes")
1691 private void extractReferenceRawText(NodeList references
, INonViralName nameToBeFilled
, Reference refMods
,
1692 Taxon acceptedTaxon
) {
1693 logger
.info("extractReferenceRawText");
1694 String refString
="";
1695 currentMyName
= new MyName(true);
1696 for (int j
=0;j
<references
.getLength();j
++){
1697 acceptedTaxon
=CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1698 //no bibref tag inside
1699 // System.out.println("references.item(j).getNodeName()"+references.item(j).getNodeName());
1700 if (references
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1703 String followingText
= null; //needs to be checked if follText is possible
1704 //TODO create or not create?
1705 currentMyName
= extractScientificName(references
.item(j
), refMods
, followingText
);
1706 } catch (TransformerFactoryConfigurationError e
) {
1708 } catch (TransformerException e
) {
1712 // name=name.trim();
1714 if (references
.item(j
).getNodeName().equalsIgnoreCase("#text")){
1715 refString
= references
.item(j
).getTextContent().trim();
1717 if(references
.item(j
).getNodeName().equalsIgnoreCase("#text") && !references
.item(j
).getTextContent().trim().isEmpty()){
1719 if (!currentMyName
.getStatus().isEmpty()){
1720 String nomNovStatus
= this.newNameStatus(currentMyName
.getStatus());
1721 if (nomNovStatus
!= null){
1722 nameToBeFilled
.setAppendedPhrase(nomNovStatus
);
1725 NomenclaturalStatusType statusType
= nomStatusString2NomStatus(currentMyName
.getStatus());
1726 nameToBeFilled
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
1727 } catch (UnknownCdmTypeException e
) {
1728 addProblematicStatusToFile(currentMyName
.getStatus());
1729 logger
.warn("Problem with status");
1734 String fullLineRefName
= references
.item(j
).getTextContent().trim();
1735 int nameOrRefOrOther
=2;
1736 nameOrRefOrOther
=askIfNameContained(fullLineRefName
);
1737 if (nameOrRefOrOther
==0){
1738 TaxonNameBase nameTBF
= currentMyName
.getTaxonNameBase();
1739 Synonym synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
1741 Set
<Synonym
> synonymsSet
= acceptedTaxon
.getSynonyms();
1742 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1743 boolean synoExist
= false;
1744 for (Synonym syn
: synonymsSet
){
1745 // System.out.println(syn.getName()+" -- "+syn.getSec());
1746 boolean a
=syn
.getName().equals(synonym
.getName());
1747 boolean b
= syn
.getSec().equals(synonym
.getSec());
1752 if (!synonymsSet
.contains(synonym
) && ! (synoExist
)) {
1753 sourceHandler
.addSource(refMods
, synonym
);
1755 acceptedTaxon
.addSynonym(synonym
, SynonymType
.SYNONYM_OF());
1759 if (nameOrRefOrOther
==1){
1760 Reference re
= ReferenceFactory
.newGeneric();
1761 re
.setTitleCache(fullLineRefName
, true);
1763 /* TaxonNameBase nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
1764 if (nameTBF.hasProblem() &&
1765 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1766 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
1767 nameTBF=solveNameProblem(currentMyName.getName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
1769 nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1771 TaxonNameBase nameTBF
= currentMyName
.getTaxonNameBase();
1772 Synonym synonym
= Synonym
.NewInstance(nameTBF
, re
);
1774 Set
<Synonym
> synonymsSet
= acceptedTaxon
.getSynonyms();
1775 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1776 boolean synoExist
= false;
1777 for (Synonym syn
: synonymsSet
){
1778 // System.out.println(syn.getName()+" -- "+syn.getSec());
1779 boolean a
=syn
.getName().equals(synonym
.getName());
1780 boolean b
= syn
.getSec().equals(synonym
.getSec());
1785 if (!synonymsSet
.contains(synonym
) && ! (synoExist
)) {
1786 sourceHandler
.addSource(refMods
, synonym
);
1788 acceptedTaxon
.addSynonym(synonym
, SynonymType
.SYNONYM_OF());
1794 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
1795 setLSID(currentMyName
.getIdentifier(), acceptedTaxon
);
1799 if(!currentMyName
.getName().isEmpty()){
1800 //logger.info("acceptedTaxon and name: *"+acceptedTaxon.getTitleCache()+"*, *"+currentMyName.getName()+"*");
1801 if (acceptedTaxon
.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(currentMyName
.getName().trim())){
1802 Reference refS
= ReferenceFactory
.newGeneric();
1803 refS
.setTitleCache(refString
, true);
1804 // TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1805 // acceptedTaxon.addDescription(td);
1806 // acceptedTaxon.addSource(refSource);
1808 // TextData textData = TextData.NewInstance(Feature.CITATION());
1810 // textData.addSource(null, null, refS, null);
1811 // td.addElement(textData);
1812 // td.addSource(refSource);
1813 // importer.getDescriptionService().saveOrUpdate(td);
1816 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
1817 setLSID(currentMyName
.getIdentifier(), acceptedTaxon
);
1821 acceptedTaxon
.getName().setNomenclaturalReference(refS
);
1823 TaxonNameBase nameTBF
= currentMyName
.getTaxonNameBase();
1824 Synonym synonym
= null;
1825 if (! currentMyName
.getStatus().isEmpty()){
1826 String nomNovStatus
= this.newNameStatus(currentMyName
.getStatus());
1827 if (nomNovStatus
!= null){
1828 nameToBeFilled
.setAppendedPhrase(nomNovStatus
);
1831 NomenclaturalStatusType statusType
= nomStatusString2NomStatus(currentMyName
.getStatus());
1832 nameToBeFilled
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
1833 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
1834 } catch (UnknownCdmTypeException e
) {
1835 addProblematicStatusToFile(currentMyName
.getStatus());
1836 logger
.warn("Problem with status");
1837 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
1838 synonym
.setAppendedPhrase(currentMyName
.getStatus());
1842 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
1846 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
1847 setLSID(currentMyName
.getIdentifier(), synonym
);
1850 Set
<Synonym
> synonymsSet
= acceptedTaxon
.getSynonyms();
1851 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1852 boolean synoExist
= false;
1853 for (Synonym syn
: synonymsSet
){
1854 // System.out.println(syn.getName()+" -- "+syn.getSec());
1855 boolean a
=syn
.getName().equals(synonym
.getName());
1856 boolean b
= syn
.getSec().equals(synonym
.getSec());
1861 if (!synonymsSet
.contains(synonym
) && ! (synoExist
)) {
1862 sourceHandler
.addSource(refMods
, synonym
);
1864 acceptedTaxon
.addSynonym(synonym
, SynonymType
.SYNONYM_OF());
1868 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1876 * @param acceptedTaxon
1878 @SuppressWarnings("rawtypes")
1879 private void setLSID(String identifier
, TaxonBase
<?
> taxon
) {
1880 //logger.info("setLSID");
1881 // boolean lsidok=false;
1882 String id
= identifier
.split("__")[0];
1883 String source
= identifier
.split("__")[1];
1884 if (id
.indexOf("lsid")>-1){
1886 LSID lsid
= new LSID(id
);
1887 taxon
.setLsid(lsid
);
1889 } catch (MalformedLSIDException e
) {
1890 logger
.warn("Malformed LSID");
1895 //logger.info("search reference for LSID");
1896 // if ((id.indexOf("lsid")<0) || !lsidok){
1897 //ADD ORIGINAL SOURCE ID EVEN IF LSID
1898 Reference re
= null;
1899 Pager
<Reference
> references
= importer
.getReferenceService().findByTitle(Reference
.class, source
, MatchMode
.EXACT
, null, 1, null, null, null);
1900 if( references
!=null && references
.getCount()>0){
1901 re
=references
.getRecords().get(0);
1903 //logger.info("search reference for LSID-end");
1905 re
= ReferenceFactory
.newGeneric();
1906 re
.setTitleCache(source
, true);
1907 importer
.getReferenceService().saveOrUpdate(re
);
1909 re
=CdmBase
.deproxy(re
, Reference
.class);
1911 //logger.info("search source for LSID");
1912 Set
<IdentifiableSource
> sources
= taxon
.getSources();
1913 boolean lsidinsource
=false;
1914 boolean urlinsource
=false;
1915 for (IdentifiableSource src
:sources
){
1916 if (id
.equalsIgnoreCase(src
.getIdInSource()) && re
.getTitleCache().equals(src
.getCitation().getTitleCache())) {
1919 if (src
.getIdInSource() == null && re
.getTitleCache().equals(sourceUrlRef
.getTitleCache())) {
1924 taxon
.addSource(OriginalSourceType
.Import
, id
,null,re
,null);
1928 sourceUrlRef
=CdmBase
.deproxy(sourceUrlRef
, Reference
.class);
1929 taxon
.addSource(OriginalSourceType
.Import
, null,null,sourceUrlRef
,null);
1936 * try to solve a parsing problem for a scientific name
1937 * @param original : the name from the OCR document
1938 * @param name : the tagged version
1940 * @return the corrected TaxonNameBase
1942 /* @SuppressWarnings({ "unchecked", "rawtypes" })
1943 private TaxonNameBase<?,?> solveNameProblem(String original, String name, INonViralNameParser parser, String author, Rank rank) {
1944 Map<String,String> ato = namesMap.get(original);
1946 ato = namesMap.get(original+" "+author);
1950 if (ato == null && rank.equals(Rank.UNKNOWN_RANK())){
1951 rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
1953 if (ato != null && rank.equals(Rank.UNKNOWN_RANK())){
1954 rank = getRank(ato);
1956 // TaxonNameBase<?,?> nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
1957 TaxonNameBase<?,?> nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1958 // logger.info("RANK: "+rank);
1960 List<ParserProblem> problems = nameTBF.getParsingProblems();
1961 for (ParserProblem pb:problems) {
1962 System.out.println(pb.toString());
1964 while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
1965 addProblemNameToFile(name,author,nomenclaturalCode,rank);
1966 String fullname=name;
1967 if(! skippQuestion) {
1968 fullname = getFullReference(name,nameTBF.getParsingProblems());
1970 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1971 nameTBF = TaxonNameFactory.NewBotanicalInstance(null);
1973 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1974 nameTBF = TaxonNameFactory.NewZoologicalInstance(null);
1976 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1977 nameTBF= TaxonNameFactory.NewBacterialInstance(null);
1979 parser.parseReferencedName(nameTBF, fullname, rank, false);
1984 if (name.indexOf(author)>-1) {
1985 nameTBF = parser.parseSimpleName(name.substring(0, name.indexOf(author)), nomenclaturalCode, rank);
1987 nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1989 if (nameTBF.hasProblem()){
1990 if (name.indexOf(author)>-1) {
1991 addProblemNameToFile(name.substring(0, name.indexOf(author)),author,nomenclaturalCode,rank);
1993 addProblemNameToFile(name,author,nomenclaturalCode,rank);
1995 // System.out.println("TBF still has problems "+nameTBF.hasProblem());
1996 problems = nameTBF.getParsingProblems();
1997 for (ParserProblem pb:problems) {
1998 System.out.println(pb.toString());
2000 nameTBF.setFullTitleCache(name, true);
2002 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2003 ((BotanicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2005 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2006 ((ZoologicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2008 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2009 ((BacterialName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2012 // logger.info("FULL TITLE CACHE "+name);
2014 nameTBF.setFullTitleCache(name, true);
2023 * @param nomenclatureNode: the XML nodes
2024 * @param nametosave: the list of objects to save into the CDM
2025 * @param refMods: the current reference extracted from the MODS
/**
 * Walks the child nodes of a nomenclature element and returns the accepted taxon built from it.
 * A first loop reads a tax:status child into a status type and/or an appended phrase. A second
 * loop dispatches on the node name of every child: #text, tax:collection_event, tax:name,
 * tax:ref_group and tax:bibref are recognised, anything else is logged as not yet handled.
 * The accepted taxon is taken from (or created for) a tax:name child handled while no synonyms
 * have been seen; later tax:name children appear to be passed on to extractSynonyms.
 *
 * @param nomenclatureNode the XML node whose children are processed
 * @param nametosave in the visible code only handed on to extractReferences
 * @param refMods the reference attached as source to the objects handled here
 * @return the accepted taxon; it is initialised with null and only assigned while handling tax:name
 */
2028 @SuppressWarnings({ "rawtypes" })
2029 private Taxon
extractNomenclature(Node nomenclatureNode
, List
<TaxonNameBase
> nametosave
, Reference refMods
) throws ClassCastException
{
2030 refMods
=CdmBase
.deproxy(refMods
, Reference
.class);
2032 logger
.info("extractNomenclature");
2033 NodeList children
= nomenclatureNode
.getChildNodes();
2035 Taxon acceptedTaxon
= null;
2036 // INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2038 // String fullContent = nomenclatureNode.getTextContent();
2040 NomenclaturalStatusType statusType
= null;
2041 String newNameStatus
= null;
// first loop: pick up the content of a tax:status child, if there is one
2043 for (int i
=0;i
<children
.getLength();i
++){
2044 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:status")){
2045 String status
= children
.item(i
).getTextContent().trim();
2047 if (!status
.isEmpty()){
2048 if (newNameStatus(status
) != null){
2049 newNameStatus
= newNameStatus(status
);
2052 statusType
= nomStatusString2NomStatus(status
);
2053 } catch (UnknownCdmTypeException e
) {
2055 addProblematicStatusToFile(status
);
2056 logger
.warn("Problem with status: " + status
);
2063 boolean containsSynonyms
=false;
2064 boolean wasSynonym
= false;
2065 usedFollowingTextPrefix
= null; //reset
// second loop: dispatch on the node name of each child
2067 for (int i
=0; i
<children
.getLength(); i
++){
2068 Node childNode
= children
.item(i
);
2069 String childName
= childNode
.getNodeName();
// non-blank text of the directly following #text node is handed to the name extraction below
2073 followingText
= null;
2074 if ( i
+ 1 < children
.getLength()){
2075 Node followingTextNode
= children
.item(i
+1);
2076 if (followingTextNode
.getNodeName().equals("#text") && !followingTextNode
.getTextContent().matches("\\s*") ){
2077 followingText
= followingTextNode
.getTextContent();
2082 if (childName
.equalsIgnoreCase("#text")) {
2083 freetext
= childNode
.getTextContent().trim();
// strip the prefix that was already consumed as extension of the preceding name
2084 if (usedFollowingTextPrefix
!= null && freetext
.startsWith(usedFollowingTextPrefix
)){
2085 freetext
= freetext
.substring(usedFollowingTextPrefix
.length());
2087 usedFollowingTextPrefix
= null; //reset
2088 }else if (childName
.equalsIgnoreCase("tax:collection_event")) {
2089 // System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
2090 extractMaterialsDirect(childNode
, acceptedTaxon
, refMods
, "collection", currentMyName
.getTaxonNameBase());
2091 }else if(childName
.equalsIgnoreCase("tax:name")){
2092 INonViralName nameToBeFilled
;
2093 //System.out.println("HANDLE FIRST NAME OF THE LIST");
2094 if(!containsSynonyms
){
2097 //System.out.println("I : "+i);
2098 currentMyName
= new MyName(false);
2100 currentMyName
= extractScientificName(childNode
, refMods
, followingText
);
2101 treatmentMainName
= currentMyName
.getNewName();
2102 originalTreatmentName
= currentMyName
.getOriginalName();
2104 } catch (TransformerFactoryConfigurationError e1
) {
2105 throw new RuntimeException(e1
);
2106 } catch (TransformerException e1
) {
2107 throw new RuntimeException(e1
);
// the name is only processed further if its rank is unknown or not above the configured maximum rank
2110 if (currentMyName
.getRank().equals(Rank
.UNKNOWN_RANK()) || currentMyName
.getRank().isLower(state2
.getConfig().getMaxRank()) || currentMyName
.getRank().equals(state2
.getConfig().getMaxRank())){
2111 maxRankRespected
=true;
2113 nameToBeFilled
=currentMyName
.getTaxonNameBase();
2115 // acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
2116 acceptedTaxon
=currentMyName
.getTaxon();
2117 //System.out.println("TreatmentName "+treatmentMainName+" - "+acceptedTaxon);
2120 boolean statusMatch
=false;
2121 if(acceptedTaxon
!=null ){
2122 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
2123 statusMatch
=compareStatus(acceptedTaxon
, statusType
);
2124 //System.out.println("statusMatch: "+statusMatch);
// no taxon yet, or its nomenclatural status differs: build a new accepted taxon from the name
2126 if (acceptedTaxon
==null || (acceptedTaxon
!= null && !statusMatch
)){
2128 nameToBeFilled
=currentMyName
.getTaxonNameBase();
2129 if (nameToBeFilled
!= null){
2130 if (!originalTreatmentName
.isEmpty()) {
2131 TaxonNameDescription td
= TaxonNameDescription
.NewInstance();
2132 td
.setTitleCache(originalTreatmentName
, true);
2133 nameToBeFilled
.addDescription(td
);
2136 if(statusType
!= null) {
2137 nameToBeFilled
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
2139 if(newNameStatus
!= null){
2140 nameToBeFilled
.setAppendedPhrase(newNameStatus
);
2142 sourceHandler
.addSource(refMods
, nameToBeFilled
);
2144 if (nameToBeFilled
.getNomenclaturalReference() == null) {
2145 acceptedTaxon
= Taxon
.NewInstance(nameToBeFilled
,refMods
);
2146 //System.out.println("NEW ACCEPTED HERE "+nameToBeFilled);
2149 acceptedTaxon
= Taxon
.NewInstance(nameToBeFilled
,(Reference
) nameToBeFilled
.getNomenclaturalReference() );//TODO TOFIX reference
2150 //System.out.println("NEW ACCEPTED HERE2 "+nameToBeFilled);
2153 sourceHandler
.addSource(refMods
, acceptedTaxon
);
2155 if(!state2
.getConfig().doKeepOriginalSecundum()) {
2156 acceptedTaxon
.setSec(state2
.getConfig().getSecundum());
2157 //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2158 //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2161 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
2162 setLSID(currentMyName
.getIdentifier(), acceptedTaxon
);
2166 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
2167 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
2171 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
// look for an existing source of the taxon that cites refMods (compared by title cache, ignoring case)
2172 Set
<IdentifiableSource
> sources
= acceptedTaxon
.getSources();
2173 boolean sourcelinked
=false;
2174 for (IdentifiableSource source
:sources
){
2175 if (source
.getCitation().getTitleCache().equalsIgnoreCase(refMods
.getTitleCache())) {
2179 if (!state2
.getConfig().doKeepOriginalSecundum()) {
2180 acceptedTaxon
.setSec(state2
.getConfig().getSecundum());
2181 //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2182 //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2184 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
2187 sourceHandler
.addSource(refMods
, acceptedTaxon
);
2189 if (!sourcelinked
|| !state2
.getConfig().doKeepOriginalSecundum()){
2191 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
2192 //FIXME are these identifiers really related to taxa, not names? Exiting LSIDs come from Zoobank, urn:lsid:biosci.ohio-state.edu:osuc_concepts:134826 (Ants)
2193 setLSID(currentMyName
.getIdentifier(), acceptedTaxon
);
2195 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
2199 maxRankRespected
=false;
2201 containsSynonyms
=true; //all following names are handled as synonyms
2204 extractSynonyms(childNode
, acceptedTaxon
, refMods
, followingText
);
2207 }catch(NullPointerException e
){
2208 logger
.warn("null pointer exception, the accepted taxon might be null");
2211 containsSynonyms
=true;
2212 }else if (childName
.equalsIgnoreCase("tax:ref_group") && maxRankRespected
){
2213 reloadClassification();
2214 //extract the References within the document
2215 extractReferences(childNode
,nametosave
,acceptedTaxon
,refMods
);
2216 }else if (childName
.equalsIgnoreCase("tax:bibref")){
2217 logger
.warn(childName
+ " still preliminary");
2219 INonViralName currentName
= currentMyName
== null ?
null : currentMyName
.getTaxonNameBase();
2220 boolean handled
= addFollowingTextToName (currentName
, childNode
.getTextContent() );
2222 setParticularDescription(freetext
.trim(), acceptedTaxon
,acceptedTaxon
, refMods
, getNotMarkedUpFeatureObject());
2225 logger
.warn(childName
+ " not yet handled");
2227 if(!stringIsEmpty(freetext
.trim())) {;
// free text consisting only of a single digit with an optional dot is ignored
2228 if (! freetext
.matches("\\d\\.?")){
2229 INonViralName currentName
= currentMyName
== null ?
null : currentMyName
.getTaxonNameBase();
2230 boolean handled
= false;
2231 if (currentName
!= null && !wasSynonym
){
2232 handled
= addFollowingTextToName (currentName
, childNode
.getTextContent() );
2235 setParticularDescription(freetext
.trim(), acceptedTaxon
,acceptedTaxon
, refMods
, getNotMarkedUpFeatureObject());
2243 //importer.getClassificationService().saveOrUpdate(classification);
2244 return acceptedTaxon
;
2254 private boolean compareStatus(TaxonBase
<?
> t
, NomenclaturalStatusType statusType
) {
2255 //logger.info("compareStatus");
2256 boolean statusMatch
=false;
2258 Set
<NomenclaturalStatus
> status
= t
.getName().getStatus();
2259 if (statusType
!=null && status
.size()>0){ //the statusType is known for both taxon
2260 for (NomenclaturalStatus st
:status
){
2261 NomenclaturalStatusType stype
= st
.getType();
2262 if (stype
.toString().equalsIgnoreCase(statusType
.toString())) {
2268 if(statusType
== null && status
.size()==0) {//there is no statusType, we can assume it's the same
2276 * @param acceptedTaxon: the current acceptedTaxon
2277 * @param ref: the current reference extracted from the MODS
2278 * @return the parent for the current accepted taxon
2280 /* private Taxon createParent(Taxon acceptedTaxon, Reference ref) {
2281 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2283 List<Rank> rankList = new ArrayList<Rank>();
2284 rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
2286 List<String> rankListStr = new ArrayList<String>();
2287 for (Rank r:rankList) {
2288 rankListStr.add(r.toString());
2291 String s = acceptedTaxon.getTitleCache();
2294 int addTaxon = askAddParent(s);
2295 logger.info("ADD TAXON: "+addTaxon);
2296 if (addTaxon == 0 ){
2297 Taxon tmp = askParent(acceptedTaxon, classification);
2299 s = askSetParent(s);
2300 r = askRank(s,rankListStr);
2302 TaxonNameBase<?,?> nameToBeFilled = null;
2303 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2304 nameToBeFilled = TaxonNameFactory.NewBotanicalInstance(null);
2306 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2307 nameToBeFilled = TaxonNameFactory.NewZoologicalInstance(null);
2309 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2310 nameToBeFilled = TaxonNameFactory.NewBacterialInstance(null);
2312 nameToBeFilled.setTitleCache(s, true);
2313 nameToBeFilled.setRank(getRank(r), true);
2315 tax = Taxon.NewInstance(nameToBeFilled, ref);
2321 createParent(tax, ref);
2322 // logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
2323 classification.addParentChild(tax, acceptedTaxon, ref, null);
2326 classification.addChildTaxon(acceptedTaxon, ref, null);
2330 classification.addChildTaxon(acceptedTaxon, ref, null);
2333 // logger.info("RETURN: "+tax );
/**
 * Builds a MyName (created with the constructor argument true) for a synonym from a name node.
 * The children of the node are scanned: tax:xmldata children deliver the identifier, the rank
 * (from dwc:taxonRank) and the atomised name parts, a non-blank #text child delivers the full name.
 * The cleaned full name and the atomised string are compared to decide which one becomes the new
 * name, then the name is parsed; if parsing reports problems the name is created without parsing.
 *
 * @param name the XML node of the name
 * @param refMods the reference set as source of the resulting MyName
 * @param followingText text following the name node, passed to parseWithExtension; may be null
 * @throws TransformerFactoryConfigurationError declared, presumably raised by askWhichScientificName - TODO confirm
 * @throws TransformerException declared, presumably raised by askWhichScientificName - TODO confirm
 */
2341 private MyName
extractScientificNameSynonym(Node name
, Reference refMods
, String followingText
) throws TransformerFactoryConfigurationError
, TransformerException
{
2342 //System.out.println("extractScientificNameSynonym");
2343 logger
.info("extractScientificNameSynonym");
2344 String
[] rankListToPrint_tmp
={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2345 List
<String
> rankListToPrint
= new ArrayList
<String
>();
2346 for (String r
: rankListToPrint_tmp
) {
2347 rankListToPrint
.add(r
.toLowerCase());
2350 Rank rank
= Rank
.UNKNOWN_RANK();
2351 NodeList children
= name
.getChildNodes();
2352 String originalName
="";
2353 String fullName
= "";
2355 String identifier
="";
2356 HashMap
<String
, String
> atomisedMap
= new HashMap
<String
, String
>();
2357 List
<String
> atomisedName
= new ArrayList
<String
>();
2359 String rankStr
= "";
2362 String status
= extractStatus(children
);
// scan the children: tax:xmldata carries the atomised data, #text the name as printed
2364 for (int i
=0;i
<children
.getLength();i
++){
2365 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:xmldata")){
2366 NodeList atom
= children
.item(i
).getChildNodes();
2367 for (int k
=0;k
<atom
.getLength();k
++){
2368 identifier
= extractIdentifier(identifier
, atom
.item(k
));
2370 rankStr
= atom
.item(k
).getNodeName().toLowerCase();
2371 // logger.info("RANKSTR:*"+rankStr+"*");
2372 if (rankStr
.equalsIgnoreCase("dwc:taxonRank")) {
2373 rankStr
=atom
.item(k
).getTextContent().trim();
2374 tmpRank
= getRank(rankStr
);
2376 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2377 if (tmpRank
!= null){
// every xmldata child is recorded under its lower-cased key (node name, or rank text for dwc:taxonRank)
2380 atomisedMap
.put(rankStr
.toLowerCase(),atom
.item(k
).getTextContent().trim());
2382 addAtomisedNamesToMap(rankListToPrint
, rank
, atomisedName
, atom
);
2384 if(children
.item(i
).getNodeName().equalsIgnoreCase("#text") && !StringUtils
.isBlank(children
.item(i
).getTextContent())){
2385 // logger.info("name non atomised: "+children.item(i).getTextContent());
2386 fullName
= children
.item(i
).getTextContent().trim();
2387 // logger.info("fullname: "+fullName);
2390 originalName
=fullName
;
2391 fullName
= cleanName(fullName
, atomisedName
);
2392 namesMap
.put(fullName
,atomisedMap
);
2394 String atomisedNameStr
= getAtomisedNameStr(atomisedName
);
// decide between the full and the atomised version when they differ
2396 if (fullName
!= null){
2397 // System.out.println("fullname: "+fullName);
2398 // System.out.println("atomised: "+atomisedNameStr);
2399 if (!fullName
.equalsIgnoreCase(atomisedNameStr
)) {
2401 // String defaultN = "";
2402 if (atomisedNameStr
.length()>fullName
.length()) {
2403 newName
=atomisedNameStr
;
2405 if (fullName
.length()>atomisedNameStr
.length() && (rank
.isLower(Rank
.SPECIES()) && fullName
.length()>2 && !fullName
.substring(0, 1).equals("."))) {
2406 newName
=askWhichScientificName(fullName
,atomisedNameStr
,classification
.getTitleCache(),name
);
2412 newName
=askWhichScientificName(fullName
,atomisedNameStr
,classification
.getTitleCache(),name
);
2419 // rank = askForRank(newName, rank, nomenclaturalCode);
2420 // System.out.println("atomised: "+atomisedMap.toString());
2422 // String[] names = new String[5];
2423 MyName myname
= new MyName(true);
2425 //System.out.println("Handle "+newName+ "(rank: "+rank+")");
2426 // System.out.println(atomisedMap.keySet());
2427 fullName
= extractAuthorFromNames(rank
, fullName
, atomisedMap
, myname
);
2428 myname
.setOriginalName(fullName
);
2429 myname
.setNewName(newName
);
2430 myname
.setRank(rank
);
2431 myname
.setIdentifier(identifier
);
2432 myname
.setStatus(status
);
2433 myname
.setSource(refMods
);
2435 // boolean higherAdded=false;
2438 boolean parseNameManually
=false;
2439 INonViralNameParser
<?
> parser
= NonViralNameParserImpl
.NewInstance();
2440 ITaxonNameBase nameToBeFilledTest
;
2442 //if selected the atomised version
// NOTE(review): == compares String references, not content; an equal but distinct string takes the other branch - confirm this is intended
2443 if(newName
==atomisedNameStr
){
2444 nameToBeFilledTest
= parseWithExtension(parser
, atomisedNameStr
, rank
, followingText
, atomisedMap
);
2445 if (nameToBeFilledTest
.hasProblem()){
2446 addProblemNameToFile("ato",atomisedNameStr
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2447 nameToBeFilledTest
= parser
.parseFullName(fullName
, nomenclaturalCode
, rank
);
2448 if (nameToBeFilledTest
.hasProblem()){
2449 addProblemNameToFile("full",fullName
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2450 parseNameManually
=true;
// NOTE(review): this call parses atomisedNameStr although the problem report below names fullName and the
// otherwise parallel extractScientificName passes fullName at this point - looks like a copy/paste slip, confirm
2454 nameToBeFilledTest
= parseWithExtension(parser
, atomisedNameStr
, rank
, followingText
, atomisedMap
);
2455 if (nameToBeFilledTest
.hasProblem()){
2456 addProblemNameToFile("fullversion",fullName
, nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2457 nameToBeFilledTest
= parser
.parseFullName(fullName
, nomenclaturalCode
,rank
);
2458 parseNameManually
=true;
2459 if(!originalName
.equalsIgnoreCase(atomisedNameStr
)) {
2460 addNameDifferenceToFile(originalName
,atomisedNameStr
);
// parsing failed: create the synonym without parser, old or new way depending on the configuration
2465 if(parseNameManually
){
2466 //System.out.println("DO IT MANUALLY");
2467 if (this.state2
.getConfig().isUseOldUnparsedSynonymExtraction()){
2468 createUnparsedSynonym(rank
, newName
, atomisedMap
, myname
);
2470 createUnparsedSynonymNew(rank
, newName
, atomisedMap
, myname
, refMods
);;
2473 //System.out.println("AUTOMATIC!");
2474 // createAtomisedTaxonString(newName, atomisedMap, myname);
2475 myname
.setParsedName(nameToBeFilledTest
);
2476 myname
.buildTaxon();
2478 //System.out.println("RETURN SYNONYM "+myname.getSyno().toString());
2485 * @throws TransformerFactoryConfigurationError
2486 * @throws TransformerException
2487 * @return a list of possible names
/**
 * Builds a MyName (created with the constructor argument false) from a name node.
 * The children of the node are scanned: tax:xmldata children deliver the identifier, the rank
 * (from dwc:taxonRank) and the atomised name parts, a non-blank #text child delivers the full name.
 * The cleaned full name and the atomised string are compared to decide which one becomes the new
 * name, then the chosen string is parsed; if parsing reports problems the taxon is created from the
 * atomised data via createAtomisedTaxon.
 *
 * @param name the XML node of the name
 * @param refMods the reference set as source of the resulting MyName
 * @param followingText text following the name node, passed to parseWithExtension; may be null
 */
2489 @SuppressWarnings({"rawtypes" })
2490 private MyName
extractScientificName(Node name
, Reference refMods
, String followingText
) throws TransformerFactoryConfigurationError
, TransformerException
{
2491 logger
.info("extractScientificName");
2493 String
[] rankListToPrintLowerCase_tmp
={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2494 List
<String
> rankListToPrint
= Arrays
.asList(rankListToPrintLowerCase_tmp
);
2496 Rank rank
= Rank
.UNKNOWN_RANK();
2497 NodeList children
= name
.getChildNodes();
2498 String originalName
= "";
2499 String fullName
= "";
2500 String newName
= "";
2501 String identifier
= "";
2502 HashMap
<String
, String
> atomisedMap
= new HashMap
<String
, String
>();
2503 List
<String
> atomisedNameList
= new ArrayList
<String
>();
2505 String status
= extractStatus(children
);
// scan the children: tax:xmldata carries the atomised data, #text the name as printed
2507 for (int i
=0;i
<children
.getLength();i
++){
2508 Node nameChild
= children
.item(i
);
2509 if(nameChild
.getNodeName().equalsIgnoreCase("tax:xmldata")){
2510 NodeList xmlDataChildren
= nameChild
.getChildNodes();
2511 for (int k
=0;k
<xmlDataChildren
.getLength();k
++){
2512 Node xmlDataChild
= xmlDataChildren
.item(k
);
2513 identifier
= extractIdentifier(identifier
, xmlDataChild
);
2514 String rankStr
= xmlDataChild
.getNodeName().toLowerCase();
2515 if (rankStr
.equalsIgnoreCase("dwc:taxonRank")) {
2516 rankStr
=xmlDataChild
.getTextContent().trim();
2517 Rank tmpRank
= getRank(rankStr
);
2518 if (tmpRank
!= null){
2522 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
// every xmldata child is recorded under its lower-cased key (node name, or rank text for dwc:taxonRank)
2524 atomisedMap
.put(rankStr
.toLowerCase(),xmlDataChild
.getTextContent().trim());
2526 addAtomisedNamesToMap(rankListToPrint
, rank
, atomisedNameList
, xmlDataChildren
);
2528 else if(nameChild
.getNodeName().equalsIgnoreCase("#text") && ! nameChild
.getTextContent().matches("\\s*")){
2529 // logger.info("name non atomised: "+children.item(i).getTextContent());
2530 fullName
= nameChild
.getTextContent().trim();
2531 // logger.info("fullname: "+fullName);
2534 originalName
=fullName
;
2535 fullName
= cleanName(fullName
, atomisedNameList
);
2536 namesMap
.put(fullName
,atomisedMap
);
2538 String atomisedNameStr
= getAtomisedNameStr(atomisedNameList
);
// decide between the full and the atomised version when they differ
2540 if (fullName
!= null){
2541 if (!fullName
.equalsIgnoreCase(atomisedNameStr
)) {
2543 if (atomisedNameStr
.length()>fullName
.length()) {
2544 newName
= atomisedNameStr
;
2546 if (fullName
.length()>atomisedNameStr
.length() && (rank
.isLower(Rank
.SPECIES()) && fullName
.length()>2 && !fullName
.substring(0, 1).equals("."))) {
2547 newName
= askWhichScientificName(fullName
, atomisedNameStr
, classification
.getTitleCache(), name
);
2553 newName
=askWhichScientificName(fullName
, atomisedNameStr
, classification
.getTitleCache(), name
);
2560 // rank = askForRank(newName, rank, nomenclaturalCode);
2561 // System.out.println("atomised: "+atomisedMap.toString());
2563 // String[] names = new String[5];
2564 MyName myname
= new MyName(false);
2566 //System.out.println("\n\nBUILD "+newName+ "(rank: "+rank+")");
2567 // System.out.println(atomisedMap.keySet());
2568 fullName
= extractAuthorFromNames(rank
, fullName
, atomisedMap
, myname
);
2569 myname
.setOriginalName(fullName
);
2570 myname
.setNewName(newName
);
2572 myname
.setRank(rank
);
2573 myname
.setIdentifier(identifier
);
2574 myname
.setStatus(status
);
2575 myname
.setSource(refMods
);
2577 // boolean higherAdded=false;
2580 boolean parseNameManually
=false;
2581 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
2582 ITaxonNameBase nameToBeFilledTest
= null;
2584 //if selected the atomised version
// NOTE(review): == compares String references, not content; an equal but distinct string takes the other branch - confirm this is intended
2585 if(newName
==atomisedNameStr
){
2586 nameToBeFilledTest
= parseWithExtension(parser
, atomisedNameStr
, rank
, followingText
, atomisedMap
);
2587 if (nameToBeFilledTest
.hasProblem()){
2588 addProblemNameToFile("ato",atomisedNameStr
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2589 nameToBeFilledTest
= parser
.parseFullName(fullName
, nomenclaturalCode
,rank
);
2590 if (nameToBeFilledTest
.hasProblem()){
2591 addProblemNameToFile("full",fullName
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2592 parseNameManually
=true;
2596 nameToBeFilledTest
= parseWithExtension(parser
, fullName
, rank
, followingText
, atomisedMap
);
2597 if (nameToBeFilledTest
.hasProblem()){
2598 addProblemNameToFile("fullversion",fullName
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2599 nameToBeFilledTest
= parser
.parseFullName(fullName
, nomenclaturalCode
,rank
);
2600 parseNameManually
=true;
2601 if(!originalName
.equalsIgnoreCase(atomisedNameStr
)) {
2602 addNameDifferenceToFile(originalName
,atomisedNameStr
);
2607 //System.out.println("parseNameManually: "+parseNameManually);
// parsing failed: build the taxon from the atomised data instead of the parser result
2608 if(parseNameManually
){
2609 createAtomisedTaxon(rank
, newName
, atomisedMap
, myname
);
2612 createAtomisedTaxonString(newName
, atomisedMap
, myname
);
2613 myname
.setParsedName(nameToBeFilledTest
);
2614 //TODO correct handling of createIfNotExists
2615 myname
.buildTaxon();
/**
 * Parses a name string and tries to extend it with author/year text that follows the name node.
 * The plain string is parsed first. If getPossibleExtension delivers a match, the string plus a
 * blank plus that match is parsed as well; when this second parse has no problem the match is
 * remembered in usedFollowingTextPrefix, and if the year flag of the extension result is set the
 * publication year is written to the atomised map.
 *
 * @param parser the name parser to use
 * @param atomisedNameStr the name string to parse (callers also pass the full name here)
 * @param rank the rank handed to the parser
 * @param followingText the text following the name node, may be null
 * @param atomisedMap the atomised name parts; may receive the publication year
 */
2621 private ITaxonNameBase
parseWithExtension(INonViralNameParser parser
, String atomisedNameStr
, Rank rank
, String followingText
, HashMap
<String
, String
> atomisedMap
) {
// element 0: matched extension text or null, element 1: author flag, element 2: year flag
2622 Object
[] nameExtensionResult
= getPossibleExtension(followingText
, atomisedMap
, nomenclaturalCode
);
2624 TaxonNameBase name
= (TaxonNameBase
)parser
.parseFullName(atomisedNameStr
, nomenclaturalCode
, rank
);
2625 if (nameExtensionResult
!= null && nameExtensionResult
[0] != null){
2626 String ext
= (String
)nameExtensionResult
[0];
2627 TaxonNameBase extName
= (TaxonNameBase
)parser
.parseFullName(atomisedNameStr
+ " " + ext
, nomenclaturalCode
, rank
);
2628 if (! extName
.hasProblem()){
// remember the consumed text so that the caller can strip it from the following free text
2630 this.usedFollowingTextPrefix
= ext
;
2631 //TODO do we need to fill the atomisedMap at all?
2632 if ((Boolean
)(nameExtensionResult
[1])){
2635 if ((Boolean
)(nameExtensionResult
[2])){
2636 //TODO BasionymYear etc.
2637 Integer origYear
= name
.getPublicationYear();
2638 if (origYear
!= null){
2639 atomisedMap
.put(PUBLICATION_YEAR
, origYear
.toString());
// Looks at the beginning of followingText for a fragment (author and/or year)
// that may still belong to the preceding name.
//
// Returns a three element array: { matched text or null, includeAuthor, includeYear }.
//
// NOTE(review): the embedded line numbering has gaps (2649-2651, 2656, 2658-2659,
// 2661, 2663-2664, 2666, 2673-2675); the result of the blank check and the guards
// around the two pattern appends are not visible in this excerpt.
2647 private Object
[] getPossibleExtension(String followingText
, HashMap
<String
, String
> atomisedMap
, NomenclaturalCode nomenclaturalCode
) {
2648 if (StringUtils
.isBlank(followingText
)){
2652 boolean includeAuthor
= true;
2653 boolean includeYear
= false;
// If the authorship is already atomised, it is not searched for again.
2654 if (atomisedMap
.containsKey("dwc:scientificnameauthorship")){
2655 includeAuthor
= false;
// NOTE(review): presumably the year is only searched for zoological names - confirm.
2657 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICZN
)){
2660 String patternStr
= "";
// Author part of the pattern: the parser's regular expression for a capitalised word.
2662 patternStr
+= NonViralNameParserImplRegExBase
.capitalWord
;
// Year part of the pattern: a comma or whitespace separator followed by a
// four digit number starting with 17, 18, 19 or 20.
2665 patternStr
+= "\\s*(,|\\s+)\\s*" + "(17|18|19|20)" + "\\d{2}" ;
2667 String match
= null;
2668 if (! patternStr
.isEmpty()){
// The pattern is anchored, so only the start of the trimmed text can match.
2669 Pattern pattern
= Pattern
.compile("^" + patternStr
);
2670 Matcher matcher
= pattern
.matcher(followingText
.trim());
2671 if (matcher
.find()){
2672 match
= matcher
.group();
2676 return new Object
[]{match
, includeAuthor
, includeYear
};
2680 * @param atomisedName
2683 private String
getAtomisedNameStr(List
<String
> atomisedName
) {
2684 //logger.info("getAtomisedNameStr");
2685 String atomisedNameStr
= StringUtils
.join(atomisedName
," ");
2686 while(atomisedNameStr
.contains(" ")) {
2687 atomisedNameStr
=atomisedNameStr
.replace(" ", " ");
2689 atomisedNameStr
=atomisedNameStr
.trim();
2690 return atomisedNameStr
;
// Searches the given child nodes for the nomenclatural status text of a name.
//
// A child counts as a status node when it is named "tax:status", or when it is a
// "tax:namePart" whose "type" attribute equals "status" (both case-insensitive).
//
// NOTE(review): the declaration of 'status' and the end of the method (embedded
// lines 2700 and 2706 ff.) are not visible in this excerpt, so the exact return
// value cannot be documented from here.
// NOTE(review): getNamedItem("type") is dereferenced without a null check; a
// "tax:namePart" without a "type" attribute presumably ends in a NullPointerException - confirm.
2698 private String
extractStatus(NodeList children
) {
2699 logger
.info("extractStatus");
2701 for (int i
=0;i
<children
.getLength();i
++){
2702 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:status") ||
2703 (children
.item(i
).getNodeName().equalsIgnoreCase("tax:namePart") &&
2704 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("status"))){
// The trimmed text content of the node is taken as status.
2705 status
= children
.item(i
).getTextContent().trim();
2717 private String
extractIdentifier(String identifier
, Node atom
) {
2718 //logger.info("extractIdentifier");
2719 if (atom
.getNodeName().equalsIgnoreCase("tax:xid")){
2721 identifier
= atom
.getAttributes().getNamedItem("identifier").getNodeValue();
2722 }catch(Exception e
){
2723 System
.out
.println("pb with identifier, maybe empty");
2726 identifier
+="__"+atom
.getAttributes().getNamedItem("source").getNodeValue();
2727 }catch(Exception e
){
2728 System
.out
.println("pb with identifier, maybe empty");
2735 * @param rankListToPrint
2737 * @param atomisedName
// Walks over the atomised name nodes and appends the name parts that should be
// printed to the list atomisedName (despite the method name, no map is filled here).
//
// rankListToPrint  lower-case node names whose text is to be added as it is
// rank             rank of the name, evaluated elsewhere; used here for supra-generic names
// atomisedName     output list that receives the name parts
// atom             the child nodes of the atomised name
//
// NOTE(review): the embedded line numbering has gaps (2753, 2756, 2762, 2764, 2767,
// 2770 ff.), so the exact nesting of the branches below is not visible in this excerpt.
2740 private void addAtomisedNamesToMap(List
<String
> rankListToPrint
, Rank rank
, List
<String
> atomisedName
, NodeList atom
) {
2741 logger
.info("addAtomisedNamesToMap");
2742 for (int k
=0;k
<atom
.getLength();k
++){
2743 Node node
= atom
.item(k
);
2744 String nodeName
= node
.getNodeName();
2745 if (! nodeName
.equalsIgnoreCase("dwc:taxonRank") ) { //rank has been handled in higher method
// A subgenus is added in parentheses.
2746 if (nodeName
.equalsIgnoreCase("dwc:subgenus") || nodeName
.equalsIgnoreCase("dwcranks:subgenus")) {
2747 atomisedName
.add("("+ node
.getTextContent().trim()+")");
// Infraspecific epithets are added together with a rank marker.
2748 } else if(nodeName
.equalsIgnoreCase("dwcranks:varietyepithet") || nodeName
.equalsIgnoreCase("dwc:Subspecies") || nodeName
.equalsIgnoreCase("dwc:infraspecificepithet")) {
2749 if(nodeName
.equalsIgnoreCase("dwcranks:varietyepithet")){
2750 atomisedName
.add("var. "+node
.getTextContent().trim());
// Both "dwc:Subspecies" and "dwc:infraspecificepithet" get the marker "subsp. ".
2751 }else if(nodeName
.equalsIgnoreCase("dwc:Subspecies") || nodeName
.equalsIgnoreCase("dwc:infraspecificepithet")) {
2752 atomisedName
.add("subsp. "+atom
.item(k
).getTextContent().trim());
// Any other node listed in rankListToPrint is added without decoration.
2754 } else if(rankListToPrint
.contains(nodeName
.toLowerCase())) {
2755 atomisedName
.add(node
.getTextContent().trim());
// For ranks above genus the "dwcranks:" nodes and "dwc:Family" are added as well.
2757 if (rank
.isHigher(Rank
.GENUS()) && (nodeName
.indexOf("dwcranks:")>-1 || nodeName
.indexOf("dwc:Family")>-1)) {
2758 atomisedName
.add(node
.getTextContent().trim());
// Text nodes are not evaluated.
2759 }else if (nodeName
.equals("#text")){
2760 String text
= node
.getTextContent();
2761 if (StringUtils
.isNotBlank(text
)){
2763 logger
.warn("name xmldata contains text. This is unhandled");
2765 }else if (nodeName
.matches("(?i)(dwc:Kingdom|dwc:Class|dwc:Order|dwc:Family)")){
2766 //we currently do not use higher ranks information
2768 //TODO handle unhandled node
2769 logger
.warn("Unhandled node: " + nodeName
);
2778 * @param atomisedName
2781 private String
cleanName(String name
, List
<String
> atomisedName
) {
2782 //logger.info("cleanName");
2783 String fullName
=name
;
2784 if (fullName
!= null){
2785 fullName
= fullName
.replace("( ", "(");
2786 fullName
= fullName
.replace(" )",")");
2788 if (fullName
.trim().isEmpty()){
2789 fullName
=StringUtils
.join(atomisedName
," ");
2792 while(fullName
.contains(" ")) {
2793 fullName
=fullName
.replace(" ", " ");
2794 // logger.info("while");
2796 fullName
=fullName
.trim();
2804 * @param atomisedMap
// Tries to derive the author of a name from the full name string when the atomised
// data contains no "dwc:scientificnameauthorship" entry, and stores it in myname.
//
// rank         rank of the name; decides which atomised part precedes the author
// name         the full name string
// atomisedMap  atomised name parts by node name
// myname       receives the author via setAuthor()
//
// NOTE(review): the embedded line numbering has gaps (2814-2815, 2818, 2821,
// 2825-2827, 2831, 2834-2835, 2837-2838, 2845-2847, 2851, 2854-2856, 2858 ff.);
// the declaration of 'author', the 'try' statements, the 'else' in front of the last
// statement and the return are not visible in this excerpt.
// NOTE(review): String.split() treats the atomised value as a regular expression,
// so values with regex meta characters may split in unexpected places.
2808 private String
extractAuthorFromNames(Rank rank
, String name
, HashMap
<String
, String
> atomisedMap
, MyName myname
) {
2809 logger
.info("extractAuthorFromNames");
2810 String fullName
=name
;
2811 if (atomisedMap
.get("dwc:scientificnameauthorship") == null && fullName
!=null){
2812 // System.out.println("rank : "+rank.toString());
// Names above species rank: the author is the text that follows the subgenus
// or, if no author was found that way, the genus.
2813 if(rank
.isHigher(Rank
.SPECIES())){
2816 if(atomisedMap
.get("dwcranks:subgenus") != null) {
2817 author
= fullName
.split(atomisedMap
.get("dwcranks:subgenus"))[1].trim();
2819 if(atomisedMap
.get("dwc:subgenus") != null) {
2820 author
= fullName
.split(atomisedMap
.get("dwc:subgenus"))[1].trim();
2822 if(author
== null) {
2823 if(atomisedMap
.get("dwc:genus") != null) {
2824 author
= fullName
.split(atomisedMap
.get("dwc:genus"))[1].trim();
// The author is cut off from the full name, commas are removed from it.
2828 fullName
= fullName
.substring(0, fullName
.indexOf(author
));
2829 author
=author
.replaceAll(",","").trim();
2830 myname
.setAuthor(author
);
2832 }catch(Exception e
){
2833 //could not extract the author
// Species rank: the author is the text that follows the species epithet.
2836 if(rank
.equals(Rank
.SPECIES())){
2839 if(author
== null) {
2840 if(atomisedMap
.get("dwc:species") != null) {
// 't' is only referenced by the commented-out debug output below.
2841 String
[] t
= fullName
.split(atomisedMap
.get("dwc:species"));
2842 // System.out.println("NB ELEMENTS "+t.length +"fullName "+fullName+", "+atomisedMap.get("dwc:species"));
2843 author
= fullName
.split(atomisedMap
.get("dwc:species"))[1].trim();
2844 // System.out.println("AUTEUR "+author);
2848 fullName
= fullName
.substring(0, fullName
.indexOf(author
));
2849 author
=author
.replaceAll(",","").trim();
2850 myname
.setAuthor(author
);
2852 }catch(Exception e
){
2853 //could not extract the author
// NOTE(review): presumably the branch for an existing authorship entry - confirm.
2857 myname
.setAuthor(atomisedMap
.get("dwc:scientificnameauthorship"));
2864 * @param atomisedMap
// Copies the atomised name parts from atomisedMap into the string properties of
// myname (setFamilyStr(), setGenusStr(), ...). A part is copied only when it is
// present in the map and its rank passes checkRankValidForImport().
//
// newName      the full name string; only used for the author handling of species
// atomisedMap  atomised name parts by node name
// myname       the name object to be filled
//
// NOTE(review): the embedded line numbering has gaps (e.g. 2891, 2911, 2914,
// 2916-2917, 2924-2927); the declarations of 'n' and 'a' and a few short
// statements are not visible in this excerpt.
2867 private void createAtomisedTaxonString(String newName
, HashMap
<String
, String
> atomisedMap
, MyName myname
) {
2868 logger
.info("createAtomisedTaxonString "+atomisedMap
);
2869 if(atomisedMap
.get("dwc:family") != null && checkRankValidForImport(Rank
.FAMILY())){
2870 myname
.setFamilyStr(atomisedMap
.get("dwc:family"));
2872 if(atomisedMap
.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank
.SUBFAMILY())){
2873 myname
.setSubfamilyStr(atomisedMap
.get("dwcranks:subfamily"));
2875 if(atomisedMap
.get("dwcranks:tribe") != null && checkRankValidForImport(Rank
.TRIBE())){
2876 myname
.setTribeStr(atomisedMap
.get("dwcranks:tribe"));
2878 if(atomisedMap
.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank
.SUBTRIBE())){
2879 myname
.setSubtribeStr(atomisedMap
.get("dwcranks:subtribe"));
2881 if(atomisedMap
.get("dwc:genus") != null && checkRankValidForImport(Rank
.GENUS())){
2882 myname
.setGenusStr(atomisedMap
.get("dwc:genus"));
// The subgenus may come from either of two keys; "dwc:subgenus" is evaluated last.
2884 if(atomisedMap
.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS())){
2885 myname
.setSubgenusStr(atomisedMap
.get("dwcranks:subgenus"));
2887 if(atomisedMap
.get("dwc:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS())){
2888 myname
.setSubgenusStr(atomisedMap
.get("dwc:subgenus"));
2890 if(atomisedMap
.get("dwc:species") != null && checkRankValidForImport(Rank
.SPECIES())){
// 'n' becomes the part of newName in front of the infraspecific epithet, with the
// rank marker removed. split() treats the epithet as a regular expression.
2892 if(atomisedMap
.get("dwc:infraspecificepithet") != null) {
2893 n
=newName
.split(atomisedMap
.get("dwc:infraspecificepithet"))[0];
2894 n
=n
.replace("subsp.","");
2896 if(atomisedMap
.get("dwc:subspecies") != null) {
2897 n
=newName
.split(atomisedMap
.get("dwc:subspecies"))[0];
2898 n
=n
.replace("subsp.","");
2900 if(atomisedMap
.get("dwcranks:varietyepithet") != null) {
2901 n
=newName
.split(atomisedMap
.get("dwcranks:varietyepithet"))[0];
2902 n
=n
.replace("var.","");
2903 n
=n
.replace("v.","");
2905 if(atomisedMap
.get("dwcranks:formepithet") != null) {
2907 System
.out
.println("TODO FORMA");
2908 n
=newName
.split(atomisedMap
.get("dwcranks:formepithet"))[0];
2909 n
=n
.replace("forma","");
// The author of myname is saved here and written back after the species handling.
2912 String author
= myname
.getAuthor();
// More than two words: everything after the first two words is taken as author.
2913 if(n
.split(" ").length
>2){
2915 String n2
=n
.split(" ")[0]+" "+n
.split(" ")[1];
2918 a
=n
.split(n2
)[1].trim();
2919 }catch(Exception e
){
2920 logger
.info("no author in "+n
+"?");}
2922 myname
.setAuthor(a
);
2923 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
2928 myname
.setSpeciesStr(atomisedMap
.get("dwc:species"));
2929 myname
.setAuthor(author
);
// Subspecies may come from either of two keys; "dwc:infraspecificepithet" is evaluated last.
2931 if(atomisedMap
.get("dwc:subspecies") != null && checkRankValidForImport(Rank
.SUBSPECIES())){
2932 myname
.setSubspeciesStr(atomisedMap
.get("dwc:subspecies"));
2934 if(atomisedMap
.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank
.SUBSPECIES())){
2935 myname
.setSubspeciesStr(atomisedMap
.get("dwc:infraspecificepithet"));
2937 if(atomisedMap
.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank
.VARIETY())){
2938 myname
.setVarietyStr(atomisedMap
.get("dwcranks:varietyepithet"));
2940 if(atomisedMap
.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank
.FORM())){
2941 myname
.setFormStr(atomisedMap
.get("dwcranks:formepithet"));
// Integer.valueOf() throws a NumberFormatException for a non-numeric year string.
2943 if (atomisedMap
.get(PUBLICATION_YEAR
) != null){
2944 myname
.setPublicationYear(Integer
.valueOf(atomisedMap
.get(PUBLICATION_YEAR
)));
2949 * @see #createUnparsedSynonymNew(Rank, String, HashMap, MyName)
2952 * @param atomisedMap
// Fills myname from the atomised name parts for a synonym that is not parsed by the
// name parser. For an unknown rank the whole name is handed to
// setNotParsableTaxon(). Otherwise a taxon is looked up or created via
// findOrCreateTaxon(), but only for the atomised part whose rank equals the rank
// of the name and passes checkRankValidForImport().
//
// rank         rank of the name
// newName      the full name string
// atomisedMap  atomised name parts by node name
// myname       the name object to be filled
//
// NOTE(review): the embedded line numbering has gaps (e.g. 2960, 2983, 3003, 3006,
// 3008-3009, 3014-3016); the 'else' of the rank check, the declarations of 'n' and
// 'a' and a few short statements are not visible in this excerpt.
2955 private void createUnparsedSynonym(Rank rank
, String newName
, HashMap
<String
, String
> atomisedMap
, MyName myname
) {
2956 logger
.info("createSynonym");
2957 //System.out.println("createsynonym");
2958 if(rank
.equals(Rank
.UNKNOWN_RANK())){
2959 myname
.setNotParsableTaxon(newName
);
2961 if(atomisedMap
.get("dwc:family") != null && checkRankValidForImport(Rank
.FAMILY()) && rank
.equals(Rank
.FAMILY())){
2962 myname
.setFamily(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:family"),newName
, Rank
.FAMILY(),rank
));
2964 if(atomisedMap
.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank
.SUBFAMILY()) && rank
.equals(Rank
.SUBFAMILY())){
2965 myname
.setSubfamily(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subfamily"), newName
,Rank
.SUBFAMILY(),rank
));
2967 if(atomisedMap
.get("dwcranks:tribe") != null && checkRankValidForImport(Rank
.TRIBE()) && rank
.equals(Rank
.TRIBE())){
2968 myname
.setTribe(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:tribe"),newName
, Rank
.TRIBE(),rank
));
2970 if(atomisedMap
.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank
.SUBTRIBE()) && rank
.equals(Rank
.SUBTRIBE())){
2971 myname
.setSubtribe(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subtribe"),newName
, Rank
.SUBTRIBE(),rank
));
2973 if(atomisedMap
.get("dwc:genus") != null && checkRankValidForImport(Rank
.GENUS()) && rank
.equals(Rank
.GENUS())){
2974 myname
.setGenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:genus"),newName
, Rank
.GENUS(),rank
));
// The subgenus may come from either of two keys; "dwc:subgenus" is evaluated last.
2976 if(atomisedMap
.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS()) && rank
.equals(Rank
.SUBGENUS())){
2977 myname
.setSubgenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subgenus"),newName
, Rank
.SUBGENUS(),rank
));
2979 if(atomisedMap
.get("dwc:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS()) && rank
.equals(Rank
.SUBGENUS())){
2980 myname
.setSubgenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:subgenus"),newName
, Rank
.SUBGENUS(),rank
));
2982 if(atomisedMap
.get("dwc:species") != null && checkRankValidForImport(Rank
.SPECIES()) && rank
.equals(Rank
.SPECIES())){
// 'n' becomes the part of newName in front of the infraspecific epithet, with the
// rank marker removed. split() treats the epithet as a regular expression.
2984 if(atomisedMap
.get("dwc:infraspecificepithet") != null) {
2985 n
=newName
.split(atomisedMap
.get("dwc:infraspecificepithet"))[0];
2986 n
=n
.replace("subsp.","");
2988 if(atomisedMap
.get("dwc:subspecies") != null) {
2989 n
=newName
.split(atomisedMap
.get("dwc:subspecies"))[0];
2990 n
=n
.replace("subsp.","");
2992 if(atomisedMap
.get("dwcranks:varietyepithet") != null) {
2993 n
=newName
.split(atomisedMap
.get("dwcranks:varietyepithet"))[0];
2994 n
=n
.replace("var.","");
2995 n
=n
.replace("v.","");
2997 if(atomisedMap
.get("dwcranks:formepithet") != null) {
2999 //System.out.println("TODO FORMA");
3000 n
=newName
.split(atomisedMap
.get("dwcranks:formepithet"))[0];
3001 n
=n
.replace("forma","");
// The author of myname is saved here and written back after the species was set.
3004 String author
= myname
.getAuthor();
// More than two words: everything after the first two words is taken as author.
3005 if(n
.split(" ").length
>2){
3007 String n2
=n
.split(" ")[0]+" "+n
.split(" ")[1];
3010 a
= n
.split(n2
)[1].trim();
3011 }catch(Exception e
){logger
.info("no author in "+n
);}
3012 myname
.setAuthor(a
);
3013 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3017 Taxon species
= myname
.findOrCreateTaxon(atomisedMap
.get("dwc:species"),n
, Rank
.SPECIES(),rank
);
3018 myname
.setSpecies(species
);
3019 myname
.setAuthor(author
);
// Subspecies may come from either of two keys; "dwc:infraspecificepithet" is evaluated last.
3021 if(atomisedMap
.get("dwc:subspecies") != null && checkRankValidForImport(Rank
.SUBSPECIES()) && rank
.equals(Rank
.SUBSPECIES())){
3022 myname
.setSubspecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:subspecies"), newName
,Rank
.SUBSPECIES(),rank
));
3024 if(atomisedMap
.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank
.SUBSPECIES()) && rank
.equals(Rank
.SUBSPECIES())){
3025 myname
.setSubspecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:infraspecificepithet"),newName
, Rank
.SUBSPECIES(),rank
));
3027 if(atomisedMap
.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank
.VARIETY()) && rank
.equals(Rank
.VARIETY())){
3028 myname
.setVariety(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:varietyepithet"),newName
, Rank
.VARIETY(),rank
));
3030 if(atomisedMap
.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank
.FORM()) && rank
.equals(Rank
.FORM())){
3031 myname
.setForm(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:formepithet"), newName
,Rank
.FORM(),rank
));
3040 * @see #createUnparsedSynonym(Rank, String, HashMap, MyName)
3041 * The original TaxonXImport extracted synonyms by creating accepted taxa with partial names.
3042 * I (AM) do not understand this, but I do not want to destroy code which may work in some cases;
3043 * therefore I created this switch to keep the old version available.
3044 * For spiders the new version is preferred.
// Builds a synonym directly from the atomised name parts, without the name parser:
// a new name is obtained from getNonViralNameAccNomenclature(), filled from
// atomisedMap, wrapped into a Synonym and stored in myname (setSyno(), setSynonym(true)).
//
// rank         rank of the name
// newName      the full name string; used as title cache for names of unknown rank
// atomisedMap  atomised name parts by node name
// myname       receives the created synonym
// refMods      only referenced by the commented-out addSource() call below
//
// NOTE(review): the embedded line numbering has gaps (e.g. 3052, 3054, 3056, 3071,
// 3092); it is not visible in this excerpt whether the atomised parts are also
// applied to names of unknown rank.
3046 private void createUnparsedSynonymNew(Rank rank
, String newName
, HashMap
<String
, String
> atomisedMap
, MyName myname
, Reference refMods
) {
3047 logger
.info("createSynonym");
3049 INonViralName nameToBeFilled
= this.getNonViralNameAccNomenclature();
3050 //System.out.println("createsynonym");
3051 if(rank
.equals(Rank
.UNKNOWN_RANK())){
3053 myname
.setNotParsableTaxon(newName
);
// 'true' is passed as second argument of setTitleCache().
3055 nameToBeFilled
.setTitleCache(newName
, true);
3057 if(atomisedMap
.get("dwc:genus") != null ){
3058 nameToBeFilled
.setGenusOrUninomial(atomisedMap
.get("dwc:genus"));
// Supra-generic names: the first available entry, tested from subtribe up to family,
// is set as uninomial and overrides a genus set above.
3060 if (rank
.isSupraGeneric()){
3061 if (atomisedMap
.get("dwcranks:subtribe") != null ){
3062 nameToBeFilled
.setGenusOrUninomial(atomisedMap
.get("dwcranks:subtribe"));
// NOTE(review): this branch repeats the condition of the branch above and can
// therefore never be entered.
3063 }else if (atomisedMap
.get("dwcranks:subtribe") != null ){
3064 nameToBeFilled
.setGenusOrUninomial(atomisedMap
.get("dwcranks:subtribe"));
3065 }else if (atomisedMap
.get("dwcranks:tribe") != null ){
3066 nameToBeFilled
.setGenusOrUninomial(atomisedMap
.get("dwcranks:tribe"));
3067 }else if (atomisedMap
.get("dwcranks:subfamily") != null ){
3068 nameToBeFilled
.setGenusOrUninomial(atomisedMap
.get("dwcranks:subfamily"));
3069 }else if (atomisedMap
.get("dwc:family") != null ){
3070 nameToBeFilled
.setGenusOrUninomial(atomisedMap
.get("dwc:family"));
3072 logger
.warn("Supra generic rank not yet handled or atomisation not available");
// The subgenus may come from either of two keys; "dwc:subgenus" is evaluated last.
3075 if (atomisedMap
.get("dwcranks:subgenus") != null){
3076 nameToBeFilled
.setInfraGenericEpithet(atomisedMap
.get("dwcranks:subgenus"));
3078 if (atomisedMap
.get("dwc:subgenus") != null){
3079 nameToBeFilled
.setInfraGenericEpithet(atomisedMap
.get("dwc:subgenus"));
3081 if (atomisedMap
.get("dwc:species") != null){
3082 nameToBeFilled
.setSpecificEpithet(atomisedMap
.get("dwc:species"));
// Infraspecific epithet: the first available entry is used, tested in the order
// form, variety, infraspecific epithet, subspecies.
3084 if (atomisedMap
.get("dwcranks:formepithet") != null){
3085 nameToBeFilled
.setInfraSpecificEpithet(atomisedMap
.get("dwcranks:formepithet"));
3086 }else if (atomisedMap
.get("dwcranks:varietyepithet") != null){
3087 nameToBeFilled
.setInfraSpecificEpithet(atomisedMap
.get("dwcranks:varietyepithet"));
3088 }else if (atomisedMap
.get("dwc:infraspecificepithet") != null){
3089 nameToBeFilled
.setInfraSpecificEpithet(atomisedMap
.get("dwc:infraspecificepithet"));
3090 }else if (atomisedMap
.get("dwc:subspecies") != null){
3091 nameToBeFilled
.setInfraSpecificEpithet(atomisedMap
.get("dwc:subspecies"));
// The sec reference is sourceUrlRef, unless the configuration says not to keep the
// original secundum; then the configured secundum is used.
3093 Reference sec
= sourceUrlRef
;
3094 if(!state2
.getConfig().doKeepOriginalSecundum()){
3095 sec
= state2
.getConfig().getSecundum();
3097 Synonym syn
= Synonym
.NewInstance(nameToBeFilled
, sec
);
3098 // sourceHandler.addSource(refMods, syn);
3099 myname
.setSyno(syn
);
3100 myname
.setSynonym(true);
3107 * @param atomisedMap
// Fills myname with taxa for the atomised name parts. For an unknown rank the whole
// name is handed to setNotParsableTaxon(). Otherwise, for every atomised part that
// is present in the map and whose rank passes checkRankValidForImport(), a taxon is
// looked up or created via findOrCreateTaxon() and set on myname.
//
// rank         rank of the name
// newName      the full name string
// atomisedMap  atomised name parts by node name
// myname       the name object to be filled
//
// NOTE(review): the embedded line numbering has gaps (e.g. 3114-3115, 3138, 3158,
// 3162-3163, 3168-3171); the 'else' of the rank check, the declarations of 'n' and
// 'a' and a few short statements are not visible in this excerpt.
3110 private void createAtomisedTaxon(Rank rank
, String newName
, HashMap
<String
, String
> atomisedMap
, MyName myname
) {
3111 logger
.info("createAtomisedTaxon "+atomisedMap
);
3112 if(rank
.equals(Rank
.UNKNOWN_RANK())){
3113 myname
.setNotParsableTaxon(newName
);
3116 if(atomisedMap
.get("dwc:family") != null && checkRankValidForImport(Rank
.FAMILY())){
3117 myname
.setFamily(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:family"),newName
, Rank
.FAMILY(),rank
));
3119 if(atomisedMap
.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank
.SUBFAMILY())){
3120 myname
.setSubfamily(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subfamily"), newName
,Rank
.SUBFAMILY(),rank
));
3122 if(atomisedMap
.get("dwcranks:tribe") != null && checkRankValidForImport(Rank
.TRIBE())){
3123 myname
.setTribe(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:tribe"),newName
, Rank
.TRIBE(),rank
));
3125 if(atomisedMap
.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank
.SUBTRIBE())){
3126 myname
.setSubtribe(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subtribe"),newName
, Rank
.SUBTRIBE(),rank
));
3128 if(atomisedMap
.get("dwc:genus") != null && checkRankValidForImport(Rank
.GENUS())){
3129 myname
.setGenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:genus"),newName
, Rank
.GENUS(),rank
));
// The subgenus may come from either of two keys; "dwc:subgenus" is evaluated last.
3131 if(atomisedMap
.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS())){
3132 myname
.setSubgenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subgenus"),newName
, Rank
.SUBGENUS(),rank
));
3134 if(atomisedMap
.get("dwc:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS())){
3135 myname
.setSubgenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:subgenus"),newName
, Rank
.SUBGENUS(),rank
));
3137 if(atomisedMap
.get("dwc:species") != null && checkRankValidForImport(Rank
.SPECIES())){
// 'n' becomes the part of newName in front of the infraspecific epithet, with the
// rank marker removed. split() treats the epithet as a regular expression.
3139 if(atomisedMap
.get("dwc:infraspecificepithet") != null) {
3140 n
=newName
.split(atomisedMap
.get("dwc:infraspecificepithet"))[0];
3141 n
=n
.replace("subsp.","");
3143 if(atomisedMap
.get("dwc:subspecies") != null) {
3144 n
=newName
.split(atomisedMap
.get("dwc:subspecies"))[0];
3145 n
=n
.replace("subsp.","");
3147 if(atomisedMap
.get("dwcranks:varietyepithet") != null) {
3148 n
=newName
.split(atomisedMap
.get("dwcranks:varietyepithet"))[0];
3149 n
=n
.replace("var.","");
3150 n
=n
.replace("v.","");
3152 if(atomisedMap
.get("dwcranks:formepithet") != null) {
3154 //System.out.println("TODO FORMA");
3155 n
=newName
.split(atomisedMap
.get("dwcranks:formepithet"))[0];
3156 n
=n
.replace("forma","");
// The author of myname is saved here and written back after the species was set.
3159 String author
= myname
.getAuthor();
// More than two words: everything after the first two words is taken as author.
3160 if(n
.split(" ").length
>2){
3161 String n2
=n
.split(" ")[0]+" "+n
.split(" ")[1];
3164 a
= n
.split(n2
)[1].trim();
3165 }catch(Exception e
){logger
.info("no author in "+n
);}
3166 myname
.setAuthor(a
);
3167 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3172 myname
.setSpecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:species"),n
, Rank
.SPECIES(),rank
));
3173 myname
.setAuthor(author
);
// Subspecies may come from either of two keys; "dwc:infraspecificepithet" is evaluated last.
3175 if(atomisedMap
.get("dwc:subspecies") != null && checkRankValidForImport(Rank
.SUBSPECIES())){
3176 myname
.setSubspecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:subspecies"), newName
,Rank
.SUBSPECIES(),rank
));
3178 if(atomisedMap
.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank
.SUBSPECIES())){
3179 myname
.setSubspecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:infraspecificepithet"),newName
, Rank
.SUBSPECIES(),rank
));
3181 if(atomisedMap
.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank
.VARIETY())){
3182 myname
.setVariety(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:varietyepithet"),newName
, Rank
.VARIETY(),rank
));
3184 if(atomisedMap
.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank
.FORM())){
3185 myname
.setForm(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:formepithet"), newName
,Rank
.FORM(),rank
));
3193 private boolean checkRankValidForImport(Rank currentRank
) {
3194 //logger.info("checkRankValidForImport");
3195 return currentRank
.isLower(state2
.getConfig().getMaxRank()) || currentRank
.equals(state2
.getConfig().getMaxRank());
3201 * @param classification2
3203 public void updateClassification(Classification classification2
) {
3204 //logger.info("updateClassification");
3205 classification
= classification2
;
3210 public class MyName
{
3214 public MyName(boolean isSynonym
) {
3216 this.isSynonym
= isSynonym
;
3219 String originalName
="";
3221 Rank rank
=Rank
.UNKNOWN_RANK();
3222 String identifier
="";
3226 TaxonNameBase
<?
,?
> taxonNameBase
;
3230 Taxon family
,subfamily
,tribe
,subtribe
,genus
,subgenus
,species
,subspecies
, variety
,form
;
3231 INonViralName familyName
, subfamilyName
, tribeName
,subtribeName
,genusName
,subgenusName
,speciesName
,subspeciesName
;
3232 String familyStr
, subfamilyStr
, tribeStr
,subtribeStr
,genusStr
,subgenusStr
,speciesStr
,subspeciesStr
,formStr
,varietyStr
;
3233 Integer publicationYear
;
3238 private Taxon taxon
;
3239 private Synonym syno
;
3244 public Synonym
getSyno() {
3249 public String
toString(){
3250 List
<String
> tot
=new ArrayList
<String
>();
3251 String
[] n
= {familyStr
, subfamilyStr
, tribeStr
,subtribeStr
,genusStr
,subgenusStr
,speciesStr
,subspeciesStr
,formStr
,varietyStr
};
3253 if (!StringUtils
.isEmpty(elt
)) {
3259 return StringUtils
.join(tot
," ");
3262 * @param syno the syno to set
3264 public void setSyno(Synonym syno
) {
3268 boolean isSynonym
=false;
3271 * @return the isSynonym
3273 public boolean isSynonym() {
3278 * @param isSynonym the isSynonym to set
3280 public void setSynonym(boolean isSynonym
) {
3281 this.isSynonym
= isSynonym
;
3284 public void setSource(Reference re
){
3291 public void setFormStr(String string
) {
3292 this.formStr
=string
;
3298 public void setVarietyStr(String string
) {
3299 this.varietyStr
=string
;
3305 public void setSubspeciesStr(String string
) {
3306 this.subspeciesStr
=string
;
3312 public void setSpeciesStr(String string
) {
3313 this.speciesStr
=string
;
3319 public void setSubgenusStr(String string
) {
3320 this.subgenusStr
=string
;
3326 public void setGenusStr(String string
) {
3327 this.genusStr
=string
;
3333 public void setSubtribeStr(String string
) {
3334 this.subtribeStr
=string
;
3340 public void setTribeStr(String string
) {
3341 this.tribeStr
=string
;
3347 public void setSubfamilyStr(String string
) {
3348 this.subfamilyStr
=string
;
3354 public void setFamilyStr(String string
) {
3355 this.familyStr
=string
;
3359 * @return the familyStr
3361 public String
getFamilyStr() {
3365 * @return the subfamilyStr
3367 public String
getSubfamilyStr() {
3368 return subfamilyStr
;
3371 * @return the tribeStr
3373 public String
getTribeStr() {
3377 * @return the subtribeStr
3379 public String
getSubtribeStr() {
3383 * @return the genusStr
3385 public String
getGenusStr() {
3389 * @return the subgenusStr
3391 public String
getSubgenusStr() {
3395 * @return the speciesStr
3397 public String
getSpeciesStr() {
3401 * @return the subspeciesStr
3403 public String
getSubspeciesStr() {
3404 return subspeciesStr
;
3407 * @return the formStr
3409 public String
getFormStr() {
3413 * @return the varietyStr
3415 public String
getVarietyStr() {
3419 public Integer
getPublicationYear() {
3420 return publicationYear
;
3423 public void setPublicationYear(Integer publicationYear
) {
3424 this.publicationYear
= publicationYear
;
3430 public void setNotParsableTaxon(String newName2
) {
3431 //takes too much time
3432 // List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3434 NomenclaturalStatusType statusType
= null;
3435 if (!getStatus().isEmpty()){
3437 statusType
= nomStatusString2NomStatus(getStatus());
3438 } catch (UnknownCdmTypeException e
) {
3439 addProblematicStatusToFile(getStatus());
3440 logger
.warn("Problem with status");
3443 List
<TaxonBase
> tmpList
= new ArrayList
<>();
3445 Pager
<TaxonBase
> taxontest
= importer
.getTaxonService().findByTitle(TaxonBase
.class, newName2
, MatchMode
.BEGINNING
, null, null, null, null, null);
3446 tmpList
.addAll(taxontest
.getRecords());
3448 //logger.info("tmpList returned: "+tmpList.size());
3451 INonViralName identicName
= null;
3452 boolean foundIdentic
=false;
3453 TaxonBase
<?
> tmpTaxonBase
=null;
3454 // Taxon tmpPartial=null;
3455 for (TaxonBase
<?
> tmpb
:tmpList
){
3457 TaxonNameBase
<?
,?
> tnb
= tmpb
.getName();
3460 if (tnb
.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(newName2
) ){
3461 crank
=tnb
.getRank();
3462 if (crank
!=null && rank
!=null){
3463 if (crank
.equals(rank
)){
3465 if (isSynonym
&& tmpb
.isInstanceOf(Synonym
.class) || !isSynonym
&& tmpb
.isInstanceOf(Taxon
.class)){
3476 boolean statusMatch
=false;
3477 boolean appendedMatch
=false;
3478 if(tmpTaxonBase
!=null && foundIdentic
){
3479 statusMatch
=compareStatus(tmpTaxonBase
, statusType
);
3480 if (!getStatus().isEmpty() && ! (tmpTaxonBase
.getAppendedPhrase() == null)) {
3481 appendedMatch
=tmpTaxonBase
.getAppendedPhrase().equals(getStatus());
3483 if (getStatus().isEmpty() && tmpTaxonBase
.getAppendedPhrase() == null) {
3488 if ((tmpTaxonBase
== null || !foundIdentic
) || (tmpTaxonBase
!= null && !statusMatch
) || (tmpTaxonBase
!= null && !appendedMatch
&& !statusMatch
)){
3491 if (identicName
== null){
3492 tnb
= getNonViralNameAccNomenclature();
3495 if(statusType
!= null) {
3496 tnb
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
3498 if(StringUtils
.isNotBlank(getStatus())) {
3499 tnb
.setAppendedPhrase(getStatus());
3501 tnb
.setTitleCache(newName2
,true);
3502 tmpTaxonBase
= findMatchingTaxon(tnb
,refMods
);
3507 if(tmpTaxonBase
==null){
3508 tmpTaxonBase
= isSynonym ? Synonym
.NewInstance(tnb
, refMods
) : Taxon
.NewInstance(tnb
, refMods
);
3509 if(!state2
.getConfig().doKeepOriginalSecundum()) {
3510 tmpTaxonBase
.setSec(state2
.getConfig().getSecundum());
3512 //tmptaxonbase.setSec(refMods);
3514 classification
.addChildTaxon((Taxon
)tmpTaxonBase
, null, null);
3515 sourceHandler
.addSource(refMods
, (Taxon
)tmpTaxonBase
);
3520 tmpTaxonBase
= CdmBase
.deproxy(tmpTaxonBase
, TaxonBase
.class);
3521 if (author
!= null) {
3522 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
3523 setLSID(getIdentifier(), tmpTaxonBase
);
3524 importer
.getTaxonService().saveOrUpdate(tmpTaxonBase
);
3525 tmpTaxonBase
= CdmBase
.deproxy(tmpTaxonBase
, TaxonBase
.class);
3528 TaxonNameBase
<?
,?
> tnb
= CdmBase
.deproxy(tmpTaxonBase
.getName(), TaxonNameBase
.class);
3531 this.taxon
=(Taxon
)tmpTaxonBase
;
3533 if (tmpTaxonBase
instanceof Taxon
){
3534 logger
.warn("Incorrect status");
3536 this.syno
=(Synonym
)tmpTaxonBase
;
3539 taxonNameBase
= tnb
;
3546 public void buildTaxon() {
3547 //System.out.println("BUILD TAXON");
3548 logger
.info("buildTaxon");
3549 NomenclaturalStatusType statusType
= null;
3550 if (!getStatus().isEmpty()){
3551 status
= getStatus();
3552 String newNameStatus
= newNameStatus(status
);
3553 if (newNameStatus
!= null){
3554 taxonNameBase
.setAppendedPhrase(newNameStatus
);
3557 statusType
= nomStatusString2NomStatus(getStatus());
3558 taxonNameBase
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
3559 } catch (UnknownCdmTypeException e
) {
3560 addProblematicStatusToFile(getStatus());
3561 logger
.warn("Problem with status");
3565 importer
.getNameService().save(taxonNameBase
);
3567 TaxonBase
<?
> tmpTaxonBase
;
3569 tmpTaxonBase
=Taxon
.NewInstance(taxonNameBase
, refMods
); //sec set null
3572 tmpTaxonBase
=Synonym
.NewInstance(taxonNameBase
, refMods
); //sec set null
3574 boolean exist
= false;
3576 for (TaxonNode node
: classification
.getAllNodes()){
3578 Taxon nodeTaxon
= node
.getTaxon();
3579 boolean titleMatches
= nodeTaxon
.getTitleCache().equalsIgnoreCase(tmpTaxonBase
.getTitleCache());
3580 boolean nomStatusMatches
= compareStatus(node
.getTaxon(), statusType
);
3581 boolean nodeNameReplaceable
= checkNodeNameReplaceable(nodeTaxon
, tmpTaxonBase
);
3582 if(titleMatches
&& nomStatusMatches
) {
3584 tmpTaxonBase
=CdmBase
.deproxy(nodeTaxon
, TaxonBase
.class);
3587 logger
.info("Found the same name but from another type (taxon/synonym)");
3588 TaxonNameBase
<?
,?
> existingTnb
= getTaxon().getName();
3589 tmpTaxonBase
= Synonym
.NewInstance(existingTnb
, refMods
);
3590 importer
.getTaxonService().saveOrUpdate(tmpTaxonBase
);
3593 }else if (nodeNameReplaceable
){
3594 nodeTaxon
.setName(tmpTaxonBase
.getName());
3595 tmpTaxonBase
= nodeTaxon
;
3598 }catch(NullPointerException n
){logger
.warn(" A taxon is either null or its titlecache is null - ignore it?");}
3603 boolean insertAsExisting
=false;
3604 List
<Taxon
> existingTaxons
=new ArrayList
<Taxon
>();
3606 existingTaxons
= getMatchingTaxa(taxonNameBase
);
3607 } catch (Exception e1
) {
3608 e1
.printStackTrace();
3610 double similarityScore
=0.0;
3611 double similarityAuthor
=-1;
3616 for (Taxon bestMatchingTaxon
: existingTaxons
){
3617 //System.out.println("tnbase "+taxonnamebase.getTitleCache());
3618 //System.out.println("bestex "+bestMatchingTaxon.getTitleCache());
3619 if(taxonNameBase
.getAuthorshipCache()!=null) {
3620 author1
=taxonNameBase
.getAuthorshipCache();
3623 if(bestMatchingTaxon
.getName().getAuthorshipCache()!=null) {
3624 author2
=bestMatchingTaxon
.getName().getAuthorshipCache();
3626 } catch (Exception e
) {
3627 // TODO Auto-generated catch block
3628 e
.printStackTrace();
3631 t1
=taxonNameBase
.getTitleCache();
3632 if (author1
!=null && !StringUtils
.isEmpty(author1
)) {
3633 t1
=t1
.split(Pattern
.quote(author1
))[0];
3635 } catch (Exception e
) {
3636 // TODO Auto-generated catch block
3637 e
.printStackTrace();
3640 t2
=bestMatchingTaxon
.getTitleCache().split("sec.")[0].trim();
3641 if (author2
!=null && !StringUtils
.isEmpty(author2
)) {
3642 t2
=t2
.split(Pattern
.quote(author2
))[0];
3644 } catch (Exception e
) {
3645 // TODO Auto-generated catch block
3646 e
.printStackTrace();
3649 similarityScore
=similarity(t1
.trim(), t2
.trim());
3650 //System.out.println("taxonscore "+similarityScore);
3651 similarityAuthor
=similarity(author1
.trim(), author2
.trim());
3652 //System.out.println("authorscore "+similarityAuthor);
3653 insertAsExisting
= compareAndCheckTaxon(taxonNameBase
, refMods
, similarityScore
, bestMatchingTaxon
, similarityAuthor
);
3654 if(insertAsExisting
) {
3655 tmpTaxonBase
=bestMatchingTaxon
;
3659 if ( !insertAsExisting
){
3660 if(!state2
.getConfig().doKeepOriginalSecundum()) {
3661 tmpTaxonBase
.setSec(state2
.getConfig().getSecundum());
3664 // tmptaxonbase.setSec(refMods);
3665 if (taxonNameBase
.getRank().equals(state2
.getConfig().getMaxRank())) {
3666 //System.out.println("****************************"+tmptaxonbase);
3668 classification
.addChildTaxon((Taxon
)tmpTaxonBase
, refMods
, null);
3671 hierarchy
= new HashMap
<Rank
, Taxon
>();
3672 //System.out.println("LOOK FOR PARENT "+taxonnamebase.toString()+", "+tmptaxonbase.toString());
3674 lookForParentNode(taxonNameBase
,(Taxon
)tmpTaxonBase
, refMods
,this);
3675 //System.out.println("HIERARCHY "+hierarchy);
3676 Taxon parent
= buildHierarchy();
3677 if(!taxonExistsInClassification(parent
,(Taxon
)tmpTaxonBase
)){
3679 classification
.addParentChild(parent
, (Taxon
)tmpTaxonBase
, refMods
, null);
3681 classification
.addChildTaxon((Taxon
)tmpTaxonBase
, refMods
, null);
3683 importer
.getClassificationService().saveOrUpdate(classification
);
3686 // Set<TaxonNode> nodeList = classification.getAllNodes();
3687 // for(TaxonNode tn:nodeList) {
3688 // System.out.println(tn.getTaxon());
3692 importer
.getClassificationService().saveOrUpdate(classification
);
3695 Synonym castTest
=CdmBase
.deproxy(tmpTaxonBase
, Synonym
.class);
3696 }catch(Exception e
){
3697 TaxonNameBase
<?
,?
> existingTnb
= tmpTaxonBase
.getName();
3698 Synonym castTest
= Synonym
.NewInstance(existingTnb
, refMods
);
3699 importer
.getTaxonService().saveOrUpdate(castTest
);
3700 tmpTaxonBase
=CdmBase
.deproxy(castTest
, Synonym
.class);
3705 taxon
=CdmBase
.deproxy(tmpTaxonBase
, Taxon
.class);
3707 syno
=CdmBase
.deproxy(tmpTaxonBase
, Synonym
.class);
// Preliminary check (see the TODO below), presumably of whether the name of an existing node
// taxon may be replaced by the name of newTaxon. The visible conditions test, in this order:
// newTaxon being a Synonym, either name being null, descriptions or type designations already
// attached to the node taxon / node name, agreement of every nomenclatural status of the new
// name with the node taxon (compareStatus), and equality of the two name caches.
// NOTE(review): the result produced by each branch is not visible in this excerpt - confirm
// before relying on any particular outcome.
3712 private boolean checkNodeNameReplaceable(Taxon nodeTaxon
, TaxonBase
<?
> newTaxon
) {
3713 //TODO preliminary check
3714 if (newTaxon
.isInstanceOf(Synonym
.class)){
3717 INonViralName nodeName
= nodeTaxon
.getName();
3718 INonViralName newName
= newTaxon
.getName();
3719 if (nodeTaxon
.getName() == null || newName
== null){
3722 if (nodeTaxon
.getDescriptions().size() > 0 || nodeName
.getDescriptions().size() > 0 || nodeName
.getTypeDesignations().size() > 0 ){
3725 boolean compare
= true;
// every status of the new name has to pass compareStatus for compare to stay true
3726 for (NomenclaturalStatus status
: newName
.getStatus() ){
3727 compare
&= compareStatus(nodeTaxon
, status
.getType());
3733 if (nodeName
.getNameCache() != null && nodeName
.getNameCache().equals(newName
.getNameCache())){
// node name has no authorship part (name cache equals title cache) ...
3734 if (nodeName
.getNameCache().equals(nodeName
.getTitleCache())){
// ... while the new name's title cache is longer than its name cache
3735 if (newName
.getNameCache().length() < newName
.getTitleCache().length()){
3736 logger
.warn("We still need to check, if node was automatically created via hierarchy creation: " + nodeName
.getNameCache());
3748 private Taxon
buildHierarchy() {
3749 logger
.info("buildHierarchy");
3750 Taxon higherTaxon
= null;
3751 //add the maxRank as a root
3752 if(hierarchy
.containsKey(state2
.getConfig().getMaxRank())){
3753 Taxon ct
=hierarchy
.get(state2
.getConfig().getMaxRank());
3754 if(!taxonExistsInClassification(higherTaxon
, ct
)) {
3755 classification
.addChildTaxon(ct
, refMods
, null);
3757 higherTaxon
= hierarchy
.get(state2
.getConfig().getMaxRank());
3758 // return higherTaxon;
3760 //add the relation to the highertaxon, except if the current rank to add IS the maxRank
3764 if(hierarchy
.containsKey(Rank
.FAMILY()) && !state2
.getConfig().getMaxRank().equals(Rank
.FAMILY())){
3765 higherTaxon
=saveAndGetHigherTaxon(Rank
.FAMILY(),higherTaxon
);
3767 if(hierarchy
.containsKey(Rank
.SUBFAMILY()) && !state2
.getConfig().getMaxRank().equals(Rank
.SUBFAMILY())){
3768 higherTaxon
=saveAndGetHigherTaxon(Rank
.SUBFAMILY(),higherTaxon
);
3770 if(hierarchy
.containsKey(Rank
.TRIBE())&& !state2
.getConfig().getMaxRank().equals(Rank
.TRIBE())){
3771 higherTaxon
=saveAndGetHigherTaxon(Rank
.TRIBE(),higherTaxon
);
3773 if(hierarchy
.containsKey(Rank
.SUBTRIBE())&& !state2
.getConfig().getMaxRank().equals(Rank
.SUBTRIBE())){
3774 higherTaxon
=saveAndGetHigherTaxon(Rank
.SUBTRIBE(),higherTaxon
);
3776 if(hierarchy
.containsKey(Rank
.GENUS())&& !state2
.getConfig().getMaxRank().equals(Rank
.SUBGENUS())){
3777 higherTaxon
=saveAndGetHigherTaxon(Rank
.GENUS(),higherTaxon
);
3779 if(hierarchy
.containsKey(Rank
.SUBGENUS())&& !state2
.getConfig().getMaxRank().equals(Rank
.SUBGENUS())){
3780 higherTaxon
=saveAndGetHigherTaxon(Rank
.SUBGENUS(),higherTaxon
);
3782 importer
.getClassificationService().saveOrUpdate(classification
);
// Links the taxon stored in hierarchy for rank r into the classification, unless
// taxonExistsInClassification already reports the (higherTaxon, taxon) pair:
//  - as a child of higherTaxon when both are non-null,
//  - as a direct child of the classification when higherTaxon is null.
// NOTE(review): the return statement is not visible in this excerpt; buildHierarchy assigns
// the result to its running "higherTaxon", so presumably the taxon of rank r is returned.
3786 private Taxon
saveAndGetHigherTaxon(Rank r
, Taxon higherTaxon
){
3787 Taxon ct
=hierarchy
.get(r
);
3788 if(!taxonExistsInClassification(higherTaxon
,ct
)) {
3789 if(higherTaxon
!= null && ct
!=null) {
3790 classification
.addParentChild(higherTaxon
, ct
, refMods
, null);
3792 if(higherTaxon
== null && ct
!=null) {
3793 classification
.addChildTaxon(ct
, refMods
, null);
// Looks for child in the classification by comparing title caches case-insensitively.
// Two scans over classification.getAllNodes() are visible: the first matches nodes against
// parent and then their child nodes against child; the second matches any node against
// child alone.
// NOTE(review): the condition choosing between the two scans is not visible here -
// presumably a null check on parent (buildHierarchy passes a null parent); confirm.
3799 private boolean taxonExistsInClassification(Taxon parent
, Taxon child
){
3800 logger
.info("taxonExistsInClassification");
3801 // System.out.println("LOOK IF TAXA EXIST "+parent+", "+child);
3802 boolean found
=false;
3804 for (TaxonNode p
: classification
.getAllNodes()){
3805 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
3806 for (TaxonNode c
: p
.getChildNodes()) {
3807 if (c
.getTaxon().getTitleCache().equalsIgnoreCase(child
.getTitleCache())) {
3816 for (TaxonNode p
: classification
.getAllNodes()){
3817 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(child
.getTitleCache())) {
3823 // System.out.println("LOOK IF TAXA EXIST? "+found);
3827 * @param nameToBeFilledTest
// Stores the given (parsed) name in taxonNameBase after TaxonNameBase.castAndDeproxy.
3829 public void setParsedName(ITaxonNameBase nameToBeFilledTest
) {
3830 this.taxonNameBase
= TaxonNameBase
.castAndDeproxy(nameToBeFilledTest
);
3833 //variety dwcranks:varietyEpithet
3835 * @return the author
// Getter counterpart of setAuthor(String).
// NOTE(review): body not visible in this excerpt - presumably returns the author field.
3837 public String
getAuthor() {
// NOTE(review): body not visible in this excerpt - presumably returns the taxon field; confirm.
3843 public Taxon
getTaxon() {
// Returns the name stored in taxonNameBase (assigned by setParsedName).
3849 public TaxonNameBase
<?
,?
> getTaxonNameBase() {
3850 return taxonNameBase
;
3854 * @param form the form to set
// Setter for the form-rank taxon.
// NOTE(review): body not visible in this excerpt - presumably mirrors setVariety(Taxon).
3856 public void setForm(Taxon form
) {
3861 * @param variety the variety to set
// Stores the given taxon in the variety field.
3863 public void setVariety(Taxon variety
) {
3864 this.variety
=variety
;
// Finds an existing taxon for the given name or creates a new one.
//
// Visible steps:
//  1. sourceUrlRef is deproxied; the nomenclatural status type is resolved from getStatus()
//     via nomStatusString2NomStatus (unknown values are written to the status log file).
//  2. Candidates are fetched with two findByTitle queries (fullname and partialname, both
//     MatchMode.BEGINNING) and scanned: the candidate's title cache up to "sec." is compared
//     case-insensitively with fullname / partialname, and the ranks are compared.
//  3. The reused candidate additionally has to agree on status (compareStatus) or on the
//     appended phrase; otherwise a new name is created with getNonViralNameAccNomenclature(),
//     filled depending on rank, and handed to the build<Rank> helper of that rank.
//  4. refMods is added as source, the classification is saved, and - when rank equals
//     globalrank and an author is set - the LSID from getIdentifier() is stored on the taxon.
//
// partialname: the name part for this rank; fullname: the complete name string;
// rank: rank of the taxon to find or create; globalrank: rank of the name being imported.
// NOTE(review): several statements (loop bookkeeping, returns) are not visible in this
// excerpt; the summary above only covers what is shown.
3871 @SuppressWarnings("rawtypes")
3872 public Taxon
findOrCreateTaxon(String partialname
,String fullname
, Rank rank
, Rank globalrank
) {
3873 logger
.info("findOrCreateTaxon");
3874 sourceUrlRef
=CdmBase
.deproxy(sourceUrlRef
, Reference
.class);
3875 //takes too much time
3876 // List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3877 // logger.info("tmpList returned: "+tmpList.size());
3879 NomenclaturalStatusType statusType
= null;
3880 if (!getStatus().isEmpty()){
3882 statusType
= nomStatusString2NomStatus(getStatus());
3883 } catch (UnknownCdmTypeException e
) {
3884 addProblematicStatusToFile(getStatus());
3885 logger
.warn("Problem with status");
3889 List
<TaxonBase
> tmpListFiltered
= new ArrayList
<TaxonBase
>();
3891 Pager
<TaxonBase
> taxontest
= importer
.getTaxonService().findByTitle(TaxonBase
.class, fullname
, MatchMode
.BEGINNING
, null, null, null, null, null);
3893 tmpListFiltered
.addAll(taxontest
.getRecords());
3894 taxontest
= importer
.getTaxonService().findByTitle(TaxonBase
.class, partialname
, MatchMode
.BEGINNING
, null, null, null, null, null);
3895 tmpListFiltered
.addAll(taxontest
.getRecords());
3897 //logger.info("tmpListFiltered returned: "+tmpListFiltered.size());
3899 boolean nameCorrected
=false;
3900 if (fullname
.indexOf(partialname
)<0) {
3904 boolean foundIdentic
=false;
3906 for (TaxonBase tmpb
:tmpListFiltered
){
3908 TaxonNameBase tnb
= tmpb
.getName();
3911 if(globalrank
.equals(rank
) || (globalrank
.isLower(Rank
.SPECIES()) && rank
.equals(Rank
.SPECIES()))){
3912 if (tnb
.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(fullname
) ){
3913 crank
=tnb
.getRank();
3914 if (crank
!=null && rank
!=null){
3915 if (crank
.equals(rank
)){
3920 }catch(Exception e
){
3921 e
.printStackTrace();
3926 if(nameCorrected
){ //for corrected names such as Anochetus -- A. blf-pat
3927 if (tnb
.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname
) ){
3928 crank
=tnb
.getRank();
3929 if (crank
!=null && rank
!=null){
3930 if (crank
.equals(rank
)){
3935 }catch(Exception e
){
3936 e
.printStackTrace();
3944 if (tnb
.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname
) ){
3945 crank
=tnb
.getRank();
3946 if (crank
!=null && rank
!=null){
3947 if (crank
.equals(rank
)){
3952 }catch(Exception e
){
3953 e
.printStackTrace();
3962 boolean statusMatch
=false;
3963 boolean appendedMatch
=false;
3964 if(tmp
!=null && foundIdentic
){
3965 statusMatch
=compareStatus(tmp
, statusType
);
3966 if (!getStatus().isEmpty() && ! (tmp
.getAppendedPhrase() == null)) {
3967 appendedMatch
=tmp
.getAppendedPhrase().equals(getStatus());
3969 if (getStatus().isEmpty() && tmp
.getAppendedPhrase() == null) {
// NOTE(review): the third alternative below is implied by the second one (both require !statusMatch).
3974 if ((tmp
== null || !foundIdentic
) || (tmp
!= null && !statusMatch
) || (tmp
!= null && !appendedMatch
&& !statusMatch
)){
3976 INonViralName tnb
= getNonViralNameAccNomenclature();
3979 if(statusType
!= null) {
3980 tnb
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
3982 if(StringUtils
.isNotBlank(getStatus())) {
3983 tnb
.setAppendedPhrase(getStatus());
3986 if(rank
.equals(Rank
.UNKNOWN_RANK())){
3987 tnb
.setTitleCache(fullname
, true);
3988 // tnb.setGenusOrUninomial(fullname);
3990 if(rank
.isHigher(Rank
.GENUS())) {
3991 tnb
.setGenusOrUninomial(partialname
);
3994 if(rank
.isHigher(Rank
.SPECIES())) {
3995 tnb
.setTitleCache(partialname
, true);
3998 if (rank
.equals(globalrank
) && author
!= null) {
4000 tnb
.setCombinationAuthorship(findOrCreateAuthor(author
));
4001 if (getIdentifier() !=null && !getIdentifier().isEmpty()){
4002 Taxon taxonLSID
= getTaxonByLSID(getIdentifier());
4003 if (taxonLSID
!=null) {
// dispatch to the builder of the requested rank
4010 if (rank
.equals(Rank
.FAMILY())) {
4011 tmp
= buildFamily(tnb
);
4013 if (rank
.equals(Rank
.SUBFAMILY())) {
4014 tmp
= buildSubfamily(tnb
);
4016 if (rank
.equals(Rank
.TRIBE())) {
4017 tmp
= buildTribe(tnb
);
4019 if (rank
.equals(Rank
.SUBTRIBE())) {
4020 tmp
= buildSubtribe(tnb
);
4022 if (rank
.equals(Rank
.GENUS())) {
4023 tmp
= buildGenus(partialname
, tnb
);
4026 if (rank
.equals(Rank
.SUBGENUS())) {
4027 tmp
= buildSubgenus(partialname
, tnb
);
4029 if (rank
.equals(Rank
.SPECIES())) {
4030 tmp
= buildSpecies(partialname
, tnb
);
4033 if (rank
.equals(Rank
.SUBSPECIES())) {
4034 tmp
= buildSubspecies(partialname
, tnb
);
4037 if (rank
.equals(Rank
.VARIETY())) {
4038 tmp
= buildVariety(fullname
, partialname
, tnb
);
4041 if (rank
.equals(Rank
.FORM())) {
4042 tmp
= buildForm(fullname
, partialname
, tnb
);
4045 TaxonXTreatmentExtractor
.this.sourceHandler
.addSource(refMods
, tmp
);
4048 importer
.getClassificationService().saveOrUpdate(classification
);
4053 tmp
= CdmBase
.deproxy(tmp
, Taxon
.class);
4054 if (rank
.equals(globalrank
) && author
!= null) {
// NOTE(review): getIdentifier() is null-checked at its earlier use in this method but not here.
4055 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
4056 setLSID(getIdentifier(), tmp
);
4057 importer
.getTaxonService().saveOrUpdate(tmp
);
4058 tmp
= CdmBase
.deproxy(tmp
, Taxon
.class);
// Resolves the subfamily taxon for tnb: an existing match is looked up with
// findMatchingTaxon; the visible creation path builds a new Taxon on sourceUrlRef, applies
// the configured secundum unless doKeepOriginalSecundum() is set, and attaches it below
// family (recording FAMILY as higherRank) or, failing that, directly to the classification.
// NOTE(review): the guard around the creation path and the return statement are not visible
// here - presumably creation happens only when no match was found.
4071 private Taxon
buildSubfamily(INonViralName tnb
) {
4073 // tnb.generateTitle();
4074 tmp
= findMatchingTaxon(tnb
,refMods
);
4076 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4077 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4078 tmp
.setSec(state2
.getConfig().getSecundum());
4080 // tmp.setSec(refMods);
4081 // sourceHandler.addSource(refMods, tmp);
4082 if(family
!= null) {
4083 classification
.addParentChild(family
, tmp
, null, null);
4084 higherRank
=Rank
.FAMILY();
4087 //System.out.println("ADDCHILDTAXON SUBFAMILY "+tmp);
4088 classification
.addChildTaxon(tmp
, null, null);
// Resolves the family taxon for tnb: an existing match is looked up with findMatchingTaxon;
// the visible creation path builds a new Taxon on sourceUrlRef, applies the configured
// secundum unless doKeepOriginalSecundum() is set, and adds it directly to the
// classification (a family has no parent here).
// NOTE(review): the guard around the creation path and the return statement are not visible
// here - presumably creation happens only when no match was found.
4097 private Taxon
buildFamily(INonViralName tnb
) {
4099 // tnb.generateTitle();
4100 tmp
= findMatchingTaxon(tnb
,refMods
);
4102 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4103 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4104 tmp
.setSec(state2
.getConfig().getSecundum());
4106 // tmp.setSec(refMods);
4107 //sourceHandler.addSource(refMods, tmp);
4108 //System.out.println("ADDCHILDTAXON FAMILY "+tmp);
4109 classification
.addChildTaxon(tmp
, null, null);
// Resolves the form-rank taxon for tnb. The name is first completed from the already known
// higher names (genus, subgenus, species, subspecies epithets); partialname, when given,
// then overrides the infraspecific epithet, and fullname becomes the protected title cache.
// After findMatchingTaxon the visible creation path builds a new Taxon on sourceUrlRef,
// applies the configured secundum unless doKeepOriginalSecundum() is set, and attaches it to
// subspecies, else species, else directly to the classification.
// NOTE(review): the guard around the creation path and the return statement are not visible
// here - presumably creation happens only when no match was found.
4118 private Taxon
buildForm(String fullname
, String partialname
, INonViralName tnb
) {
4119 if (genusName
!=null) {
4120 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
4122 if (subgenusName
!=null) {
4123 tnb
.setInfraGenericEpithet(subgenusName
.getInfraGenericEpithet());
4125 if(speciesName
!=null) {
4126 tnb
.setSpecificEpithet(speciesName
.getSpecificEpithet());
4128 if(subspeciesName
!= null) {
4129 tnb
.setInfraSpecificEpithet(subspeciesName
.getInfraSpecificEpithet());
4131 if(partialname
!= null) {
4132 tnb
.setInfraSpecificEpithet(partialname
);
4134 //TODO how to save form??
4135 tnb
.setTitleCache(fullname
, true);
4136 Taxon tmp
= findMatchingTaxon(tnb
,refMods
);
4138 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4139 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4140 tmp
.setSec(state2
.getConfig().getSecundum());
4142 // tmp.setSec(refMods);
4143 //sourceHandler.addSource(refMods, tmp);
4144 if (subspecies
!=null) {
4145 classification
.addParentChild(subspecies
, tmp
, null, null);
4146 higherRank
=Rank
.SUBSPECIES();
4147 higherTaxa
=subspecies
;
4149 if (species
!=null) {
4150 classification
.addParentChild(species
, tmp
, null, null);
4151 higherRank
=Rank
.SPECIES();
4155 // System.out.println("ADDCHILDTAXON FORM "+tmp);
4156 classification
.addChildTaxon(tmp
, null, null);
4167 private Taxon
buildVariety(String fullname
, String partialname
, INonViralName tnb
) {
4169 if (genusName
!=null) {
4170 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
4172 if (subgenusName
!=null) {
4173 tnb
.setInfraGenericEpithet(subgenusName
.getInfraGenericEpithet());
4175 if(speciesName
!=null) {
4176 tnb
.setSpecificEpithet(speciesName
.getSpecificEpithet());
4178 if(subspeciesName
!= null) {
4179 tnb
.setInfraSpecificEpithet(subspeciesName
.getSpecificEpithet());
4181 if(partialname
!= null) {
4182 tnb
.setInfraSpecificEpithet(partialname
);
4184 //TODO how to save variety?
4185 tnb
.setTitleCache(fullname
, true);
4186 tmp
= findMatchingTaxon(tnb
,refMods
);
4188 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4189 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4190 tmp
.setSec(state2
.getConfig().getSecundum());
4192 // tmp.setSec(refMods);
4193 //sourceHandler.addSource(refMods, tmp);
4194 if (subspecies
!=null) {
4195 classification
.addParentChild(subspecies
, tmp
, null, null);
4196 higherRank
=Rank
.SUBSPECIES();
4197 higherTaxa
=subspecies
;
4199 if(species
!=null) {
4200 classification
.addParentChild(species
, tmp
, null, null);
4201 higherRank
=Rank
.SPECIES();
4205 //System.out.println("ADDCHILDTAXON VARIETY "+tmp);
4206 classification
.addChildTaxon(tmp
, null, null);
4213 * @param partialname
// Resolves the subspecies taxon for tnb. Genus, subgenus and species epithets are copied from
// the already known higher names and partialname becomes the infraspecific epithet. After
// findMatchingTaxon the visible creation path builds a new Taxon on sourceUrlRef, applies the
// configured secundum unless doKeepOriginalSecundum() is set, and attaches it below species
// (recording SPECIES as higherRank) or, failing that, directly to the classification.
// NOTE(review): the guard around the creation path and the return statement are not visible
// here - presumably creation happens only when no match was found.
4217 private Taxon
buildSubspecies(String partialname
, INonViralName tnb
) {
4218 if (genusName
!=null) {
4219 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
4221 if (subgenusName
!=null) {
4222 // System.out.println("SUB:"+subgenusName.getInfraGenericEpithet());
4223 tnb
.setInfraGenericEpithet(subgenusName
.getInfraGenericEpithet());
4225 if(speciesName
!=null) {
4226 // System.out.println("SPE:"+speciesName.getSpecificEpithet());
4227 tnb
.setSpecificEpithet(speciesName
.getSpecificEpithet());
4229 tnb
.setInfraSpecificEpithet(partialname
);
4230 Taxon tmp
= findMatchingTaxon(tnb
,refMods
);
4232 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4233 if(!state2
.getConfig().doKeepOriginalSecundum())
4235 tmp
.setSec(state2
.getConfig().getSecundum());
4236 // tmp.setSec(refMods);
4237 //sourceHandler.addSource(refMods, tmp);
4240 if(species
!= null) {
4241 classification
.addParentChild(species
, tmp
, null, null);
4242 higherRank
=Rank
.SPECIES();
4246 //System.out.println("ADDCHILDTAXON SUBSPECIES "+tmp);
4247 classification
.addChildTaxon(tmp
, null, null);
4253 * @param partialname
// Resolves the species taxon for tnb. Genus and subgenus epithets are copied from the
// already known higher names; partialname, lower-cased, becomes the specific epithet. After
// findMatchingTaxon the visible creation path builds a new Taxon on sourceUrlRef, applies the
// configured secundum unless doKeepOriginalSecundum() is set, and attaches it to subgenus,
// genus, or directly to the classification.
// NOTE(review): the guards around the creation path and around the genus branch, and the
// return statement, are not visible here - confirm the exact fallback order.
4257 private Taxon
buildSpecies(String partialname
, INonViralName tnb
) {
4258 if (genusName
!=null) {
4259 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
4261 if (subgenusName
!=null) {
4262 tnb
.setInfraGenericEpithet(subgenusName
.getInfraGenericEpithet());
4264 tnb
.setSpecificEpithet(partialname
.toLowerCase());
4265 Taxon tmp
= findMatchingTaxon(tnb
,refMods
);
4267 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4268 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4269 tmp
.setSec(state2
.getConfig().getSecundum());
4271 // tmp.setSec(refMods);
4272 //sourceHandler.addSource(refMods, tmp);
4273 if (subgenus
!=null) {
4274 classification
.addParentChild(subgenus
, tmp
, null, null);
4275 higherRank
=Rank
.SUBGENUS();
4276 higherTaxa
=subgenus
;
4279 classification
.addParentChild(genus
, tmp
, null, null);
4280 higherRank
=Rank
.GENUS();
4284 //System.out.println("ADDCHILDTAXON SPECIES "+tmp);
4285 classification
.addChildTaxon(tmp
, null, null);
4292 * @param partialname
// Resolves the subgenus taxon for tnb. partialname becomes the infrageneric epithet and the
// genus part is copied from genusName when known. After findMatchingTaxon the visible
// creation path builds a new Taxon on sourceUrlRef, applies the configured secundum unless
// doKeepOriginalSecundum() is set, and attaches it below genus (recording GENUS as
// higherRank) or directly to the classification.
// NOTE(review): the guards around the creation path and around the genus branch, and the
// return statement, are not visible here.
4296 private Taxon
buildSubgenus(String partialname
, INonViralName tnb
) {
4297 tnb
.setInfraGenericEpithet(partialname
);
4298 if (genusName
!=null) {
4299 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
4301 Taxon tmp
= findMatchingTaxon(tnb
,refMods
);
4303 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4304 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4305 tmp
.setSec(state2
.getConfig().getSecundum());
4307 // tmp.setSec(refMods);
4308 //sourceHandler.addSource(refMods, tmp);
4310 classification
.addParentChild(genus
, tmp
, null, null);
4311 higherRank
=Rank
.GENUS();
4314 //System.out.println("ADDCHILDTAXON SUBGENUS "+tmp);
4315 classification
.addChildTaxon(tmp
, null, null);
4321 * @param partialname
// Resolves the genus taxon for tnb; partialname becomes the genus/uninomial part. After
// findMatchingTaxon the visible creation path builds a new Taxon on sourceUrlRef, applies the
// configured secundum unless doKeepOriginalSecundum() is set, and attaches it to one of
// subtribe, tribe, subfamily or family (recording the matching higherRank), or directly to
// the classification.
// NOTE(review): the guards around the creation path and around the tribe / family branches,
// and the return statement, are not visible here - confirm the exact fallback order.
4325 private Taxon
buildGenus(String partialname
, INonViralName tnb
) {
4327 tnb
.setGenusOrUninomial(partialname
);
4330 tmp
= findMatchingTaxon(tnb
,refMods
);
4332 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4333 if(!state2
.getConfig().doKeepOriginalSecundum())
4335 tmp
.setSec(state2
.getConfig().getSecundum());
4336 // tmp.setSec(refMods);
4337 //sourceHandler.addSource(refMods, tmp);
4340 if(subtribe
!= null) {
4341 classification
.addParentChild(subtribe
, tmp
, null, null);
4342 higherRank
=Rank
.SUBTRIBE();
4343 higherTaxa
=subtribe
;
4346 classification
.addParentChild(tribe
, tmp
, null, null);
4347 higherRank
=Rank
.TRIBE();
4350 if(subfamily
!=null) {
4351 classification
.addParentChild(subfamily
, tmp
, null, null);
4352 higherRank
=Rank
.SUBFAMILY();
4353 higherTaxa
=subfamily
;
4356 classification
.addParentChild(family
, tmp
, null, null);
4357 higherRank
=Rank
.FAMILY();
4361 //System.out.println("ADDCHILDTAXON GENUS "+tmp);
4362 classification
.addChildTaxon(tmp
, null, null);
// Resolves the subtribe taxon for tnb. After findMatchingTaxon the visible creation path
// builds a new Taxon on sourceUrlRef, applies the configured secundum unless
// doKeepOriginalSecundum() is set, and attaches it below tribe (recording TRIBE as
// higherRank) or directly to the classification.
// NOTE(review): the guards around the creation path and around the tribe branch, and the
// return statement, are not visible here.
4374 private Taxon
buildSubtribe(INonViralName tnb
) {
4375 Taxon tmp
= findMatchingTaxon(tnb
,refMods
);
4377 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4378 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4379 tmp
.setSec(state2
.getConfig().getSecundum());
4381 // tmp.setSec(refMods);
4382 //sourceHandler.addSource(refMods, tmp);
4384 classification
.addParentChild(tribe
, tmp
, null, null);
4385 higherRank
=Rank
.TRIBE();
4388 //System.out.println("ADDCHILDTAXON SUBTRIBE "+tmp);
4389 classification
.addChildTaxon(tmp
, null, null);
// Resolves the tribe taxon for tnb. After findMatchingTaxon the visible creation path builds
// a new Taxon on sourceUrlRef, applies the configured secundum unless
// doKeepOriginalSecundum() is set, and attaches it to subfamily, else family, else directly
// to the classification, recording the chosen parent rank in higherRank.
// NOTE(review): the guard around the creation path and the return statement are not visible
// here - presumably creation happens only when no match was found.
4398 private Taxon
buildTribe(INonViralName tnb
) {
4399 Taxon tmp
= findMatchingTaxon(tnb
,refMods
);
4401 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4402 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4403 tmp
.setSec(state2
.getConfig().getSecundum());
4405 // tmp.setSec(refMods);
4406 //sourceHandler.addSource(refMods, tmp);
4407 if (subfamily
!=null) {
4408 classification
.addParentChild(subfamily
, tmp
, null, null);
4409 higherRank
=Rank
.SUBFAMILY();
4410 higherTaxa
=subfamily
;
4412 if(family
!= null) {
4413 classification
.addParentChild(family
, tmp
, null, null);
4414 higherRank
=Rank
.FAMILY();
4418 //System.out.println("ADDCHILDTAXON TRIBE "+tmp);
4419 classification
.addChildTaxon(tmp
, null, null);
4427 * @param identifier
// Looks up a taxon by LSID. Only the part of identifier before "__" is used; it is parsed as
// an LSID when it contains "lsid" (a malformed value only produces a warning). The lookup
// then lists all Taxon instances from the taxon service and compares each taxon's LSID
// string with the parsed one.
// NOTE(review): the full, unpaged list() call loads every taxon for a single lookup.
// NOTE(review): the return statements are not visible in this excerpt.
4430 @SuppressWarnings("rawtypes")
4431 private Taxon
getTaxonByLSID(String identifier
) {
4432 //logger.info("getTaxonByLSID");
4433 // boolean lsidok=false;
4434 String id
= identifier
.split("__")[0];
4435 // String source = identifier.split("__")[1];
4437 if (id
.indexOf("lsid")>-1){
4439 lsid
= new LSID(id
);
4441 } catch (MalformedLSIDException e
) {
4442 logger
.warn("Malformed LSID");
4446 List
<Taxon
> taxa
= importer
.getTaxonService().list(Taxon
.class, 0, 0, null, null);
4447 LSID currentlsid
=null;
4449 currentlsid
= t
.getLsid();
4450 if (currentlsid
!=null){
4451 if (currentlsid
.getLsid().equals(lsid
.getLsid())){
4455 catch(Exception e
){logger
.warn("Exception occurred while comparing LSIDs "+e
);}
4466 @SuppressWarnings("rawtypes")
4467 private Person
findOrCreateAuthor(String author2
) {
4468 //logger.info("findOrCreateAuthor");
4469 List
<UuidAndTitleCache
<Person
>> hiberPersons
= importer
.getAgentService().getPersonUuidAndTitleCache();
4470 for (UuidAndTitleCache
<Person
> hibernateP
:hiberPersons
){
4471 if(hibernateP
.getTitleCache().equals(author2
)) {
4472 AgentBase existing
= importer
.getAgentService().find(hibernateP
.getUuid());
4473 return CdmBase
.deproxy(existing
, Person
.class);
4476 Person p
= Person
.NewInstance();
4477 p
.setTitleCache(author2
,true);
4478 importer
.getAgentService().saveOrUpdate(p
);
4479 return CdmBase
.deproxy(p
, Person
.class);
4482 * @param author the author to set
// Stores the author string in the author field.
4484 public void setAuthor(String author
) {
4485 this.author
= author
;
4489 * @return the higherTaxa
// Getter counterpart of setHigherTaxa(Taxon).
// NOTE(review): body not visible in this excerpt - presumably returns the higherTaxa field.
4491 public Taxon
getHigherTaxa() {
4495 * @param higherTaxa the higherTaxa to set
// Stores the given taxon in the higherTaxa field (the build* helpers assign the same field).
4497 public void setHigherTaxa(Taxon higherTaxa
) {
4498 this.higherTaxa
= higherTaxa
;
4501 * @return the higherRank
// Getter counterpart of setHigherRank(Rank).
// NOTE(review): body not visible in this excerpt - presumably returns the higherRank field.
4503 public Rank
getHigherRank() {
4507 * @param higherRank the higherRank to set
// Stores the given rank in the higherRank field (the build* helpers assign the same field).
4509 public void setHigherRank(Rank higherRank
) {
4510 this.higherRank
= higherRank
;
// Returns originalName when newName is empty.
// NOTE(review): the other branch is not visible in this excerpt - presumably newName is
// returned; newName must be non-null for the isEmpty() call.
4512 public String
getName(){
4513 if (newName
.isEmpty()) {
4514 return originalName
;
4521 * @return the fullName
// Returns the originalName field.
4523 public String
getOriginalName() {
4524 return originalName
;
4527 * @param fullName the fullName to set
// Stores fullName in the originalName field.
4529 public void setOriginalName(String fullName
) {
4530 this.originalName
= fullName
;
4533 * @return the newName
// Getter counterpart of setNewName(String).
// NOTE(review): body not visible in this excerpt - presumably returns the newName field.
4535 public String
getNewName() {
4539 * @param newName the newName to set
// Stores the given string in the newName field (read by getName()).
4541 public void setNewName(String newName
) {
4542 this.newName
= newName
;
// Getter counterpart of setRank(Rank).
// NOTE(review): body not visible in this excerpt - presumably returns the rank field.
4547 public Rank
getRank() {
4551 * @param rank the rank to set
// Setter for the rank of this name.
// NOTE(review): body not visible in this excerpt - presumably assigns the rank field.
4553 public void setRank(Rank rank
) {
4557 * @return the identifier
// Getter counterpart of setIdentifier(String).
// NOTE(review): body not visible in this excerpt; findOrCreateTaxon null-checks the result
// in one place, so null is presumably a possible return value.
4559 public String
getIdentifier() {
4563 * @param identifier the identifier to set
// Stores the given string in the identifier field.
4565 public void setIdentifier(String identifier
) {
4566 this.identifier
= identifier
;
4569 * @return the status
// Getter for the status string with special handling of a null status.
// NOTE(review): the values returned are not visible in this excerpt; callers invoke
// getStatus().isEmpty() directly, so the null case presumably yields a non-null string.
4571 public String
getStatus() {
4572 if (status
== null) {
4578 * @param status the status to set
// Stores the given string in the status field.
4580 public void setStatus(String status
) {
4581 this.status
= status
;
4584 * @return the family
// Getter counterpart of setFamily(Taxon).
// NOTE(review): body not visible in this excerpt - presumably returns the family field.
4586 public Taxon
getFamily() {
4590 * @param family the family to set
// Stores the family taxon and caches its deproxied name in familyName.
// family must not be null: family.getName() is dereferenced without a check.
4592 @SuppressWarnings("rawtypes")
4593 public void setFamily(Taxon family
) {
4594 this.family
= family
;
4595 familyName
= CdmBase
.deproxy(family
.getName());
4598 * @return the subfamily
// Getter counterpart of setSubfamily(Taxon).
// NOTE(review): body not visible in this excerpt - presumably returns the subfamily field.
4600 public Taxon
getSubfamily() {
4604 * @param subfamily the subfamily to set
// Stores the subfamily taxon and caches its deproxied name in subfamilyName.
// subfamily must not be null: subfamily.getName() is dereferenced without a check.
4606 @SuppressWarnings("rawtypes")
4607 public void setSubfamily(Taxon subfamily
) {
4608 this.subfamily
= subfamily
;
4609 subfamilyName
= CdmBase
.deproxy(subfamily
.getName());
// Getter counterpart of setTribe(Taxon).
// NOTE(review): body not visible in this excerpt - presumably returns the tribe field.
4614 public Taxon
getTribe() {
4618 * @param tribe the tribe to set
// Caches the deproxied name of the given tribe taxon in tribeName.
// NOTE(review): the assignment of the tribe field itself is not visible in this excerpt.
4620 @SuppressWarnings("rawtypes")
4621 public void setTribe(Taxon tribe
) {
4623 tribeName
= CdmBase
.deproxy(tribe
.getName());
4626 * @return the subtribe
// Getter counterpart of setSubtribe(Taxon).
// NOTE(review): body not visible in this excerpt - presumably returns the subtribe field.
4628 public Taxon
getSubtribe() {
4632 * @param subtribe the subtribe to set
// Stores the subtribe taxon and caches its deproxied name in subtribeName.
// subtribe must not be null: subtribe.getName() is dereferenced without a check.
4634 @SuppressWarnings("rawtypes")
4635 public void setSubtribe(Taxon subtribe
) {
4636 this.subtribe
= subtribe
;
4637 subtribeName
=CdmBase
.deproxy(subtribe
.getName());
// Getter counterpart of setGenus(Taxon).
// NOTE(review): body not visible in this excerpt - presumably returns the genus field.
4642 public Taxon
getGenus() {
4646 * @param genus the genus to set
// Caches the deproxied name of the given genus taxon in genusName.
// NOTE(review): two statements preceding the visible one are missing from this excerpt
// (presumably the field assignment, possibly a null guard as in setSpecies) - confirm.
4648 @SuppressWarnings("rawtypes")
4649 public void setGenus(Taxon genus
) {
4652 genusName
= CdmBase
.deproxy(genus
.getName());
4656 * @return the subgenus
// Getter counterpart of setSubgenus(Taxon).
// NOTE(review): body not visible in this excerpt - presumably returns the subgenus field.
4658 public Taxon
getSubgenus() {
4662 * @param subgenus the subgenus to set
// Stores the subgenus taxon and caches its deproxied name in subgenusName.
// subgenus must not be null: subgenus.getName() is dereferenced without a check.
4664 @SuppressWarnings("rawtypes")
4665 public void setSubgenus(Taxon subgenus
) {
4666 this.subgenus
= subgenus
;
4667 subgenusName
= CdmBase
.deproxy(subgenus
.getName());
4670 * @return the species
// Getter counterpart of setSpecies(Taxon).
// NOTE(review): body not visible in this excerpt - presumably returns the species field.
4672 public Taxon
getSpecies() {
4676 * @param species the species to set
// Stores the species taxon and caches its deproxied name in speciesName.
// Unlike the sibling setters this one ignores a null argument: both assignments sit inside
// the null check, so a previously set species is kept.
4678 public void setSpecies(Taxon species
) {
4679 if (species
!= null){
4680 this.species
= species
;
4681 speciesName
= CdmBase
.deproxy(species
.getName());
4685 * @return the subspecies
// Getter counterpart of setSubspecies(Taxon).
// NOTE(review): body not visible in this excerpt - presumably returns the subspecies field.
4687 public Taxon
getSubspecies() {
4691 * @param subspecies the subspecies to set
// Stores the subspecies taxon and caches its deproxied name in subspeciesName.
// subspecies must not be null: subspecies.getName() is dereferenced without a check.
4693 @SuppressWarnings("rawtypes")
4694 public void setSubspecies(Taxon subspecies
) {
4695 this.subspecies
= subspecies
;
4696 subspeciesName
= CdmBase
.deproxy(subspecies
.getName());
4708 private void addProblematicStatusToFile(String status
) {
4710 FileWriter fstream
= new FileWriter(TaxonXImport
.LOG_FOLDER
+ "StatusUnknown_"+classification
.getTitleCache()+".txt",true);
4711 BufferedWriter out
= new BufferedWriter(fstream
);
4712 out
.write(status
+"\n");
4713 //Close the output stream
4715 }catch (Exception e
){//Catch exception if any
4716 System
.err
.println("Error: " + e
.getMessage());
// Tries to find an already existing taxon that can be reused for the name tnb.
//
// Candidates come from getMatchingTaxa(tnb). While user interaction is enabled in the
// configuration and no candidate has been accepted yet, each candidate is scored:
//  - t1 / t2: title caches of tnb and of the candidate, cut at "sec." and at the respective
//    authorship string,
//  - similarityScore = similarity(t1, t2), similarityAuthor = similarity(author1, author2),
// and compareAndCheckTaxon decides whether to reuse it. An accepted candidate gets refMods
// added as a source through sourceHandler.
// NOTE(review): the return statement is not visible in this excerpt; the build* callers
// create a new taxon after this call, so presumably null signals "no match".
4727 private Taxon
findMatchingTaxon(INonViralName tnb
, Reference refMods
) {
4728 logger
.info("findMatchingTaxon");
4731 refMods
=CdmBase
.deproxy(refMods
, Reference
.class);
4732 boolean insertAsExisting
=false;
4733 List
<Taxon
> existingTaxa
= new ArrayList
<Taxon
>();
4735 existingTaxa
= getMatchingTaxa(tnb
);
4736 } catch (Exception e1
) {
4737 // TODO Auto-generated catch block
4738 e1
.printStackTrace();
4740 double similarityScore
=0.0;
4741 double similarityAuthor
=-1;
4746 for (Taxon bestMatchingTaxon
: existingTaxa
){
4747 if (!existingTaxa
.isEmpty() && state2
.getConfig().isInteractWithUser() && !insertAsExisting
) {
4748 // System.out.println("tnb "+tnb.getTitleCache());
4749 // System.out.println("ext "+bestMatchingTaxon.getTitleCache());
4751 if(tnb
.getAuthorshipCache()!=null) {
4752 author1
=tnb
.getAuthorshipCache();
4754 } catch (Exception e
) {
4755 // TODO Auto-generated catch block
4756 e
.printStackTrace();
4759 if(bestMatchingTaxon
.getName().getAuthorshipCache()!=null) {
4760 author2
=bestMatchingTaxon
.getName().getAuthorshipCache();
4762 } catch (Exception e
) {
4763 // TODO Auto-generated catch block
4764 e
.printStackTrace();
4767 t1
=tnb
.getTitleCache().split("sec.")[0].trim();
4768 if (author1
!=null && !StringUtils
.isEmpty(author1
)) {
// Pattern.quote: the authorship may contain regex metacharacters such as '(' or '.'
4769 t1
=t1
.split(Pattern
.quote(author1
))[0];
4771 } catch (Exception e
) {
4772 // TODO Auto-generated catch block
4773 e
.printStackTrace();
4776 t2
=bestMatchingTaxon
.getTitleCache().split("sec.")[0].trim();
4777 if (author2
!=null && !StringUtils
.isEmpty(author2
)) {
4778 t2
=t2
.split(Pattern
.quote(author2
))[0];
4780 } catch (Exception e
) {
4781 // TODO Auto-generated catch block
4782 e
.printStackTrace();
4784 similarityScore
=similarity(t1
.trim(), t2
.trim());
4785 // System.out.println("taxascore: "+similarityScore);
4786 similarityAuthor
=similarity(author1
.trim(), author2
.trim());
4787 // System.out.println("authorscore: "+similarityAuthor);
4788 insertAsExisting
= compareAndCheckTaxon(tnb
, refMods
, similarityScore
, bestMatchingTaxon
,similarityAuthor
);
4790 if(insertAsExisting
) {
4791 //System.out.println("KEEP "+bestMatchingTaxon.toString());
4792 tmp
=bestMatchingTaxon
;
4793 sourceHandler
.addSource(refMods
, tmp
);
4804 * @param similarityScore
4805 * @param bestMatchingTaxon
4806 * @param similarityAuthor
// Decides whether bestMatchingTaxon is reused for the name tnb and returns that decision.
// One case is hard-wired: the name "Chenopodium" against a taxon whose title contains
// "Chenopodium L." is accepted without asking. The other visible path delegates to
// askIfReuseBestMatchingTaxon with both similarity scores. The decision is recorded through
// logDecision before it is returned.
// NOTE(review): the else keyword between the two assignments is not visible in this
// excerpt - presumably the ask-call is the fallback of the Chenopodium test.
4809 private boolean compareAndCheckTaxon(INonViralName tnb
, Reference refMods
, double similarityScore
,
4810 Taxon bestMatchingTaxon
, double similarityAuthor
) {
4811 //logger.info("compareAndCheckTaxon");
4812 boolean insertAsExisting
;
4813 // if (tnb.getTitleCache().split("sec.")[0].equalsIgnoreCase("Chenopodium") && bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium album")>-1) {
4814 // insertAsExisting=false;
4816 //a small hack/automatisation for Chenopodium only
4817 if (tnb
.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase("Chenopodium") &&
4818 bestMatchingTaxon
.getTitleCache().split("sec.")[0].indexOf("Chenopodium L.")>-1) {
4819 insertAsExisting
=true;
4821 insertAsExisting
=askIfReuseBestMatchingTaxon(tnb
, bestMatchingTaxon
, refMods
, similarityScore
,similarityAuthor
);
4825 logDecision(tnb
, bestMatchingTaxon
, insertAsExisting
, refMods
);
4826 return insertAsExisting
;
4832 @SuppressWarnings("rawtypes")
4833 private List
<Taxon
> getMatchingTaxa(ITaxonNameBase tnb
) {
4834 //logger.info("getMatchingTaxon");
4835 if (tnb
.getTitleCache() == null){
4836 tnb
.setTitleCache(tnb
.toString(), tnb
.isProtectedTitleCache());
4839 Pager
<TaxonBase
> pager
=importer
.getTaxonService().findByTitle(TaxonBase
.class, tnb
.getTitleCache().split("sec.")[0].trim(), MatchMode
.BEGINNING
, null, null, null, null, null);
4840 List
<TaxonBase
>records
= pager
.getRecords();
4842 List
<Taxon
> existingTaxons
= new ArrayList
<Taxon
>();
4843 for (TaxonBase r
:records
){
4845 Taxon bestMatchingTaxon
= (Taxon
)r
;
4846 // System.out.println("best: "+bestMatchingTaxon.getTitleCache());
4847 if(compareTaxonNameLength(bestMatchingTaxon
.getTitleCache().split(".sec")[0],tnb
.getTitleCache().split(".sec")[0])) {
4848 existingTaxons
.add(bestMatchingTaxon
);
4850 }catch(ClassCastException e
){logger
.warn("classcast exception, might be a synonym, ignore it");}
4852 Taxon bmt
= importer
.getTaxonService().findBestMatchingTaxon(tnb
.getTitleCache());
4853 if (!existingTaxons
.contains(bmt
) && bmt
!=null) {
4854 if(compareTaxonNameLength(bmt
.getTitleCache().split(".sec")[0],tnb
.getTitleCache().split(".sec")[0])) {
4855 existingTaxons
.add(bmt
);
4858 return existingTaxons
;
4862 * Check if the found Taxon can reasonably be the same
4863 * example: with and without author should match, but the subspecies should not be suggested for a genus
// Plausibility check on the lengths of two title strings: f is the title of the taxon that
// was found, o the title of the name searched for (see the calls in getMatchingTaxa).
// The visible test checks whether f is more than 10 characters longer than o.
// NOTE(review): the assignments to lengthOk and the return statement are not visible in
// this excerpt - confirm which side of the threshold counts as acceptable.
4865 private boolean compareTaxonNameLength(String f
, String o
){
4866 //logger.info("compareTaxonNameLength");
4867 boolean lengthOk
=false;
4868 int sizeF
= f
.length();
4869 int sizeO
= o
.length();
4874 if (sizeF
-sizeO
>10) {
4881 // System.out.println(lengthOk+": compare "+f+" ("+f.length()+") and "+o+" ("+o.length()+")");
// Similarity ratio of two strings based on their edit distance.
// When both strings are non-empty they are lower-cased and trimmed, the longer one is taken
// as reference, and the result is (length - editDistance) / length, so identical strings
// give 1.0 and the value decreases as more edits are needed.
// NOTE(review): the values returned when one of the strings is empty or null are not
// visible in this excerpt; only an equalsIgnoreCase test on two non-null strings is shown.
4885 private double similarity(String s1
, String s2
) {
4886 //logger.info("similarity");
4887 //System.out.println("similarity *"+s1+"* vs. *"+s2+"*");
4888 if(!StringUtils
.isEmpty(s1
) && !StringUtils
.isEmpty(s2
)){
4889 String l1
=s1
.toLowerCase().trim();
4890 String l2
=s2
.toLowerCase().trim();
4891 if (l1
.length() < l2
.length()) { // s1 should always be bigger
4892 String swap
= l1
; l1
= l2
; l2
= swap
;
4894 int bigLen
= l1
.length();
4895 if (bigLen
== 0) { return 1.0; /* both strings are zero length */ }
// the cast to double keeps the division from truncating to an integer
4896 return (bigLen
- computeEditDistance(l1
, l2
)) / (double) bigLen
;
4899 if(s1
!=null && s2
!=null){
4900 if (s1
.equalsIgnoreCase(s2
)) {
4908 private int computeEditDistance(String s1
, String s2
) {
4909 //logger.info("computeEditDistance");
4910 int[] costs
= new int[s2
.length() + 1];
4911 for (int i
= 0; i
<= s1
.length(); i
++) {
4913 for (int j
= 0; j
<= s2
.length(); j
++) {
4918 int newValue
= costs
[j
- 1];
4919 if (s1
.charAt(i
- 1) != s2
.charAt(j
- 1)) {
4920 newValue
= Math
.min(Math
.min(newValue
, lastValue
),
4923 costs
[j
- 1] = lastValue
;
4924 lastValue
= newValue
;
4929 costs
[s2
.length()] = lastValue
;
4932 return costs
[s2
.length()];
// Taxa keyed by rank. buildHierarchy and saveAndGetHigherTaxon read it to link the higher
// taxa into the classification; the map is replaced by a fresh HashMap before the parent
// lookup of a name starts.
// NOTE(review): presumably filled by the handle*Hierarchy helpers - confirm.
4935 Map
<Rank
, Taxon
> hierarchy
= new HashMap
<Rank
, Taxon
>();
4937 * @param taxonNameBase
// Dispatches on the rank of taxonNameBase to the handle<Rank>Hierarchy helper of that rank
// (form, variety, subspecies, species, subgenus, genus, subtribe, tribe, subfamily), passing
// ref, myName and a freshly created NonViralNameParserImpl.
// taxonNameBase.getRank() must be non-null: equals() is invoked on it directly.
// NOTE(review): the tax parameter is not used in any of the visible lines.
4939 @SuppressWarnings("rawtypes")
4940 public void lookForParentNode(INonViralName taxonNameBase
, Taxon tax
, Reference ref
, MyName myName
) {
4941 logger
.info("lookForParentNode "+taxonNameBase
.getTitleCache()+" for "+myName
.toString());
4942 //System.out.println("LOOK FOR PARENT NODE "+taxonnamebase.toString()+"; "+tax.toString()+"; "+taxonnamebase.getRank());
4943 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
4944 if (taxonNameBase
.getRank().equals(Rank
.FORM())){
4945 handleFormHierarchy(ref
, myName
, parser
);
4947 else if (taxonNameBase
.getRank().equals(Rank
.VARIETY())){
4948 handleVarietyHierarchy(ref
, myName
, parser
);
4950 else if (taxonNameBase
.getRank().equals(Rank
.SUBSPECIES())){
4951 handleSubSpeciesHierarchy(ref
, myName
, parser
);
4953 else if (taxonNameBase
.getRank().equals(Rank
.SPECIES())){
4954 handleSpeciesHierarchy(ref
, myName
, parser
);
4956 else if (taxonNameBase
.getRank().equals(Rank
.SUBGENUS())){
4957 handleSubgenusHierarchy(ref
, myName
, parser
);
4960 if (taxonNameBase
.getRank().equals(Rank
.GENUS())){
4961 handleGenusHierarchy(ref
, myName
, parser
);
4963 if (taxonNameBase
.getRank().equals(Rank
.SUBTRIBE())){
4964 handleSubtribeHierarchy(ref
, myName
, parser
);
4966 if (taxonNameBase
.getRank().equals(Rank
.TRIBE())){
4967 handleTribeHierarchy(ref
, myName
, parser
);
4970 if (taxonNameBase
.getRank().equals(Rank
.SUBFAMILY())){
4971 handleSubfamilyHierarchy(ref
, myName
, parser
);
// Resolves the parent of a subfamily name: the family string of myName, handled with rank FAMILY.
// The family string is looked up through the taxon service by title (MatchMode.BEGINNING) and a
// record whose name has rank FAMILY is taken as parent. If none is found, the string is parsed
// into a name, findMatchingTaxon is consulted and a new Taxon is created and saved.
// The visible statements then recurse via lookForParentNode and store the parent in hierarchy.
// NOTE(review): the conditions guarding the use of tmp, the recursion and the hierarchy.put call
// are not visible here - confirm the exact control flow.
4980 private void handleSubfamilyHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
// debug trace written straight to stdout (the logger is not used here)
4981 System
.out
.println("handleSubfamilyHierarchy");
4982 String parentStr
= myName
.getFamilyStr();
4983 Rank r
= Rank
.FAMILY();
4984 if(parentStr
!=null){
4986 Taxon parent
= null;
// search existing taxa whose title begins with the family string
4987 Pager
<TaxonBase
> taxontest
= importer
.getTaxonService().findByTitle(TaxonBase
.class, parentStr
, MatchMode
.BEGINNING
, null, null, null, null, null);
4988 for(TaxonBase tb
:taxontest
.getRecords()){
// only a hit whose name carries the expected rank qualifies as parent
4990 if (tb
.getName().getRank().equals(r
)) {
4991 parent
=CdmBase
.deproxy(tb
, Taxon
.class);
4994 } catch (Exception e
) {
4995 // TODO Auto-generated catch block
4996 e
.printStackTrace();
// nothing suitable found among the search results: build the parent from the parsed name
4999 if(parent
== null) {
5000 INonViralName parentNameName
= parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
5001 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5004 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5005 importer
.getTaxonService().save(parent
);
5006 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
// continue upwards from the parent
5010 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5013 hierarchy
.put(r
,parent
);
// Resolves the parent of a tribe name. The parent string is the subfamily string of myName
// (rank SUBFAMILY) or, when that is null, the family string.
// A candidate Taxon is built from the parsed parent name; if a node of the classification has
// the same title cache (case-insensitive) that existing taxon is used instead. Otherwise
// findMatchingTaxon is consulted and a new Taxon is created and saved.
// The visible statements then recurse via lookForParentNode and store the parent in hierarchy.
// NOTE(review): no reassignment of r is visible for the family fallback - verify that r matches
// the rank of the string finally chosen. The branch on tmp is not visible either.
5022 private void handleTribeHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5023 String parentStr
= myName
.getSubfamilyStr();
5024 Rank r
= Rank
.SUBFAMILY();
5025 if (parentStr
== null){
5026 parentStr
= myName
.getFamilyStr();
5029 if(parentStr
!=null){
5030 INonViralName parentNameName
= parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
// unsaved candidate, used below for the title-cache comparison
5031 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
5032 // importer.getTaxonService().save(parent);
5033 // parent = CdmBase.deproxy(parent, Taxon.class);
5035 boolean parentDoesNotExists
= true;
// reuse a taxon already present in the classification when the titles match
5036 for (TaxonNode p
: classification
.getAllNodes()){
5037 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
5038 parentDoesNotExists
= false;
5039 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
5043 // if(parentDoesNotExists) {
5044 // importer.getTaxonService().save(parent);
5045 // parent = CdmBase.deproxy(parent, Taxon.class);
5046 // lookForParentNode(parentNameName, parent, ref,myName);
5048 if(parentDoesNotExists
) {
5049 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5052 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5053 importer
.getTaxonService().save(parent
);
5054 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
5058 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5061 hierarchy
.put(r
,parent
);
// Resolves the parent of a subtribe name. The parent string is the first non-null value of:
// tribe string (rank TRIBE), subfamily string (rank SUBFAMILY), family string.
// A candidate Taxon is built from the parsed parent name; if a node of the classification has
// the same title cache (case-insensitive) that existing taxon is used instead. Otherwise
// findMatchingTaxon is consulted and a new Taxon is created and saved.
// The visible statements then recurse via lookForParentNode and store the parent in hierarchy.
// NOTE(review): no reassignment of r is visible for the family fallback - verify that r matches
// the rank of the string finally chosen. The branch on tmp is not visible either.
5070 private void handleSubtribeHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5071 String parentStr
= myName
.getTribeStr();
5072 Rank r
= Rank
.TRIBE();
5073 if (parentStr
== null){
5074 parentStr
= myName
.getSubfamilyStr();
5075 r
= Rank
.SUBFAMILY();
5077 if (parentStr
== null){
5078 parentStr
= myName
.getFamilyStr();
5081 if(parentStr
!=null){
5082 INonViralName parentNameName
= parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
// unsaved candidate, used below for the title-cache comparison
5083 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
5084 // importer.getTaxonService().save(parent);
5085 // parent = CdmBase.deproxy(parent, Taxon.class);
5087 boolean parentDoesNotExists
= true;
// reuse a taxon already present in the classification when the titles match
5088 for (TaxonNode p
: classification
.getAllNodes()){
5089 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
5090 parentDoesNotExists
= false;
5091 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
5096 // if(parentDoesNotExists) {
5097 // importer.getTaxonService().save(parent);
5098 // parent = CdmBase.deproxy(parent, Taxon.class);
5099 // lookForParentNode(parentNameName, parent, ref,myName);
5101 if(parentDoesNotExists
) {
5102 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5105 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5106 importer
.getTaxonService().save(parent
);
5107 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
5111 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5114 hierarchy
.put(r
,parent
);
// Resolves the parent of a genus name. The parent string is the first non-null value of:
// subtribe string (rank SUBTRIBE), tribe string, subfamily string (rank SUBFAMILY), family string.
// A candidate Taxon is built from the parsed parent name; if a node of the classification has
// the same title cache (case-insensitive) that existing taxon is used instead. Otherwise
// findMatchingTaxon is consulted and a new Taxon is created and saved.
// The visible statements then recurse via lookForParentNode and store the parent in hierarchy.
// NOTE(review): no reassignment of r is visible for the tribe and family fallbacks - verify that
// r matches the rank of the string finally chosen. The branch on tmp is not visible either.
5123 private void handleGenusHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5124 String parentStr
= myName
.getSubtribeStr();
5125 Rank r
= Rank
.SUBTRIBE();
5126 if (parentStr
== null){
5127 parentStr
= myName
.getTribeStr();
5130 if (parentStr
== null){
5131 parentStr
= myName
.getSubfamilyStr();
5132 r
= Rank
.SUBFAMILY();
5134 if (parentStr
== null){
5135 parentStr
= myName
.getFamilyStr();
5138 if(parentStr
!=null){
5139 INonViralName parentNameName
= parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
// unsaved candidate, used below for the title-cache comparison
5140 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
5141 // importer.getTaxonService().save(parent);
5142 // parent = CdmBase.deproxy(parent, Taxon.class);
5144 boolean parentDoesNotExist
= true;
// reuse a taxon already present in the classification when the titles match
5145 for (TaxonNode p
: classification
.getAllNodes()){
5146 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
5147 // System.out.println(p.getTaxon().getUuid());
5148 // System.out.println(parent.getUuid());
5149 parentDoesNotExist
= false;
5150 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
5154 // if(parentDoesNotExists) {
5155 // importer.getTaxonService().save(parent);
5156 // parent = CdmBase.deproxy(parent, Taxon.class);
5157 // lookForParentNode(parentNameName, parent, ref,myName);
5159 if(parentDoesNotExist
) {
5160 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5163 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5164 importer
.getTaxonService().save(parent
);
5165 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
5169 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5172 hierarchy
.put(r
,parent
);
// Resolves the parent of a subgenus name. The parent string is the first non-null value of:
// genus string (rank GENUS), subtribe string (rank SUBTRIBE), tribe string,
// subfamily string (rank SUBFAMILY), family string.
// A candidate Taxon is built from the parsed parent name; if a node of the classification has
// the same title cache (case-insensitive) that existing taxon is used instead. Otherwise
// findMatchingTaxon is consulted and a new Taxon is created and saved.
// The visible statements then recurse via lookForParentNode and store the parent in hierarchy.
// NOTE(review): no reassignment of r is visible for the tribe and family fallbacks - verify that
// r matches the rank of the string finally chosen. The branch on tmp is not visible either.
5181 private void handleSubgenusHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5182 String parentStr
= myName
.getGenusStr();
5183 Rank r
= Rank
.GENUS();
5185 if(parentStr
==null){
5186 parentStr
= myName
.getSubtribeStr();
5187 r
= Rank
.SUBTRIBE();
5189 if (parentStr
== null){
5190 parentStr
= myName
.getTribeStr();
5193 if (parentStr
== null){
5194 parentStr
= myName
.getSubfamilyStr();
5195 r
= Rank
.SUBFAMILY();
5197 if (parentStr
== null){
5198 parentStr
= myName
.getFamilyStr();
5201 if(parentStr
!=null){
5202 INonViralName parentNameName
= parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
// unsaved candidate, used below for the title-cache comparison
5203 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
5204 // importer.getTaxonService().save(parent);
5205 // parent = CdmBase.deproxy(parent, Taxon.class);
5207 boolean parentDoesNotExists
= true;
// reuse a taxon already present in the classification when the titles match
5208 for (TaxonNode p
: classification
.getAllNodes()){
5209 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
5210 // System.out.println(p.getTaxon().getUuid());
5211 // System.out.println(parent.getUuid());
5212 parentDoesNotExists
= false;
5213 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
5217 // if(parentDoesNotExists) {
5218 // importer.getTaxonService().save(parent);
5219 // parent = CdmBase.deproxy(parent, Taxon.class);
5220 // lookForParentNode(parentNameName, parent, ref,myName);
5222 if(parentDoesNotExists
) {
5223 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5226 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5227 importer
.getTaxonService().save(parent
);
5228 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
5232 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5235 hierarchy
.put(r
,parent
);
// Resolves the parent of a species name. The parent string is the first non-null value of:
// subgenus string (rank SUBGENUS), genus string, subtribe string (rank SUBTRIBE), tribe string,
// subfamily string (rank SUBFAMILY), family string.
// The chosen string and rank are passed to handleParentName and the returned taxon is stored in
// hierarchy under r.
// NOTE(review): no reassignment of r is visible for the genus, tribe and family fallbacks -
// verify that r matches the rank of the string finally chosen.
5244 private void handleSpeciesHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5245 String parentStr
= myName
.getSubgenusStr();
5246 Rank r
= Rank
.SUBGENUS();
5248 if(parentStr
==null){
5249 parentStr
= myName
.getGenusStr();
5253 if(parentStr
==null){
5254 parentStr
= myName
.getSubtribeStr();
5255 r
= Rank
.SUBTRIBE();
5257 if (parentStr
== null){
5258 parentStr
= myName
.getTribeStr();
5261 if (parentStr
== null){
5262 parentStr
= myName
.getSubfamilyStr();
5263 r
= Rank
.SUBFAMILY();
5265 if (parentStr
== null){
5266 parentStr
= myName
.getFamilyStr();
// a parent string was found at some level: resolve or create the taxon and remember it
5269 if(parentStr
!=null){
5270 Taxon parent
= handleParentName(ref
, myName
, parser
, parentStr
, r
);
5271 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5272 hierarchy
.put(r
,parent
);
// Resolves the parent of a subspecies name. The parent string is the first non-null value of:
// species string (rank SPECIES), subgenus string (rank SUBGENUS), genus string,
// subtribe string (rank SUBTRIBE), tribe string, subfamily string (rank SUBFAMILY), family string.
// The chosen string and rank are passed to handleParentName and the returned taxon is stored in
// hierarchy under r.
// NOTE(review): no reassignment of r is visible for the genus, tribe and family fallbacks -
// verify that r matches the rank of the string finally chosen.
5281 private void handleSubSpeciesHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5282 String parentStr
= myName
.getSpeciesStr();
5283 Rank r
= Rank
.SPECIES();
5286 if(parentStr
==null){
5287 parentStr
= myName
.getSubgenusStr();
5288 r
= Rank
.SUBGENUS();
5291 if(parentStr
==null){
5292 parentStr
= myName
.getGenusStr();
5296 if(parentStr
==null){
5297 parentStr
= myName
.getSubtribeStr();
5298 r
= Rank
.SUBTRIBE();
5300 if (parentStr
== null){
5301 parentStr
= myName
.getTribeStr();
5304 if (parentStr
== null){
5305 parentStr
= myName
.getSubfamilyStr();
5306 r
= Rank
.SUBFAMILY();
5308 if (parentStr
== null){
5309 parentStr
= myName
.getFamilyStr();
// a parent string was found at some level: resolve or create the taxon and remember it
5312 if(parentStr
!=null){
5313 Taxon parent
= handleParentName(ref
, myName
, parser
, parentStr
, r
);
5314 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5315 hierarchy
.put(r
,parent
);
// Resolves the parent of a form name. The parent string is the first non-null value of:
// subspecies string (rank SUBSPECIES), species string, subgenus string (rank SUBGENUS),
// genus string, subtribe string (rank SUBTRIBE), tribe string,
// subfamily string (rank SUBFAMILY), family string.
// The chosen string and rank are passed to handleParentName and the returned taxon is stored in
// hierarchy under r.
// NOTE(review): no reassignment of r is visible for the species, genus, tribe and family
// fallbacks - verify that r matches the rank of the string finally chosen.
5325 private void handleFormHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5326 String parentStr
= myName
.getSubspeciesStr();
5327 Rank r
= Rank
.SUBSPECIES();
5330 if(parentStr
==null){
5331 parentStr
= myName
.getSpeciesStr();
5335 if(parentStr
==null){
5336 parentStr
= myName
.getSubgenusStr();
5337 r
= Rank
.SUBGENUS();
5340 if(parentStr
==null){
5341 parentStr
= myName
.getGenusStr();
5345 if(parentStr
==null){
5346 parentStr
= myName
.getSubtribeStr();
5347 r
= Rank
.SUBTRIBE();
5349 if (parentStr
== null){
5350 parentStr
= myName
.getTribeStr();
5353 if (parentStr
== null){
5354 parentStr
= myName
.getSubfamilyStr();
5355 r
= Rank
.SUBFAMILY();
5357 if (parentStr
== null){
5358 parentStr
= myName
.getFamilyStr();
// a parent string was found at some level: resolve or create the taxon and remember it
5361 if(parentStr
!=null){
5362 Taxon parent
= handleParentName(ref
, myName
, parser
, parentStr
, r
);
5363 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5364 hierarchy
.put(r
,parent
);
// Resolves the parent of a variety name. The parent string is the first non-null value of:
// subspecies string (rank SUBSPECIES), species string, subgenus string (rank SUBGENUS),
// genus string, subtribe string (rank SUBTRIBE), tribe string,
// subfamily string (rank SUBFAMILY), family string.
// The chosen string and rank are passed to handleParentName and the returned taxon is stored in
// hierarchy under r.
// NOTE(review): no reassignment of r is visible for the species, genus, tribe and family
// fallbacks - verify that r matches the rank of the string finally chosen.
5373 private void handleVarietyHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5374 String parentStr
= myName
.getSubspeciesStr();
5375 Rank r
= Rank
.SUBSPECIES();
5377 if(parentStr
==null){
5378 parentStr
= myName
.getSpeciesStr();
5382 if(parentStr
==null){
5383 parentStr
= myName
.getSubgenusStr();
5384 r
= Rank
.SUBGENUS();
5387 if(parentStr
==null){
5388 parentStr
= myName
.getGenusStr();
5392 if(parentStr
==null){
5393 parentStr
= myName
.getSubtribeStr();
5394 r
= Rank
.SUBTRIBE();
5396 if (parentStr
== null){
5397 parentStr
= myName
.getTribeStr();
5400 if (parentStr
== null){
5401 parentStr
= myName
.getSubfamilyStr();
5402 r
= Rank
.SUBFAMILY();
5404 if (parentStr
== null){
5405 parentStr
= myName
.getFamilyStr();
// a parent string was found at some level: resolve or create the taxon and remember it
5408 if(parentStr
!=null){
5409 Taxon parent
= handleParentName(ref
, myName
, parser
, parentStr
, r
);
5410 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5411 hierarchy
.put(r
,parent
);
// Shared parent resolution used by the species/subspecies/form/variety handlers.
// Parses parentStr with rank r and builds a candidate Taxon for it. If a node of the
// classification has the same title (compared case-insensitively on the part before "sec.")
// that existing taxon is used. Otherwise findMatchingTaxon is consulted and a new Taxon is
// created and saved. The visible statements then recurse via lookForParentNode.
// NOTE(review): the return statement and the branch on tmp are not visible here - presumably
// the resolved parent is returned; confirm.
5423 private Taxon
handleParentName(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
, String parentStr
, Rank r
) {
5424 INonViralName parentNameName
= parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
// unsaved candidate, used below for the title-cache comparison
5425 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
5426 // importer.getTaxonService().save(parent);
5427 // parent = CdmBase.deproxy(parent, Taxon.class);
5429 boolean parentDoesNotExists
= true;
5430 for (TaxonNode p
: classification
.getAllNodes()){
// compare only the name part of the titles, ignoring whatever follows "sec."
// NOTE(review): String.split takes a regex, so the '.' in "sec." matches any character
5431 if(p
.getTaxon().getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(parent
.getTitleCache().split("sec.")[0].trim())) {
5432 // System.out.println(p.getTaxon().getUuid());
5433 // System.out.println(parent.getUuid());
5434 parentDoesNotExists
= false;
5435 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
5439 if(parentDoesNotExists
) {
5440 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5441 // System.out.println("FOUND PARENT "+tmp.toString()+" for "+parentNameName.toString());
5444 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5445 importer
.getTaxonService().save(parent
);
// continue upwards from the parent
5450 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5456 private void addNameDifferenceToFile(String originalname
, String atomisedname
){
5458 FileWriter fstream
= new FileWriter(TaxonXImport
.LOG_FOLDER
+ "NamesDifferent_"+classification
.getTitleCache()+".txt",true);
5459 BufferedWriter out
= new BufferedWriter(fstream
);
5460 out
.write(originalname
+" (original) versus "+replaceNull(atomisedname
)+" (atomised) \n");
5461 //Close the output stream
5463 }catch (Exception e
){//Catch exception if any
5464 System
.err
.println("Error: " + e
.getMessage());
5470 * @param nomenclaturalCode2
5473 private void addProblemNameToFile(String name
, String author
, NomenclaturalCode nomenclaturalCode2
, Rank rank
) {
5475 FileWriter fstream
= new FileWriter(TaxonXImport
.LOG_FOLDER
+ "NameNotParsed.txt",true);
5476 BufferedWriter out
= new BufferedWriter(fstream
);
5477 out
.write(name
+"\t"+replaceNull(author
)+"\t"+replaceNull(nomenclaturalCode2
)+"\t"+replaceNull(rank
)+"\n");
5478 //Close the output stream
5480 }catch (Exception e
){//Catch exception if any
5481 System
.err
.println("Error: " + e
.getMessage());
5488 * @param bestMatchingTaxon
5489 * @param insertAsExisting
5492 private void logDecision(INonViralName tnb
, Taxon bestMatchingTaxon
, boolean insertAsExisting
, Reference refMods
) {
5494 FileWriter fstream
= new FileWriter(TaxonXImport
.LOG_FOLDER
+ "Decisions_"+classification
.toString()+".txt", true);
5495 BufferedWriter out
= new BufferedWriter(fstream
);
5496 out
.write(tnb
.getTitleCache() + " sec. " + refMods
+ "\t" + bestMatchingTaxon
.getTitleCache() + "\t" + insertAsExisting
+ "\n");
5497 //Close the output stream
5499 }catch (Exception e
){//Catch exception if any
5500 System
.err
.println("Error: " + e
.getMessage());
// Converts an arbitrary object to the string used in the log files written by this class.
// A NomenclaturalCode is rendered via getTitleCache(), every other object via toString().
// NOTE(review): the handling of a null argument is not visible here - presumably a placeholder
// string is returned before in is dereferenced; confirm.
5505 @SuppressWarnings("unused")
5506 private String
replaceNull(Object in
){
// exact class match only: subclasses of NomenclaturalCode would fall through to toString()
5510 if (in
.getClass().equals(NomenclaturalCode
.class)) {
5511 return ((NomenclaturalCode
)in
).getTitleCache();
5513 return in
.toString();
5518 * @param nomenclaturalCode2
5521 private void addProblemNameToFile(String type
, String name
, NomenclaturalCode nomenclaturalCode2
, Rank rank
, String problems
) {
5523 FileWriter fstream
= new FileWriter(TaxonXImport
.LOG_FOLDER
+ "NameNotParsed_"+classification
.getTitleCache()+".txt",true);
5524 BufferedWriter out
= new BufferedWriter(fstream
);
5525 out
.write(type
+"\t"+name
+"\t"+replaceNull(nomenclaturalCode2
)+"\t"+replaceNull(rank
)+"\t"+problems
+"\n");
5526 //Close the output stream
5528 }catch (Exception e
){//Catch exception if any
5529 System
.err
.println("Error: " + e
.getMessage());