2 * Copyright (C) 2013 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
9 package eu
.etaxonomy
.cdm
.io
.taxonx2013
;
11 import java
.io
.BufferedWriter
;
13 import java
.io
.FileWriter
;
14 import java
.io
.IOException
;
16 import java
.util
.ArrayList
;
17 import java
.util
.Arrays
;
18 import java
.util
.HashMap
;
19 import java
.util
.List
;
22 import java
.util
.UUID
;
23 import java
.util
.regex
.Matcher
;
24 import java
.util
.regex
.Pattern
;
26 import javax
.xml
.transform
.TransformerException
;
27 import javax
.xml
.transform
.TransformerFactoryConfigurationError
;
29 import org
.apache
.commons
.lang
.StringUtils
;
30 import org
.apache
.log4j
.Logger
;
31 import org
.w3c
.dom
.Node
;
32 import org
.w3c
.dom
.NodeList
;
34 import com
.ibm
.lsid
.MalformedLSIDException
;
36 import eu
.etaxonomy
.cdm
.api
.facade
.DerivedUnitFacade
;
37 import eu
.etaxonomy
.cdm
.api
.service
.pager
.Pager
;
38 import eu
.etaxonomy
.cdm
.model
.agent
.AgentBase
;
39 import eu
.etaxonomy
.cdm
.model
.agent
.Person
;
40 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
41 import eu
.etaxonomy
.cdm
.model
.common
.IdentifiableSource
;
42 import eu
.etaxonomy
.cdm
.model
.common
.LSID
;
43 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
44 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
45 import eu
.etaxonomy
.cdm
.model
.description
.IndividualsAssociation
;
46 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
47 import eu
.etaxonomy
.cdm
.model
.description
.TaxonNameDescription
;
48 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
49 import eu
.etaxonomy
.cdm
.model
.name
.INonViralName
;
50 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
51 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatus
;
52 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatusType
;
53 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
54 import eu
.etaxonomy
.cdm
.model
.name
.TaxonName
;
55 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivedUnit
;
56 import eu
.etaxonomy
.cdm
.model
.occurrence
.SpecimenOrObservationType
;
57 import eu
.etaxonomy
.cdm
.model
.reference
.OriginalSourceType
;
58 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
59 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
60 import eu
.etaxonomy
.cdm
.model
.taxon
.Classification
;
61 import eu
.etaxonomy
.cdm
.model
.taxon
.Synonym
;
62 import eu
.etaxonomy
.cdm
.model
.taxon
.SynonymType
;
63 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
64 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
65 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonNode
;
66 import eu
.etaxonomy
.cdm
.model
.term
.FeatureNode
;
67 import eu
.etaxonomy
.cdm
.model
.term
.FeatureTree
;
68 import eu
.etaxonomy
.cdm
.persistence
.dto
.UuidAndTitleCache
;
69 import eu
.etaxonomy
.cdm
.persistence
.query
.MatchMode
;
70 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
71 import eu
.etaxonomy
.cdm
.strategy
.parser
.INonViralNameParser
;
72 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImpl
;
73 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImplRegExBase
;
// Extractor for the treatment nodes of a TaxonX document; builds on TaxonXExtractor.
80 public class TaxonXTreatmentExtractor
extends TaxonXExtractor
{
82 private static final String PUBLICATION_YEAR
= "publicationYear";
84 private static final Logger logger
= Logger
.getLogger(TaxonXTreatmentExtractor
.class);
// Feature label passed on for nodes that match none of the handled node names/types.
86 private static final String notMarkedUp
= "Not marked-up";
// UUID under which buildFeatureTree() looks up (or creates) its feature tree.
87 private static final UUID proIbioTreeUUID
= UUID
.fromString("2c49f506-c7f7-44de-a8b9-2e695de3769c");
88 private static final UUID OtherUUID
= UUID
.fromString("6465f8aa-2175-446f-807e-7163994b120f");
89 private static final UUID NotMarkedUpUUID
= UUID
.fromString("796fe3a5-2c9c-4a89-b298-7598ca944063");
90 private static final boolean skippQuestion
= true;
92 private final NomenclaturalCode nomenclaturalCode
;
// Not final: reloadClassification() re-reads it from the classification service.
93 private Classification classification
;
95 private String treatmentMainName
,originalTreatmentName
;
97 private final HashMap
<String
,Map
<String
,String
>> namesMap
= new HashMap
<String
, Map
<String
,String
>>();
// In the visible part of the file these two patterns are referenced only by the commented-out extractKey.
100 private final Pattern keypattern
= Pattern
.compile("^(\\d+.*|-\\d+.*)");
101 private final Pattern keypatternend
= Pattern
.compile("^.+?\\d$");
// Gate checked before most extraction branches and before saving in extractTreatment.
// NOTE(review): the place where this flag is set to true is not visible in this excerpt.
103 private boolean maxRankRespected
=false;
// Features keyed by their title cache (see buildFeatureTree / extractMaterials).
104 private Map
<String
, Feature
> featuresMap
;
106 private MyName currentMyName
;
108 private Reference sourceUrlRef
;
110 private String followingText
; //text element immediately following a tax:name in tax:nomenclature TODO move to state
111 private String usedFollowingTextPrefix
; //the part of the following text which has been used during taxon name creation
// Helper used throughout this class to attach and save source references.
113 private final TaxonXAddSources sourceHandler
= new TaxonXAddSources();
116 * @param nomenclaturalCode
117 * @param classification
// Stores the given collaborators in fields, calls prepareCollectors with the
// import state and the importer's agent service, and configures the source
// handler with the URL reference, the importer and the import state.
121 public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode
, Classification classification
, TaxonXImport importer
,
122 TaxonXImportState configState
,Map
<String
, Feature
> featuresMap
, Reference urlSource
) {
123 this.nomenclaturalCode
=nomenclaturalCode
;
124 this.classification
= classification
;
// importer and state2 are not declared in this class excerpt; presumably inherited from TaxonXExtractor — confirm.
125 this.importer
=importer
;
126 this.state2
=configState
;
127 this.featuresMap
=featuresMap
;
128 this.sourceUrlRef
=urlSource
;
129 prepareCollectors(configState
, importer
.getAgentService());
130 this.sourceHandler
.setSourceUrlRef(sourceUrlRef
);
131 this.sourceHandler
.setImporter(importer
);
132 this.sourceHandler
.setConfigState(configState
);
136 * Extracts all the treatment information and saves it
137 * @param treatmentnode: the XML Node
138 * @param tosave: the list of object to save into the CDM
139 * @param refMods: the reference extracted from the MODS
140 * @param sourceName: the URI of the document
// Entry point for one treatment node: passes every child node to
// handleSingleNode (carrying the accepted taxon from one child to the next)
// and, when maxRankRespected is set, saves the collected names and the
// classification through the importer's services.
142 @SuppressWarnings({ "rawtypes", "unused" })
144 protected void extractTreatment(Node treatmentnode
, Reference refMods
, URI sourceName
) { logger
.info("extractTreatment");
145 List
<TaxonName
> namesToSave
= new ArrayList
<TaxonName
>();
146 NodeList children
= treatmentnode
.getChildNodes();
147 Taxon acceptedTaxon
=null;
148 boolean hasRefgroup
=false;
// First pass: look for a tax:ref_group child.
// NOTE(review): the body of this check is not visible in this excerpt; presumably it sets hasRefgroup — confirm,
// and confirm that hasRefgroup is actually read anywhere.
151 for (int i
=0;i
<children
.getLength();i
++){
152 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:ref_group")) {
// Second pass: dispatch each child; the returned taxon becomes the accepted taxon for the following children.
157 for (int i
=0;i
<children
.getLength();i
++){
158 Node child
= children
.item(i
);
159 acceptedTaxon
= handleSingleNode(refMods
, sourceName
, namesToSave
, child
, acceptedTaxon
);
161 // logger.info("saveUpdateNames");
// Persist only when the rank gate is open.
162 if (maxRankRespected
){
163 importer
.getNameService().saveOrUpdate(namesToSave
);
164 importer
.getClassificationService().saveOrUpdate(classification
);
165 //logger.info("saveUpdateNames-ok");
// Dispatches one child node of a treatment by its node name and, for tax:div
// nodes, by the value of its "type" attribute. The tax:nomenclature branch
// (re)assigns the accepted taxon; the tax:ref_group, tax:div and tax:figure
// branches additionally require maxRankRespected. Returns acceptedTaxon so
// the caller can hand it to the next child.
// NOTE(review): every tax:div test calls getNamedItem("type").getNodeValue()
// without a null check; a tax:div without a "type" attribute would throw a
// NullPointerException here — confirm the input always carries it.
171 private Taxon
handleSingleNode(Reference refMods
, URI sourceName
,
172 List
<TaxonName
> namesToSave
, Node child
, Taxon acceptedTaxon
) {
// defaultTaxon is never reassigned in the visible code, so null is passed to every extractor below.
173 Taxon defaultTaxon
=null;
175 String nodeName
= child
.getNodeName();
176 if (nodeName
.equalsIgnoreCase("tax:nomenclature")){
177 NodeList nomenclatureChildren
= child
.getChildNodes();
// Scan the nomenclature children for a tax:name element.
// NOTE(review): the loop body and the use of containsName are not visible in this excerpt.
178 boolean containsName
= false;
179 for(int k
=0; k
<nomenclatureChildren
.getLength(); k
++){
180 if(nomenclatureChildren
.item(k
).getNodeName().equalsIgnoreCase("tax:name")){
186 reloadClassification();
187 //extract "main" the scientific name
189 acceptedTaxon
= extractNomenclature(child
, namesToSave
, refMods
);
190 }catch(ClassCastException e
){
191 //FIXME exception handling
194 // System.out.println("acceptedTaxon : "+acceptedTaxon);
196 }else if (nodeName
.equalsIgnoreCase("tax:ref_group") && maxRankRespected
){
197 reloadClassification();
198 //extract the References within the document
199 extractReferences(child
, namesToSave
,acceptedTaxon
,refMods
);
200 }else if (nodeName
.equalsIgnoreCase("tax:div") &&
201 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected
){
// Record the document: sourceName is appended (FileWriter append mode) to multipleTaxonX.txt under TaxonXImport.LOG_FOLDER.
// NOTE(review): the declaration and the close of writer are not visible here; verify it is closed on all paths.
202 File file
= new File(TaxonXImport
.LOG_FOLDER
+ "multipleTaxonX.txt");
205 writer
= new FileWriter(file
,true);
206 writer
.write(sourceName
+"\n");
209 } catch (IOException e1
) {
210 // TODO Auto-generated catch block
211 logger
.error(e1
.getMessage());
213 // String multiple = askMultiple(children.item(i));
// multiple is fixed to "Other" (the askMultiple call above is commented out), so only the first test below can succeed.
214 String multiple
= "Other";
215 if (multiple
.equalsIgnoreCase("other")) {
216 extractSpecificFeatureNotStructured(child
,acceptedTaxon
, defaultTaxon
,namesToSave
, refMods
,multiple
);
217 }else if (multiple
.equalsIgnoreCase("synonyms")) {
219 extractSynonyms(child
,acceptedTaxon
, refMods
, null);
220 }catch(NullPointerException e
){
221 logger
.warn("the accepted taxon is maybe null");
223 }else if(multiple
.equalsIgnoreCase("material examined")){
224 extractMaterials(child
, acceptedTaxon
, refMods
, namesToSave
);
225 }else if (multiple
.equalsIgnoreCase("distribution")){
226 extractDistribution(child
, acceptedTaxon
, defaultTaxon
, namesToSave
, refMods
);
227 }else if (multiple
.equalsIgnoreCase("type status")){
228 extractDescriptionWithReference(child
, acceptedTaxon
, defaultTaxon
,refMods
, "TypeStatus");
229 }else if (multiple
.equalsIgnoreCase("vernacular name")){
230 extractDescriptionWithReference(child
, acceptedTaxon
, defaultTaxon
,refMods
, Feature
.COMMON_NAME().getTitleCache());
232 extractSpecificFeature(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
,multiple
);
// From here on: one branch per tax:div type, each mapped to a Feature or a dedicated extractor.
235 else if(nodeName
.equalsIgnoreCase("tax:div") &&
236 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected
){
237 extractFeature(child
,acceptedTaxon
,defaultTaxon
, namesToSave
, refMods
, Feature
.BIOLOGY_ECOLOGY());
239 else if(nodeName
.equalsIgnoreCase("tax:div") &&
240 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("vernacularName") && maxRankRespected
){
241 extractDescriptionWithReference(child
, acceptedTaxon
,defaultTaxon
,refMods
, Feature
.COMMON_NAME().getTitleCache());
243 else if(nodeName
.equalsIgnoreCase("tax:div") &&
244 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected
){
245 extractFeature(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
, Feature
.DESCRIPTION());
247 else if(nodeName
.equalsIgnoreCase("tax:div") &&
248 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected
){
249 extractFeature(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
,Feature
.DIAGNOSIS());
251 else if(nodeName
.equalsIgnoreCase("tax:div") &&
252 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected
){
253 extractFeature(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
, Feature
.DISCUSSION());
// "note" divs are stored under the same feature as "description" divs.
255 else if(nodeName
.equalsIgnoreCase("tax:div") &&
256 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("note") && maxRankRespected
){
257 extractFeature(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
, Feature
.DESCRIPTION());
259 else if(nodeName
.equalsIgnoreCase("tax:div") &&
260 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected
){
261 extractDistribution(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
);
263 else if(nodeName
.equalsIgnoreCase("tax:div") &&
264 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected
){
265 extractFeature(child
,acceptedTaxon
,defaultTaxon
,namesToSave
,refMods
,Feature
.ETYMOLOGY());
267 else if(nodeName
.equalsIgnoreCase("tax:div") &&
268 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected
){
269 extractMaterials(child
,acceptedTaxon
, refMods
, namesToSave
);
271 else if(nodeName
.equalsIgnoreCase("tax:figure") && maxRankRespected
){
272 extractSpecificFeature(child
,acceptedTaxon
,defaultTaxon
, namesToSave
, refMods
, "Figure");
// Divs of type "Other" are passed on with the feature name "table".
274 else if(nodeName
.equalsIgnoreCase("tax:div") &&
275 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") && maxRankRespected
){
276 extractSpecificFeature(child
, acceptedTaxon
,defaultTaxon
, namesToSave
, refMods
, "table");
277 }else if(nodeName
.equalsIgnoreCase("tax:div") &&
278 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected
){
279 //TODO IGNORE keys for the moment
280 //extractKey(children.item(i),acceptedTaxon, nameToSave,source, refMods);
281 extractSpecificFeatureNotStructured(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
,"Keys - unparsed");
// Fallback for anything except tax:pb: log the first attribute (if any) and store the node as unstructured "Not marked-up" text.
// NOTE(review): the conditions surrounding this fallback and the "Unhandled" warning are not fully visible in this excerpt.
284 if (! nodeName
.equalsIgnoreCase("tax:pb")){
285 //logger.info("ANOTHER KIND OF NODES: "+children.item(i).getNodeName()+", "+children.item(i).getAttributes());
286 if (child
.getAttributes() !=null) {
287 logger
.info("First Attribute: " + child
.getAttributes().item(0));
289 extractSpecificFeatureNotStructured(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
, notMarkedUp
);
292 logger
.warn("Unhandled");
295 return acceptedTaxon
;
// NOTE(review): the body of this accessor is not visible in this excerpt; presumably it returns featuresMap — confirm.
299 protected Map
<String
,Feature
> getFeaturesUsed(){
// Rebuilds the feature tree identified by proIbioTreeUUID: looks the tree up,
// creates it when absent, removes the existing children of its root, adds one
// FeatureNode per entry of featuresMap and saves the tree.
305 private void buildFeatureTree() {
306 logger
.info("buildFeatureTree");
307 FeatureTree proibiospheretree
= importer
.getFeatureTreeService().find(proIbioTreeUUID
);
308 if (proibiospheretree
== null){
// No tree with that UUID yet: if exactly one feature tree exists, copy its distinct features into featuresMap.
309 List
<FeatureTree
> trees
= importer
.getFeatureTreeService().list(FeatureTree
.class, null, null, null, null);
310 if (trees
.size()==1) {
311 FeatureTree
<Feature
> ft
= trees
.get(0);
312 if (featuresMap
==null) {
313 featuresMap
=new HashMap
<String
, Feature
>();
315 for (Feature feature
: ft
.getDistinctFeatures()){
317 featuresMap
.put(feature
.getTitleCache(), feature
);
// Create the tree under the fixed UUID.
321 proibiospheretree
= FeatureTree
.NewInstance();
322 proibiospheretree
.setUuid(proIbioTreeUUID
);
324 // FeatureNode root = proibiospheretree.getRoot();
325 FeatureNode root2
= proibiospheretree
.getRoot();
// Remove the root's children starting from the last index.
// NOTE(review): the statement that decrements nbChildren is not visible in this excerpt — confirm the loop terminates.
327 int nbChildren
= root2
.getChildCount()-1;
328 while (nbChildren
>-1){
330 root2
.removeChild(nbChildren
);
331 }catch(Exception e
){logger
.warn("Can't remove child from FeatureTree "+e
);}
// NOTE(review): featuresMap is only null-checked inside the branch above; confirm it cannot be null here.
337 for (Feature feature
:featuresMap
.values()) {
338 root2
.addChild(FeatureNode
.NewInstance(feature
));
340 importer
.getFeatureTreeService().saveOrUpdate(proibiospheretree
);
347 * @param acceptedTaxon: the current acceptedTaxon
348 * @param nametosave: the list of objects to save into the CDM
349 * @param refMods: the current reference extracted from the MODS
351 /* @SuppressWarnings("rawtypes")
352 private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonName> nametosave, Reference refMods) {
353 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
355 NodeList children = keys.getChildNodes();
357 PolytomousKey poly = PolytomousKey.NewInstance();
358 poly.addSource(OriginalSourceType.Import, null,null,refMods,null);
359 poly.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
360 poly.addTaxonomicScope(acceptedTaxon);
361 poly.setTitleCache("bloup", true);
362 // poly.addCoveredTaxon(acceptedTaxon);
363 PolytomousKeyNode root = poly.getRoot();
364 PolytomousKeyNode previous = null,tmpKey=null;
366 List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
368 // String fullContent = keys.getTextContent();
369 for (int i=0;i<children.getLength();i++){
370 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
371 NodeList paragraph = children.item(i).getChildNodes();
374 for (int j=0;j<paragraph.getLength();j++){
375 if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
376 if (! paragraph.item(j).getTextContent().trim().isEmpty()){
377 key+=paragraph.item(j).getTextContent().trim();
378 // logger.info("KEY: "+j+"--"+key);
381 if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
382 taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
385 // logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
386 if (keypattern.matcher(key).matches()){
387 tmpKey = PolytomousKeyNode.NewInstance(key);
388 if (taxonKey!=null) {
389 tmpKey.setTaxon(taxonKey);
391 polyNodes.add(tmpKey);
392 if (previous == null) {
393 root.addChild(tmpKey);
395 previous.addChild(tmpKey);
399 tmpKey=PolytomousKeyNode.NewInstance(key);
400 if (taxonKey!=null) {
401 tmpKey.setTaxon(taxonKey);
403 polyNodes.add(tmpKey);
404 if (keypatternend.matcher(key).matches()) {
405 root.addChild(tmpKey);
408 previous.addChild(tmpKey);
415 importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
416 importer.getPolytomousKeyService().saveOrUpdate(poly);
422 * @param taxons: the XML Nodegroup
423 * @param nametosave: the list of objects to save into the CDM
424 * @param acceptedTaxon: the current accepted Taxon
425 * @param refMods: the current reference extracted from the MODS
427 * @return Taxon object built
// Builds a TaxonName from a tax:name node: sets currentMyName from
// extractScientificName(taxons, refMods, null) and returns
// currentMyName.getTaxonName().
// The MyName created from isSynonym is replaced by the result of
// extractScientificName when that call succeeds.
// NOTE(review): the local followingText hides the field of the same name, and
// statusType is only referenced by the commented-out code below.
// NOTE(review): the try line and the catch bodies are not visible in this excerpt;
// confirm the transformer exceptions are at least logged.
429 @SuppressWarnings({ "rawtypes", "unused" })
430 private TaxonName
getTaxonNameFromXML(Node taxons
, List
<TaxonName
> nametosave
, Reference refMods
, boolean isSynonym
) {
431 // logger.info("getTaxonFromXML");
432 // logger.info("acceptedTaxon: "+acceptedTaxon);
433 logger
.info("getTaxonNameFromXML");
434 TaxonName nameToBeFilled
= null;
436 currentMyName
=new MyName(isSynonym
);
438 NomenclaturalStatusType statusType
= null;
440 String followingText
= null; //needs to be checked if following text is possible
441 currentMyName
= extractScientificName(taxons
,refMods
, null);
442 } catch (TransformerFactoryConfigurationError e1
) {
444 } catch (TransformerException e1
) {
447 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
449 nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
450 if (nameToBeFilled.hasProblem() &&
451 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
452 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
453 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
454 nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
457 nameToBeFilled = getTaxonName(nameToBeFilled,nametosave,statusType);
459 nameToBeFilled
= currentMyName
.getTaxonName();
460 return nameToBeFilled
;
// Refreshes the classification field from the classification service, looked up by the UUID of the current classification.
// NOTE(review): the statements between the first find(...) and saveOrUpdate(...) are not visible in this excerpt;
// presumably the save-and-reload path is only taken when the first lookup returns null — confirm.
468 private void reloadClassification() {
469 logger
.info("reloadClassification");
470 Classification cl
= importer
.getClassificationService().find(classification
.getUuid());
474 importer
.getClassificationService().saveOrUpdate(classification
);
475 classification
= importer
.getClassificationService().find(classification
.getUuid());
480 // * Create a Taxon for the current NameBase, based on the current reference
481 // * @param taxonName
482 // * @param refMods: the current reference extracted from the MODS
485 // @SuppressWarnings({ "unused", "rawtypes" })
486 // private Taxon getTaxon(TaxonName taxonName, Reference refMods) {
487 // Taxon t = new Taxon(taxonName,null );
488 // if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
489 // t.setSec(configState.getConfig().getSecundum());
490 // logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
492 // /*<<<<<<< .courant
493 // boolean sourceExists=false;
494 // Set<IdentifiableSource> sources = t.getSources();
495 // for (IdentifiableSource src : sources){
496 // String micro = src.getCitationMicroReference();
497 // Reference r = src.getCitation();
498 // if (r.equals(refMods) && micro == null) {
499 // sourceExists=true;
502 // if(!sourceExists) {
503 // t.addSource(null,null,refMods,null);
506 // t.addSource(OriginalSourceType.Import,null,null,refMods,null);
507 // t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
// Collects the text of the tax:p children (into s) and of the tax:bibref
// children (into r) of the given node, wraps r in a new generic Reference and
// hands everything to setParticularDescription under the feature resolved
// from featureName.
511 private void extractDescriptionWithReference(Node typestatus
, Taxon acceptedTaxon
, Taxon defaultTaxon
, Reference refMods
,
512 String featureName
) {
513 // System.out.println("extractDescriptionWithReference !");
514 logger
.info("extractDescriptionWithReference");
515 NodeList children
= typestatus
.getChildNodes();
517 Feature currentFeature
=getFeatureObjectFromString(featureName
);
// s accumulates paragraph text, r accumulates bibliographic reference text (both trimmed, concatenated without separator).
519 String r
="";String s
="";
520 for (int i
=0;i
<children
.getLength();i
++){
521 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
522 s
+=children
.item(i
).getTextContent().trim();
524 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:bibref")){
525 r
+= children
.item(i
).getTextContent().trim();
// NOTE(review): the body of this test is not visible in this excerpt; also note that indexOf("") is 0,
// so the condition holds whenever r is still empty.
527 if (s
.indexOf(r
)>-1) {
// The reference's title cache is set to r with the protect flag true.
532 Reference currentref
= ReferenceFactory
.newGeneric();
534 currentref
.setTitleCache(r
, true);
538 setParticularDescription(s
,acceptedTaxon
,defaultTaxon
, currentref
, refMods
,currentFeature
);
543 * @param distribution: the XML node group
544 * @param acceptedTaxon: the current accepted Taxon
545 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
546 * @param refMods: the current reference extracted from the MODS
// Extracts a distribution section. For every tax:p child, plain text, inline
// tax:name elements and tax:collection_event elements are gathered per
// paragraph index; when an accepted taxon exists, specimens become
// associations and the texts become description elements on its taxon description.
548 @SuppressWarnings("rawtypes")
549 private void extractDistribution(Node distribution
, Taxon acceptedTaxon
, Taxon defaultTaxon
, List
<TaxonName
> nametosave
, Reference refMods
) {
550 logger
.info("extractDistribution");
551 // logger.info("acceptedTaxon: "+acceptedTaxon);
552 NodeList children
= distribution
.getChildNodes();
// Both maps are keyed by the index i of the tax:p child within the distribution node.
553 Map
<Integer
,List
<MySpecimenOrObservation
>> specimenOrObservations
= new HashMap
<Integer
, List
<MySpecimenOrObservation
>>();
554 Map
<Integer
,String
> descriptionsFulltext
= new HashMap
<Integer
,String
>();
556 for (int i
=0;i
<children
.getLength();i
++){
557 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
558 NodeList paragraph
= children
.item(i
).getChildNodes();
559 for (int j
=0;j
<paragraph
.getLength();j
++){
560 if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("#text")){
561 extractText(descriptionsFulltext
, i
, paragraph
.item(j
));
563 else if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
564 extractInLine(nametosave
, refMods
, descriptionsFulltext
, i
,paragraph
.item(j
));
566 else if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("tax:collection_event")){
// The freshly created MySpecimenOrObservation is immediately replaced by the extraction result.
567 MySpecimenOrObservation specimenOrObservation
= new MySpecimenOrObservation();
568 DerivedUnit derivedUnitBase
= null;
569 specimenOrObservation
= extractSpecimenOrObservation(paragraph
.item(j
), derivedUnitBase
, SpecimenOrObservationType
.DerivedUnit
, null);
570 extractTextFromSpecimenOrObservation(specimenOrObservations
, descriptionsFulltext
, i
, specimenOrObservation
);
// NOTE(review): the bodies of the next two loops and the declaration of m are not visible in this excerpt;
// presumably they compute m as the highest key of both maps — confirm.
577 for (int k
:descriptionsFulltext
.keySet()) {
582 for (int k
:specimenOrObservations
.keySet()) {
589 if(acceptedTaxon
!=null){
590 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
591 Feature currentFeature
= Feature
.DISTRIBUTION();
592 // DerivedUnit derivedUnitBase=null;
// Walk the paragraph indices in ascending order up to m.
594 for (int k
=0;k
<=m
;k
++){
595 if(specimenOrObservations
.keySet().contains(k
)){
596 for (MySpecimenOrObservation soo
:specimenOrObservations
.get(k
) ) {
597 handleAssociation(acceptedTaxon
, refMods
, td
, soo
);
601 if (descriptionsFulltext
.keySet().contains(k
)){
// Non-empty text starting with "Hab." or "Habitat" is stored under the HABITAT feature.
602 if (!stringIsEmpty(descriptionsFulltext
.get(k
).trim()) && (descriptionsFulltext
.get(k
).startsWith("Hab.") || descriptionsFulltext
.get(k
).startsWith("Habitat"))){
603 setParticularDescription(descriptionsFulltext
.get(k
),acceptedTaxon
,defaultTaxon
, refMods
, Feature
.HABITAT());
// NOTE(review): the lines between the habitat call and this one are not visible; presumably this is the else path — confirm.
607 handleTextData(refMods
, descriptionsFulltext
, td
, currentFeature
, k
);
// Attach and save the description whenever paragraph k contributed text or specimens.
611 if (descriptionsFulltext
.keySet().contains(k
) || specimenOrObservations
.keySet().contains(k
)){
612 acceptedTaxon
.addDescription(td
);
613 sourceHandler
.addAndSaveSource(refMods
, td
, null);
614 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
622 * @param descriptionsFulltext
624 * @param currentFeature
// Creates a TextData element for currentFeature holding the text stored under
// key k (language: Language.UNKNOWN_LANGUAGE()), adds refMods as its source
// via the source handler and appends it to the taxon description td.
627 private void handleTextData(Reference refMods
, Map
<Integer
, String
> descriptionsFulltext
, TaxonDescription td
,
628 Feature currentFeature
, int k
) {
629 //logger.info("handleTextData");
630 TextData textData
= TextData
.NewInstance();
631 textData
.setFeature(currentFeature
);
632 textData
.putText(Language
.UNKNOWN_LANGUAGE(), descriptionsFulltext
.get(k
));
633 sourceHandler
.addSource(refMods
, textData
);
634 td
.addElement(textData
);
638 * @param acceptedTaxon
// Links the derived unit of soo to the accepted taxon: sources and saves the
// unit, wraps it in an IndividualsAssociation added to the taxon description
// obtained from the importer, saves the taxon, and finally sets the unit as
// the described specimen of td.
// Note that the association is added to the description fetched here
// (taxonDescription), while td is only used for setDescribedSpecimenOrObservation.
643 private void handleAssociation(Taxon acceptedTaxon
, Reference refMods
, TaxonDescription td
, MySpecimenOrObservation soo
) {
644 logger
.info("handleAssociation");
645 String descr
=soo
.getDescr();
646 DerivedUnit derivedUnitBase
= soo
.getDerivedUnitBase();
648 sourceHandler
.addAndSaveSource(refMods
, derivedUnitBase
);
650 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
652 Feature feature
=null;
653 feature
= makeFeature(derivedUnitBase
);
// A non-empty description text becomes the unit's title cache (set with the protect flag true).
654 if(!StringUtils
.isEmpty(descr
)) {
655 derivedUnitBase
.setTitleCache(descr
, true);
658 IndividualsAssociation indAssociation
= createIndividualAssociation(refMods
, derivedUnitBase
, feature
);
660 taxonDescription
.addElement(indAssociation
);
661 sourceHandler
.addAndSaveSource(refMods
, taxonDescription
,null);
662 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
663 td
.setDescribedSpecimenOrObservation(soo
.getDerivedUnitBase());
667 * create an individualAssociation
669 * @param derivedUnitBase
// Creates an IndividualsAssociation for the given derived unit and feature,
// adds refMods as source through the source handler and returns the
// association as returned by that call.
// NOTE(review): the line declaring the feature parameter is not visible in this excerpt.
673 private IndividualsAssociation
createIndividualAssociation(Reference refMods
, DerivedUnit derivedUnitBase
,
675 logger
.info("createIndividualAssociation");
676 IndividualsAssociation indAssociation
= IndividualsAssociation
.NewInstance();
677 indAssociation
.setAssociatedSpecimenOrObservation(derivedUnitBase
);
678 indAssociation
.setFeature(feature
);
679 indAssociation
= sourceHandler
.addSource(refMods
, indAssociation
);
680 return indAssociation
;
684 * @param specimenOrObservations
685 * @param descriptionsFulltext
687 * @param specimenOrObservation
// Registers specimenOrObservation in the list stored under key i (creating
// the list on first use) and appends the toString() of its derived unit,
// separated by a blank, to the full text stored under the same key.
689 private void extractTextFromSpecimenOrObservation(Map
<Integer
, List
<MySpecimenOrObservation
>> specimenOrObservations
,
690 Map
<Integer
, String
> descriptionsFulltext
, int i
, MySpecimenOrObservation specimenOrObservation
) {
691 logger
.info("extractTextFromSpecimenOrObservation");
692 List
<MySpecimenOrObservation
> speObsList
= specimenOrObservations
.get(i
);
693 if (speObsList
== null) {
694 speObsList
=new ArrayList
<MySpecimenOrObservation
>();
696 speObsList
.add(specimenOrObservation
);
697 specimenOrObservations
.put(i
,speObsList
);
// Text contribution of the specimen: the string form of its derived unit.
699 String s
= specimenOrObservation
.getDerivedUnitBase().toString();
700 if (descriptionsFulltext
.get(i
) !=null){
701 s
= descriptionsFulltext
.get(i
)+" "+s
;
703 descriptionsFulltext
.put(i
, s
);
707 * Extract the text with the inline link to a taxon
710 * @param descriptionsFulltext
// Appends the inline text produced by getInlineTextForName for the given
// node to the full text stored under key i. Unlike extractText, no separator
// is inserted between the existing text and the new part.
714 @SuppressWarnings("rawtypes")
715 private void extractInLine(List
<TaxonName
> nametosave
, Reference refMods
, Map
<Integer
, String
> descriptionsFulltext
,
716 int i
, Node paragraph
) {
717 //logger.info("extractInLine");
718 String inLine
=getInlineTextForName(nametosave
, refMods
, paragraph
);
719 if (descriptionsFulltext
.get(i
) !=null){
720 inLine
= descriptionsFulltext
.get(i
)+inLine
;
722 descriptionsFulltext
.put(i
, inLine
);
726 * Extract the raw text from a Node
727 * @param descriptionsFulltext
// Appends the trimmed text content of node to the full text stored under key
// i, separated by a blank from any text already present. Nodes whose trimmed
// text content is empty are ignored.
731 private void extractText(Map
<Integer
, String
> descriptionsFulltext
, int i
, Node node
) {
732 //logger.info("extractText");
733 if(!node
.getTextContent().trim().isEmpty()) {
734 String s
=node
.getTextContent().trim();
735 if (descriptionsFulltext
.get(i
) !=null){
736 s
= descriptionsFulltext
.get(i
)+" "+s
;
738 descriptionsFulltext
.put(i
, s
);
744 * @param materials: the XML node group
745 * @param acceptedTaxon: the current accepted Taxon
746 * @param refMods: the current reference extracted from the MODS
// Extracts a "materials examined" section. For every tax:p child the text of
// its children is accumulated in rawAssociation; each tax:collection_event
// child is turned into a derived unit via handleDerivedUnitFacadeAndBase, and
// remaining text is stored as TextData under Feature.MATERIALS_EXAMINED() on
// the accepted taxon's description.
// NOTE(review): the declaration and updates of the flag "added" are not visible in this excerpt.
748 @SuppressWarnings("rawtypes")
749 private void extractMaterials(Node materials
, Taxon acceptedTaxon
, Reference refMods
,List
<TaxonName
> nametosave
) {
750 logger
.info("EXTRACTMATERIALS");
751 // logger.info("acceptedTaxon: "+acceptedTaxon);
752 NodeList children
= materials
.getChildNodes();
753 NodeList events
= null;
757 for (int i
=0;i
<children
.getLength();i
++){
// Text gathered for the current child; reset for every child.
758 String rawAssociation
="";
760 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
761 events
= children
.item(i
).getChildNodes();
762 for(int k
=0;k
<events
.getLength();k
++){
// Inline taxon names contribute their inline text.
763 if (events
.item(k
).getNodeName().equalsIgnoreCase("tax:name")){
764 String inLine
= getInlineTextForName(nametosave
, refMods
, events
.item(k
));
765 if(!inLine
.isEmpty()) {
766 rawAssociation
+=inLine
;
// Anything that is neither a name nor a collection event contributes its trimmed text content.
769 if (! events
.item(k
).getNodeName().equalsIgnoreCase("tax:name")
770 && !events
.item(k
).getNodeName().equalsIgnoreCase("tax:collection_event")){
771 rawAssociation
+= events
.item(k
).getTextContent().trim();
// A collection event: text failing containsDistinctLetters (semicolons removed) is replaced by a placeholder first.
773 if(events
.item(k
).getNodeName().equalsIgnoreCase("tax:collection_event")){
774 if (!containsDistinctLetters(rawAssociation
.replaceAll(";",""))) {
775 rawAssociation
="no description text";
778 handleDerivedUnitFacadeAndBase(acceptedTaxon
, refMods
, events
.item(k
), rawAssociation
);
780 if (!rawAssociation
.isEmpty() && !added
){
782 Feature feature
= Feature
.MATERIALS_EXAMINED();
783 featuresMap
.put(feature
.getTitleCache(),feature
);
785 TextData textData
= createTextData(rawAssociation
, refMods
, feature
);
787 if(! rawAssociation
.isEmpty() && (acceptedTaxon
!=null)){
788 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
789 td
.addElement(textData
);
790 acceptedTaxon
.addDescription(td
);
791 sourceHandler
.addAndSaveSource(refMods
, td
, null);
793 // DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
794 // derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
796 // TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
797 // acceptedTaxon.addDescription(taxonDescription);
799 // IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
801 // Feature feature = Feature.MATERIALS_EXAMINED();
802 // featuresMap.put(feature.getTitleCache(),feature);
803 // if(!StringUtils.isEmpty(rawAssociation)) {
804 // derivedUnitBase.setTitleCache(rawAssociation, true);
806 // indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
807 // indAssociation.setFeature(feature);
808 // indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
810 // /*boolean sourceExists=false;
811 // Set<DescriptionElementSource> dsources = indAssociation.getSources();
812 // for (DescriptionElementSource src : dsources){
813 // String micro = src.getCitationMicroReference();
814 // Reference r = src.getCitation();
815 // if (r.equals(refMods) && micro == null) {
816 // sourceExists=true;
819 // if(!sourceExists) {
820 // indAssociation.addSource(null, null, refMods, null);
822 // taxonDescription.addElement(indAssociation);
823 // taxonDescription.setTaxon(acceptedTaxon);
824 // taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
826 // /*sourceExists=false;
827 // Set<IdentifiableSource> sources = taxonDescription.getSources();
828 // for (IdentifiableSource src : sources){
829 // String micro = src.getCitationMicroReference();
830 // Reference r = src.getCitation();
831 // if (r.equals(refMods) && micro == null) {
832 // sourceExists=true;
835 // if(!sourceExists) {
836 // taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
839 // importer.getDescriptionService().saveOrUpdate(taxonDescription);
// NOTE(review): the guard around this save is not visible here; confirm acceptedTaxon cannot be null at this point.
840 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
850 * @param acceptedTaxon
853 * @param rawAssociation
// Creates a derived unit for one collection event: obtains a facade from
// getFacade (raw text with semicolons removed), lets
// extractSpecimenOrObservation fill the unit from the event node, and attaches
// it as an IndividualsAssociation to the accepted taxon's description before
// saving the taxon.
// NOTE(review): acceptedTaxon is null-checked only when deriving typifiableName, yet it is passed
// unchecked to getTaxonDescription and saveOrUpdate below — confirm callers never pass null.
// NOTE(review): the declaration of descr is not visible in this excerpt.
856 private void handleDerivedUnitFacadeAndBase(Taxon acceptedTaxon
, Reference refMods
, Node event
,
857 String rawAssociation
) {
858 logger
.info("handleDerivedUnitFacadeAndBase");
860 DerivedUnit derivedUnitBase
;
861 MySpecimenOrObservation myspecimenOrObservation
;
862 DerivedUnitFacade derivedUnitFacade
= getFacade(rawAssociation
.replaceAll(";",""),SpecimenOrObservationType
.DerivedUnit
);
863 derivedUnitBase
= derivedUnitFacade
.innerDerivedUnit();
865 sourceHandler
.addAndSaveSource(refMods
, derivedUnitBase
);
867 //TODO this may not always be correct, ask user
868 TaxonName typifiableName
= acceptedTaxon
!= null ? acceptedTaxon
.getName() : null;
// derivedUnitBase is reassigned from the extraction result, hence the second addAndSaveSource call below.
869 myspecimenOrObservation
= extractSpecimenOrObservation(event
,derivedUnitBase
,SpecimenOrObservationType
.DerivedUnit
, typifiableName
);
870 derivedUnitBase
= myspecimenOrObservation
.getDerivedUnitBase();
871 descr
=myspecimenOrObservation
.getDescr();
873 sourceHandler
.addAndSaveSource(refMods
, derivedUnitBase
);
875 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
// Remember the feature under its title cache.
877 Feature feature
= makeFeature(derivedUnitBase
);
878 featuresMap
.put(feature
.getTitleCache(),feature
);
879 if(!StringUtils
.isEmpty(descr
)) {
880 derivedUnitBase
.setTitleCache(descr
, true);
883 IndividualsAssociation indAssociation
= createIndividualAssociation(refMods
, derivedUnitBase
, feature
);
885 taxonDescription
.addElement(indAssociation
);
886 sourceHandler
.addAndSaveSource(refMods
, taxonDescription
,null);
887 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
894 * @param materials: the XML node group
895 * @param acceptedTaxon: the current accepted Taxon
896 * @param refMods: the current reference extracted from the MODS
/**
 * Extracts a specimen directly from a materials / collection-event node, links
 * it to the accepted taxon as an individuals association and returns the title
 * cache of the resulting derived unit.
 *
 * @param materials the XML node group describing the material
 * @param acceptedTaxon the taxon that receives the association
 * @param refMods the reference used as source for all created objects
 * @param event the kind of event; "collection" (case-insensitive) selects the
 *        feature built by makeFeature, the other visible assignment uses
 *        Feature.MATERIALS_EXAMINED(); must not be null
 * @param currentName the name handed on to extractSpecimenOrObservation (may be
 *        null, as the caller in parseParagraph shows)
 * @return the title cache of the derived unit
 *
 * NOTE(review): the line between the two feature assignments is not visible;
 * presumably it is the else branch of the "collection" test - confirm.
 * NOTE(review): the declaration of descr is not visible in this block.
 */
898 private String
extractMaterialsDirect(Node materials
, Taxon acceptedTaxon
, Reference refMods
, String event
, TaxonName currentName
) {
899 logger
.info("extractMaterialsDirect");
900 // logger.info("acceptedTaxon: "+acceptedTaxon);
// derivedUnitBase is still null when it is passed in; the extraction result supplies the real unit
903 DerivedUnit derivedUnitBase
=null;
904 MySpecimenOrObservation myspecimenOrObservation
= extractSpecimenOrObservation(materials
,derivedUnitBase
, SpecimenOrObservationType
.DerivedUnit
, currentName
);
905 derivedUnitBase
= myspecimenOrObservation
.getDerivedUnitBase();
907 sourceHandler
.addAndSaveSource(refMods
, derivedUnitBase
);
909 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
911 Feature feature
=null;
912 if (event
.equalsIgnoreCase("collection")){
913 feature
= makeFeature(derivedUnitBase
);
916 feature
= Feature
.MATERIALS_EXAMINED();
918 featuresMap
.put(feature
.getTitleCache(), feature
);
920 descr
=myspecimenOrObservation
.getDescr();
// a non-empty extracted description becomes the title cache, which is also the return value below
921 if(!StringUtils
.isEmpty(descr
)) {
922 derivedUnitBase
.setTitleCache(descr
, true);
925 IndividualsAssociation indAssociation
= createIndividualAssociation(refMods
, derivedUnitBase
, feature
);
927 taxonDescription
.addElement(indAssociation
);
928 sourceHandler
.addAndSaveSource(refMods
, taxonDescription
,null);
929 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
931 return derivedUnitBase
.getTitleCache();
937 * @param description: the XML node group
938 * @param acceptedTaxon: the current acceptedTaxon
939 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
940 * @param nametosave: the list of objects to save into the CDM
941 * @param refMods: the current reference extracted from the MODS
942 * @param featureName: the feature name
/**
 * Walks the direct children of a description node for one named feature.
 *
 * Visible behaviour:
 * - every non-blank direct text child is collected in a list; when that list is
 *   not empty its elements joined by a blank are returned;
 * - for the feature name "table" (case-insensitive) HTML rows are built from
 *   tax:div children via extractTableHead / extractTableLine /
 *   extractTableLineWithColumn;
 * - for tax:p children the inline names and text fragments are joined and
 *   stored with setParticularDescription under the feature resolved from
 *   featureName;
 * - non-blank direct text children are additionally stored as their own
 *   description element.
 *
 * @param description the XML node group
 * @param acceptedTaxon the current accepted taxon
 * @param defaultTaxon the fallback taxon, passed on to setParticularDescription
 * @param nametosave the list of names to save, passed on to getInlineTextForName
 * @param refMods the current reference extracted from the MODS
 * @param featureName the feature name; must not be null
 * @return the joined direct text content when there is any
 *
 * NOTE(review): the declarations of head and line, the statements that use
 * line / table after they are built, and the return for the case without text
 * are not visible in this block.
 * NOTE(review): getNamedItem("otherType") is dereferenced without a null check;
 * a tax:div without that attribute would raise a NullPointerException.
 */
944 @SuppressWarnings({ "rawtypes"})
945 private String
extractSpecificFeature(Node description
, Taxon acceptedTaxon
, Taxon defaultTaxon
,
946 List
<TaxonName
> nametosave
, Reference refMods
, String featureName
) {
947 logger
.info("extractSpecificFeature "+featureName
);
948 // System.out.println("GRUUUUuu");
949 NodeList children
= description
.getChildNodes();
950 NodeList insideNodes
;
953 String localdescr
="";
954 List
<String
> blabla
=null;
955 List
<String
> text
= new ArrayList
<String
>();
957 String table
="<table>";
// resolves (or creates) the Feature term whose title equals featureName
961 Feature currentFeature
=getFeatureObjectFromString(featureName
);
963 // String fullContent = description.getTextContent();
964 for (int i
=0;i
<children
.getLength();i
++){
// direct, non-blank text children make up the return value
966 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text") && !children
.item(i
).getTextContent().trim().isEmpty()){
967 text
.add(children
.item(i
).getTextContent().trim());
// table feature: a tax:div with otherType "thead" is handed to extractTableHead
969 if (featureName
.equalsIgnoreCase("table")){
970 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
971 children
.item(i
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("thead")){
972 head
= extractTableHead(children
.item(i
));
974 line
= extractTableLine(children
.item(i
));
// "<tr></tr>" is treated as an empty row
975 if (!line
.equalsIgnoreCase("<tr></tr>")) {
// a tax:div with otherType "tr" is converted into one HTML row
979 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
980 children
.item(i
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
981 line
= extractTableLineWithColumn(children
.item(i
).getChildNodes());
982 if(!line
.equalsIgnoreCase("<tr></tr>")) {
// paragraph: collect inline names and text fragments, then store them as one description element
987 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
988 insideNodes
=children
.item(i
).getChildNodes();
989 blabla
= new ArrayList
<String
>();
990 for (int j
=0;j
<insideNodes
.getLength();j
++){
991 Node insideNode
= insideNodes
.item(j
);
992 if (insideNode
.getNodeName().equalsIgnoreCase("tax:name")){
993 String inlinetext
= getInlineTextForName(nametosave
, refMods
, insideNode
);
994 if (!inlinetext
.isEmpty()) {
995 blabla
.add(inlinetext
);
998 else if (insideNode
.getNodeName().equalsIgnoreCase("#text")) {
999 if(!insideNode
.getTextContent().trim().isEmpty()){
1000 blabla
.add(insideNode
.getTextContent().trim());
1001 // localdescr += insideNodes.item(j).getTextContent().trim();
1005 if (!blabla
.isEmpty()) {
1006 String blaStr
= StringUtils
.join(blabla
," ").trim();
// stringIsEmpty also rejects strings that consist of a single punctuation token
1007 if(!stringIsEmpty(blaStr
)) {
1008 setParticularDescription(blaStr
,acceptedTaxon
,defaultTaxon
, refMods
,currentFeature
);
// direct text child: stored as its own description element in addition to being collected above
1014 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text")){
1015 if(!children
.item(i
).getTextContent().trim().isEmpty()){
1016 localdescr
= children
.item(i
).getTextContent().trim();
1017 if(!stringIsEmpty(localdescr
)) {
1018 setParticularDescription(localdescr
,acceptedTaxon
,defaultTaxon
, refMods
,currentFeature
);
1025 if (!table
.equalsIgnoreCase("<table></table>")){
1026 // System.out.println("TABLE : "+table);
// text is initialised above and never reassigned here, so the null test is redundant
1030 if (text
!=null && !text
.isEmpty()) {
1031 return StringUtils
.join(text
," ");
/**
 * Converts one table-row node into an HTML row: when the node's "otherType"
 * attribute equals "tr" (case-insensitive) its children are passed to
 * extractTableLineWithColumn.
 *
 * @param child the node to inspect; its "otherType" attribute is read without
 *        a null check
 *
 * NOTE(review): the declaration of line and the return statement are not
 * visible in this block; presumably line is returned - confirm.
 */
1043 private String
extractTableLine(Node child
) {
1044 //logger.info("extractTableLine");
1047 if (child
.getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1048 line
= extractTableLineWithColumn(child
.getChildNodes());
/**
 * Builds the HTML for a table head: scans the children of the given node and
 * converts each tax:div child whose "otherType" attribute equals "tr" with
 * extractTableLineWithColumn.
 *
 * @param child the table-head node
 *
 * NOTE(review): line is assigned, not appended, inside the loop, so with several
 * header rows only the last one survives in the visible code; the declaration of
 * line and the return statement are not visible in this block.
 */
1059 private String
extractTableHead(Node child
) {
1060 //logger.info("extractTableHead");
1064 NodeList trNodes
= child
.getChildNodes();
1065 for (int k
=0;k
<trNodes
.getLength();k
++){
// the node-name test comes first, so getAttributes() is only evaluated for tax:div children
1066 if (trNodes
.item(k
).getNodeName().equalsIgnoreCase("tax:div")
1067 && trNodes
.item(k
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1068 line
= extractTableLineWithColumn(trNodes
.item(k
).getChildNodes());
1077 * build a html table line, with td columns
1079 * @return an html coded line
/**
 * Builds the cells of one HTML table row: every tax:p node in the given list
 * contributes a td element containing its text content.
 *
 * @param tdNodes the child nodes of a row node
 *
 * NOTE(review): the text content is inserted without HTML escaping.
 * NOTE(review): the initial value of line and the return statement are not
 * visible in this block; callers compare the result with "<tr></tr>", which
 * suggests the cells are wrapped in a tr element - confirm.
 */
1081 private String
extractTableLineWithColumn(NodeList tdNodes
) {
1082 //logger.info("extractTableLineWithColumn");
1085 for (int l
=0;l
<tdNodes
.getLength();l
++){
// nodes other than tax:p (for example whitespace text nodes) are skipped
1086 if (tdNodes
.item(l
).getNodeName().equalsIgnoreCase("tax:p")){
1087 line
+="<td>"+tdNodes
.item(l
).getTextContent()+"</td>";
1095 * @param description: the XML node group
1096 * @param acceptedTaxon: the current acceptedTaxon
1097 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1098 * @param nametosave: the list of objects to save into the CDM
1099 * @param refMods: the current reference extracted from the MODS
1100 * @param featureName: the feature name
/**
 * Collects the content of a description node as one unstructured text and
 * stores it under the feature resolved from featureName.
 *
 * All fragments go into a single list: inline names (via getInlineTextForName)
 * and non-blank text from tax:p children, plus non-blank direct text children.
 * After the loop the fragments are joined with blanks and, unless the result is
 * empty or pure punctuation, stored once with setParticularDescription.
 *
 * @param description the XML node group
 * @param acceptedTaxon the current accepted taxon
 * @param defaultTaxon the fallback taxon, passed on to setParticularDescription
 * @param nameToSave the list of names to save, passed on to getInlineTextForName
 * @param refMods the current reference extracted from the MODS
 * @param featureName the feature name
 *
 * NOTE(review): the return statement is not visible in this block.
 */
1102 @SuppressWarnings({ "unused", "rawtypes" })
1103 private String
extractSpecificFeatureNotStructured(Node description
, Taxon acceptedTaxon
, Taxon defaultTaxon
,
1104 List
<TaxonName
> nameToSave
, Reference refMods
, String featureName
) {
1105 logger
.info("extractSpecificFeatureNotStructured " + featureName
);
1106 NodeList children
= description
.getChildNodes();
1107 NodeList insideNodes
;
1108 List
<String
> blabla
= new ArrayList
<String
>();
1111 Feature currentFeature
= getFeatureObjectFromString(featureName
);
// fullContent is not read again in the visible code (see the "unused" suppression)
1113 String fullContent
= description
.getTextContent();
1114 for (int i
=0;i
<children
.getLength();i
++){
1115 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1116 insideNodes
=children
.item(i
).getChildNodes();
1117 for (int j
=0;j
<insideNodes
.getLength();j
++){
1118 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1119 String inlineText
=getInlineTextForName(nameToSave
, refMods
, insideNodes
.item(j
));
1120 if(!inlineText
.isEmpty()) {
1121 blabla
.add(inlineText
);
1124 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("#text")) {
1125 if(!insideNodes
.item(j
).getTextContent().trim().isEmpty()){
1126 blabla
.add(insideNodes
.item(j
).getTextContent().trim());
// direct text children are appended to the same list as the paragraph content
1131 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text")){
1132 if(!children
.item(i
).getTextContent().trim().isEmpty()){
1133 String localdescr
= children
.item(i
).getTextContent().trim();
// always true at this point: the enclosing test already excluded blank text
1134 if(!localdescr
.isEmpty())
1136 blabla
.add(localdescr
);
// blabla is created above and never reassigned, so the null test is redundant
1142 if (blabla
!=null && !blabla
.isEmpty()) {
1143 String blaStr
= StringUtils
.join(blabla
," ").trim();
1144 if (! stringIsEmpty(blaStr
)) {
1145 setParticularDescription(blaStr
,acceptedTaxon
,defaultTaxon
, refMods
,currentFeature
);
1160 private boolean stringIsEmpty(String blaStr
) {
1161 if (blaStr
.matches("(\\.|,|;|\\.-)?")){
1171 * @param insideNodes
/**
 * Produces the inline text for a tax:name node found inside running text.
 *
 * Visible behaviour: the direct text children of the node are concatenated and
 * a return of that text (line breaks removed, trimmed) is present. Further down
 * the name is resolved with getTaxonNameFromXML and wrapped in a tag:
 * a cdm:taxon tag carrying the taxon uuid when currentMyName provides a taxon,
 * or a cdm:taxonName tag carrying the name uuid when only the name exists;
 * otherwise a warning is logged.
 *
 * @param nametosave the list of names to save, passed on to getTaxonNameFromXML
 * @param refMods the current reference extracted from the MODS
 * @param insideNode the tax:name node
 *
 * NOTE(review): the declaration of result, the condition that decides between
 * the plain-text return and the tag-building part, and the final return are not
 * visible in this block.
 * NOTE(review): split("sec")[0] cuts at the first occurrence of the letters
 * "sec" anywhere in the title cache, not only at a " sec. " separator - confirm
 * that names containing "sec" cannot occur.
 */
1175 @SuppressWarnings({ "rawtypes" })
1176 private String
getInlineTextForName(List
<TaxonName
> nametosave
, Reference refMods
, Node insideNode
) {
1178 NodeList children
= insideNode
.getChildNodes();
// only direct text children contribute; text inside nested elements is ignored
1180 for (int i
=0;i
<children
.getLength();i
++){
1181 Node nameChild
= children
.item(i
);
1182 if(nameChild
.getNodeName().equalsIgnoreCase("#text")){
1183 result
+= nameChild
.getTextContent();
1188 return result
.replace("\n", "").trim();
1190 TaxonName tnb
= getTaxonNameFromXML(insideNode
, nametosave
,refMods
,false);
1191 // Taxon tax = getTaxonFromTxonNameBase(tnb, refMods);
// the taxon is taken from the currentMyName field, not looked up from tnb
1192 Taxon tax
= currentMyName
.getTaxon();
1193 if(tnb
!=null && tax
!= null){
1194 String linkedTaxon
= tnb
.getTitleCache().split("sec")[0];//TODO NOT IMPLEMENTED IN THE CDM YET
1195 return "<cdm:taxon uuid='"+tax
.getUuid()+"'>"+linkedTaxon
+"</cdm:taxon>";
1196 }else if (tnb
!= null && tax
== null){
1198 return "<cdm:taxonName uuid='" + tnb
.getUuid() +"'>" + tnb
.getTitleCache().split("sec")[0] +"</cdm:taxonName>";
1200 logger
.warn("Inline text has no content yet");
1207 * @param featureName
1210 @SuppressWarnings("rawtypes")
1211 private Feature
getFeatureObjectFromString(String featureName
) {
1212 logger
.info("getFeatureObjectFromString");
1213 List
<Feature
> features
= importer
.getTermService().list(Feature
.class, null,null,null,null);
1214 Feature currentFeature
=null;
1215 for (Feature feature
: features
){
1216 String tmpF
= feature
.getTitleCache();
1217 if (tmpF
.equalsIgnoreCase(featureName
)) {
1218 currentFeature
=feature
;
1219 // System.out.println("currentFeatureFromList "+currentFeature.getUuid());
1222 if (currentFeature
== null) {
1223 currentFeature
=Feature
.NewInstance(featureName
, featureName
, featureName
);
1224 if(featureName
.equalsIgnoreCase("Other")){
1225 currentFeature
.setUuid(OtherUUID
);
1227 if(featureName
.equalsIgnoreCase(notMarkedUp
)){
1228 currentFeature
.setUuid(NotMarkedUpUUID
);
1230 importer
.getTermService().saveOrUpdate(currentFeature
);
1232 return currentFeature
;
1239 * @param children: the XML node group
1240 * @param nametosave: the list of objects to save into the CDM
1241 * @param acceptedTaxon: the current acceptedTaxon
1242 * @param refMods: the current reference extracted from the MODS
1243 * @param fullContent :the parsed XML content
1244 * @return a list of description (text)
/**
 * Parses the children of a feature node into a list of description texts.
 *
 * Visible behaviour per direct child:
 * - non-blank text: appended to descr, which is stored at the end under the
 *   "not marked up" feature unless it is empty or pure punctuation;
 * - tax:p: its children are turned into fragments (inline names, text,
 *   bibliographic references with a trailing ';' replaced by '.', title caches
 *   of collection events); the fragments joined by a blank become one entry of
 *   the result when the joined string is not blank;
 * - tax:figure and table divs (type "Other", otherType "table"): handled by
 *   extractSpecificFeature, whose return value is added to the result.
 *
 * @param namesToSave the list of names to save, passed on to the name handling
 * @param acceptedTaxon the current accepted taxon; also passed as default taxon
 * @param refMods the current reference extracted from the MODS
 * @param paragraph the XML node whose children are parsed
 * @param feature the feature being parsed; only used for logging in the visible
 *        code and must not be null
 * @return the list of description texts
 *
 * NOTE(review): the declaration of descr is not visible in this block.
 * NOTE(review): inside tax:p the results of the figure / table calls are
 * assigned to locals and not added to the fragment list in the visible code.
 * NOTE(review): for direct tax:figure / table children the value returned by
 * extractSpecificFeature is added to the result without a visible null check.
 * NOTE(review): getNamedItem("type") / getNamedItem("otherType") are
 * dereferenced without a null check.
 */
1246 @SuppressWarnings({ "unused", "rawtypes" })
1247 private List
<String
> parseParagraph(List
<TaxonName
> namesToSave
, Taxon acceptedTaxon
, Reference refMods
, Node paragraph
, Feature feature
){
1248 logger
.info("parseParagraph "+feature
.toString());
1249 List
<String
> fullDescription
= new ArrayList
<String
>();
1250 // String localdescr;
1252 NodeList insideNodes
;
// collectionEvent / collectionEvents are only written; the code that consumed them is commented out below
1253 boolean collectionEvent
= false;
1254 List
<Node
>collectionEvents
= new ArrayList
<Node
>();
1256 NodeList children
= paragraph
.getChildNodes();
1258 for (int i
=0;i
<children
.getLength();i
++){
// text outside any markup is accumulated without a separator
1260 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text") && !children
.item(i
).getTextContent().trim().isEmpty()){
1261 descr
+= children
.item(i
).getTextContent().trim();
1263 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1264 insideNodes
=children
.item(i
).getChildNodes();
1265 List
<String
> blabla
= new ArrayList
<String
>();
1266 for (int j
=0;j
<insideNodes
.getLength();j
++){
1267 boolean nodeKnown
= false;
1268 // System.out.println("insideNodes.item(j).getNodeName() : "+insideNodes.item(j).getNodeName());
1269 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1270 String inlineText
= getInlineTextForName(namesToSave
, refMods
, insideNodes
.item(j
));
1271 if (!inlineText
.isEmpty()) {
1272 blabla
.add(inlineText
);
1276 else if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("#text")) {
1277 if(!insideNodes
.item(j
).getTextContent().trim().isEmpty()){
1278 blabla
.add(insideNodes
.item(j
).getTextContent().trim());
1279 // localdescr += insideNodes.item(j).getTextContent().trim();
1283 else if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:bibref")) {
1284 String ref
= insideNodes
.item(j
).getTextContent().trim();
// a trailing ';' is replaced by '.', except when the text is the single character ";"
1285 if (ref
.endsWith(";") && ((ref
.length())>1)) {
1286 ref
=ref
.substring(0, ref
.length()-1)+".";
// the Reference object is created only to carry the title cache; it is not saved in the visible code
1288 Reference reference
= ReferenceFactory
.newGeneric();
1289 reference
.setTitleCache(ref
, true);
1290 blabla
.add(reference
.getTitleCache());
1293 else if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:figure")){
1294 String figure
= extractSpecificFeature(insideNodes
.item(j
),acceptedTaxon
,acceptedTaxon
, namesToSave
, refMods
, "figure");
1297 else if(insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:div") &&
1298 insideNodes
.item(j
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1299 insideNodes
.item(j
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1300 String table
= extractSpecificFeature(insideNodes
.item(j
),acceptedTaxon
,acceptedTaxon
, namesToSave
, refMods
, "table");
1303 else if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1304 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
// the specimen is created right away; its title cache stands in for the event in the text
1305 String titlecache
= extractMaterialsDirect(insideNodes
.item(j
), acceptedTaxon
, refMods
, "collection", null);
1306 blabla
.add(titlecache
);
1307 collectionEvent
=true;
1308 collectionEvents
.add(insideNodes
.item(j
));
1311 logger
.warn("node not handled yet: " + insideNodes
.item(j
).getNodeName());
// one result entry per paragraph, skipped when the paragraph produced only whitespace
1315 if (!StringUtils
.isBlank(StringUtils
.join(blabla
," "))) {
1316 fullDescription
.add(StringUtils
.join(blabla
," "));
1319 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:figure")){
1320 String figure
= extractSpecificFeature(children
.item(i
),acceptedTaxon
,acceptedTaxon
, namesToSave
, refMods
, "Figure");
1321 fullDescription
.add(figure
);
1323 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
1324 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1325 children
.item(i
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1326 String table
= extractSpecificFeature(children
.item(i
),acceptedTaxon
,acceptedTaxon
, namesToSave
, refMods
, "table");
1327 fullDescription
.add(table
);
// text found outside any markup is stored separately under the "not marked up" feature
1331 if( !stringIsEmpty(descr
.trim())){
1332 Feature currentFeature
= getNotMarkedUpFeatureObject();
1333 setParticularDescription(descr
.trim(),acceptedTaxon
,acceptedTaxon
, refMods
,currentFeature
);
1335 // if (collectionEvent) {
1336 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1337 // for (Node coll:collectionEvents){
1338 // = extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
1341 return fullDescription
;
1346 * @param description: the XML node group
1347 * @param acceptedTaxon: the current acceptedTaxon
1348 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1349 * @param nametosave: the list of objects to save into the CDM
1350 * @param refMods: the current reference extracted from the MODS
1351 * @param feature: the feature to link the data with
1353 @SuppressWarnings("rawtypes")
1354 private void extractFeature(Node description
, Taxon acceptedTaxon
, Taxon defaultTaxon
, List
<TaxonName
> namesToSave
, Reference refMods
, Feature feature
){
1355 logger
.info("EXTRACT FEATURE "+feature
.toString());
1356 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1357 List
<String
> fullDescription
= parseParagraph( namesToSave
, acceptedTaxon
, refMods
, description
,feature
);
1359 // System.out.println("Feature : "+feature.toString()+", "+fullDescription.toString());
1360 if (!fullDescription
.isEmpty() &&!stringIsEmpty(StringUtils
.join(fullDescription
,"\n").trim())) {
1361 setParticularDescription(StringUtils
.join(fullDescription
,"\n").trim(),acceptedTaxon
,defaultTaxon
, refMods
,feature
);
1368 * @param descr: the XML Nodegroup to parse
1369 * @param acceptedTaxon: the current acceptedTaxon
1370 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1371 * @param refMods: the current reference extracted from the MODS
1372 * @param currentFeature: the feature name
/**
 * Stores a text as a TextData element of the given feature, on the accepted
 * taxon when there is one, otherwise on the default taxon.
 *
 * Before storing, a description that starts with the feature title
 * (case-insensitive) has the pattern "title + '.' + optional whitespace"
 * removed.
 *
 * @param descr the description text; must not be null
 * @param acceptedTaxon the current accepted taxon, may be null
 * @param defaultTaxon the taxon used only when acceptedTaxon is null
 * @param refMods the current reference extracted from the MODS, used as source
 * @param currentFeature the feature the text belongs to; must not be null
 *
 * NOTE(review): the feature title is concatenated into the regular expression
 * without quoting, and replaceAll removes every occurrence, not only the
 * leading one; a title containing regex metacharacters would change the
 * pattern or fail to compile.
 * NOTE(review): the accepted-taxon branch stores the text even when descr is
 * empty, while the default-taxon branch requires a non-empty descr.
 * NOTE(review): the try / if lines around the default-taxon lookup are not
 * visible in this block.
 */
1375 private void setParticularDescription(String descr
, Taxon acceptedTaxon
, Taxon defaultTaxon
, Reference refMods
, Feature currentFeature
) {
1376 logger
.info("setParticularDescription " + currentFeature
.getTitleCache()+", \n blabla : "+descr
);
1378 //remove redundant feature title
1379 String featureStr
= currentFeature
.getTitleCache();
1380 if (!descr
.isEmpty() && descr
.toLowerCase().startsWith(featureStr
.toLowerCase())){
1381 descr
= descr
.replaceAll("(?i)" + featureStr
+ "\\.\\s*", "");
1385 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1386 featuresMap
.put(currentFeature
.getTitleCache(),currentFeature
);
// the TextData is created once and added to whichever taxon description is chosen below
1388 TextData textData
= createTextData(descr
, refMods
, currentFeature
);
1390 if(acceptedTaxon
!=null){
1391 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
1392 td
.addElement(textData
);
1393 acceptedTaxon
.addDescription(td
);
1395 sourceHandler
.addAndSaveSource(refMods
, td
, null);
1396 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
// fallback: no accepted taxon, so the text goes to the default taxon
1399 if(! descr
.isEmpty() && (acceptedTaxon
== null) && (defaultTaxon
!= null)){
// the default taxon is looked up by uuid before use
1401 Taxon tmp
=(Taxon
) importer
.getTaxonService().find(defaultTaxon
.getUuid());
1403 defaultTaxon
=CdmBase
.deproxy(tmp
,Taxon
.class);
1405 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
// any exception from the lookup / save above is only logged at debug level
1407 }catch(Exception e
){
1408 logger
.debug("TAXON EXISTS"+defaultTaxon
);
1411 TaxonDescription td
=importer
.getTaxonDescription(defaultTaxon
, false, true);
1412 defaultTaxon
.addDescription(td
);
1413 td
.addElement(textData
);
1414 sourceHandler
.addAndSaveSource(refMods
, td
, null);
1415 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
1422 * @param currentFeature
1425 private TextData
createTextData(String descr
, Reference refMods
, Feature currentFeature
) {
1426 //logger.info("createTextData");
1427 TextData textData
= TextData
.NewInstance();
1428 textData
.setFeature(currentFeature
);
1429 sourceHandler
.addSource(refMods
, textData
);
1431 textData
.putText(Language
.UNKNOWN_LANGUAGE(), descr
);
1438 * @param descr: the XML Nodegroup to parse
1439 * @param acceptedTaxon: the current acceptedTaxon
1440 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1441 * @param refMods: the current reference extracted from the MODS
1442 * @param currentFeature: the feature name
/**
 * Variant of setParticularDescription with an additional reference: stores a
 * non-empty text as a TextData element of the given feature, on the accepted
 * taxon when there is one, otherwise on the default taxon.
 *
 * @param descr the description text; must not be null
 * @param acceptedTaxon the current accepted taxon, may be null
 * @param defaultTaxon the taxon used only when acceptedTaxon is null
 * @param currentRef the additional reference passed as third argument to
 *        addAndSaveSource
 * @param refMods the current reference extracted from the MODS
 * @param currentFeature the feature the text belongs to; must not be null
 *
 * NOTE(review): the accepted-taxon branch calls
 * addAndSaveSource(refMods, td, currentRef) while the default-taxon branch
 * calls addAndSaveSource(currentRef, td, currentRef); confirm which first
 * argument is intended.
 * NOTE(review): unlike the five-argument variant, the feature title is not
 * stripped from the text here.
 * NOTE(review): the try / if lines around the default-taxon lookup are not
 * visible in this block.
 */
1445 private void setParticularDescription(String descr
, Taxon acceptedTaxon
, Taxon defaultTaxon
,Reference currentRef
, Reference refMods
, Feature currentFeature
) {
1446 // System.out.println("setParticularDescriptionSPecial "+currentFeature);
1447 // logger.info("acceptedTaxon: "+acceptedTaxon);
1448 logger
.info("setParticularDescription");
1449 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1451 featuresMap
.put(currentFeature
.getTitleCache(),currentFeature
);
// the TextData is created even when descr turns out to be empty and nothing is stored
1452 TextData textData
= createTextData(descr
, refMods
, currentFeature
);
1454 if(! descr
.isEmpty() && (acceptedTaxon
!=null)){
1455 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
1456 td
.addElement(textData
);
1457 acceptedTaxon
.addDescription(td
);
1459 sourceHandler
.addAndSaveSource(refMods
, td
, currentRef
);
1460 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
// fallback: no accepted taxon, so the text goes to the default taxon
1463 if(! descr
.isEmpty() && (acceptedTaxon
== null) && (defaultTaxon
!= null)){
1465 Taxon tmp
=(Taxon
) importer
.getTaxonService().find(defaultTaxon
.getUuid());
1467 defaultTaxon
=CdmBase
.deproxy(tmp
,Taxon
.class);
1469 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
// any exception from the lookup / save above is only logged at debug level
1471 }catch(Exception e
){
1472 logger
.debug("TAXON EXISTS"+defaultTaxon
);
1475 TaxonDescription td
=importer
.getTaxonDescription(defaultTaxon
, false, true);
1476 defaultTaxon
.addDescription(td
);
1477 td
.addElement(textData
);
1478 sourceHandler
.addAndSaveSource(currentRef
, td
,currentRef
);
1479 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
1486 * @param synonyms: the XML Nodegroup to parse
1487 * @param nametosave: the list of objects to save into the CDM
1488 * @param acceptedTaxon: the current acceptedTaxon
1489 * @param refMods: the current reference extracted from the MODS
/**
 * Reads the scientific names below a synonymy node and adds them as synonyms to
 * the accepted taxon.
 *
 * Names are read with extractScientificNameSynonym from three places: the node
 * itself when it is a tax:name, tax:name children of tax:p children, and direct
 * tax:name children. For every collected name the following text is appended to
 * the name, an LSID is set when the identifier is longer than two characters,
 * and the synonym is added with type SYNONYM_OF unless the taxon already has
 * it. Finally the taxon is saved.
 *
 * @param synonymsNode the XML node group to parse
 * @param acceptedTaxon the current accepted taxon; must not be null because its
 *        uuid is read first
 * @param refMods the current reference extracted from the MODS
 * @param followingText text following the name in the source, passed on to the
 *        name extraction and to addFollowingTextToName
 *
 * NOTE(review): the try lines, the bodies of the catch blocks, the statements
 * that add myName to names and the statements that set synoExist are not
 * visible in this block.
 * NOTE(review): syn.getSec().equals(...) would raise a NullPointerException if
 * getSec() can return null for an existing synonym - confirm.
 */
1491 @SuppressWarnings({ "rawtypes" })
1492 private void extractSynonyms(Node synonymsNode
, Taxon acceptedTaxon
,Reference refMods
, String followingText
) {
1493 logger
.info("extractSynonyms");
1494 //System.out.println("extractSynonyms for: "+acceptedTaxon);
// the taxon is looked up by uuid before it is modified
1495 Taxon ttmp
= (Taxon
) importer
.getTaxonService().find(acceptedTaxon
.getUuid());
1497 acceptedTaxon
= CdmBase
.deproxy(ttmp
,Taxon
.class);
1500 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1502 NodeList children
= synonymsNode
.getChildNodes();
1503 List
<MyName
> names
= new ArrayList
<MyName
>();
// case 1: the node handed in is itself a name
1505 if(synonymsNode
.getNodeName().equalsIgnoreCase("tax:name")){
1507 MyName myName
= extractScientificNameSynonym(synonymsNode
, refMods
, followingText
);
1509 } catch (TransformerFactoryConfigurationError e
) {
1511 } catch (TransformerException e
) {
1517 for (int i
=0;i
<children
.getLength();i
++){
// case 2: names nested in a paragraph
1518 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1519 NodeList tmp
= children
.item(i
).getChildNodes();
1520 // String fullContent = children.item(i).getTextContent();
1521 for (int j
=0; j
< tmp
.getLength();j
++){
1522 if(tmp
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1524 MyName myName
= extractScientificNameSynonym(tmp
.item(j
),refMods
, followingText
);
1526 } catch (TransformerFactoryConfigurationError e
) {
1528 } catch (TransformerException e
) {
// case 3: names that are direct children
1534 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:name")){
1536 MyName myName
= extractScientificNameSynonym(children
.item(i
),refMods
, followingText
);
1538 } catch (TransformerFactoryConfigurationError e
) {
1540 } catch (TransformerException e
) {
1547 for(MyName name
:names
){
1548 TaxonName nameToBeFilled
= name
.getTaxonName();
1549 Synonym synonym
= name
.getSyno();
1550 addFollowingTextToName(nameToBeFilled
, followingText
);
1552 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1553 nameToBeFilled = parser.parseFullName(name.getName(), nomenclaturalCode, name.getRank());
1554 if (nameToBeFilled.hasProblem() &&
1555 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1556 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1557 addProblemNameToFile(name.getName(),"",nomenclaturalCode,name.getRank());
1558 nameToBeFilled = solveNameProblem(name.getOriginalName(), name.getName(), parser,name.getAuthor(), name.getRank());
1560 nameToBeFilled = getTaxonName(nameToBeFilled,nametosave,statusType);
// identifiers of at most two characters are not treated as an LSID
1562 if (!name
.getIdentifier().isEmpty() && (name
.getIdentifier().length()>2)){
1563 setLSID(name
.getIdentifier(), synonym
);
// duplicate check: compare name and sec reference with the synonyms the taxon already has
1566 Set
<Synonym
> synonymsSet
= acceptedTaxon
.getSynonyms();
1567 boolean synoExist
= false;
1568 for (Synonym syn
: synonymsSet
){
1570 boolean a
=syn
.getName().equals(synonym
.getName());
1571 boolean b
= syn
.getSec().equals(synonym
.getSec());
1576 if (!synonymsSet
.contains(synonym
) && ! (synoExist
)) {
1577 sourceHandler
.addSource(refMods
, synonym
);
1578 acceptedTaxon
.addSynonym(synonym
, SynonymType
.SYNONYM_OF());
1581 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
/**
 * Appends the text that follows a name in the source to the full title cache of
 * that name.
 *
 * Nothing is appended when the name is null, when the text is blank, or when the
 * text is just a single digit optionally followed by a dot. A leading comma of
 * the text is removed (and the rest trimmed) before the text is appended after a
 * "," separator; the full title cache is set with its second argument true.
 *
 * @param nameToBeFilled the name whose full title cache is extended, may be null
 * @param followingText the text following the name, may be null or blank
 *
 * NOTE(review): the return statements are not visible in this block;
 * presumably the result tells whether the text was appended - confirm.
 */
1585 private boolean addFollowingTextToName(TaxonName nameToBeFilled
, String followingText
) {
1586 if (nameToBeFilled
!= null && StringUtils
.isNotBlank(followingText
)){
// "\\d\\.?" matches the complete text only: one digit, optionally followed by a dot
1587 if (! followingText
.matches("\\d\\.?")){
1589 if (followingText
.startsWith(",")){
1590 followingText
= followingText
.substring(1).trim();
1592 nameToBeFilled
.setFullTitleCache(nameToBeFilled
.getFullTitleCache()+ "," +followingText
, true);
1601 * @param refgroup: the XML nodes
1602 * @param nametosave: the list of objects to save into the CDM
1603 * @param acceptedTaxon: the current acceptedTaxon
1604 * @param nametosave: the list of objects to save into the CDM
1605 * @param refMods: the current reference extracted from the MODS
1606 * @return the acceptedTaxon (why?)
1607 * handle cases where the bibref are inside <p> and outside
/**
 * Processes a reference group: bibliographic references are handed to a
 * ReferenceBuilder, untagged text is kept as "not marked up" description.
 *
 * Direct tax:bibref children are passed to the builder; when the builder reports
 * that no bibref was found, the child nodes of that element are processed by
 * extractReferenceRawText. Inside tax:p children, tax:bibref elements are passed
 * to the builder and non-blank text is accumulated in descr; when the builder
 * reports no bibref, the accumulated text is stored under the "not marked up"
 * feature unless it is empty or pure punctuation.
 *
 * @param refgroup the XML nodes
 * @param nametosave the list of names to save; only referenced in commented-out
 *        code in the visible part of this block
 * @param acceptedTaxon the current accepted taxon
 * @param refMods the current reference extracted from the MODS
 * @return the deproxied accepted taxon
 *
 * NOTE(review): the declaration of descr is not visible in this block, so it is
 * unclear whether the text is reset for every paragraph.
 */
1609 @SuppressWarnings({ "rawtypes" })
1610 private Taxon
extractReferences(Node refgroup
, List
<TaxonName
> nametosave
, Taxon acceptedTaxon
, Reference refMods
) {
1611 logger
.info("extractReferences");
1612 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1614 NodeList children
= refgroup
.getChildNodes();
1615 INonViralName nameToBeFilled
= getNonViralNameAccNomenclature();
// one builder instance is shared by all references of this group
1617 ReferenceBuilder refBuild
= new ReferenceBuilder(sourceHandler
);
1618 for (int i
=0;i
<children
.getLength();i
++){
// bibref directly below the group
1619 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:bibref")){
1620 String ref
= children
.item(i
).getTextContent().trim();
1621 refBuild
.builReference(ref
, treatmentMainName
, nomenclaturalCode
, acceptedTaxon
, refMods
);
1622 if (!refBuild
.isFoundBibref()){
1623 extractReferenceRawText(children
.item(i
).getChildNodes(), nameToBeFilled
, refMods
, acceptedTaxon
);
// bibrefs and free text inside a paragraph
1627 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1628 NodeList references
= children
.item(i
).getChildNodes();
1630 for (int j
=0;j
<references
.getLength();j
++){
1631 if(references
.item(j
).getNodeName().equalsIgnoreCase("tax:bibref")){
1632 String ref
= references
.item(j
).getTextContent().trim();
1633 refBuild
.builReference(ref
, treatmentMainName
, nomenclaturalCode
, acceptedTaxon
, refMods
);
1636 if (references
.item(j
).getNodeName().equalsIgnoreCase("#text")
1637 && !references
.item(j
).getTextContent().trim().isEmpty()){
1638 descr
+= references
.item(j
).getTextContent().trim();
1642 if (!refBuild
.isFoundBibref()){
1643 //if it's not tagged, put it as raw information.
1644 // extractReferenceRawText(references, nameToBeFilled, nametosave, refMods, acceptedTaxon);
1645 //then put it as a not markup feature if not empty
1646 if (!stringIsEmpty(descr
.trim())){
1647 Feature currentFeature
= getNotMarkedUpFeatureObject();
1648 setParticularDescription(descr
.trim(),acceptedTaxon
,acceptedTaxon
, refMods
,currentFeature
);
1653 // importer.getClassificationService().saveOrUpdate(classification);
1654 return acceptedTaxon
;
1659 * get the non viral name according to the current nomenclature
1663 private INonViralName
getNonViralNameAccNomenclature() {
1664 return nomenclaturalCode
.getNewTaxonNameInstance(null);
1668 * @return the feature object for the category "not marked up"
1670 private Feature
getNotMarkedUpFeatureObject() {
1671 // FIXME use getFeature(uuid ....)
1672 logger
.info("getNotMarkedUpFeatureObject");
1673 Feature currentFeature
= (Feature
)importer
.getTermService().find(NotMarkedUpUUID
);
1674 if (currentFeature
== null) {
1675 currentFeature
=Feature
.NewInstance(notMarkedUp
, notMarkedUp
, notMarkedUp
);
1676 currentFeature
.setUuid(NotMarkedUpUUID
);
1677 //TODO use userDefined Feature Vocabulary
1678 Feature
.DISTRIBUTION().getVocabulary().addTerm(currentFeature
);
1679 // importer.getTermService().saveOrUpdate(currentFeature);
1680 importer
.getVocabularyService().saveOrUpdate(currentFeature
.getVocabulary());
1682 return currentFeature
;
1687 * handle cases where the bibref are inside <p> and outside
1689 @SuppressWarnings("rawtypes")
1690 private void extractReferenceRawText(NodeList references
, INonViralName nameToBeFilled
, Reference refMods
,
1691 Taxon acceptedTaxon
) {
1692 logger
.info("extractReferenceRawText");
1693 String refString
="";
1694 currentMyName
= new MyName(true);
1695 for (int j
=0;j
<references
.getLength();j
++){
1696 acceptedTaxon
=CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1697 //no bibref tag inside
1698 // System.out.println("references.item(j).getNodeName()"+references.item(j).getNodeName());
1699 if (references
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1702 String followingText
= null; //needs to be checked if follText is possible
1703 //TODO create or not create?
1704 currentMyName
= extractScientificName(references
.item(j
), refMods
, followingText
);
1705 } catch (TransformerFactoryConfigurationError e
) {
1707 } catch (TransformerException e
) {
1711 // name=name.trim();
1713 if (references
.item(j
).getNodeName().equalsIgnoreCase("#text")){
1714 refString
= references
.item(j
).getTextContent().trim();
1716 if(references
.item(j
).getNodeName().equalsIgnoreCase("#text") && !references
.item(j
).getTextContent().trim().isEmpty()){
1718 if (!currentMyName
.getStatus().isEmpty()){
1719 String nomNovStatus
= this.newNameStatus(currentMyName
.getStatus());
1720 if (nomNovStatus
!= null){
1721 nameToBeFilled
.setAppendedPhrase(nomNovStatus
);
1724 NomenclaturalStatusType statusType
= nomStatusString2NomStatus(currentMyName
.getStatus());
1725 nameToBeFilled
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
1726 } catch (UnknownCdmTypeException e
) {
1727 addProblematicStatusToFile(currentMyName
.getStatus());
1728 logger
.warn("Problem with status");
1733 String fullLineRefName
= references
.item(j
).getTextContent().trim();
1734 int nameOrRefOrOther
=2;
1735 nameOrRefOrOther
=askIfNameContained(fullLineRefName
);
1736 if (nameOrRefOrOther
==0){
1737 TaxonName nameTBF
= currentMyName
.getTaxonName();
1738 Synonym synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
1740 Set
<Synonym
> synonymsSet
= acceptedTaxon
.getSynonyms();
1741 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1742 boolean synoExist
= false;
1743 for (Synonym syn
: synonymsSet
){
1744 // System.out.println(syn.getName()+" -- "+syn.getSec());
1745 boolean a
=syn
.getName().equals(synonym
.getName());
1746 boolean b
= syn
.getSec().equals(synonym
.getSec());
1751 if (!synonymsSet
.contains(synonym
) && ! (synoExist
)) {
1752 sourceHandler
.addSource(refMods
, synonym
);
1754 acceptedTaxon
.addSynonym(synonym
, SynonymType
.SYNONYM_OF());
1758 if (nameOrRefOrOther
==1){
1759 Reference re
= ReferenceFactory
.newGeneric();
1760 re
.setTitleCache(fullLineRefName
, true);
1762 /* TaxonName nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
1763 if (nameTBF.hasProblem() &&
1764 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1765 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
1766 nameTBF=solveNameProblem(currentMyName.getName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
1768 nameTBF = getTaxonName(nameTBF,nametosave,statusType);
1770 TaxonName nameTBF
= currentMyName
.getTaxonName();
1771 Synonym synonym
= Synonym
.NewInstance(nameTBF
, re
);
1773 Set
<Synonym
> synonymsSet
= acceptedTaxon
.getSynonyms();
1774 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1775 boolean synoExist
= false;
1776 for (Synonym syn
: synonymsSet
){
1777 // System.out.println(syn.getName()+" -- "+syn.getSec());
1778 boolean a
=syn
.getName().equals(synonym
.getName());
1779 boolean b
= syn
.getSec().equals(synonym
.getSec());
1784 if (!synonymsSet
.contains(synonym
) && ! (synoExist
)) {
1785 sourceHandler
.addSource(refMods
, synonym
);
1787 acceptedTaxon
.addSynonym(synonym
, SynonymType
.SYNONYM_OF());
1793 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
1794 setLSID(currentMyName
.getIdentifier(), acceptedTaxon
);
1798 if(!currentMyName
.getName().isEmpty()){
1799 //logger.info("acceptedTaxon and name: *"+acceptedTaxon.getTitleCache()+"*, *"+currentMyName.getName()+"*");
1800 if (acceptedTaxon
.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(currentMyName
.getName().trim())){
1801 Reference refS
= ReferenceFactory
.newGeneric();
1802 refS
.setTitleCache(refString
, true);
1803 // TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1804 // acceptedTaxon.addDescription(td);
1805 // acceptedTaxon.addSource(refSource);
1807 // TextData textData = TextData.NewInstance(Feature.CITATION());
1809 // textData.addSource(null, null, refS, null);
1810 // td.addElement(textData);
1811 // td.addSource(refSource);
1812 // importer.getDescriptionService().saveOrUpdate(td);
1815 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
1816 setLSID(currentMyName
.getIdentifier(), acceptedTaxon
);
1820 acceptedTaxon
.getName().setNomenclaturalReference(refS
);
1822 TaxonName nameTBF
= currentMyName
.getTaxonName();
1823 Synonym synonym
= null;
1824 if (! currentMyName
.getStatus().isEmpty()){
1825 String nomNovStatus
= this.newNameStatus(currentMyName
.getStatus());
1826 if (nomNovStatus
!= null){
1827 nameToBeFilled
.setAppendedPhrase(nomNovStatus
);
1830 NomenclaturalStatusType statusType
= nomStatusString2NomStatus(currentMyName
.getStatus());
1831 nameToBeFilled
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
1832 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
1833 } catch (UnknownCdmTypeException e
) {
1834 addProblematicStatusToFile(currentMyName
.getStatus());
1835 logger
.warn("Problem with status");
1836 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
1837 synonym
.setAppendedPhrase(currentMyName
.getStatus());
1841 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
1845 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
1846 setLSID(currentMyName
.getIdentifier(), synonym
);
1849 Set
<Synonym
> synonymsSet
= acceptedTaxon
.getSynonyms();
1850 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1851 boolean synoExist
= false;
1852 for (Synonym syn
: synonymsSet
){
1853 // System.out.println(syn.getName()+" -- "+syn.getSec());
1854 boolean a
=syn
.getName().equals(synonym
.getName());
1855 boolean b
= syn
.getSec().equals(synonym
.getSec());
1860 if (!synonymsSet
.contains(synonym
) && ! (synoExist
)) {
1861 sourceHandler
.addSource(refMods
, synonym
);
1863 acceptedTaxon
.addSynonym(synonym
, SynonymType
.SYNONYM_OF());
1867 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1875 * @param acceptedTaxon
1877 @SuppressWarnings("rawtypes")
1878 private void setLSID(String identifier
, TaxonBase
<?
> taxon
) {
1879 //logger.info("setLSID");
1880 // boolean lsidok=false;
1881 String id
= identifier
.split("__")[0];
1882 String source
= identifier
.split("__")[1];
1883 if (id
.indexOf("lsid")>-1){
1885 LSID lsid
= new LSID(id
);
1886 taxon
.setLsid(lsid
);
1888 } catch (MalformedLSIDException e
) {
1889 logger
.warn("Malformed LSID");
1894 //logger.info("search reference for LSID");
1895 // if ((id.indexOf("lsid")<0) || !lsidok){
1896 //ADD ORIGINAL SOURCE ID EVEN IF LSID
1897 Reference re
= null;
1898 Pager
<Reference
> references
= importer
.getReferenceService().findByTitleWithRestrictions(Reference
.class, source
, MatchMode
.EXACT
, null, 1, null, null, null);
1899 if( references
!=null && references
.getCount()>0){
1900 re
=references
.getRecords().get(0);
1902 //logger.info("search reference for LSID-end");
1904 re
= ReferenceFactory
.newGeneric();
1905 re
.setTitleCache(source
, true);
1906 importer
.getReferenceService().saveOrUpdate(re
);
1908 re
=CdmBase
.deproxy(re
, Reference
.class);
1910 //logger.info("search source for LSID");
1911 Set
<IdentifiableSource
> sources
= taxon
.getSources();
1912 boolean lsidinsource
=false;
1913 boolean urlinsource
=false;
1914 for (IdentifiableSource src
:sources
){
1915 if (id
.equalsIgnoreCase(src
.getIdInSource()) && re
.getTitleCache().equals(src
.getCitation().getTitleCache())) {
1918 if (src
.getIdInSource() == null && re
.getTitleCache().equals(sourceUrlRef
.getTitleCache())) {
1923 taxon
.addSource(OriginalSourceType
.Import
, id
,null,re
,null);
1927 sourceUrlRef
=CdmBase
.deproxy(sourceUrlRef
, Reference
.class);
1928 taxon
.addSource(OriginalSourceType
.Import
, null,null,sourceUrlRef
,null);
1935 * try to solve a parsing problem for a scientific name
1936 * @param original : the name from the OCR document
1937 * @param name : the tagged version
1939 * @return the corrected TaxonName
1941 /* @SuppressWarnings({ "unchecked", "rawtypes" })
1942 private TaxonName solveNameProblem(String original, String name, INonViralNameParser parser, String author, Rank rank) {
1943 Map<String,String> ato = namesMap.get(original);
1945 ato = namesMap.get(original+" "+author);
1949 if (ato == null && rank.equals(Rank.UNKNOWN_RANK())){
1950 rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
1952 if (ato != null && rank.equals(Rank.UNKNOWN_RANK())){
1953 rank = getRank(ato);
1955 // TaxonName nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
1956 TaxonName nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1957 // logger.info("RANK: "+rank);
1959 List<ParserProblem> problems = nameTBF.getParsingProblems();
1960 for (ParserProblem pb:problems) {
1961 System.out.println(pb.toString());
1963 while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
1964 addProblemNameToFile(name,author,nomenclaturalCode,rank);
1965 String fullname=name;
1966 if(! skippQuestion) {
1967 fullname = getFullReference(name,nameTBF.getParsingProblems());
1969 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1970 nameTBF = TaxonNameFactory.NewBotanicalInstance(null);
1972 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1973 nameTBF = TaxonNameFactory.NewZoologicalInstance(null);
1975 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1976 nameTBF= TaxonNameFactory.NewBacterialInstance(null);
1978 parser.parseReferencedName(nameTBF, fullname, rank, false);
1983 if (name.indexOf(author)>-1) {
1984 nameTBF = parser.parseSimpleName(name.substring(0, name.indexOf(author)), nomenclaturalCode, rank);
1986 nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1988 if (nameTBF.hasProblem()){
1989 if (name.indexOf(author)>-1) {
1990 addProblemNameToFile(name.substring(0, name.indexOf(author)),author,nomenclaturalCode,rank);
1992 addProblemNameToFile(name,author,nomenclaturalCode,rank);
1994 // System.out.println("TBF still has problems "+nameTBF.hasProblem());
1995 problems = nameTBF.getParsingProblems();
1996 for (ParserProblem pb:problems) {
1997 System.out.println(pb.toString());
1999 nameTBF.setFullTitleCache(name, true);
2001 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2002 ((BotanicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2004 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2005 ((ZoologicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2007 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2008 ((BacterialName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2011 // logger.info("FULL TITLE CACHE "+name);
2013 nameTBF.setFullTitleCache(name, true);
2022 * @param nomenclatureNode: the XML nodes
2023 * @param nametosave: the list of objects to save into the CDM
2024 * @param refMods: the current reference extracted from the MODS
/**
 * Walks the children of a nomenclature node and returns the accepted taxon built from them.
 *
 * A first loop reads every tax:status child into either newNameStatus or statusType.
 * A second loop dispatches on each child's node name: #text, tax:collection_event, tax:name,
 * tax:ref_group, tax:bibref, with a fallback for remaining free text.
 *
 * NOTE(review): several control-flow lines (try/else/closing braces and some local
 * declarations) are not visible in this copy of the method; the comments below describe only
 * the statements that are visible.
 */
2027 @SuppressWarnings({ "rawtypes" })
2028 private Taxon
extractNomenclature(Node nomenclatureNode
, List
<TaxonName
> nametosave
, Reference refMods
) throws ClassCastException
{
2029 refMods
=CdmBase
.deproxy(refMods
, Reference
.class);
2031 logger
.info("extractNomenclature");
2032 NodeList children
= nomenclatureNode
.getChildNodes();
2034 Taxon acceptedTaxon
= null;
2035 // INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2037 // String fullContent = nomenclatureNode.getTextContent();
2039 NomenclaturalStatusType statusType
= null;
2040 String newNameStatus
= null;
// First pass: collect the nomenclatural status from tax:status children.
2042 for (int i
=0;i
<children
.getLength();i
++){
2043 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:status")){
2044 String status
= children
.item(i
).getTextContent().trim();
2046 if (!status
.isEmpty()){
2047 if (newNameStatus(status
) != null){
2048 newNameStatus
= newNameStatus(status
);
2051 statusType
= nomStatusString2NomStatus(status
);
2052 } catch (UnknownCdmTypeException e
) {
// An unknown status string is recorded and logged, not rethrown.
2054 addProblematicStatusToFile(status
);
2055 logger
.warn("Problem with status: " + status
);
2062 boolean containsSynonyms
=false;
2063 boolean wasSynonym
= false;
2064 usedFollowingTextPrefix
= null; //reset
// Second pass: handle each child according to its node name.
2066 for (int i
=0; i
<children
.getLength(); i
++){
2067 Node childNode
= children
.item(i
);
2068 String childName
= childNode
.getNodeName();
// Look ahead: a non-blank #text sibling directly after this child becomes followingText.
2072 followingText
= null;
2073 if ( i
+ 1 < children
.getLength()){
2074 Node followingTextNode
= children
.item(i
+1);
2075 if (followingTextNode
.getNodeName().equals("#text") && !followingTextNode
.getTextContent().matches("\\s*") ){
2076 followingText
= followingTextNode
.getTextContent();
2081 if (childName
.equalsIgnoreCase("#text")) {
2082 freetext
= childNode
.getTextContent().trim();
// Strip the prefix recorded in usedFollowingTextPrefix from the free text.
2083 if (usedFollowingTextPrefix
!= null && freetext
.startsWith(usedFollowingTextPrefix
)){
2084 freetext
= freetext
.substring(usedFollowingTextPrefix
.length());
2086 usedFollowingTextPrefix
= null; //reset
2087 }else if (childName
.equalsIgnoreCase("tax:collection_event")) {
2088 // System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
2089 extractMaterialsDirect(childNode
, acceptedTaxon
, refMods
, "collection", currentMyName
.getTaxonName());
2090 }else if(childName
.equalsIgnoreCase("tax:name")){
2091 INonViralName nameToBeFilled
;
2092 //System.out.println("HANDLE FIRST NAME OF THE LIST");
// The first tax:name is the treatment's main name; containsSynonyms is set afterwards.
2093 if(!containsSynonyms
){
2096 //System.out.println("I : "+i);
2097 currentMyName
= new MyName(false);
2099 currentMyName
= extractScientificName(childNode
, refMods
, followingText
);
2100 treatmentMainName
= currentMyName
.getNewName();
2101 originalTreatmentName
= currentMyName
.getOriginalName();
2103 } catch (TransformerFactoryConfigurationError e1
) {
2104 throw new RuntimeException(e1
);
2105 } catch (TransformerException e1
) {
2106 throw new RuntimeException(e1
);
// maxRankRespected is set when the rank is unknown, lower than, or equal to the configured max rank.
2109 if (currentMyName
.getRank().equals(Rank
.UNKNOWN_RANK()) || currentMyName
.getRank().isLower(state2
.getConfig().getMaxRank()) || currentMyName
.getRank().equals(state2
.getConfig().getMaxRank())){
2110 maxRankRespected
=true;
2112 nameToBeFilled
=currentMyName
.getTaxonName();
2114 // acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
2115 acceptedTaxon
=currentMyName
.getTaxon();
2116 //System.out.println("TreatmentName "+treatmentMainName+" - "+acceptedTaxon);
2119 boolean statusMatch
=false;
2120 if(acceptedTaxon
!=null ){
2121 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
2122 statusMatch
=compareStatus(acceptedTaxon
, statusType
);
2123 //System.out.println("statusMatch: "+statusMatch);
// No taxon yet, or its status does not match: build a new accepted taxon from the name.
2125 if (acceptedTaxon
==null || (acceptedTaxon
!= null && !statusMatch
)){
2127 nameToBeFilled
=currentMyName
.getTaxonName();
2128 if (nameToBeFilled
!= null){
2129 if (!originalTreatmentName
.isEmpty()) {
2130 TaxonNameDescription td
= TaxonNameDescription
.NewInstance();
2131 td
.setTitleCache(originalTreatmentName
, true);
2132 nameToBeFilled
.addDescription(td
);
2135 if(statusType
!= null) {
2136 nameToBeFilled
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
2138 if(newNameStatus
!= null){
2139 nameToBeFilled
.setAppendedPhrase(newNameStatus
);
2141 sourceHandler
.addSource(refMods
, TaxonName
.castAndDeproxy(nameToBeFilled
));
// The new taxon's sec reference is refMods unless the name has a nomenclatural reference.
2143 if (nameToBeFilled
.getNomenclaturalReference() == null) {
2144 acceptedTaxon
= Taxon
.NewInstance(nameToBeFilled
,refMods
);
2145 //System.out.println("NEW ACCEPTED HERE "+nameToBeFilled);
2148 acceptedTaxon
= Taxon
.NewInstance(nameToBeFilled
,nameToBeFilled
.getNomenclaturalReference() );//TODO TOFIX reference
2149 //System.out.println("NEW ACCEPTED HERE2 "+nameToBeFilled);
2152 sourceHandler
.addSource(refMods
, acceptedTaxon
);
2154 if(!state2
.getConfig().doKeepOriginalSecundum()) {
2155 acceptedTaxon
.setSec(state2
.getConfig().getSecundum());
2156 //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2157 //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2160 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
2161 setLSID(currentMyName
.getIdentifier(), acceptedTaxon
);
2165 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
2166 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
2170 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
// Look for an existing source of the taxon whose citation title equals that of refMods.
2171 Set
<IdentifiableSource
> sources
= acceptedTaxon
.getSources();
2172 boolean sourcelinked
=false;
2173 for (IdentifiableSource source
:sources
){
2174 if (source
.getCitation().getTitleCache().equalsIgnoreCase(refMods
.getTitleCache())) {
2178 if (!state2
.getConfig().doKeepOriginalSecundum()) {
2179 acceptedTaxon
.setSec(state2
.getConfig().getSecundum());
2180 //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2181 //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2183 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
2186 sourceHandler
.addSource(refMods
, acceptedTaxon
);
2188 if (!sourcelinked
|| !state2
.getConfig().doKeepOriginalSecundum()){
2190 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
2191 //FIXME are these identifiers really related to taxa, not names? Exiting LSIDs come from Zoobank, urn:lsid:biosci.ohio-state.edu:osuc_concepts:134826 (Ants)
2192 setLSID(currentMyName
.getIdentifier(), acceptedTaxon
);
2194 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
2198 maxRankRespected
=false;
2200 containsSynonyms
=true; //all folowing names are handled as synonyms
// Later tax:name children go to extractSynonyms; a NullPointerException there is only logged.
2203 extractSynonyms(childNode
, acceptedTaxon
, refMods
, followingText
);
2206 }catch(NullPointerException e
){
2207 logger
.warn("null pointer exception, the accepted taxon might be null");
2210 containsSynonyms
=true;
// Reference groups are processed only while maxRankRespected is true.
2211 }else if (childName
.equalsIgnoreCase("tax:ref_group") && maxRankRespected
){
2212 reloadClassification();
2213 //extract the References within the document
2214 extractReferences(childNode
,nametosave
,acceptedTaxon
,refMods
);
2215 }else if (childName
.equalsIgnoreCase("tax:bibref")){
2216 logger
.warn(childName
+ " still preliminary");
2218 TaxonName currentName
= currentMyName
== null ?
null : currentMyName
.getTaxonName();
2219 boolean handled
= addFollowingTextToName (currentName
, childNode
.getTextContent() );
2221 setParticularDescription(freetext
.trim(), acceptedTaxon
,acceptedTaxon
, refMods
, getNotMarkedUpFeatureObject());
2224 logger
.warn(childName
+ " not yet handled");
// Non-empty free text that does not match \d\.? is offered to the current name or stored as a description.
2226 if(!stringIsEmpty(freetext
.trim())) {;
2227 if (! freetext
.matches("\\d\\.?")){
2228 TaxonName currentName
= currentMyName
== null ?
null : currentMyName
.getTaxonName();
2229 boolean handled
= false;
2230 if (currentName
!= null && !wasSynonym
){
2231 handled
= addFollowingTextToName (currentName
, childNode
.getTextContent() );
2234 setParticularDescription(freetext
.trim(), acceptedTaxon
,acceptedTaxon
, refMods
, getNotMarkedUpFeatureObject());
2242 //importer.getClassificationService().saveOrUpdate(classification);
2243 return acceptedTaxon
;
2253 private boolean compareStatus(TaxonBase
<?
> t
, NomenclaturalStatusType statusType
) {
2254 //logger.info("compareStatus");
2255 boolean statusMatch
=false;
2257 Set
<NomenclaturalStatus
> status
= t
.getName().getStatus();
2258 if (statusType
!=null && status
.size()>0){ //the statusType is known for both taxon
2259 for (NomenclaturalStatus st
:status
){
2260 NomenclaturalStatusType stype
= st
.getType();
2261 if (stype
.toString().equalsIgnoreCase(statusType
.toString())) {
2267 if(statusType
== null && status
.size()==0) {//there is no statusType, we can assume it's the same
2275 * @param acceptedTaxon: the current acceptedTaxon
2276 * @param ref: the current reference extracted from the MODS
2277 * @return the parent for the current accepted taxon
2279 /* private Taxon createParent(Taxon acceptedTaxon, Reference ref) {
2280 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2282 List<Rank> rankList = new ArrayList<Rank>();
2283 rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
2285 List<String> rankListStr = new ArrayList<String>();
2286 for (Rank r:rankList) {
2287 rankListStr.add(r.toString());
2290 String s = acceptedTaxon.getTitleCache();
2293 int addTaxon = askAddParent(s);
2294 logger.info("ADD TAXON: "+addTaxon);
2295 if (addTaxon == 0 ){
2296 Taxon tmp = askParent(acceptedTaxon, classification);
2298 s = askSetParent(s);
2299 r = askRank(s,rankListStr);
2301 TaxonName nameToBeFilled = null;
2302 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2303 nameToBeFilled = TaxonNameFactory.NewBotanicalInstance(null);
2305 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2306 nameToBeFilled = TaxonNameFactory.NewZoologicalInstance(null);
2308 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2309 nameToBeFilled = TaxonNameFactory.NewBacterialInstance(null);
2311 nameToBeFilled.setTitleCache(s, true);
2312 nameToBeFilled.setRank(getRank(r), true);
2314 tax = Taxon.NewInstance(nameToBeFilled, ref);
2320 createParent(tax, ref);
2321 // logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
2322 classification.addParentChild(tax, acceptedTaxon, ref, null);
2325 classification.addChildTaxon(acceptedTaxon, ref, null);
2329 classification.addChildTaxon(acceptedTaxon, ref, null);
2332 // logger.info("RETURN: "+tax );
2340 private MyName
extractScientificNameSynonym(Node name
, Reference refMods
, String followingText
) throws TransformerFactoryConfigurationError
, TransformerException
{
2341 //System.out.println("extractScientificNameSynonym");
2342 logger
.info("extractScientificNameSynonym");
2343 String
[] rankListToPrint_tmp
={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2344 List
<String
> rankListToPrint
= new ArrayList
<String
>();
2345 for (String r
: rankListToPrint_tmp
) {
2346 rankListToPrint
.add(r
.toLowerCase());
2349 Rank rank
= Rank
.UNKNOWN_RANK();
2350 NodeList children
= name
.getChildNodes();
2351 String originalName
="";
2352 String fullName
= "";
2354 String identifier
="";
2355 HashMap
<String
, String
> atomisedMap
= new HashMap
<String
, String
>();
2356 List
<String
> atomisedName
= new ArrayList
<String
>();
2358 String rankStr
= "";
2361 String status
= extractStatus(children
);
2363 for (int i
=0;i
<children
.getLength();i
++){
2364 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:xmldata")){
2365 NodeList atom
= children
.item(i
).getChildNodes();
2366 for (int k
=0;k
<atom
.getLength();k
++){
2367 identifier
= extractIdentifier(identifier
, atom
.item(k
));
2369 rankStr
= atom
.item(k
).getNodeName().toLowerCase();
2370 // logger.info("RANKSTR:*"+rankStr+"*");
2371 if (rankStr
.equalsIgnoreCase("dwc:taxonRank")) {
2372 rankStr
=atom
.item(k
).getTextContent().trim();
2373 tmpRank
= getRank(rankStr
);
2375 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2376 if (tmpRank
!= null){
2379 atomisedMap
.put(rankStr
.toLowerCase(),atom
.item(k
).getTextContent().trim());
2381 addAtomisedNamesToMap(rankListToPrint
, rank
, atomisedName
, atom
);
2383 if(children
.item(i
).getNodeName().equalsIgnoreCase("#text") && !StringUtils
.isBlank(children
.item(i
).getTextContent())){
2384 // logger.info("name non atomised: "+children.item(i).getTextContent());
2385 fullName
= children
.item(i
).getTextContent().trim();
2386 // logger.info("fullname: "+fullName);
2389 originalName
=fullName
;
2390 fullName
= cleanName(fullName
, atomisedName
);
2391 namesMap
.put(fullName
,atomisedMap
);
2393 String atomisedNameStr
= getAtomisedNameStr(atomisedName
);
2395 if (fullName
!= null){
2396 // System.out.println("fullname: "+fullName);
2397 // System.out.println("atomised: "+atomisedNameStr);
2398 if (!fullName
.equalsIgnoreCase(atomisedNameStr
)) {
2400 // String defaultN = "";
2401 if (atomisedNameStr
.length()>fullName
.length()) {
2402 newName
=atomisedNameStr
;
2404 if (fullName
.length()>atomisedNameStr
.length() && (rank
.isLower(Rank
.SPECIES()) && fullName
.length()>2 && !fullName
.substring(0, 1).equals("."))) {
2405 newName
=askWhichScientificName(fullName
,atomisedNameStr
,classification
.getTitleCache(),name
);
2411 newName
=askWhichScientificName(fullName
,atomisedNameStr
,classification
.getTitleCache(),name
);
2418 // rank = askForRank(newName, rank, nomenclaturalCode);
2419 // System.out.println("atomised: "+atomisedMap.toString());
2421 // String[] names = new String[5];
2422 MyName myname
= new MyName(true);
2424 //System.out.println("Handle "+newName+ "(rank: "+rank+")");
2425 // System.out.println(atomisedMap.keySet());
2426 fullName
= extractAuthorFromNames(rank
, fullName
, atomisedMap
, myname
);
2427 myname
.setOriginalName(fullName
);
2428 myname
.setNewName(newName
);
2429 myname
.setRank(rank
);
2430 myname
.setIdentifier(identifier
);
2431 myname
.setStatus(status
);
2432 myname
.setSource(refMods
);
2434 // boolean higherAdded=false;
2437 boolean parseNameManually
=false;
2438 INonViralNameParser
<?
> parser
= NonViralNameParserImpl
.NewInstance();
2439 TaxonName nameToBeFilledTest
;
2441 //if selected the atomised version
2442 if(newName
==atomisedNameStr
){
2443 nameToBeFilledTest
= parseWithExtension(parser
, atomisedNameStr
, rank
, followingText
, atomisedMap
);
2444 if (nameToBeFilledTest
.hasProblem()){
2445 addProblemNameToFile("ato",atomisedNameStr
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2446 nameToBeFilledTest
= (TaxonName
)parser
.parseFullName(fullName
, nomenclaturalCode
, rank
);
2447 if (nameToBeFilledTest
.hasProblem()){
2448 addProblemNameToFile("full",fullName
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2449 parseNameManually
=true;
2453 nameToBeFilledTest
= parseWithExtension(parser
, atomisedNameStr
, rank
, followingText
, atomisedMap
);
2454 if (nameToBeFilledTest
.hasProblem()){
2455 addProblemNameToFile("fullversion",fullName
, nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2456 nameToBeFilledTest
= (TaxonName
)parser
.parseFullName(fullName
, nomenclaturalCode
,rank
);
2457 parseNameManually
=true;
2458 if(!originalName
.equalsIgnoreCase(atomisedNameStr
)) {
2459 addNameDifferenceToFile(originalName
,atomisedNameStr
);
2464 if(parseNameManually
){
2465 //System.out.println("DO IT MANUALLY");
2466 if (this.state2
.getConfig().isUseOldUnparsedSynonymExtraction()){
2467 createUnparsedSynonym(rank
, newName
, atomisedMap
, myname
);
2469 createUnparsedSynonymNew(rank
, newName
, atomisedMap
, myname
, refMods
);;
2472 //System.out.println("AUTOMATIC!");
2473 // createAtomisedTaxonString(newName, atomisedMap, myname);
2474 myname
.setParsedName(nameToBeFilledTest
);
2475 myname
.buildTaxon();
2477 //System.out.println("RETURN SYNONYM "+myname.getSyno().toString());
2484 * @throws TransformerFactoryConfigurationError
2485 * @throws TransformerException
2486 * @return a list of possible names
2488 @SuppressWarnings({"rawtypes" })
2489 private MyName
extractScientificName(Node name
, Reference refMods
, String followingText
) throws TransformerFactoryConfigurationError
, TransformerException
{
2490 logger
.info("extractScientificName");
2492 String
[] rankListToPrintLowerCase_tmp
={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2493 List
<String
> rankListToPrint
= Arrays
.asList(rankListToPrintLowerCase_tmp
);
2495 Rank rank
= Rank
.UNKNOWN_RANK();
2496 NodeList children
= name
.getChildNodes();
2497 String originalName
= "";
2498 String fullName
= "";
2499 String newName
= "";
2500 String identifier
= "";
2501 HashMap
<String
, String
> atomisedMap
= new HashMap
<String
, String
>();
2502 List
<String
> atomisedNameList
= new ArrayList
<String
>();
2504 String status
= extractStatus(children
);
2506 for (int i
=0;i
<children
.getLength();i
++){
2507 Node nameChild
= children
.item(i
);
2508 if(nameChild
.getNodeName().equalsIgnoreCase("tax:xmldata")){
2509 NodeList xmlDataChildren
= nameChild
.getChildNodes();
2510 for (int k
=0;k
<xmlDataChildren
.getLength();k
++){
2511 Node xmlDataChild
= xmlDataChildren
.item(k
);
2512 identifier
= extractIdentifier(identifier
, xmlDataChild
);
2513 String rankStr
= xmlDataChild
.getNodeName().toLowerCase();
2514 if (rankStr
.equalsIgnoreCase("dwc:taxonRank")) {
2515 rankStr
=xmlDataChild
.getTextContent().trim();
2516 Rank tmpRank
= getRank(rankStr
);
2517 if (tmpRank
!= null){
2521 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2523 atomisedMap
.put(rankStr
.toLowerCase(),xmlDataChild
.getTextContent().trim());
2525 addAtomisedNamesToMap(rankListToPrint
, rank
, atomisedNameList
, xmlDataChildren
);
2527 else if(nameChild
.getNodeName().equalsIgnoreCase("#text") && ! nameChild
.getTextContent().matches("\\s*")){
2528 // logger.info("name non atomised: "+children.item(i).getTextContent());
2529 fullName
= nameChild
.getTextContent().trim();
2530 // logger.info("fullname: "+fullName);
2533 originalName
=fullName
;
2534 fullName
= cleanName(fullName
, atomisedNameList
);
2535 namesMap
.put(fullName
,atomisedMap
);
2537 String atomisedNameStr
= getAtomisedNameStr(atomisedNameList
);
2539 if (fullName
!= null){
2540 if (!fullName
.equalsIgnoreCase(atomisedNameStr
)) {
2542 if (atomisedNameStr
.length()>fullName
.length()) {
2543 newName
= atomisedNameStr
;
2545 if (fullName
.length()>atomisedNameStr
.length() && (rank
.isLower(Rank
.SPECIES()) && fullName
.length()>2 && !fullName
.substring(0, 1).equals("."))) {
2546 newName
= askWhichScientificName(fullName
, atomisedNameStr
, classification
.getTitleCache(), name
);
2552 newName
=askWhichScientificName(fullName
, atomisedNameStr
, classification
.getTitleCache(), name
);
2559 // rank = askForRank(newName, rank, nomenclaturalCode);
2560 // System.out.println("atomised: "+atomisedMap.toString());
2562 // String[] names = new String[5];
2563 MyName myname
= new MyName(false);
2565 //System.out.println("\n\nBUILD "+newName+ "(rank: "+rank+")");
2566 // System.out.println(atomisedMap.keySet());
2567 fullName
= extractAuthorFromNames(rank
, fullName
, atomisedMap
, myname
);
2568 myname
.setOriginalName(fullName
);
2569 myname
.setNewName(newName
);
2571 myname
.setRank(rank
);
2572 myname
.setIdentifier(identifier
);
2573 myname
.setStatus(status
);
2574 myname
.setSource(refMods
);
2576 // boolean higherAdded=false;
2579 boolean parseNameManually
=false;
2580 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
2581 TaxonName nameToBeFilledTest
= null;
2583 //if selected the atomised version
2584 if(newName
==atomisedNameStr
){
2585 nameToBeFilledTest
= parseWithExtension(parser
, atomisedNameStr
, rank
, followingText
, atomisedMap
);
2586 if (nameToBeFilledTest
.hasProblem()){
2587 addProblemNameToFile("ato",atomisedNameStr
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2588 nameToBeFilledTest
= (TaxonName
)parser
.parseFullName(fullName
, nomenclaturalCode
,rank
);
2589 if (nameToBeFilledTest
.hasProblem()){
2590 addProblemNameToFile("full",fullName
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2591 parseNameManually
=true;
2595 nameToBeFilledTest
= parseWithExtension(parser
, fullName
, rank
, followingText
, atomisedMap
);
2596 if (nameToBeFilledTest
.hasProblem()){
2597 addProblemNameToFile("fullversion",fullName
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2598 nameToBeFilledTest
= (TaxonName
)parser
.parseFullName(fullName
, nomenclaturalCode
,rank
);
2599 parseNameManually
=true;
2600 if(!originalName
.equalsIgnoreCase(atomisedNameStr
)) {
2601 addNameDifferenceToFile(originalName
,atomisedNameStr
);
2606 //System.out.println("parseNameManually: "+parseNameManually);
2607 if(parseNameManually
){
2608 createAtomisedTaxon(rank
, newName
, atomisedMap
, myname
);
2611 createAtomisedTaxonString(newName
, atomisedMap
, myname
);
2612 myname
.setParsedName(nameToBeFilledTest
);
2613 //TODO correct handling of createIfNotExists
2614 myname
.buildTaxon();
2620 private TaxonName
parseWithExtension(INonViralNameParser parser
, String atomisedNameStr
, Rank rank
, String followingText
, HashMap
<String
, String
> atomisedMap
) {
2621 Object
[] nameExtensionResult
= getPossibleExtension(followingText
, atomisedMap
, nomenclaturalCode
);
2623 TaxonName name
= (TaxonName
)parser
.parseFullName(atomisedNameStr
, nomenclaturalCode
, rank
);
2624 if (nameExtensionResult
!= null && nameExtensionResult
[0] != null){
2625 String ext
= (String
)nameExtensionResult
[0];
2626 TaxonName extName
= (TaxonName
)parser
.parseFullName(atomisedNameStr
+ " " + ext
, nomenclaturalCode
, rank
);
2627 if (! extName
.hasProblem()){
2629 this.usedFollowingTextPrefix
= ext
;
2630 //TODO do we need to fill the atomisedMap at all?
2631 if ((Boolean
)(nameExtensionResult
[1])){
2634 if ((Boolean
)(nameExtensionResult
[2])){
2635 //TODO BasionymYear etc.
2636 Integer origYear
= name
.getPublicationYear();
2637 if (origYear
!= null){
2638 atomisedMap
.put(PUBLICATION_YEAR
, origYear
.toString());
2646 private Object
[] getPossibleExtension(String followingText
, HashMap
<String
, String
> atomisedMap
, NomenclaturalCode nomenclaturalCode
) {
2647 if (StringUtils
.isBlank(followingText
)){
2651 boolean includeAuthor
= true;
2652 boolean includeYear
= false;
2653 if (atomisedMap
.containsKey("dwc:scientificnameauthorship")){
2654 includeAuthor
= false;
2656 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICZN
)){
2659 String patternStr
= "";
2661 patternStr
+= NonViralNameParserImplRegExBase
.capitalWord
;
2664 patternStr
+= "\\s*(,|\\s+)\\s*" + "(17|18|19|20)" + "\\d{2}" ;
2666 String match
= null;
2667 if (! patternStr
.isEmpty()){
2668 Pattern pattern
= Pattern
.compile("^" + patternStr
);
2669 Matcher matcher
= pattern
.matcher(followingText
.trim());
2670 if (matcher
.find()){
2671 match
= matcher
.group();
2675 return new Object
[]{match
, includeAuthor
, includeYear
};
2679 * @param atomisedName
2682 private String
getAtomisedNameStr(List
<String
> atomisedName
) {
2683 //logger.info("getAtomisedNameStr");
2684 String atomisedNameStr
= StringUtils
.join(atomisedName
," ");
2685 while(atomisedNameStr
.contains(" ")) {
2686 atomisedNameStr
=atomisedNameStr
.replace(" ", " ");
2688 atomisedNameStr
=atomisedNameStr
.trim();
2689 return atomisedNameStr
;
2697 private String
extractStatus(NodeList children
) {
2698 logger
.info("extractStatus");
2700 for (int i
=0;i
<children
.getLength();i
++){
2701 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:status") ||
2702 (children
.item(i
).getNodeName().equalsIgnoreCase("tax:namePart") &&
2703 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("status"))){
2704 status
= children
.item(i
).getTextContent().trim();
2716 private String
extractIdentifier(String identifier
, Node atom
) {
2717 //logger.info("extractIdentifier");
2718 if (atom
.getNodeName().equalsIgnoreCase("tax:xid")){
2720 identifier
= atom
.getAttributes().getNamedItem("identifier").getNodeValue();
2721 }catch(Exception e
){
2722 System
.out
.println("pb with identifier, maybe empty");
2725 identifier
+="__"+atom
.getAttributes().getNamedItem("source").getNodeValue();
2726 }catch(Exception e
){
2727 System
.out
.println("pb with identifier, maybe empty");
2734 * @param rankListToPrint
2736 * @param atomisedName
2739 private void addAtomisedNamesToMap(List
<String
> rankListToPrint
, Rank rank
, List
<String
> atomisedName
, NodeList atom
) {
2740 logger
.info("addAtomisedNamesToMap");
2741 for (int k
=0;k
<atom
.getLength();k
++){
2742 Node node
= atom
.item(k
);
2743 String nodeName
= node
.getNodeName();
2744 if (! nodeName
.equalsIgnoreCase("dwc:taxonRank") ) { //rank has been handled in higher method
2745 if (nodeName
.equalsIgnoreCase("dwc:subgenus") || nodeName
.equalsIgnoreCase("dwcranks:subgenus")) {
2746 atomisedName
.add("("+ node
.getTextContent().trim()+")");
2747 } else if(nodeName
.equalsIgnoreCase("dwcranks:varietyepithet") || nodeName
.equalsIgnoreCase("dwc:Subspecies") || nodeName
.equalsIgnoreCase("dwc:infraspecificepithet")) {
2748 if(nodeName
.equalsIgnoreCase("dwcranks:varietyepithet")){
2749 atomisedName
.add("var. "+node
.getTextContent().trim());
2750 }else if(nodeName
.equalsIgnoreCase("dwc:Subspecies") || nodeName
.equalsIgnoreCase("dwc:infraspecificepithet")) {
2751 atomisedName
.add("subsp. "+atom
.item(k
).getTextContent().trim());
2753 } else if(rankListToPrint
.contains(nodeName
.toLowerCase())) {
2754 atomisedName
.add(node
.getTextContent().trim());
2756 if (rank
.isHigher(Rank
.GENUS()) && (nodeName
.indexOf("dwcranks:")>-1 || nodeName
.indexOf("dwc:Family")>-1)) {
2757 atomisedName
.add(node
.getTextContent().trim());
2758 }else if (nodeName
.equals("#text")){
2759 String text
= node
.getTextContent();
2760 if (StringUtils
.isNotBlank(text
)){
2762 logger
.warn("name xmldata contains text. This is unhandled");
2764 }else if (nodeName
.matches("(?i)(dwc:Kingdom|dwc:Class|dwc:Order|dwc:Family)")){
2765 //we currently do not use higher ranks information
2767 //TODO handle unhandled node
2768 logger
.warn("Unhandled node: " + nodeName
);
2777 * @param atomisedName
2780 private String
cleanName(String name
, List
<String
> atomisedName
) {
2781 //logger.info("cleanName");
2782 String fullName
=name
;
2783 if (fullName
!= null){
2784 fullName
= fullName
.replace("( ", "(");
2785 fullName
= fullName
.replace(" )",")");
2787 if (fullName
.trim().isEmpty()){
2788 fullName
=StringUtils
.join(atomisedName
," ");
2791 while(fullName
.contains(" ")) {
2792 fullName
=fullName
.replace(" ", " ");
2793 // logger.info("while");
2795 fullName
=fullName
.trim();
2803 * @param atomisedMap
2807 private String
extractAuthorFromNames(Rank rank
, String name
, HashMap
<String
, String
> atomisedMap
, MyName myname
) {
2808 logger
.info("extractAuthorFromNames");
2809 String fullName
=name
;
2810 if (atomisedMap
.get("dwc:scientificnameauthorship") == null && fullName
!=null){
2811 // System.out.println("rank : "+rank.toString());
2812 if(rank
.isHigher(Rank
.SPECIES())){
2815 if(atomisedMap
.get("dwcranks:subgenus") != null) {
2816 author
= fullName
.split(atomisedMap
.get("dwcranks:subgenus"))[1].trim();
2818 if(atomisedMap
.get("dwc:subgenus") != null) {
2819 author
= fullName
.split(atomisedMap
.get("dwc:subgenus"))[1].trim();
2821 if(author
== null) {
2822 if(atomisedMap
.get("dwc:genus") != null) {
2823 author
= fullName
.split(atomisedMap
.get("dwc:genus"))[1].trim();
2827 fullName
= fullName
.substring(0, fullName
.indexOf(author
));
2828 author
=author
.replaceAll(",","").trim();
2829 myname
.setAuthor(author
);
2831 }catch(Exception e
){
2832 //could not extract the author
2835 if(rank
.equals(Rank
.SPECIES())){
2838 if(author
== null) {
2839 if(atomisedMap
.get("dwc:species") != null) {
2840 String
[] t
= fullName
.split(atomisedMap
.get("dwc:species"));
2841 // System.out.println("NB ELEMENTS "+t.length +"fullName "+fullName+", "+atomisedMap.get("dwc:species"));
2842 author
= fullName
.split(atomisedMap
.get("dwc:species"))[1].trim();
2843 // System.out.println("AUTEUR "+author);
2847 fullName
= fullName
.substring(0, fullName
.indexOf(author
));
2848 author
=author
.replaceAll(",","").trim();
2849 myname
.setAuthor(author
);
2851 }catch(Exception e
){
2852 //could not extract the author
2856 myname
.setAuthor(atomisedMap
.get("dwc:scientificnameauthorship"));
2863 * @param atomisedMap
2866 private void createAtomisedTaxonString(String newName
, HashMap
<String
, String
> atomisedMap
, MyName myname
) {
2867 logger
.info("createAtomisedTaxonString "+atomisedMap
);
2868 if(atomisedMap
.get("dwc:family") != null && checkRankValidForImport(Rank
.FAMILY())){
2869 myname
.setFamilyStr(atomisedMap
.get("dwc:family"));
2871 if(atomisedMap
.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank
.SUBFAMILY())){
2872 myname
.setSubfamilyStr(atomisedMap
.get("dwcranks:subfamily"));
2874 if(atomisedMap
.get("dwcranks:tribe") != null && checkRankValidForImport(Rank
.TRIBE())){
2875 myname
.setTribeStr(atomisedMap
.get("dwcranks:tribe"));
2877 if(atomisedMap
.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank
.SUBTRIBE())){
2878 myname
.setSubtribeStr(atomisedMap
.get("dwcranks:subtribe"));
2880 if(atomisedMap
.get("dwc:genus") != null && checkRankValidForImport(Rank
.GENUS())){
2881 myname
.setGenusStr(atomisedMap
.get("dwc:genus"));
2883 if(atomisedMap
.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS())){
2884 myname
.setSubgenusStr(atomisedMap
.get("dwcranks:subgenus"));
2886 if(atomisedMap
.get("dwc:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS())){
2887 myname
.setSubgenusStr(atomisedMap
.get("dwc:subgenus"));
2889 if(atomisedMap
.get("dwc:species") != null && checkRankValidForImport(Rank
.SPECIES())){
2891 if(atomisedMap
.get("dwc:infraspecificepithet") != null) {
2892 n
=newName
.split(atomisedMap
.get("dwc:infraspecificepithet"))[0];
2893 n
=n
.replace("subsp.","");
2895 if(atomisedMap
.get("dwc:subspecies") != null) {
2896 n
=newName
.split(atomisedMap
.get("dwc:subspecies"))[0];
2897 n
=n
.replace("subsp.","");
2899 if(atomisedMap
.get("dwcranks:varietyepithet") != null) {
2900 n
=newName
.split(atomisedMap
.get("dwcranks:varietyepithet"))[0];
2901 n
=n
.replace("var.","");
2902 n
=n
.replace("v.","");
2904 if(atomisedMap
.get("dwcranks:formepithet") != null) {
2906 System
.out
.println("TODO FORMA");
2907 n
=newName
.split(atomisedMap
.get("dwcranks:formepithet"))[0];
2908 n
=n
.replace("forma","");
2911 String author
= myname
.getAuthor();
2912 if(n
.split(" ").length
>2){
2914 String n2
=n
.split(" ")[0]+" "+n
.split(" ")[1];
2917 a
=n
.split(n2
)[1].trim();
2918 }catch(Exception e
){
2919 logger
.info("no author in "+n
+"?");}
2921 myname
.setAuthor(a
);
2922 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
2927 myname
.setSpeciesStr(atomisedMap
.get("dwc:species"));
2928 myname
.setAuthor(author
);
2930 if(atomisedMap
.get("dwc:subspecies") != null && checkRankValidForImport(Rank
.SUBSPECIES())){
2931 myname
.setSubspeciesStr(atomisedMap
.get("dwc:subspecies"));
2933 if(atomisedMap
.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank
.SUBSPECIES())){
2934 myname
.setSubspeciesStr(atomisedMap
.get("dwc:infraspecificepithet"));
2936 if(atomisedMap
.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank
.VARIETY())){
2937 myname
.setVarietyStr(atomisedMap
.get("dwcranks:varietyepithet"));
2939 if(atomisedMap
.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank
.FORM())){
2940 myname
.setFormStr(atomisedMap
.get("dwcranks:formepithet"));
2942 if (atomisedMap
.get(PUBLICATION_YEAR
) != null){
2943 myname
.setPublicationYear(Integer
.valueOf(atomisedMap
.get(PUBLICATION_YEAR
)));
2948 * @see #createUnparsedSynonymNew(Rank, String, HashMap, MyName)
2951 * @param atomisedMap
2954 private void createUnparsedSynonym(Rank rank
, String newName
, HashMap
<String
, String
> atomisedMap
, MyName myname
) {
2955 logger
.info("createSynonym");
2956 //System.out.println("createsynonym");
2957 if(rank
.equals(Rank
.UNKNOWN_RANK())){
2958 myname
.setNotParsableTaxon(newName
);
2960 if(atomisedMap
.get("dwc:family") != null && checkRankValidForImport(Rank
.FAMILY()) && rank
.equals(Rank
.FAMILY())){
2961 myname
.setFamily(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:family"),newName
, Rank
.FAMILY(),rank
));
2963 if(atomisedMap
.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank
.SUBFAMILY()) && rank
.equals(Rank
.SUBFAMILY())){
2964 myname
.setSubfamily(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subfamily"), newName
,Rank
.SUBFAMILY(),rank
));
2966 if(atomisedMap
.get("dwcranks:tribe") != null && checkRankValidForImport(Rank
.TRIBE()) && rank
.equals(Rank
.TRIBE())){
2967 myname
.setTribe(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:tribe"),newName
, Rank
.TRIBE(),rank
));
2969 if(atomisedMap
.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank
.SUBTRIBE()) && rank
.equals(Rank
.SUBTRIBE())){
2970 myname
.setSubtribe(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subtribe"),newName
, Rank
.SUBTRIBE(),rank
));
2972 if(atomisedMap
.get("dwc:genus") != null && checkRankValidForImport(Rank
.GENUS()) && rank
.equals(Rank
.GENUS())){
2973 myname
.setGenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:genus"),newName
, Rank
.GENUS(),rank
));
2975 if(atomisedMap
.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS()) && rank
.equals(Rank
.SUBGENUS())){
2976 myname
.setSubgenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subgenus"),newName
, Rank
.SUBGENUS(),rank
));
2978 if(atomisedMap
.get("dwc:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS()) && rank
.equals(Rank
.SUBGENUS())){
2979 myname
.setSubgenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:subgenus"),newName
, Rank
.SUBGENUS(),rank
));
2981 if(atomisedMap
.get("dwc:species") != null && checkRankValidForImport(Rank
.SPECIES()) && rank
.equals(Rank
.SPECIES())){
2983 if(atomisedMap
.get("dwc:infraspecificepithet") != null) {
2984 n
=newName
.split(atomisedMap
.get("dwc:infraspecificepithet"))[0];
2985 n
=n
.replace("subsp.","");
2987 if(atomisedMap
.get("dwc:subspecies") != null) {
2988 n
=newName
.split(atomisedMap
.get("dwc:subspecies"))[0];
2989 n
=n
.replace("subsp.","");
2991 if(atomisedMap
.get("dwcranks:varietyepithet") != null) {
2992 n
=newName
.split(atomisedMap
.get("dwcranks:varietyepithet"))[0];
2993 n
=n
.replace("var.","");
2994 n
=n
.replace("v.","");
2996 if(atomisedMap
.get("dwcranks:formepithet") != null) {
2998 //System.out.println("TODO FORMA");
2999 n
=newName
.split(atomisedMap
.get("dwcranks:formepithet"))[0];
3000 n
=n
.replace("forma","");
3003 String author
= myname
.getAuthor();
3004 if(n
.split(" ").length
>2){
3006 String n2
=n
.split(" ")[0]+" "+n
.split(" ")[1];
3009 a
= n
.split(n2
)[1].trim();
3010 }catch(Exception e
){logger
.info("no author in "+n
);}
3011 myname
.setAuthor(a
);
3012 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3016 Taxon species
= myname
.findOrCreateTaxon(atomisedMap
.get("dwc:species"),n
, Rank
.SPECIES(),rank
);
3017 myname
.setSpecies(species
);
3018 myname
.setAuthor(author
);
3020 if(atomisedMap
.get("dwc:subspecies") != null && checkRankValidForImport(Rank
.SUBSPECIES()) && rank
.equals(Rank
.SUBSPECIES())){
3021 myname
.setSubspecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:subspecies"), newName
,Rank
.SUBSPECIES(),rank
));
3023 if(atomisedMap
.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank
.SUBSPECIES()) && rank
.equals(Rank
.SUBSPECIES())){
3024 myname
.setSubspecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:infraspecificepithet"),newName
, Rank
.SUBSPECIES(),rank
));
3026 if(atomisedMap
.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank
.VARIETY()) && rank
.equals(Rank
.VARIETY())){
3027 myname
.setVariety(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:varietyepithet"),newName
, Rank
.VARIETY(),rank
));
3029 if(atomisedMap
.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank
.FORM()) && rank
.equals(Rank
.FORM())){
3030 myname
.setForm(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:formepithet"), newName
,Rank
.FORM(),rank
));
3039 * @see #createUnparsedSynonym(Rank, String, HashMap, MyName)
 * The original TaxonXImport extracted synonyms by creating accepted taxa with partial names.
 * I (AM) do not understand this, but I do not want to destroy code which may work in some cases;
 * therefore I created this switch to keep the old behaviour available.
 * For spiders the new version is preferred.
3045 private void createUnparsedSynonymNew(Rank rank
, String newName
, HashMap
<String
, String
> atomisedMap
, MyName myname
, Reference refMods
) {
3046 logger
.info("createSynonym");
3048 INonViralName nameToBeFilled
= this.getNonViralNameAccNomenclature();
3049 //System.out.println("createsynonym");
3050 if(rank
.equals(Rank
.UNKNOWN_RANK())){
3052 myname
.setNotParsableTaxon(newName
);
3054 nameToBeFilled
.setTitleCache(newName
, true);
3056 if(atomisedMap
.get("dwc:genus") != null ){
3057 nameToBeFilled
.setGenusOrUninomial(atomisedMap
.get("dwc:genus"));
3059 if (rank
.isSupraGeneric()){
3060 if (atomisedMap
.get("dwcranks:subtribe") != null ){
3061 nameToBeFilled
.setGenusOrUninomial(atomisedMap
.get("dwcranks:subtribe"));
3062 }else if (atomisedMap
.get("dwcranks:subtribe") != null ){
3063 nameToBeFilled
.setGenusOrUninomial(atomisedMap
.get("dwcranks:subtribe"));
3064 }else if (atomisedMap
.get("dwcranks:tribe") != null ){
3065 nameToBeFilled
.setGenusOrUninomial(atomisedMap
.get("dwcranks:tribe"));
3066 }else if (atomisedMap
.get("dwcranks:subfamily") != null ){
3067 nameToBeFilled
.setGenusOrUninomial(atomisedMap
.get("dwcranks:subfamily"));
3068 }else if (atomisedMap
.get("dwc:family") != null ){
3069 nameToBeFilled
.setGenusOrUninomial(atomisedMap
.get("dwc:family"));
3071 logger
.warn("Supra generic rank not yet handled or atomisation not available");
3074 if (atomisedMap
.get("dwcranks:subgenus") != null){
3075 nameToBeFilled
.setInfraGenericEpithet(atomisedMap
.get("dwcranks:subgenus"));
3077 if (atomisedMap
.get("dwc:subgenus") != null){
3078 nameToBeFilled
.setInfraGenericEpithet(atomisedMap
.get("dwc:subgenus"));
3080 if (atomisedMap
.get("dwc:species") != null){
3081 nameToBeFilled
.setSpecificEpithet(atomisedMap
.get("dwc:species"));
3083 if (atomisedMap
.get("dwcranks:formepithet") != null){
3084 nameToBeFilled
.setInfraSpecificEpithet(atomisedMap
.get("dwcranks:formepithet"));
3085 }else if (atomisedMap
.get("dwcranks:varietyepithet") != null){
3086 nameToBeFilled
.setInfraSpecificEpithet(atomisedMap
.get("dwcranks:varietyepithet"));
3087 }else if (atomisedMap
.get("dwc:infraspecificepithet") != null){
3088 nameToBeFilled
.setInfraSpecificEpithet(atomisedMap
.get("dwc:infraspecificepithet"));
3089 }else if (atomisedMap
.get("dwc:subspecies") != null){
3090 nameToBeFilled
.setInfraSpecificEpithet(atomisedMap
.get("dwc:subspecies"));
3092 Reference sec
= sourceUrlRef
;
3093 if(!state2
.getConfig().doKeepOriginalSecundum()){
3094 sec
= state2
.getConfig().getSecundum();
3096 Synonym syn
= Synonym
.NewInstance(nameToBeFilled
, sec
);
3097 // sourceHandler.addSource(refMods, syn);
3098 myname
.setSyno(syn
);
3099 myname
.setSynonym(true);
3106 * @param atomisedMap
3109 private void createAtomisedTaxon(Rank rank
, String newName
, HashMap
<String
, String
> atomisedMap
, MyName myname
) {
3110 logger
.info("createAtomisedTaxon "+atomisedMap
);
3111 if(rank
.equals(Rank
.UNKNOWN_RANK())){
3112 myname
.setNotParsableTaxon(newName
);
3115 if(atomisedMap
.get("dwc:family") != null && checkRankValidForImport(Rank
.FAMILY())){
3116 myname
.setFamily(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:family"),newName
, Rank
.FAMILY(),rank
));
3118 if(atomisedMap
.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank
.SUBFAMILY())){
3119 myname
.setSubfamily(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subfamily"), newName
,Rank
.SUBFAMILY(),rank
));
3121 if(atomisedMap
.get("dwcranks:tribe") != null && checkRankValidForImport(Rank
.TRIBE())){
3122 myname
.setTribe(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:tribe"),newName
, Rank
.TRIBE(),rank
));
3124 if(atomisedMap
.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank
.SUBTRIBE())){
3125 myname
.setSubtribe(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subtribe"),newName
, Rank
.SUBTRIBE(),rank
));
3127 if(atomisedMap
.get("dwc:genus") != null && checkRankValidForImport(Rank
.GENUS())){
3128 myname
.setGenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:genus"),newName
, Rank
.GENUS(),rank
));
3130 if(atomisedMap
.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS())){
3131 myname
.setSubgenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subgenus"),newName
, Rank
.SUBGENUS(),rank
));
3133 if(atomisedMap
.get("dwc:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS())){
3134 myname
.setSubgenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:subgenus"),newName
, Rank
.SUBGENUS(),rank
));
3136 if(atomisedMap
.get("dwc:species") != null && checkRankValidForImport(Rank
.SPECIES())){
3138 if(atomisedMap
.get("dwc:infraspecificepithet") != null) {
3139 n
=newName
.split(atomisedMap
.get("dwc:infraspecificepithet"))[0];
3140 n
=n
.replace("subsp.","");
3142 if(atomisedMap
.get("dwc:subspecies") != null) {
3143 n
=newName
.split(atomisedMap
.get("dwc:subspecies"))[0];
3144 n
=n
.replace("subsp.","");
3146 if(atomisedMap
.get("dwcranks:varietyepithet") != null) {
3147 n
=newName
.split(atomisedMap
.get("dwcranks:varietyepithet"))[0];
3148 n
=n
.replace("var.","");
3149 n
=n
.replace("v.","");
3151 if(atomisedMap
.get("dwcranks:formepithet") != null) {
3153 //System.out.println("TODO FORMA");
3154 n
=newName
.split(atomisedMap
.get("dwcranks:formepithet"))[0];
3155 n
=n
.replace("forma","");
3158 String author
= myname
.getAuthor();
3159 if(n
.split(" ").length
>2){
3160 String n2
=n
.split(" ")[0]+" "+n
.split(" ")[1];
3163 a
= n
.split(n2
)[1].trim();
3164 }catch(Exception e
){logger
.info("no author in "+n
);}
3165 myname
.setAuthor(a
);
3166 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3171 myname
.setSpecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:species"),n
, Rank
.SPECIES(),rank
));
3172 myname
.setAuthor(author
);
3174 if(atomisedMap
.get("dwc:subspecies") != null && checkRankValidForImport(Rank
.SUBSPECIES())){
3175 myname
.setSubspecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:subspecies"), newName
,Rank
.SUBSPECIES(),rank
));
3177 if(atomisedMap
.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank
.SUBSPECIES())){
3178 myname
.setSubspecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:infraspecificepithet"),newName
, Rank
.SUBSPECIES(),rank
));
3180 if(atomisedMap
.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank
.VARIETY())){
3181 myname
.setVariety(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:varietyepithet"),newName
, Rank
.VARIETY(),rank
));
3183 if(atomisedMap
.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank
.FORM())){
3184 myname
.setForm(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:formepithet"), newName
,Rank
.FORM(),rank
));
3192 private boolean checkRankValidForImport(Rank currentRank
) {
3193 //logger.info("checkRankValidForImport");
3194 return currentRank
.isLower(state2
.getConfig().getMaxRank()) || currentRank
.equals(state2
.getConfig().getMaxRank());
3200 * @param classification2
3202 public void updateClassification(Classification classification2
) {
3203 //logger.info("updateClassification");
3204 classification
= classification2
;
3209 public class MyName
{
3213 public MyName(boolean isSynonym
) {
3215 this.isSynonym
= isSynonym
;
3218 String originalName
="";
3220 Rank rank
=Rank
.UNKNOWN_RANK();
3221 String identifier
="";
3225 TaxonName taxonName
;
3229 Taxon family
,subfamily
,tribe
,subtribe
,genus
,subgenus
,species
,subspecies
, variety
,form
;
3230 INonViralName familyName
, subfamilyName
, tribeName
,subtribeName
,genusName
,subgenusName
,speciesName
,subspeciesName
;
3231 String familyStr
, subfamilyStr
, tribeStr
,subtribeStr
,genusStr
,subgenusStr
,speciesStr
,subspeciesStr
,formStr
,varietyStr
;
3232 Integer publicationYear
;
3237 private Taxon taxon
;
3238 private Synonym syno
;
3243 public Synonym
getSyno() {
3248 public String
toString(){
3249 List
<String
> tot
=new ArrayList
<String
>();
3250 String
[] n
= {familyStr
, subfamilyStr
, tribeStr
,subtribeStr
,genusStr
,subgenusStr
,speciesStr
,subspeciesStr
,formStr
,varietyStr
};
3252 if (!StringUtils
.isEmpty(elt
)) {
3258 return StringUtils
.join(tot
," ");
3261 * @param syno the syno to set
3263 public void setSyno(Synonym syno
) {
3267 boolean isSynonym
=false;
3270 * @return the isSynonym
3272 public boolean isSynonym() {
3277 * @param isSynonym the isSynonym to set
/**
 * Sets the {@code isSynonym} flag of this name.
 *
 * @param isSynonym the new value of the flag
 */
public void setSynonym(boolean isSynonym) {
    this.isSynonym = isSynonym;
}
3283 public void setSource(Reference re
){
3290 public void setFormStr(String string
) {
3291 this.formStr
=string
;
3297 public void setVarietyStr(String string
) {
3298 this.varietyStr
=string
;
3304 public void setSubspeciesStr(String string
) {
3305 this.subspeciesStr
=string
;
3311 public void setSpeciesStr(String string
) {
3312 this.speciesStr
=string
;
3318 public void setSubgenusStr(String string
) {
3319 this.subgenusStr
=string
;
3325 public void setGenusStr(String string
) {
3326 this.genusStr
=string
;
3332 public void setSubtribeStr(String string
) {
3333 this.subtribeStr
=string
;
3339 public void setTribeStr(String string
) {
3340 this.tribeStr
=string
;
3346 public void setSubfamilyStr(String string
) {
3347 this.subfamilyStr
=string
;
3353 public void setFamilyStr(String string
) {
3354 this.familyStr
=string
;
3358 * @return the familyStr
3360 public String
getFamilyStr() {
3364 * @return the subfamilyStr
3366 public String
getSubfamilyStr() {
3367 return subfamilyStr
;
3370 * @return the tribeStr
3372 public String
getTribeStr() {
3376 * @return the subtribeStr
3378 public String
getSubtribeStr() {
3382 * @return the genusStr
3384 public String
getGenusStr() {
3388 * @return the subgenusStr
3390 public String
getSubgenusStr() {
3394 * @return the speciesStr
3396 public String
getSpeciesStr() {
3400 * @return the subspeciesStr
3402 public String
getSubspeciesStr() {
3403 return subspeciesStr
;
3406 * @return the formStr
3408 public String
getFormStr() {
3412 * @return the varietyStr
3414 public String
getVarietyStr() {
3418 public Integer
getPublicationYear() {
3419 return publicationYear
;
/**
 * Stores the publication year of this name.
 *
 * @param publicationYear the year to store
 */
public void setPublicationYear(Integer publicationYear) {
    this.publicationYear = publicationYear;
}
3429 public void setNotParsableTaxon(String newName2
) {
3430 //takes too much time
3431 // List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3433 NomenclaturalStatusType statusType
= null;
3434 if (!getStatus().isEmpty()){
3436 statusType
= nomStatusString2NomStatus(getStatus());
3437 } catch (UnknownCdmTypeException e
) {
3438 addProblematicStatusToFile(getStatus());
3439 logger
.warn("Problem with status");
3442 List
<TaxonBase
> tmpList
= new ArrayList
<>();
3444 Pager
<TaxonBase
> taxontest
= importer
.getTaxonService().findByTitleWithRestrictions(TaxonBase
.class, newName2
, MatchMode
.BEGINNING
, null, null, null, null, null);
3445 tmpList
.addAll(taxontest
.getRecords());
3447 //logger.info("tmpList returned: "+tmpList.size());
3450 INonViralName identicName
= null;
3451 boolean foundIdentic
=false;
3452 TaxonBase
<?
> tmpTaxonBase
=null;
3453 // Taxon tmpPartial=null;
3454 for (TaxonBase
<?
> tmpb
:tmpList
){
3456 TaxonName tnb
= tmpb
.getName();
3459 if (tnb
.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(newName2
) ){
3460 crank
=tnb
.getRank();
3461 if (crank
!=null && rank
!=null){
3462 if (crank
.equals(rank
)){
3464 if (isSynonym
&& tmpb
.isInstanceOf(Synonym
.class) || !isSynonym
&& tmpb
.isInstanceOf(Taxon
.class)){
3475 boolean statusMatch
=false;
3476 boolean appendedMatch
=false;
3477 if(tmpTaxonBase
!=null && foundIdentic
){
3478 statusMatch
=compareStatus(tmpTaxonBase
, statusType
);
3479 if (!getStatus().isEmpty() && ! (tmpTaxonBase
.getAppendedPhrase() == null)) {
3480 appendedMatch
=tmpTaxonBase
.getAppendedPhrase().equals(getStatus());
3482 if (getStatus().isEmpty() && tmpTaxonBase
.getAppendedPhrase() == null) {
3487 if ((tmpTaxonBase
== null || !foundIdentic
) || (tmpTaxonBase
!= null && !statusMatch
) || (tmpTaxonBase
!= null && !appendedMatch
&& !statusMatch
)){
3490 if (identicName
== null){
3491 tnb
= getNonViralNameAccNomenclature();
3494 if(statusType
!= null) {
3495 tnb
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
3497 if(StringUtils
.isNotBlank(getStatus())) {
3498 tnb
.setAppendedPhrase(getStatus());
3500 tnb
.setTitleCache(newName2
,true);
3501 tmpTaxonBase
= findMatchingTaxon(tnb
,refMods
);
3506 if(tmpTaxonBase
==null){
3507 tmpTaxonBase
= isSynonym ? Synonym
.NewInstance(tnb
, refMods
) : Taxon
.NewInstance(tnb
, refMods
);
3508 if(!state2
.getConfig().doKeepOriginalSecundum()) {
3509 tmpTaxonBase
.setSec(state2
.getConfig().getSecundum());
3511 //tmptaxonbase.setSec(refMods);
3513 classification
.addChildTaxon((Taxon
)tmpTaxonBase
, null, null);
3514 sourceHandler
.addSource(refMods
, (Taxon
)tmpTaxonBase
);
3519 tmpTaxonBase
= CdmBase
.deproxy(tmpTaxonBase
, TaxonBase
.class);
3520 if (author
!= null) {
3521 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
3522 setLSID(getIdentifier(), tmpTaxonBase
);
3523 importer
.getTaxonService().saveOrUpdate(tmpTaxonBase
);
3524 tmpTaxonBase
= CdmBase
.deproxy(tmpTaxonBase
, TaxonBase
.class);
3527 TaxonName tnb
= CdmBase
.deproxy(tmpTaxonBase
.getName(), TaxonName
.class);
3530 this.taxon
=(Taxon
)tmpTaxonBase
;
3532 if (tmpTaxonBase
instanceof Taxon
){
3533 logger
.warn("Incorrect status");
3535 this.syno
=(Synonym
)tmpTaxonBase
;
3545 public void buildTaxon() {
3546 //System.out.println("BUILD TAXON");
3547 logger
.info("buildTaxon");
3548 NomenclaturalStatusType statusType
= null;
3549 if (!getStatus().isEmpty()){
3550 status
= getStatus();
3551 String newNameStatus
= newNameStatus(status
);
3552 if (newNameStatus
!= null){
3553 taxonName
.setAppendedPhrase(newNameStatus
);
3556 statusType
= nomStatusString2NomStatus(getStatus());
3557 taxonName
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
3558 } catch (UnknownCdmTypeException e
) {
3559 addProblematicStatusToFile(getStatus());
3560 logger
.warn("Problem with status");
3564 importer
.getNameService().save(taxonName
);
3566 TaxonBase
<?
> tmpTaxonBase
;
3568 tmpTaxonBase
=Taxon
.NewInstance(taxonName
, refMods
); //sec set null
3571 tmpTaxonBase
=Synonym
.NewInstance(taxonName
, refMods
); //sec set null
3573 boolean exist
= false;
3575 for (TaxonNode node
: classification
.getAllNodes()){
3577 Taxon nodeTaxon
= node
.getTaxon();
3578 boolean titleMatches
= nodeTaxon
.getTitleCache().equalsIgnoreCase(tmpTaxonBase
.getTitleCache());
3579 boolean nomStatusMatches
= compareStatus(node
.getTaxon(), statusType
);
3580 boolean nodeNameReplaceable
= checkNodeNameReplaceable(nodeTaxon
, tmpTaxonBase
);
3581 if(titleMatches
&& nomStatusMatches
) {
3583 tmpTaxonBase
=CdmBase
.deproxy(nodeTaxon
, TaxonBase
.class);
3586 logger
.info("Found the same name but from another type (taxon/synonym)");
3587 TaxonName existingTnb
= getTaxon().getName();
3588 tmpTaxonBase
= Synonym
.NewInstance(existingTnb
, refMods
);
3589 importer
.getTaxonService().saveOrUpdate(tmpTaxonBase
);
3592 }else if (nodeNameReplaceable
){
3593 nodeTaxon
.setName(tmpTaxonBase
.getName());
3594 tmpTaxonBase
= nodeTaxon
;
3597 }catch(NullPointerException n
){logger
.warn(" A taxon is either null or its titlecache is null - ignore it?");}
3602 boolean insertAsExisting
=false;
3603 List
<Taxon
> existingTaxons
=new ArrayList
<Taxon
>();
3605 existingTaxons
= getMatchingTaxa(taxonName
);
3606 } catch (Exception e1
) {
3607 e1
.printStackTrace();
3609 double similarityScore
=0.0;
3610 double similarityAuthor
=-1;
3615 for (Taxon bestMatchingTaxon
: existingTaxons
){
3616 //System.out.println("tnbase "+taxonname.getTitleCache());
3617 //System.out.println("bestex "+bestMatchingTaxon.getTitleCache());
3618 if(taxonName
.getAuthorshipCache()!=null) {
3619 author1
=taxonName
.getAuthorshipCache();
3622 if(bestMatchingTaxon
.getName().getAuthorshipCache()!=null) {
3623 author2
=bestMatchingTaxon
.getName().getAuthorshipCache();
3625 } catch (Exception e
) {
3626 // TODO Auto-generated catch block
3627 e
.printStackTrace();
3630 t1
=taxonName
.getTitleCache();
3631 if (author1
!=null && !StringUtils
.isEmpty(author1
)) {
3632 t1
=t1
.split(Pattern
.quote(author1
))[0];
3634 } catch (Exception e
) {
3635 // TODO Auto-generated catch block
3636 e
.printStackTrace();
3639 t2
=bestMatchingTaxon
.getTitleCache().split("sec.")[0].trim();
3640 if (author2
!=null && !StringUtils
.isEmpty(author2
)) {
3641 t2
=t2
.split(Pattern
.quote(author2
))[0];
3643 } catch (Exception e
) {
3644 // TODO Auto-generated catch block
3645 e
.printStackTrace();
3648 similarityScore
=similarity(t1
.trim(), t2
.trim());
3649 //System.out.println("taxonscore "+similarityScore);
3650 similarityAuthor
=similarity(author1
.trim(), author2
.trim());
3651 //System.out.println("authorscore "+similarityAuthor);
3652 insertAsExisting
= compareAndCheckTaxon(taxonName
, refMods
, similarityScore
, bestMatchingTaxon
, similarityAuthor
);
3653 if(insertAsExisting
) {
3654 tmpTaxonBase
=bestMatchingTaxon
;
3658 if ( !insertAsExisting
){
3659 if(!state2
.getConfig().doKeepOriginalSecundum()) {
3660 tmpTaxonBase
.setSec(state2
.getConfig().getSecundum());
3663 // tmptaxonbase.setSec(refMods);
3664 if (taxonName
.getRank().equals(state2
.getConfig().getMaxRank())) {
3665 //System.out.println("****************************"+tmptaxonbase);
3667 classification
.addChildTaxon((Taxon
)tmpTaxonBase
, refMods
, null);
3670 hierarchy
= new HashMap
<Rank
, Taxon
>();
3671 //System.out.println("LOOK FOR PARENT "+taxonname.toString()+", "+tmptaxonbase.toString());
3673 lookForParentNode(taxonName
,(Taxon
)tmpTaxonBase
, refMods
,this);
3674 //System.out.println("HIERARCHY "+hierarchy);
3675 Taxon parent
= buildHierarchy();
3676 if(!taxonExistsInClassification(parent
,(Taxon
)tmpTaxonBase
)){
3678 classification
.addParentChild(parent
, (Taxon
)tmpTaxonBase
, refMods
, null);
3680 classification
.addChildTaxon((Taxon
)tmpTaxonBase
, refMods
, null);
3682 importer
.getClassificationService().saveOrUpdate(classification
);
3685 // Set<TaxonNode> nodeList = classification.getAllNodes();
3686 // for(TaxonNode tn:nodeList) {
3687 // System.out.println(tn.getTaxon());
3691 importer
.getClassificationService().saveOrUpdate(classification
);
3694 Synonym castTest
=CdmBase
.deproxy(tmpTaxonBase
, Synonym
.class);
3695 }catch(Exception e
){
3696 TaxonName existingTnb
= tmpTaxonBase
.getName();
3697 Synonym castTest
= Synonym
.NewInstance(existingTnb
, refMods
);
3698 importer
.getTaxonService().saveOrUpdate(castTest
);
3699 tmpTaxonBase
=CdmBase
.deproxy(castTest
, Synonym
.class);
3704 taxon
=CdmBase
.deproxy(tmpTaxonBase
, Taxon
.class);
3706 syno
=CdmBase
.deproxy(tmpTaxonBase
, Synonym
.class);
3711 private boolean checkNodeNameReplaceable(Taxon nodeTaxon
, TaxonBase
<?
> newTaxon
) {
3712 //TODO preliminary check
3713 if (newTaxon
.isInstanceOf(Synonym
.class)){
3716 INonViralName nodeName
= nodeTaxon
.getName();
3717 INonViralName newName
= newTaxon
.getName();
3718 if (nodeTaxon
.getName() == null || newName
== null){
3721 if (nodeTaxon
.getDescriptions().size() > 0 || nodeName
.getDescriptions().size() > 0 || nodeName
.getTypeDesignations().size() > 0 ){
3724 boolean compare
= true;
3725 for (NomenclaturalStatus status
: newName
.getStatus() ){
3726 compare
&= compareStatus(nodeTaxon
, status
.getType());
3732 if (nodeName
.getNameCache() != null && nodeName
.getNameCache().equals(newName
.getNameCache())){
3733 if (nodeName
.getNameCache().equals(nodeName
.getTitleCache())){
3734 if (newName
.getNameCache().length() < newName
.getTitleCache().length()){
3735 logger
.warn("We still need to check, if node was automatically created via hierarchy creation: " + nodeName
.getNameCache());
3747 private Taxon
buildHierarchy() {
3748 logger
.info("buildHierarchy");
3749 Taxon higherTaxon
= null;
3750 //add the maxRank as a root
3751 if(hierarchy
.containsKey(state2
.getConfig().getMaxRank())){
3752 Taxon ct
=hierarchy
.get(state2
.getConfig().getMaxRank());
3753 if(!taxonExistsInClassification(higherTaxon
, ct
)) {
3754 classification
.addChildTaxon(ct
, refMods
, null);
3756 higherTaxon
= hierarchy
.get(state2
.getConfig().getMaxRank());
3757 // return higherTaxon;
3759 //add the relation to the highertaxon, except if the current rank to add IS the maxRank
3763 if(hierarchy
.containsKey(Rank
.FAMILY()) && !state2
.getConfig().getMaxRank().equals(Rank
.FAMILY())){
3764 higherTaxon
=saveAndGetHigherTaxon(Rank
.FAMILY(),higherTaxon
);
3766 if(hierarchy
.containsKey(Rank
.SUBFAMILY()) && !state2
.getConfig().getMaxRank().equals(Rank
.SUBFAMILY())){
3767 higherTaxon
=saveAndGetHigherTaxon(Rank
.SUBFAMILY(),higherTaxon
);
3769 if(hierarchy
.containsKey(Rank
.TRIBE())&& !state2
.getConfig().getMaxRank().equals(Rank
.TRIBE())){
3770 higherTaxon
=saveAndGetHigherTaxon(Rank
.TRIBE(),higherTaxon
);
3772 if(hierarchy
.containsKey(Rank
.SUBTRIBE())&& !state2
.getConfig().getMaxRank().equals(Rank
.SUBTRIBE())){
3773 higherTaxon
=saveAndGetHigherTaxon(Rank
.SUBTRIBE(),higherTaxon
);
3775 if(hierarchy
.containsKey(Rank
.GENUS())&& !state2
.getConfig().getMaxRank().equals(Rank
.SUBGENUS())){
3776 higherTaxon
=saveAndGetHigherTaxon(Rank
.GENUS(),higherTaxon
);
3778 if(hierarchy
.containsKey(Rank
.SUBGENUS())&& !state2
.getConfig().getMaxRank().equals(Rank
.SUBGENUS())){
3779 higherTaxon
=saveAndGetHigherTaxon(Rank
.SUBGENUS(),higherTaxon
);
3781 importer
.getClassificationService().saveOrUpdate(classification
);
3785 private Taxon
saveAndGetHigherTaxon(Rank r
, Taxon higherTaxon
){
3786 Taxon ct
=hierarchy
.get(r
);
3787 if(!taxonExistsInClassification(higherTaxon
,ct
)) {
3788 if(higherTaxon
!= null && ct
!=null) {
3789 classification
.addParentChild(higherTaxon
, ct
, refMods
, null);
3791 if(higherTaxon
== null && ct
!=null) {
3792 classification
.addChildTaxon(ct
, refMods
, null);
3798 private boolean taxonExistsInClassification(Taxon parent
, Taxon child
){
3799 logger
.info("taxonExistsInClassification");
3800 // System.out.println("LOOK IF TAXA EXIST "+parent+", "+child);
3801 boolean found
=false;
3803 for (TaxonNode p
: classification
.getAllNodes()){
3804 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
3805 for (TaxonNode c
: p
.getChildNodes()) {
3806 if (c
.getTaxon().getTitleCache().equalsIgnoreCase(child
.getTitleCache())) {
3815 for (TaxonNode p
: classification
.getAllNodes()){
3816 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(child
.getTitleCache())) {
3822 // System.out.println("LOOK IF TAXA EXIST? "+found);
3826 * @param nameToBeFilledTest
3828 public void setParsedName(TaxonName nameToBeFilledTest
) {
3829 this.taxonName
= TaxonName
.castAndDeproxy(nameToBeFilledTest
);
3832 //variety dwcranks:varietyEpithet
3834 * @return the author
3836 public String
getAuthor() {
3842 public Taxon
getTaxon() {
3848 public TaxonName
getTaxonName() {
3853 * @param findOrCreateTaxon
3855 public void setForm(Taxon form
) {
3860 * @param findOrCreateTaxon
3862 public void setVariety(Taxon variety
) {
3863 this.variety
=variety
;
3870 @SuppressWarnings("rawtypes")
3871 public Taxon
findOrCreateTaxon(String partialname
,String fullname
, Rank rank
, Rank globalrank
) {
3872 logger
.info("findOrCreateTaxon");
3873 sourceUrlRef
=CdmBase
.deproxy(sourceUrlRef
, Reference
.class);
3874 //takes too much time
3875 // List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3876 // logger.info("tmpList returned: "+tmpList.size());
3878 NomenclaturalStatusType statusType
= null;
3879 if (!getStatus().isEmpty()){
3881 statusType
= nomStatusString2NomStatus(getStatus());
3882 } catch (UnknownCdmTypeException e
) {
3883 addProblematicStatusToFile(getStatus());
3884 logger
.warn("Problem with status");
3888 List
<TaxonBase
> tmpListFiltered
= new ArrayList
<TaxonBase
>();
3890 Pager
<TaxonBase
> taxontest
= importer
.getTaxonService().findByTitleWithRestrictions(TaxonBase
.class, fullname
, MatchMode
.BEGINNING
, null, null, null, null, null);
3892 tmpListFiltered
.addAll(taxontest
.getRecords());
3893 taxontest
= importer
.getTaxonService().findByTitleWithRestrictions(TaxonBase
.class, partialname
, MatchMode
.BEGINNING
, null, null, null, null, null);
3894 tmpListFiltered
.addAll(taxontest
.getRecords());
3896 //logger.info("tmpListFiltered returned: "+tmpListFiltered.size());
3898 boolean nameCorrected
=false;
3899 if (fullname
.indexOf(partialname
)<0) {
3903 boolean foundIdentic
=false;
3905 for (TaxonBase tmpb
:tmpListFiltered
){
3907 TaxonName tnb
= tmpb
.getName();
3910 if(globalrank
.equals(rank
) || (globalrank
.isLower(Rank
.SPECIES()) && rank
.equals(Rank
.SPECIES()))){
3911 if (tnb
.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(fullname
) ){
3912 crank
=tnb
.getRank();
3913 if (crank
!=null && rank
!=null){
3914 if (crank
.equals(rank
)){
3919 }catch(Exception e
){
3920 e
.printStackTrace();
3925 if(nameCorrected
){ //for corrected names such as Anochetus -- A. blf-pat
3926 if (tnb
.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname
) ){
3927 crank
=tnb
.getRank();
3928 if (crank
!=null && rank
!=null){
3929 if (crank
.equals(rank
)){
3934 }catch(Exception e
){
3935 e
.printStackTrace();
3943 if (tnb
.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname
) ){
3944 crank
=tnb
.getRank();
3945 if (crank
!=null && rank
!=null){
3946 if (crank
.equals(rank
)){
3951 }catch(Exception e
){
3952 e
.printStackTrace();
3961 boolean statusMatch
=false;
3962 boolean appendedMatch
=false;
3963 if(tmp
!=null && foundIdentic
){
3964 statusMatch
=compareStatus(tmp
, statusType
);
3965 if (!getStatus().isEmpty() && ! (tmp
.getAppendedPhrase() == null)) {
3966 appendedMatch
=tmp
.getAppendedPhrase().equals(getStatus());
3968 if (getStatus().isEmpty() && tmp
.getAppendedPhrase() == null) {
3973 if ((tmp
== null || !foundIdentic
) || (tmp
!= null && !statusMatch
) || (tmp
!= null && !appendedMatch
&& !statusMatch
)){
3975 INonViralName tnb
= getNonViralNameAccNomenclature();
3978 if(statusType
!= null) {
3979 tnb
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
3981 if(StringUtils
.isNotBlank(getStatus())) {
3982 tnb
.setAppendedPhrase(getStatus());
3985 if(rank
.equals(Rank
.UNKNOWN_RANK())){
3986 tnb
.setTitleCache(fullname
, true);
3987 // tnb.setGenusOrUninomial(fullname);
3989 if(rank
.isHigher(Rank
.GENUS())) {
3990 tnb
.setGenusOrUninomial(partialname
);
3993 if(rank
.isHigher(Rank
.SPECIES())) {
3994 tnb
.setTitleCache(partialname
, true);
3997 if (rank
.equals(globalrank
) && author
!= null) {
3999 tnb
.setCombinationAuthorship(findOrCreateAuthor(author
));
4000 if (getIdentifier() !=null && !getIdentifier().isEmpty()){
4001 Taxon taxonLSID
= getTaxonByLSID(getIdentifier());
4002 if (taxonLSID
!=null) {
4009 if (rank
.equals(Rank
.FAMILY())) {
4010 tmp
= buildFamily(tnb
);
4012 if (rank
.equals(Rank
.SUBFAMILY())) {
4013 tmp
= buildSubfamily(tnb
);
4015 if (rank
.equals(Rank
.TRIBE())) {
4016 tmp
= buildTribe(tnb
);
4018 if (rank
.equals(Rank
.SUBTRIBE())) {
4019 tmp
= buildSubtribe(tnb
);
4021 if (rank
.equals(Rank
.GENUS())) {
4022 tmp
= buildGenus(partialname
, tnb
);
4025 if (rank
.equals(Rank
.SUBGENUS())) {
4026 tmp
= buildSubgenus(partialname
, tnb
);
4028 if (rank
.equals(Rank
.SPECIES())) {
4029 tmp
= buildSpecies(partialname
, tnb
);
4032 if (rank
.equals(Rank
.SUBSPECIES())) {
4033 tmp
= buildSubspecies(partialname
, tnb
);
4036 if (rank
.equals(Rank
.VARIETY())) {
4037 tmp
= buildVariety(fullname
, partialname
, tnb
);
4040 if (rank
.equals(Rank
.FORM())) {
4041 tmp
= buildForm(fullname
, partialname
, tnb
);
4044 TaxonXTreatmentExtractor
.this.sourceHandler
.addSource(refMods
, tmp
);
4047 importer
.getClassificationService().saveOrUpdate(classification
);
4052 tmp
= CdmBase
.deproxy(tmp
, Taxon
.class);
4053 if (rank
.equals(globalrank
) && author
!= null) {
4054 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
4055 setLSID(getIdentifier(), tmp
);
4056 importer
.getTaxonService().saveOrUpdate(tmp
);
4057 tmp
= CdmBase
.deproxy(tmp
, Taxon
.class);
4070 private Taxon
buildSubfamily(INonViralName tnb
) {
4072 // tnb.generateTitle();
4073 tmp
= findMatchingTaxon(tnb
,refMods
);
4075 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4076 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4077 tmp
.setSec(state2
.getConfig().getSecundum());
4079 // tmp.setSec(refMods);
4080 // sourceHandler.addSource(refMods, tmp);
4081 if(family
!= null) {
4082 classification
.addParentChild(family
, tmp
, null, null);
4083 higherRank
=Rank
.FAMILY();
4086 //System.out.println("ADDCHILDTAXON SUBFAMILY "+tmp);
4087 classification
.addChildTaxon(tmp
, null, null);
4096 private Taxon
buildFamily(INonViralName tnb
) {
4098 // tnb.generateTitle();
4099 tmp
= findMatchingTaxon(tnb
,refMods
);
4101 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4102 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4103 tmp
.setSec(state2
.getConfig().getSecundum());
4105 // tmp.setSec(refMods);
4106 //sourceHandler.addSource(refMods, tmp);
4107 //System.out.println("ADDCHILDTAXON FAMILY "+tmp);
4108 classification
.addChildTaxon(tmp
, null, null);
4117 private Taxon
buildForm(String fullname
, String partialname
, INonViralName tnb
) {
4118 if (genusName
!=null) {
4119 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
4121 if (subgenusName
!=null) {
4122 tnb
.setInfraGenericEpithet(subgenusName
.getInfraGenericEpithet());
4124 if(speciesName
!=null) {
4125 tnb
.setSpecificEpithet(speciesName
.getSpecificEpithet());
4127 if(subspeciesName
!= null) {
4128 tnb
.setInfraSpecificEpithet(subspeciesName
.getInfraSpecificEpithet());
4130 if(partialname
!= null) {
4131 tnb
.setInfraSpecificEpithet(partialname
);
4133 //TODO how to save form??
4134 tnb
.setTitleCache(fullname
, true);
4135 Taxon tmp
= findMatchingTaxon(tnb
,refMods
);
4137 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4138 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4139 tmp
.setSec(state2
.getConfig().getSecundum());
4141 // tmp.setSec(refMods);
4142 //sourceHandler.addSource(refMods, tmp);
4143 if (subspecies
!=null) {
4144 classification
.addParentChild(subspecies
, tmp
, null, null);
4145 higherRank
=Rank
.SUBSPECIES();
4146 higherTaxa
=subspecies
;
4148 if (species
!=null) {
4149 classification
.addParentChild(species
, tmp
, null, null);
4150 higherRank
=Rank
.SPECIES();
4154 // System.out.println("ADDCHILDTAXON FORM "+tmp);
4155 classification
.addChildTaxon(tmp
, null, null);
4166 private Taxon
buildVariety(String fullname
, String partialname
, INonViralName tnb
) {
4168 if (genusName
!=null) {
4169 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
4171 if (subgenusName
!=null) {
4172 tnb
.setInfraGenericEpithet(subgenusName
.getInfraGenericEpithet());
4174 if(speciesName
!=null) {
4175 tnb
.setSpecificEpithet(speciesName
.getSpecificEpithet());
4177 if(subspeciesName
!= null) {
4178 tnb
.setInfraSpecificEpithet(subspeciesName
.getSpecificEpithet());
4180 if(partialname
!= null) {
4181 tnb
.setInfraSpecificEpithet(partialname
);
4183 //TODO how to save variety?
4184 tnb
.setTitleCache(fullname
, true);
4185 tmp
= findMatchingTaxon(tnb
,refMods
);
4187 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4188 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4189 tmp
.setSec(state2
.getConfig().getSecundum());
4191 // tmp.setSec(refMods);
4192 //sourceHandler.addSource(refMods, tmp);
4193 if (subspecies
!=null) {
4194 classification
.addParentChild(subspecies
, tmp
, null, null);
4195 higherRank
=Rank
.SUBSPECIES();
4196 higherTaxa
=subspecies
;
4198 if(species
!=null) {
4199 classification
.addParentChild(species
, tmp
, null, null);
4200 higherRank
=Rank
.SPECIES();
4204 //System.out.println("ADDCHILDTAXON VARIETY "+tmp);
4205 classification
.addChildTaxon(tmp
, null, null);
4212 * @param partialname
4216 private Taxon
buildSubspecies(String partialname
, INonViralName tnb
) {
4217 if (genusName
!=null) {
4218 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
4220 if (subgenusName
!=null) {
4221 // System.out.println("SUB:"+subgenusName.getInfraGenericEpithet());
4222 tnb
.setInfraGenericEpithet(subgenusName
.getInfraGenericEpithet());
4224 if(speciesName
!=null) {
4225 // System.out.println("SPE:"+speciesName.getSpecificEpithet());
4226 tnb
.setSpecificEpithet(speciesName
.getSpecificEpithet());
4228 tnb
.setInfraSpecificEpithet(partialname
);
4229 Taxon tmp
= findMatchingTaxon(tnb
,refMods
);
4231 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4232 if(!state2
.getConfig().doKeepOriginalSecundum())
4234 tmp
.setSec(state2
.getConfig().getSecundum());
4235 // tmp.setSec(refMods);
4236 //sourceHandler.addSource(refMods, tmp);
4239 if(species
!= null) {
4240 classification
.addParentChild(species
, tmp
, null, null);
4241 higherRank
=Rank
.SPECIES();
4245 //System.out.println("ADDCHILDTAXON SUBSPECIES "+tmp);
4246 classification
.addChildTaxon(tmp
, null, null);
4252 * @param partialname
4256 private Taxon
buildSpecies(String partialname
, INonViralName tnb
) {
4257 if (genusName
!=null) {
4258 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
4260 if (subgenusName
!=null) {
4261 tnb
.setInfraGenericEpithet(subgenusName
.getInfraGenericEpithet());
4263 tnb
.setSpecificEpithet(partialname
.toLowerCase());
4264 Taxon tmp
= findMatchingTaxon(tnb
,refMods
);
4266 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4267 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4268 tmp
.setSec(state2
.getConfig().getSecundum());
4270 // tmp.setSec(refMods);
4271 //sourceHandler.addSource(refMods, tmp);
4272 if (subgenus
!=null) {
4273 classification
.addParentChild(subgenus
, tmp
, null, null);
4274 higherRank
=Rank
.SUBGENUS();
4275 higherTaxa
=subgenus
;
4278 classification
.addParentChild(genus
, tmp
, null, null);
4279 higherRank
=Rank
.GENUS();
4283 //System.out.println("ADDCHILDTAXON SPECIES "+tmp);
4284 classification
.addChildTaxon(tmp
, null, null);
4291 * @param partialname
4295 private Taxon
buildSubgenus(String partialname
, INonViralName tnb
) {
4296 tnb
.setInfraGenericEpithet(partialname
);
4297 if (genusName
!=null) {
4298 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
4300 Taxon tmp
= findMatchingTaxon(tnb
,refMods
);
4302 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4303 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4304 tmp
.setSec(state2
.getConfig().getSecundum());
4306 // tmp.setSec(refMods);
4307 //sourceHandler.addSource(refMods, tmp);
4309 classification
.addParentChild(genus
, tmp
, null, null);
4310 higherRank
=Rank
.GENUS();
4313 //System.out.println("ADDCHILDTAXON SUBGENUS "+tmp);
4314 classification
.addChildTaxon(tmp
, null, null);
4320 * @param partialname
4324 private Taxon
buildGenus(String partialname
, INonViralName tnb
) {
4326 tnb
.setGenusOrUninomial(partialname
);
4329 tmp
= findMatchingTaxon(tnb
,refMods
);
4331 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4332 if(!state2
.getConfig().doKeepOriginalSecundum())
4334 tmp
.setSec(state2
.getConfig().getSecundum());
4335 // tmp.setSec(refMods);
4336 //sourceHandler.addSource(refMods, tmp);
4339 if(subtribe
!= null) {
4340 classification
.addParentChild(subtribe
, tmp
, null, null);
4341 higherRank
=Rank
.SUBTRIBE();
4342 higherTaxa
=subtribe
;
4345 classification
.addParentChild(tribe
, tmp
, null, null);
4346 higherRank
=Rank
.TRIBE();
4349 if(subfamily
!=null) {
4350 classification
.addParentChild(subfamily
, tmp
, null, null);
4351 higherRank
=Rank
.SUBFAMILY();
4352 higherTaxa
=subfamily
;
4355 classification
.addParentChild(family
, tmp
, null, null);
4356 higherRank
=Rank
.FAMILY();
4360 //System.out.println("ADDCHILDTAXON GENUS "+tmp);
4361 classification
.addChildTaxon(tmp
, null, null);
4373 private Taxon
buildSubtribe(INonViralName tnb
) {
4374 Taxon tmp
= findMatchingTaxon(tnb
,refMods
);
4376 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4377 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4378 tmp
.setSec(state2
.getConfig().getSecundum());
4380 // tmp.setSec(refMods);
4381 //sourceHandler.addSource(refMods, tmp);
4383 classification
.addParentChild(tribe
, tmp
, null, null);
4384 higherRank
=Rank
.TRIBE();
4387 //System.out.println("ADDCHILDTAXON SUBTRIBE "+tmp);
4388 classification
.addChildTaxon(tmp
, null, null);
4397 private Taxon
buildTribe(INonViralName tnb
) {
4398 Taxon tmp
= findMatchingTaxon(tnb
,refMods
);
4400 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4401 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4402 tmp
.setSec(state2
.getConfig().getSecundum());
4404 // tmp.setSec(refMods);
4405 //sourceHandler.addSource(refMods, tmp);
4406 if (subfamily
!=null) {
4407 classification
.addParentChild(subfamily
, tmp
, null, null);
4408 higherRank
=Rank
.SUBFAMILY();
4409 higherTaxa
=subfamily
;
4411 if(family
!= null) {
4412 classification
.addParentChild(family
, tmp
, null, null);
4413 higherRank
=Rank
.FAMILY();
4417 //System.out.println("ADDCHILDTAXON TRIBE "+tmp);
4418 classification
.addChildTaxon(tmp
, null, null);
4426 * @param identifier2
4429 @SuppressWarnings("rawtypes")
4430 private Taxon
getTaxonByLSID(String identifier
) {
4431 //logger.info("getTaxonByLSID");
4432 // boolean lsidok=false;
4433 String id
= identifier
.split("__")[0];
4434 // String source = identifier.split("__")[1];
4436 if (id
.indexOf("lsid")>-1){
4438 lsid
= new LSID(id
);
4440 } catch (MalformedLSIDException e
) {
4441 logger
.warn("Malformed LSID");
4445 List
<Taxon
> taxa
= importer
.getTaxonService().list(Taxon
.class, 0, 0, null, null);
4446 LSID currentlsid
=null;
4448 currentlsid
= t
.getLsid();
4449 if (currentlsid
!=null){
4450 if (currentlsid
.getLsid().equals(lsid
.getLsid())){
4454 catch(Exception e
){logger
.warn("Exception occurred while comparing LSIDs "+e
);}
4465 @SuppressWarnings("rawtypes")
4466 private Person
findOrCreateAuthor(String author2
) {
4467 //logger.info("findOrCreateAuthor");
4468 List
<UuidAndTitleCache
<Person
>> hiberPersons
= importer
.getAgentService().getPersonUuidAndTitleCache();
4469 for (UuidAndTitleCache
<Person
> hibernateP
:hiberPersons
){
4470 if(hibernateP
.getTitleCache().equals(author2
)) {
4471 AgentBase existing
= importer
.getAgentService().find(hibernateP
.getUuid());
4472 return CdmBase
.deproxy(existing
, Person
.class);
4475 Person p
= Person
.NewInstance();
4476 p
.setTitleCache(author2
,true);
4477 importer
.getAgentService().saveOrUpdate(p
);
4478 return CdmBase
.deproxy(p
, Person
.class);
4481 * @param author the author to set
4483 public void setAuthor(String author
) {
4484 this.author
= author
;
4488 * @return the higherTaxa
4490 public Taxon
getHigherTaxa() {
4494 * @param higherTaxa the higherTaxa to set
4496 public void setHigherTaxa(Taxon higherTaxa
) {
4497 this.higherTaxa
= higherTaxa
;
4500 * @return the higherRank
4502 public Rank
getHigherRank() {
4506 * @param higherRank the higherRank to set
4508 public void setHigherRank(Rank higherRank
) {
4509 this.higherRank
= higherRank
;
4511 public String
getName(){
4512 if (newName
.isEmpty()) {
4513 return originalName
;
4520 * @return the fullName
4522 public String
getOriginalName() {
4523 return originalName
;
4526 * @param fullName the fullName to set
4528 public void setOriginalName(String fullName
) {
4529 this.originalName
= fullName
;
4532 * @return the newName
4534 public String
getNewName() {
4538 * @param newName the newName to set
4540 public void setNewName(String newName
) {
4541 this.newName
= newName
;
4546 public Rank
getRank() {
4550 * @param rank the rank to set
4552 public void setRank(Rank rank
) {
4556 * @return the idenfitiger
4558 public String
getIdentifier() {
4562 * @param idenfitiger the idenfitiger to set
4564 public void setIdentifier(String identifier
) {
4565 this.identifier
= identifier
;
4568 * @return the status
4570 public String
getStatus() {
4571 if (status
== null) {
4577 * @param status the status to set
4579 public void setStatus(String status
) {
4580 this.status
= status
;
4583 * @return the family
4585 public Taxon
getFamily() {
4589 * @param family the family to set
4591 @SuppressWarnings("rawtypes")
4592 public void setFamily(Taxon family
) {
4593 this.family
= family
;
4594 familyName
= CdmBase
.deproxy(family
.getName());
4597 * @return the subfamily
4599 public Taxon
getSubfamily() {
4603 * @param subfamily the subfamily to set
4605 @SuppressWarnings("rawtypes")
4606 public void setSubfamily(Taxon subfamily
) {
4607 this.subfamily
= subfamily
;
4608 subfamilyName
= CdmBase
.deproxy(subfamily
.getName());
4613 public Taxon
getTribe() {
4617 * @param tribe the tribe to set
4619 @SuppressWarnings("rawtypes")
4620 public void setTribe(Taxon tribe
) {
4622 tribeName
= CdmBase
.deproxy(tribe
.getName());
4625 * @return the subtribe
4627 public Taxon
getSubtribe() {
4631 * @param subtribe the subtribe to set
4633 @SuppressWarnings("rawtypes")
4634 public void setSubtribe(Taxon subtribe
) {
4635 this.subtribe
= subtribe
;
4636 subtribeName
=CdmBase
.deproxy(subtribe
.getName());
4641 public Taxon
getGenus() {
4645 * @param genus the genus to set
4647 @SuppressWarnings("rawtypes")
4648 public void setGenus(Taxon genus
) {
4651 genusName
= CdmBase
.deproxy(genus
.getName());
4655 * @return the subgenus
4657 public Taxon
getSubgenus() {
4661 * @param subgenus the subgenus to set
4663 @SuppressWarnings("rawtypes")
4664 public void setSubgenus(Taxon subgenus
) {
4665 this.subgenus
= subgenus
;
4666 subgenusName
= CdmBase
.deproxy(subgenus
.getName());
4669 * @return the species
4671 public Taxon
getSpecies() {
4675 * @param species the species to set
4677 public void setSpecies(Taxon species
) {
4678 if (species
!= null){
4679 this.species
= species
;
4680 speciesName
= CdmBase
.deproxy(species
.getName());
4684 * @return the subspecies
4686 public Taxon
getSubspecies() {
4690 * @param subspecies the subspecies to set
4692 @SuppressWarnings("rawtypes")
4693 public void setSubspecies(Taxon subspecies
) {
4694 this.subspecies
= subspecies
;
4695 subspeciesName
= CdmBase
.deproxy(subspecies
.getName());
4707 private void addProblematicStatusToFile(String status
) {
4709 FileWriter fstream
= new FileWriter(TaxonXImport
.LOG_FOLDER
+ "StatusUnknown_"+classification
.getTitleCache()+".txt",true);
4710 BufferedWriter out
= new BufferedWriter(fstream
);
4711 out
.write(status
+"\n");
4712 //Close the output stream
4714 }catch (Exception e
){//Catch exception if any
4715 System
.err
.println("Error: " + e
.getMessage());
4726 private Taxon
findMatchingTaxon(INonViralName tnb
, Reference refMods
) {
4727 logger
.info("findMatchingTaxon");
4730 refMods
=CdmBase
.deproxy(refMods
, Reference
.class);
4731 boolean insertAsExisting
=false;
4732 List
<Taxon
> existingTaxa
= new ArrayList
<Taxon
>();
4734 existingTaxa
= getMatchingTaxa(TaxonName
.castAndDeproxy(tnb
));
4735 } catch (Exception e1
) {
4736 // TODO Auto-generated catch block
4737 e1
.printStackTrace();
4739 double similarityScore
=0.0;
4740 double similarityAuthor
=-1;
4745 for (Taxon bestMatchingTaxon
: existingTaxa
){
4746 if (!existingTaxa
.isEmpty() && state2
.getConfig().isInteractWithUser() && !insertAsExisting
) {
4747 // System.out.println("tnb "+tnb.getTitleCache());
4748 // System.out.println("ext "+bestMatchingTaxon.getTitleCache());
4750 if(tnb
.getAuthorshipCache()!=null) {
4751 author1
=tnb
.getAuthorshipCache();
4753 } catch (Exception e
) {
4754 // TODO Auto-generated catch block
4755 e
.printStackTrace();
4758 if(bestMatchingTaxon
.getName().getAuthorshipCache()!=null) {
4759 author2
=bestMatchingTaxon
.getName().getAuthorshipCache();
4761 } catch (Exception e
) {
4762 // TODO Auto-generated catch block
4763 e
.printStackTrace();
4766 t1
=tnb
.getTitleCache().split("sec.")[0].trim();
4767 if (author1
!=null && !StringUtils
.isEmpty(author1
)) {
4768 t1
=t1
.split(Pattern
.quote(author1
))[0];
4770 } catch (Exception e
) {
4771 // TODO Auto-generated catch block
4772 e
.printStackTrace();
4775 t2
=bestMatchingTaxon
.getTitleCache().split("sec.")[0].trim();
4776 if (author2
!=null && !StringUtils
.isEmpty(author2
)) {
4777 t2
=t2
.split(Pattern
.quote(author2
))[0];
4779 } catch (Exception e
) {
4780 // TODO Auto-generated catch block
4781 e
.printStackTrace();
4783 similarityScore
=similarity(t1
.trim(), t2
.trim());
4784 // System.out.println("taxascore: "+similarityScore);
4785 similarityAuthor
=similarity(author1
.trim(), author2
.trim());
4786 // System.out.println("authorscore: "+similarityAuthor);
4787 insertAsExisting
= compareAndCheckTaxon(tnb
, refMods
, similarityScore
, bestMatchingTaxon
,similarityAuthor
);
4789 if(insertAsExisting
) {
4790 //System.out.println("KEEP "+bestMatchingTaxon.toString());
4791 tmp
=bestMatchingTaxon
;
4792 sourceHandler
.addSource(refMods
, tmp
);
4803 * @param similarityScore
4804 * @param bestMatchingTaxon
4805 * @param similarityAuthor
4808 private boolean compareAndCheckTaxon(INonViralName tnb
, Reference refMods
, double similarityScore
,
4809 Taxon bestMatchingTaxon
, double similarityAuthor
) {
4810 //logger.info("compareAndCheckTaxon");
4811 boolean insertAsExisting
;
4812 // if (tnb.getTitleCache().split("sec.")[0].equalsIgnoreCase("Chenopodium") && bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium album")>-1) {
4813 // insertAsExisting=false;
4815 //a small hack/automatisation for Chenopodium only
4816 if (tnb
.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase("Chenopodium") &&
4817 bestMatchingTaxon
.getTitleCache().split("sec.")[0].indexOf("Chenopodium L.")>-1) {
4818 insertAsExisting
=true;
4820 insertAsExisting
=askIfReuseBestMatchingTaxon(tnb
, bestMatchingTaxon
, refMods
, similarityScore
,similarityAuthor
);
4824 logDecision(tnb
, bestMatchingTaxon
, insertAsExisting
, refMods
);
4825 return insertAsExisting
;
4831 @SuppressWarnings("rawtypes")
4832 private List
<Taxon
> getMatchingTaxa(TaxonName tnb
) {
4833 //logger.info("getMatchingTaxon");
4834 if (tnb
.getTitleCache() == null){
4835 tnb
.setTitleCache(tnb
.toString(), tnb
.isProtectedTitleCache());
4838 Pager
<TaxonBase
> pager
=importer
.getTaxonService().findByTitleWithRestrictions(TaxonBase
.class, tnb
.getTitleCache().split("sec.")[0].trim(), MatchMode
.BEGINNING
, null, null, null, null, null);
4839 List
<TaxonBase
>records
= pager
.getRecords();
4841 List
<Taxon
> existingTaxons
= new ArrayList
<Taxon
>();
4842 for (TaxonBase r
:records
){
4844 Taxon bestMatchingTaxon
= (Taxon
)r
;
4845 // System.out.println("best: "+bestMatchingTaxon.getTitleCache());
4846 if(compareTaxonNameLength(bestMatchingTaxon
.getTitleCache().split(".sec")[0],tnb
.getTitleCache().split(".sec")[0])) {
4847 existingTaxons
.add(bestMatchingTaxon
);
4849 }catch(ClassCastException e
){logger
.warn("classcast exception, might be a synonym, ignore it");}
4851 Taxon bmt
= importer
.getTaxonService().findBestMatchingTaxon(tnb
.getTitleCache());
4852 if (!existingTaxons
.contains(bmt
) && bmt
!=null) {
4853 if(compareTaxonNameLength(bmt
.getTitleCache().split(".sec")[0],tnb
.getTitleCache().split(".sec")[0])) {
4854 existingTaxons
.add(bmt
);
4857 return existingTaxons
;
4861 * Check if the found Taxon can reasonnably be the same
4862 * example: with and without author should match, but the subspecies should not be suggested for a genus
4864 private boolean compareTaxonNameLength(String f
, String o
){
4865 //logger.info("compareTaxonNameLength");
4866 boolean lengthOk
=false;
4867 int sizeF
= f
.length();
4868 int sizeO
= o
.length();
4873 if (sizeF
-sizeO
>10) {
4880 // System.out.println(lengthOk+": compare "+f+" ("+f.length()+") and "+o+" ("+o.length()+")");
4884 private double similarity(String s1
, String s2
) {
4885 //logger.info("similarity");
4886 //System.out.println("similarity *"+s1+"* vs. *"+s2+"*");
4887 if(!StringUtils
.isEmpty(s1
) && !StringUtils
.isEmpty(s2
)){
4888 String l1
=s1
.toLowerCase().trim();
4889 String l2
=s2
.toLowerCase().trim();
4890 if (l1
.length() < l2
.length()) { // s1 should always be bigger
4891 String swap
= l1
; l1
= l2
; l2
= swap
;
4893 int bigLen
= l1
.length();
4894 if (bigLen
== 0) { return 1.0; /* both strings are zero length */ }
4895 return (bigLen
- computeEditDistance(l1
, l2
)) / (double) bigLen
;
4898 if(s1
!=null && s2
!=null){
4899 if (s1
.equalsIgnoreCase(s2
)) {
4907 private int computeEditDistance(String s1
, String s2
) {
4908 //logger.info("computeEditDistance");
4909 int[] costs
= new int[s2
.length() + 1];
4910 for (int i
= 0; i
<= s1
.length(); i
++) {
4912 for (int j
= 0; j
<= s2
.length(); j
++) {
4917 int newValue
= costs
[j
- 1];
4918 if (s1
.charAt(i
- 1) != s2
.charAt(j
- 1)) {
4919 newValue
= Math
.min(Math
.min(newValue
, lastValue
),
4922 costs
[j
- 1] = lastValue
;
4923 lastValue
= newValue
;
4928 costs
[s2
.length()] = lastValue
;
4931 return costs
[s2
.length()];
4934 Map
<Rank
, Taxon
> hierarchy
= new HashMap
<Rank
, Taxon
>();
4938 @SuppressWarnings("rawtypes")
4939 public void lookForParentNode(INonViralName taxonName
, Taxon tax
, Reference ref
, MyName myName
) {
4940 logger
.info("lookForParentNode "+taxonName
.getTitleCache()+" for "+myName
.toString());
4941 //System.out.println("LOOK FOR PARENT NODE "+taxonname.toString()+"; "+tax.toString()+"; "+taxonname.getRank());
4942 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
4943 if (taxonName
.getRank().equals(Rank
.FORM())){
4944 handleFormHierarchy(ref
, myName
, parser
);
4946 else if (taxonName
.getRank().equals(Rank
.VARIETY())){
4947 handleVarietyHierarchy(ref
, myName
, parser
);
4949 else if (taxonName
.getRank().equals(Rank
.SUBSPECIES())){
4950 handleSubSpeciesHierarchy(ref
, myName
, parser
);
4952 else if (taxonName
.getRank().equals(Rank
.SPECIES())){
4953 handleSpeciesHierarchy(ref
, myName
, parser
);
4955 else if (taxonName
.getRank().equals(Rank
.SUBGENUS())){
4956 handleSubgenusHierarchy(ref
, myName
, parser
);
4959 if (taxonName
.getRank().equals(Rank
.GENUS())){
4960 handleGenusHierarchy(ref
, myName
, parser
);
4962 if (taxonName
.getRank().equals(Rank
.SUBTRIBE())){
4963 handleSubtribeHierarchy(ref
, myName
, parser
);
4965 if (taxonName
.getRank().equals(Rank
.TRIBE())){
4966 handleTribeHierarchy(ref
, myName
, parser
);
4969 if (taxonName
.getRank().equals(Rank
.SUBFAMILY())){
4970 handleSubfamilyHierarchy(ref
, myName
, parser
);
4979 private void handleSubfamilyHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
4980 System
.out
.println("handleSubfamilyHierarchy");
4981 String parentStr
= myName
.getFamilyStr();
4982 Rank r
= Rank
.FAMILY();
4983 if(parentStr
!=null){
4985 Taxon parent
= null;
4986 Pager
<TaxonBase
> taxontest
= importer
.getTaxonService().findByTitleWithRestrictions(TaxonBase
.class, parentStr
, MatchMode
.BEGINNING
, null, null, null, null, null);
4987 for(TaxonBase tb
:taxontest
.getRecords()){
4989 if (tb
.getName().getRank().equals(r
)) {
4990 parent
=CdmBase
.deproxy(tb
, Taxon
.class);
4993 } catch (Exception e
) {
4994 // TODO Auto-generated catch block
4995 e
.printStackTrace();
4998 if(parent
== null) {
4999 INonViralName parentNameName
= parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
5000 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5003 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5004 importer
.getTaxonService().save(parent
);
5005 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
5009 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5012 hierarchy
.put(r
,parent
);
5021 private void handleTribeHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5022 String parentStr
= myName
.getSubfamilyStr();
5023 Rank r
= Rank
.SUBFAMILY();
5024 if (parentStr
== null){
5025 parentStr
= myName
.getFamilyStr();
5028 if(parentStr
!=null){
5029 INonViralName parentNameName
= parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
5030 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
5031 // importer.getTaxonService().save(parent);
5032 // parent = CdmBase.deproxy(parent, Taxon.class);
5034 boolean parentDoesNotExists
= true;
5035 for (TaxonNode p
: classification
.getAllNodes()){
5036 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
5037 parentDoesNotExists
= false;
5038 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
5042 // if(parentDoesNotExists) {
5043 // importer.getTaxonService().save(parent);
5044 // parent = CdmBase.deproxy(parent, Taxon.class);
5045 // lookForParentNode(parentNameName, parent, ref,myName);
5047 if(parentDoesNotExists
) {
5048 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5051 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5052 importer
.getTaxonService().save(parent
);
5053 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
5057 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5060 hierarchy
.put(r
,parent
);
5069 private void handleSubtribeHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5070 String parentStr
= myName
.getTribeStr();
5071 Rank r
= Rank
.TRIBE();
5072 if (parentStr
== null){
5073 parentStr
= myName
.getSubfamilyStr();
5074 r
= Rank
.SUBFAMILY();
5076 if (parentStr
== null){
5077 parentStr
= myName
.getFamilyStr();
5080 if(parentStr
!=null){
5081 INonViralName parentNameName
= parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
5082 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
5083 // importer.getTaxonService().save(parent);
5084 // parent = CdmBase.deproxy(parent, Taxon.class);
5086 boolean parentDoesNotExists
= true;
5087 for (TaxonNode p
: classification
.getAllNodes()){
5088 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
5089 parentDoesNotExists
= false;
5090 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
5095 // if(parentDoesNotExists) {
5096 // importer.getTaxonService().save(parent);
5097 // parent = CdmBase.deproxy(parent, Taxon.class);
5098 // lookForParentNode(parentNameName, parent, ref,myName);
5100 if(parentDoesNotExists
) {
5101 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5104 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5105 importer
.getTaxonService().save(parent
);
5106 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
5110 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5113 hierarchy
.put(r
,parent
);
5122 private void handleGenusHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5123 String parentStr
= myName
.getSubtribeStr();
5124 Rank r
= Rank
.SUBTRIBE();
5125 if (parentStr
== null){
5126 parentStr
= myName
.getTribeStr();
5129 if (parentStr
== null){
5130 parentStr
= myName
.getSubfamilyStr();
5131 r
= Rank
.SUBFAMILY();
5133 if (parentStr
== null){
5134 parentStr
= myName
.getFamilyStr();
5137 if(parentStr
!=null){
5138 INonViralName parentNameName
= parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
5139 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
5140 // importer.getTaxonService().save(parent);
5141 // parent = CdmBase.deproxy(parent, Taxon.class);
5143 boolean parentDoesNotExist
= true;
5144 for (TaxonNode p
: classification
.getAllNodes()){
5145 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
5146 // System.out.println(p.getTaxon().getUuid());
5147 // System.out.println(parent.getUuid());
5148 parentDoesNotExist
= false;
5149 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
5153 // if(parentDoesNotExists) {
5154 // importer.getTaxonService().save(parent);
5155 // parent = CdmBase.deproxy(parent, Taxon.class);
5156 // lookForParentNode(parentNameName, parent, ref,myName);
5158 if(parentDoesNotExist
) {
5159 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5162 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5163 importer
.getTaxonService().save(parent
);
5164 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
5168 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5171 hierarchy
.put(r
,parent
);
5180 private void handleSubgenusHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5181 String parentStr
= myName
.getGenusStr();
5182 Rank r
= Rank
.GENUS();
5184 if(parentStr
==null){
5185 parentStr
= myName
.getSubtribeStr();
5186 r
= Rank
.SUBTRIBE();
5188 if (parentStr
== null){
5189 parentStr
= myName
.getTribeStr();
5192 if (parentStr
== null){
5193 parentStr
= myName
.getSubfamilyStr();
5194 r
= Rank
.SUBFAMILY();
5196 if (parentStr
== null){
5197 parentStr
= myName
.getFamilyStr();
5200 if(parentStr
!=null){
5201 INonViralName parentNameName
= parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
5202 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
5203 // importer.getTaxonService().save(parent);
5204 // parent = CdmBase.deproxy(parent, Taxon.class);
5206 boolean parentDoesNotExists
= true;
5207 for (TaxonNode p
: classification
.getAllNodes()){
5208 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
5209 // System.out.println(p.getTaxon().getUuid());
5210 // System.out.println(parent.getUuid());
5211 parentDoesNotExists
= false;
5212 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
5216 // if(parentDoesNotExists) {
5217 // importer.getTaxonService().save(parent);
5218 // parent = CdmBase.deproxy(parent, Taxon.class);
5219 // lookForParentNode(parentNameName, parent, ref,myName);
5221 if(parentDoesNotExists
) {
5222 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5225 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5226 importer
.getTaxonService().save(parent
);
5227 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
5231 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5234 hierarchy
.put(r
,parent
);
5243 private void handleSpeciesHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5244 String parentStr
= myName
.getSubgenusStr();
5245 Rank r
= Rank
.SUBGENUS();
5247 if(parentStr
==null){
5248 parentStr
= myName
.getGenusStr();
5252 if(parentStr
==null){
5253 parentStr
= myName
.getSubtribeStr();
5254 r
= Rank
.SUBTRIBE();
5256 if (parentStr
== null){
5257 parentStr
= myName
.getTribeStr();
5260 if (parentStr
== null){
5261 parentStr
= myName
.getSubfamilyStr();
5262 r
= Rank
.SUBFAMILY();
5264 if (parentStr
== null){
5265 parentStr
= myName
.getFamilyStr();
5268 if(parentStr
!=null){
5269 Taxon parent
= handleParentName(ref
, myName
, parser
, parentStr
, r
);
5270 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5271 hierarchy
.put(r
,parent
);
5280 private void handleSubSpeciesHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5281 String parentStr
= myName
.getSpeciesStr();
5282 Rank r
= Rank
.SPECIES();
5285 if(parentStr
==null){
5286 parentStr
= myName
.getSubgenusStr();
5287 r
= Rank
.SUBGENUS();
5290 if(parentStr
==null){
5291 parentStr
= myName
.getGenusStr();
5295 if(parentStr
==null){
5296 parentStr
= myName
.getSubtribeStr();
5297 r
= Rank
.SUBTRIBE();
5299 if (parentStr
== null){
5300 parentStr
= myName
.getTribeStr();
5303 if (parentStr
== null){
5304 parentStr
= myName
.getSubfamilyStr();
5305 r
= Rank
.SUBFAMILY();
5307 if (parentStr
== null){
5308 parentStr
= myName
.getFamilyStr();
5311 if(parentStr
!=null){
5312 Taxon parent
= handleParentName(ref
, myName
, parser
, parentStr
, r
);
5313 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5314 hierarchy
.put(r
,parent
);
5324 private void handleFormHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5325 String parentStr
= myName
.getSubspeciesStr();
5326 Rank r
= Rank
.SUBSPECIES();
5329 if(parentStr
==null){
5330 parentStr
= myName
.getSpeciesStr();
5334 if(parentStr
==null){
5335 parentStr
= myName
.getSubgenusStr();
5336 r
= Rank
.SUBGENUS();
5339 if(parentStr
==null){
5340 parentStr
= myName
.getGenusStr();
5344 if(parentStr
==null){
5345 parentStr
= myName
.getSubtribeStr();
5346 r
= Rank
.SUBTRIBE();
5348 if (parentStr
== null){
5349 parentStr
= myName
.getTribeStr();
5352 if (parentStr
== null){
5353 parentStr
= myName
.getSubfamilyStr();
5354 r
= Rank
.SUBFAMILY();
5356 if (parentStr
== null){
5357 parentStr
= myName
.getFamilyStr();
5360 if(parentStr
!=null){
5361 Taxon parent
= handleParentName(ref
, myName
, parser
, parentStr
, r
);
5362 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5363 hierarchy
.put(r
,parent
);
5372 private void handleVarietyHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5373 String parentStr
= myName
.getSubspeciesStr();
5374 Rank r
= Rank
.SUBSPECIES();
5376 if(parentStr
==null){
5377 parentStr
= myName
.getSpeciesStr();
5381 if(parentStr
==null){
5382 parentStr
= myName
.getSubgenusStr();
5383 r
= Rank
.SUBGENUS();
5386 if(parentStr
==null){
5387 parentStr
= myName
.getGenusStr();
5391 if(parentStr
==null){
5392 parentStr
= myName
.getSubtribeStr();
5393 r
= Rank
.SUBTRIBE();
5395 if (parentStr
== null){
5396 parentStr
= myName
.getTribeStr();
5399 if (parentStr
== null){
5400 parentStr
= myName
.getSubfamilyStr();
5401 r
= Rank
.SUBFAMILY();
5403 if (parentStr
== null){
5404 parentStr
= myName
.getFamilyStr();
5407 if(parentStr
!=null){
5408 Taxon parent
= handleParentName(ref
, myName
, parser
, parentStr
, r
);
5409 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5410 hierarchy
.put(r
,parent
);
5422 private Taxon
handleParentName(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
, String parentStr
, Rank r
) {
5423 INonViralName parentNameName
= parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
5424 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
5425 // importer.getTaxonService().save(parent);
5426 // parent = CdmBase.deproxy(parent, Taxon.class);
5428 boolean parentDoesNotExists
= true;
5429 for (TaxonNode p
: classification
.getAllNodes()){
5430 if(p
.getTaxon().getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(parent
.getTitleCache().split("sec.")[0].trim())) {
5431 // System.out.println(p.getTaxon().getUuid());
5432 // System.out.println(parent.getUuid());
5433 parentDoesNotExists
= false;
5434 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
5438 if(parentDoesNotExists
) {
5439 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5440 // System.out.println("FOUND PARENT "+tmp.toString()+" for "+parentNameName.toString());
5443 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5444 importer
.getTaxonService().save(parent
);
5449 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5455 private void addNameDifferenceToFile(String originalname
, String atomisedname
){
5457 FileWriter fstream
= new FileWriter(TaxonXImport
.LOG_FOLDER
+ "NamesDifferent_"+classification
.getTitleCache()+".txt",true);
5458 BufferedWriter out
= new BufferedWriter(fstream
);
5459 out
.write(originalname
+" (original) versus "+replaceNull(atomisedname
)+" (atomised) \n");
5460 //Close the output stream
5462 }catch (Exception e
){//Catch exception if any
5463 System
.err
.println("Error: " + e
.getMessage());
5469 * @param nomenclaturalCode2
5472 private void addProblemNameToFile(String name
, String author
, NomenclaturalCode nomenclaturalCode2
, Rank rank
) {
5474 FileWriter fstream
= new FileWriter(TaxonXImport
.LOG_FOLDER
+ "NameNotParsed.txt",true);
5475 BufferedWriter out
= new BufferedWriter(fstream
);
5476 out
.write(name
+"\t"+replaceNull(author
)+"\t"+replaceNull(nomenclaturalCode2
)+"\t"+replaceNull(rank
)+"\n");
5477 //Close the output stream
5479 }catch (Exception e
){//Catch exception if any
5480 System
.err
.println("Error: " + e
.getMessage());
5487 * @param bestMatchingTaxon
5488 * @param insertAsExisting
5491 private void logDecision(INonViralName tnb
, Taxon bestMatchingTaxon
, boolean insertAsExisting
, Reference refMods
) {
5493 FileWriter fstream
= new FileWriter(TaxonXImport
.LOG_FOLDER
+ "Decisions_"+classification
.toString()+".txt", true);
5494 BufferedWriter out
= new BufferedWriter(fstream
);
5495 out
.write(tnb
.getTitleCache() + " sec. " + refMods
+ "\t" + bestMatchingTaxon
.getTitleCache() + "\t" + insertAsExisting
+ "\n");
5496 //Close the output stream
5498 }catch (Exception e
){//Catch exception if any
5499 System
.err
.println("Error: " + e
.getMessage());
5504 @SuppressWarnings("unused")
5505 private String
replaceNull(Object in
){
5509 if (in
.getClass().equals(NomenclaturalCode
.class)) {
5510 return ((NomenclaturalCode
)in
).getTitleCache();
5512 return in
.toString();
5517 * @param nomenclaturalCode2
5520 private void addProblemNameToFile(String type
, String name
, NomenclaturalCode nomenclaturalCode2
, Rank rank
, String problems
) {
5522 FileWriter fstream
= new FileWriter(TaxonXImport
.LOG_FOLDER
+ "NameNotParsed_"+classification
.getTitleCache()+".txt",true);
5523 BufferedWriter out
= new BufferedWriter(fstream
);
5524 out
.write(type
+"\t"+name
+"\t"+replaceNull(nomenclaturalCode2
)+"\t"+replaceNull(rank
)+"\t"+problems
+"\n");
5525 //Close the output stream
5527 }catch (Exception e
){//Catch exception if any
5528 System
.err
.println("Error: " + e
.getMessage());