3 * Copyright (C) 2013 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.taxonx2013
;
12 import java
.io
.BufferedWriter
;
14 import java
.io
.FileWriter
;
15 import java
.io
.IOException
;
17 import java
.util
.ArrayList
;
18 import java
.util
.Arrays
;
19 import java
.util
.HashMap
;
20 import java
.util
.List
;
23 import java
.util
.UUID
;
24 import java
.util
.regex
.Matcher
;
25 import java
.util
.regex
.Pattern
;
27 import javax
.xml
.transform
.TransformerException
;
28 import javax
.xml
.transform
.TransformerFactoryConfigurationError
;
30 import org
.apache
.commons
.lang
.StringUtils
;
31 import org
.apache
.log4j
.Logger
;
32 import org
.w3c
.dom
.Node
;
33 import org
.w3c
.dom
.NodeList
;
35 import com
.ibm
.lsid
.MalformedLSIDException
;
37 import eu
.etaxonomy
.cdm
.api
.facade
.DerivedUnitFacade
;
38 import eu
.etaxonomy
.cdm
.api
.service
.pager
.Pager
;
39 import eu
.etaxonomy
.cdm
.model
.agent
.AgentBase
;
40 import eu
.etaxonomy
.cdm
.model
.agent
.Person
;
41 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
42 import eu
.etaxonomy
.cdm
.model
.common
.IdentifiableSource
;
43 import eu
.etaxonomy
.cdm
.model
.common
.LSID
;
44 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
45 import eu
.etaxonomy
.cdm
.model
.common
.OriginalSourceType
;
46 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
47 import eu
.etaxonomy
.cdm
.model
.description
.FeatureNode
;
48 import eu
.etaxonomy
.cdm
.model
.description
.FeatureTree
;
49 import eu
.etaxonomy
.cdm
.model
.description
.IndividualsAssociation
;
50 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
51 import eu
.etaxonomy
.cdm
.model
.description
.TaxonNameDescription
;
52 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
53 import eu
.etaxonomy
.cdm
.model
.name
.BacterialName
;
54 import eu
.etaxonomy
.cdm
.model
.name
.BotanicalName
;
55 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
56 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatus
;
57 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatusType
;
58 import eu
.etaxonomy
.cdm
.model
.name
.NonViralName
;
59 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
60 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
61 import eu
.etaxonomy
.cdm
.model
.name
.ZoologicalName
;
62 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivedUnit
;
63 import eu
.etaxonomy
.cdm
.model
.occurrence
.SpecimenOrObservationType
;
64 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
65 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
66 import eu
.etaxonomy
.cdm
.model
.taxon
.Classification
;
67 import eu
.etaxonomy
.cdm
.model
.taxon
.Synonym
;
68 import eu
.etaxonomy
.cdm
.model
.taxon
.SynonymRelationshipType
;
69 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
70 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
71 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonNode
;
72 import eu
.etaxonomy
.cdm
.persistence
.dto
.UuidAndTitleCache
;
73 import eu
.etaxonomy
.cdm
.persistence
.query
.MatchMode
;
74 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
75 import eu
.etaxonomy
.cdm
.strategy
.parser
.INonViralNameParser
;
76 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImpl
;
77 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImplRegExBase
;
84 public class TaxonXTreatmentExtractor
extends TaxonXExtractor
{
86 private static final String PUBLICATION_YEAR
= "publicationYear";
88 private static final Logger logger
= Logger
.getLogger(TaxonXTreatmentExtractor
.class);
90 private static final String notMarkedUp
= "Not marked-up";
91 private static final UUID proIbioTreeUUID
= UUID
.fromString("2c49f506-c7f7-44de-a8b9-2e695de3769c");
92 private static final UUID OtherUUID
= UUID
.fromString("6465f8aa-2175-446f-807e-7163994b120f");
93 private static final UUID NotMarkedUpUUID
= UUID
.fromString("796fe3a5-2c9c-4a89-b298-7598ca944063");
94 private static final boolean skippQuestion
= true;
96 private final NomenclaturalCode nomenclaturalCode
;
97 private Classification classification
;
99 private String treatmentMainName
,originalTreatmentName
;
101 private final HashMap
<String
,Map
<String
,String
>> namesMap
= new HashMap
<String
, Map
<String
,String
>>();
104 private final Pattern keypattern
= Pattern
.compile("^(\\d+.*|-\\d+.*)");
105 private final Pattern keypatternend
= Pattern
.compile("^.+?\\d$");
107 private boolean maxRankRespected
=false;
108 private Map
<String
, Feature
> featuresMap
;
110 private MyName currentMyName
;
112 private Reference sourceUrlRef
;
114 private String followingText
; //text element immediately following a tax:name in tax:nomenclature TODO move do state
115 private String usedFollowingTextPrefix
; //the part of the following text which has been used during taxon name creation
117 private final TaxonXAddSources sourceHandler
= new TaxonXAddSources();
/**
 * Creates an extractor and wires up its source handler.
 *
 * @param nomenclaturalCode the nomenclatural code kept by this extractor
 * @param classification the classification kept by this extractor
 * @param importer the import the extractor works for; also supplies the agent service
 * @param configState the import state; also passed to {@code prepareCollectors}
 * @param featuresMap the features map kept by this extractor
 * @param urlSource the reference kept as source URL reference
 */
public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode, Classification classification,
        TaxonXImport importer, TaxonXImportState configState, Map<String, Feature> featuresMap,
        Reference urlSource) {
    // plain state first
    this.nomenclaturalCode = nomenclaturalCode;
    this.classification = classification;
    this.importer = importer;
    this.state2 = configState;
    this.featuresMap = featuresMap;
    this.sourceUrlRef = urlSource;

    prepareCollectors(configState, importer.getAgentService());

    // the source handler needs the same collaborators as the extractor itself
    sourceHandler.setSourceUrlRef(sourceUrlRef);
    sourceHandler.setImporter(importer);
    sourceHandler.setConfigState(configState);
}
140 * Extracts all the treatment information and saves it.
141 * @param treatmentnode: the XML Node
142 * @param tosave: the list of object to save into the CDM
143 * @param refMods: the reference extracted from the MODS
144 * @param sourceName: the URI of the document
146 @SuppressWarnings({ "rawtypes", "unused" })
148 protected void extractTreatment(Node treatmentnode
, Reference refMods
, URI sourceName
) { logger
.info("extractTreatment");
149 List
<TaxonNameBase
> namesToSave
= new ArrayList
<TaxonNameBase
>();
150 NodeList children
= treatmentnode
.getChildNodes();
151 Taxon acceptedTaxon
=null;
152 boolean hasRefgroup
=false;
155 for (int i
=0;i
<children
.getLength();i
++){
156 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:ref_group")) {
161 for (int i
=0;i
<children
.getLength();i
++){
162 Node child
= children
.item(i
);
163 acceptedTaxon
= handleSingleNode(refMods
, sourceName
, namesToSave
, child
, acceptedTaxon
);
165 // logger.info("saveUpdateNames");
166 if (maxRankRespected
){
167 importer
.getNameService().saveOrUpdate(namesToSave
);
168 importer
.getClassificationService().saveOrUpdate(classification
);
169 //logger.info("saveUpdateNames-ok");
175 private Taxon
handleSingleNode(Reference refMods
, URI sourceName
,
176 List
<TaxonNameBase
> namesToSave
, Node child
, Taxon acceptedTaxon
) {
177 Taxon defaultTaxon
=null;
179 String nodeName
= child
.getNodeName();
180 if (nodeName
.equalsIgnoreCase("tax:nomenclature")){
181 NodeList nomenclatureChildren
= child
.getChildNodes();
182 boolean containsName
= false;
183 for(int k
=0; k
<nomenclatureChildren
.getLength(); k
++){
184 if(nomenclatureChildren
.item(k
).getNodeName().equalsIgnoreCase("tax:name")){
190 reloadClassification();
191 //extract "main" the scientific name
193 acceptedTaxon
= extractNomenclature(child
, namesToSave
, refMods
);
194 }catch(ClassCastException e
){
195 //FIXME exception handling
198 // System.out.println("acceptedTaxon : "+acceptedTaxon);
200 }else if (nodeName
.equalsIgnoreCase("tax:ref_group") && maxRankRespected
){
201 reloadClassification();
202 //extract the References within the document
203 extractReferences(child
, namesToSave
,acceptedTaxon
,refMods
);
204 }else if (nodeName
.equalsIgnoreCase("tax:div") &&
205 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected
){
206 File file
= new File(TaxonXImport
.LOG_FOLDER
+ "multipleTaxonX.txt");
209 writer
= new FileWriter(file
,true);
210 writer
.write(sourceName
+"\n");
213 } catch (IOException e1
) {
214 // TODO Auto-generated catch block
215 logger
.error(e1
.getMessage());
217 // String multiple = askMultiple(children.item(i));
218 String multiple
= "Other";
219 if (multiple
.equalsIgnoreCase("other")) {
220 extractSpecificFeatureNotStructured(child
,acceptedTaxon
, defaultTaxon
,namesToSave
, refMods
,multiple
);
221 }else if (multiple
.equalsIgnoreCase("synonyms")) {
223 extractSynonyms(child
,acceptedTaxon
, refMods
, null);
224 }catch(NullPointerException e
){
225 logger
.warn("the accepted taxon is maybe null");
227 }else if(multiple
.equalsIgnoreCase("material examined")){
228 extractMaterials(child
, acceptedTaxon
, refMods
, namesToSave
);
229 }else if (multiple
.equalsIgnoreCase("distribution")){
230 extractDistribution(child
, acceptedTaxon
, defaultTaxon
, namesToSave
, refMods
);
231 }else if (multiple
.equalsIgnoreCase("type status")){
232 extractDescriptionWithReference(child
, acceptedTaxon
, defaultTaxon
,refMods
, "TypeStatus");
233 }else if (multiple
.equalsIgnoreCase("vernacular name")){
234 extractDescriptionWithReference(child
, acceptedTaxon
, defaultTaxon
,refMods
, Feature
.COMMON_NAME().getTitleCache());
236 extractSpecificFeature(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
,multiple
);
239 else if(nodeName
.equalsIgnoreCase("tax:div") &&
240 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected
){
241 extractFeature(child
,acceptedTaxon
,defaultTaxon
, namesToSave
, refMods
, Feature
.BIOLOGY_ECOLOGY());
243 else if(nodeName
.equalsIgnoreCase("tax:div") &&
244 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("vernacularName") && maxRankRespected
){
245 extractDescriptionWithReference(child
, acceptedTaxon
,defaultTaxon
,refMods
, Feature
.COMMON_NAME().getTitleCache());
247 else if(nodeName
.equalsIgnoreCase("tax:div") &&
248 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected
){
249 extractFeature(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
, Feature
.DESCRIPTION());
251 else if(nodeName
.equalsIgnoreCase("tax:div") &&
252 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected
){
253 extractFeature(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
,Feature
.DIAGNOSIS());
255 else if(nodeName
.equalsIgnoreCase("tax:div") &&
256 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected
){
257 extractFeature(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
, Feature
.DISCUSSION());
259 else if(nodeName
.equalsIgnoreCase("tax:div") &&
260 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("note") && maxRankRespected
){
261 extractFeature(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
, Feature
.DESCRIPTION());
263 else if(nodeName
.equalsIgnoreCase("tax:div") &&
264 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected
){
265 extractDistribution(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
);
267 else if(nodeName
.equalsIgnoreCase("tax:div") &&
268 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected
){
269 extractFeature(child
,acceptedTaxon
,defaultTaxon
,namesToSave
,refMods
,Feature
.ETYMOLOGY());
271 else if(nodeName
.equalsIgnoreCase("tax:div") &&
272 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected
){
273 extractMaterials(child
,acceptedTaxon
, refMods
, namesToSave
);
275 else if(nodeName
.equalsIgnoreCase("tax:figure") && maxRankRespected
){
276 extractSpecificFeature(child
,acceptedTaxon
,defaultTaxon
, namesToSave
, refMods
, "Figure");
278 else if(nodeName
.equalsIgnoreCase("tax:div") &&
279 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") && maxRankRespected
){
280 extractSpecificFeature(child
, acceptedTaxon
,defaultTaxon
, namesToSave
, refMods
, "table");
281 }else if(nodeName
.equalsIgnoreCase("tax:div") &&
282 child
.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected
){
283 //TODO IGNORE keys for the moment
284 //extractKey(children.item(i),acceptedTaxon, nameToSave,source, refMods);
285 extractSpecificFeatureNotStructured(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
,"Keys - unparsed");
288 if (! nodeName
.equalsIgnoreCase("tax:pb")){
289 //logger.info("ANOTHER KIND OF NODES: "+children.item(i).getNodeName()+", "+children.item(i).getAttributes());
290 if (child
.getAttributes() !=null) {
291 logger
.info("First Attribute: " + child
.getAttributes().item(0));
293 extractSpecificFeatureNotStructured(child
,acceptedTaxon
,defaultTaxon
,namesToSave
, refMods
, notMarkedUp
);
296 logger
.warn("Unhandled");
299 return acceptedTaxon
;
303 protected Map
<String
,Feature
> getFeaturesUsed(){
309 private void buildFeatureTree() {
310 logger
.info("buildFeatureTree");
311 FeatureTree proibiospheretree
= importer
.getFeatureTreeService().find(proIbioTreeUUID
);
312 if (proibiospheretree
== null){
313 List
<FeatureTree
> trees
= importer
.getFeatureTreeService().list(FeatureTree
.class, null, null, null, null);
314 if (trees
.size()==1) {
315 FeatureTree ft
= trees
.get(0);
316 if (featuresMap
==null) {
317 featuresMap
=new HashMap
<String
, Feature
>();
319 for (Feature feature
: ft
.getDistinctFeatures()){
321 featuresMap
.put(feature
.getTitleCache(), feature
);
325 proibiospheretree
= FeatureTree
.NewInstance();
326 proibiospheretree
.setUuid(proIbioTreeUUID
);
328 // FeatureNode root = proibiospheretree.getRoot();
329 FeatureNode root2
= proibiospheretree
.getRoot();
331 int nbChildren
= root2
.getChildCount()-1;
332 while (nbChildren
>-1){
334 root2
.removeChild(nbChildren
);
335 }catch(Exception e
){logger
.warn("Can't remove child from FeatureTree "+e
);}
341 for (Feature feature
:featuresMap
.values()) {
342 root2
.addChild(FeatureNode
.NewInstance(feature
));
344 importer
.getFeatureTreeService().saveOrUpdate(proibiospheretree
);
351 * @param acceptedTaxon: the current acceptedTaxon
352 * @param nametosave: the list of objects to save into the CDM
353 * @param refMods: the current reference extracted from the MODS
355 /* @SuppressWarnings("rawtypes")
356 private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonNameBase> nametosave, Reference refMods) {
357 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
359 NodeList children = keys.getChildNodes();
361 PolytomousKey poly = PolytomousKey.NewInstance();
362 poly.addSource(OriginalSourceType.Import, null,null,refMods,null);
363 poly.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
364 poly.addTaxonomicScope(acceptedTaxon);
365 poly.setTitleCache("bloup", true);
366 // poly.addCoveredTaxon(acceptedTaxon);
367 PolytomousKeyNode root = poly.getRoot();
368 PolytomousKeyNode previous = null,tmpKey=null;
370 List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
372 // String fullContent = keys.getTextContent();
373 for (int i=0;i<children.getLength();i++){
374 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
375 NodeList paragraph = children.item(i).getChildNodes();
378 for (int j=0;j<paragraph.getLength();j++){
379 if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
380 if (! paragraph.item(j).getTextContent().trim().isEmpty()){
381 key+=paragraph.item(j).getTextContent().trim();
382 // logger.info("KEY: "+j+"--"+key);
385 if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
386 taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
389 // logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
390 if (keypattern.matcher(key).matches()){
391 tmpKey = PolytomousKeyNode.NewInstance(key);
392 if (taxonKey!=null) {
393 tmpKey.setTaxon(taxonKey);
395 polyNodes.add(tmpKey);
396 if (previous == null) {
397 root.addChild(tmpKey);
399 previous.addChild(tmpKey);
403 tmpKey=PolytomousKeyNode.NewInstance(key);
404 if (taxonKey!=null) {
405 tmpKey.setTaxon(taxonKey);
407 polyNodes.add(tmpKey);
408 if (keypatternend.matcher(key).matches()) {
409 root.addChild(tmpKey);
412 previous.addChild(tmpKey);
419 importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
420 importer.getPolytomousKeyService().saveOrUpdate(poly);
426 * @param taxons: the XML Nodegroup
427 * @param nametosave: the list of objects to save into the CDM
428 * @param acceptedTaxon: the current accepted Taxon
429 * @param refMods: the current reference extracted from the MODS
431 * @return Taxon object built
433 @SuppressWarnings({ "rawtypes", "unused" })
434 private TaxonNameBase
getTaxonNameBaseFromXML(Node taxons
, List
<TaxonNameBase
> nametosave
, Reference refMods
, boolean isSynonym
) {
435 // logger.info("getTaxonFromXML");
436 // logger.info("acceptedTaxon: "+acceptedTaxon);
437 logger
.info("getTaxonNameBaseFromXML");
438 TaxonNameBase nameToBeFilled
= null;
440 currentMyName
=new MyName(isSynonym
);
442 NomenclaturalStatusType statusType
= null;
444 String followingText
= null; //needs to be checked if following text is possible
445 currentMyName
= extractScientificName(taxons
,refMods
, null);
446 } catch (TransformerFactoryConfigurationError e1
) {
448 } catch (TransformerException e1
) {
451 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
453 nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
454 if (nameToBeFilled.hasProblem() &&
455 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
456 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
457 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
458 nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
461 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
463 nameToBeFilled
= currentMyName
.getTaxonNameBase();
464 return nameToBeFilled
;
472 private void reloadClassification() {
473 logger
.info("reloadClassification");
474 Classification cl
= importer
.getClassificationService().find(classification
.getUuid());
478 importer
.getClassificationService().saveOrUpdate(classification
);
479 classification
= importer
.getClassificationService().find(classification
.getUuid());
484 // * Create a Taxon for the current NameBase, based on the current reference
485 // * @param taxonNameBase
486 // * @param refMods: the current reference extracted from the MODS
489 // @SuppressWarnings({ "unused", "rawtypes" })
490 // private Taxon getTaxon(TaxonNameBase taxonNameBase, Reference refMods) {
491 // Taxon t = new Taxon(taxonNameBase,null );
492 // if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
493 // t.setSec(configState.getConfig().getSecundum());
494 // logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
496 // /*<<<<<<< .courant
497 // boolean sourceExists=false;
498 // Set<IdentifiableSource> sources = t.getSources();
499 // for (IdentifiableSource src : sources){
500 // String micro = src.getCitationMicroReference();
501 // Reference r = src.getCitation();
502 // if (r.equals(refMods) && micro == null) {
503 // sourceExists=true;
506 // if(!sourceExists) {
507 // t.addSource(null,null,refMods,null);
510 // t.addSource(OriginalSourceType.Import,null,null,refMods,null);
511 // t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
515 private void extractDescriptionWithReference(Node typestatus
, Taxon acceptedTaxon
, Taxon defaultTaxon
, Reference refMods
,
516 String featureName
) {
517 // System.out.println("extractDescriptionWithReference !");
518 logger
.info("extractDescriptionWithReference");
519 NodeList children
= typestatus
.getChildNodes();
521 Feature currentFeature
=getFeatureObjectFromString(featureName
);
523 String r
="";String s
="";
524 for (int i
=0;i
<children
.getLength();i
++){
525 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
526 s
+=children
.item(i
).getTextContent().trim();
528 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:bibref")){
529 r
+= children
.item(i
).getTextContent().trim();
531 if (s
.indexOf(r
)>-1) {
536 Reference currentref
= ReferenceFactory
.newGeneric();
538 currentref
.setTitleCache(r
, true);
542 setParticularDescription(s
,acceptedTaxon
,defaultTaxon
, currentref
, refMods
,currentFeature
);
547 * @param distribution: the XML node group
548 * @param acceptedTaxon: the current accepted Taxon
549 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
550 * @param refMods: the current reference extracted from the MODS
552 @SuppressWarnings("rawtypes")
553 private void extractDistribution(Node distribution
, Taxon acceptedTaxon
, Taxon defaultTaxon
, List
<TaxonNameBase
> nametosave
, Reference refMods
) {
554 logger
.info("extractDistribution");
555 // logger.info("acceptedTaxon: "+acceptedTaxon);
556 NodeList children
= distribution
.getChildNodes();
557 Map
<Integer
,List
<MySpecimenOrObservation
>> specimenOrObservations
= new HashMap
<Integer
, List
<MySpecimenOrObservation
>>();
558 Map
<Integer
,String
> descriptionsFulltext
= new HashMap
<Integer
,String
>();
560 for (int i
=0;i
<children
.getLength();i
++){
561 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
562 NodeList paragraph
= children
.item(i
).getChildNodes();
563 for (int j
=0;j
<paragraph
.getLength();j
++){
564 if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("#text")){
565 extractText(descriptionsFulltext
, i
, paragraph
.item(j
));
567 else if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
568 extractInLine(nametosave
, refMods
, descriptionsFulltext
, i
,paragraph
.item(j
));
570 else if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("tax:collection_event")){
571 MySpecimenOrObservation specimenOrObservation
= new MySpecimenOrObservation();
572 DerivedUnit derivedUnitBase
= null;
573 specimenOrObservation
= extractSpecimenOrObservation(paragraph
.item(j
), derivedUnitBase
, SpecimenOrObservationType
.DerivedUnit
, null);
574 extractTextFromSpecimenOrObservation(specimenOrObservations
, descriptionsFulltext
, i
, specimenOrObservation
);
581 for (int k
:descriptionsFulltext
.keySet()) {
586 for (int k
:specimenOrObservations
.keySet()) {
593 if(acceptedTaxon
!=null){
594 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
595 Feature currentFeature
= Feature
.DISTRIBUTION();
596 // DerivedUnit derivedUnitBase=null;
598 for (int k
=0;k
<=m
;k
++){
599 if(specimenOrObservations
.keySet().contains(k
)){
600 for (MySpecimenOrObservation soo
:specimenOrObservations
.get(k
) ) {
601 handleAssociation(acceptedTaxon
, refMods
, td
, soo
);
605 if (descriptionsFulltext
.keySet().contains(k
)){
606 if (!stringIsEmpty(descriptionsFulltext
.get(k
).trim()) && (descriptionsFulltext
.get(k
).startsWith("Hab.") || descriptionsFulltext
.get(k
).startsWith("Habitat"))){
607 setParticularDescription(descriptionsFulltext
.get(k
),acceptedTaxon
,defaultTaxon
, refMods
, Feature
.HABITAT());
611 handleTextData(refMods
, descriptionsFulltext
, td
, currentFeature
, k
);
615 if (descriptionsFulltext
.keySet().contains(k
) || specimenOrObservations
.keySet().contains(k
)){
616 acceptedTaxon
.addDescription(td
);
617 sourceHandler
.addAndSaveSource(refMods
, td
, null);
618 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
626 * @param descriptionsFulltext
628 * @param currentFeature
631 private void handleTextData(Reference refMods
, Map
<Integer
, String
> descriptionsFulltext
, TaxonDescription td
,
632 Feature currentFeature
, int k
) {
633 //logger.info("handleTextData");
634 TextData textData
= TextData
.NewInstance();
635 textData
.setFeature(currentFeature
);
636 textData
.putText(Language
.UNKNOWN_LANGUAGE(), descriptionsFulltext
.get(k
));
637 sourceHandler
.addSource(refMods
, textData
);
638 td
.addElement(textData
);
642 * @param acceptedTaxon
647 private void handleAssociation(Taxon acceptedTaxon
, Reference refMods
, TaxonDescription td
, MySpecimenOrObservation soo
) {
648 logger
.info("handleAssociation");
649 String descr
=soo
.getDescr();
650 DerivedUnit derivedUnitBase
= soo
.getDerivedUnitBase();
652 sourceHandler
.addAndSaveSource(refMods
, derivedUnitBase
);
654 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
656 Feature feature
=null;
657 feature
= makeFeature(derivedUnitBase
);
658 if(!StringUtils
.isEmpty(descr
)) {
659 derivedUnitBase
.setTitleCache(descr
, true);
662 IndividualsAssociation indAssociation
= createIndividualAssociation(refMods
, derivedUnitBase
, feature
);
664 taxonDescription
.addElement(indAssociation
);
665 sourceHandler
.addAndSaveSource(refMods
, taxonDescription
,null);
666 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
667 td
.setDescribedSpecimenOrObservation(soo
.getDerivedUnitBase());
671 * create an individualAssociation
673 * @param derivedUnitBase
677 private IndividualsAssociation
createIndividualAssociation(Reference refMods
, DerivedUnit derivedUnitBase
,
679 logger
.info("createIndividualAssociation");
680 IndividualsAssociation indAssociation
= IndividualsAssociation
.NewInstance();
681 indAssociation
.setAssociatedSpecimenOrObservation(derivedUnitBase
);
682 indAssociation
.setFeature(feature
);
683 indAssociation
= sourceHandler
.addSource(refMods
, indAssociation
);
684 return indAssociation
;
688 * @param specimenOrObservations
689 * @param descriptionsFulltext
691 * @param specimenOrObservation
693 private void extractTextFromSpecimenOrObservation(Map
<Integer
, List
<MySpecimenOrObservation
>> specimenOrObservations
,
694 Map
<Integer
, String
> descriptionsFulltext
, int i
, MySpecimenOrObservation specimenOrObservation
) {
695 logger
.info("extractTextFromSpecimenOrObservation");
696 List
<MySpecimenOrObservation
> speObsList
= specimenOrObservations
.get(i
);
697 if (speObsList
== null) {
698 speObsList
=new ArrayList
<MySpecimenOrObservation
>();
700 speObsList
.add(specimenOrObservation
);
701 specimenOrObservations
.put(i
,speObsList
);
703 String s
= specimenOrObservation
.getDerivedUnitBase().toString();
704 if (descriptionsFulltext
.get(i
) !=null){
705 s
= descriptionsFulltext
.get(i
)+" "+s
;
707 descriptionsFulltext
.put(i
, s
);
711 * Extract the text with the inline link to a taxon
714 * @param descriptionsFulltext
718 @SuppressWarnings("rawtypes")
719 private void extractInLine(List
<TaxonNameBase
> nametosave
, Reference refMods
, Map
<Integer
, String
> descriptionsFulltext
,
720 int i
, Node paragraph
) {
721 //logger.info("extractInLine");
722 String inLine
=getInlineTextForName(nametosave
, refMods
, paragraph
);
723 if (descriptionsFulltext
.get(i
) !=null){
724 inLine
= descriptionsFulltext
.get(i
)+inLine
;
726 descriptionsFulltext
.put(i
, inLine
);
730 * Extract the raw text from a Node
731 * @param descriptionsFulltext
735 private void extractText(Map
<Integer
, String
> descriptionsFulltext
, int i
, Node node
) {
736 //logger.info("extractText");
737 if(!node
.getTextContent().trim().isEmpty()) {
738 String s
=node
.getTextContent().trim();
739 if (descriptionsFulltext
.get(i
) !=null){
740 s
= descriptionsFulltext
.get(i
)+" "+s
;
742 descriptionsFulltext
.put(i
, s
);
748 * @param materials: the XML node group
749 * @param acceptedTaxon: the current accepted Taxon
750 * @param refMods: the current reference extracted from the MODS
752 @SuppressWarnings("rawtypes")
753 private void extractMaterials(Node materials
, Taxon acceptedTaxon
, Reference refMods
,List
<TaxonNameBase
> nametosave
) {
754 logger
.info("EXTRACTMATERIALS");
755 // logger.info("acceptedTaxon: "+acceptedTaxon);
756 NodeList children
= materials
.getChildNodes();
757 NodeList events
= null;
761 for (int i
=0;i
<children
.getLength();i
++){
762 String rawAssociation
="";
764 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
765 events
= children
.item(i
).getChildNodes();
766 for(int k
=0;k
<events
.getLength();k
++){
767 if (events
.item(k
).getNodeName().equalsIgnoreCase("tax:name")){
768 String inLine
= getInlineTextForName(nametosave
, refMods
, events
.item(k
));
769 if(!inLine
.isEmpty()) {
770 rawAssociation
+=inLine
;
773 if (! events
.item(k
).getNodeName().equalsIgnoreCase("tax:name")
774 && !events
.item(k
).getNodeName().equalsIgnoreCase("tax:collection_event")){
775 rawAssociation
+= events
.item(k
).getTextContent().trim();
777 if(events
.item(k
).getNodeName().equalsIgnoreCase("tax:collection_event")){
778 if (!containsDistinctLetters(rawAssociation
.replaceAll(";",""))) {
779 rawAssociation
="no description text";
782 handleDerivedUnitFacadeAndBase(acceptedTaxon
, refMods
, events
.item(k
), rawAssociation
);
784 if (!rawAssociation
.isEmpty() && !added
){
786 Feature feature
= Feature
.MATERIALS_EXAMINED();
787 featuresMap
.put(feature
.getTitleCache(),feature
);
789 TextData textData
= createTextData(rawAssociation
, refMods
, feature
);
791 if(! rawAssociation
.isEmpty() && (acceptedTaxon
!=null)){
792 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
793 td
.addElement(textData
);
794 acceptedTaxon
.addDescription(td
);
795 sourceHandler
.addAndSaveSource(refMods
, td
, null);
797 // DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
798 // derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
800 // TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
801 // acceptedTaxon.addDescription(taxonDescription);
803 // IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
805 // Feature feature = Feature.MATERIALS_EXAMINED();
806 // featuresMap.put(feature.getTitleCache(),feature);
807 // if(!StringUtils.isEmpty(rawAssociation)) {
808 // derivedUnitBase.setTitleCache(rawAssociation, true);
810 // indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
811 // indAssociation.setFeature(feature);
812 // indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
814 // /*boolean sourceExists=false;
815 // Set<DescriptionElementSource> dsources = indAssociation.getSources();
816 // for (DescriptionElementSource src : dsources){
817 // String micro = src.getCitationMicroReference();
818 // Reference r = src.getCitation();
819 // if (r.equals(refMods) && micro == null) {
820 // sourceExists=true;
823 // if(!sourceExists) {
824 // indAssociation.addSource(null, null, refMods, null);
826 // taxonDescription.addElement(indAssociation);
827 // taxonDescription.setTaxon(acceptedTaxon);
828 // taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
830 // /*sourceExists=false;
831 // Set<IdentifiableSource> sources = taxonDescription.getSources();
832 // for (IdentifiableSource src : sources){
833 // String micro = src.getCitationMicroReference();
834 // Reference r = src.getCitation();
835 // if (r.equals(refMods) && micro == null) {
836 // sourceExists=true;
839 // if(!sourceExists) {
840 // taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
843 // importer.getDescriptionService().saveOrUpdate(taxonDescription);
844 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
854 * @param acceptedTaxon
857 * @param rawAssociation
// Builds a derived unit for a raw specimen association, enriches it from the XML event
// node and attaches it to the accepted taxon's description as an IndividualsAssociation.
//   acceptedTaxon  - taxon whose description receives the association and which is saved at the end
//   refMods        - reference registered as source on the derived unit and the taxon description
//   event          - XML node handed to extractSpecimenOrObservation
//   rawAssociation - raw association text; every ';' is removed before it is passed to getFacade
// NOTE(review): acceptedTaxon is null-checked when its name is read, but it is passed
// unchecked to getTaxonDescription and saveOrUpdate further down - confirm it cannot be null here.
860 private void handleDerivedUnitFacadeAndBase(Taxon acceptedTaxon
, Reference refMods
, Node event
,
861 String rawAssociation
) {
862 logger
.info("handleDerivedUnitFacadeAndBase");
864 DerivedUnit derivedUnitBase
;
865 MySpecimenOrObservation myspecimenOrObservation
;
// Create the facade from the association text and take its inner derived unit as starting point.
866 DerivedUnitFacade derivedUnitFacade
= getFacade(rawAssociation
.replaceAll(";",""),SpecimenOrObservationType
.DerivedUnit
);
867 derivedUnitBase
= derivedUnitFacade
.innerDerivedUnit();
869 sourceHandler
.addAndSaveSource(refMods
, derivedUnitBase
);
871 //TODO this may not always be correct, ask user
872 TaxonNameBase
<?
,?
> typifiableName
= acceptedTaxon
!= null ? acceptedTaxon
.getName() : null;
// The extraction result supplies both the derived unit used from here on and the description text.
873 myspecimenOrObservation
= extractSpecimenOrObservation(event
,derivedUnitBase
,SpecimenOrObservationType
.DerivedUnit
, typifiableName
);
874 derivedUnitBase
= myspecimenOrObservation
.getDerivedUnitBase();
875 descr
=myspecimenOrObservation
.getDescr();
// The source is registered again because derivedUnitBase was re-assigned from the extraction result.
877 sourceHandler
.addAndSaveSource(refMods
, derivedUnitBase
);
879 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
// The feature is derived from the unit itself and remembered in featuresMap under its title cache.
881 Feature feature
= makeFeature(derivedUnitBase
);
882 featuresMap
.put(feature
.getTitleCache(),feature
);
// A non-empty extracted description becomes the title cache of the derived unit.
883 if(!StringUtils
.isEmpty(descr
)) {
884 derivedUnitBase
.setTitleCache(descr
, true);
887 IndividualsAssociation indAssociation
= createIndividualAssociation(refMods
, derivedUnitBase
, feature
);
// Attach the association, register the source on the description and persist via the taxon.
889 taxonDescription
.addElement(indAssociation
);
890 sourceHandler
.addAndSaveSource(refMods
, taxonDescription
,null);
891 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
898 * @param materials: the XML node group
899 * @param acceptedTaxon: the current accepted Taxon
900 * @param refMods: the current reference extracted from the MODS
// Extracts a specimen/observation directly from a materials XML node, links it to the
// accepted taxon as an IndividualsAssociation and returns the title cache of the derived unit.
//   materials     - XML node handed to extractSpecimenOrObservation
//   acceptedTaxon - taxon whose description receives the association
//   refMods       - reference registered as source on the derived unit and the description
//   event         - event kind; compared case-insensitively with "collection" to pick the feature
//   currentName   - name handed to extractSpecimenOrObservation (a caller in this file passes null)
// NOTE(review): event.equalsIgnoreCase(...) throws a NullPointerException for a null event.
902 private String
extractMaterialsDirect(Node materials
, Taxon acceptedTaxon
, Reference refMods
, String event
, NonViralName
<?
> currentName
) {
903 logger
.info("extractMaterialsDirect");
904 // logger.info("acceptedTaxon: "+acceptedTaxon);
// derivedUnitBase is passed in as null; the unit actually used is taken from the extraction result.
907 DerivedUnit derivedUnitBase
=null;
908 MySpecimenOrObservation myspecimenOrObservation
= extractSpecimenOrObservation(materials
,derivedUnitBase
, SpecimenOrObservationType
.DerivedUnit
, currentName
);
909 derivedUnitBase
= myspecimenOrObservation
.getDerivedUnitBase();
911 sourceHandler
.addAndSaveSource(refMods
, derivedUnitBase
);
913 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
// Feature choice: makeFeature(derivedUnitBase) for a "collection" event,
// Feature.MATERIALS_EXAMINED() being the other candidate assigned below.
915 Feature feature
=null;
916 if (event
.equalsIgnoreCase("collection")){
917 feature
= makeFeature(derivedUnitBase
);
920 feature
= Feature
.MATERIALS_EXAMINED();
922 featuresMap
.put(feature
.getTitleCache(), feature
);
// A non-empty extracted description becomes the title cache of the derived unit.
924 descr
=myspecimenOrObservation
.getDescr();
925 if(!StringUtils
.isEmpty(descr
)) {
926 derivedUnitBase
.setTitleCache(descr
, true);
929 IndividualsAssociation indAssociation
= createIndividualAssociation(refMods
, derivedUnitBase
, feature
);
// Attach the association, register the source on the description and persist via the taxon.
931 taxonDescription
.addElement(indAssociation
);
932 sourceHandler
.addAndSaveSource(refMods
, taxonDescription
,null);
933 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
935 return derivedUnitBase
.getTitleCache();
941 * @param description: the XML node group
942 * @param acceptedTaxon: the current acceptedTaxon
943 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
944 * @param nametosave: the list of objects to save into the CDM
945 * @param refMods: the current reference extracted from the MODS
946 * @param featureName: the feature name
// Walks the direct children of a description node for one named feature.
// Paragraph ("tax:p") content and bare text are stored as text descriptions through
// setParticularDescription; for the feature name "table" the tax:div children are
// converted to html rows by the extractTable* helpers. Non-empty direct text children are
// also collected in 'text' and returned joined by a single space.
// NOTE(review): getAttributes().getNamedItem("otherType") returns null when the attribute
// is absent, so a tax:div without "otherType" would raise a NullPointerException - confirm
// the input always carries it.
948 @SuppressWarnings({ "rawtypes"})
949 private String
extractSpecificFeature(Node description
, Taxon acceptedTaxon
, Taxon defaultTaxon
,
950 List
<TaxonNameBase
> nametosave
, Reference refMods
, String featureName
) {
951 logger
.info("extractSpecificFeature "+featureName
);
952 // System.out.println("GRUUUUuu");
953 NodeList children
= description
.getChildNodes();
954 NodeList insideNodes
;
957 String localdescr
="";
958 List
<String
> blabla
=null;
959 List
<String
> text
= new ArrayList
<String
>();
961 String table
="<table>";
// Resolve the Feature term matching the given name once for the whole node.
965 Feature currentFeature
=getFeatureObjectFromString(featureName
);
967 // String fullContent = description.getTextContent();
968 for (int i
=0;i
<children
.getLength();i
++){
// Non-blank direct text nodes feed the returned text.
970 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text") && !children
.item(i
).getTextContent().trim().isEmpty()){
971 text
.add(children
.item(i
).getTextContent().trim());
// Table handling: a "thead" div goes through extractTableHead, other children through
// extractTableLine; "<tr></tr>" is the marker for a row without cells.
973 if (featureName
.equalsIgnoreCase("table")){
974 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
975 children
.item(i
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("thead")){
976 head
= extractTableHead(children
.item(i
));
978 line
= extractTableLine(children
.item(i
));
979 if (!line
.equalsIgnoreCase("<tr></tr>")) {
// A div marked as "tr" is converted directly from its child nodes.
983 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
984 children
.item(i
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
985 line
= extractTableLineWithColumn(children
.item(i
).getChildNodes());
986 if(!line
.equalsIgnoreCase("<tr></tr>")) {
// Paragraph: collect inline name markup and non-blank text pieces of this paragraph.
991 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
992 insideNodes
=children
.item(i
).getChildNodes();
993 blabla
= new ArrayList
<String
>();
994 for (int j
=0;j
<insideNodes
.getLength();j
++){
995 Node insideNode
= insideNodes
.item(j
);
996 if (insideNode
.getNodeName().equalsIgnoreCase("tax:name")){
997 String inlinetext
= getInlineTextForName(nametosave
, refMods
, insideNode
);
998 if (!inlinetext
.isEmpty()) {
999 blabla
.add(inlinetext
);
1002 else if (insideNode
.getNodeName().equalsIgnoreCase("#text")) {
1003 if(!insideNode
.getTextContent().trim().isEmpty()){
1004 blabla
.add(insideNode
.getTextContent().trim());
1005 // localdescr += insideNodes.item(j).getTextContent().trim();
// Store the paragraph as a description unless it is empty or punctuation only.
1009 if (!blabla
.isEmpty()) {
1010 String blaStr
= StringUtils
.join(blabla
," ").trim();
1011 if(!stringIsEmpty(blaStr
)) {
1012 setParticularDescription(blaStr
,acceptedTaxon
,defaultTaxon
, refMods
,currentFeature
);
// Bare text directly below the description node is stored as its own description.
1018 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text")){
1019 if(!children
.item(i
).getTextContent().trim().isEmpty()){
1020 localdescr
= children
.item(i
).getTextContent().trim();
1021 if(!stringIsEmpty(localdescr
)) {
1022 setParticularDescription(localdescr
,acceptedTaxon
,defaultTaxon
, refMods
,currentFeature
);
// "<table></table>" is the marker for a table without any row.
1029 if (!table
.equalsIgnoreCase("<table></table>")){
1030 // System.out.println("TABLE : "+table);
// The collected direct text is returned as one space-separated string.
1034 if (text
!=null && !text
.isEmpty()) {
1035 return StringUtils
.join(text
," ");
// Converts one table child node into an html row: when the node's "otherType"
// attribute equals "tr" (case-insensitive) its children are handed to
// extractTableLineWithColumn.
// NOTE(review): getNamedItem("otherType") returns null for a node without that
// attribute, which would raise a NullPointerException - confirm callers only pass such nodes.
1047 private String
extractTableLine(Node child
) {
1048 //logger.info("extractTableLine");
1051 if (child
.getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1052 line
= extractTableLineWithColumn(child
.getChildNodes());
// Builds the html for a table head node: every child that is a tax:div with
// otherType="tr" (case-insensitive) is converted with extractTableLineWithColumn.
// NOTE(review): a tax:div child without an "otherType" attribute would raise a
// NullPointerException, because getNamedItem returns null in that case.
1063 private String
extractTableHead(Node child
) {
1064 //logger.info("extractTableHead");
1068 NodeList trNodes
= child
.getChildNodes();
1069 for (int k
=0;k
<trNodes
.getLength();k
++){
1070 if (trNodes
.item(k
).getNodeName().equalsIgnoreCase("tax:div")
1071 && trNodes
.item(k
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1072 line
= extractTableLineWithColumn(trNodes
.item(k
).getChildNodes());
1081 * build a html table line, with td columns
1083 * @return an html coded line
1085 private String
extractTableLineWithColumn(NodeList tdNodes
) {
1086 //logger.info("extractTableLineWithColumn");
1089 for (int l
=0;l
<tdNodes
.getLength();l
++){
1090 if (tdNodes
.item(l
).getNodeName().equalsIgnoreCase("tax:p")){
1091 line
+="<td>"+tdNodes
.item(l
).getTextContent()+"</td>";
1099 * @param description: the XML node group
1100 * @param acceptedTaxon: the current acceptedTaxon
1101 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1102 * @param nametosave: the list of objects to save into the CDM
1103 * @param refMods: the current reference extracted from the MODS
1104 * @param featureName: the feature name
// Variant of the feature extraction for descriptions without further structure: all
// inline names and non-blank text pieces found in the tax:p children and in the direct
// text children are collected in one list, joined by a single space and stored as one
// text description for the feature resolved from featureName.
1106 @SuppressWarnings({ "unused", "rawtypes" })
1107 private String
extractSpecificFeatureNotStructured(Node description
, Taxon acceptedTaxon
, Taxon defaultTaxon
,
1108 List
<TaxonNameBase
> nameToSave
, Reference refMods
, String featureName
) {
1109 logger
.info("extractSpecificFeatureNotStructured " + featureName
);
1110 NodeList children
= description
.getChildNodes();
1111 NodeList insideNodes
;
1112 List
<String
> blabla
= new ArrayList
<String
>();
1115 Feature currentFeature
= getFeatureObjectFromString(featureName
);
1117 String fullContent
= description
.getTextContent();
1118 for (int i
=0;i
<children
.getLength();i
++){
// Paragraph: inline names are converted by getInlineTextForName, text nodes are added trimmed.
1119 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1120 insideNodes
=children
.item(i
).getChildNodes();
1121 for (int j
=0;j
<insideNodes
.getLength();j
++){
1122 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1123 String inlineText
=getInlineTextForName(nameToSave
, refMods
, insideNodes
.item(j
));
1124 if(!inlineText
.isEmpty()) {
1125 blabla
.add(inlineText
);
1128 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("#text")) {
1129 if(!insideNodes
.item(j
).getTextContent().trim().isEmpty()){
1130 blabla
.add(insideNodes
.item(j
).getTextContent().trim());
// Non-blank text directly below the description node joins the same list.
1135 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text")){
1136 if(!children
.item(i
).getTextContent().trim().isEmpty()){
1137 String localdescr
= children
.item(i
).getTextContent().trim();
1138 if(!localdescr
.isEmpty())
1140 blabla
.add(localdescr
);
// Everything collected is stored as a single description unless it is empty or punctuation only.
1146 if (blabla
!=null && !blabla
.isEmpty()) {
1147 String blaStr
= StringUtils
.join(blabla
," ").trim();
1148 if (! stringIsEmpty(blaStr
)) {
1149 setParticularDescription(blaStr
,acceptedTaxon
,defaultTaxon
, refMods
,currentFeature
);
1164 private boolean stringIsEmpty(String blaStr
) {
1165 if (blaStr
.matches("(\\.|,|;|\\.-)?")){
1175 * @param insideNodes
// Produces the inline text for a tax:name node found inside running text.
// Two results are visible here: the concatenated raw text of the node's #text children
// (line breaks removed, trimmed), or a cdm markup element that links the text to a
// taxon (<cdm:taxon uuid='..'>) or, when no taxon is available, to a name
// (<cdm:taxonName uuid='..'>). The linked label is the part of the name's title cache
// in front of the first "sec".
// NOTE(review): split("sec") cuts at the first occurrence of these three letters anywhere
// in the title cache, not only at a " sec. " marker - confirm this is intended.
1179 @SuppressWarnings({ "rawtypes" })
1180 private String
getInlineTextForName(List
<TaxonNameBase
> nametosave
, Reference refMods
, Node insideNode
) {
1182 NodeList children
= insideNode
.getChildNodes();
// Concatenate the text of all direct #text children of the name node.
1184 for (int i
=0;i
<children
.getLength();i
++){
1185 Node nameChild
= children
.item(i
);
1186 if(nameChild
.getNodeName().equalsIgnoreCase("#text")){
1187 result
+= nameChild
.getTextContent();
1192 return result
.replace("\n", "").trim();
// Resolve the name from the XML; the taxon is taken from the current name holder (currentMyName).
1194 TaxonNameBase tnb
= getTaxonNameBaseFromXML(insideNode
, nametosave
,refMods
,false);
1195 // Taxon tax = getTaxonFromTxonNameBase(tnb, refMods);
1196 Taxon tax
= currentMyName
.getTaxon();
1197 if(tnb
!=null && tax
!= null){
1198 String linkedTaxon
= tnb
.getTitleCache().split("sec")[0];//TODO NOT IMPLEMENTED IN THE CDM YET
1199 return "<cdm:taxon uuid='"+tax
.getUuid()+"'>"+linkedTaxon
+"</cdm:taxon>";
1200 }else if (tnb
!= null && tax
== null){
1202 return "<cdm:taxonName uuid='" + tnb
.getUuid() +"'>" + tnb
.getTitleCache().split("sec")[0] +"</cdm:taxonName>";
1204 logger
.warn("Inline text has no content yet");
1211 * @param featureName
// Returns the Feature term whose title cache equals featureName (case-insensitive).
// All Feature terms are listed from the term service and scanned; when none matches, a
// new Feature is created with featureName used for all three NewInstance arguments,
// given a fixed uuid for the names "Other" and notMarkedUp, and saved via the term service.
1214 @SuppressWarnings("rawtypes")
1215 private Feature
getFeatureObjectFromString(String featureName
) {
1216 logger
.info("getFeatureObjectFromString");
1217 List
<Feature
> features
= importer
.getTermService().list(Feature
.class, null,null,null,null);
1218 Feature currentFeature
=null;
// Linear scan over the listed features, comparing title caches.
1219 for (Feature feature
: features
){
1220 String tmpF
= feature
.getTitleCache();
1221 if (tmpF
.equalsIgnoreCase(featureName
)) {
1222 currentFeature
=feature
;
1223 // System.out.println("currentFeatureFromList "+currentFeature.getUuid());
// No existing term found: create one, with a well-known uuid for the two special names.
1226 if (currentFeature
== null) {
1227 currentFeature
=Feature
.NewInstance(featureName
, featureName
, featureName
);
1228 if(featureName
.equalsIgnoreCase("Other")){
1229 currentFeature
.setUuid(OtherUUID
);
1231 if(featureName
.equalsIgnoreCase(notMarkedUp
)){
1232 currentFeature
.setUuid(NotMarkedUpUUID
);
1234 importer
.getTermService().saveOrUpdate(currentFeature
);
1236 return currentFeature
;
1243 * @param children: the XML node group
1244 * @param nametosave: the list of objects to save into the CDM
1245 * @param acceptedTaxon: the current acceptedTaxon
1246 * @param refMods: the current reference extracted from the MODS
1247 * @param fullContent :the parsed XML content
1248 * @return a list of description (text)
// Parses the children of a paragraph-like node into a list of description strings.
// Handled direct children: non-blank #text (appended to descr), tax:p (its inline names,
// text, bibrefs, figures, tables and collection events), tax:figure, and tax:div with
// type="Other" and otherType="table". The text accumulated in descr is stored under the
// "not marked up" feature when it is not empty. Returns the collected description strings.
// NOTE(review): getNamedItem("type") / getNamedItem("otherType") return null when the
// attribute is missing, so a tax:div without them would raise a NullPointerException.
1250 @SuppressWarnings({ "unused", "rawtypes" })
1251 private List
<String
> parseParagraph(List
<TaxonNameBase
> namesToSave
, Taxon acceptedTaxon
, Reference refMods
, Node paragraph
, Feature feature
){
1252 logger
.info("parseParagraph "+feature
.toString());
1253 List
<String
> fullDescription
= new ArrayList
<String
>();
1254 // String localdescr;
1256 NodeList insideNodes
;
1257 boolean collectionEvent
= false;
1258 List
<Node
>collectionEvents
= new ArrayList
<Node
>();
1260 NodeList children
= paragraph
.getChildNodes();
1262 for (int i
=0;i
<children
.getLength();i
++){
// Text directly below the paragraph node is concatenated (without separator) into descr.
1264 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text") && !children
.item(i
).getTextContent().trim().isEmpty()){
1265 descr
+= children
.item(i
).getTextContent().trim();
// tax:p child: its pieces are gathered in blabla and joined into one description entry.
1267 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1268 insideNodes
=children
.item(i
).getChildNodes();
1269 List
<String
> blabla
= new ArrayList
<String
>();
1270 for (int j
=0;j
<insideNodes
.getLength();j
++){
1271 boolean nodeKnown
= false;
1272 // System.out.println("insideNodes.item(j).getNodeName() : "+insideNodes.item(j).getNodeName());
// Inline scientific name: replaced by the markup/text from getInlineTextForName.
1273 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1274 String inlineText
= getInlineTextForName(namesToSave
, refMods
, insideNodes
.item(j
));
1275 if (!inlineText
.isEmpty()) {
1276 blabla
.add(inlineText
);
1280 else if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("#text")) {
1281 if(!insideNodes
.item(j
).getTextContent().trim().isEmpty()){
1282 blabla
.add(insideNodes
.item(j
).getTextContent().trim());
1283 // localdescr += insideNodes.item(j).getTextContent().trim();
// Bibliographic reference: a trailing ';' is turned into '.', and the title cache of a
// freshly created generic reference is what gets added to the text.
1287 else if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:bibref")) {
1288 String ref
= insideNodes
.item(j
).getTextContent().trim();
1289 if (ref
.endsWith(";") && ((ref
.length())>1)) {
1290 ref
=ref
.substring(0, ref
.length()-1)+".";
1292 Reference reference
= ReferenceFactory
.newGeneric();
1293 reference
.setTitleCache(ref
, true);
1294 blabla
.add(reference
.getTitleCache());
// Figure inside a paragraph: extracted for the feature "figure"; the returned text is
// only kept in a local variable here.
1297 else if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:figure")){
1298 String figure
= extractSpecificFeature(insideNodes
.item(j
),acceptedTaxon
,acceptedTaxon
, namesToSave
, refMods
, "figure");
// Table inside a paragraph: same treatment with the feature name "table".
1301 else if(insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:div") &&
1302 insideNodes
.item(j
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1303 insideNodes
.item(j
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1304 String table
= extractSpecificFeature(insideNodes
.item(j
),acceptedTaxon
,acceptedTaxon
, namesToSave
, refMods
, "table");
// Collection event: imported right away as material; its title cache becomes part of the text.
1307 else if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1308 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1309 String titlecache
= extractMaterialsDirect(insideNodes
.item(j
), acceptedTaxon
, refMods
, "collection", null);
1310 blabla
.add(titlecache
);
1311 collectionEvent
=true;
1312 collectionEvents
.add(insideNodes
.item(j
));
1315 logger
.warn("node not handled yet: " + insideNodes
.item(j
).getNodeName());
// One description entry per paragraph, skipped when the joined text is blank.
1319 if (!StringUtils
.isBlank(StringUtils
.join(blabla
," "))) {
1320 fullDescription
.add(StringUtils
.join(blabla
," "));
// Figure directly below the paragraph node: the returned text is added to the result.
1323 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:figure")){
1324 String figure
= extractSpecificFeature(children
.item(i
),acceptedTaxon
,acceptedTaxon
, namesToSave
, refMods
, "Figure");
1325 fullDescription
.add(figure
);
// Table directly below the paragraph node: the returned text is added to the result.
1327 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
1328 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1329 children
.item(i
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1330 String table
= extractSpecificFeature(children
.item(i
),acceptedTaxon
,acceptedTaxon
, namesToSave
, refMods
, "table");
1331 fullDescription
.add(table
);
// Text that was not inside any markup is stored under the "not marked up" feature.
1335 if( !stringIsEmpty(descr
.trim())){
1336 Feature currentFeature
= getNotMarkedUpFeatureObject();
1337 setParticularDescription(descr
.trim(),acceptedTaxon
,acceptedTaxon
, refMods
,currentFeature
);
1339 // if (collectionEvent) {
1340 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1341 // for (Node coll:collectionEvents){
1342 // = extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
1345 return fullDescription
;
1350 * @param description: the XML node group
1351 * @param acceptedTaxon: the current acceptedTaxon
1352 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1353 * @param nametosave: the list of objects to save into the CDM
1354 * @param refMods: the current reference extracted from the MODS
1355 * @param feature: the feature to link the data with
// Extracts the text of a description node with parseParagraph and stores it for the
// given feature: the returned entries are joined with line breaks, trimmed, and passed
// to setParticularDescription unless the result is empty or punctuation only.
1357 @SuppressWarnings("rawtypes")
1358 private void extractFeature(Node description
, Taxon acceptedTaxon
, Taxon defaultTaxon
, List
<TaxonNameBase
> namesToSave
, Reference refMods
, Feature feature
){
1359 logger
.info("EXTRACT FEATURE "+feature
.toString());
// Work on the deproxied taxon instance.
1360 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1361 List
<String
> fullDescription
= parseParagraph( namesToSave
, acceptedTaxon
, refMods
, description
,feature
);
1363 // System.out.println("Feature : "+feature.toString()+", "+fullDescription.toString());
1364 if (!fullDescription
.isEmpty() &&!stringIsEmpty(StringUtils
.join(fullDescription
,"\n").trim())) {
1365 setParticularDescription(StringUtils
.join(fullDescription
,"\n").trim(),acceptedTaxon
,defaultTaxon
, refMods
,feature
);
1372 * @param descr: the XML Nodegroup to parse
1373 * @param acceptedTaxon: the current acceptedTaxon
1374 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1375 * @param refMods: the current reference extracted from the MODS
1376 * @param currentFeature: the feature name
// Stores a description text as TextData for the given feature on the accepted taxon or,
// when there is no accepted taxon, on the default taxon.
// A leading repetition of the feature title is stripped from the text first.
// NOTE(review): the feature title is concatenated into the regular expression without
// Pattern.quote, and replaceAll removes every occurrence of "<title>." in the text, not
// only the leading one - confirm both are acceptable.
// NOTE(review): the accepted-taxon branch has no descr.isEmpty() check, while the
// default-taxon branch requires a non-empty text.
1379 private void setParticularDescription(String descr
, Taxon acceptedTaxon
, Taxon defaultTaxon
, Reference refMods
, Feature currentFeature
) {
1380 logger
.info("setParticularDescription " + currentFeature
.getTitleCache()+", \n blabla : "+descr
);
1382 //remove redundant feature title
1383 String featureStr
= currentFeature
.getTitleCache();
1384 if (!descr
.isEmpty() && descr
.toLowerCase().startsWith(featureStr
.toLowerCase())){
1385 descr
= descr
.replaceAll("(?i)" + featureStr
+ "\\.\\s*", "");
1389 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1390 featuresMap
.put(currentFeature
.getTitleCache(),currentFeature
);
1392 TextData textData
= createTextData(descr
, refMods
, currentFeature
);
// Accepted taxon available: add the text to its description and save the taxon.
1394 if(acceptedTaxon
!=null){
1395 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
1396 td
.addElement(textData
);
1397 acceptedTaxon
.addDescription(td
);
1399 sourceHandler
.addAndSaveSource(refMods
, td
, null);
1400 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
// No accepted taxon: fall back to the default taxon, looked up by uuid first.
1403 if(! descr
.isEmpty() && (acceptedTaxon
== null) && (defaultTaxon
!= null)){
1405 Taxon tmp
=(Taxon
) importer
.getTaxonService().find(defaultTaxon
.getUuid());
1407 defaultTaxon
=CdmBase
.deproxy(tmp
,Taxon
.class);
1409 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
// Any exception from the lookup/save above is only logged at debug level.
1411 }catch(Exception e
){
1412 logger
.debug("TAXON EXISTS"+defaultTaxon
);
1415 TaxonDescription td
=importer
.getTaxonDescription(defaultTaxon
, false, true);
1416 defaultTaxon
.addDescription(td
);
1417 td
.addElement(textData
);
1418 sourceHandler
.addAndSaveSource(refMods
, td
, null);
1419 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
1426 * @param currentFeature
// Creates a TextData element for the given feature, registers refMods as its source
// through the source handler and stores descr as its text under Language.UNKNOWN_LANGUAGE().
1429 private TextData
createTextData(String descr
, Reference refMods
, Feature currentFeature
) {
1430 //logger.info("createTextData");
1431 TextData textData
= TextData
.NewInstance();
1432 textData
.setFeature(currentFeature
);
1433 sourceHandler
.addSource(refMods
, textData
);
1435 textData
.putText(Language
.UNKNOWN_LANGUAGE(), descr
);
1442 * @param descr: the XML Nodegroup to parse
1443 * @param acceptedTaxon: the current acceptedTaxon
1444 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1445 * @param refMods: the current reference extracted from the MODS
1446 * @param currentFeature: the feature name
// Overload taking an additional reference (currentRef) that is passed as third argument
// to addAndSaveSource. Stores a non-empty description text as TextData for the given
// feature on the accepted taxon or, when there is no accepted taxon, on the default taxon.
// Unlike the five-argument overload, the feature title is not stripped from the text.
// NOTE(review): the accepted-taxon branch calls addAndSaveSource(refMods, td, currentRef)
// while the default-taxon branch calls addAndSaveSource(currentRef, td, currentRef) -
// confirm whether the first argument of the latter should be refMods as well.
1449 private void setParticularDescription(String descr
, Taxon acceptedTaxon
, Taxon defaultTaxon
,Reference currentRef
, Reference refMods
, Feature currentFeature
) {
1450 // System.out.println("setParticularDescriptionSPecial "+currentFeature);
1451 // logger.info("acceptedTaxon: "+acceptedTaxon);
1452 logger
.info("setParticularDescription");
1453 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1455 featuresMap
.put(currentFeature
.getTitleCache(),currentFeature
);
1456 TextData textData
= createTextData(descr
, refMods
, currentFeature
);
// Accepted taxon available: add the text to its description and save the taxon.
1458 if(! descr
.isEmpty() && (acceptedTaxon
!=null)){
1459 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
1460 td
.addElement(textData
);
1461 acceptedTaxon
.addDescription(td
);
1463 sourceHandler
.addAndSaveSource(refMods
, td
, currentRef
);
1464 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
// No accepted taxon: fall back to the default taxon, looked up by uuid first.
1467 if(! descr
.isEmpty() && (acceptedTaxon
== null) && (defaultTaxon
!= null)){
1469 Taxon tmp
=(Taxon
) importer
.getTaxonService().find(defaultTaxon
.getUuid());
1471 defaultTaxon
=CdmBase
.deproxy(tmp
,Taxon
.class);
1473 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
// Any exception from the lookup/save above is only logged at debug level.
1475 }catch(Exception e
){
1476 logger
.debug("TAXON EXISTS"+defaultTaxon
);
1479 TaxonDescription td
=importer
.getTaxonDescription(defaultTaxon
, false, true);
1480 defaultTaxon
.addDescription(td
);
1481 td
.addElement(textData
);
1482 sourceHandler
.addAndSaveSource(currentRef
, td
,currentRef
);
1483 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
1490 * @param synonyms: the XML Nodegroup to parse
1491 * @param nametosave: the list of objects to save into the CDM
1492 * @param acceptedTaxon: the current acceptedTaxon
1493 * @param refMods: the current reference extracted from the MODS
// Extracts the scientific names found in a synonyms node and adds them as synonyms
// (relationship type SYNONYM_OF, citation refMods) to the accepted taxon, which is
// re-read from the taxon service by uuid and saved at the end.
// Names are looked for in three places: the node itself when it is a tax:name, tax:name
// elements inside tax:p children, and tax:name elements that are direct children.
// NOTE(review): acceptedTaxon.getUuid() is called without a null check, and
// syn.getSec().equals(...) would raise a NullPointerException for a synonym without sec
// reference - confirm neither can happen.
1495 @SuppressWarnings({ "rawtypes" })
1496 private void extractSynonyms(Node synonymsNode
, Taxon acceptedTaxon
,Reference refMods
, String followingText
) {
1497 logger
.info("extractSynonyms");
1498 //System.out.println("extractSynonyms for: "+acceptedTaxon);
// Re-load the taxon from the service and continue with the deproxied instance.
1499 Taxon ttmp
= (Taxon
) importer
.getTaxonService().find(acceptedTaxon
.getUuid());
1501 acceptedTaxon
= CdmBase
.deproxy(ttmp
,Taxon
.class);
1504 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1506 NodeList children
= synonymsNode
.getChildNodes();
1507 List
<MyName
> names
= new ArrayList
<MyName
>();
// Case 1: the given node is itself a scientific name.
1509 if(synonymsNode
.getNodeName().equalsIgnoreCase("tax:name")){
1511 MyName myName
= extractScientificNameSynonym(synonymsNode
, refMods
, followingText
);
1513 } catch (TransformerFactoryConfigurationError e
) {
1515 } catch (TransformerException e
) {
1521 for (int i
=0;i
<children
.getLength();i
++){
// Case 2: names nested inside a paragraph child.
1522 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1523 NodeList tmp
= children
.item(i
).getChildNodes();
1524 // String fullContent = children.item(i).getTextContent();
1525 for (int j
=0; j
< tmp
.getLength();j
++){
1526 if(tmp
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1528 MyName myName
= extractScientificNameSynonym(tmp
.item(j
),refMods
, followingText
);
1530 } catch (TransformerFactoryConfigurationError e
) {
1532 } catch (TransformerException e
) {
// Case 3: names that are direct children of the synonyms node.
1538 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:name")){
1540 MyName myName
= extractScientificNameSynonym(children
.item(i
),refMods
, followingText
);
1542 } catch (TransformerFactoryConfigurationError e
) {
1544 } catch (TransformerException e
) {
// Attach each collected name to the accepted taxon as a synonym.
1551 for(MyName name
:names
){
1552 TaxonNameBase nameToBeFilled
= name
.getTaxonNameBase();
1553 Synonym synonym
= name
.getSyno();
1554 addFollowingTextToName(nameToBeFilled
, followingText
);
1556 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1557 nameToBeFilled = parser.parseFullName(name.getName(), nomenclaturalCode, name.getRank());
1558 if (nameToBeFilled.hasProblem() &&
1559 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1560 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1561 addProblemNameToFile(name.getName(),"",nomenclaturalCode,name.getRank());
1562 nameToBeFilled = solveNameProblem(name.getOriginalName(), name.getName(), parser,name.getAuthor(), name.getRank());
1564 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
1566 if (!name
.getIdentifier().isEmpty() && (name
.getIdentifier().length()>2)){
1567 setLSID(name
.getIdentifier(), synonym
);
1570 Set
<Synonym
> synonymsSet
= acceptedTaxon
.getSynonyms();
1571 boolean synoExist
= false;
1572 for (Synonym syn
: synonymsSet
){
1574 boolean a
=syn
.getName().equals(synonym
.getName());
1575 boolean b
= syn
.getSec().equals(synonym
.getSec());
1580 if (!synonymsSet
.contains(synonym
) && ! (synoExist
)) {
1581 sourceHandler
.addSource(refMods
, synonym
);
1582 acceptedTaxon
.addSynonym(synonym
, SynonymRelationshipType
.SYNONYM_OF(),refMods
, null);
1585 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
// Appends the text that follows a name in the source document to the name's full title
// cache, separated by a comma. Nothing is appended for a null name, a blank text, or a
// text that is just one digit optionally followed by a dot. A leading comma of the
// following text is dropped (and the rest trimmed) before appending, so the separator
// is not doubled.
1589 private boolean addFollowingTextToName(TaxonNameBase nameToBeFilled
, String followingText
) {
1590 if (nameToBeFilled
!= null && StringUtils
.isNotBlank(followingText
)){
1591 if (! followingText
.matches("\\d\\.?")){
1593 if (followingText
.startsWith(",")){
1594 followingText
= followingText
.substring(1).trim();
1596 nameToBeFilled
.setFullTitleCache(nameToBeFilled
.getFullTitleCache()+ "," +followingText
, true);
1605 * @param refgroup: the XML nodes
1606 * @param nametosave: the list of objects to save into the CDM
1607 * @param acceptedTaxon: the current acceptedTaxon
1608 * @param nametosave: the list of objects to save into the CDM
1609 * @param refMods: the current reference extracted from the MODS
1610 * @return the acceptedTaxon (why?)
1611 * Handles both cases: bibref elements nested inside a <p> element and bibref elements outside of it.
// Processes the bibliographic references of a reference group node.
// tax:bibref elements - either direct children or nested inside tax:p children - are
// handed to ReferenceBuilder.builReference; non-blank text inside tax:p is accumulated
// in descr. For a direct bibref that the builder did not recognise, the node's children
// are processed as raw text by extractReferenceRawText. When no bibref was found, the
// accumulated text is stored under the "not marked up" feature.
// Returns the deproxied acceptedTaxon.
1613 @SuppressWarnings({ "rawtypes" })
1614 private Taxon
extractReferences(Node refgroup
, List
<TaxonNameBase
> nametosave
, Taxon acceptedTaxon
, Reference refMods
) {
1615 logger
.info("extractReferences");
1616 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1618 NodeList children
= refgroup
.getChildNodes();
1619 NonViralName
<?
> nameToBeFilled
= getNonViralNameAccNomenclature();
// One builder is shared by all references of this group.
1621 ReferenceBuilder refBuild
= new ReferenceBuilder(sourceHandler
);
1622 for (int i
=0;i
<children
.getLength();i
++){
// bibref directly below the reference group
1623 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:bibref")){
1624 String ref
= children
.item(i
).getTextContent().trim();
1625 refBuild
.builReference(ref
, treatmentMainName
, nomenclaturalCode
, acceptedTaxon
, refMods
);
1626 if (!refBuild
.isFoundBibref()){
1627 extractReferenceRawText(children
.item(i
).getChildNodes(), nameToBeFilled
, refMods
, acceptedTaxon
);
// bibref elements and free text nested inside a paragraph
1631 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1632 NodeList references
= children
.item(i
).getChildNodes();
1634 for (int j
=0;j
<references
.getLength();j
++){
1635 if(references
.item(j
).getNodeName().equalsIgnoreCase("tax:bibref")){
1636 String ref
= references
.item(j
).getTextContent().trim();
1637 refBuild
.builReference(ref
, treatmentMainName
, nomenclaturalCode
, acceptedTaxon
, refMods
);
1640 if (references
.item(j
).getNodeName().equalsIgnoreCase("#text")
1641 && !references
.item(j
).getTextContent().trim().isEmpty()){
1642 descr
+= references
.item(j
).getTextContent().trim();
1646 if (!refBuild
.isFoundBibref()){
1647 //if it's not tagged, put it as raw information.
1648 // extractReferenceRawText(references, nameToBeFilled, nametosave, refMods, acceptedTaxon);
1649 //then put it as a not markup feature if not empty
1650 if (!stringIsEmpty(descr
.trim())){
1651 Feature currentFeature
= getNotMarkedUpFeatureObject();
1652 setParticularDescription(descr
.trim(),acceptedTaxon
,acceptedTaxon
, refMods
,currentFeature
);
1657 // importer.getClassificationService().saveOrUpdate(classification);
1658 return acceptedTaxon
;
1663 * get the non viral name according to the current nomenclature
// Asks the current nomenclatural code for a new taxon name instance (argument null)
// and returns it cast to NonViralName.
// NOTE(review): the cast is unchecked; it presumably relies on the configured code only
// producing non-viral names - confirm.
1667 private NonViralName
<?
> getNonViralNameAccNomenclature() {
1668 return (NonViralName
<?
>)nomenclaturalCode
.getNewTaxonNameInstance(null);
1672 * @return the feature object for the category "not marked up"
// Looks up the "not marked up" feature by its fixed uuid (NotMarkedUpUUID) in the term
// service. When it does not exist yet, a new Feature named notMarkedUp is created with
// that uuid, added to the vocabulary of Feature.DISTRIBUTION() and persisted by saving
// that vocabulary.
1674 private Feature
getNotMarkedUpFeatureObject() {
1675 // FIXME use getFeature(uuid ....)
1676 logger
.info("getNotMarkedUpFeatureObject");
1677 Feature currentFeature
= (Feature
)importer
.getTermService().find(NotMarkedUpUUID
);
1678 if (currentFeature
== null) {
1679 currentFeature
=Feature
.NewInstance(notMarkedUp
, notMarkedUp
, notMarkedUp
);
1680 currentFeature
.setUuid(NotMarkedUpUUID
);
1681 //TODO use userDefined Feature Vocabulary
1682 Feature
.DISTRIBUTION().getVocabulary().addTerm(currentFeature
);
1683 // importer.getTermService().saveOrUpdate(currentFeature);
// The new term is persisted through its vocabulary rather than through the term service.
1684 importer
.getVocabularyService().saveOrUpdate(currentFeature
.getVocabulary());
1686 return currentFeature
;
1691 * Handles both cases: bibref elements nested inside a <p> element and bibref elements outside of it.
1693 @SuppressWarnings("rawtypes")
1694 private void extractReferenceRawText(NodeList references
, NonViralName
<?
> nameToBeFilled
, Reference refMods
,
1695 Taxon acceptedTaxon
) {
1696 logger
.info("extractReferenceRawText");
1697 String refString
="";
1698 currentMyName
= new MyName(true);
1699 for (int j
=0;j
<references
.getLength();j
++){
1700 acceptedTaxon
=CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1701 //no bibref tag inside
1702 // System.out.println("references.item(j).getNodeName()"+references.item(j).getNodeName());
1703 if (references
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1706 String followingText
= null; //needs to be checked if follText is possible
1707 //TODO create or not create?
1708 currentMyName
= extractScientificName(references
.item(j
), refMods
, followingText
);
1709 } catch (TransformerFactoryConfigurationError e
) {
1711 } catch (TransformerException e
) {
1715 // name=name.trim();
1717 if (references
.item(j
).getNodeName().equalsIgnoreCase("#text")){
1718 refString
= references
.item(j
).getTextContent().trim();
1720 if(references
.item(j
).getNodeName().equalsIgnoreCase("#text") && !references
.item(j
).getTextContent().trim().isEmpty()){
1722 if (!currentMyName
.getStatus().isEmpty()){
1723 String nomNovStatus
= this.newNameStatus(currentMyName
.getStatus());
1724 if (nomNovStatus
!= null){
1725 nameToBeFilled
.setAppendedPhrase(nomNovStatus
);
1728 NomenclaturalStatusType statusType
= nomStatusString2NomStatus(currentMyName
.getStatus());
1729 nameToBeFilled
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
1730 } catch (UnknownCdmTypeException e
) {
1731 addProblematicStatusToFile(currentMyName
.getStatus());
1732 logger
.warn("Problem with status");
1737 String fullLineRefName
= references
.item(j
).getTextContent().trim();
1738 int nameOrRefOrOther
=2;
1739 nameOrRefOrOther
=askIfNameContained(fullLineRefName
);
1740 if (nameOrRefOrOther
==0){
1741 TaxonNameBase nameTBF
= currentMyName
.getTaxonNameBase();
1742 Synonym synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
1744 Set
<Synonym
> synonymsSet
= acceptedTaxon
.getSynonyms();
1745 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1746 boolean synoExist
= false;
1747 for (Synonym syn
: synonymsSet
){
1748 // System.out.println(syn.getName()+" -- "+syn.getSec());
1749 boolean a
=syn
.getName().equals(synonym
.getName());
1750 boolean b
= syn
.getSec().equals(synonym
.getSec());
1755 if (!synonymsSet
.contains(synonym
) && ! (synoExist
)) {
1756 sourceHandler
.addSource(refMods
, synonym
);
1758 acceptedTaxon
.addSynonym(synonym
, SynonymRelationshipType
.SYNONYM_OF(),refMods
, null);
1762 if (nameOrRefOrOther
==1){
1763 Reference re
= ReferenceFactory
.newGeneric();
1764 re
.setTitleCache(fullLineRefName
, true);
1766 /* TaxonNameBase nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
1767 if (nameTBF.hasProblem() &&
1768 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1769 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
1770 nameTBF=solveNameProblem(currentMyName.getName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
1772 nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1774 TaxonNameBase nameTBF
= currentMyName
.getTaxonNameBase();
1775 Synonym synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
1777 Set
<Synonym
> synonymsSet
= acceptedTaxon
.getSynonyms();
1778 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1779 boolean synoExist
= false;
1780 for (Synonym syn
: synonymsSet
){
1781 // System.out.println(syn.getName()+" -- "+syn.getSec());
1782 boolean a
=syn
.getName().equals(synonym
.getName());
1783 boolean b
= syn
.getSec().equals(synonym
.getSec());
1788 if (!synonymsSet
.contains(synonym
) && ! (synoExist
)) {
1789 sourceHandler
.addSource(refMods
, synonym
);
1791 acceptedTaxon
.addSynonym(synonym
, SynonymRelationshipType
.SYNONYM_OF(),re
, null);
1797 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
1798 setLSID(currentMyName
.getIdentifier(), acceptedTaxon
);
1802 if(!currentMyName
.getName().isEmpty()){
1803 //logger.info("acceptedTaxon and name: *"+acceptedTaxon.getTitleCache()+"*, *"+currentMyName.getName()+"*");
1804 if (acceptedTaxon
.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(currentMyName
.getName().trim())){
1805 Reference refS
= ReferenceFactory
.newGeneric();
1806 refS
.setTitleCache(refString
, true);
1807 // TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1808 // acceptedTaxon.addDescription(td);
1809 // acceptedTaxon.addSource(refSource);
1811 // TextData textData = TextData.NewInstance(Feature.CITATION());
1813 // textData.addSource(null, null, refS, null);
1814 // td.addElement(textData);
1815 // td.addSource(refSource);
1816 // importer.getDescriptionService().saveOrUpdate(td);
1819 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
1820 setLSID(currentMyName
.getIdentifier(), acceptedTaxon
);
1824 acceptedTaxon
.getName().setNomenclaturalReference(refS
);
1826 TaxonNameBase nameTBF
= currentMyName
.getTaxonNameBase();
1827 Synonym synonym
= null;
1828 if (! currentMyName
.getStatus().isEmpty()){
1829 String nomNovStatus
= this.newNameStatus(currentMyName
.getStatus());
1830 if (nomNovStatus
!= null){
1831 nameToBeFilled
.setAppendedPhrase(nomNovStatus
);
1834 NomenclaturalStatusType statusType
= nomStatusString2NomStatus(currentMyName
.getStatus());
1835 nameToBeFilled
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
1836 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
1837 } catch (UnknownCdmTypeException e
) {
1838 addProblematicStatusToFile(currentMyName
.getStatus());
1839 logger
.warn("Problem with status");
1840 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
1841 synonym
.setAppendedPhrase(currentMyName
.getStatus());
1845 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
1849 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
1850 setLSID(currentMyName
.getIdentifier(), synonym
);
1853 Set
<Synonym
> synonymsSet
= acceptedTaxon
.getSynonyms();
1854 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1855 boolean synoExist
= false;
1856 for (Synonym syn
: synonymsSet
){
1857 // System.out.println(syn.getName()+" -- "+syn.getSec());
1858 boolean a
=syn
.getName().equals(synonym
.getName());
1859 boolean b
= syn
.getSec().equals(synonym
.getSec());
1864 if (!synonymsSet
.contains(synonym
) && ! (synoExist
)) {
1865 sourceHandler
.addSource(refMods
, synonym
);
1867 acceptedTaxon
.addSynonym(synonym
, SynonymRelationshipType
.SYNONYM_OF(),refMods
, null);
1871 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1879 * @param acceptedTaxon
1881 @SuppressWarnings("rawtypes")
1882 private void setLSID(String identifier
, TaxonBase
<?
> taxon
) {
1883 //logger.info("setLSID");
1884 // boolean lsidok=false;
1885 String id
= identifier
.split("__")[0];
1886 String source
= identifier
.split("__")[1];
1887 if (id
.indexOf("lsid")>-1){
1889 LSID lsid
= new LSID(id
);
1890 taxon
.setLsid(lsid
);
1892 } catch (MalformedLSIDException e
) {
1893 logger
.warn("Malformed LSID");
1898 //logger.info("search reference for LSID");
1899 // if ((id.indexOf("lsid")<0) || !lsidok){
1900 //ADD ORIGINAL SOURCE ID EVEN IF LSID
1901 Reference re
= null;
1902 Pager
<Reference
> references
= importer
.getReferenceService().findByTitle(Reference
.class, source
, MatchMode
.EXACT
, null, 1, null, null, null);
1903 if( references
!=null && references
.getCount()>0){
1904 re
=references
.getRecords().get(0);
1906 //logger.info("search reference for LSID-end");
1908 re
= ReferenceFactory
.newGeneric();
1909 re
.setTitleCache(source
, true);
1910 importer
.getReferenceService().saveOrUpdate(re
);
1912 re
=CdmBase
.deproxy(re
, Reference
.class);
1914 //logger.info("search source for LSID");
1915 Set
<IdentifiableSource
> sources
= taxon
.getSources();
1916 boolean lsidinsource
=false;
1917 boolean urlinsource
=false;
1918 for (IdentifiableSource src
:sources
){
1919 if (id
.equalsIgnoreCase(src
.getIdInSource()) && re
.getTitleCache().equals(src
.getCitation().getTitleCache())) {
1922 if (src
.getIdInSource() == null && re
.getTitleCache().equals(sourceUrlRef
.getTitleCache())) {
1927 taxon
.addSource(OriginalSourceType
.Import
, id
,null,re
,null);
1931 sourceUrlRef
=CdmBase
.deproxy(sourceUrlRef
, Reference
.class);
1932 taxon
.addSource(OriginalSourceType
.Import
, null,null,sourceUrlRef
,null);
1939 * try to solve a parsing problem for a scientific name
1940 * @param original : the name from the OCR document
1941 * @param name : the tagged version
1943 * @return the corrected TaxonNameBase
1945 /* @SuppressWarnings({ "unchecked", "rawtypes" })
1946 private TaxonNameBase<?,?> solveNameProblem(String original, String name, INonViralNameParser parser, String author, Rank rank) {
1947 Map<String,String> ato = namesMap.get(original);
1949 ato = namesMap.get(original+" "+author);
1953 if (ato == null && rank.equals(Rank.UNKNOWN_RANK())){
1954 rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
1956 if (ato != null && rank.equals(Rank.UNKNOWN_RANK())){
1957 rank = getRank(ato);
1959 // TaxonNameBase<?,?> nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
1960 TaxonNameBase<?,?> nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1961 // logger.info("RANK: "+rank);
1963 List<ParserProblem> problems = nameTBF.getParsingProblems();
1964 for (ParserProblem pb:problems) {
1965 System.out.println(pb.toString());
1967 while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
1968 addProblemNameToFile(name,author,nomenclaturalCode,rank);
1969 String fullname=name;
1970 if(! skippQuestion) {
1971 fullname = getFullReference(name,nameTBF.getParsingProblems());
1973 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1974 nameTBF = BotanicalName.NewInstance(null);
1976 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1977 nameTBF = ZoologicalName.NewInstance(null);
1979 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1980 nameTBF= BacterialName.NewInstance(null);
1982 parser.parseReferencedName(nameTBF, fullname, rank, false);
1987 if (name.indexOf(author)>-1) {
1988 nameTBF = parser.parseSimpleName(name.substring(0, name.indexOf(author)), nomenclaturalCode, rank);
1990 nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1992 if (nameTBF.hasProblem()){
1993 if (name.indexOf(author)>-1) {
1994 addProblemNameToFile(name.substring(0, name.indexOf(author)),author,nomenclaturalCode,rank);
1996 addProblemNameToFile(name,author,nomenclaturalCode,rank);
1998 // System.out.println("TBF still has problems "+nameTBF.hasProblem());
1999 problems = nameTBF.getParsingProblems();
2000 for (ParserProblem pb:problems) {
2001 System.out.println(pb.toString());
2003 nameTBF.setFullTitleCache(name, true);
2005 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2006 ((BotanicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2008 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2009 ((ZoologicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2011 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2012 ((BacterialName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2015 // logger.info("FULL TITLE CACHE "+name);
2017 nameTBF.setFullTitleCache(name, true);
2026 * @param nomenclatureNode: the XML nodes
2027 * @param nametosave: the list of objects to save into the CDM
2028 * @param refMods: the current reference extracted from the MODS
/**
 * Handles the child nodes of a nomenclature node and returns the accepted taxon.
 * <p>
 * A first loop reads the text of every tax:status child and stores it either as
 * newNameStatus (when newNameStatus(status) returns a value) or as statusType; a status that
 * can not be converted is written to the problem file and logged.
 * A second loop dispatches on the child node name: #text (free text), tax:collection_event
 * (extractMaterialsDirect), tax:name, tax:ref_group (extractReferences, only when
 * maxRankRespected is set) and tax:bibref. For the first tax:name the name is read with
 * extractScientificName and the accepted taxon is taken from it or newly created; further
 * tax:name children are passed to extractSynonyms.
 *
 * @param nomenclatureNode the XML node whose children are handled
 * @param nametosave the list of objects to save into the CDM; only passed on to extractReferences here
 * @param refMods the current reference extracted from the MODS, deproxied at the start
 * @return the accepted taxon; the variable is initialised with null
 */
2031 @SuppressWarnings({ "rawtypes" })
2032 private Taxon
extractNomenclature(Node nomenclatureNode
, List
<TaxonNameBase
> nametosave
, Reference refMods
) throws ClassCastException
{
2033 refMods
=CdmBase
.deproxy(refMods
, Reference
.class);
2035 logger
.info("extractNomenclature");
2036 NodeList children
= nomenclatureNode
.getChildNodes();
2038 Taxon acceptedTaxon
= null;
2039 // INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2041 // String fullContent = nomenclatureNode.getTextContent();
2043 NomenclaturalStatusType statusType
= null;
2044 String newNameStatus
= null;
// first loop: collect the status information of the tax:status children
2046 for (int i
=0;i
<children
.getLength();i
++){
2047 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:status")){
2048 String status
= children
.item(i
).getTextContent().trim();
2050 if (!status
.isEmpty()){
2051 if (newNameStatus(status
) != null){
2052 newNameStatus
= newNameStatus(status
);
2055 statusType
= nomStatusString2NomStatus(status
);
2056 } catch (UnknownCdmTypeException e
) {
2058 addProblematicStatusToFile(status
);
2059 logger
.warn("Problem with status: " + status
);
2066 boolean containsSynonyms
=false;
2067 boolean wasSynonym
= false;
2068 usedFollowingTextPrefix
= null; //reset
// second loop: dispatch on the name of each child node
2070 for (int i
=0; i
<children
.getLength(); i
++){
2071 Node childNode
= children
.item(i
);
2072 String childName
= childNode
.getNodeName();
// followingText is the content of the directly following #text sibling, if it is not whitespace only
2076 followingText
= null;
2077 if ( i
+ 1 < children
.getLength()){
2078 Node followingTextNode
= children
.item(i
+1);
2079 if (followingTextNode
.getNodeName().equals("#text") && !followingTextNode
.getTextContent().matches("\\s*") ){
2080 followingText
= followingTextNode
.getTextContent();
2085 if (childName
.equalsIgnoreCase("#text")) {
2086 freetext
= childNode
.getTextContent().trim();
// cut off the prefix that was stored in usedFollowingTextPrefix
2087 if (usedFollowingTextPrefix
!= null && freetext
.startsWith(usedFollowingTextPrefix
)){
2088 freetext
= freetext
.substring(usedFollowingTextPrefix
.length());
2090 usedFollowingTextPrefix
= null; //reset
2091 }else if (childName
.equalsIgnoreCase("tax:collection_event")) {
2092 // System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
2093 extractMaterialsDirect(childNode
, acceptedTaxon
, refMods
, "collection", currentMyName
.getTaxonNameBase());
2094 }else if(childName
.equalsIgnoreCase("tax:name")){
2095 NonViralName
<?
> nameToBeFilled
;
2096 //System.out.println("HANDLE FIRST NAME OF THE LIST");
2097 if(!containsSynonyms
){
2100 //System.out.println("I : "+i);
2101 currentMyName
= new MyName(false);
2103 currentMyName
= extractScientificName(childNode
, refMods
, followingText
);
2104 treatmentMainName
= currentMyName
.getNewName();
2105 originalTreatmentName
= currentMyName
.getOriginalName();
2107 } catch (TransformerFactoryConfigurationError e1
) {
2108 throw new RuntimeException(e1
);
2109 } catch (TransformerException e1
) {
2110 throw new RuntimeException(e1
);
// the rank is accepted if it is unknown, lower than or equal to the configured maximum rank
2113 if (currentMyName
.getRank().equals(Rank
.UNKNOWN_RANK()) || currentMyName
.getRank().isLower(state2
.getConfig().getMaxRank()) || currentMyName
.getRank().equals(state2
.getConfig().getMaxRank())){
2114 maxRankRespected
=true;
2116 nameToBeFilled
=currentMyName
.getTaxonNameBase();
2118 // acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
2119 acceptedTaxon
=currentMyName
.getTaxon();
2120 //System.out.println("TreatmentName "+treatmentMainName+" - "+acceptedTaxon);
2123 boolean statusMatch
=false;
2124 if(acceptedTaxon
!=null ){
2125 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
2126 statusMatch
=compareStatus(acceptedTaxon
, statusType
);
2127 //System.out.println("statusMatch: "+statusMatch);
// no taxon available or its status does not match: create a new accepted taxon from the name
2129 if (acceptedTaxon
==null || (acceptedTaxon
!= null && !statusMatch
)){
2131 nameToBeFilled
=currentMyName
.getTaxonNameBase();
2132 if (nameToBeFilled
!= null){
2133 if (!originalTreatmentName
.isEmpty()) {
2134 TaxonNameDescription td
= TaxonNameDescription
.NewInstance();
2135 td
.setTitleCache(originalTreatmentName
, true);
2136 nameToBeFilled
.addDescription(td
);
2139 if(statusType
!= null) {
2140 nameToBeFilled
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
2142 if(newNameStatus
!= null){
2143 nameToBeFilled
.setAppendedPhrase(newNameStatus
);
2145 sourceHandler
.addSource(refMods
, nameToBeFilled
);
2147 if (nameToBeFilled
.getNomenclaturalReference() == null) {
2148 acceptedTaxon
= new Taxon(nameToBeFilled
,refMods
);
2149 //System.out.println("NEW ACCEPTED HERE "+nameToBeFilled);
2152 acceptedTaxon
= new Taxon(nameToBeFilled
,(Reference
) nameToBeFilled
.getNomenclaturalReference() );//TODO TOFIX reference
2153 //System.out.println("NEW ACCEPTED HERE2 "+nameToBeFilled);
2156 sourceHandler
.addSource(refMods
, acceptedTaxon
);
2158 if(!state2
.getConfig().doKeepOriginalSecundum()) {
2159 acceptedTaxon
.setSec(state2
.getConfig().getSecundum());
2160 //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2161 //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2164 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
2165 setLSID(currentMyName
.getIdentifier(), acceptedTaxon
);
2169 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
2170 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
2174 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
2175 Set
<IdentifiableSource
> sources
= acceptedTaxon
.getSources();
2176 boolean sourcelinked
=false;
// compares the citation title of each existing source with the title of refMods
2177 for (IdentifiableSource source
:sources
){
2178 if (source
.getCitation().getTitleCache().equalsIgnoreCase(refMods
.getTitleCache())) {
2182 if (!state2
.getConfig().doKeepOriginalSecundum()) {
2183 acceptedTaxon
.setSec(state2
.getConfig().getSecundum());
2184 //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2185 //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2187 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
2190 sourceHandler
.addSource(refMods
, acceptedTaxon
);
2192 if (!sourcelinked
|| !state2
.getConfig().doKeepOriginalSecundum()){
2194 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
2195 //FIXME are these identifiers really related to taxa, not names? Exiting LSIDs come from Zoobank, urn:lsid:biosci.ohio-state.edu:osuc_concepts:134826 (Ants)
2196 setLSID(currentMyName
.getIdentifier(), acceptedTaxon
);
2198 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
2202 maxRankRespected
=false;
2204 containsSynonyms
=true; //all following names are handled as synonyms
2207 extractSynonyms(childNode
, acceptedTaxon
, refMods
, followingText
);
// NOTE(review): a NullPointerException is caught here instead of checking acceptedTaxon for null
2210 }catch(NullPointerException e
){
2211 logger
.warn("null pointer exception, the accepted taxon might be null");
2214 containsSynonyms
=true;
2215 }else if (childName
.equalsIgnoreCase("tax:ref_group") && maxRankRespected
){
2216 reloadClassification();
2217 //extract the References within the document
2218 extractReferences(childNode
,nametosave
,acceptedTaxon
,refMods
);
2219 }else if (childName
.equalsIgnoreCase("tax:bibref")){
2220 logger
.warn(childName
+ " still preliminary");
2222 NonViralName
<?
> currentName
= currentMyName
== null ?
null : currentMyName
.getTaxonNameBase();
2223 boolean handled
= addFollowingTextToName (currentName
, childNode
.getTextContent() );
2225 setParticularDescription(freetext
.trim(), acceptedTaxon
,acceptedTaxon
, refMods
, getNotMarkedUpFeatureObject());
2228 logger
.warn(childName
+ " not yet handled");
// NOTE(review): the ';' directly after '{' is an empty statement without effect
2230 if(!stringIsEmpty(freetext
.trim())) {;
// free text that is only a single digit with an optional dot is ignored
2231 if (! freetext
.matches("\\d\\.?")){
2232 NonViralName
<?
> currentName
= currentMyName
== null ?
null : currentMyName
.getTaxonNameBase();
2233 boolean handled
= false;
2234 if (currentName
!= null && !wasSynonym
){
2235 handled
= addFollowingTextToName (currentName
, childNode
.getTextContent() );
2238 setParticularDescription(freetext
.trim(), acceptedTaxon
,acceptedTaxon
, refMods
, getNotMarkedUpFeatureObject());
2246 //importer.getClassificationService().saveOrUpdate(classification);
2247 return acceptedTaxon
;
2257 private boolean compareStatus(TaxonBase
<?
> t
, NomenclaturalStatusType statusType
) {
2258 //logger.info("compareStatus");
2259 boolean statusMatch
=false;
2261 Set
<NomenclaturalStatus
> status
= t
.getName().getStatus();
2262 if (statusType
!=null && status
.size()>0){ //the statusType is known for both taxon
2263 for (NomenclaturalStatus st
:status
){
2264 NomenclaturalStatusType stype
= st
.getType();
2265 if (stype
.toString().equalsIgnoreCase(statusType
.toString())) {
2271 if(statusType
== null && status
.size()==0) {//there is no statusType, we can assume it's the same
2279 * @param acceptedTaxon: the current acceptedTaxon
2280 * @param ref: the current reference extracted from the MODS
2281 * @return the parent for the current accepted taxon
2283 /* private Taxon createParent(Taxon acceptedTaxon, Reference ref) {
2284 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2286 List<Rank> rankList = new ArrayList<Rank>();
2287 rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
2289 List<String> rankListStr = new ArrayList<String>();
2290 for (Rank r:rankList) {
2291 rankListStr.add(r.toString());
2294 String s = acceptedTaxon.getTitleCache();
2297 int addTaxon = askAddParent(s);
2298 logger.info("ADD TAXON: "+addTaxon);
2299 if (addTaxon == 0 ){
2300 Taxon tmp = askParent(acceptedTaxon, classification);
2302 s = askSetParent(s);
2303 r = askRank(s,rankListStr);
2305 NonViralName<?> nameToBeFilled = null;
2306 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2307 nameToBeFilled = BotanicalName.NewInstance(null);
2309 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2310 nameToBeFilled = ZoologicalName.NewInstance(null);
2312 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2313 nameToBeFilled = BacterialName.NewInstance(null);
2315 nameToBeFilled.setTitleCache(s, true);
2316 nameToBeFilled.setRank(getRank(r), true);
2318 tax = Taxon.NewInstance(nameToBeFilled, ref);
2324 createParent(tax, ref);
2325 // logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
2326 classification.addParentChild(tax, acceptedTaxon, ref, null);
2329 classification.addChildTaxon(acceptedTaxon, ref, null);
2333 classification.addChildTaxon(acceptedTaxon, ref, null);
2336 // logger.info("RETURN: "+tax );
/**
 * Reads a name node that is handled as a synonym and fills a MyName (created with
 * {@code new MyName(true)}) from it.
 * <p>
 * The status comes from extractStatus. The children of each tax:xmldata child supply the
 * identifier (extractIdentifier), the rank (dwc:taxonRank) and the atomised name parts;
 * a non blank #text child supplies the full name. The full name is cleaned with cleanName
 * and registered in namesMap together with the atomised map. Where full name and atomised
 * name differ, one of them is chosen as newName, partly via askWhichScientificName.
 * The name is then parsed (parseWithExtension, parser.parseFullName as fallback); if parsing
 * fails an unparsed synonym is created, otherwise the parsed name is stored in the MyName.
 *
 * @param name the XML node of the name
 * @param refMods the reference that is set as source of the MyName
 * @param followingText the text following the name node, passed on to parseWithExtension
 * @throws TransformerFactoryConfigurationError
 * @throws TransformerException
 */
2344 private MyName
extractScientificNameSynonym(Node name
, Reference refMods
, String followingText
) throws TransformerFactoryConfigurationError
, TransformerException
{
2345 //System.out.println("extractScientificNameSynonym");
2346 logger
.info("extractScientificNameSynonym");
2347 String
[] rankListToPrint_tmp
={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2348 List
<String
> rankListToPrint
= new ArrayList
<String
>();
2349 for (String r
: rankListToPrint_tmp
) {
2350 rankListToPrint
.add(r
.toLowerCase());
2353 Rank rank
= Rank
.UNKNOWN_RANK();
2354 NodeList children
= name
.getChildNodes();
2355 String originalName
="";
2356 String fullName
= "";
2358 String identifier
="";
2359 HashMap
<String
, String
> atomisedMap
= new HashMap
<String
, String
>();
2360 List
<String
> atomisedName
= new ArrayList
<String
>();
2362 String rankStr
= "";
2365 String status
= extractStatus(children
);
// collect identifier, rank and atomised parts from tax:xmldata, the full name from #text
2367 for (int i
=0;i
<children
.getLength();i
++){
2368 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:xmldata")){
2369 NodeList atom
= children
.item(i
).getChildNodes();
2370 for (int k
=0;k
<atom
.getLength();k
++){
2371 identifier
= extractIdentifier(identifier
, atom
.item(k
));
2373 rankStr
= atom
.item(k
).getNodeName().toLowerCase();
2374 // logger.info("RANKSTR:*"+rankStr+"*");
2375 if (rankStr
.equalsIgnoreCase("dwc:taxonRank")) {
2376 rankStr
=atom
.item(k
).getTextContent().trim();
2377 tmpRank
= getRank(rankStr
);
2379 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2380 if (tmpRank
!= null){
2383 atomisedMap
.put(rankStr
.toLowerCase(),atom
.item(k
).getTextContent().trim());
2385 addAtomisedNamesToMap(rankListToPrint
, rank
, atomisedName
, atom
);
2387 if(children
.item(i
).getNodeName().equalsIgnoreCase("#text") && !StringUtils
.isBlank(children
.item(i
).getTextContent())){
2388 // logger.info("name non atomised: "+children.item(i).getTextContent());
2389 fullName
= children
.item(i
).getTextContent().trim();
2390 // logger.info("fullname: "+fullName);
2393 originalName
=fullName
;
2394 fullName
= cleanName(fullName
, atomisedName
);
2395 namesMap
.put(fullName
,atomisedMap
);
2397 String atomisedNameStr
= getAtomisedNameStr(atomisedName
);
// choose between the full name and the atomised name when they differ
2399 if (fullName
!= null){
2400 // System.out.println("fullname: "+fullName);
2401 // System.out.println("atomised: "+atomisedNameStr);
2402 if (!fullName
.equalsIgnoreCase(atomisedNameStr
)) {
2404 // String defaultN = "";
2405 if (atomisedNameStr
.length()>fullName
.length()) {
2406 newName
=atomisedNameStr
;
2408 if (fullName
.length()>atomisedNameStr
.length() && (rank
.isLower(Rank
.SPECIES()) && fullName
.length()>2 && !fullName
.substring(0, 1).equals("."))) {
2409 newName
=askWhichScientificName(fullName
,atomisedNameStr
,classification
.getTitleCache(),name
);
2415 newName
=askWhichScientificName(fullName
,atomisedNameStr
,classification
.getTitleCache(),name
);
2422 // rank = askForRank(newName, rank, nomenclaturalCode);
2423 // System.out.println("atomised: "+atomisedMap.toString());
2425 // String[] names = new String[5];
2426 MyName myname
= new MyName(true);
2428 //System.out.println("Handle "+newName+ "(rank: "+rank+")");
2429 // System.out.println(atomisedMap.keySet());
2430 fullName
= extractAuthorFromNames(rank
, fullName
, atomisedMap
, myname
);
2431 myname
.setOriginalName(fullName
);
2432 myname
.setNewName(newName
);
2433 myname
.setRank(rank
);
2434 myname
.setIdentifier(identifier
);
2435 myname
.setStatus(status
);
2436 myname
.setSource(refMods
);
2438 // boolean higherAdded=false;
2441 boolean parseNameManually
=false;
2442 INonViralNameParser
<?
> parser
= NonViralNameParserImpl
.NewInstance();
2443 TaxonNameBase
<?
,?
> nameToBeFilledTest
;
2445 //if selected the atomised version
// NOTE(review): '==' compares String references, not content; this only holds when newName
// is the very same object as atomisedNameStr - confirm whether equals() was intended
2446 if(newName
==atomisedNameStr
){
2447 nameToBeFilledTest
= parseWithExtension(parser
, atomisedNameStr
, rank
, followingText
, atomisedMap
);
2448 if (nameToBeFilledTest
.hasProblem()){
2449 addProblemNameToFile("ato",atomisedNameStr
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2450 nameToBeFilledTest
= parser
.parseFullName(fullName
, nomenclaturalCode
,rank
);
2451 if (nameToBeFilledTest
.hasProblem()){
2452 addProblemNameToFile("full",fullName
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2453 parseNameManually
=true;
// NOTE(review): atomisedNameStr is parsed here as well, while the problem is reported for
// fullName and extractScientificName parses fullName at this place - confirm
2457 nameToBeFilledTest
= parseWithExtension(parser
, atomisedNameStr
, rank
, followingText
, atomisedMap
);
2458 if (nameToBeFilledTest
.hasProblem()){
2459 addProblemNameToFile("fullversion",fullName
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2460 nameToBeFilledTest
= parser
.parseFullName(fullName
, nomenclaturalCode
,rank
);
2461 parseNameManually
=true;
2462 if(!originalName
.equalsIgnoreCase(atomisedNameStr
)) {
2463 addNameDifferenceToFile(originalName
,atomisedNameStr
);
2468 if(parseNameManually
){
2469 //System.out.println("DO IT MANUALLY");
// the configuration decides which of the two unparsed synonym creations is used
2470 if (this.state2
.getConfig().isUseOldUnparsedSynonymExtraction()){
2471 createUnparsedSynonym(rank
, newName
, atomisedMap
, myname
);
2473 createUnparsedSynonymNew(rank
, newName
, atomisedMap
, myname
, refMods
);;
2476 //System.out.println("AUTOMATIC!");
2477 // createAtomisedTaxonString(newName, atomisedMap, myname);
2478 myname
.setParsedName(nameToBeFilledTest
);
2479 myname
.buildTaxon();
2481 //System.out.println("RETURN SYNONYM "+myname.getSyno().toString());
2488 * @throws TransformerFactoryConfigurationError
2489 * @throws TransformerException
2490 * @return a list of possible names
/**
 * Reads a name node and fills a MyName (created with {@code new MyName(false)}) from it.
 * <p>
 * The status comes from extractStatus. The children of each tax:xmldata child supply the
 * identifier (extractIdentifier), the rank (dwc:taxonRank) and the atomised name parts;
 * a #text child that is not whitespace only supplies the full name. The full name is
 * cleaned with cleanName and registered in namesMap together with the atomised map. Where
 * full name and atomised name differ, one of them is chosen as newName, partly via
 * askWhichScientificName. The name is then parsed (parseWithExtension, parser.parseFullName
 * as fallback); depending on the parsing result createAtomisedTaxon is called or the parsed
 * name is stored in the MyName.
 *
 * @param name the XML node of the name
 * @param refMods the reference that is set as source of the MyName
 * @param followingText the text following the name node, passed on to parseWithExtension
 * @throws TransformerFactoryConfigurationError
 * @throws TransformerException
 */
2492 @SuppressWarnings({"rawtypes" })
2493 private MyName
extractScientificName(Node name
, Reference refMods
, String followingText
) throws TransformerFactoryConfigurationError
, TransformerException
{
2494 logger
.info("extractScientificName");
2496 String
[] rankListToPrintLowerCase_tmp
={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2497 List
<String
> rankListToPrint
= Arrays
.asList(rankListToPrintLowerCase_tmp
);
2499 Rank rank
= Rank
.UNKNOWN_RANK();
2500 NodeList children
= name
.getChildNodes();
2501 String originalName
= "";
2502 String fullName
= "";
2503 String newName
= "";
2504 String identifier
= "";
2505 HashMap
<String
, String
> atomisedMap
= new HashMap
<String
, String
>();
2506 List
<String
> atomisedNameList
= new ArrayList
<String
>();
2508 String status
= extractStatus(children
);
// collect identifier, rank and atomised parts from tax:xmldata, the full name from #text
2510 for (int i
=0;i
<children
.getLength();i
++){
2511 Node nameChild
= children
.item(i
);
2512 if(nameChild
.getNodeName().equalsIgnoreCase("tax:xmldata")){
2513 NodeList xmlDataChildren
= nameChild
.getChildNodes();
2514 for (int k
=0;k
<xmlDataChildren
.getLength();k
++){
2515 Node xmlDataChild
= xmlDataChildren
.item(k
);
2516 identifier
= extractIdentifier(identifier
, xmlDataChild
);
2517 String rankStr
= xmlDataChild
.getNodeName().toLowerCase();
2518 if (rankStr
.equalsIgnoreCase("dwc:taxonRank")) {
2519 rankStr
=xmlDataChild
.getTextContent().trim();
2520 Rank tmpRank
= getRank(rankStr
);
2521 if (tmpRank
!= null){
2525 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2527 atomisedMap
.put(rankStr
.toLowerCase(),xmlDataChild
.getTextContent().trim());
2529 addAtomisedNamesToMap(rankListToPrint
, rank
, atomisedNameList
, xmlDataChildren
);
2531 else if(nameChild
.getNodeName().equalsIgnoreCase("#text") && ! nameChild
.getTextContent().matches("\\s*")){
2532 // logger.info("name non atomised: "+children.item(i).getTextContent());
2533 fullName
= nameChild
.getTextContent().trim();
2534 // logger.info("fullname: "+fullName);
2537 originalName
=fullName
;
2538 fullName
= cleanName(fullName
, atomisedNameList
);
2539 namesMap
.put(fullName
,atomisedMap
);
2541 String atomisedNameStr
= getAtomisedNameStr(atomisedNameList
);
// choose between the full name and the atomised name when they differ
2543 if (fullName
!= null){
2544 if (!fullName
.equalsIgnoreCase(atomisedNameStr
)) {
2546 if (atomisedNameStr
.length()>fullName
.length()) {
2547 newName
= atomisedNameStr
;
2549 if (fullName
.length()>atomisedNameStr
.length() && (rank
.isLower(Rank
.SPECIES()) && fullName
.length()>2 && !fullName
.substring(0, 1).equals("."))) {
2550 newName
= askWhichScientificName(fullName
, atomisedNameStr
, classification
.getTitleCache(), name
);
2556 newName
=askWhichScientificName(fullName
, atomisedNameStr
, classification
.getTitleCache(), name
);
2563 // rank = askForRank(newName, rank, nomenclaturalCode);
2564 // System.out.println("atomised: "+atomisedMap.toString());
2566 // String[] names = new String[5];
2567 MyName myname
= new MyName(false);
2569 //System.out.println("\n\nBUILD "+newName+ "(rank: "+rank+")");
2570 // System.out.println(atomisedMap.keySet());
2571 fullName
= extractAuthorFromNames(rank
, fullName
, atomisedMap
, myname
);
2572 myname
.setOriginalName(fullName
);
2573 myname
.setNewName(newName
);
2575 myname
.setRank(rank
);
2576 myname
.setIdentifier(identifier
);
2577 myname
.setStatus(status
);
2578 myname
.setSource(refMods
);
2580 // boolean higherAdded=false;
2583 boolean parseNameManually
=false;
2584 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
2585 TaxonNameBase nameToBeFilledTest
= null;
2587 //if selected the atomised version
// NOTE(review): '==' compares String references, not content; this only holds when newName
// is the very same object as atomisedNameStr - confirm whether equals() was intended
2588 if(newName
==atomisedNameStr
){
2589 nameToBeFilledTest
= parseWithExtension(parser
, atomisedNameStr
, rank
, followingText
, atomisedMap
);
2590 if (nameToBeFilledTest
.hasProblem()){
2591 addProblemNameToFile("ato",atomisedNameStr
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2592 nameToBeFilledTest
= parser
.parseFullName(fullName
, nomenclaturalCode
,rank
);
2593 if (nameToBeFilledTest
.hasProblem()){
2594 addProblemNameToFile("full",fullName
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2595 parseNameManually
=true;
// otherwise the full name is parsed; a failure is reported and marks the name for manual handling
2599 nameToBeFilledTest
= parseWithExtension(parser
, fullName
, rank
, followingText
, atomisedMap
);
2600 if (nameToBeFilledTest
.hasProblem()){
2601 addProblemNameToFile("fullversion",fullName
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2602 nameToBeFilledTest
= parser
.parseFullName(fullName
, nomenclaturalCode
,rank
);
2603 parseNameManually
=true;
2604 if(!originalName
.equalsIgnoreCase(atomisedNameStr
)) {
2605 addNameDifferenceToFile(originalName
,atomisedNameStr
);
2610 //System.out.println("parseNameManually: "+parseNameManually);
2611 if(parseNameManually
){
2612 createAtomisedTaxon(rank
, newName
, atomisedMap
, myname
);
2615 createAtomisedTaxonString(newName
, atomisedMap
, myname
);
2616 myname
.setParsedName(nameToBeFilledTest
);
2617 //TODO correct handling of createIfNotExists
2618 myname
.buildTaxon();
2624 private TaxonNameBase
<?
,?
> parseWithExtension(INonViralNameParser parser
, String atomisedNameStr
, Rank rank
, String followingText
, HashMap
<String
, String
> atomisedMap
) {
2625 Object
[] nameExtensionResult
= getPossibleExtension(followingText
, atomisedMap
, nomenclaturalCode
);
2627 TaxonNameBase
<?
,?
> name
= parser
.parseFullName(atomisedNameStr
, nomenclaturalCode
, rank
);
2628 if (nameExtensionResult
!= null && nameExtensionResult
[0] != null){
2629 String ext
= (String
)nameExtensionResult
[0];
2630 TaxonNameBase
<?
,?
> extName
=parser
.parseFullName(atomisedNameStr
+ " " + ext
, nomenclaturalCode
, rank
);
2631 if (! extName
.hasProblem()){
2633 this.usedFollowingTextPrefix
= ext
;
2634 //TODO do we need to fill the atomisedMap at all?
2635 if ((Boolean
)(nameExtensionResult
[1])){
2638 if ((Boolean
)(nameExtensionResult
[2])){
2639 //TODO BasionymYear etc.
2640 Integer origYear
= ((ZoologicalName
)name
).getPublicationYear();
2641 if (origYear
!= null){
2642 atomisedMap
.put(PUBLICATION_YEAR
, origYear
.toString());
// Looks at the beginning of followingText for text that may still belong to the name
// (author and/or year) and returns it together with two flags.
// Returns an Object[] of the form {match, includeAuthor, includeYear}, where match is the
// matched text or null when the pattern does not match.
// NOTE(review): the result for a blank followingText is not shown here; the caller
// null-checks the returned array, so presumably null is returned - verify.
2650 private Object
[] getPossibleExtension(String followingText
, HashMap
<String
, String
> atomisedMap
, NomenclaturalCode nomenclaturalCode
) {
2651 if (StringUtils
.isBlank(followingText
)){
2655 boolean includeAuthor
= true;
2656 boolean includeYear
= false;
// an explicitly atomised authorship means the author must not be taken from the text
2657 if (atomisedMap
.containsKey("dwc:scientificnameauthorship")){
2658 includeAuthor
= false;
// NOTE(review): presumably includeYear is switched on for zoological names - verify
2660 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICZN
)){
2663 String patternStr
= "";
// author part: a capitalised word as defined by the name parser's regular expressions
2665 patternStr
+= NonViralNameParserImplRegExBase
.capitalWord
;
// year part: comma or whitespace separator followed by a 4-digit year 1700-2099
2668 patternStr
+= "\\s*(,|\\s+)\\s*" + "(17|18|19|20)" + "\\d{2}" ;
2670 String match
= null;
2671 if (! patternStr
.isEmpty()){
// anchored at the start of the trimmed following text
2672 Pattern pattern
= Pattern
.compile("^" + patternStr
);
2673 Matcher matcher
= pattern
.matcher(followingText
.trim());
2674 if (matcher
.find()){
2675 match
= matcher
.group();
2679 return new Object
[]{match
, includeAuthor
, includeYear
};
2683 * @param atomisedName
// Builds one name string from the atomised name parts: the parts are joined with a
// blank, whitespace runs are replaced repeatedly and the result is trimmed.
// atomisedName - the name parts in output order
// Returns the joined and trimmed name string.
2686 private String
getAtomisedNameStr(List
<String
> atomisedName
) {
2687 //logger.info("getAtomisedNameStr");
2688 String atomisedNameStr
= StringUtils
.join(atomisedName
," ");
// NOTE(review): the searched and the replacement literal look identical here; a loop of
// this shape only terminates if the searched literal is longer (presumably a double
// blank replaced by a single blank) - verify the literals.
2689 while(atomisedNameStr
.contains(" ")) {
2690 atomisedNameStr
=atomisedNameStr
.replace(" ", " ");
2692 atomisedNameStr
=atomisedNameStr
.trim();
2693 return atomisedNameStr
;
// Scans the given child nodes for a nomenclatural status and stores the trimmed text
// content of a matching node in `status`.
// A node matches if it is named "tax:status", or if it is named "tax:namePart" and its
// "type" attribute equals "status" (both comparisons ignore case).
// children - child nodes of the name element
// NOTE(review): logs at info level on every call.
2701 private String
extractStatus(NodeList children
) {
2702 logger
.info("extractStatus");
2704 for (int i
=0;i
<children
.getLength();i
++){
2705 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:status") ||
2706 (children
.item(i
).getNodeName().equalsIgnoreCase("tax:namePart") &&
// NOTE(review): getNamedItem("type") is dereferenced without a null check, so a
// tax:namePart element without a "type" attribute would raise a NullPointerException here
2707 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("status"))){
2708 status
= children
.item(i
).getTextContent().trim();
// Reads the identifier of a "tax:xid" node: the value of the "identifier" attribute,
// followed by "__" and the value of the "source" attribute.
// identifier - the identifier known so far; overwritten/extended for tax:xid nodes
// atom       - the node to inspect
// NOTE(review): failures are reported via System.out with the same message in both
// handlers and a broad catch(Exception); consider the class logger instead.
2720 private String
extractIdentifier(String identifier
, Node atom
) {
2721 //logger.info("extractIdentifier");
2722 if (atom
.getNodeName().equalsIgnoreCase("tax:xid")){
2724 identifier
= atom
.getAttributes().getNamedItem("identifier").getNodeValue();
// a missing "identifier" attribute ends up here (getNamedItem returns null)
2725 }catch(Exception e
){
2726 System
.out
.println("pb with identifier, maybe empty");
// the source is appended with a double underscore as separator
2729 identifier
+="__"+atom
.getAttributes().getNamedItem("source").getNodeValue();
2730 }catch(Exception e
){
2731 System
.out
.println("pb with identifier, maybe empty");
2738 * @param rankListToPrint
2740 * @param atomisedName
// Walks over the atomised name nodes and appends the text of the relevant ones to
// atomisedName, adding the notation each part needs inside a full name string.
// rankListToPrint - lower-case node names whose text is added unchanged
// rank            - rank of the name; used for the branch on ranks above genus
// atomisedName    - output list the name parts are appended to
// atom            - the nodes carrying the atomised name parts
2743 private void addAtomisedNamesToMap(List
<String
> rankListToPrint
, Rank rank
, List
<String
> atomisedName
, NodeList atom
) {
2744 logger
.info("addAtomisedNamesToMap");
2745 for (int k
=0;k
<atom
.getLength();k
++){
2746 Node node
= atom
.item(k
);
2747 String nodeName
= node
.getNodeName();
2748 if (! nodeName
.equalsIgnoreCase("dwc:taxonRank") ) { //rank has been handled in higher method
// subgenus is written in parentheses
2749 if (nodeName
.equalsIgnoreCase("dwc:subgenus") || nodeName
.equalsIgnoreCase("dwcranks:subgenus")) {
2750 atomisedName
.add("("+ node
.getTextContent().trim()+")");
// infraspecific epithets get their rank marker as prefix
2751 } else if(nodeName
.equalsIgnoreCase("dwcranks:varietyepithet") || nodeName
.equalsIgnoreCase("dwc:Subspecies") || nodeName
.equalsIgnoreCase("dwc:infraspecificepithet")) {
2752 if(nodeName
.equalsIgnoreCase("dwcranks:varietyepithet")){
2753 atomisedName
.add("var. "+node
.getTextContent().trim());
// this condition repeats the remaining alternatives of the enclosing test
2754 }else if(nodeName
.equalsIgnoreCase("dwc:Subspecies") || nodeName
.equalsIgnoreCase("dwc:infraspecificepithet")) {
// atom.item(k) is the same node as `node`
2755 atomisedName
.add("subsp. "+atom
.item(k
).getTextContent().trim());
// the list is queried with the lower-cased node name
2757 } else if(rankListToPrint
.contains(nodeName
.toLowerCase())) {
2758 atomisedName
.add(node
.getTextContent().trim());
// for ranks above genus: any dwcranks: node or dwc:Family (indexOf is case-sensitive)
2760 if (rank
.isHigher(Rank
.GENUS()) && (nodeName
.indexOf("dwcranks:")>-1 || nodeName
.indexOf("dwc:Family")>-1)) {
2761 atomisedName
.add(node
.getTextContent().trim());
2762 }else if (nodeName
.equals("#text")){
2763 String text
= node
.getTextContent();
// plain text between the name parts is only reported, not used
2764 if (StringUtils
.isNotBlank(text
)){
2766 logger
.warn("name xmldata contains text. This is unhandled");
2768 }else if (nodeName
.matches("(?i)(dwc:Kingdom|dwc:Class|dwc:Order|dwc:Family)")){
2769 //we currently do not use higher ranks information
2771 //TODO handle unhandled node
2772 logger
.warn("Unhandled node: " + nodeName
);
2781 * @param atomisedName
// Normalises the given full name: removes the blank after "(" and before ")", falls back
// to the joined atomised parts when the name is empty after trimming, replaces whitespace
// runs repeatedly and trims the result.
// name         - the full name as found in the document; may be null
// atomisedName - the atomised name parts used as fallback
// NOTE(review): confirm that a null name cannot reach the whitespace loop below.
2784 private String
cleanName(String name
, List
<String
> atomisedName
) {
2785 //logger.info("cleanName");
2786 String fullName
=name
;
2787 if (fullName
!= null){
2788 fullName
= fullName
.replace("( ", "(");
2789 fullName
= fullName
.replace(" )",")");
// empty name: rebuild it from the atomised parts
2791 if (fullName
.trim().isEmpty()){
2792 fullName
=StringUtils
.join(atomisedName
," ");
// NOTE(review): searched and replacement literal look identical here; presumably a
// double blank is replaced by a single blank - verify the literals
2795 while(fullName
.contains(" ")) {
2796 fullName
=fullName
.replace(" ", " ");
2797 // logger.info("while");
2799 fullName
=fullName
.trim();
2807 * @param atomisedMap
// Tries to separate the author from the full name when no explicit
// "dwc:scientificnameauthorship" is available: the full name is split at the lowest
// available name part and the remainder is taken as author. The author is then cut off
// the full name, stripped of commas and stored in myname. In the remaining case the
// atomised authorship is stored in myname (see the last statement).
// rank        - rank of the name; selects which name part is used for splitting
// name        - the full name including a possible author
// atomisedMap - atomised name parts by Darwin Core key
// myname      - receives the author
// NOTE(review): String.split() interprets its argument as a regular expression, but the
// name parts are passed unquoted (cf. Pattern.quote); an epithet that occurs more than
// once in the name also shifts index [1].
2811 private String
extractAuthorFromNames(Rank rank
, String name
, HashMap
<String
, String
> atomisedMap
, MyName myname
) {
2812 logger
.info("extractAuthorFromNames");
2813 String fullName
=name
;
2814 if (atomisedMap
.get("dwc:scientificnameauthorship") == null && fullName
!=null){
2815 // System.out.println("rank : "+rank.toString());
// ranks above species: the author follows the subgenus or, failing that, the genus
2816 if(rank
.isHigher(Rank
.SPECIES())){
2819 if(atomisedMap
.get("dwcranks:subgenus") != null) {
2820 author
= fullName
.split(atomisedMap
.get("dwcranks:subgenus"))[1].trim();
2822 if(atomisedMap
.get("dwc:subgenus") != null) {
2823 author
= fullName
.split(atomisedMap
.get("dwc:subgenus"))[1].trim();
2825 if(author
== null) {
2826 if(atomisedMap
.get("dwc:genus") != null) {
2827 author
= fullName
.split(atomisedMap
.get("dwc:genus"))[1].trim();
// keep only the part of the name in front of the author
2831 fullName
= fullName
.substring(0, fullName
.indexOf(author
));
2832 author
=author
.replaceAll(",","").trim();
2833 myname
.setAuthor(author
);
2835 }catch(Exception e
){
2836 //could not extract the author
// species rank: the author follows the species epithet
2839 if(rank
.equals(Rank
.SPECIES())){
2842 if(author
== null) {
2843 if(atomisedMap
.get("dwc:species") != null) {
// NOTE(review): `t` is not used by the visible code; the split is repeated below
2844 String
[] t
= fullName
.split(atomisedMap
.get("dwc:species"));
2845 // System.out.println("NB ELEMENTS "+t.length +"fullName "+fullName+", "+atomisedMap.get("dwc:species"));
2846 author
= fullName
.split(atomisedMap
.get("dwc:species"))[1].trim();
2847 // System.out.println("AUTEUR "+author);
2851 fullName
= fullName
.substring(0, fullName
.indexOf(author
));
2852 author
=author
.replaceAll(",","").trim();
2853 myname
.setAuthor(author
);
2855 }catch(Exception e
){
2856 //could not extract the author
// explicit authorship from the atomised data
2860 myname
.setAuthor(atomisedMap
.get("dwc:scientificnameauthorship"));
2867 * @param atomisedMap
// Copies the atomised name parts as plain strings into myname. A part is only copied if
// it is present in atomisedMap and its rank passes checkRankValidForImport().
// newName     - the full name string; used to derive the species name and its author
// atomisedMap - atomised name parts by Darwin Core key
// myname      - receives the rank strings, the author and the publication year
// NOTE(review): String.split() is called with unquoted epithets, which are interpreted
// as regular expressions.
2870 private void createAtomisedTaxonString(String newName
, HashMap
<String
, String
> atomisedMap
, MyName myname
) {
2871 logger
.info("createAtomisedTaxonString "+atomisedMap
);
2872 if(atomisedMap
.get("dwc:family") != null && checkRankValidForImport(Rank
.FAMILY())){
2873 myname
.setFamilyStr(atomisedMap
.get("dwc:family"));
2875 if(atomisedMap
.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank
.SUBFAMILY())){
2876 myname
.setSubfamilyStr(atomisedMap
.get("dwcranks:subfamily"));
2878 if(atomisedMap
.get("dwcranks:tribe") != null && checkRankValidForImport(Rank
.TRIBE())){
2879 myname
.setTribeStr(atomisedMap
.get("dwcranks:tribe"));
2881 if(atomisedMap
.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank
.SUBTRIBE())){
2882 myname
.setSubtribeStr(atomisedMap
.get("dwcranks:subtribe"));
2884 if(atomisedMap
.get("dwc:genus") != null && checkRankValidForImport(Rank
.GENUS())){
2885 myname
.setGenusStr(atomisedMap
.get("dwc:genus"));
// both spellings of the subgenus key are supported; dwc:subgenus is applied last
2887 if(atomisedMap
.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS())){
2888 myname
.setSubgenusStr(atomisedMap
.get("dwcranks:subgenus"));
2890 if(atomisedMap
.get("dwc:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS())){
2891 myname
.setSubgenusStr(atomisedMap
.get("dwc:subgenus"));
2893 if(atomisedMap
.get("dwc:species") != null && checkRankValidForImport(Rank
.SPECIES())){
// n = part of newName in front of the infraspecific epithet, without the rank marker
2895 if(atomisedMap
.get("dwc:infraspecificepithet") != null) {
2896 n
=newName
.split(atomisedMap
.get("dwc:infraspecificepithet"))[0];
2897 n
=n
.replace("subsp.","");
2899 if(atomisedMap
.get("dwc:subspecies") != null) {
2900 n
=newName
.split(atomisedMap
.get("dwc:subspecies"))[0];
2901 n
=n
.replace("subsp.","");
2903 if(atomisedMap
.get("dwcranks:varietyepithet") != null) {
2904 n
=newName
.split(atomisedMap
.get("dwcranks:varietyepithet"))[0];
2905 n
=n
.replace("var.","");
2906 n
=n
.replace("v.","");
2908 if(atomisedMap
.get("dwcranks:formepithet") != null) {
// NOTE(review): debug output on System.out
2910 System
.out
.println("TODO FORMA");
2911 n
=newName
.split(atomisedMap
.get("dwcranks:formepithet"))[0];
2912 n
=n
.replace("forma","");
// the author currently stored in myname is saved here and written back below
2915 String author
= myname
.getAuthor();
// more than two blank-separated tokens: the first two form the binomial (n2),
// the text after it is taken as the author of the species name
2916 if(n
.split(" ").length
>2){
2918 String n2
=n
.split(" ")[0]+" "+n
.split(" ")[1];
2921 a
=n
.split(n2
)[1].trim();
2922 }catch(Exception e
){
2923 logger
.info("no author in "+n
+"?");}
2925 myname
.setAuthor(a
);
2926 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
2931 myname
.setSpeciesStr(atomisedMap
.get("dwc:species"));
// write back the author that was saved before the species handling
2932 myname
.setAuthor(author
);
// dwc:subspecies and dwc:infraspecificepithet both fill the subspecies string
2934 if(atomisedMap
.get("dwc:subspecies") != null && checkRankValidForImport(Rank
.SUBSPECIES())){
2935 myname
.setSubspeciesStr(atomisedMap
.get("dwc:subspecies"));
2937 if(atomisedMap
.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank
.SUBSPECIES())){
2938 myname
.setSubspeciesStr(atomisedMap
.get("dwc:infraspecificepithet"));
2940 if(atomisedMap
.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank
.VARIETY())){
2941 myname
.setVarietyStr(atomisedMap
.get("dwcranks:varietyepithet"));
2943 if(atomisedMap
.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank
.FORM())){
2944 myname
.setFormStr(atomisedMap
.get("dwcranks:formepithet"));
// Integer.valueOf throws NumberFormatException if the stored year is not numeric
2946 if (atomisedMap
.get(PUBLICATION_YEAR
) != null){
2947 myname
.setPublicationYear(Integer
.valueOf(atomisedMap
.get(PUBLICATION_YEAR
)));
2952 * @see #createUnparsedSynonymNew(Rank, String, HashMap, MyName)
2955 * @param atomisedMap
// Old variant of the synonym creation for names that could not be parsed: for the name
// part whose rank equals the rank of the name, a taxon is looked up or created via
// myname.findOrCreateTaxon() and stored in myname. Names of unknown rank are passed to
// myname.setNotParsableTaxon().
// rank        - rank of the name
// newName     - the full name string
// atomisedMap - atomised name parts by Darwin Core key
// myname      - receives the taxa
// NOTE(review): the log message says "createSynonym", not the method name.
// NOTE(review): String.split() is called with unquoted epithets, which are interpreted
// as regular expressions.
2958 private void createUnparsedSynonym(Rank rank
, String newName
, HashMap
<String
, String
> atomisedMap
, MyName myname
) {
2959 logger
.info("createSynonym");
2960 //System.out.println("createsynonym");
2961 if(rank
.equals(Rank
.UNKNOWN_RANK())){
2962 myname
.setNotParsableTaxon(newName
);
// each of the following tests requires: part present, rank allowed for import,
// and the rank of the name equal to the rank of the part
2964 if(atomisedMap
.get("dwc:family") != null && checkRankValidForImport(Rank
.FAMILY()) && rank
.equals(Rank
.FAMILY())){
2965 myname
.setFamily(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:family"),newName
, Rank
.FAMILY(),rank
));
2967 if(atomisedMap
.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank
.SUBFAMILY()) && rank
.equals(Rank
.SUBFAMILY())){
2968 myname
.setSubfamily(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subfamily"), newName
,Rank
.SUBFAMILY(),rank
));
2970 if(atomisedMap
.get("dwcranks:tribe") != null && checkRankValidForImport(Rank
.TRIBE()) && rank
.equals(Rank
.TRIBE())){
2971 myname
.setTribe(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:tribe"),newName
, Rank
.TRIBE(),rank
));
2973 if(atomisedMap
.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank
.SUBTRIBE()) && rank
.equals(Rank
.SUBTRIBE())){
2974 myname
.setSubtribe(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subtribe"),newName
, Rank
.SUBTRIBE(),rank
));
2976 if(atomisedMap
.get("dwc:genus") != null && checkRankValidForImport(Rank
.GENUS()) && rank
.equals(Rank
.GENUS())){
2977 myname
.setGenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:genus"),newName
, Rank
.GENUS(),rank
));
2979 if(atomisedMap
.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS()) && rank
.equals(Rank
.SUBGENUS())){
2980 myname
.setSubgenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subgenus"),newName
, Rank
.SUBGENUS(),rank
));
2982 if(atomisedMap
.get("dwc:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS()) && rank
.equals(Rank
.SUBGENUS())){
2983 myname
.setSubgenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:subgenus"),newName
, Rank
.SUBGENUS(),rank
));
2985 if(atomisedMap
.get("dwc:species") != null && checkRankValidForImport(Rank
.SPECIES()) && rank
.equals(Rank
.SPECIES())){
// n = part of newName in front of the infraspecific epithet, without the rank marker
2987 if(atomisedMap
.get("dwc:infraspecificepithet") != null) {
2988 n
=newName
.split(atomisedMap
.get("dwc:infraspecificepithet"))[0];
2989 n
=n
.replace("subsp.","");
2991 if(atomisedMap
.get("dwc:subspecies") != null) {
2992 n
=newName
.split(atomisedMap
.get("dwc:subspecies"))[0];
2993 n
=n
.replace("subsp.","");
2995 if(atomisedMap
.get("dwcranks:varietyepithet") != null) {
2996 n
=newName
.split(atomisedMap
.get("dwcranks:varietyepithet"))[0];
2997 n
=n
.replace("var.","");
2998 n
=n
.replace("v.","");
3000 if(atomisedMap
.get("dwcranks:formepithet") != null) {
3002 //System.out.println("TODO FORMA");
3003 n
=newName
.split(atomisedMap
.get("dwcranks:formepithet"))[0];
3004 n
=n
.replace("forma","");
// the author currently stored in myname is saved here and written back below
3007 String author
= myname
.getAuthor();
// more than two blank-separated tokens: first two = binomial, rest = author
3008 if(n
.split(" ").length
>2){
3010 String n2
=n
.split(" ")[0]+" "+n
.split(" ")[1];
3013 a
= n
.split(n2
)[1].trim();
3014 }catch(Exception e
){logger
.info("no author in "+n
);}
3015 myname
.setAuthor(a
);
3016 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
// the species taxon is looked up with the shortened name n, not with newName
3020 Taxon species
= myname
.findOrCreateTaxon(atomisedMap
.get("dwc:species"),n
, Rank
.SPECIES(),rank
);
3021 myname
.setSpecies(species
);
3022 myname
.setAuthor(author
);
3024 if(atomisedMap
.get("dwc:subspecies") != null && checkRankValidForImport(Rank
.SUBSPECIES()) && rank
.equals(Rank
.SUBSPECIES())){
3025 myname
.setSubspecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:subspecies"), newName
,Rank
.SUBSPECIES(),rank
));
3027 if(atomisedMap
.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank
.SUBSPECIES()) && rank
.equals(Rank
.SUBSPECIES())){
3028 myname
.setSubspecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:infraspecificepithet"),newName
, Rank
.SUBSPECIES(),rank
));
3030 if(atomisedMap
.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank
.VARIETY()) && rank
.equals(Rank
.VARIETY())){
3031 myname
.setVariety(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:varietyepithet"),newName
, Rank
.VARIETY(),rank
));
3033 if(atomisedMap
.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank
.FORM()) && rank
.equals(Rank
.FORM())){
3034 myname
.setForm(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:formepithet"), newName
,Rank
.FORM(),rank
));
3043 * @see #createUnparsedSynonym(Rank, String, HashMap, MyName)
3044 * The original TaxonXImport extracted synonyms by creating accepted taxa with partial names.
3045 * I (AM) do not understand this, but I do not want to destroy code which may work in some cases; therefore
3046 * I created this switch, which keeps the old version available.
3047 * For Spiders the new version is preferred.
// New variant of the synonym creation for names that could not be parsed: fills a new
// name object from the atomised parts, wraps it into a Synonym and stores the synonym
// in myname (setSyno / setSynonym(true)).
// rank        - rank of the name
// newName     - the full name string; used as protected title cache
// atomisedMap - atomised name parts by Darwin Core key
// myname      - receives the synonym
// refMods     - only referenced by the commented-out addSource call in the visible code
// NOTE(review): the log message says "createSynonym", not the method name.
3049 private void createUnparsedSynonymNew(Rank rank
, String newName
, HashMap
<String
, String
> atomisedMap
, MyName myname
, Reference refMods
) {
3050 logger
.info("createSynonym");
3052 NonViralName
<?
> nameToBeFilled
= this.getNonViralNameAccNomenclature();
3053 //System.out.println("createsynonym");
3054 if(rank
.equals(Rank
.UNKNOWN_RANK())){
3056 myname
.setNotParsableTaxon(newName
);
// second argument true: the title cache is set as protected
3058 nameToBeFilled
.setTitleCache(newName
, true);
3060 if(atomisedMap
.get("dwc:genus") != null ){
3061 nameToBeFilled
.setGenusOrUninomial(atomisedMap
.get("dwc:genus"));
// supra generic names: the lowest available part becomes the uninomial
3063 if (rank
.isSupraGeneric()){
3064 if (atomisedMap
.get("dwcranks:subtribe") != null ){
3065 nameToBeFilled
.setGenusOrUninomial(atomisedMap
.get("dwcranks:subtribe"));
// NOTE(review): same condition as the branch before, so this branch can never be
// taken; possibly another key was intended - verify
3066 }else if (atomisedMap
.get("dwcranks:subtribe") != null ){
3067 nameToBeFilled
.setGenusOrUninomial(atomisedMap
.get("dwcranks:subtribe"));
3068 }else if (atomisedMap
.get("dwcranks:tribe") != null ){
3069 nameToBeFilled
.setGenusOrUninomial(atomisedMap
.get("dwcranks:tribe"));
3070 }else if (atomisedMap
.get("dwcranks:subfamily") != null ){
3071 nameToBeFilled
.setGenusOrUninomial(atomisedMap
.get("dwcranks:subfamily"));
3072 }else if (atomisedMap
.get("dwc:family") != null ){
3073 nameToBeFilled
.setGenusOrUninomial(atomisedMap
.get("dwc:family"));
3075 logger
.warn("Supra generic rank not yet handled or atomisation not available");
// both spellings of the subgenus key are supported; dwc:subgenus is applied last
3078 if (atomisedMap
.get("dwcranks:subgenus") != null){
3079 nameToBeFilled
.setInfraGenericEpithet(atomisedMap
.get("dwcranks:subgenus"));
3081 if (atomisedMap
.get("dwc:subgenus") != null){
3082 nameToBeFilled
.setInfraGenericEpithet(atomisedMap
.get("dwc:subgenus"));
3084 if (atomisedMap
.get("dwc:species") != null){
3085 nameToBeFilled
.setSpecificEpithet(atomisedMap
.get("dwc:species"));
// infraspecific epithet: form before variety before infraspecificepithet before subspecies
3087 if (atomisedMap
.get("dwcranks:formepithet") != null){
3088 nameToBeFilled
.setInfraSpecificEpithet(atomisedMap
.get("dwcranks:formepithet"));
3089 }else if (atomisedMap
.get("dwcranks:varietyepithet") != null){
3090 nameToBeFilled
.setInfraSpecificEpithet(atomisedMap
.get("dwcranks:varietyepithet"));
3091 }else if (atomisedMap
.get("dwc:infraspecificepithet") != null){
3092 nameToBeFilled
.setInfraSpecificEpithet(atomisedMap
.get("dwc:infraspecificepithet"));
3093 }else if (atomisedMap
.get("dwc:subspecies") != null){
3094 nameToBeFilled
.setInfraSpecificEpithet(atomisedMap
.get("dwc:subspecies"));
// secundum reference: the source URL reference unless the configuration asks for the
// configured secundum
3096 Reference sec
= sourceUrlRef
;
3097 if(!state2
.getConfig().doKeepOriginalSecundum()){
3098 sec
= state2
.getConfig().getSecundum();
3100 Synonym syn
= Synonym
.NewInstance(nameToBeFilled
, sec
);
3101 // sourceHandler.addSource(refMods, syn);
3102 myname
.setSyno(syn
);
3103 myname
.setSynonym(true);
3110 * @param atomisedMap
// Creates (or looks up) a taxon for every atomised name part that is present in
// atomisedMap and whose rank passes checkRankValidForImport(), and stores the taxa in
// myname. Names of unknown rank are passed to myname.setNotParsableTaxon().
// rank        - rank of the name
// newName     - the full name string
// atomisedMap - atomised name parts by Darwin Core key
// myname      - receives the taxa
// NOTE(review): String.split() is called with unquoted epithets, which are interpreted
// as regular expressions.
3113 private void createAtomisedTaxon(Rank rank
, String newName
, HashMap
<String
, String
> atomisedMap
, MyName myname
) {
3114 logger
.info("createAtomisedTaxon "+atomisedMap
);
3115 if(rank
.equals(Rank
.UNKNOWN_RANK())){
3116 myname
.setNotParsableTaxon(newName
);
3119 if(atomisedMap
.get("dwc:family") != null && checkRankValidForImport(Rank
.FAMILY())){
3120 myname
.setFamily(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:family"),newName
, Rank
.FAMILY(),rank
));
3122 if(atomisedMap
.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank
.SUBFAMILY())){
3123 myname
.setSubfamily(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subfamily"), newName
,Rank
.SUBFAMILY(),rank
));
3125 if(atomisedMap
.get("dwcranks:tribe") != null && checkRankValidForImport(Rank
.TRIBE())){
3126 myname
.setTribe(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:tribe"),newName
, Rank
.TRIBE(),rank
));
3128 if(atomisedMap
.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank
.SUBTRIBE())){
3129 myname
.setSubtribe(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subtribe"),newName
, Rank
.SUBTRIBE(),rank
));
3131 if(atomisedMap
.get("dwc:genus") != null && checkRankValidForImport(Rank
.GENUS())){
3132 myname
.setGenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:genus"),newName
, Rank
.GENUS(),rank
));
// both spellings of the subgenus key are supported; dwc:subgenus is applied last
3134 if(atomisedMap
.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS())){
3135 myname
.setSubgenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subgenus"),newName
, Rank
.SUBGENUS(),rank
));
3137 if(atomisedMap
.get("dwc:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS())){
3138 myname
.setSubgenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:subgenus"),newName
, Rank
.SUBGENUS(),rank
));
3140 if(atomisedMap
.get("dwc:species") != null && checkRankValidForImport(Rank
.SPECIES())){
// n = part of newName in front of the infraspecific epithet, without the rank marker
3142 if(atomisedMap
.get("dwc:infraspecificepithet") != null) {
3143 n
=newName
.split(atomisedMap
.get("dwc:infraspecificepithet"))[0];
3144 n
=n
.replace("subsp.","");
3146 if(atomisedMap
.get("dwc:subspecies") != null) {
3147 n
=newName
.split(atomisedMap
.get("dwc:subspecies"))[0];
3148 n
=n
.replace("subsp.","");
3150 if(atomisedMap
.get("dwcranks:varietyepithet") != null) {
3151 n
=newName
.split(atomisedMap
.get("dwcranks:varietyepithet"))[0];
3152 n
=n
.replace("var.","");
3153 n
=n
.replace("v.","");
3155 if(atomisedMap
.get("dwcranks:formepithet") != null) {
3157 //System.out.println("TODO FORMA");
3158 n
=newName
.split(atomisedMap
.get("dwcranks:formepithet"))[0];
3159 n
=n
.replace("forma","");
// the author currently stored in myname is saved here and written back below
3162 String author
= myname
.getAuthor();
// more than two blank-separated tokens: first two = binomial, rest = author
3163 if(n
.split(" ").length
>2){
3164 String n2
=n
.split(" ")[0]+" "+n
.split(" ")[1];
3167 a
= n
.split(n2
)[1].trim();
3168 }catch(Exception e
){logger
.info("no author in "+n
);}
3169 myname
.setAuthor(a
);
3170 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
// the species taxon is looked up with the shortened name n, not with newName
3175 myname
.setSpecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:species"),n
, Rank
.SPECIES(),rank
));
3176 myname
.setAuthor(author
);
// dwc:subspecies and dwc:infraspecificepithet both fill the subspecies taxon
3178 if(atomisedMap
.get("dwc:subspecies") != null && checkRankValidForImport(Rank
.SUBSPECIES())){
3179 myname
.setSubspecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:subspecies"), newName
,Rank
.SUBSPECIES(),rank
));
3181 if(atomisedMap
.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank
.SUBSPECIES())){
3182 myname
.setSubspecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:infraspecificepithet"),newName
, Rank
.SUBSPECIES(),rank
));
3184 if(atomisedMap
.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank
.VARIETY())){
3185 myname
.setVariety(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:varietyepithet"),newName
, Rank
.VARIETY(),rank
));
3187 if(atomisedMap
.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank
.FORM())){
3188 myname
.setForm(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:formepithet"), newName
,Rank
.FORM(),rank
));
// Tells whether taxa of the given rank are to be imported.
// currentRank - the rank to check
// Returns true if currentRank is lower than or equal to the maximum rank of the import
// configuration (state2.getConfig().getMaxRank()).
3196 private boolean checkRankValidForImport(Rank currentRank
) {
3197 //logger.info("checkRankValidForImport");
3198 return currentRank
.isLower(state2
.getConfig().getMaxRank()) || currentRank
.equals(state2
.getConfig().getMaxRank());
3204 * @param classification2
// Replaces the classification this object works on.
// classification2 - the classification to use from now on; stored as is, without checks
3206 public void updateClassification(Classification classification2
) {
3207 //logger.info("updateClassification");
3208 classification
= classification2
;
3213 * Casts the given taxon name into a botanical, zoological or bacterial name, depending on the nomenclatural code.
3214 * If this fails, the name is cast into a plain NonViralName.
3215 * @param tnb the taxon name to cast
// Casts tnb to the name class that belongs to the current nomenclatural code:
// ICNAFP - BotanicalName, ICZN - ZoologicalName, ICNB - BacterialName.
// tnb - the name to cast
// nvn - initial value of the result
// Returns the cast name.
// NOTE(review): presumably nvn is returned unchanged for any other code - verify.
// NOTE(review): a failing cast is handled by catch(Exception) and a cast to
// NonViralName; that fallback cast throws a ClassCastException itself if tnb is not a
// NonViralName. An instanceof check would avoid using exceptions for control flow.
3217 @SuppressWarnings("rawtypes")
3218 public NonViralName
<?
> castTaxonNameBase(TaxonNameBase tnb
, NonViralName
<?
> nvn
) {
3220 //logger.info("castTaxonNameBase");
3221 NonViralName
<?
> taxonnamebase2
= nvn
;
// botanical code
3222 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNAFP
)) {
3224 taxonnamebase2
=(BotanicalName
) tnb
;
3225 }catch(Exception e
){
3226 taxonnamebase2
= (NonViralName
<?
>) tnb
;
// zoological code
3229 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICZN
)) {
3231 taxonnamebase2
=(ZoologicalName
) tnb
;
3232 }catch(Exception e
){
3233 taxonnamebase2
= (NonViralName
<?
>) tnb
;
// bacterial code
3236 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNB
)) {
3238 taxonnamebase2
=(BacterialName
) tnb
;
3239 }catch(Exception e
){
3240 taxonnamebase2
= (NonViralName
<?
>) tnb
;
3243 return taxonnamebase2
;
3248 * Casts the given taxon name into a botanical, zoological or bacterial name, depending on the nomenclatural code.
3249 * If this fails, the name is cast into a plain NonViralName.
3250 * @param tnb the taxon name to cast
// Deproxies tnb and casts it to the name class that belongs to the current nomenclatural
// code: ICNAFP - BotanicalName, ICZN - ZoologicalName, ICNB - BacterialName.
// tnb - the name to cast; it is deproxied via CdmBase.deproxy before casting
// Returns the cast name.
// NOTE(review): the result starts as null and is only assigned in the three code
// branches, so presumably null is returned for any other code - verify.
// NOTE(review): a failing cast is handled by catch(Exception) and a cast to
// NonViralName; that fallback cast throws a ClassCastException itself if tnb is not a
// NonViralName.
3252 @SuppressWarnings("rawtypes")
3253 public NonViralName
<?
> castTaxonNameBase(TaxonNameBase tnb
) {
3254 //logger.info("castTaxonNameBase2");
3255 NonViralName
<?
> taxonnamebase2
= null;
// unwrap a possible persistence proxy so that the casts below see the real class
3256 tnb
=CdmBase
.deproxy(tnb
, TaxonNameBase
.class);
// botanical code
3257 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNAFP
)) {
3259 taxonnamebase2
=(BotanicalName
) tnb
;
3260 }catch(Exception e
){
3261 taxonnamebase2
= (NonViralName
<?
>) tnb
;
// zoological code
3264 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICZN
)) {
3266 taxonnamebase2
=(ZoologicalName
) tnb
;
3267 }catch(Exception e
){
3268 taxonnamebase2
= (NonViralName
<?
>) tnb
;
// bacterial code
3271 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNB
)) {
3273 taxonnamebase2
=(BacterialName
) tnb
;
3274 }catch(Exception e
){
3275 taxonnamebase2
= (NonViralName
<?
>) tnb
;
3278 return taxonnamebase2
;
3281 public class MyName
{
// Creates a name holder.
// isSynonym - whether the name is handled as a synonym (true) or as an accepted taxon
3285 public MyName(boolean isSynonym
) {
3287 this.isSynonym
= isSynonym
;
// State of a MyName instance. All fields except taxon and syno have package visibility.
// the name string as found in the source document
3290 String originalName
="";
// rank of the name; unknown until set
3292 Rank rank
=Rank
.UNKNOWN_RANK();
3293 String identifier
="";
// the name object this holder works on
3297 NonViralName
<?
> taxonNameBase
;
// one taxon per supported rank
3301 Taxon family
,subfamily
,tribe
,subtribe
,genus
,subgenus
,species
,subspecies
, variety
,form
;
// one name per supported rank (no variety/form name fields are declared)
3302 NonViralName
<?
> familyName
, subfamilyName
, tribeName
,subtribeName
,genusName
,subgenusName
,speciesName
,subspeciesName
;
// plain string per supported rank
3303 String familyStr
, subfamilyStr
, tribeStr
,subtribeStr
,genusStr
,subgenusStr
,speciesStr
,subspeciesStr
,formStr
,varietyStr
;
3304 Integer publicationYear
;
// resulting accepted taxon or synonym
3309 private Taxon taxon
;
3310 private Synonym syno
;
// Getter for the synonym of this name (field syno) - presumably returns it unchanged.
3315 public Synonym
getSyno() {
// Text representation built from the rank strings of this name.
// Returns the collected strings joined with a single blank.
// NOTE(review): presumably every non-empty rank string is added to `tot` in array
// order - verify. In that order formStr comes before varietyStr.
3320 public String
toString(){
3321 List
<String
> tot
=new ArrayList
<String
>();
3322 String
[] n
= {familyStr
, subfamilyStr
, tribeStr
,subtribeStr
,genusStr
,subgenusStr
,speciesStr
,subspeciesStr
,formStr
,varietyStr
};
// null and "" are skipped (StringUtils.isEmpty)
3324 if (!StringUtils
.isEmpty(elt
)) {
3330 return StringUtils
.join(tot
," ");
3333 * @param syno the syno to set
// Setter for the synonym of this name (field syno) - presumably stores it unchanged.
3335 public void setSyno(Synonym syno
) {
// flag: the name is handled as a synonym; false (accepted taxon) by default
3339 boolean isSynonym
=false;
3342 * @return the isSynonym
3344 public boolean isSynonym() {
3349 * @param isSynonym the isSynonym to set
3351 public void setSynonym(boolean isSynonym
) {
3352 this.isSynonym
= isSynonym
;
// NOTE(review): presumably stores the given reference as source - verify
3355 public void setSource(Reference re
){
// Plain setters for the rank strings; each stores the given string unchanged in the
// field of the same name.
3362 public void setFormStr(String string
) {
3363 this.formStr
=string
;
3369 public void setVarietyStr(String string
) {
3370 this.varietyStr
=string
;
3376 public void setSubspeciesStr(String string
) {
3377 this.subspeciesStr
=string
;
3383 public void setSpeciesStr(String string
) {
3384 this.speciesStr
=string
;
3390 public void setSubgenusStr(String string
) {
3391 this.subgenusStr
=string
;
3397 public void setGenusStr(String string
) {
3398 this.genusStr
=string
;
3404 public void setSubtribeStr(String string
) {
3405 this.subtribeStr
=string
;
3411 public void setTribeStr(String string
) {
3412 this.tribeStr
=string
;
3418 public void setSubfamilyStr(String string
) {
3419 this.subfamilyStr
=string
;
3425 public void setFamilyStr(String string
) {
3426 this.familyStr
=string
;
3430 * @return the familyStr
// Plain getters for the rank strings - presumably each returns the field named in its
// @return line.
3432 public String
getFamilyStr() {
3436 * @return the subfamilyStr
3438 public String
getSubfamilyStr() {
3439 return subfamilyStr
;
3442 * @return the tribeStr
3444 public String
getTribeStr() {
3448 * @return the subtribeStr
3450 public String
getSubtribeStr() {
3454 * @return the genusStr
3456 public String
getGenusStr() {
3460 * @return the subgenusStr
3462 public String
getSubgenusStr() {
3466 * @return the speciesStr
3468 public String
getSpeciesStr() {
3472 * @return the subspeciesStr
3474 public String
getSubspeciesStr() {
3475 return subspeciesStr
;
3478 * @return the formStr
3480 public String
getFormStr() {
3484 * @return the varietyStr
3486 public String
getVarietyStr() {
// Accessors for the publication year of the name; the value may be null.
3490 public Integer
getPublicationYear() {
3491 return publicationYear
;
3494 public void setPublicationYear(Integer publicationYear
) {
3495 this.publicationYear
= publicationYear
;
// Handles a name that could not be parsed: looks for an existing taxon or synonym whose
// title starts with the name, and otherwise builds a new name with the unparsed string
// as protected title cache and wraps it into a new Taxon or Synonym. The result is
// stored in this.taxon or this.syno, and taxonNameBase is set from its name.
// newName2 - the unparsed name string
// NOTE(review): String.split("sec.") takes a regular expression, so the dot matches any
// character and the title is cut at the first "sec" followed by any character.
3501 public void setNotParsableTaxon(String newName2
) {
3502 //takes too much time
3503 // List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
// translate the status string into a status type; unknown strings are logged and
// recorded via addProblematicStatusToFile
3505 NomenclaturalStatusType statusType
= null;
3506 if (!getStatus().isEmpty()){
3508 statusType
= nomStatusString2NomStatus(getStatus());
3509 } catch (UnknownCdmTypeException e
) {
3510 addProblematicStatusToFile(getStatus());
3511 logger
.warn("Problem with status");
3514 List
<TaxonBase
> tmpList
= new ArrayList
<TaxonBase
>();
// candidates: all taxon bases whose title begins with the name
3516 Pager
<TaxonBase
> taxontest
= importer
.getTaxonService().findByTitle(TaxonBase
.class, newName2
, MatchMode
.BEGINNING
, null, null, null, null, null);
3517 tmpList
.addAll(taxontest
.getRecords());
3519 //logger.info("tmpList returned: "+tmpList.size());
3522 NonViralName
<?
> identicName
= null;
3523 boolean foundIdentic
=false;
3524 TaxonBase
<?
> tmpTaxonBase
=null;
3525 // Taxon tmpPartial=null;
3526 for (TaxonBase
<?
> tmpb
:tmpList
){
3528 TaxonNameBase
<?
,?
> tnb
= tmpb
.getName();
// compare the name title in front of the "sec." part with the name, ignoring case
3531 if (tnb
.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(newName2
) ){
3532 crank
=tnb
.getRank();
3533 if (crank
!=null && rank
!=null){
3534 if (crank
.equals(rank
)){
3535 identicName
= CdmBase
.deproxy(tnb
, NonViralName
.class);
// the candidate must be of the same kind (synonym / accepted taxon) as this name
3536 if (isSynonym
&& tmpb
.isInstanceOf(Synonym
.class) || !isSynonym
&& tmpb
.isInstanceOf(Taxon
.class)){
3547 boolean statusMatch
=false;
3548 boolean appendedMatch
=false;
3549 if(tmpTaxonBase
!=null && foundIdentic
){
3550 statusMatch
=compareStatus(tmpTaxonBase
, statusType
);
3551 if (!getStatus().isEmpty() && ! (tmpTaxonBase
.getAppendedPhrase() == null)) {
3552 appendedMatch
=tmpTaxonBase
.getAppendedPhrase().equals(getStatus());
3554 if (getStatus().isEmpty() && tmpTaxonBase
.getAppendedPhrase() == null) {
// NOTE(review): the third alternative is already covered by the second one
// (tmpTaxonBase != null && !statusMatch), so appendedMatch has no effect here
3559 if ((tmpTaxonBase
== null || !foundIdentic
) || (tmpTaxonBase
!= null && !statusMatch
) || (tmpTaxonBase
!= null && !appendedMatch
&& !statusMatch
)){
3561 NonViralName
<?
> tnb
;
3562 if (identicName
== null){
3563 tnb
= getNonViralNameAccNomenclature();
3566 if(statusType
!= null) {
3567 tnb
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
3569 if(StringUtils
.isNotBlank(getStatus())) {
3570 tnb
.setAppendedPhrase(getStatus());
// second argument true: the title cache is set as protected
3572 tnb
.setTitleCache(newName2
,true);
3573 tmpTaxonBase
= findMatchingTaxon(tnb
,refMods
);
// no matching taxon found: create a new synonym or accepted taxon
3578 if(tmpTaxonBase
==null){
3579 tmpTaxonBase
= isSynonym ? Synonym
.NewInstance(tnb
, refMods
) : Taxon
.NewInstance(tnb
, refMods
);
3580 if(!state2
.getConfig().doKeepOriginalSecundum()) {
3581 tmpTaxonBase
.setSec(state2
.getConfig().getSecundum());
3583 //tmptaxonbase.setSec(refMods);
// NOTE(review): unconditional casts to Taxon; presumably only reached for accepted
// taxa - verify
3585 classification
.addChildTaxon((Taxon
)tmpTaxonBase
, null, null);
3586 sourceHandler
.addSource(refMods
, (Taxon
)tmpTaxonBase
);
3591 tmpTaxonBase
= CdmBase
.deproxy(tmpTaxonBase
, TaxonBase
.class);
3592 if (author
!= null) {
// identifiers with at most 2 characters are ignored
3593 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
3594 setLSID(getIdentifier(), tmpTaxonBase
);
3595 importer
.getTaxonService().saveOrUpdate(tmpTaxonBase
);
3596 tmpTaxonBase
= CdmBase
.deproxy(tmpTaxonBase
, TaxonBase
.class);
3599 TaxonNameBase
<?
,?
> tnb
= CdmBase
.deproxy(tmpTaxonBase
.getName(), TaxonNameBase
.class);
// store the result as accepted taxon ...
3602 this.taxon
=(Taxon
)tmpTaxonBase
;
// ... or as synonym; an accepted taxon at this point is reported as inconsistent
3604 if (tmpTaxonBase
instanceof Taxon
){
3605 logger
.warn("Incorrect status");
3607 this.syno
=(Synonym
)tmpTaxonBase
;
3610 taxonNameBase
= castTaxonNameBase(tnb
, taxonNameBase
);
3617 public void buildTaxon() {
3618 //System.out.println("BUILD TAXON");
3619 logger
.info("buildTaxon");
3620 NomenclaturalStatusType statusType
= null;
3621 if (!getStatus().isEmpty()){
3622 status
= getStatus();
3623 String newNameStatus
= newNameStatus(status
);
3624 if (newNameStatus
!= null){
3625 taxonNameBase
.setAppendedPhrase(newNameStatus
);
3628 statusType
= nomStatusString2NomStatus(getStatus());
3629 taxonNameBase
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
3630 } catch (UnknownCdmTypeException e
) {
3631 addProblematicStatusToFile(getStatus());
3632 logger
.warn("Problem with status");
3636 importer
.getNameService().save(taxonNameBase
);
3638 TaxonBase
<?
> tmpTaxonBase
;
3640 tmpTaxonBase
=Taxon
.NewInstance(taxonNameBase
, refMods
); //sec set null
3643 tmpTaxonBase
=Synonym
.NewInstance(taxonNameBase
, refMods
); //sec set null
3645 boolean exist
= false;
3647 for (TaxonNode node
: classification
.getAllNodes()){
3649 Taxon nodeTaxon
= node
.getTaxon();
3650 boolean titleMatches
= nodeTaxon
.getTitleCache().equalsIgnoreCase(tmpTaxonBase
.getTitleCache());
3651 boolean nomStatusMatches
= compareStatus(node
.getTaxon(), statusType
);
3652 boolean nodeNameReplaceable
= checkNodeNameReplaceable(nodeTaxon
, tmpTaxonBase
);
3653 if(titleMatches
&& nomStatusMatches
) {
3655 tmpTaxonBase
=CdmBase
.deproxy(nodeTaxon
, TaxonBase
.class);
3658 logger
.info("Found the same name but from another type (taxon/synonym)");
3659 TaxonNameBase
<?
,?
> existingTnb
= getTaxon().getName();
3660 tmpTaxonBase
= new Synonym(existingTnb
, refMods
);
3661 importer
.getTaxonService().saveOrUpdate(tmpTaxonBase
);
3664 }else if (nodeNameReplaceable
){
3665 nodeTaxon
.setName(tmpTaxonBase
.getName());
3666 tmpTaxonBase
= nodeTaxon
;
3669 }catch(NullPointerException n
){logger
.warn(" A taxon is either null or its titlecache is null - ignore it?");}
3674 boolean insertAsExisting
=false;
3675 List
<Taxon
> existingTaxons
=new ArrayList
<Taxon
>();
3677 existingTaxons
= getMatchingTaxa(taxonNameBase
);
3678 } catch (Exception e1
) {
3679 e1
.printStackTrace();
3681 double similarityScore
=0.0;
3682 double similarityAuthor
=-1;
3687 for (Taxon bestMatchingTaxon
: existingTaxons
){
3688 //System.out.println("tnbase "+taxonnamebase.getTitleCache());
3689 //System.out.println("bestex "+bestMatchingTaxon.getTitleCache());
3690 if(taxonNameBase
.getAuthorshipCache()!=null) {
3691 author1
=taxonNameBase
.getAuthorshipCache();
3694 if(castTaxonNameBase(bestMatchingTaxon
.getName()).getAuthorshipCache()!=null) {
3695 author2
=castTaxonNameBase(bestMatchingTaxon
.getName()).getAuthorshipCache();
3697 } catch (Exception e
) {
3698 // TODO Auto-generated catch block
3699 e
.printStackTrace();
3702 t1
=taxonNameBase
.getTitleCache();
3703 if (author1
!=null && !StringUtils
.isEmpty(author1
)) {
3704 t1
=t1
.split(Pattern
.quote(author1
))[0];
3706 } catch (Exception e
) {
3707 // TODO Auto-generated catch block
3708 e
.printStackTrace();
3711 t2
=bestMatchingTaxon
.getTitleCache().split("sec.")[0].trim();
3712 if (author2
!=null && !StringUtils
.isEmpty(author2
)) {
3713 t2
=t2
.split(Pattern
.quote(author2
))[0];
3715 } catch (Exception e
) {
3716 // TODO Auto-generated catch block
3717 e
.printStackTrace();
3720 similarityScore
=similarity(t1
.trim(), t2
.trim());
3721 //System.out.println("taxonscore "+similarityScore);
3722 similarityAuthor
=similarity(author1
.trim(), author2
.trim());
3723 //System.out.println("authorscore "+similarityAuthor);
3724 insertAsExisting
= compareAndCheckTaxon(taxonNameBase
, refMods
, similarityScore
, bestMatchingTaxon
, similarityAuthor
);
3725 if(insertAsExisting
) {
3726 tmpTaxonBase
=bestMatchingTaxon
;
3730 if ( !insertAsExisting
){
3731 if(!state2
.getConfig().doKeepOriginalSecundum()) {
3732 tmpTaxonBase
.setSec(state2
.getConfig().getSecundum());
3735 // tmptaxonbase.setSec(refMods);
3736 if (taxonNameBase
.getRank().equals(state2
.getConfig().getMaxRank())) {
3737 //System.out.println("****************************"+tmptaxonbase);
3739 classification
.addChildTaxon((Taxon
)tmpTaxonBase
, refMods
, null);
3742 hierarchy
= new HashMap
<Rank
, Taxon
>();
3743 //System.out.println("LOOK FOR PARENT "+taxonnamebase.toString()+", "+tmptaxonbase.toString());
3745 lookForParentNode(taxonNameBase
,(Taxon
)tmpTaxonBase
, refMods
,this);
3746 //System.out.println("HIERARCHY "+hierarchy);
3747 Taxon parent
= buildHierarchy();
3748 if(!taxonExistsInClassification(parent
,(Taxon
)tmpTaxonBase
)){
3750 classification
.addParentChild(parent
, (Taxon
)tmpTaxonBase
, refMods
, null);
3752 classification
.addChildTaxon((Taxon
)tmpTaxonBase
, refMods
, null);
3754 importer
.getClassificationService().saveOrUpdate(classification
);
3757 // Set<TaxonNode> nodeList = classification.getAllNodes();
3758 // for(TaxonNode tn:nodeList) {
3759 // System.out.println(tn.getTaxon());
3763 importer
.getClassificationService().saveOrUpdate(classification
);
3766 Synonym castTest
=CdmBase
.deproxy(tmpTaxonBase
, Synonym
.class);
3767 }catch(Exception e
){
3768 TaxonNameBase
<?
,?
> existingTnb
= tmpTaxonBase
.getName();
3769 Synonym castTest
= new Synonym(existingTnb
, refMods
);
3770 importer
.getTaxonService().saveOrUpdate(castTest
);
3771 tmpTaxonBase
=CdmBase
.deproxy(castTest
, Synonym
.class);
3776 taxon
=CdmBase
.deproxy(tmpTaxonBase
, Taxon
.class);
3778 syno
=CdmBase
.deproxy(tmpTaxonBase
, Synonym
.class);
3783 private boolean checkNodeNameReplaceable(Taxon nodeTaxon
, TaxonBase
<?
> newTaxon
) {
3784 //TODO preliminary check
3785 if (newTaxon
.isInstanceOf(Synonym
.class)){
3788 NonViralName
<?
> nodeName
= CdmBase
.deproxy(nodeTaxon
.getName(), NonViralName
.class);
3789 NonViralName
<?
> newName
= CdmBase
.deproxy(newTaxon
.getName(), NonViralName
.class);
3790 if (nodeTaxon
.getName() == null || newName
== null){
3793 if (nodeTaxon
.getDescriptions().size() > 0 || nodeName
.getDescriptions().size() > 0 || nodeName
.getTypeDesignations().size() > 0 ){
3796 boolean compare
= true;
3797 for (NomenclaturalStatus status
: newName
.getStatus() ){
3798 compare
&= compareStatus(nodeTaxon
, status
.getType());
3804 if (nodeName
.getNameCache() != null && nodeName
.getNameCache().equals(newName
.getNameCache())){
3805 if (nodeName
.getNameCache().equals(nodeName
.getTitleCache())){
3806 if (newName
.getNameCache().length() < newName
.getTitleCache().length()){
3807 logger
.warn("We still need to check, if node was automatically created via hierarchy creation: " + nodeName
.getNameCache());
3819 private Taxon
buildHierarchy() {
3820 logger
.info("buildHierarchy");
3821 Taxon higherTaxon
= null;
3822 //add the maxRank as a root
3823 if(hierarchy
.containsKey(state2
.getConfig().getMaxRank())){
3824 Taxon ct
=hierarchy
.get(state2
.getConfig().getMaxRank());
3825 if(!taxonExistsInClassification(higherTaxon
, ct
)) {
3826 classification
.addChildTaxon(ct
, refMods
, null);
3828 higherTaxon
= hierarchy
.get(state2
.getConfig().getMaxRank());
3829 // return higherTaxon;
3831 //add the relation to the highertaxon, except if the current rank to add IS the maxRank
3835 if(hierarchy
.containsKey(Rank
.FAMILY()) && !state2
.getConfig().getMaxRank().equals(Rank
.FAMILY())){
3836 higherTaxon
=saveAndGetHigherTaxon(Rank
.FAMILY(),higherTaxon
);
3838 if(hierarchy
.containsKey(Rank
.SUBFAMILY()) && !state2
.getConfig().getMaxRank().equals(Rank
.SUBFAMILY())){
3839 higherTaxon
=saveAndGetHigherTaxon(Rank
.SUBFAMILY(),higherTaxon
);
3841 if(hierarchy
.containsKey(Rank
.TRIBE())&& !state2
.getConfig().getMaxRank().equals(Rank
.TRIBE())){
3842 higherTaxon
=saveAndGetHigherTaxon(Rank
.TRIBE(),higherTaxon
);
3844 if(hierarchy
.containsKey(Rank
.SUBTRIBE())&& !state2
.getConfig().getMaxRank().equals(Rank
.SUBTRIBE())){
3845 higherTaxon
=saveAndGetHigherTaxon(Rank
.SUBTRIBE(),higherTaxon
);
3847 if(hierarchy
.containsKey(Rank
.GENUS())&& !state2
.getConfig().getMaxRank().equals(Rank
.SUBGENUS())){
3848 higherTaxon
=saveAndGetHigherTaxon(Rank
.GENUS(),higherTaxon
);
3850 if(hierarchy
.containsKey(Rank
.SUBGENUS())&& !state2
.getConfig().getMaxRank().equals(Rank
.SUBGENUS())){
3851 higherTaxon
=saveAndGetHigherTaxon(Rank
.SUBGENUS(),higherTaxon
);
3853 importer
.getClassificationService().saveOrUpdate(classification
);
3857 private Taxon
saveAndGetHigherTaxon(Rank r
, Taxon higherTaxon
){
3858 Taxon ct
=hierarchy
.get(r
);
3859 if(!taxonExistsInClassification(higherTaxon
,ct
)) {
3860 if(higherTaxon
!= null && ct
!=null) {
3861 classification
.addParentChild(higherTaxon
, ct
, refMods
, null);
3863 if(higherTaxon
== null && ct
!=null) {
3864 classification
.addChildTaxon(ct
, refMods
, null);
3870 private boolean taxonExistsInClassification(Taxon parent
, Taxon child
){
3871 logger
.info("taxonExistsInClassification");
3872 // System.out.println("LOOK IF TAXA EXIST "+parent+", "+child);
3873 boolean found
=false;
3875 for (TaxonNode p
: classification
.getAllNodes()){
3876 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
3877 for (TaxonNode c
: p
.getChildNodes()) {
3878 if (c
.getTaxon().getTitleCache().equalsIgnoreCase(child
.getTitleCache())) {
3887 for (TaxonNode p
: classification
.getAllNodes()){
3888 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(child
.getTitleCache())) {
3894 // System.out.println("LOOK IF TAXA EXIST? "+found);
3898 * @param nameToBeFilledTest
3900 @SuppressWarnings("rawtypes")
3901 public void setParsedName(TaxonNameBase nameToBeFilledTest
) {
3902 this.taxonNameBase
= (NonViralName
<?
>) nameToBeFilledTest
;
3905 //variety dwcranks:varietyEpithet
3907 * @return the author
3909 public String
getAuthor() {
3915 public Taxon
getTaxon() {
3921 public NonViralName
<?
> getTaxonNameBase() {
3922 return taxonNameBase
;
3926 * @param findOrCreateTaxon
3928 public void setForm(Taxon form
) {
3933 * @param findOrCreateTaxon
3935 public void setVariety(Taxon variety
) {
3936 this.variety
=variety
;
3943 @SuppressWarnings("rawtypes")
3944 public Taxon
findOrCreateTaxon(String partialname
,String fullname
, Rank rank
, Rank globalrank
) {
3945 logger
.info("findOrCreateTaxon");
3946 sourceUrlRef
=CdmBase
.deproxy(sourceUrlRef
, Reference
.class);
3947 //takes too much time
3948 // List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3949 // logger.info("tmpList returned: "+tmpList.size());
3951 NomenclaturalStatusType statusType
= null;
3952 if (!getStatus().isEmpty()){
3954 statusType
= nomStatusString2NomStatus(getStatus());
3955 } catch (UnknownCdmTypeException e
) {
3956 addProblematicStatusToFile(getStatus());
3957 logger
.warn("Problem with status");
3961 List
<TaxonBase
> tmpListFiltered
= new ArrayList
<TaxonBase
>();
3963 Pager
<TaxonBase
> taxontest
= importer
.getTaxonService().findByTitle(TaxonBase
.class, fullname
, MatchMode
.BEGINNING
, null, null, null, null, null);
3965 tmpListFiltered
.addAll(taxontest
.getRecords());
3966 taxontest
= importer
.getTaxonService().findByTitle(TaxonBase
.class, partialname
, MatchMode
.BEGINNING
, null, null, null, null, null);
3967 tmpListFiltered
.addAll(taxontest
.getRecords());
3969 //logger.info("tmpListFiltered returned: "+tmpListFiltered.size());
3971 boolean nameCorrected
=false;
3972 if (fullname
.indexOf(partialname
)<0) {
3976 boolean foundIdentic
=false;
3978 for (TaxonBase tmpb
:tmpListFiltered
){
3980 TaxonNameBase tnb
= tmpb
.getName();
3983 if(globalrank
.equals(rank
) || (globalrank
.isLower(Rank
.SPECIES()) && rank
.equals(Rank
.SPECIES()))){
3984 if (tnb
.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(fullname
) ){
3985 crank
=tnb
.getRank();
3986 if (crank
!=null && rank
!=null){
3987 if (crank
.equals(rank
)){
3992 }catch(Exception e
){
3993 e
.printStackTrace();
3998 if(nameCorrected
){ //for corrected names such as Anochetus -- A. blf-pat
3999 if (tnb
.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname
) ){
4000 crank
=tnb
.getRank();
4001 if (crank
!=null && rank
!=null){
4002 if (crank
.equals(rank
)){
4007 }catch(Exception e
){
4008 e
.printStackTrace();
4016 if (tnb
.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname
) ){
4017 crank
=tnb
.getRank();
4018 if (crank
!=null && rank
!=null){
4019 if (crank
.equals(rank
)){
4024 }catch(Exception e
){
4025 e
.printStackTrace();
4034 boolean statusMatch
=false;
4035 boolean appendedMatch
=false;
4036 if(tmp
!=null && foundIdentic
){
4037 statusMatch
=compareStatus(tmp
, statusType
);
4038 if (!getStatus().isEmpty() && ! (tmp
.getAppendedPhrase() == null)) {
4039 appendedMatch
=tmp
.getAppendedPhrase().equals(getStatus());
4041 if (getStatus().isEmpty() && tmp
.getAppendedPhrase() == null) {
4046 if ((tmp
== null || !foundIdentic
) || (tmp
!= null && !statusMatch
) || (tmp
!= null && !appendedMatch
&& !statusMatch
)){
4048 NonViralName
<?
> tnb
= getNonViralNameAccNomenclature();
4051 if(statusType
!= null) {
4052 tnb
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
4054 if(StringUtils
.isNotBlank(getStatus())) {
4055 tnb
.setAppendedPhrase(getStatus());
4058 if(rank
.equals(Rank
.UNKNOWN_RANK())){
4059 tnb
.setTitleCache(fullname
, true);
4060 // tnb.setGenusOrUninomial(fullname);
4062 if(rank
.isHigher(Rank
.GENUS())) {
4063 tnb
.setGenusOrUninomial(partialname
);
4066 if(rank
.isHigher(Rank
.SPECIES())) {
4067 tnb
.setTitleCache(partialname
, true);
4070 if (rank
.equals(globalrank
) && author
!= null) {
4072 tnb
.setCombinationAuthorship(findOrCreateAuthor(author
));
4073 if (getIdentifier() !=null && !getIdentifier().isEmpty()){
4074 Taxon taxonLSID
= getTaxonByLSID(getIdentifier());
4075 if (taxonLSID
!=null) {
4082 if (rank
.equals(Rank
.FAMILY())) {
4083 tmp
= buildFamily(tnb
);
4085 if (rank
.equals(Rank
.SUBFAMILY())) {
4086 tmp
= buildSubfamily(tnb
);
4088 if (rank
.equals(Rank
.TRIBE())) {
4089 tmp
= buildTribe(tnb
);
4091 if (rank
.equals(Rank
.SUBTRIBE())) {
4092 tmp
= buildSubtribe(tnb
);
4094 if (rank
.equals(Rank
.GENUS())) {
4095 tmp
= buildGenus(partialname
, tnb
);
4098 if (rank
.equals(Rank
.SUBGENUS())) {
4099 tmp
= buildSubgenus(partialname
, tnb
);
4101 if (rank
.equals(Rank
.SPECIES())) {
4102 tmp
= buildSpecies(partialname
, tnb
);
4105 if (rank
.equals(Rank
.SUBSPECIES())) {
4106 tmp
= buildSubspecies(partialname
, tnb
);
4109 if (rank
.equals(Rank
.VARIETY())) {
4110 tmp
= buildVariety(fullname
, partialname
, tnb
);
4113 if (rank
.equals(Rank
.FORM())) {
4114 tmp
= buildForm(fullname
, partialname
, tnb
);
4117 TaxonXTreatmentExtractor
.this.sourceHandler
.addSource(refMods
, tmp
);
4120 importer
.getClassificationService().saveOrUpdate(classification
);
4125 tmp
= CdmBase
.deproxy(tmp
, Taxon
.class);
4126 if (rank
.equals(globalrank
) && author
!= null) {
4127 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
4128 setLSID(getIdentifier(), tmp
);
4129 importer
.getTaxonService().saveOrUpdate(tmp
);
4130 tmp
= CdmBase
.deproxy(tmp
, Taxon
.class);
4143 private Taxon
buildSubfamily(NonViralName
<?
> tnb
) {
4145 // tnb.generateTitle();
4146 tmp
= findMatchingTaxon(tnb
,refMods
);
4148 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4149 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4150 tmp
.setSec(state2
.getConfig().getSecundum());
4152 // tmp.setSec(refMods);
4153 // sourceHandler.addSource(refMods, tmp);
4154 if(family
!= null) {
4155 classification
.addParentChild(family
, tmp
, null, null);
4156 higherRank
=Rank
.FAMILY();
4159 //System.out.println("ADDCHILDTAXON SUBFAMILY "+tmp);
4160 classification
.addChildTaxon(tmp
, null, null);
4169 private Taxon
buildFamily(NonViralName
<?
> tnb
) {
4171 // tnb.generateTitle();
4172 tmp
= findMatchingTaxon(tnb
,refMods
);
4174 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4175 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4176 tmp
.setSec(state2
.getConfig().getSecundum());
4178 // tmp.setSec(refMods);
4179 //sourceHandler.addSource(refMods, tmp);
4180 //System.out.println("ADDCHILDTAXON FAMILY "+tmp);
4181 classification
.addChildTaxon(tmp
, null, null);
4190 private Taxon
buildForm(String fullname
, String partialname
, NonViralName
<?
> tnb
) {
4191 if (genusName
!=null) {
4192 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
4194 if (subgenusName
!=null) {
4195 tnb
.setInfraGenericEpithet(subgenusName
.getInfraGenericEpithet());
4197 if(speciesName
!=null) {
4198 tnb
.setSpecificEpithet(speciesName
.getSpecificEpithet());
4200 if(subspeciesName
!= null) {
4201 tnb
.setInfraSpecificEpithet(subspeciesName
.getInfraSpecificEpithet());
4203 if(partialname
!= null) {
4204 tnb
.setInfraSpecificEpithet(partialname
);
4206 //TODO how to save form??
4207 tnb
.setTitleCache(fullname
, true);
4208 Taxon tmp
= findMatchingTaxon(tnb
,refMods
);
4210 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4211 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4212 tmp
.setSec(state2
.getConfig().getSecundum());
4214 // tmp.setSec(refMods);
4215 //sourceHandler.addSource(refMods, tmp);
4216 if (subspecies
!=null) {
4217 classification
.addParentChild(subspecies
, tmp
, null, null);
4218 higherRank
=Rank
.SUBSPECIES();
4219 higherTaxa
=subspecies
;
4221 if (species
!=null) {
4222 classification
.addParentChild(species
, tmp
, null, null);
4223 higherRank
=Rank
.SPECIES();
4227 // System.out.println("ADDCHILDTAXON FORM "+tmp);
4228 classification
.addChildTaxon(tmp
, null, null);
4239 private Taxon
buildVariety(String fullname
, String partialname
, NonViralName
<?
> tnb
) {
4241 if (genusName
!=null) {
4242 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
4244 if (subgenusName
!=null) {
4245 tnb
.setInfraGenericEpithet(subgenusName
.getInfraGenericEpithet());
4247 if(speciesName
!=null) {
4248 tnb
.setSpecificEpithet(speciesName
.getSpecificEpithet());
4250 if(subspeciesName
!= null) {
4251 tnb
.setInfraSpecificEpithet(subspeciesName
.getSpecificEpithet());
4253 if(partialname
!= null) {
4254 tnb
.setInfraSpecificEpithet(partialname
);
4256 //TODO how to save variety?
4257 tnb
.setTitleCache(fullname
, true);
4258 tmp
= findMatchingTaxon(tnb
,refMods
);
4260 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4261 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4262 tmp
.setSec(state2
.getConfig().getSecundum());
4264 // tmp.setSec(refMods);
4265 //sourceHandler.addSource(refMods, tmp);
4266 if (subspecies
!=null) {
4267 classification
.addParentChild(subspecies
, tmp
, null, null);
4268 higherRank
=Rank
.SUBSPECIES();
4269 higherTaxa
=subspecies
;
4271 if(species
!=null) {
4272 classification
.addParentChild(species
, tmp
, null, null);
4273 higherRank
=Rank
.SPECIES();
4277 //System.out.println("ADDCHILDTAXON VARIETY "+tmp);
4278 classification
.addChildTaxon(tmp
, null, null);
4285 * @param partialname
4289 private Taxon
buildSubspecies(String partialname
, NonViralName
<?
> tnb
) {
4290 if (genusName
!=null) {
4291 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
4293 if (subgenusName
!=null) {
4294 // System.out.println("SUB:"+subgenusName.getInfraGenericEpithet());
4295 tnb
.setInfraGenericEpithet(subgenusName
.getInfraGenericEpithet());
4297 if(speciesName
!=null) {
4298 // System.out.println("SPE:"+speciesName.getSpecificEpithet());
4299 tnb
.setSpecificEpithet(speciesName
.getSpecificEpithet());
4301 tnb
.setInfraSpecificEpithet(partialname
);
4302 Taxon tmp
= findMatchingTaxon(tnb
,refMods
);
4304 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4305 if(!state2
.getConfig().doKeepOriginalSecundum())
4307 tmp
.setSec(state2
.getConfig().getSecundum());
4308 // tmp.setSec(refMods);
4309 //sourceHandler.addSource(refMods, tmp);
4312 if(species
!= null) {
4313 classification
.addParentChild(species
, tmp
, null, null);
4314 higherRank
=Rank
.SPECIES();
4318 //System.out.println("ADDCHILDTAXON SUBSPECIES "+tmp);
4319 classification
.addChildTaxon(tmp
, null, null);
4325 * @param partialname
4329 private Taxon
buildSpecies(String partialname
, NonViralName
<?
> tnb
) {
4330 if (genusName
!=null) {
4331 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
4333 if (subgenusName
!=null) {
4334 tnb
.setInfraGenericEpithet(subgenusName
.getInfraGenericEpithet());
4336 tnb
.setSpecificEpithet(partialname
.toLowerCase());
4337 Taxon tmp
= findMatchingTaxon(tnb
,refMods
);
4339 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4340 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4341 tmp
.setSec(state2
.getConfig().getSecundum());
4343 // tmp.setSec(refMods);
4344 //sourceHandler.addSource(refMods, tmp);
4345 if (subgenus
!=null) {
4346 classification
.addParentChild(subgenus
, tmp
, null, null);
4347 higherRank
=Rank
.SUBGENUS();
4348 higherTaxa
=subgenus
;
4351 classification
.addParentChild(genus
, tmp
, null, null);
4352 higherRank
=Rank
.GENUS();
4356 //System.out.println("ADDCHILDTAXON SPECIES "+tmp);
4357 classification
.addChildTaxon(tmp
, null, null);
4364 * @param partialname
4368 private Taxon
buildSubgenus(String partialname
, NonViralName
<?
> tnb
) {
4369 tnb
.setInfraGenericEpithet(partialname
);
4370 if (genusName
!=null) {
4371 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
4373 Taxon tmp
= findMatchingTaxon(tnb
,refMods
);
4375 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4376 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4377 tmp
.setSec(state2
.getConfig().getSecundum());
4379 // tmp.setSec(refMods);
4380 //sourceHandler.addSource(refMods, tmp);
4382 classification
.addParentChild(genus
, tmp
, null, null);
4383 higherRank
=Rank
.GENUS();
4386 //System.out.println("ADDCHILDTAXON SUBGENUS "+tmp);
4387 classification
.addChildTaxon(tmp
, null, null);
4393 * @param partialname
4397 private Taxon
buildGenus(String partialname
, NonViralName
<?
> tnb
) {
4399 tnb
.setGenusOrUninomial(partialname
);
4402 tmp
= findMatchingTaxon(tnb
,refMods
);
4404 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4405 if(!state2
.getConfig().doKeepOriginalSecundum())
4407 tmp
.setSec(state2
.getConfig().getSecundum());
4408 // tmp.setSec(refMods);
4409 //sourceHandler.addSource(refMods, tmp);
4412 if(subtribe
!= null) {
4413 classification
.addParentChild(subtribe
, tmp
, null, null);
4414 higherRank
=Rank
.SUBTRIBE();
4415 higherTaxa
=subtribe
;
4418 classification
.addParentChild(tribe
, tmp
, null, null);
4419 higherRank
=Rank
.TRIBE();
4422 if(subfamily
!=null) {
4423 classification
.addParentChild(subfamily
, tmp
, null, null);
4424 higherRank
=Rank
.SUBFAMILY();
4425 higherTaxa
=subfamily
;
4428 classification
.addParentChild(family
, tmp
, null, null);
4429 higherRank
=Rank
.FAMILY();
4433 //System.out.println("ADDCHILDTAXON GENUS "+tmp);
4434 classification
.addChildTaxon(tmp
, null, null);
4446 private Taxon
buildSubtribe(NonViralName
<?
> tnb
) {
4447 Taxon tmp
= findMatchingTaxon(tnb
,refMods
);
4449 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4450 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4451 tmp
.setSec(state2
.getConfig().getSecundum());
4453 // tmp.setSec(refMods);
4454 //sourceHandler.addSource(refMods, tmp);
4456 classification
.addParentChild(tribe
, tmp
, null, null);
4457 higherRank
=Rank
.TRIBE();
4460 //System.out.println("ADDCHILDTAXON SUBTRIBE "+tmp);
4461 classification
.addChildTaxon(tmp
, null, null);
4470 private Taxon
buildTribe(NonViralName
<?
> tnb
) {
4471 Taxon tmp
= findMatchingTaxon(tnb
,refMods
);
4473 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4474 if(!state2
.getConfig().doKeepOriginalSecundum()) {
4475 tmp
.setSec(state2
.getConfig().getSecundum());
4477 // tmp.setSec(refMods);
4478 //sourceHandler.addSource(refMods, tmp);
4479 if (subfamily
!=null) {
4480 classification
.addParentChild(subfamily
, tmp
, null, null);
4481 higherRank
=Rank
.SUBFAMILY();
4482 higherTaxa
=subfamily
;
4484 if(family
!= null) {
4485 classification
.addParentChild(family
, tmp
, null, null);
4486 higherRank
=Rank
.FAMILY();
4490 //System.out.println("ADDCHILDTAXON TRIBE "+tmp);
4491 classification
.addChildTaxon(tmp
, null, null);
4499 * @param identifier2
4502 @SuppressWarnings("rawtypes")
4503 private Taxon
getTaxonByLSID(String identifier
) {
4504 //logger.info("getTaxonByLSID");
4505 // boolean lsidok=false;
4506 String id
= identifier
.split("__")[0];
4507 // String source = identifier.split("__")[1];
4509 if (id
.indexOf("lsid")>-1){
4511 lsid
= new LSID(id
);
4513 } catch (MalformedLSIDException e
) {
4514 logger
.warn("Malformed LSID");
4518 List
<Taxon
> taxa
= importer
.getTaxonService().list(Taxon
.class, 0, 0, null, null);
4519 LSID currentlsid
=null;
4521 currentlsid
= t
.getLsid();
4522 if (currentlsid
!=null){
4523 if (currentlsid
.getLsid().equals(lsid
.getLsid())){
4527 catch(Exception e
){logger
.warn("Exception occurred while comparing LSIDs "+e
);}
4538 @SuppressWarnings("rawtypes")
4539 private Person
findOrCreateAuthor(String author2
) {
4540 //logger.info("findOrCreateAuthor");
4541 List
<UuidAndTitleCache
<Person
>> hiberPersons
= importer
.getAgentService().getPersonUuidAndTitleCache();
4542 for (UuidAndTitleCache
<Person
> hibernateP
:hiberPersons
){
4543 if(hibernateP
.getTitleCache().equals(author2
)) {
4544 AgentBase existing
= importer
.getAgentService().find(hibernateP
.getUuid());
4545 return CdmBase
.deproxy(existing
, Person
.class);
4548 Person p
= Person
.NewInstance();
4549 p
.setTitleCache(author2
,true);
4550 importer
.getAgentService().saveOrUpdate(p
);
4551 return CdmBase
.deproxy(p
, Person
.class);
4554 * @param author the author to set
4556 public void setAuthor(String author
) {
4557 this.author
= author
;
4561 * @return the higherTaxa
4563 public Taxon
getHigherTaxa() {
4567 * @param higherTaxa the higherTaxa to set
4569 public void setHigherTaxa(Taxon higherTaxa
) {
4570 this.higherTaxa
= higherTaxa
;
4573 * @return the higherRank
4575 public Rank
getHigherRank() {
4579 * @param higherRank the higherRank to set
4581 public void setHigherRank(Rank higherRank
) {
4582 this.higherRank
= higherRank
;
4584 public String
getName(){
4585 if (newName
.isEmpty()) {
4586 return originalName
;
4593 * @return the fullName
4595 public String
getOriginalName() {
4596 return originalName
;
4599 * @param fullName the fullName to set
4601 public void setOriginalName(String fullName
) {
4602 this.originalName
= fullName
;
4605 * @return the newName
4607 public String
getNewName() {
4611 * @param newName the newName to set
4613 public void setNewName(String newName
) {
4614 this.newName
= newName
;
4619 public Rank
getRank() {
4623 * @param rank the rank to set
4625 public void setRank(Rank rank
) {
4629 * @return the idenfitiger
4631 public String
getIdentifier() {
4635 * @param idenfitiger the idenfitiger to set
4637 public void setIdentifier(String identifier
) {
4638 this.identifier
= identifier
;
4641 * @return the status
4643 public String
getStatus() {
4644 if (status
== null) {
4650 * @param status the status to set
4652 public void setStatus(String status
) {
4653 this.status
= status
;
4656 * @return the family
4658 public Taxon
getFamily() {
4662 * @param family the family to set
4664 @SuppressWarnings("rawtypes")
4665 public void setFamily(Taxon family
) {
4666 this.family
= family
;
4667 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(family
.getName(), TaxonNameBase
.class);
4668 familyName
= castTaxonNameBase(taxonNameBase
,familyName
);
4671 * @return the subfamily
4673 public Taxon
getSubfamily() {
4677 * @param subfamily the subfamily to set
4679 @SuppressWarnings("rawtypes")
4680 public void setSubfamily(Taxon subfamily
) {
4681 this.subfamily
= subfamily
;
4682 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(subfamily
.getName(), TaxonNameBase
.class);
4683 subfamilyName
= castTaxonNameBase(taxonNameBase
,subfamilyName
);
4688 public Taxon
getTribe() {
4692 * @param tribe the tribe to set
4694 @SuppressWarnings("rawtypes")
4695 public void setTribe(Taxon tribe
) {
4697 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(tribe
.getName(), TaxonNameBase
.class);
4698 tribeName
= castTaxonNameBase(taxonNameBase
,tribeName
);
4701 * @return the subtribe
4703 public Taxon
getSubtribe() {
4707 * @param subtribe the subtribe to set
4709 @SuppressWarnings("rawtypes")
4710 public void setSubtribe(Taxon subtribe
) {
4711 this.subtribe
= subtribe
;
4712 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(subtribe
.getName(), TaxonNameBase
.class);
4713 subtribeName
=castTaxonNameBase(taxonNameBase
,subtribeName
);
4718 public Taxon
getGenus() {
4722 * @param genus the genus to set
4724 @SuppressWarnings("rawtypes")
4725 public void setGenus(Taxon genus
) {
4728 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(genus
.getName(), TaxonNameBase
.class);
4729 genusName
= castTaxonNameBase(taxonNameBase
,genusName
);
4733 * @return the subgenus
4735 public Taxon
getSubgenus() {
4739 * @param subgenus the subgenus to set
4741 @SuppressWarnings("rawtypes")
4742 public void setSubgenus(Taxon subgenus
) {
4743 this.subgenus
= subgenus
;
4744 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(subgenus
.getName(), TaxonNameBase
.class);
4745 subgenusName
= castTaxonNameBase(taxonNameBase
,subgenusName
);
4748 * @return the species
4750 public Taxon
getSpecies() {
4754 * @param species the species to set
4756 public void setSpecies(Taxon species
) {
4757 if (species
!= null){
4758 this.species
= species
;
4759 @SuppressWarnings("rawtypes")
4760 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(species
.getName(), TaxonNameBase
.class);
4761 speciesName
= castTaxonNameBase(taxonNameBase
,speciesName
);
4765 * @return the subspecies
4767 public Taxon
getSubspecies() {
4771 * @param subspecies the subspecies to set
4773 @SuppressWarnings("rawtypes")
4774 public void setSubspecies(Taxon subspecies
) {
4775 this.subspecies
= subspecies
;
4776 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(subspecies
.getName(), TaxonNameBase
.class);
4777 subspeciesName
= castTaxonNameBase(taxonNameBase
,subspeciesName
);
4789 private void addProblematicStatusToFile(String status
) {
4791 FileWriter fstream
= new FileWriter(TaxonXImport
.LOG_FOLDER
+ "StatusUnknown_"+classification
.getTitleCache()+".txt",true);
4792 BufferedWriter out
= new BufferedWriter(fstream
);
4793 out
.write(status
+"\n");
4794 //Close the output stream
4796 }catch (Exception e
){//Catch exception if any
4797 System
.err
.println("Error: " + e
.getMessage());
4808 private Taxon
findMatchingTaxon(NonViralName
<?
> tnb
, Reference refMods
) {
4809 logger
.info("findMatchingTaxon");
4812 refMods
=CdmBase
.deproxy(refMods
, Reference
.class);
4813 boolean insertAsExisting
=false;
4814 List
<Taxon
> existingTaxa
= new ArrayList
<Taxon
>();
4816 existingTaxa
= getMatchingTaxa(tnb
);
4817 } catch (Exception e1
) {
4818 // TODO Auto-generated catch block
4819 e1
.printStackTrace();
4821 double similarityScore
=0.0;
4822 double similarityAuthor
=-1;
4827 for (Taxon bestMatchingTaxon
: existingTaxa
){
4828 if (!existingTaxa
.isEmpty() && state2
.getConfig().isInteractWithUser() && !insertAsExisting
) {
4829 // System.out.println("tnb "+tnb.getTitleCache());
4830 // System.out.println("ext "+bestMatchingTaxon.getTitleCache());
4832 if(tnb
.getAuthorshipCache()!=null) {
4833 author1
=tnb
.getAuthorshipCache();
4835 } catch (Exception e
) {
4836 // TODO Auto-generated catch block
4837 e
.printStackTrace();
4840 if(castTaxonNameBase(bestMatchingTaxon
.getName()).getAuthorshipCache()!=null) {
4841 author2
=castTaxonNameBase(bestMatchingTaxon
.getName()).getAuthorshipCache();
4843 } catch (Exception e
) {
4844 // TODO Auto-generated catch block
4845 e
.printStackTrace();
4848 t1
=tnb
.getTitleCache().split("sec.")[0].trim();
4849 if (author1
!=null && !StringUtils
.isEmpty(author1
)) {
4850 t1
=t1
.split(Pattern
.quote(author1
))[0];
4852 } catch (Exception e
) {
4853 // TODO Auto-generated catch block
4854 e
.printStackTrace();
4857 t2
=bestMatchingTaxon
.getTitleCache().split("sec.")[0].trim();
4858 if (author2
!=null && !StringUtils
.isEmpty(author2
)) {
4859 t2
=t2
.split(Pattern
.quote(author2
))[0];
4861 } catch (Exception e
) {
4862 // TODO Auto-generated catch block
4863 e
.printStackTrace();
4865 similarityScore
=similarity(t1
.trim(), t2
.trim());
4866 // System.out.println("taxascore: "+similarityScore);
4867 similarityAuthor
=similarity(author1
.trim(), author2
.trim());
4868 // System.out.println("authorscore: "+similarityAuthor);
4869 insertAsExisting
= compareAndCheckTaxon(tnb
, refMods
, similarityScore
, bestMatchingTaxon
,similarityAuthor
);
4871 if(insertAsExisting
) {
4872 //System.out.println("KEEP "+bestMatchingTaxon.toString());
4873 tmp
=bestMatchingTaxon
;
4874 sourceHandler
.addSource(refMods
, tmp
);
4885 * @param similarityScore
4886 * @param bestMatchingTaxon
4887 * @param similarityAuthor
4890 private boolean compareAndCheckTaxon(NonViralName
<?
> tnb
, Reference refMods
, double similarityScore
,
4891 Taxon bestMatchingTaxon
, double similarityAuthor
) {
4892 //logger.info("compareAndCheckTaxon");
4893 boolean insertAsExisting
;
4894 // if (tnb.getTitleCache().split("sec.")[0].equalsIgnoreCase("Chenopodium") && bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium album")>-1) {
4895 // insertAsExisting=false;
4897 //a small hack/automatisation for Chenopodium only
4898 if (tnb
.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase("Chenopodium") &&
4899 bestMatchingTaxon
.getTitleCache().split("sec.")[0].indexOf("Chenopodium L.")>-1) {
4900 insertAsExisting
=true;
4902 insertAsExisting
=askIfReuseBestMatchingTaxon(tnb
, bestMatchingTaxon
, refMods
, similarityScore
,similarityAuthor
);
4906 logDecision(tnb
,bestMatchingTaxon
,insertAsExisting
, refMods
);
4907 return insertAsExisting
;
4913 @SuppressWarnings("rawtypes")
4914 private List
<Taxon
> getMatchingTaxa(TaxonNameBase tnb
) {
4915 //logger.info("getMatchingTaxon");
4916 if (tnb
.getTitleCache() == null){
4917 tnb
.setTitleCache(tnb
.toString(), tnb
.isProtectedTitleCache());
4920 Pager
<TaxonBase
> pager
=importer
.getTaxonService().findByTitle(TaxonBase
.class, tnb
.getTitleCache().split("sec.")[0].trim(), MatchMode
.BEGINNING
, null, null, null, null, null);
4921 List
<TaxonBase
>records
= pager
.getRecords();
4923 List
<Taxon
> existingTaxons
= new ArrayList
<Taxon
>();
4924 for (TaxonBase r
:records
){
4926 Taxon bestMatchingTaxon
= (Taxon
)r
;
4927 // System.out.println("best: "+bestMatchingTaxon.getTitleCache());
4928 if(compareTaxonNameLength(bestMatchingTaxon
.getTitleCache().split(".sec")[0],tnb
.getTitleCache().split(".sec")[0])) {
4929 existingTaxons
.add(bestMatchingTaxon
);
4931 }catch(ClassCastException e
){logger
.warn("classcast exception, might be a synonym, ignore it");}
4933 Taxon bmt
= importer
.getTaxonService().findBestMatchingTaxon(tnb
.getTitleCache());
4934 if (!existingTaxons
.contains(bmt
) && bmt
!=null) {
4935 if(compareTaxonNameLength(bmt
.getTitleCache().split(".sec")[0],tnb
.getTitleCache().split(".sec")[0])) {
4936 existingTaxons
.add(bmt
);
4939 return existingTaxons
;
4943 * Check if the found Taxon can reasonably be the same
4944 * example: with and without author should match, but the subspecies should not be suggested for a genus
/*
 * Length-based plausibility check between two title strings.
 * Visible here: the flag lengthOk starts as false, the lengths of f and o are read,
 * and the code tests whether f is more than 10 characters longer than o.
 * NOTE(review): the statements that set and return lengthOk are not visible in this
 * extract - confirm the exact rule against the complete file.
 * Both arguments are dereferenced without a null check.
 */
4946 private boolean compareTaxonNameLength(String f
, String o
){
4947 //logger.info("compareTaxonNameLength");
4948 boolean lengthOk
=false;
4949 int sizeF
= f
.length();
4950 int sizeO
= o
.length();
// f exceeds o by more than 10 characters
4955 if (sizeF
-sizeO
>10) {
4962 // System.out.println(lengthOk+": compare "+f+" ("+f.length()+") and "+o+" ("+o.length()+")");
/*
 * Similarity of two strings as a ratio derived from their edit distance.
 * When both strings are non-empty they are lower-cased and trimmed, the longer one is
 * put first, and the result is (longerLength - editDistance) / longerLength, so equal
 * strings give 1.0. Two strings that are empty after trimming also give 1.0.
 * When at least one input is empty or null the code falls back to a case-insensitive
 * equality test on non-null inputs.
 * NOTE(review): the values returned by that fallback are not visible in this extract -
 * confirm them against the complete file.
 */
4966 private double similarity(String s1
, String s2
) {
4967 //logger.info("similarity");
4968 //System.out.println("similarity *"+s1+"* vs. *"+s2+"*");
4969 if(!StringUtils
.isEmpty(s1
) && !StringUtils
.isEmpty(s2
)){
4970 String l1
=s1
.toLowerCase().trim();
4971 String l2
=s2
.toLowerCase().trim();
4972 if (l1
.length() < l2
.length()) { // s1 should always be bigger
4973 String swap
= l1
; l1
= l2
; l2
= swap
;
4975 int bigLen
= l1
.length();
4976 if (bigLen
== 0) { return 1.0; /* both strings are zero length */ }
// normalise the edit distance by the length of the longer string
4977 return (bigLen
- computeEditDistance(l1
, l2
)) / (double) bigLen
;
4980 if(s1
!=null && s2
!=null){
4981 if (s1
.equalsIgnoreCase(s2
)) {
4989 private int computeEditDistance(String s1
, String s2
) {
4990 //logger.info("computeEditDistance");
4991 int[] costs
= new int[s2
.length() + 1];
4992 for (int i
= 0; i
<= s1
.length(); i
++) {
4994 for (int j
= 0; j
<= s2
.length(); j
++) {
4999 int newValue
= costs
[j
- 1];
5000 if (s1
.charAt(i
- 1) != s2
.charAt(j
- 1)) {
5001 newValue
= Math
.min(Math
.min(newValue
, lastValue
),
5004 costs
[j
- 1] = lastValue
;
5005 lastValue
= newValue
;
5010 costs
[s2
.length()] = lastValue
;
5013 return costs
[s2
.length()];
// Parent taxa resolved so far, keyed by their rank; the handle*Hierarchy methods of this
// class store their result here via hierarchy.put(r, parent).
5016 Map
<Rank
, Taxon
> hierarchy
= new HashMap
<Rank
, Taxon
>();
5018 * @param taxonNameBase
5020 @SuppressWarnings("rawtypes")
5021 public void lookForParentNode(NonViralName
<?
> taxonNameBase
, Taxon tax
, Reference ref
, MyName myName
) {
5022 logger
.info("lookForParentNode "+taxonNameBase
.getTitleCache()+" for "+myName
.toString());
5023 //System.out.println("LOOK FOR PARENT NODE "+taxonnamebase.toString()+"; "+tax.toString()+"; "+taxonnamebase.getRank());
5024 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
5025 if (taxonNameBase
.getRank().equals(Rank
.FORM())){
5026 handleFormHierarchy(ref
, myName
, parser
);
5028 else if (taxonNameBase
.getRank().equals(Rank
.VARIETY())){
5029 handleVarietyHierarchy(ref
, myName
, parser
);
5031 else if (taxonNameBase
.getRank().equals(Rank
.SUBSPECIES())){
5032 handleSubSpeciesHierarchy(ref
, myName
, parser
);
5034 else if (taxonNameBase
.getRank().equals(Rank
.SPECIES())){
5035 handleSpeciesHierarchy(ref
, myName
, parser
);
5037 else if (taxonNameBase
.getRank().equals(Rank
.SUBGENUS())){
5038 handleSubgenusHierarchy(ref
, myName
, parser
);
5041 if (taxonNameBase
.getRank().equals(Rank
.GENUS())){
5042 handleGenusHierarchy(ref
, myName
, parser
);
5044 if (taxonNameBase
.getRank().equals(Rank
.SUBTRIBE())){
5045 handleSubtribeHierarchy(ref
, myName
, parser
);
5047 if (taxonNameBase
.getRank().equals(Rank
.TRIBE())){
5048 handleTribeHierarchy(ref
, myName
, parser
);
5051 if (taxonNameBase
.getRank().equals(Rank
.SUBFAMILY())){
5052 handleSubfamilyHierarchy(ref
, myName
, parser
);
/*
 * Resolves the FAMILY-rank parent of a subfamily name and stores it in hierarchy.
 * Visible steps: a trace line is printed to System.out; the family string is read from
 * myName; the taxon service is searched by title (MatchMode.BEGINNING) and a record
 * whose name has rank FAMILY is deproxied to a Taxon; if no parent was found the family
 * string is parsed, findMatchingTaxon is consulted, a new Taxon is created and saved,
 * and lookForParentNode is called for the parent; finally hierarchy.put(r, parent).
 * Nothing is done when myName has no family string.
 * NOTE(review): short lines (braces, the try opener, the handling of tmp and any loop
 * exit) are not visible in this extract - confirm the exact branching in the full file.
 */
5061 private void handleSubfamilyHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5062 System
.out
.println("handleSubfamilyHierarchy");
5063 String parentStr
= myName
.getFamilyStr();
5064 Rank r
= Rank
.FAMILY();
5065 if(parentStr
!=null){
5067 Taxon parent
= null;
// search existing taxa whose title starts with the family string
5068 Pager
<TaxonBase
> taxontest
= importer
.getTaxonService().findByTitle(TaxonBase
.class, parentStr
, MatchMode
.BEGINNING
, null, null, null, null, null);
5069 for(TaxonBase tb
:taxontest
.getRecords()){
5071 if (tb
.getName().getRank().equals(r
)) {
5072 parent
=CdmBase
.deproxy(tb
, Taxon
.class);
5075 } catch (Exception e
) {
5076 // TODO Auto-generated catch block
5077 e
.printStackTrace();
5080 if(parent
== null) {
5081 NonViralName
<?
> parentNameName
= (NonViralName
<?
>) parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
5082 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5085 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5086 importer
.getTaxonService().save(parent
);
5087 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
// continue upwards with the parent's own parent
5091 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5094 hierarchy
.put(r
,parent
);
/*
 * Resolves the parent of a tribe name and stores it in hierarchy.
 * Visible steps: the parent string is the subfamily string of myName (rank SUBFAMILY),
 * or the family string when there is no subfamily; the string is parsed into a name and
 * wrapped in a new, unsaved Taxon; the nodes of classification are scanned for a taxon
 * with the same title cache (ignoring case), which then replaces the new Taxon; if none
 * exists, findMatchingTaxon is consulted, a new Taxon is created and saved, and
 * lookForParentNode is called for the parent; finally hierarchy.put(r, parent).
 * NOTE(review): short lines (braces, the rank used for the family fallback, the
 * handling of tmp and any loop exit) are not visible in this extract - confirm in the
 * full file.
 */
5103 private void handleTribeHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5104 String parentStr
= myName
.getSubfamilyStr();
5105 Rank r
= Rank
.SUBFAMILY();
5106 if (parentStr
== null){
5107 parentStr
= myName
.getFamilyStr();
5110 if(parentStr
!=null){
5111 NonViralName
<?
> parentNameName
= (NonViralName
<?
>) parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
5112 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
5113 // importer.getTaxonService().save(parent);
5114 // parent = CdmBase.deproxy(parent, Taxon.class);
5116 boolean parentDoesNotExists
= true;
// look for a node of the classification that already carries this parent title
5117 for (TaxonNode p
: classification
.getAllNodes()){
5118 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
5119 parentDoesNotExists
= false;
5120 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
5124 // if(parentDoesNotExists) {
5125 // importer.getTaxonService().save(parent);
5126 // parent = CdmBase.deproxy(parent, Taxon.class);
5127 // lookForParentNode(parentNameName, parent, ref,myName);
5129 if(parentDoesNotExists
) {
5130 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5133 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5134 importer
.getTaxonService().save(parent
);
5135 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
5139 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5142 hierarchy
.put(r
,parent
);
/*
 * Resolves the parent of a subtribe name and stores it in hierarchy.
 * Visible steps: the parent string is taken from myName in the order tribe (rank TRIBE),
 * subfamily (rank SUBFAMILY), family; the string is parsed into a name and wrapped in a
 * new, unsaved Taxon; the nodes of classification are scanned for a taxon with the same
 * title cache (ignoring case), which then replaces the new Taxon; if none exists,
 * findMatchingTaxon is consulted, a new Taxon is created and saved, and
 * lookForParentNode is called for the parent; finally hierarchy.put(r, parent).
 * NOTE(review): short lines (braces, the rank used for the family fallback, the
 * handling of tmp and any loop exit) are not visible in this extract - confirm in the
 * full file.
 */
5151 private void handleSubtribeHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5152 String parentStr
= myName
.getTribeStr();
5153 Rank r
= Rank
.TRIBE();
5154 if (parentStr
== null){
5155 parentStr
= myName
.getSubfamilyStr();
5156 r
= Rank
.SUBFAMILY();
5158 if (parentStr
== null){
5159 parentStr
= myName
.getFamilyStr();
5162 if(parentStr
!=null){
5163 NonViralName
<?
> parentNameName
= (NonViralName
<?
>) parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
5164 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
5165 // importer.getTaxonService().save(parent);
5166 // parent = CdmBase.deproxy(parent, Taxon.class);
5168 boolean parentDoesNotExists
= true;
// look for a node of the classification that already carries this parent title
5169 for (TaxonNode p
: classification
.getAllNodes()){
5170 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
5171 parentDoesNotExists
= false;
5172 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
5177 // if(parentDoesNotExists) {
5178 // importer.getTaxonService().save(parent);
5179 // parent = CdmBase.deproxy(parent, Taxon.class);
5180 // lookForParentNode(parentNameName, parent, ref,myName);
5182 if(parentDoesNotExists
) {
5183 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5186 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5187 importer
.getTaxonService().save(parent
);
5188 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
5192 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5195 hierarchy
.put(r
,parent
);
/*
 * Resolves the parent of a genus name and stores it in hierarchy.
 * Visible steps: the parent string is taken from myName in the order subtribe (rank
 * SUBTRIBE), tribe, subfamily (rank SUBFAMILY), family; the string is parsed into a name
 * and wrapped in a new, unsaved Taxon; the nodes of classification are scanned for a
 * taxon with the same title cache (ignoring case), which then replaces the new Taxon;
 * if none exists, findMatchingTaxon is consulted, a new Taxon is created and saved, and
 * lookForParentNode is called for the parent; finally hierarchy.put(r, parent).
 * NOTE(review): short lines (braces, the ranks used for the tribe and family fallbacks,
 * the handling of tmp and any loop exit) are not visible in this extract - confirm in
 * the full file.
 */
5204 private void handleGenusHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5205 String parentStr
= myName
.getSubtribeStr();
5206 Rank r
= Rank
.SUBTRIBE();
5207 if (parentStr
== null){
5208 parentStr
= myName
.getTribeStr();
5211 if (parentStr
== null){
5212 parentStr
= myName
.getSubfamilyStr();
5213 r
= Rank
.SUBFAMILY();
5215 if (parentStr
== null){
5216 parentStr
= myName
.getFamilyStr();
5219 if(parentStr
!=null){
5220 NonViralName
<?
> parentNameName
= (NonViralName
<?
>) parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
5221 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
5222 // importer.getTaxonService().save(parent);
5223 // parent = CdmBase.deproxy(parent, Taxon.class);
5225 boolean parentDoesNotExist
= true;
// look for a node of the classification that already carries this parent title
5226 for (TaxonNode p
: classification
.getAllNodes()){
5227 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
5228 // System.out.println(p.getTaxon().getUuid());
5229 // System.out.println(parent.getUuid());
5230 parentDoesNotExist
= false;
5231 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
5235 // if(parentDoesNotExists) {
5236 // importer.getTaxonService().save(parent);
5237 // parent = CdmBase.deproxy(parent, Taxon.class);
5238 // lookForParentNode(parentNameName, parent, ref,myName);
5240 if(parentDoesNotExist
) {
5241 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5244 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5245 importer
.getTaxonService().save(parent
);
5246 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
5250 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5253 hierarchy
.put(r
,parent
);
/*
 * Resolves the parent of a subgenus name and stores it in hierarchy.
 * Visible steps: the parent string is taken from myName in the order genus (rank GENUS),
 * subtribe (rank SUBTRIBE), tribe, subfamily (rank SUBFAMILY), family; the string is
 * parsed into a name and wrapped in a new, unsaved Taxon; the nodes of classification
 * are scanned for a taxon with the same title cache (ignoring case), which then replaces
 * the new Taxon; if none exists, findMatchingTaxon is consulted, a new Taxon is created
 * and saved, and lookForParentNode is called for the parent; finally
 * hierarchy.put(r, parent).
 * NOTE(review): short lines (braces, the ranks used for the tribe and family fallbacks,
 * the handling of tmp and any loop exit) are not visible in this extract - confirm in
 * the full file.
 */
5262 private void handleSubgenusHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5263 String parentStr
= myName
.getGenusStr();
5264 Rank r
= Rank
.GENUS();
5266 if(parentStr
==null){
5267 parentStr
= myName
.getSubtribeStr();
5268 r
= Rank
.SUBTRIBE();
5270 if (parentStr
== null){
5271 parentStr
= myName
.getTribeStr();
5274 if (parentStr
== null){
5275 parentStr
= myName
.getSubfamilyStr();
5276 r
= Rank
.SUBFAMILY();
5278 if (parentStr
== null){
5279 parentStr
= myName
.getFamilyStr();
5282 if(parentStr
!=null){
5283 NonViralName
<?
> parentNameName
= (NonViralName
<?
>) parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
5284 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
5285 // importer.getTaxonService().save(parent);
5286 // parent = CdmBase.deproxy(parent, Taxon.class);
5288 boolean parentDoesNotExists
= true;
// look for a node of the classification that already carries this parent title
5289 for (TaxonNode p
: classification
.getAllNodes()){
5290 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
5291 // System.out.println(p.getTaxon().getUuid());
5292 // System.out.println(parent.getUuid());
5293 parentDoesNotExists
= false;
5294 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
5298 // if(parentDoesNotExists) {
5299 // importer.getTaxonService().save(parent);
5300 // parent = CdmBase.deproxy(parent, Taxon.class);
5301 // lookForParentNode(parentNameName, parent, ref,myName);
5303 if(parentDoesNotExists
) {
5304 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5307 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5308 importer
.getTaxonService().save(parent
);
5309 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
5313 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5316 hierarchy
.put(r
,parent
);
5325 private void handleSpeciesHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5326 String parentStr
= myName
.getSubgenusStr();
5327 Rank r
= Rank
.SUBGENUS();
5329 if(parentStr
==null){
5330 parentStr
= myName
.getGenusStr();
5334 if(parentStr
==null){
5335 parentStr
= myName
.getSubtribeStr();
5336 r
= Rank
.SUBTRIBE();
5338 if (parentStr
== null){
5339 parentStr
= myName
.getTribeStr();
5342 if (parentStr
== null){
5343 parentStr
= myName
.getSubfamilyStr();
5344 r
= Rank
.SUBFAMILY();
5346 if (parentStr
== null){
5347 parentStr
= myName
.getFamilyStr();
5350 if(parentStr
!=null){
5351 Taxon parent
= handleParentName(ref
, myName
, parser
, parentStr
, r
);
5352 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5353 hierarchy
.put(r
,parent
);
5362 private void handleSubSpeciesHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5363 String parentStr
= myName
.getSpeciesStr();
5364 Rank r
= Rank
.SPECIES();
5367 if(parentStr
==null){
5368 parentStr
= myName
.getSubgenusStr();
5369 r
= Rank
.SUBGENUS();
5372 if(parentStr
==null){
5373 parentStr
= myName
.getGenusStr();
5377 if(parentStr
==null){
5378 parentStr
= myName
.getSubtribeStr();
5379 r
= Rank
.SUBTRIBE();
5381 if (parentStr
== null){
5382 parentStr
= myName
.getTribeStr();
5385 if (parentStr
== null){
5386 parentStr
= myName
.getSubfamilyStr();
5387 r
= Rank
.SUBFAMILY();
5389 if (parentStr
== null){
5390 parentStr
= myName
.getFamilyStr();
5393 if(parentStr
!=null){
5394 Taxon parent
= handleParentName(ref
, myName
, parser
, parentStr
, r
);
5395 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5396 hierarchy
.put(r
,parent
);
5406 private void handleFormHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5407 String parentStr
= myName
.getSubspeciesStr();
5408 Rank r
= Rank
.SUBSPECIES();
5411 if(parentStr
==null){
5412 parentStr
= myName
.getSpeciesStr();
5416 if(parentStr
==null){
5417 parentStr
= myName
.getSubgenusStr();
5418 r
= Rank
.SUBGENUS();
5421 if(parentStr
==null){
5422 parentStr
= myName
.getGenusStr();
5426 if(parentStr
==null){
5427 parentStr
= myName
.getSubtribeStr();
5428 r
= Rank
.SUBTRIBE();
5430 if (parentStr
== null){
5431 parentStr
= myName
.getTribeStr();
5434 if (parentStr
== null){
5435 parentStr
= myName
.getSubfamilyStr();
5436 r
= Rank
.SUBFAMILY();
5438 if (parentStr
== null){
5439 parentStr
= myName
.getFamilyStr();
5442 if(parentStr
!=null){
5443 Taxon parent
= handleParentName(ref
, myName
, parser
, parentStr
, r
);
5444 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5445 hierarchy
.put(r
,parent
);
5454 private void handleVarietyHierarchy(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5455 String parentStr
= myName
.getSubspeciesStr();
5456 Rank r
= Rank
.SUBSPECIES();
5458 if(parentStr
==null){
5459 parentStr
= myName
.getSpeciesStr();
5463 if(parentStr
==null){
5464 parentStr
= myName
.getSubgenusStr();
5465 r
= Rank
.SUBGENUS();
5468 if(parentStr
==null){
5469 parentStr
= myName
.getGenusStr();
5473 if(parentStr
==null){
5474 parentStr
= myName
.getSubtribeStr();
5475 r
= Rank
.SUBTRIBE();
5477 if (parentStr
== null){
5478 parentStr
= myName
.getTribeStr();
5481 if (parentStr
== null){
5482 parentStr
= myName
.getSubfamilyStr();
5483 r
= Rank
.SUBFAMILY();
5485 if (parentStr
== null){
5486 parentStr
= myName
.getFamilyStr();
5489 if(parentStr
!=null){
5490 Taxon parent
= handleParentName(ref
, myName
, parser
, parentStr
, r
);
5491 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5492 hierarchy
.put(r
,parent
);
/*
 * Shared parent lookup used by the species, subspecies, form and variety handlers.
 * Visible steps: parentStr is parsed into a name of rank r and wrapped in a new, unsaved
 * Taxon; the nodes of classification are scanned for a taxon whose title cache, cut at
 * the split pattern "sec." and trimmed, equals that of the new Taxon (ignoring case),
 * which then replaces the new Taxon; if none exists, findMatchingTaxon is consulted, a
 * new Taxon is created and saved, and lookForParentNode is called for the parent.
 * The method is declared to return a Taxon.
 * NOTE(review): "sec." is used as a regular expression here, so '.' matches any
 * character - confirm that this is intended.
 * NOTE(review): short lines (braces, the handling of tmp, any loop exit and the return
 * statement) are not visible in this extract - confirm in the full file.
 */
5504 private Taxon
handleParentName(Reference ref
, MyName myName
, INonViralNameParser
<?
> parser
, String parentStr
, Rank r
) {
5505 NonViralName
<?
> parentNameName
= (NonViralName
<?
>) parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
5506 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
5507 // importer.getTaxonService().save(parent);
5508 // parent = CdmBase.deproxy(parent, Taxon.class);
5510 boolean parentDoesNotExists
= true;
// look for a node of the classification that already carries this parent title
5511 for (TaxonNode p
: classification
.getAllNodes()){
5512 if(p
.getTaxon().getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(parent
.getTitleCache().split("sec.")[0].trim())) {
5513 // System.out.println(p.getTaxon().getUuid());
5514 // System.out.println(parent.getUuid());
5515 parentDoesNotExists
= false;
5516 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
5520 if(parentDoesNotExists
) {
5521 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5522 // System.out.println("FOUND PARENT "+tmp.toString()+" for "+parentNameName.toString());
5525 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5526 importer
.getTaxonService().save(parent
);
5531 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5537 private void addNameDifferenceToFile(String originalname
, String atomisedname
){
5539 FileWriter fstream
= new FileWriter(TaxonXImport
.LOG_FOLDER
+ "NamesDifferent_"+classification
.getTitleCache()+".txt",true);
5540 BufferedWriter out
= new BufferedWriter(fstream
);
5541 out
.write(originalname
+" (original) versus "+replaceNull(atomisedname
)+" (atomised) \n");
5542 //Close the output stream
5544 }catch (Exception e
){//Catch exception if any
5545 System
.err
.println("Error: " + e
.getMessage());
5551 * @param nomenclaturalCode2
5554 private void addProblemNameToFile(String name
, String author
, NomenclaturalCode nomenclaturalCode2
, Rank rank
) {
5556 FileWriter fstream
= new FileWriter(TaxonXImport
.LOG_FOLDER
+ "NameNotParsed.txt",true);
5557 BufferedWriter out
= new BufferedWriter(fstream
);
5558 out
.write(name
+"\t"+replaceNull(author
)+"\t"+replaceNull(nomenclaturalCode2
)+"\t"+replaceNull(rank
)+"\n");
5559 //Close the output stream
5561 }catch (Exception e
){//Catch exception if any
5562 System
.err
.println("Error: " + e
.getMessage());
5569 * @param bestMatchingTaxon
5570 * @param insertAsExisting
5573 private void logDecision(NonViralName
<?
> tnb
, Taxon bestMatchingTaxon
, boolean insertAsExisting
, Reference refMods
) {
5575 FileWriter fstream
= new FileWriter(TaxonXImport
.LOG_FOLDER
+ "Decisions_"+classification
.toString()+".txt",true);
5576 BufferedWriter out
= new BufferedWriter(fstream
);
5577 out
.write(tnb
.getTitleCache()+" sec. "+refMods
+"\t"+bestMatchingTaxon
.getTitleCache()+"\t"+insertAsExisting
+"\n");
5578 //Close the output stream
5580 }catch (Exception e
){//Catch exception if any
5581 System
.err
.println("Error: " + e
.getMessage());
/*
 * Converts a value to the text written into the log files of this class.
 * Visible here: a NomenclaturalCode is rendered through getTitleCache(), every other
 * object through toString().
 * NOTE(review): the method name suggests a guard for null input; that branch is not
 * visible in this extract - confirm what is returned for null in the full file.
 */
5586 @SuppressWarnings("unused")
5587 private String
replaceNull(Object in
){
// exact class comparison, not instanceof
5591 if (in
.getClass().equals(NomenclaturalCode
.class)) {
5592 return ((NomenclaturalCode
)in
).getTitleCache();
5594 return in
.toString();
5599 * @param nomenclaturalCode2
5602 private void addProblemNameToFile(String type
, String name
, NomenclaturalCode nomenclaturalCode2
, Rank rank
, String problems
) {
5604 FileWriter fstream
= new FileWriter(TaxonXImport
.LOG_FOLDER
+ "NameNotParsed_"+classification
.getTitleCache()+".txt",true);
5605 BufferedWriter out
= new BufferedWriter(fstream
);
5606 out
.write(type
+"\t"+name
+"\t"+replaceNull(nomenclaturalCode2
)+"\t"+replaceNull(rank
)+"\t"+problems
+"\n");
5607 //Close the output stream
5609 }catch (Exception e
){//Catch exception if any
5610 System
.err
.println("Error: " + e
.getMessage());