3 * Copyright (C) 2013 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.taxonx2013
;
12 import java
.io
.BufferedWriter
;
14 import java
.io
.FileWriter
;
15 import java
.io
.IOException
;
17 import java
.util
.ArrayList
;
18 import java
.util
.HashMap
;
19 import java
.util
.List
;
22 import java
.util
.UUID
;
23 import java
.util
.regex
.Pattern
;
25 import javax
.xml
.transform
.TransformerException
;
26 import javax
.xml
.transform
.TransformerFactoryConfigurationError
;
28 import org
.apache
.commons
.lang
.StringUtils
;
29 import org
.apache
.log4j
.Logger
;
30 import org
.w3c
.dom
.Node
;
31 import org
.w3c
.dom
.NodeList
;
33 import com
.ibm
.lsid
.MalformedLSIDException
;
35 import eu
.etaxonomy
.cdm
.api
.facade
.DerivedUnitFacade
;
36 import eu
.etaxonomy
.cdm
.api
.service
.pager
.Pager
;
37 import eu
.etaxonomy
.cdm
.model
.agent
.AgentBase
;
38 import eu
.etaxonomy
.cdm
.model
.agent
.Person
;
39 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
40 import eu
.etaxonomy
.cdm
.model
.common
.DefinedTermBase
;
41 import eu
.etaxonomy
.cdm
.model
.common
.IdentifiableSource
;
42 import eu
.etaxonomy
.cdm
.model
.common
.LSID
;
43 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
44 import eu
.etaxonomy
.cdm
.model
.common
.OriginalSourceType
;
45 import eu
.etaxonomy
.cdm
.model
.common
.UuidAndTitleCache
;
46 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
47 import eu
.etaxonomy
.cdm
.model
.description
.FeatureNode
;
48 import eu
.etaxonomy
.cdm
.model
.description
.FeatureTree
;
49 import eu
.etaxonomy
.cdm
.model
.description
.IndividualsAssociation
;
50 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
51 import eu
.etaxonomy
.cdm
.model
.description
.TaxonNameDescription
;
52 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
53 import eu
.etaxonomy
.cdm
.model
.name
.BacterialName
;
54 import eu
.etaxonomy
.cdm
.model
.name
.BotanicalName
;
55 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
56 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatus
;
57 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatusType
;
58 import eu
.etaxonomy
.cdm
.model
.name
.NonViralName
;
59 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
60 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
61 import eu
.etaxonomy
.cdm
.model
.name
.ZoologicalName
;
62 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivedUnit
;
63 import eu
.etaxonomy
.cdm
.model
.occurrence
.SpecimenOrObservationType
;
64 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
65 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
66 import eu
.etaxonomy
.cdm
.model
.taxon
.Classification
;
67 import eu
.etaxonomy
.cdm
.model
.taxon
.Synonym
;
68 import eu
.etaxonomy
.cdm
.model
.taxon
.SynonymRelationshipType
;
69 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
70 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
71 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonNode
;
72 import eu
.etaxonomy
.cdm
.persistence
.query
.MatchMode
;
73 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
74 import eu
.etaxonomy
.cdm
.strategy
.parser
.INonViralNameParser
;
75 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImpl
;
82 public class TaxonXTreatmentExtractor
extends TaxonXExtractor
{
private static final Logger logger = Logger.getLogger(TaxonXTreatmentExtractor.class);

/** Feature label handed to extractSpecificFeatureNotStructured for nodes without a dedicated handler. */
private static final String notMarkedUp = "Not marked-up";
/** UUID under which buildFeatureTree looks up / creates its feature tree. */
private static final UUID proIbioTreeUUID = UUID.fromString("2c49f506-c7f7-44de-a8b9-2e695de3769c");
// NOTE(review): the next two UUIDs are not used in the visible part of the file;
// presumably they identify the "Other" and "Not marked-up" features - confirm.
private static final UUID OtherUUID = UUID.fromString("6465f8aa-2175-446f-807e-7163994b120f");
private static final UUID NotMarkedUpUUID = UUID.fromString("796fe3a5-2c9c-4a89-b298-7598ca944063");
private static final boolean skippQuestion = true;

private final NomenclaturalCode nomenclaturalCode;
private Classification classification;

private String treatmentMainName, originalTreatmentName;

private final HashMap<String, Map<String, String>> namesMap = new HashMap<String, Map<String, String>>();

// Compiled once for the class instead of once per extractor instance; Pattern objects are immutable.
/** Matches text starting with one or more digits, or with '-' followed by digits. */
private static final Pattern keypattern = Pattern.compile("^(\\d+.*|-\\d+.*)");
/** Matches text of at least two characters that ends with a digit. */
private static final Pattern keypatternend = Pattern.compile("^.+?\\d$");

/** Gate checked by extractTreatment before most node types are processed and before saving. */
private boolean maxRankRespected = false;
private Map<String, Feature> featuresMap;

private MyName currentMyName;

private Reference<?> sourceUrlRef;

private final TaxonXAddSources sourceHandler = new TaxonXAddSources();
/**
 * Creates a treatment extractor and wires its source handler.
 *
 * @param nomenclaturalCode the nomenclatural code stored on this extractor
 * @param classification the classification stored on this extractor
 * @param importer the importer; also supplies the agent service passed to prepareCollectors
 * @param configState the import state, forwarded to prepareCollectors and to the source handler
 * @param featuresMap the map of features (keyed by String) stored on this extractor
 * @param urlSource the reference stored as sourceUrlRef and handed to the source handler
 */
public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode, Classification classification,
        TaxonXImport importer, TaxonXImportState configState, Map<String, Feature> featuresMap,
        Reference<?> urlSource) {
    this.nomenclaturalCode = nomenclaturalCode;
    this.classification = classification;
    this.importer = importer;
    this.configState = configState;
    this.featuresMap = featuresMap;
    this.sourceUrlRef = urlSource;
    prepareCollectors(configState, importer.getAgentService());
    // The source handler needs the same reference, importer and state as this extractor.
    this.sourceHandler.setSourceUrlRef(sourceUrlRef);
    this.sourceHandler.setImporter(importer);
    this.sourceHandler.setConfigState(configState);
}
133 * Extracts all the treatment information and saves it.
134 * @param treatmentnode: the XML Node
135 * @param tosave: the list of objects to save into the CDM
136 * @param refMods: the reference extracted from the MODS
137 * @param sourceName: the URI of the document
// Walks the child nodes of a treatment node and dispatches each child to an extractor,
// chosen by the child's node name and, for tax:div nodes, by its "type" attribute.
// At the end, names and the classification are saved through the importer services
// when maxRankRespected is set.
// NOTE(review): several original lines (try openers, closing braces, else branches) are
// not visible in this view, so the exact nesting of the steps below must be confirmed.
139 @SuppressWarnings({ "rawtypes", "unused" })
140 protected void extractTreatment(Node treatmentnode
, List
<Object
> tosave
, Reference
<?
> refMods
, URI sourceName
) {
141 logger
.info("extractTreatment");
142 List
<TaxonNameBase
> nametosave
= new ArrayList
<TaxonNameBase
>();
143 NodeList children
= treatmentnode
.getChildNodes();
144 Taxon acceptedTaxon
=null;
145 Taxon defaultTaxon
=null;
146 boolean refgroup
=false;
// First pass: look for a tax:ref_group child.
// NOTE(review): the body of this test is not visible; presumably it sets refgroup - confirm.
148 for (int i
=0;i
<children
.getLength();i
++){
149 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:ref_group")) {
// Second pass: dispatch every child node.
154 for (int i
=0;i
<children
.getLength();i
++){
// tax:nomenclature: scan its children for a tax:name element, then extract the accepted taxon.
156 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:nomenclature")){
157 NodeList nomenclature
= children
.item(i
).getChildNodes();
158 boolean containsName
=false;
159 for(int k
=0;k
<nomenclature
.getLength();k
++){
160 if(nomenclature
.item(k
).getNodeName().equalsIgnoreCase("tax:name")){
166 reloadClassification();
167 //extract "main" the scientific name
169 acceptedTaxon
= extractNomenclature(children
.item(i
),nametosave
,refMods
);
// NOTE(review): a ClassCastException prints the stack trace and then terminates the JVM via System.exit(0).
170 }catch(ClassCastException e
){e
.printStackTrace();System
.exit(0);}
171 // System.out.println("acceptedTaxon : "+acceptedTaxon);
174 else if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:ref_group") && maxRankRespected
){
175 reloadClassification();
176 //extract the References within the document
177 extractReferences(children
.item(i
),nametosave
,acceptedTaxon
,refMods
);
// tax:div type="multiple": the document URI is appended to a log file, then the node is dispatched on "multiple".
179 else if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
180 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected
){
// NOTE(review): hard-coded absolute path; the FileWriter is opened in append mode.
181 File file
= new File("/home/pkelbert/Bureau/multipleTaxonX.txt");
184 writer
= new FileWriter(file
,true);
185 writer
.write(sourceName
+"\n");
188 } catch (IOException e1
) {
189 // TODO Auto-generated catch block
190 e1
.printStackTrace();
192 // String multiple = askMultiple(children.item(i));
// "multiple" is fixed to "Other" here, so of the comparisons below only the "other" one can match.
193 String multiple
= "Other";
194 if (multiple
.equalsIgnoreCase("other")) {
195 extractSpecificFeatureNotStructured(children
.item(i
),acceptedTaxon
, defaultTaxon
,nametosave
, refMods
,multiple
);
198 if (multiple
.equalsIgnoreCase("synonyms")) {
200 extractSynonyms(children
.item(i
),acceptedTaxon
, refMods
);
// A NullPointerException from extractSynonyms is only logged as a warning.
201 }catch(NullPointerException e
){
202 logger
.warn("the accepted taxon is maybe null");
206 if(multiple
.equalsIgnoreCase("material examined")){
207 extractMaterials(children
.item(i
),acceptedTaxon
, refMods
, nametosave
);
210 if (multiple
.equalsIgnoreCase("distribution")){
211 extractDistribution(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
);
214 if (multiple
.equalsIgnoreCase("type status")){
215 extractDescriptionWithReference(children
.item(i
),acceptedTaxon
,defaultTaxon
,refMods
, "TypeStatus");
218 if (multiple
.equalsIgnoreCase("vernacular name")){
219 extractDescriptionWithReference(children
.item(i
),acceptedTaxon
,defaultTaxon
,refMods
, Feature
.COMMON_NAME().getTitleCache());
223 extractSpecificFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
,multiple
);
// The remaining tax:div branches map the "type" attribute to a feature or a dedicated extractor;
// each of them also requires maxRankRespected.
227 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
228 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected
){
229 extractFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
, nametosave
, refMods
, Feature
.BIOLOGY_ECOLOGY());
231 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
232 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("vernacularName") && maxRankRespected
){
233 extractDescriptionWithReference(children
.item(i
),acceptedTaxon
,defaultTaxon
,refMods
, Feature
.COMMON_NAME().getTitleCache());
235 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
236 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected
){
237 extractFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
, Feature
.DESCRIPTION());
239 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
240 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected
){
241 extractFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
,Feature
.DIAGNOSIS());
243 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
244 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected
){
245 extractFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
, Feature
.DISCUSSION());
// type="note" is stored under Feature.DESCRIPTION(), the same feature as type="description".
247 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
248 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("note") && maxRankRespected
){
249 extractFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
, Feature
.DESCRIPTION());
252 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
253 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected
){
254 extractDistribution(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
);
256 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
257 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected
){
258 extractFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
,refMods
,Feature
.ETYMOLOGY());
261 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
262 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected
){
263 extractMaterials(children
.item(i
),acceptedTaxon
, refMods
, nametosave
);
// tax:figure nodes are stored under the feature name "Figure".
265 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:figure") && maxRankRespected
){
266 extractSpecificFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
, nametosave
, refMods
, "Figure");
// type="Other" is stored under the feature name "table".
268 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
269 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") && maxRankRespected
){
270 extractSpecificFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
, nametosave
, refMods
, "table");
// type="key": keys are not parsed; their content is stored unstructured under "Keys - unparsed".
273 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
274 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected
){
275 //TODO IGNORE keys for the moment
276 //extractKey(children.item(i),acceptedTaxon, nametosave,source, refMods);
277 extractSpecificFeatureNotStructured(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
,"Keys - unparsed");
// Nodes other than tax:pb are stored unstructured under the notMarkedUp label.
// NOTE(review): presumably this is the fallback branch for unhandled nodes - the enclosing else is not visible; confirm.
280 if (!children
.item(i
).getNodeName().equalsIgnoreCase("tax:pb")){
281 //logger.info("ANOTHER KIND OF NODES: "+children.item(i).getNodeName()+", "+children.item(i).getAttributes());
282 if (children
.item(i
).getAttributes() !=null) {
283 //logger.info(children.item(i).getAttributes().item(0));
285 extractSpecificFeatureNotStructured(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
,notMarkedUp
);
289 // logger.info("saveUpdateNames");
// Persist the collected names and the classification only when maxRankRespected is set.
290 if (maxRankRespected
){
291 importer
.getNameService().saveOrUpdate(nametosave
);
292 importer
.getClassificationService().saveOrUpdate(classification
);
293 //logger.info("saveUpdateNames-ok");
// Accessor returning a Map from String to Feature.
// NOTE(review): the method body is not visible in this view; presumably it returns
// the featuresMap field - confirm against the full file.
300 protected Map
<String
,Feature
> getFeaturesUsed(){
// Builds or refreshes the feature tree identified by proIbioTreeUUID: removes children
// from the tree root, adds one FeatureNode per entry of featuresMap, and saves the tree
// through the feature tree service.
// NOTE(review): several lines (try openers, closing braces, loop counter update) are not
// visible in this view; the nesting of the steps below must be confirmed.
306 private void buildFeatureTree() {
307 logger
.info("buildFeatureTree");
308 FeatureTree proibiospheretree
= importer
.getFeatureTreeService().find(proIbioTreeUUID
);
// No tree stored under that UUID yet.
309 if (proibiospheretree
== null){
310 List
<FeatureTree
> trees
= importer
.getFeatureTreeService().list(FeatureTree
.class, null, null, null, null);
// When exactly one tree is listed, its distinct features are copied into featuresMap, keyed by title cache.
311 if (trees
.size()==1) {
312 FeatureTree ft
= trees
.get(0);
313 if (featuresMap
==null) {
314 featuresMap
=new HashMap
<String
, Feature
>();
316 for (Feature feature
: ft
.getDistinctFeatures()){
318 featuresMap
.put(feature
.getTitleCache(), feature
);
// A new tree is created and given the fixed UUID.
322 proibiospheretree
= FeatureTree
.NewInstance();
323 proibiospheretree
.setUuid(proIbioTreeUUID
);
325 // FeatureNode root = proibiospheretree.getRoot();
326 FeatureNode root2
= proibiospheretree
.getRoot();
// Remove existing children starting from the last index; a failure is only logged as a warning.
328 int nbChildren
= root2
.getChildCount()-1;
329 while (nbChildren
>-1){
331 root2
.removeChild(nbChildren
);
332 }catch(Exception e
){logger
.warn("Can't remove child from FeatureTree "+e
);}
// Re-populate the root with one node per known feature.
338 for (Feature feature
:featuresMap
.values()) {
339 root2
.addChild(FeatureNode
.NewInstance(feature
));
341 importer
.getFeatureTreeService().saveOrUpdate(proibiospheretree
);
348 * @param acceptedTaxon: the current acceptedTaxon
349 * @param nametosave: the list of objects to save into the CDM
350 * @param refMods: the current reference extracted from the MODS
352 /* @SuppressWarnings("rawtypes")
353 private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonNameBase> nametosave, Reference<?> refMods) {
354 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
356 NodeList children = keys.getChildNodes();
358 PolytomousKey poly = PolytomousKey.NewInstance();
359 poly.addSource(OriginalSourceType.Import, null,null,refMods,null);
360 poly.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
361 poly.addTaxonomicScope(acceptedTaxon);
362 poly.setTitleCache("bloup");
363 // poly.addCoveredTaxon(acceptedTaxon);
364 PolytomousKeyNode root = poly.getRoot();
365 PolytomousKeyNode previous = null,tmpKey=null;
367 List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
369 // String fullContent = keys.getTextContent();
370 for (int i=0;i<children.getLength();i++){
371 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
372 NodeList paragraph = children.item(i).getChildNodes();
375 for (int j=0;j<paragraph.getLength();j++){
376 if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
377 if (! paragraph.item(j).getTextContent().trim().isEmpty()){
378 key+=paragraph.item(j).getTextContent().trim();
379 // logger.info("KEY: "+j+"--"+key);
382 if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
383 taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
386 // logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
387 if (keypattern.matcher(key).matches()){
388 tmpKey = PolytomousKeyNode.NewInstance(key);
389 if (taxonKey!=null) {
390 tmpKey.setTaxon(taxonKey);
392 polyNodes.add(tmpKey);
393 if (previous == null) {
394 root.addChild(tmpKey);
396 previous.addChild(tmpKey);
400 tmpKey=PolytomousKeyNode.NewInstance(key);
401 if (taxonKey!=null) {
402 tmpKey.setTaxon(taxonKey);
404 polyNodes.add(tmpKey);
405 if (keypatternend.matcher(key).matches()) {
406 root.addChild(tmpKey);
409 previous.addChild(tmpKey);
416 importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
417 importer.getPolytomousKeyService().saveOrUpdate(poly);
421 // * @param taxons: the XML Nodegroup
422 // * @param nametosave: the list of objects to save into the CDM
423 // * @param acceptedTaxon: the current accepted Taxon
424 // * @param refMods: the current reference extracted from the MODS
426 // * @return Taxon object built
428 // @SuppressWarnings({ "rawtypes", "unchecked" })
429 // private Taxon getTaxonFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference<?> refMods) {
430 // // logger.info("getTaxonFromXML");
431 // // logger.info("acceptedTaxon: "+acceptedTaxon);
433 // // TaxonNameBase nameToBeFilled = null;
435 // currentMyName = new MyName();
436 // NomenclaturalStatusType statusType = null;
439 // currentMyName = extractScientificName(taxons);
440 // if (!currentMyName.getStatus().isEmpty()){
442 // statusType = nomStatusString2NomStatus(currentMyName.getStatus());
443 // } catch (UnknownCdmTypeException e) {
444 // addProblematicStatusToFile(currentMyName.getStatus());
445 // logger.warn("Problem with status");
449 // } catch (TransformerFactoryConfigurationError e1) {
451 // } catch (TransformerException e1) {
454 // /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
456 // nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
457 // if (nameToBeFilled.hasProblem() &&
458 // !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
459 // // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
460 // addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
461 // nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser, currentMyName.getAuthor(), currentMyName.getRank());
464 // nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
466 // TaxonNameBase nameToBeFilled = currentMyName.getTaxonNameBase();
467 // Taxon t = currentMyName.getTaxon();
468 // // importer.getNameService().saveOrUpdate(nametosave);
469 // /* Taxon t = importer.getTaxonService().findBestMatchingTaxon(nameToBeFilled.getTitleCache());
471 // boolean statusMatch=false;
473 // statusMatch=compareStatus(t, statusType);
475 // if (t ==null || (t != null && !statusMatch)){
476 // if(statusType != null) {
477 // nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
479 // t= new Taxon(nameToBeFilled,(Reference<?>) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
480 // if (t.getSec() == null) {
481 // t.setSec(refMods);
483 // if(!configState.getConfig().doKeepOriginalSecundum()) {
484 // t.setSec(configState.getConfig().getSecundum());
485 // logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
487 // t.addSource(OriginalSourceType.Import,null,null,refMods,null);
488 // t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
491 // if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
492 // setLSID(currentMyName.getIdentifier(), t);
495 // // Taxon parentTaxon = currentMyName.getHigherTaxa();
496 // // if (parentTaxon == null && !skippQuestion) {
497 // // parentTaxon = askParent(t, classification);
499 // // if (parentTaxon ==null){
500 // // while (parentTaxon == null) {
501 // // System.out.println("parent is null");
502 // // parentTaxon = createParent(t, refMods);
503 // // classification.addParentChild(parentTaxon, t, refMods, null);
506 // // classification.addParentChild(parentTaxon, t, refMods, null);
510 // t = CdmBase.deproxy(t, Taxon.class);
512 // if (!configState.getConfig().doKeepOriginalSecundum()) {
513 // t.setSec(configState.getConfig().getSecundum());
514 // logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
522 // private Taxon getTaxonFromTaxonNameBase(TaxonNameBase tnb,Reference<?> ref){
523 // Taxon taxon = null;
524 //// System.out.println(tnb.getTitleCache());
525 // Taxon cc= importer.getTaxonService().findBestMatchingTaxon(tnb.getTitleCache());
527 // if ((cc.getSec() == null || cc.getSec().toString().isEmpty()) || (cc.getSec() != null &&
528 // cc.getSec().getTitleCache().equalsIgnoreCase(ref.getTitleCache()))) {
529 // if(cc.getSec() == null || cc.getSec().toString().isEmpty()){
531 // importer.getTaxonService().saveOrUpdate(cc);
537 // // List<TaxonBase> c = importer.getTaxonService().searchTaxaByName(tnb.getTitleCache(), ref);
538 // List<TaxonBase> c = importer.getTaxonService().list(TaxonBase.class, 0, 0, null, null);
539 // for (TaxonBase b : c) {
541 // taxon = (Taxon) b;
542 // }catch(ClassCastException e){logger.warn("error while casting existing taxonnamebase");}
545 // if (taxon == null){
546 //// System.out.println("NEW TAXON HERE "+tnb.toString()+", "+ref.toString());
547 // taxon = Taxon.NewInstance(tnb, ref); //sec set null
548 // importer.getTaxonService().save(taxon);
551 // taxon = (Taxon) importer.getTaxonService().find(taxon.getUuid());
553 // boolean exist = false;
554 // for (TaxonNode p : classification.getAllNodes()){
555 // if(p.getTaxon().equals(taxon)) {
560 // taxon = (Taxon) importer.getTaxonService().find(taxon.getUuid());
561 // Taxon parentTaxon = currentMyName.getHigherTaxa();
562 // if (parentTaxon != null) {
563 // classification.addParentChild(parentTaxon, taxon, ref, null);
565 // System.out.println("HERE???");
566 // classification.addChildTaxon(taxon, ref, null);
568 // importer.getClassificationService().saveOrUpdate(classification);
569 // // refreshTransaction();
571 // taxon = CdmBase.deproxy(taxon, Taxon.class);
572 // // System.out.println("TAXON RETOURNE : "+taxon.getTitleCache());
576 * @param taxons: the XML Nodegroup
577 * @param nametosave: the list of objects to save into the CDM
578 * @param acceptedTaxon: the current accepted Taxon
579 * @param refMods: the current reference extracted from the MODS
581 * @return Taxon object built
// Extracts a scientific name from the given XML node into currentMyName and returns the
// TaxonNameBase that currentMyName provides.
// NOTE(review): the try opener and the catch bodies are not visible in this view.
583 @SuppressWarnings({ "rawtypes", "unused" })
584 private TaxonNameBase
getTaxonNameBaseFromXML(Node taxons
, List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
, boolean isSynonym
) {
585 // logger.info("getTaxonFromXML");
586 // logger.info("acceptedTaxon: "+acceptedTaxon);
587 logger
.info("getTaxonNameBaseFromXML");
588 TaxonNameBase nameToBeFilled
= null;
// This instance is replaced by the result of extractScientificName below when that call succeeds.
590 currentMyName
=new MyName(isSynonym
);
592 NomenclaturalStatusType statusType
= null;
594 currentMyName
= extractScientificName(taxons
,refMods
);
595 } catch (TransformerFactoryConfigurationError e1
) {
597 } catch (TransformerException e1
) {
600 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
602 nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
603 if (nameToBeFilled.hasProblem() &&
604 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
605 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
606 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
607 nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
610 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
// The name object is taken from currentMyName; the parser-based path above is commented out.
612 nameToBeFilled
= currentMyName
.getTaxonNameBase();
613 return nameToBeFilled
;
617 // @SuppressWarnings("rawtypes")
618 // private TaxonNameBase getTaxonNameBase (TaxonNameBase name, List<TaxonNameBase> nametosave, NomenclaturalStatusType statusType){
619 // List<TaxonNameBase> names = importer.getNameService().list(TaxonNameBase.class, null, null, null, null);
620 // for (TaxonNameBase tb : names){
621 // if (tb.getTitleCache().equalsIgnoreCase(name.getTitleCache())) {
622 // boolean statusMatch=false;
624 // statusMatch=compareStatus(tb, statusType);
626 // if (!statusMatch){
627 // if(statusType != null) {
628 // name.addStatus(NomenclaturalStatus.NewInstance(statusType));
632 // logger.info("TaxonNameBase FOUND"+name.getTitleCache());
633 // return CdmBase.deproxy(tb, TaxonNameBase.class);
637 // // logger.info("TaxonNameBase NOT FOUND "+name.getTitleCache());
638 // // System.out.println("add name "+name);
639 // nametosave.add(name);
640 // name = CdmBase.deproxy(name, TaxonNameBase.class);
649 // * @param statusType
652 // private boolean compareStatus(TaxonNameBase tb, NomenclaturalStatusType statusType) {
653 // boolean statusMatch=false;
655 // Set<NomenclaturalStatus> status = tb.getStatus();
656 // if (statusType!=null && status.size()>0){ //the statusType is known for both taxon
657 // for (NomenclaturalStatus st:status){
658 // NomenclaturalStatusType stype = st.getType();
659 // if (stype.toString().equalsIgnoreCase(statusType.toString())) {
665 // if(statusType == null && status.size()==0) {//there is no statusType, we can assume it's the same
669 // return statusMatch;
// Refreshes the classification field from the classification service, using the current
// classification's UUID.
// NOTE(review): three lines between the lookup and the save are not visible in this view;
// presumably they decide whether the looked-up instance "cl" is used directly or the
// save-and-reload path below runs - confirm against the full file.
675 private void reloadClassification() {
676 logger
.info("reloadClassification");
677 Classification cl
= importer
.getClassificationService().find(classification
.getUuid());
// Save the current classification, then read it back by UUID.
681 importer
.getClassificationService().saveOrUpdate(classification
);
682 classification
= importer
.getClassificationService().find(classification
.getUuid());
687 // * Create a Taxon for the current NameBase, based on the current reference
688 // * @param taxonNameBase
689 // * @param refMods: the current reference extracted from the MODS
692 // @SuppressWarnings({ "unused", "rawtypes" })
693 // private Taxon getTaxon(TaxonNameBase taxonNameBase, Reference<?> refMods) {
694 // Taxon t = new Taxon(taxonNameBase,null );
695 // if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
696 // t.setSec(configState.getConfig().getSecundum());
697 // logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
699 // /*<<<<<<< .courant
700 // boolean sourceExists=false;
701 // Set<IdentifiableSource> sources = t.getSources();
702 // for (IdentifiableSource src : sources){
703 // String micro = src.getCitationMicroReference();
704 // Reference r = src.getCitation();
705 // if (r.equals(refMods) && micro == null) {
706 // sourceExists=true;
709 // if(!sourceExists) {
710 // t.addSource(null,null,refMods,null);
713 // t.addSource(OriginalSourceType.Import,null,null,refMods,null);
714 // t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
// Collects the trimmed text of the tax:p children (into s) and of the tax:bibref children
// (into r) of the given node, creates a generic reference whose title cache is r, and
// hands the text, both references and the feature resolved from featureName to
// setParticularDescription.
// NOTE(review): several lines (closing braces, the body of the indexOf test) are not
// visible in this view.
718 private void extractDescriptionWithReference(Node typestatus
, Taxon acceptedTaxon
, Taxon defaultTaxon
, Reference
<?
> refMods
,
719 String featureName
) {
720 // System.out.println("extractDescriptionWithReference !");
721 logger
.info("extractDescriptionWithReference");
722 NodeList children
= typestatus
.getChildNodes();
724 Feature currentFeature
=getFeatureObjectFromString(featureName
);
// r accumulates bibliographic reference text, s accumulates paragraph text.
726 String r
="";String s
="";
727 for (int i
=0;i
<children
.getLength();i
++){
728 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
729 s
+=children
.item(i
).getTextContent().trim();
731 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:bibref")){
732 r
+= children
.item(i
).getTextContent().trim();
// True when the reference text occurs inside the paragraph text; what is done in that case is not visible here.
734 if (s
.indexOf(r
)>-1) {
739 Reference
<?
> currentref
= ReferenceFactory
.newGeneric();
741 currentref
.setTitleCache(r
, true);
745 setParticularDescription(s
,acceptedTaxon
,defaultTaxon
, currentref
, refMods
,currentFeature
);
750 * @param distribution: the XML node group
751 * @param acceptedTaxon: the current accepted Taxon
752 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
753 * @param refMods: the current reference extracted from the MODS
// Reads the tax:p paragraphs of a distribution node, collecting free text and specimen /
// observation records per paragraph index, then attaches them to the accepted taxon's
// description (specimens as associations, text as DISTRIBUTION or HABITAT data) and saves
// the taxon.
// NOTE(review): several lines are not visible in this view (closing braces, the bodies of
// the two key-set loops, the definition of the bound "m"); nesting must be confirmed.
755 @SuppressWarnings("rawtypes")
756 private void extractDistribution(Node distribution
, Taxon acceptedTaxon
, Taxon defaultTaxon
, List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
) {
757 logger
.info("extractDistribution");
758 // logger.info("acceptedTaxon: "+acceptedTaxon);
759 NodeList children
= distribution
.getChildNodes();
// Both maps are keyed by i, the index of the tax:p node among the children of the distribution node.
760 Map
<Integer
,List
<MySpecimenOrObservation
>> specimenOrObservations
= new HashMap
<Integer
, List
<MySpecimenOrObservation
>>();
761 Map
<Integer
,String
> descriptionsFulltext
= new HashMap
<Integer
,String
>();
763 for (int i
=0;i
<children
.getLength();i
++){
764 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
765 NodeList paragraph
= children
.item(i
).getChildNodes();
// Each paragraph child is handled by kind: plain text, inline name, or collection event.
766 for (int j
=0;j
<paragraph
.getLength();j
++){
767 if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("#text")){
768 extractText(descriptionsFulltext
, i
, paragraph
.item(j
));
770 else if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
771 extractInLine(nametosave
, refMods
, descriptionsFulltext
, i
,paragraph
.item(j
));
773 else if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("tax:collection_event")){
774 MySpecimenOrObservation specimenOrObservation
= new MySpecimenOrObservation();
// derivedUnitBase is still null when it is passed to extractSpecimenOrObservation.
775 DerivedUnit derivedUnitBase
= null;
776 specimenOrObservation
= extractSpecimenOrObservation(paragraph
.item(j
), derivedUnitBase
, SpecimenOrObservationType
.DerivedUnit
);
777 extractTextFromSpecimenOrObservation(specimenOrObservations
, descriptionsFulltext
, i
, specimenOrObservation
);
// NOTE(review): the bodies of the next two loops are not visible; presumably they compute
// the largest key, used as the bound "m" below - confirm.
784 for (int k
:descriptionsFulltext
.keySet()) {
789 for (int k
:specimenOrObservations
.keySet()) {
796 if(acceptedTaxon
!=null){
797 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
798 Feature currentFeature
= Feature
.DISTRIBUTION();
799 // DerivedUnit derivedUnitBase=null;
// Process paragraph indices in ascending order from 0 to m inclusive.
801 for (int k
=0;k
<=m
;k
++){
802 if(specimenOrObservations
.keySet().contains(k
)){
803 for (MySpecimenOrObservation soo
:specimenOrObservations
.get(k
) ) {
804 handleAssociation(acceptedTaxon
, refMods
, td
, soo
);
808 if (descriptionsFulltext
.keySet().contains(k
)){
// Non-empty text starting with "Hab." or "Habitat" is stored under Feature.HABITAT().
809 if (!stringIsEmpty(descriptionsFulltext
.get(k
).trim()) && (descriptionsFulltext
.get(k
).startsWith("Hab.") || descriptionsFulltext
.get(k
).startsWith("Habitat"))){
810 setParticularDescription(descriptionsFulltext
.get(k
),acceptedTaxon
,defaultTaxon
, refMods
, Feature
.HABITAT());
// Text is added to the description under currentFeature (DISTRIBUTION).
// NOTE(review): presumably this is the else branch of the habitat test - confirm.
814 handleTextData(refMods
, descriptionsFulltext
, td
, currentFeature
, k
);
// When index k contributed text or specimens, attach the description, add its source and save the taxon.
818 if (descriptionsFulltext
.keySet().contains(k
) || specimenOrObservations
.keySet().contains(k
)){
819 acceptedTaxon
.addDescription(td
);
820 sourceHandler
.addAndSaveSource(refMods
, td
, null);
821 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
829 * @param descriptionsFulltext
831 * @param currentFeature
834 private void handleTextData(Reference
<?
> refMods
, Map
<Integer
, String
> descriptionsFulltext
, TaxonDescription td
,
835 Feature currentFeature
, int k
) {
836 //logger.info("handleTextData");
837 TextData textData
= TextData
.NewInstance();
838 textData
.setFeature(currentFeature
);
839 textData
.putText(Language
.UNKNOWN_LANGUAGE(), descriptionsFulltext
.get(k
));
840 sourceHandler
.addSource(refMods
, textData
);
841 td
.addElement(textData
);
845 * @param acceptedTaxon
850 private void handleAssociation(Taxon acceptedTaxon
, Reference
<?
> refMods
, TaxonDescription td
, MySpecimenOrObservation soo
) {
851 logger
.info("handleAssociation");
852 String descr
=soo
.getDescr();
853 DerivedUnit derivedUnitBase
= soo
.getDerivedUnitBase();
855 sourceHandler
.addAndSaveSource(refMods
, derivedUnitBase
);
857 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
859 Feature feature
=null;
860 feature
= makeFeature(derivedUnitBase
);
861 if(!StringUtils
.isEmpty(descr
)) {
862 derivedUnitBase
.setTitleCache(descr
, true);
865 IndividualsAssociation indAssociation
= createIndividualAssociation(refMods
, derivedUnitBase
, feature
);
867 taxonDescription
.addElement(indAssociation
);
868 sourceHandler
.addAndSaveSource(refMods
, taxonDescription
,null);
869 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
870 td
.setDescribedSpecimenOrObservation(soo
.getDerivedUnitBase());
874 * create an individualAssociation
876 * @param derivedUnitBase
880 private IndividualsAssociation
createIndividualAssociation(Reference
<?
> refMods
, DerivedUnit derivedUnitBase
,
882 logger
.info("createIndividualAssociation");
883 IndividualsAssociation indAssociation
= IndividualsAssociation
.NewInstance();
884 indAssociation
.setAssociatedSpecimenOrObservation(derivedUnitBase
);
885 indAssociation
.setFeature(feature
);
886 indAssociation
= sourceHandler
.addSource(refMods
, indAssociation
);
887 return indAssociation
;
891 * @param specimenOrObservations
892 * @param descriptionsFulltext
894 * @param specimenOrObservation
896 private void extractTextFromSpecimenOrObservation(Map
<Integer
, List
<MySpecimenOrObservation
>> specimenOrObservations
,
897 Map
<Integer
, String
> descriptionsFulltext
, int i
, MySpecimenOrObservation specimenOrObservation
) {
898 logger
.info("extractTextFromSpecimenOrObservation");
899 List
<MySpecimenOrObservation
> speObsList
= specimenOrObservations
.get(i
);
900 if (speObsList
== null) {
901 speObsList
=new ArrayList
<MySpecimenOrObservation
>();
903 speObsList
.add(specimenOrObservation
);
904 specimenOrObservations
.put(i
,speObsList
);
906 String s
= specimenOrObservation
.getDerivedUnitBase().toString();
907 if (descriptionsFulltext
.get(i
) !=null){
908 s
= descriptionsFulltext
.get(i
)+" "+s
;
910 descriptionsFulltext
.put(i
, s
);
914 * Extract the text with the inline link to a taxon
917 * @param descriptionsFulltext
921 @SuppressWarnings("rawtypes")
922 private void extractInLine(List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
, Map
<Integer
, String
> descriptionsFulltext
,
923 int i
, Node paragraph
) {
924 //logger.info("extractInLine");
925 String inLine
=getInlineText(nametosave
, refMods
, paragraph
);
926 if (descriptionsFulltext
.get(i
) !=null){
927 inLine
= descriptionsFulltext
.get(i
)+inLine
;
929 descriptionsFulltext
.put(i
, inLine
);
933 * Extract the raw text from a Node
934 * @param descriptionsFulltext
938 private void extractText(Map
<Integer
, String
> descriptionsFulltext
, int i
, Node node
) {
939 //logger.info("extractText");
940 if(!node
.getTextContent().trim().isEmpty()) {
941 String s
=node
.getTextContent().trim();
942 if (descriptionsFulltext
.get(i
) !=null){
943 s
= descriptionsFulltext
.get(i
)+" "+s
;
945 descriptionsFulltext
.put(i
, s
);
951 * @param materials: the XML node group
952 * @param acceptedTaxon: the current accepted Taxon
953 * @param refMods: the current reference extracted from the MODS
955 @SuppressWarnings("rawtypes")
956 private void extractMaterials(Node materials
, Taxon acceptedTaxon
, Reference
<?
> refMods
,List
<TaxonNameBase
> nametosave
) {
// Walks the children of `materials`; for every "tax:p" child its child nodes (`events`) are scanned in order:
//  - "tax:name" nodes contribute their inline taxon link (getInlineText) to rawAssociation,
//  - nodes that are neither "tax:name" nor "tax:collection_event" contribute their trimmed text,
//  - "tax:collection_event" nodes are handed to handleDerivedUnitFacadeAndBase together with the text
//    gathered so far, replaced by "no description text" when containsDistinctLetters rejects it.
// When rawAssociation is non-empty and `added` is false, the text is wrapped in a MATERIALS_EXAMINED
// TextData and, if acceptedTaxon is not null, added to that taxon's description.
// NOTE(review): `added` is declared and assigned outside the lines shown here - confirm where it is
// set and reset before changing the control flow.
957 logger
.info("EXTRACTMATERIALS");
958 // logger.info("acceptedTaxon: "+acceptedTaxon);
959 NodeList children
= materials
.getChildNodes();
960 NodeList events
= null;
964 for (int i
=0;i
<children
.getLength();i
++){
965 String rawAssociation
="";
967 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
968 events
= children
.item(i
).getChildNodes();
969 for(int k
=0;k
<events
.getLength();k
++){
970 if (events
.item(k
).getNodeName().equalsIgnoreCase("tax:name")){
971 String inLine
= getInlineText(nametosave
, refMods
, events
.item(k
));
972 if(!inLine
.isEmpty()) {
973 rawAssociation
+=inLine
;
976 if (! events
.item(k
).getNodeName().equalsIgnoreCase("tax:name")
977 && !events
.item(k
).getNodeName().equalsIgnoreCase("tax:collection_event")){
978 rawAssociation
+= events
.item(k
).getTextContent().trim();
980 if(events
.item(k
).getNodeName().equalsIgnoreCase("tax:collection_event")){
// Semicolons are ignored when checking whether the gathered text is meaningful.
981 if (!containsDistinctLetters(rawAssociation
.replaceAll(";",""))) {
982 rawAssociation
="no description text";
985 handleDerivedUnitFacadeAndBase(acceptedTaxon
, refMods
, events
.item(k
), rawAssociation
);
987 if (!rawAssociation
.isEmpty() && !added
){
989 Feature feature
= Feature
.MATERIALS_EXAMINED();
990 featuresMap
.put(feature
.getTitleCache(),feature
);
992 TextData textData
= createTextData(rawAssociation
, refMods
, feature
);
994 if(! rawAssociation
.isEmpty() && (acceptedTaxon
!=null)){
995 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
996 td
.addElement(textData
);
997 acceptedTaxon
.addDescription(td
);
998 sourceHandler
.addAndSaveSource(refMods
, td
, null);
// The commented-out lines below are an earlier variant that built an IndividualsAssociation here.
1000 // DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
1001 // derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
1003 // TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
1004 // acceptedTaxon.addDescription(taxonDescription);
1006 // IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
1008 // Feature feature = Feature.MATERIALS_EXAMINED();
1009 // featuresMap.put(feature.getTitleCache(),feature);
1010 // if(!StringUtils.isEmpty(rawAssociation)) {
1011 // derivedUnitBase.setTitleCache(rawAssociation, true);
1013 // indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
1014 // indAssociation.setFeature(feature);
1015 // indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
1017 // /*boolean sourceExists=false;
1018 // Set<DescriptionElementSource> dsources = indAssociation.getSources();
1019 // for (DescriptionElementSource src : dsources){
1020 // String micro = src.getCitationMicroReference();
1021 // Reference r = src.getCitation();
1022 // if (r.equals(refMods) && micro == null) {
1023 // sourceExists=true;
1026 // if(!sourceExists) {
1027 // indAssociation.addSource(null, null, refMods, null);
1029 // taxonDescription.addElement(indAssociation);
1030 // taxonDescription.setTaxon(acceptedTaxon);
1031 // taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
1033 // /*sourceExists=false;
1034 // Set<IdentifiableSource> sources = taxonDescription.getSources();
1035 // for (IdentifiableSource src : sources){
1036 // String micro = src.getCitationMicroReference();
1037 // Reference r = src.getCitation();
1038 // if (r.equals(refMods) && micro == null) {
1039 // sourceExists=true;
1042 // if(!sourceExists) {
1043 // taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
1046 // importer.getDescriptionService().saveOrUpdate(taxonDescription);
1047 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1057 * @param acceptedTaxon
1060 * @param rawAssociation
1063 private void handleDerivedUnitFacadeAndBase(Taxon acceptedTaxon
, Reference
<?
> refMods
, Node event
,
1064 String rawAssociation
) {
1065 logger
.info("handleDerivedUnitFacadeAndBase");
1067 DerivedUnit derivedUnitBase
;
1068 MySpecimenOrObservation myspecimenOrObservation
;
1069 DerivedUnitFacade derivedUnitFacade
= getFacade(rawAssociation
.replaceAll(";",""),SpecimenOrObservationType
.DerivedUnit
);
1070 derivedUnitBase
= derivedUnitFacade
.innerDerivedUnit();
1072 sourceHandler
.addAndSaveSource(refMods
, derivedUnitBase
);
1074 myspecimenOrObservation
= extractSpecimenOrObservation(event
,derivedUnitBase
,SpecimenOrObservationType
.DerivedUnit
);
1075 derivedUnitBase
= myspecimenOrObservation
.getDerivedUnitBase();
1076 descr
=myspecimenOrObservation
.getDescr();
1078 sourceHandler
.addAndSaveSource(refMods
, derivedUnitBase
);
1080 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
1082 Feature feature
= makeFeature(derivedUnitBase
);
1083 featuresMap
.put(feature
.getTitleCache(),feature
);
1084 if(!StringUtils
.isEmpty(descr
)) {
1085 derivedUnitBase
.setTitleCache(descr
, true);
1088 IndividualsAssociation indAssociation
= createIndividualAssociation(refMods
, derivedUnitBase
, feature
);
1090 taxonDescription
.addElement(indAssociation
);
1091 sourceHandler
.addAndSaveSource(refMods
, taxonDescription
,null);
1092 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1098 * @param materials: the XML node group
1099 * @param acceptedTaxon: the current accepted Taxon
1100 * @param refMods: the current reference extracted from the MODS
1102 private String
extractMaterialsDirect(Node materials
, Taxon acceptedTaxon
, Reference
<?
> refMods
, String event
) {
1103 logger
.info("extractMaterialsDirect");
1104 // logger.info("acceptedTaxon: "+acceptedTaxon);
1107 DerivedUnit derivedUnitBase
=null;
1108 MySpecimenOrObservation myspecimenOrObservation
= extractSpecimenOrObservation(materials
,derivedUnitBase
, SpecimenOrObservationType
.DerivedUnit
);
1109 derivedUnitBase
= myspecimenOrObservation
.getDerivedUnitBase();
1111 sourceHandler
.addAndSaveSource(refMods
, derivedUnitBase
);
1113 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
1115 Feature feature
=null;
1116 if (event
.equalsIgnoreCase("collection")){
1117 feature
= makeFeature(derivedUnitBase
);
1120 feature
= Feature
.MATERIALS_EXAMINED();
1122 featuresMap
.put(feature
.getTitleCache(), feature
);
1124 descr
=myspecimenOrObservation
.getDescr();
1125 if(!StringUtils
.isEmpty(descr
)) {
1126 derivedUnitBase
.setTitleCache(descr
, true);
1129 IndividualsAssociation indAssociation
= createIndividualAssociation(refMods
, derivedUnitBase
, feature
);
1131 taxonDescription
.addElement(indAssociation
);
1132 sourceHandler
.addAndSaveSource(refMods
, taxonDescription
,null);
1133 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1135 return derivedUnitBase
.getTitleCache();
1141 * @param description: the XML node group
1142 * @param acceptedTaxon: the current acceptedTaxon
1143 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1144 * @param nametosave: the list of objects to save into the CDM
1145 * @param refMods: the current reference extracted from the MODS
1146 * @param featureName: the feature name
1148 @SuppressWarnings({ "rawtypes"})
1149 private String
extractSpecificFeature(Node description
, Taxon acceptedTaxon
, Taxon defaultTaxon
,
1150 List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
, String featureName
) {
// Processes the children of `description` for the feature named `featureName`, which is resolved
// through getFeatureObjectFromString:
//  - non-blank "#text" children are collected in `text` and also stored through setParticularDescription,
//  - when featureName is "table", "tax:div" children whose otherType attribute is "thead" or "tr" are
//    converted with extractTableHead / extractTableLine / extractTableLineWithColumn,
//  - for "tax:p" children, inline taxon links and non-blank text nodes are joined with single spaces
//    and stored through setParticularDescription.
// When `text` is not empty, its entries joined with single spaces are returned.
// NOTE(review): getNamedItem("otherType") is dereferenced without a null check - a tax:div without
// that attribute would presumably raise a NullPointerException; confirm against the input schema.
1151 logger
.info("extractSpecificFeature "+featureName
);
1152 // System.out.println("GRUUUUuu");
1153 NodeList children
= description
.getChildNodes();
1154 NodeList insideNodes
;
1156 // String descr ="";
1157 String localdescr
="";
1158 List
<String
> blabla
=null;
1159 List
<String
> text
= new ArrayList
<String
>();
1161 String table
="<table>";
1165 Feature currentFeature
=getFeatureObjectFromString(featureName
);
1167 // String fullContent = description.getTextContent();
1168 for (int i
=0;i
<children
.getLength();i
++){
1170 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text") && !children
.item(i
).getTextContent().trim().isEmpty()){
1171 text
.add(children
.item(i
).getTextContent().trim());
1173 if (featureName
.equalsIgnoreCase("table")){
1174 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
1175 children
.item(i
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("thead")){
1176 head
= extractTableHead(children
.item(i
));
1178 line
= extractTableLine(children
.item(i
));
// "<tr></tr>" is what extractTableLineWithColumn is expected to yield for a row without cells.
1179 if (!line
.equalsIgnoreCase("<tr></tr>")) {
1183 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
1184 children
.item(i
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1185 line
= extractTableLineWithColumn(children
.item(i
).getChildNodes());
1186 if(!line
.equalsIgnoreCase("<tr></tr>")) {
1191 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1192 insideNodes
=children
.item(i
).getChildNodes();
1193 blabla
= new ArrayList
<String
>();
1194 for (int j
=0;j
<insideNodes
.getLength();j
++){
1195 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1196 String inlinetext
= getInlineText(nametosave
, refMods
, insideNodes
.item(j
));
1197 if (!inlinetext
.isEmpty()) {
1198 blabla
.add(inlinetext
);
1201 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("#text")) {
1202 if(!insideNodes
.item(j
).getTextContent().trim().isEmpty()){
1203 blabla
.add(insideNodes
.item(j
).getTextContent().trim());
1204 // localdescr += insideNodes.item(j).getTextContent().trim();
1208 if (!blabla
.isEmpty()) {
1209 String blaStr
= StringUtils
.join(blabla
," ").trim();
1210 if(!stringIsEmpty(blaStr
)) {
1211 setParticularDescription(blaStr
,acceptedTaxon
,defaultTaxon
, refMods
,currentFeature
);
1217 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text")){
1218 if(!children
.item(i
).getTextContent().trim().isEmpty()){
1219 localdescr
= children
.item(i
).getTextContent().trim();
1220 if(!stringIsEmpty(localdescr
)) {
1221 setParticularDescription(localdescr
,acceptedTaxon
,defaultTaxon
, refMods
,currentFeature
);
// A table that consists of the bare wrapper only carries no content.
1228 if (!table
.equalsIgnoreCase("<table></table>")){
1229 // System.out.println("TABLE : "+table);
// `text` is assigned a new list above, so only the emptiness check can fail here.
1233 if (text
!=null && !text
.isEmpty()) {
1234 return StringUtils
.join(text
," ");
1246 private String
extractTableLine(Node child
) {
// Converts the child nodes of `child` with extractTableLineWithColumn when its "otherType"
// attribute equals "tr" (case-insensitive).
// NOTE(review): the attribute is dereferenced without a null check - presumably every node passed
// in carries "otherType"; confirm against the callers.
1247 //logger.info("extractTableLine");
1250 if (child
.getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1251 line
= extractTableLineWithColumn(child
.getChildNodes());
1262 private String
extractTableHead(Node child
) {
// Scans the child nodes of `child`; every "tax:div" child whose "otherType" attribute equals "tr"
// (case-insensitive) is converted with extractTableLineWithColumn.
// NOTE(review): "otherType" is dereferenced without a null check for every tax:div child - confirm
// that the attribute is always present.
1263 //logger.info("extractTableHead");
1267 NodeList trNodes
= child
.getChildNodes();
1268 for (int k
=0;k
<trNodes
.getLength();k
++){
1269 if (trNodes
.item(k
).getNodeName().equalsIgnoreCase("tax:div")
1270 && trNodes
.item(k
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1271 line
= extractTableLineWithColumn(trNodes
.item(k
).getChildNodes());
1280 * build a html table line, with td columns
1282 * @return an html coded line
1284 private String
extractTableLineWithColumn(NodeList tdNodes
) {
1285 //logger.info("extractTableLineWithColumn");
1288 for (int l
=0;l
<tdNodes
.getLength();l
++){
1289 if (tdNodes
.item(l
).getNodeName().equalsIgnoreCase("tax:p")){
1290 line
+="<td>"+tdNodes
.item(l
).getTextContent()+"</td>";
1298 * @param description: the XML node group
1299 * @param acceptedTaxon: the current acceptedTaxon
1300 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1301 * @param nametosave: the list of objects to save into the CDM
1302 * @param refMods: the current reference extracted from the MODS
1303 * @param featureName: the feature name
1305 @SuppressWarnings({ "unused", "rawtypes" })
1306 private String
extractSpecificFeatureNotStructured(Node description
, Taxon acceptedTaxon
, Taxon defaultTaxon
,
1307 List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
, String featureName
) {
// Flattens `description` into one text for the feature named `featureName` (resolved through
// getFeatureObjectFromString): inline taxon links and non-blank text nodes of every "tax:p" child,
// plus non-blank "#text" children, are collected in document order, joined with single spaces and
// trimmed. The result is stored through setParticularDescription unless stringIsEmpty rejects it.
// NOTE(review): `fullContent` is assigned but not read in the lines shown here.
1308 logger
.info("extractSpecificFeatureNotStructured "+featureName
);
1309 NodeList children
= description
.getChildNodes();
1310 NodeList insideNodes
;
1311 List
<String
> blabla
= new ArrayList
<String
>();
1314 Feature currentFeature
= getFeatureObjectFromString(featureName
);
1316 String fullContent
= description
.getTextContent();
1317 for (int i
=0;i
<children
.getLength();i
++){
1318 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1319 insideNodes
=children
.item(i
).getChildNodes();
1320 for (int j
=0;j
<insideNodes
.getLength();j
++){
1321 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1322 String inlineText
=getInlineText(nametosave
, refMods
, insideNodes
.item(j
));
1323 if(!inlineText
.isEmpty()) {
1324 blabla
.add(inlineText
);
1327 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("#text")) {
1328 if(!insideNodes
.item(j
).getTextContent().trim().isEmpty()){
1329 blabla
.add(insideNodes
.item(j
).getTextContent().trim());
1334 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text")){
1335 if(!children
.item(i
).getTextContent().trim().isEmpty()){
1336 String localdescr
= children
.item(i
).getTextContent().trim();
1337 if(!localdescr
.isEmpty())
1339 blabla
.add(localdescr
);
// `blabla` is assigned a new list above, so only the emptiness check can fail here.
1345 if (blabla
!=null && !blabla
.isEmpty()) {
1346 String blaStr
= StringUtils
.join(blabla
," ").trim();
1347 if (! stringIsEmpty(blaStr
)) {
1348 setParticularDescription(blaStr
,acceptedTaxon
,defaultTaxon
, refMods
,currentFeature
);
1363 private boolean stringIsEmpty(String blaStr
) {
1364 if (!StringUtils
.isEmpty(blaStr
)) {
1365 if (!blaStr
.equalsIgnoreCase(".")) {
1366 if (!blaStr
.equalsIgnoreCase(",")) {
1367 if (!blaStr
.equalsIgnoreCase(";")) {
1379 * @param insideNodes
1383 @SuppressWarnings({ "rawtypes" })
1384 private String
getInlineText(List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
, Node insideNode
) {
// Resolves the name node through getTaxonNameBaseFromXML and renders it as
// "<cdm:taxon uuid='...'>name</cdm:taxon>". The uuid is taken from currentMyName.getTaxon(), not
// from the resolved name; the displayed name is tnb.toString() cut before its first "sec".
// NOTE(review): split("sec") matches the letters "sec" anywhere, so a name containing them
// (e.g. "Insecta") would presumably be cut short - confirm whether " sec" was intended.
1385 //logger.info("getInlineText");
1386 TaxonNameBase tnb
= getTaxonNameBaseFromXML(insideNode
, nametosave
,refMods
,false);
1387 // Taxon tax = getTaxonFromTxonNameBase(tnb, refMods);
1388 Taxon tax
= currentMyName
.getTaxon();
1390 String linkedTaxon
= tnb
.toString().split("sec")[0];//TODO NOT IMPLEMENTED IN THE CDM YET
1391 return "<cdm:taxon uuid='"+tax
.getUuid()+"'>"+linkedTaxon
+"</cdm:taxon>";
1397 * @param featureName
1400 @SuppressWarnings("rawtypes")
1401 private Feature
getFeatureObjectFromString(String featureName
) {
1402 logger
.info("getFeatureObjectFromString");
1403 List
<DefinedTermBase
> features
= importer
.getTermService().list(Feature
.class, null,null,null,null);
1404 Feature currentFeature
=null;
1405 for (DefinedTermBase feature
: features
){
1406 String tmpF
= ((Feature
)feature
).getTitleCache();
1407 if (tmpF
.equalsIgnoreCase(featureName
)) {
1408 currentFeature
=(Feature
)feature
;
1409 // System.out.println("currentFeatureFromList "+currentFeature.getUuid());
1412 if (currentFeature
== null) {
1413 currentFeature
=Feature
.NewInstance(featureName
, featureName
, featureName
);
1414 if(featureName
.equalsIgnoreCase("Other")){
1415 currentFeature
.setUuid(OtherUUID
);
1417 if(featureName
.equalsIgnoreCase(notMarkedUp
)){
1418 currentFeature
.setUuid(NotMarkedUpUUID
);
1420 importer
.getTermService().saveOrUpdate(currentFeature
);
1422 return currentFeature
;
1429 * @param children: the XML node group
1430 * @param nametosave: the list of objects to save into the CDM
1431 * @param acceptedTaxon: the current acceptedTaxon
1432 * @param refMods: the current reference extracted from the MODS
1433 * @param fullContent :the parsed XML content
1434 * @return a list of description (text)
1436 @SuppressWarnings({ "unused", "rawtypes" })
1437 private List
<String
> parseParagraph(List
<TaxonNameBase
> nametosave
, Taxon acceptedTaxon
, Reference
<?
> refMods
, Node paragraph
, Feature feature
){
// Walks the children of `paragraph` and returns the collected description texts:
//  - non-blank "#text" children are appended (trimmed) to `descr`,
//  - each "tax:p" child is scanned node by node: inline taxon links, non-blank text, the title of
//    "tax:bibref" nodes and the title cache returned for "tax:collection_event" nodes are gathered
//    and, when the space-joined result is not empty, added as one entry to fullDescription,
//  - the results of extractSpecificFeature for top-level "tax:figure" children and for "tax:div"
//    children with type "Other" and otherType "table" are added to fullDescription.
// A non-empty `descr` is finally stored through setParticularDescription with the feature from
// getNotMarkedUpFeatureObject().
// NOTE(review): `descr` is declared outside the lines shown here - confirm that it is a fresh
// local, otherwise text would accumulate across calls.
1438 logger
.info("parseParagraph "+feature
.toString());
1439 List
<String
> fullDescription
= new ArrayList
<String
>();
1440 // String localdescr;
1442 NodeList insideNodes
;
1443 boolean collectionEvent
= false;
1444 List
<Node
>collectionEvents
= new ArrayList
<Node
>();
1446 NodeList children
= paragraph
.getChildNodes();
1448 for (int i
=0;i
<children
.getLength();i
++){
1450 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text") && !children
.item(i
).getTextContent().trim().isEmpty()){
1451 descr
+= children
.item(i
).getTextContent().trim();
1453 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1454 insideNodes
=children
.item(i
).getChildNodes();
1455 List
<String
> blabla
= new ArrayList
<String
>();
1456 for (int j
=0;j
<insideNodes
.getLength();j
++){
1457 boolean nodeKnown
= false;
1458 // System.out.println("insideNodes.item(j).getNodeName() : "+insideNodes.item(j).getNodeName());
1459 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1460 String inlineText
= getInlineText(nametosave
, refMods
, insideNodes
.item(j
));
1461 if (!inlineText
.isEmpty()) {
1462 blabla
.add(inlineText
);
1466 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("#text")) {
1467 if(!insideNodes
.item(j
).getTextContent().trim().isEmpty()){
1468 blabla
.add(insideNodes
.item(j
).getTextContent().trim());
1469 // localdescr += insideNodes.item(j).getTextContent().trim();
1473 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:bibref")) {
1474 String ref
= insideNodes
.item(j
).getTextContent().trim();
// A trailing ";" of a citation longer than one character is replaced by ".".
1475 if (ref
.endsWith(";") && ((ref
.length())>1)) {
1476 ref
=ref
.substring(0, ref
.length()-1)+".";
1478 Reference
<?
> reference
= ReferenceFactory
.newGeneric();
1479 reference
.setTitleCache(ref
, true);
1480 blabla
.add(reference
.getTitleCache());
1483 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:figure")){
1484 String figure
= extractSpecificFeature(insideNodes
.item(j
),acceptedTaxon
,acceptedTaxon
, nametosave
, refMods
, "figure");
// "type" is checked before "otherType"; both attributes are dereferenced without a null check.
1487 if(insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:div") &&
1488 insideNodes
.item(j
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1489 insideNodes
.item(j
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1490 String table
= extractSpecificFeature(insideNodes
.item(j
),acceptedTaxon
,acceptedTaxon
, nametosave
, refMods
, "table");
1493 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1494 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1495 String titlecache
= extractMaterialsDirect(insideNodes
.item(j
), acceptedTaxon
, refMods
, "collection");
1496 blabla
.add(titlecache
);
1497 collectionEvent
=true;
1498 collectionEvents
.add(insideNodes
.item(j
));
1501 // if (!nodeKnown && !insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:pb")) {
1502 // logger.info("Node not handled yet : "+insideNodes.item(j).getNodeName());
1506 if (!StringUtils
.isEmpty(StringUtils
.join(blabla
," "))) {
1507 fullDescription
.add(StringUtils
.join(blabla
," "));
1510 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:figure")){
1511 String figure
= extractSpecificFeature(children
.item(i
),acceptedTaxon
,acceptedTaxon
, nametosave
, refMods
, "Figure");
1512 fullDescription
.add(figure
);
1514 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
1515 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1516 children
.item(i
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1517 String table
= extractSpecificFeature(children
.item(i
),acceptedTaxon
,acceptedTaxon
, nametosave
, refMods
, "table");
1518 fullDescription
.add(table
);
1522 if( !stringIsEmpty(descr
.trim())){
1523 Feature currentFeature
= getNotMarkedUpFeatureObject();
1524 setParticularDescription(descr
.trim(),acceptedTaxon
,acceptedTaxon
, refMods
,currentFeature
);
1526 // if (collectionEvent) {
1527 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1528 // for (Node coll:collectionEvents){
1529 // = extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
1532 return fullDescription
;
1537 * @param description: the XML node group
1538 * @param acceptedTaxon: the current acceptedTaxon
1539 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1540 * @param nametosave: the list of objects to save into the CDM
1541 * @param refMods: the current reference extracted from the MODS
1542 * @param feature: the feature to link the data with
1544 @SuppressWarnings("rawtypes")
1545 private void extractFeature(Node description
, Taxon acceptedTaxon
, Taxon defaultTaxon
, List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
, Feature feature
){
1546 logger
.info("EXTRACT FEATURE "+feature
.toString());
1547 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1548 List
<String
> fullDescription
= parseParagraph( nametosave
, acceptedTaxon
, refMods
, description
,feature
);
1550 // System.out.println("Feature : "+feature.toString()+", "+fullDescription.toString());
1551 if (!fullDescription
.isEmpty() &&!stringIsEmpty(StringUtils
.join(fullDescription
," ").trim())) {
1552 setParticularDescription(StringUtils
.join(fullDescription
," ").trim(),acceptedTaxon
,defaultTaxon
, refMods
,feature
);
1559 * @param descr: the XML Nodegroup to parse
1560 * @param acceptedTaxon: the current acceptedTaxon
1561 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1562 * @param refMods: the current reference extracted from the MODS
1563 * @param currentFeature: the feature name
1566 private void setParticularDescription(String descr
, Taxon acceptedTaxon
, Taxon defaultTaxon
, Reference
<?
> refMods
, Feature currentFeature
) {
1567 logger
.info("setParticularDescription "+currentFeature
.getTitleCache()+", \n blabla : "+descr
);
1568 // System.out.println("setParticularDescription "+currentFeature.getTitleCache()+", \n blabla : "+descr);
1569 // logger.info("acceptedTaxon: "+acceptedTaxon);
1570 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1572 featuresMap
.put(currentFeature
.getTitleCache(),currentFeature
);
1574 TextData textData
= createTextData(descr
, refMods
, currentFeature
);
1576 if(acceptedTaxon
!=null){
1577 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
1578 td
.addElement(textData
);
1579 acceptedTaxon
.addDescription(td
);
1581 sourceHandler
.addAndSaveSource(refMods
, td
, null);
1582 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1585 if(! descr
.isEmpty() && (acceptedTaxon
== null) && (defaultTaxon
!= null)){
1587 Taxon tmp
=(Taxon
) importer
.getTaxonService().find(defaultTaxon
.getUuid());
1589 defaultTaxon
=CdmBase
.deproxy(tmp
,Taxon
.class);
1591 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
1593 }catch(Exception e
){
1594 logger
.debug("TAXON EXISTS"+defaultTaxon
);
1597 TaxonDescription td
=importer
.getTaxonDescription(defaultTaxon
, false, true);
1598 defaultTaxon
.addDescription(td
);
1599 td
.addElement(textData
);
1600 sourceHandler
.addAndSaveSource(refMods
, td
, null);
1601 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
1608 * @param currentFeature
1611 private TextData
createTextData(String descr
, Reference
<?
> refMods
, Feature currentFeature
) {
1612 //logger.info("createTextData");
1613 TextData textData
= TextData
.NewInstance();
1614 textData
.setFeature(currentFeature
);
1615 sourceHandler
.addSource(refMods
, textData
);
1617 textData
.putText(Language
.UNKNOWN_LANGUAGE(), descr
);
1624 * @param descr: the XML Nodegroup to parse
1625 * @param acceptedTaxon: the current acceptedTaxon
1626 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1627 * @param refMods: the current reference extracted from the MODS
1628 * @param currentFeature: the feature name
1631 private void setParticularDescription(String descr
, Taxon acceptedTaxon
, Taxon defaultTaxon
,Reference
<?
> currentRef
, Reference
<?
> refMods
, Feature currentFeature
) {
1632 // System.out.println("setParticularDescriptionSPecial "+currentFeature);
1633 // logger.info("acceptedTaxon: "+acceptedTaxon);
1634 logger
.info("setParticularDescription");
1635 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1637 featuresMap
.put(currentFeature
.getTitleCache(),currentFeature
);
1638 TextData textData
= createTextData(descr
, refMods
, currentFeature
);
1640 if(! descr
.isEmpty() && (acceptedTaxon
!=null)){
1641 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
1642 td
.addElement(textData
);
1643 acceptedTaxon
.addDescription(td
);
1645 sourceHandler
.addAndSaveSource(refMods
, td
, currentRef
);
1646 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1649 if(! descr
.isEmpty() && (acceptedTaxon
== null) && (defaultTaxon
!= null)){
1651 Taxon tmp
=(Taxon
) importer
.getTaxonService().find(defaultTaxon
.getUuid());
1653 defaultTaxon
=CdmBase
.deproxy(tmp
,Taxon
.class);
1655 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
1657 }catch(Exception e
){
1658 logger
.debug("TAXON EXISTS"+defaultTaxon
);
1661 TaxonDescription td
=importer
.getTaxonDescription(defaultTaxon
, false, true);
1662 defaultTaxon
.addDescription(td
);
1663 td
.addElement(textData
);
1664 sourceHandler
.addAndSaveSource(currentRef
, td
,currentRef
);
1665 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
1672 * @param synonyms: the XML Nodegroup to parse
1673 * @param nametosave: the list of objects to save into the CDM
1674 * @param acceptedTaxon: the current acceptedTaxon
1675 * @param refMods: the current reference extracted from the MODS
1677 @SuppressWarnings({ "rawtypes" })
1678 private void extractSynonyms(Node synonyms
, Taxon acceptedTaxon
,Reference
<?
> refMods
) {
1679 logger
.info("extractSynonyms");
1680 //System.out.println("extractSynonyms for: "+acceptedTaxon);
1681 Taxon ttmp
= (Taxon
) importer
.getTaxonService().find(acceptedTaxon
.getUuid());
1683 acceptedTaxon
= CdmBase
.deproxy(ttmp
,Taxon
.class);
1686 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1688 NodeList children
= synonyms
.getChildNodes();
1689 TaxonNameBase nameToBeFilled
= null;
1690 List
<MyName
> names
= new ArrayList
<MyName
>();
1692 if(synonyms
.getNodeName().equalsIgnoreCase("tax:name")){
1695 myName
= extractScientificNameSynonym(synonyms
,refMods
);
1697 } catch (TransformerFactoryConfigurationError e
) {
1699 } catch (TransformerException e
) {
1705 for (int i
=0;i
<children
.getLength();i
++){
1706 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1707 NodeList tmp
= children
.item(i
).getChildNodes();
1708 // String fullContent = children.item(i).getTextContent();
1709 for (int j
=0; j
< tmp
.getLength();j
++){
1710 if(tmp
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1713 myName
= extractScientificNameSynonym(tmp
.item(j
),refMods
);
1715 } catch (TransformerFactoryConfigurationError e
) {
1717 } catch (TransformerException e
) {
1724 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:name")){
1727 myName
= extractScientificNameSynonym(children
.item(i
),refMods
);
1729 } catch (TransformerFactoryConfigurationError e
) {
1731 } catch (TransformerException e
) {
1737 NomenclaturalStatusType statusType
= null;
1738 //System.out.println("names: "+names);
1739 for(MyName name
:names
){
1740 //System.out.println("HANDLE NAME "+name);
1744 nameToBeFilled
= name
.getTaxonNameBase();
1746 Synonym synonym
= name
.getSyno();
1747 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1748 nameToBeFilled = parser.parseFullName(name.getName(), nomenclaturalCode, name.getRank());
1749 if (nameToBeFilled.hasProblem() &&
1750 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1751 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1752 addProblemNameToFile(name.getName(),"",nomenclaturalCode,name.getRank());
1753 nameToBeFilled = solveNameProblem(name.getOriginalName(), name.getName(), parser,name.getAuthor(), name.getRank());
1755 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
1757 if (!name
.getIdentifier().isEmpty() && (name
.getIdentifier().length()>2)){
1758 setLSID(name
.getIdentifier(), synonym
);
1761 Set
<Synonym
> synonymsSet
= acceptedTaxon
.getSynonyms();
1762 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1763 boolean synoExist
= false;
1764 for (Synonym syn
: synonymsSet
){
1765 //System.out.println(syn.getName()+" -- "+syn.getSec());
1766 boolean a
=syn
.getName().equals(synonym
.getName());
1767 boolean b
= syn
.getSec().equals(synonym
.getSec());
1772 if (!synonymsSet
.contains(synonym
) && ! (synoExist
)) {
1773 //System.out.println("SYNONYM");
1774 sourceHandler
.addSource(refMods
, synonym
);
1776 acceptedTaxon
.addSynonym(synonym
, SynonymRelationshipType
.SYNONYM_OF(),refMods
, null);
1780 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1785 * @param refgroup: the XML nodes
1786 * @param nametosave: the list of objects to save into the CDM
1787 * @param acceptedTaxon: the current acceptedTaxon
1788 * @param nametosave: the list of objects to save into the CDM
1789 * @param refMods: the current reference extracted from the MODS
1790 * @return the acceptedTaxon (why?)
1791 * handle cases where the bibref are inside <p> and outside
1793 @SuppressWarnings({ "rawtypes" })
1794 private Taxon
extractReferences(Node refgroup
, List
<TaxonNameBase
> nametosave
, Taxon acceptedTaxon
, Reference
<?
> refMods
) {
1795 logger
.info("extractReferences");
1796 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1798 NodeList children
= refgroup
.getChildNodes();
1799 NonViralName
<?
> nameToBeFilled
= getNonViralNameAccNomenclature();
1801 ReferenceBuilder refBuild
= new ReferenceBuilder(sourceHandler
);
1802 for (int i
=0;i
<children
.getLength();i
++){
1803 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:bibref")){
1804 String ref
= children
.item(i
).getTextContent().trim();
1805 refBuild
.builReference(ref
, treatmentMainName
, nomenclaturalCode
, acceptedTaxon
, refMods
);
1806 if (!refBuild
.isFoundBibref()){
1807 extractReferenceRawText(children
.item(i
).getChildNodes(), nameToBeFilled
, refMods
, acceptedTaxon
);
1811 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1812 NodeList references
= children
.item(i
).getChildNodes();
1814 for (int j
=0;j
<references
.getLength();j
++){
1815 if(references
.item(j
).getNodeName().equalsIgnoreCase("tax:bibref")){
1816 String ref
= references
.item(j
).getTextContent().trim();
1817 refBuild
.builReference(ref
, treatmentMainName
, nomenclaturalCode
, acceptedTaxon
, refMods
);
1820 if (references
.item(j
).getNodeName().equalsIgnoreCase("#text")
1821 && !references
.item(j
).getTextContent().trim().isEmpty()){
1822 descr
+= references
.item(j
).getTextContent().trim();
1826 if (!refBuild
.isFoundBibref()){
1827 //if it's not tagged, put it as row information.
1828 // extractReferenceRawText(references, nameToBeFilled, nametosave, refMods, acceptedTaxon);
1829 //then put it as a not markup feature if not empty
1830 if (!stringIsEmpty(descr
.trim())){
1831 Feature currentFeature
= getNotMarkedUpFeatureObject();
1832 setParticularDescription(descr
.trim(),acceptedTaxon
,acceptedTaxon
, refMods
,currentFeature
);
1837 // importer.getClassificationService().saveOrUpdate(classification);
1838 return acceptedTaxon
;
1843 * get the non viral name according to the current nomenclature
1846 private NonViralName
<?
> getNonViralNameAccNomenclature() {
1847 //logger.info("getNonViralNameAccNomenclature");
1848 NonViralName
<?
> nameToBeFilled
= null;
1849 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNAFP
)){
1850 nameToBeFilled
= BotanicalName
.NewInstance(null);
1852 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICZN
)){
1853 nameToBeFilled
= ZoologicalName
.NewInstance(null);
1855 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNB
)){
1856 nameToBeFilled
= BacterialName
.NewInstance(null);
1858 return nameToBeFilled
;
1862 * @return the feature object for the category "not marked up"
1864 @SuppressWarnings("rawtypes")
1865 private Feature
getNotMarkedUpFeatureObject() {
1866 logger
.info("getNotMarkedUpFeatureObject");
1867 List
<DefinedTermBase
> features
= importer
.getTermService().list(Feature
.class, null,null,null,null);
1868 Feature currentFeature
=null;
1869 for (DefinedTermBase feat
: features
){
1870 String tmpF
= ((Feature
)feat
).getTitleCache();
1871 if (tmpF
.equalsIgnoreCase(notMarkedUp
)) {
1872 currentFeature
=(Feature
)feat
;
1875 if (currentFeature
== null) {
1876 currentFeature
=Feature
.NewInstance(notMarkedUp
, notMarkedUp
, notMarkedUp
);
1877 currentFeature
.setUuid(NotMarkedUpUUID
);
1878 importer
.getTermService().saveOrUpdate(currentFeature
);
1880 return currentFeature
;
1885 * handle cases where the bibref are inside <p> and outside
// Handles reference nodes that carry no bibref markup: walks the given child nodes, remembers the
// scientific name of every tax:name child in currentMyName and, for every non-blank text node, links
// that name to acceptedTaxon (as a synonym, or through the nomenclatural reference of the accepted name).
//   references     - child nodes of the untagged reference
//   nameToBeFilled - name object that receives the nomenclatural status parsed below
//   refMods        - reference used as sec of new synonyms and as their source
//   acceptedTaxon  - taxon the names are attached to; it is saved at the end of each text-node pass
1887 @SuppressWarnings("rawtypes")
1888 private void extractReferenceRawText(NodeList references
, NonViralName
<?
> nameToBeFilled
, Reference
<?
> refMods
,
1889 Taxon acceptedTaxon
) {
1890 logger
.info("extractReferenceRawText");
1891 String refString
="";
1892 NomenclaturalStatusType statusType
= null;
// Start from an empty name; it is replaced whenever a tax:name child is met below.
1893 currentMyName
= new MyName(true);
1894 for (int j
=0;j
<references
.getLength();j
++){
1895 acceptedTaxon
=CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1896 //no bibref tag inside
1897 // System.out.println("references.item(j).getNodeName()"+references.item(j).getNodeName());
// A tagged scientific name: remember it as the name the following text refers to.
1898 if (references
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1901 currentMyName
= extractScientificName(references
.item(j
),refMods
);
1902 // if (myName.getNewName().isEmpty()) {
1903 // name=myName.getOriginalName()+"---"+myName.getRank()+"---"+myName.getIdentifier()+"---"+myName.getStatus();
1905 // name=myName.getNewName()+"---"+myName.getRank()+"---"+myName.getIdentifier()+"---"+myName.getStatus();
1907 } catch (TransformerFactoryConfigurationError e
) {
1909 } catch (TransformerException e
) {
1913 // name=name.trim();
// Keep the trimmed text of the latest text node; it is used further down as the title of refS.
1915 if (references
.item(j
).getNodeName().equalsIgnoreCase("#text")){
1916 refString
= references
.item(j
).getTextContent().trim();
// Only non-blank text nodes trigger the name/reference handling below.
1918 if(references
.item(j
).getNodeName().equalsIgnoreCase("#text") && !references
.item(j
).getTextContent().trim().isEmpty()){
// Translate the status string of the current name; unknown strings are written to the problem file.
1921 if (!currentMyName
.getStatus().isEmpty()){
1923 statusType
= nomStatusString2NomStatus(currentMyName
.getStatus());
1924 } catch (UnknownCdmTypeException e
) {
1925 addProblematicStatusToFile(currentMyName
.getStatus());
1926 logger
.warn("Problem with status");
1931 /*INonViralNameParser parser = NonViralNameParserImpl.NewInstance();*/
1932 String fullLineRefName
= references
.item(j
).getTextContent().trim();
// askIfNameContained classifies the text line: 0 and 1 are handled explicitly below
// (0: synonym cited with refMods, 1: synonym cited with a new reference titled with the line).
1933 int nameOrRefOrOther
=2;
1934 nameOrRefOrOther
=askIfNameContained(fullLineRefName
);
1935 // System.out.println("NAMEORREFOR?? "+nameOrRefOrOther);
1936 if (nameOrRefOrOther
==0){
1937 /*TaxonNameBase nameTBF = parser.parseFullName(fullLineRefName, nomenclaturalCode, Rank.UNKNOWN_RANK());
1938 if (nameTBF.hasProblem() &&
1939 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1940 addProblemNameToFile(fullLineRefName,"",nomenclaturalCode,Rank.UNKNOWN_RANK());
1941 nameTBF=solveNameProblem(fullLineRefName, fullLineRefName,parser,currentMyName.getAuthor(), currentMyName.getRank());
1943 nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
// Use the name object already built for the current name.
1945 TaxonNameBase nameTBF
= currentMyName
.getTaxonNameBase();
1946 Synonym synonym
= null;
1947 if (!currentMyName
.getStatus().isEmpty()){
1949 statusType
= nomStatusString2NomStatus(currentMyName
.getStatus());
// NOTE(review): the status is added to nameToBeFilled, not to nameTBF which backs the synonym - confirm.
1950 nameToBeFilled
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
1951 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
1952 } catch (UnknownCdmTypeException e
) {
1953 addProblematicStatusToFile(currentMyName
.getStatus());
1954 logger
.warn("Problem with status");
// Unknown status: keep the raw status string as appended phrase of the synonym.
1955 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
1956 synonym
.setAppendedPhrase(currentMyName
.getStatus());
1960 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
// Compare the new synonym with the existing ones by name and sec before adding it.
1963 Set
<Synonym
> synonymsSet
= acceptedTaxon
.getSynonyms();
1964 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1965 boolean synoExist
= false;
1966 for (Synonym syn
: synonymsSet
){
1967 // System.out.println(syn.getName()+" -- "+syn.getSec());
1968 boolean a
=syn
.getName().equals(synonym
.getName());
1969 boolean b
= syn
.getSec().equals(synonym
.getSec());
1974 if (!synonymsSet
.contains(synonym
) && ! (synoExist
)) {
1975 sourceHandler
.addSource(refMods
, synonym
);
1977 acceptedTaxon
.addSynonym(synonym
, SynonymRelationshipType
.SYNONYM_OF(),refMods
, null);
// Case 1: the text line itself becomes a generic reference, used as citation of the synonym relationship.
1981 if (nameOrRefOrOther
==1){
1982 Reference
<?
> re
= ReferenceFactory
.newGeneric();
1983 re
.setTitleCache(fullLineRefName
, true);
1985 /* TaxonNameBase nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
1986 if (nameTBF.hasProblem() &&
1987 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1988 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
1989 nameTBF=solveNameProblem(currentMyName.getName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
1991 nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1993 TaxonNameBase nameTBF
= currentMyName
.getTaxonNameBase();
1994 Synonym synonym
= null;
1995 if (!currentMyName
.getStatus().isEmpty()){
1997 statusType
= nomStatusString2NomStatus(currentMyName
.getStatus());
1998 nameToBeFilled
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
1999 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
2000 } catch (UnknownCdmTypeException e
) {
2001 addProblematicStatusToFile(currentMyName
.getStatus());
2002 logger
.warn("Problem with status");
2003 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
2004 synonym
.setAppendedPhrase(currentMyName
.getStatus());
2008 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
2011 Set
<Synonym
> synonymsSet
= acceptedTaxon
.getSynonyms();
2012 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
2013 boolean synoExist
= false;
2014 for (Synonym syn
: synonymsSet
){
2015 // System.out.println(syn.getName()+" -- "+syn.getSec());
2016 boolean a
=syn
.getName().equals(synonym
.getName());
2017 boolean b
= syn
.getSec().equals(synonym
.getSec());
2022 if (!synonymsSet
.contains(synonym
) && ! (synoExist
)) {
2023 sourceHandler
.addSource(refMods
, synonym
);
// Unlike case 0, the relationship cites the freshly created reference re.
2025 acceptedTaxon
.addSynonym(synonym
, SynonymRelationshipType
.SYNONYM_OF(),re
, null);
// Identifiers longer than two characters are passed to setLSID for the accepted taxon.
2031 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
2032 setLSID(currentMyName
.getIdentifier(), acceptedTaxon
);
// A name was extracted: compare it with the accepted taxon's title (the part before "sec").
2036 if(!currentMyName
.getName().isEmpty()){
2037 //logger.info("acceptedTaxon and name: *"+acceptedTaxon.getTitleCache()+"*, *"+currentMyName.getName()+"*");
2038 if (acceptedTaxon
.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(currentMyName
.getName().trim())){
// Same name: the collected text becomes the nomenclatural reference of the accepted name.
2039 Reference
<?
> refS
= ReferenceFactory
.newGeneric();
2040 refS
.setTitleCache(refString
, true);
2041 // TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
2042 // acceptedTaxon.addDescription(td);
2043 // acceptedTaxon.addSource(refSource);
2045 // TextData textData = TextData.NewInstance(Feature.CITATION());
2047 // textData.addSource(null, null, refS, null);
2048 // td.addElement(textData);
2049 // td.addSource(refSource);
2050 // importer.getDescriptionService().saveOrUpdate(td);
2053 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
2054 setLSID(currentMyName
.getIdentifier(), acceptedTaxon
);
2058 acceptedTaxon
.getName().setNomenclaturalReference(refS
);
2061 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2062 TaxonNameBase nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
2063 if (nameTBF.hasProblem() &&
2064 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
2065 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
2066 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
2067 nameTBF=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
2069 nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
// Presumably the branch for a name that differs from the accepted one: it is registered as a synonym,
// following the same steps as in the cases above - TODO confirm the branch structure.
2071 TaxonNameBase nameTBF
= currentMyName
.getTaxonNameBase();
2072 Synonym synonym
= null;
2073 if (!currentMyName
.getStatus().isEmpty()){
2075 statusType
= nomStatusString2NomStatus(currentMyName
.getStatus());
2076 nameToBeFilled
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
2077 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
2078 } catch (UnknownCdmTypeException e
) {
2079 addProblematicStatusToFile(currentMyName
.getStatus());
2080 logger
.warn("Problem with status");
2081 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
2082 synonym
.setAppendedPhrase(currentMyName
.getStatus());
2086 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
// Here the identifier is attached to the synonym instead of the accepted taxon.
2090 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
2091 setLSID(currentMyName
.getIdentifier(), synonym
);
2094 Set
<Synonym
> synonymsSet
= acceptedTaxon
.getSynonyms();
2095 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
2096 boolean synoExist
= false;
2097 for (Synonym syn
: synonymsSet
){
2098 // System.out.println(syn.getName()+" -- "+syn.getSec());
2099 boolean a
=syn
.getName().equals(synonym
.getName());
2100 boolean b
= syn
.getSec().equals(synonym
.getSec());
2105 if (!synonymsSet
.contains(synonym
) && ! (synoExist
)) {
2106 sourceHandler
.addSource(refMods
, synonym
);
2108 acceptedTaxon
.addSynonym(synonym
, SynonymRelationshipType
.SYNONYM_OF(),refMods
, null);
// Persist the changes made to the accepted taxon.
2112 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
2120 * @param acceptedTaxon
2122 @SuppressWarnings("rawtypes")
2123 private void setLSID(String identifier
, TaxonBase
<?
> taxon
) {
2124 //logger.info("setLSID");
2125 // boolean lsidok=false;
2126 String id
= identifier
.split("__")[0];
2127 String source
= identifier
.split("__")[1];
2128 if (id
.indexOf("lsid")>-1){
2130 LSID lsid
= new LSID(id
);
2131 taxon
.setLsid(lsid
);
2133 } catch (MalformedLSIDException e
) {
2134 logger
.warn("Malformed LSID");
2139 //logger.info("search reference for LSID");
2140 // if ((id.indexOf("lsid")<0) || !lsidok){
2141 //ADD ORIGINAL SOURCE ID EVEN IF LSID
2142 Reference
<?
> re
= null;
2143 Pager
<Reference
> references
= importer
.getReferenceService().findByTitle(Reference
.class, source
, MatchMode
.EXACT
, null, 1, null, null, null);
2144 if( references
!=null && references
.getCount()>0){
2145 re
=references
.getRecords().get(0);
2147 //logger.info("search reference for LSID-end");
2149 re
= ReferenceFactory
.newGeneric();
2150 re
.setTitleCache(source
, true);
2151 importer
.getReferenceService().saveOrUpdate(re
);
2153 re
=CdmBase
.deproxy(re
, Reference
.class);
2155 //logger.info("search source for LSID");
2156 Set
<IdentifiableSource
> sources
= taxon
.getSources();
2157 boolean lsidinsource
=false;
2158 boolean urlinsource
=false;
2159 for (IdentifiableSource src
:sources
){
2160 if (id
.equalsIgnoreCase(src
.getIdInSource()) && re
.getTitleCache().equals(src
.getCitation().getTitleCache())) {
2163 if (src
.getIdInSource() == null && re
.getTitleCache().equals(sourceUrlRef
.getTitleCache())) {
2168 taxon
.addSource(OriginalSourceType
.Import
, id
,null,re
,null);
2172 sourceUrlRef
=CdmBase
.deproxy(sourceUrlRef
, Reference
.class);
2173 taxon
.addSource(OriginalSourceType
.Import
, null,null,sourceUrlRef
,null);
2180 * try to solve a parsing problem for a scientific name
2181 * @param original : the name from the OCR document
2182 * @param name : the tagged version
2184 * @return the corrected TaxonNameBase
2186 /* @SuppressWarnings({ "unchecked", "rawtypes" })
2187 private TaxonNameBase<?,?> solveNameProblem(String original, String name, INonViralNameParser parser, String author, Rank rank) {
2188 Map<String,String> ato = namesMap.get(original);
2190 ato = namesMap.get(original+" "+author);
2194 if (ato == null && rank.equals(Rank.UNKNOWN_RANK())){
2195 rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
2197 if (ato != null && rank.equals(Rank.UNKNOWN_RANK())){
2198 rank = getRank(ato);
2200 // TaxonNameBase<?,?> nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
2201 TaxonNameBase<?,?> nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
2202 // logger.info("RANK: "+rank);
2204 List<ParserProblem> problems = nameTBF.getParsingProblems();
2205 for (ParserProblem pb:problems) {
2206 System.out.println(pb.toString());
2208 while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
2209 addProblemNameToFile(name,author,nomenclaturalCode,rank);
2210 String fullname=name;
2211 if(! skippQuestion) {
2212 fullname = getFullReference(name,nameTBF.getParsingProblems());
2214 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2215 nameTBF = BotanicalName.NewInstance(null);
2217 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2218 nameTBF = ZoologicalName.NewInstance(null);
2220 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2221 nameTBF= BacterialName.NewInstance(null);
2223 parser.parseReferencedName(nameTBF, fullname, rank, false);
2228 if (name.indexOf(author)>-1) {
2229 nameTBF = parser.parseSimpleName(name.substring(0, name.indexOf(author)), nomenclaturalCode, rank);
2231 nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
2233 if (nameTBF.hasProblem()){
2234 if (name.indexOf(author)>-1) {
2235 addProblemNameToFile(name.substring(0, name.indexOf(author)),author,nomenclaturalCode,rank);
2237 addProblemNameToFile(name,author,nomenclaturalCode,rank);
2239 // System.out.println("TBF still has problems "+nameTBF.hasProblem());
2240 problems = nameTBF.getParsingProblems();
2241 for (ParserProblem pb:problems) {
2242 System.out.println(pb.toString());
2244 nameTBF.setFullTitleCache(name, true);
2246 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2247 ((BotanicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2249 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2250 ((ZoologicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2252 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2253 ((BacterialName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2256 // logger.info("FULL TITLE CACHE "+name);
2258 nameTBF.setFullTitleCache(name, true);
2267 * @param nomenclatureNode: the XML nodes
2268 * @param nametosave: the list of objects to save into the CDM
2269 * @param refMods: the current reference extracted from the MODS
// Processes a nomenclature node: reads an optional tax:status child, resolves the first tax:name child
// into the accepted taxon of the treatment (creating and saving a new Taxon when there is no existing one
// with a matching status), passes tax:name children met afterwards to extractSynonyms and tax:ref_group
// children to extractReferences.
//   nomenclatureNode - XML node whose children are scanned
//   nametosave       - handed through to extractReferences
//   refMods          - reference used as source and, for new taxa without nomenclatural reference, as sec
// Returns acceptedTaxon, which starts as null and is only assigned while handling a tax:name child.
2272 @SuppressWarnings({ "rawtypes" })
2273 private Taxon
extractNomenclature(Node nomenclatureNode
, List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
) throws ClassCastException
{
2274 refMods
=CdmBase
.deproxy(refMods
, Reference
.class);
2276 logger
.info("extractNomenclature");
2277 NodeList children
= nomenclatureNode
.getChildNodes();
2279 NonViralName
<?
> nameToBeFilled
= null;
2280 Taxon acceptedTaxon
= null;
2281 // INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2283 // String fullContent = nomenclatureNode.getTextContent();
// First pass over the children: pick up the nomenclatural status, if a tax:status child is present.
2285 NomenclaturalStatusType statusType
= null;
2286 for (int i
=0;i
<children
.getLength();i
++){
2287 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:status")){
2288 String status
= children
.item(i
).getTextContent().trim();
2289 if (!status
.isEmpty()){
2291 statusType
= nomStatusString2NomStatus(status
);
2292 } catch (UnknownCdmTypeException e
) {
2293 addProblematicStatusToFile(status
);
2294 logger
.warn("Problem with status");
// Second pass: free text, collection events, names and reference groups.
// containsSynonyms is false only until the first tax:name child has been handled.
2300 boolean containsSynonyms
=false;
2301 for (int i
=0;i
<children
.getLength();i
++){
2303 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text")) {
2304 freetext
=children
.item(i
).getTextContent();
2306 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:collection_event")) {
2307 // System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
2308 extractMaterialsDirect(children
.item(i
), acceptedTaxon
, refMods
, "collection");
2310 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:name")){
2311 //System.out.println("HANDLE FIRST NAME OF THE LIST");
2312 if(!containsSynonyms
){
2313 //System.out.println("I : "+i);
// The first name is the main name of the treatment; its new and original spellings are kept in fields.
2314 currentMyName
= new MyName(false);
2316 currentMyName
= extractScientificName(children
.item(i
),refMods
);
2317 treatmentMainName
= currentMyName
.getNewName();
2318 originalTreatmentName
= currentMyName
.getOriginalName();
2320 } catch (TransformerFactoryConfigurationError e1
) {
2322 } catch (TransformerException e1
) {
// Continue only when the rank is unknown, lower than, or equal to the configured maximum rank.
2326 if (currentMyName
.getRank().equals(Rank
.UNKNOWN_RANK()) || currentMyName
.getRank().isLower(configState
.getConfig().getMaxRank()) || currentMyName
.getRank().equals(configState
.getConfig().getMaxRank())){
2327 maxRankRespected
=true;
2329 nameToBeFilled
=currentMyName
.getTaxonNameBase();
2331 // acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
2332 acceptedTaxon
=currentMyName
.getTaxon();
2333 //System.out.println("TreatmentName "+treatmentMainName+" - "+acceptedTaxon);
// An existing taxon is reused only if its nomenclatural status agrees with the one read above.
2336 boolean statusMatch
=false;
2337 if(acceptedTaxon
!=null ){
2338 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
2339 statusMatch
=compareStatus(acceptedTaxon
, statusType
);
2340 //System.out.println("statusMatch: "+statusMatch);
// No taxon yet, or the status differs: build a new accepted taxon from the parsed name.
2342 if (acceptedTaxon
==null || (acceptedTaxon
!= null && !statusMatch
)){
2344 nameToBeFilled
=currentMyName
.getTaxonNameBase();
2345 if (nameToBeFilled
!=null){
// The original spelling of the treatment name is kept as a name description.
2346 if (!originalTreatmentName
.isEmpty()) {
2347 TaxonNameDescription td
= TaxonNameDescription
.NewInstance();
2348 td
.setTitleCache(originalTreatmentName
, true);
2349 nameToBeFilled
.addDescription(td
);
2352 if(statusType
!= null) {
2353 nameToBeFilled
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
2355 sourceHandler
.addSource(refMods
, nameToBeFilled
);
// The sec of the new taxon is refMods unless the name already has a nomenclatural reference.
2357 if (nameToBeFilled
.getNomenclaturalReference() == null) {
2358 acceptedTaxon
= new Taxon(nameToBeFilled
,refMods
);
2359 //System.out.println("NEW ACCEPTED HERE "+nameToBeFilled);
2362 acceptedTaxon
= new Taxon(nameToBeFilled
,(Reference
<?
>) nameToBeFilled
.getNomenclaturalReference() );//TODO TOFIX reference
2363 //System.out.println("NEW ACCEPTED HERE2 "+nameToBeFilled);
2366 sourceHandler
.addSource(refMods
, acceptedTaxon
);
// Unless the original secundum is to be kept, it is overwritten with the configured one.
2368 if(!configState
.getConfig().doKeepOriginalSecundum()) {
2369 acceptedTaxon
.setSec(configState
.getConfig().getSecundum());
2370 //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2371 //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2374 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
2375 setLSID(currentMyName
.getIdentifier(), acceptedTaxon
);
2379 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
2380 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
// Handling of a reused taxon: check whether refMods is already among its sources (compared by title).
// NOTE(review): source.getCitation() is dereferenced without a null check - confirm sources always cite.
2384 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
2385 Set
<IdentifiableSource
> sources
= acceptedTaxon
.getSources();
2386 boolean sourcelinked
=false;
2387 for (IdentifiableSource source
:sources
){
2388 if (source
.getCitation().getTitleCache().equalsIgnoreCase(refMods
.getTitleCache())) {
2392 if (!configState
.getConfig().doKeepOriginalSecundum()) {
2393 acceptedTaxon
.setSec(configState
.getConfig().getSecundum());
2394 //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2395 //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2397 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
2398 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
2400 sourceHandler
.addSource(refMods
, acceptedTaxon
);
2402 if (!sourcelinked
|| !configState
.getConfig().doKeepOriginalSecundum()){
2404 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
2405 setLSID(currentMyName
.getIdentifier(), acceptedTaxon
);
2407 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
// A rank outside the accepted range switches off the tax:ref_group handling further down.
2411 maxRankRespected
=false;
2413 containsSynonyms
=true;
2415 //System.out.println("YOUHOUUU "+i);
// Every tax:name after the first one is treated as a synonym of the accepted taxon.
// A NullPointerException raised while doing so is only logged.
2417 extractSynonyms(children
.item(i
), acceptedTaxon
, refMods
);
2418 }catch(NullPointerException e
){
2419 logger
.warn("nullpointerexception, the accepted taxon might be null");
2422 containsSynonyms
=true;
2424 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:ref_group") && maxRankRespected
){
2425 reloadClassification();
2426 //extract the References within the document
2427 extractReferences(children
.item(i
),nametosave
,acceptedTaxon
,refMods
);
// Non-empty free text is stored as a "not marked up" description of the accepted taxon.
// NOTE(review): acceptedTaxon can still be null at this point, and freetext keeps the last text seen -
// confirm this is intended.
2429 if(!stringIsEmpty(freetext
.trim())) {
2430 setParticularDescription(freetext
.trim(),acceptedTaxon
,acceptedTaxon
, refMods
,getNotMarkedUpFeatureObject());
2434 // importer.getClassificationService().saveOrUpdate(classification);
2435 return acceptedTaxon
;
2443 private boolean compareStatus(TaxonBase
<?
> t
, NomenclaturalStatusType statusType
) {
2444 //logger.info("compareStatus");
2445 boolean statusMatch
=false;
2447 Set
<NomenclaturalStatus
> status
= t
.getName().getStatus();
2448 if (statusType
!=null && status
.size()>0){ //the statusType is known for both taxon
2449 for (NomenclaturalStatus st
:status
){
2450 NomenclaturalStatusType stype
= st
.getType();
2451 if (stype
.toString().equalsIgnoreCase(statusType
.toString())) {
2457 if(statusType
== null && status
.size()==0) {//there is no statusType, we can assume it's the same
2465 * @param acceptedTaxon: the current acceptedTaxon
2466 * @param ref: the current reference extracted from the MODS
2467 * @return the parent for the current accepted taxon
2469 /* private Taxon createParent(Taxon acceptedTaxon, Reference<?> ref) {
2470 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2472 List<Rank> rankList = new ArrayList<Rank>();
2473 rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
2475 List<String> rankListStr = new ArrayList<String>();
2476 for (Rank r:rankList) {
2477 rankListStr.add(r.toString());
2480 String s = acceptedTaxon.getTitleCache();
2483 int addTaxon = askAddParent(s);
2484 logger.info("ADD TAXON: "+addTaxon);
2485 if (addTaxon == 0 ){
2486 Taxon tmp = askParent(acceptedTaxon, classification);
2488 s = askSetParent(s);
2489 r = askRank(s,rankListStr);
2491 NonViralName<?> nameToBeFilled = null;
2492 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2493 nameToBeFilled = BotanicalName.NewInstance(null);
2495 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2496 nameToBeFilled = ZoologicalName.NewInstance(null);
2498 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2499 nameToBeFilled = BacterialName.NewInstance(null);
2501 nameToBeFilled.setTitleCache(s);
2502 nameToBeFilled.setRank(getRank(r));
2504 tax = Taxon.NewInstance(nameToBeFilled, ref);
2510 createParent(tax, ref);
2511 // logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
2512 classification.addParentChild(tax, acceptedTaxon, ref, null);
2515 classification.addChildTaxon(acceptedTaxon, ref, null);
2519 classification.addChildTaxon(acceptedTaxon, ref, null);
2522 // logger.info("RETURN: "+tax );
// Builds the MyName of a synonym from a tax:name node: collects the atomised name parts and the identifier
// from the tax:xmldata child, the free-text name from the text children, chooses between both spellings,
// lets the name parser try the chosen one and falls back to createSynonym when parsing reports problems.
//   name    - the tax:name node
//   refMods - reference stored on the result through setSource
// Declares TransformerFactoryConfigurationError and TransformerException in its throws clause.
2530 private MyName
extractScientificNameSynonym(Node name
, Reference
<?
> refMods
) throws TransformerFactoryConfigurationError
, TransformerException
{
2531 //System.out.println("extractScientificNameSynonym");
2532 logger
.info("extractScientificNameSynonym");
// Lower-cased Darwin Core element names, handed to addAtomisedNamesToMap below.
2533 String
[] rankListToPrint_tmp
={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2534 List
<String
> rankListToPrint
= new ArrayList
<String
>();
2535 for (String r
: rankListToPrint_tmp
) {
2536 rankListToPrint
.add(r
.toLowerCase());
2539 Rank rank
= Rank
.UNKNOWN_RANK();
2540 NodeList children
= name
.getChildNodes();
2541 String originalName
="";
2542 String fullName
= "";
2544 String identifier
="";
2545 HashMap
<String
, String
> atomisedMap
= new HashMap
<String
, String
>();
2546 List
<String
> atomisedName
= new ArrayList
<String
>();
2548 String rankStr
= "";
2551 String status
= extractStatus(children
);
// tax:xmldata holds the atomised name: every child element is recorded in atomisedMap under its
// lower-cased key, and a dwc:taxonRank element carries the rank name as its text content.
2553 for (int i
=0;i
<children
.getLength();i
++){
2554 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:xmldata")){
2555 NodeList atom
= children
.item(i
).getChildNodes();
2556 for (int k
=0;k
<atom
.getLength();k
++){
2557 identifier
= extractIdentifier(identifier
, atom
.item(k
));
2559 rankStr
= atom
.item(k
).getNodeName().toLowerCase();
2560 // logger.info("RANKSTR:*"+rankStr+"*");
2561 if (rankStr
.equalsIgnoreCase("dwc:taxonRank")) {
2562 rankStr
=atom
.item(k
).getTextContent().trim();
2563 tmpRank
= getRank(rankStr
);
2565 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2566 if (tmpRank
!= null){
2569 atomisedMap
.put(rankStr
.toLowerCase(),atom
.item(k
).getTextContent().trim());
2571 addAtomisedNamesToMap(rankListToPrint
, rank
, atomisedName
, atom
);
// Non-blank text children give the name as written; a later text child overwrites an earlier one.
2573 if(children
.item(i
).getNodeName().equalsIgnoreCase("#text") && !StringUtils
.isBlank(children
.item(i
).getTextContent())){
2574 // logger.info("name non atomised: "+children.item(i).getTextContent());
2575 fullName
= children
.item(i
).getTextContent().trim();
2576 // logger.info("fullname: "+fullName);
// Keep the text as written before cleaning, and register the atomised parts under the cleaned name.
2579 originalName
=fullName
;
2580 fullName
= cleanName(fullName
, atomisedName
);
2581 namesMap
.put(fullName
,atomisedMap
);
2583 String atomisedNameStr
= getAtomisedNameStr(atomisedName
);
// Written and atomised spelling disagree: a longer atomised form is taken directly, other cases are
// resolved through askWhichScientificName.
2585 if (fullName
!= null){
2586 // System.out.println("fullname: "+fullName);
2587 // System.out.println("atomised: "+atomisedNameStr);
2588 if (!fullName
.equalsIgnoreCase(atomisedNameStr
)) {
2590 // String defaultN = "";
2591 if (atomisedNameStr
.length()>fullName
.length()) {
2592 newName
=atomisedNameStr
;
2594 if (fullName
.length()>atomisedNameStr
.length() && (rank
.isLower(Rank
.SPECIES()) && fullName
.length()>2 && !fullName
.substring(0, 1).equals("."))) {
2595 newName
=askWhichScientificName(fullName
,atomisedNameStr
,classification
.getTitleCache(),name
);
2601 newName
=askWhichScientificName(fullName
,atomisedNameStr
,classification
.getTitleCache(),name
);
2608 // rank = askForRank(newName, rank, nomenclaturalCode);
2609 // System.out.println("atomised: "+atomisedMap.toString());
2611 // String[] names = new String[5];
// Collect everything known about the name in a MyName object.
2612 MyName myname
= new MyName(true);
2614 //System.out.println("Handle "+newName+ "(rank: "+rank+")");
2615 // System.out.println(atomisedMap.keySet());
2616 fullName
= extractAuthorFromNames(rank
, fullName
, atomisedMap
, myname
);
2617 myname
.setOriginalName(fullName
);
2618 myname
.setNewName(newName
);
2619 myname
.setRank(rank
);
2620 myname
.setIdentifier(identifier
);
2621 myname
.setStatus(status
);
2622 myname
.setSource(refMods
);
2624 // boolean higherAdded=false;
2627 boolean parseNameManually
=false;
2628 INonViralNameParser
<?
> parser
= NonViralNameParserImpl
.NewInstance();
2629 TaxonNameBase
<?
,?
> nameToBeFilledTest
;
2631 //if selected the atomised version
// NOTE(review): == compares String references; this holds only when newName is the very same object
// as atomisedNameStr - confirm that equals() is not what is meant.
2632 if(newName
==atomisedNameStr
){
// Try the atomised spelling first, then the written one; if both have problems the name is built manually.
2633 nameToBeFilledTest
= parser
.parseFullName(atomisedNameStr
, nomenclaturalCode
, rank
);
2634 if (nameToBeFilledTest
.hasProblem()){
2635 addProblemNameToFile("ato",atomisedNameStr
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2636 nameToBeFilledTest
= parser
.parseFullName(fullName
, nomenclaturalCode
,rank
);
2637 if (nameToBeFilledTest
.hasProblem()){
2638 addProblemNameToFile("full",fullName
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2639 parseNameManually
=true;
// Written spelling selected: parse it and record the problem if parsing fails.
// NOTE(review): after a failure the same fullName is parsed again with identical arguments - looks redundant.
2643 nameToBeFilledTest
= parser
.parseFullName(fullName
, nomenclaturalCode
, rank
);
2644 if (nameToBeFilledTest
.hasProblem()){
2645 addProblemNameToFile("fullversion",fullName
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2646 nameToBeFilledTest
= parser
.parseFullName(fullName
, nomenclaturalCode
,rank
);
2647 parseNameManually
=true;
2648 if(!originalName
.equalsIgnoreCase(atomisedNameStr
)) {
2649 addNameDifferenceToFile(originalName
,atomisedNameStr
);
// Unparsable names are assembled from the atomised parts; parsed names are stored on the result and the
// taxon is built from them.
2654 if(parseNameManually
){
2655 //System.out.println("DO IT MANUALLY");
2656 createSynonym(rank
, newName
, atomisedMap
, myname
);
2659 //System.out.println("AUTOMATIC!");
2660 // createAtomisedTaxonString(newName, atomisedMap, myname);
2661 myname
.setParsedName(nameToBeFilledTest
);
2662 myname
.buildTaxon();
2664 //System.out.println("RETURN SYNONYM "+myname.getSyno().toString());
2669 * @throws TransformerFactoryConfigurationError
2670 * @throws TransformerException
2671 * @return a list of possible names
// Builds a MyName (constructed with isSynonym=false) for a scientific name node.
// Steps visible in this method:
//  1. collect the atomised parts found under "tax:xmldata" children and the free-text name
//     found in non-blank "#text" children,
//  2. pick the new name from the free-text and the atomised version (asking the user through
//     askWhichScientificName in some cases),
//  3. parse the name with NonViralNameParserImpl; parseNameManually is set when parsing
//     reports problems and then leads to createAtomisedTaxon.
// name:    the XML node holding the name
// refMods: the reference stored on the resulting MyName via setSource
2673 @SuppressWarnings({ "null", "rawtypes" })
2674 private MyName
extractScientificName(Node name
, Reference
<?
> refMods
) throws TransformerFactoryConfigurationError
, TransformerException
{
2675 logger
.info("extractScientificName");
// tag names whose text is printed as part of the atomised name; lower-cased copy is handed
// to addAtomisedNamesToMap
2677 String
[] rankListToPrint_tmp
={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2678 List
<String
> rankListToPrint
= new ArrayList
<String
>();
2679 for (String r
: rankListToPrint_tmp
) {
2680 rankListToPrint
.add(r
.toLowerCase());
2683 Rank rank
= Rank
.UNKNOWN_RANK();
2684 NodeList children
= name
.getChildNodes();
2685 String originalName
="";
2686 String fullName
= "";
2688 String identifier
="";
// atomisedMap: lower-cased key -> trimmed text; atomisedName: printable parts in order
2689 HashMap
<String
, String
> atomisedMap
= new HashMap
<String
, String
>();
2690 List
<String
> atomisedName
= new ArrayList
<String
>();
2692 String rankStr
= "";
2695 String status
= extractStatus(children
);
2697 for (int i
=0;i
<children
.getLength();i
++){
2698 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:xmldata")){
2699 NodeList atom
= children
.item(i
).getChildNodes();
2700 for (int k
=0;k
<atom
.getLength();k
++){
2701 identifier
= extractIdentifier(identifier
, atom
.item(k
));
2703 rankStr
= atom
.item(k
).getNodeName().toLowerCase();
2704 // logger.info("RANKSTR:*"+rankStr+"*");
// for a dwc:taxonRank node rankStr is replaced by the node text and resolved with getRank
2705 if (rankStr
.equalsIgnoreCase("dwc:taxonRank")) {
2706 rankStr
=atom
.item(k
).getTextContent().trim();
2707 tmpRank
= getRank(rankStr
);
2709 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2710 if (tmpRank
!= null){
2713 atomisedMap
.put(rankStr
.toLowerCase(),atom
.item(k
).getTextContent().trim());
2715 addAtomisedNamesToMap(rankListToPrint
, rank
, atomisedName
, atom
);
// a non-blank text child carries the free-text (non atomised) version of the name
2717 if(children
.item(i
).getNodeName().equalsIgnoreCase("#text") && !StringUtils
.isBlank(children
.item(i
).getTextContent())){
2718 // logger.info("name non atomised: "+children.item(i).getTextContent());
2719 fullName
= children
.item(i
).getTextContent().trim();
2720 // logger.info("fullname: "+fullName);
// keep the untouched text for the difference report written by addNameDifferenceToFile below
2723 originalName
=fullName
;
2724 fullName
= cleanName(fullName
, atomisedName
);
2725 namesMap
.put(fullName
,atomisedMap
);
2727 String atomisedNameStr
= getAtomisedNameStr(atomisedName
);
// choose newName when the free-text and the atomised version differ
2729 if (fullName
!= null){
2730 // System.out.println("fullname: "+fullName);
2731 // System.out.println("atomised: "+atomisedNameStr);
2732 if (!fullName
.equalsIgnoreCase(atomisedNameStr
)) {
2733 //System.out.println("atomisedNameStr vs. fullName:"+atomisedNameStr+"--"+fullName);
2735 // String defaultN = "";
2736 if (atomisedNameStr
.length()>fullName
.length()) {
2737 newName
=atomisedNameStr
;
2739 if (fullName
.length()>atomisedNameStr
.length() && (rank
.isLower(Rank
.SPECIES()) && fullName
.length()>2 && !fullName
.substring(0, 1).equals("."))) {
2740 //System.out.println("là ");
2741 newName
=askWhichScientificName(fullName
,atomisedNameStr
,classification
.getTitleCache(),name
);
2743 //System.out.println("ici");
2748 newName
=askWhichScientificName(fullName
,atomisedNameStr
,classification
.getTitleCache(),name
);
2755 // rank = askForRank(newName, rank, nomenclaturalCode);
2756 // System.out.println("atomised: "+atomisedMap.toString());
2758 // String[] names = new String[5];
2759 MyName myname
= new MyName(false);
2761 //System.out.println("\n\nBUILD "+newName+ "(rank: "+rank+")");
2762 // System.out.println(atomisedMap.keySet());
// extractAuthorFromNames may set the author on myname and returns the name it was given,
// possibly shortened to the part before the author
2763 fullName
= extractAuthorFromNames(rank
, fullName
, atomisedMap
, myname
);
2764 myname
.setOriginalName(fullName
);
2765 myname
.setNewName(newName
);
2767 myname
.setRank(rank
);
2768 myname
.setIdentifier(identifier
);
2769 myname
.setStatus(status
);
2770 myname
.setSource(refMods
);
2772 // boolean higherAdded=false;
2775 boolean parseNameManually
=false;
2776 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
2777 TaxonNameBase nameToBeFilledTest
= null;
2779 //if selected the atomised version
// NOTE(review): == compares object references, not string contents; this is only true when
// newName was assigned atomisedNameStr itself - confirm that equals() is not what is wanted
2780 if(newName
==atomisedNameStr
){
2781 nameToBeFilledTest
= parser
.parseFullName(atomisedNameStr
, nomenclaturalCode
, rank
);
2782 if (nameToBeFilledTest
.hasProblem()){
2783 addProblemNameToFile("ato",atomisedNameStr
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
// atomised version could not be parsed: retry with the free-text version
2784 nameToBeFilledTest
= parser
.parseFullName(fullName
, nomenclaturalCode
,rank
);
2785 if (nameToBeFilledTest
.hasProblem()){
2786 addProblemNameToFile("full",fullName
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2787 parseNameManually
=true;
2791 nameToBeFilledTest
= parser
.parseFullName(fullName
, nomenclaturalCode
, rank
);
2792 if (nameToBeFilledTest
.hasProblem()){
2793 addProblemNameToFile("fullversion",fullName
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
// NOTE(review): this repeats the parse above with the same arguments
2794 nameToBeFilledTest
= parser
.parseFullName(fullName
, nomenclaturalCode
,rank
);
2795 parseNameManually
=true;
2796 if(!originalName
.equalsIgnoreCase(atomisedNameStr
)) {
2797 addNameDifferenceToFile(originalName
,atomisedNameStr
);
2802 //System.out.println("parseNameManually: "+parseNameManually);
2803 if(parseNameManually
){
2804 createAtomisedTaxon(rank
, newName
, atomisedMap
, myname
);
// presumably the branch taken when parsing succeeded (the else line is not visible here):
// store the atomised strings and the parsed name, then build the taxon
2807 createAtomisedTaxonString(newName
, atomisedMap
, myname
);
2808 myname
.setParsedName(nameToBeFilledTest
);
2809 myname
.buildTaxon();
2816 * @param atomisedName
2819 private String
getAtomisedNameStr(List
<String
> atomisedName
) {
2820 //logger.info("getAtomisedNameStr");
2821 String atomisedNameStr
= StringUtils
.join(atomisedName
," ");
2822 while(atomisedNameStr
.contains(" ")) {
2823 atomisedNameStr
=atomisedNameStr
.replace(" ", " ");
2825 atomisedNameStr
=atomisedNameStr
.trim();
2826 return atomisedNameStr
;
2834 private String
extractStatus(NodeList children
) {
2835 logger
.info("extractStatus");
2837 for (int i
=0;i
<children
.getLength();i
++){
2838 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:status") ||
2839 (children
.item(i
).getNodeName().equalsIgnoreCase("tax:namePart") &&
2840 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("status"))){
2841 status
= children
.item(i
).getTextContent().trim();
2853 private String
extractIdentifier(String identifier
, Node atom
) {
2854 //logger.info("extractIdentifier");
2855 if (atom
.getNodeName().equalsIgnoreCase("tax:xid")){
2857 identifier
= atom
.getAttributes().getNamedItem("identifier").getNodeValue();
2858 }catch(Exception e
){
2859 System
.out
.println("pb with identifier, maybe empty");
2862 identifier
+="__"+atom
.getAttributes().getNamedItem("source").getNodeValue();
2863 }catch(Exception e
){
2864 System
.out
.println("pb with identifier, maybe empty");
2871 * @param rankListToPrint
2873 * @param atomisedName
// Appends the printable parts of an atomised name to atomisedName, one entry per matching node.
// Nodes named dwc:taxonRank are ignored. For the other nodes (names compared ignoring case
// unless noted):
//  - dwc:subgenus / dwcranks:subgenus: text wrapped in parentheses
//  - dwcranks:varietyepithet: text prefixed with "var. "
//  - dwc:Subspecies: text prefixed with "subsp. "
//  - node name (lower-cased) contained in rankListToPrint: plain text
//  - rank higher than genus and node name containing "dwcranks:" or "dwc:Family"
//    (case-sensitive indexOf): plain text
// The else lines linking these tests are not visible here, so the exact nesting should be
// confirmed against the full file.
// rankListToPrint: lower-cased node names that are printed as plain text
// rank:            rank of the name being assembled
// atomisedName:    output list, modified in place
// atom:            the atomised child nodes
2876 private void addAtomisedNamesToMap(List
<String
> rankListToPrint
, Rank rank
, List
<String
> atomisedName
, NodeList atom
) {
2877 logger
.info("addAtomisedNamesToMap");
2878 for (int k
=0;k
<atom
.getLength();k
++){
2879 if (!atom
.item(k
).getNodeName().equalsIgnoreCase("dwc:taxonRank") ) {
2880 if (atom
.item(k
).getNodeName().equalsIgnoreCase("dwc:subgenus") || atom
.item(k
).getNodeName().equalsIgnoreCase("dwcranks:subgenus")) {
2881 atomisedName
.add("("+atom
.item(k
).getTextContent().trim()+")");
2883 if(atom
.item(k
).getNodeName().equalsIgnoreCase("dwcranks:varietyepithet") || atom
.item(k
).getNodeName().equalsIgnoreCase("dwc:Subspecies")) {
2884 if(atom
.item(k
).getNodeName().equalsIgnoreCase("dwcranks:varietyepithet")){
2885 atomisedName
.add("var. "+atom
.item(k
).getTextContent().trim());
// NOTE(review): if this test sits inside the varietyepithet/Subspecies branch above, the
// dwc:infraspecificepithet alternative can never be true here - confirm
2887 if(atom
.item(k
).getNodeName().equalsIgnoreCase("dwc:Subspecies") || atom
.item(k
).getNodeName().equalsIgnoreCase("dwc:infraspecificepithet")) {
2888 atomisedName
.add("subsp. "+atom
.item(k
).getTextContent().trim());
2892 if(rankListToPrint
.contains(atom
.item(k
).getNodeName().toLowerCase())) {
2893 atomisedName
.add(atom
.item(k
).getTextContent().trim());
2896 // System.out.println("rank : "+rank.toString());
2897 if (rank
.isHigher(Rank
.GENUS()) && (atom
.item(k
).getNodeName().indexOf("dwcranks:")>-1 || atom
.item(k
).getNodeName().indexOf("dwc:Family")>-1)) {
2898 atomisedName
.add(atom
.item(k
).getTextContent().trim());
2901 // System.out.println("on a oublie qqn "+atom.item(k).getNodeName());
2905 // System.out.println("on a oublie qqn "+atom.item(k).getNodeName());
2915 * @param atomisedName
2918 private String
cleanName(String name
, List
<String
> atomisedName
) {
2919 //logger.info("cleanName");
2920 String fullName
=name
;
2921 if (fullName
!= null){
2922 fullName
= fullName
.replace("( ", "(");
2923 fullName
= fullName
.replace(" )",")");
2925 if (fullName
.trim().isEmpty()){
2926 fullName
=StringUtils
.join(atomisedName
," ");
2929 while(fullName
.contains(" ")) {
2930 fullName
=fullName
.replace(" ", " ");
2931 // logger.info("while");
2933 fullName
=fullName
.trim();
2941 * @param atomisedMap
// Tries to separate the author from the free-text name and stores it on myname.
// When atomisedMap has no "dwc:scientificnameauthorship" entry and the name is not null, the
// author is taken as the text that follows an atomised part inside the name:
//  - rank higher than species: the part after the subgenus ("dwcranks:subgenus", then
//    "dwc:subgenus"), or after the genus when no author was found yet
//  - rank species: the part after "dwc:species"
// Commas are removed from the author, and fullName is cut to the text before the author.
// The last statement sets the author straight from "dwc:scientificnameauthorship"; presumably
// that is the else branch of the first test (the else line is not visible here).
// Returns fullName, possibly shortened (the return line is not visible here - TODO confirm).
// NOTE(review): String.split interprets its argument as a regular expression, so an epithet
// containing regex metacharacters is treated as a pattern; failures end up in the catch blocks.
2945 private String
extractAuthorFromNames(Rank rank
, String name
, HashMap
<String
, String
> atomisedMap
,
2947 logger
.info("extractAuthorFromNames");
2948 String fullName
=name
;
2949 if (atomisedMap
.get("dwc:scientificnameauthorship") == null && fullName
!=null){
2950 // System.out.println("rank : "+rank.toString());
2951 if(rank
.isHigher(Rank
.SPECIES())){
2954 if(atomisedMap
.get("dwcranks:subgenus") != null) {
2955 author
= fullName
.split(atomisedMap
.get("dwcranks:subgenus"))[1].trim();
2957 if(atomisedMap
.get("dwc:subgenus") != null) {
2958 author
= fullName
.split(atomisedMap
.get("dwc:subgenus"))[1].trim();
// fall back to the genus only when no subgenus gave an author
2960 if(author
== null) {
2961 if(atomisedMap
.get("dwc:genus") != null) {
2962 author
= fullName
.split(atomisedMap
.get("dwc:genus"))[1].trim();
// keep only the part of the name that precedes the author
2966 fullName
= fullName
.substring(0, fullName
.indexOf(author
));
2967 author
=author
.replaceAll(",","").trim();
2968 myname
.setAuthor(author
);
2970 }catch(Exception e
){
2971 //could not extract the author
2974 if(rank
.equals(Rank
.SPECIES())){
2977 if(author
== null) {
2978 if(atomisedMap
.get("dwc:species") != null) {
// t is only referenced by the commented-out debug line below
2979 String
[] t
= fullName
.split(atomisedMap
.get("dwc:species"));
2980 // System.out.println("NB ELEMENTS "+t.length +"fullName "+fullName+", "+atomisedMap.get("dwc:species"));
2981 author
= fullName
.split(atomisedMap
.get("dwc:species"))[1].trim();
2982 // System.out.println("AUTEUR "+author);
2986 fullName
= fullName
.substring(0, fullName
.indexOf(author
));
2987 author
=author
.replaceAll(",","").trim();
2988 myname
.setAuthor(author
);
2990 }catch(Exception e
){
2991 //could not extract the author
2995 myname
.setAuthor(atomisedMap
.get("dwc:scientificnameauthorship"));
3002 * @param atomisedMap
// Copies the atomised name parts into the string fields of myname (setFamilyStr ... setFormStr).
// A part is copied only when atomisedMap holds a value for it and checkRankValidForImport
// accepts the corresponding rank.
// For the species part a species-level name n is derived from newName first: n is cut in
// front of any infraspecific epithet present in the map and the markers "subsp.", "var.",
// "v." and "forma" are removed.
// newName:     the name selected for import
// atomisedMap: lower-cased node name -> trimmed text
// myname:      the name object that is filled
// NOTE(review): String.split interprets the epithet as a regular expression.
3005 private void createAtomisedTaxonString(String newName
, HashMap
<String
, String
> atomisedMap
, MyName myname
) {
3006 logger
.info("createAtomisedTaxonString "+atomisedMap
);
3007 if(atomisedMap
.get("dwc:family") != null && checkRankValidForImport(Rank
.FAMILY())){
3008 myname
.setFamilyStr(atomisedMap
.get("dwc:family"));
3010 if(atomisedMap
.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank
.SUBFAMILY())){
3011 myname
.setSubfamilyStr(atomisedMap
.get("dwcranks:subfamily"));
3013 if(atomisedMap
.get("dwcranks:tribe") != null && checkRankValidForImport(Rank
.TRIBE())){
3014 myname
.setTribeStr(atomisedMap
.get("dwcranks:tribe"));
3016 if(atomisedMap
.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank
.SUBTRIBE())){
3017 myname
.setSubtribeStr(atomisedMap
.get("dwcranks:subtribe"));
3019 if(atomisedMap
.get("dwc:genus") != null && checkRankValidForImport(Rank
.GENUS())){
3020 myname
.setGenusStr(atomisedMap
.get("dwc:genus"));
// the subgenus may be stored under either key; both write the same field
3022 if(atomisedMap
.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS())){
3023 myname
.setSubgenusStr(atomisedMap
.get("dwcranks:subgenus"));
3025 if(atomisedMap
.get("dwc:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS())){
3026 myname
.setSubgenusStr(atomisedMap
.get("dwc:subgenus"));
3028 if(atomisedMap
.get("dwc:species") != null && checkRankValidForImport(Rank
.SPECIES())){
// derive the species-level name n: keep only the text in front of an infraspecific epithet
3030 if(atomisedMap
.get("dwc:infraspecificepithet") != null) {
3031 n
=newName
.split(atomisedMap
.get("dwc:infraspecificepithet"))[0];
3032 n
=n
.replace("subsp.","");
3034 if(atomisedMap
.get("dwc:subspecies") != null) {
3035 n
=newName
.split(atomisedMap
.get("dwc:subspecies"))[0];
3036 n
=n
.replace("subsp.","");
3038 if(atomisedMap
.get("dwcranks:varietyepithet") != null) {
3039 n
=newName
.split(atomisedMap
.get("dwcranks:varietyepithet"))[0];
3040 n
=n
.replace("var.","");
3041 n
=n
.replace("v.","");
3043 if(atomisedMap
.get("dwcranks:formepithet") != null) {
// prints a reminder to standard output (not to the logger)
3045 System
.out
.println("TODO FORMA");
3046 n
=newName
.split(atomisedMap
.get("dwcranks:formepithet"))[0];
3047 n
=n
.replace("forma","");
// remember the current author; it is written back after the species string is set
3050 String author
= myname
.getAuthor();
// more than two blank-separated tokens: what follows the first two tokens is taken as author
3051 if(n
.split(" ").length
>2)
3053 String n2
=n
.split(" ")[0]+" "+n
.split(" ")[1];
3056 a
=n
.split(n2
)[1].trim();
3057 }catch(Exception e
){
3058 logger
.info("no author in "+n
+"?");}
3060 myname
.setAuthor(a
);
3061 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3066 myname
.setSpeciesStr(atomisedMap
.get("dwc:species"));
3067 myname
.setAuthor(author
);
// subspecies may be stored under either key; both write the same field
3069 if(atomisedMap
.get("dwc:subspecies") != null && checkRankValidForImport(Rank
.SUBSPECIES())){
3070 myname
.setSubspeciesStr(atomisedMap
.get("dwc:subspecies"));
3072 if(atomisedMap
.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank
.SUBSPECIES())){
3073 myname
.setSubspeciesStr(atomisedMap
.get("dwc:infraspecificepithet"));
3075 if(atomisedMap
.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank
.VARIETY())){
3076 myname
.setVarietyStr(atomisedMap
.get("dwcranks:varietyepithet"));
3078 if(atomisedMap
.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank
.FORM())){
3079 myname
.setFormStr(atomisedMap
.get("dwcranks:formepithet"));
// Fills myname from the atomised parts when the name could not be parsed automatically
// (synonym variant).
// A name of unknown rank is handed to setNotParsableTaxon as a whole. Otherwise a taxon is
// looked up or created through myname.findOrCreateTaxon for a rank level only when all three
// hold: atomisedMap has a value for that level, checkRankValidForImport accepts the level, and
// the level equals the rank of the name itself.
// rank:        rank of the name
// newName:     the name selected for import
// atomisedMap: lower-cased node name -> trimmed text
// myname:      the name object that is filled
// NOTE(review): String.split interprets the epithet as a regular expression.
3083 private void createSynonym(Rank rank
, String newName
, HashMap
<String
, String
> atomisedMap
, MyName myname
) {
3084 logger
.info("createSynonym");
3085 //System.out.println("createsynonym");
3086 if(rank
.equals(Rank
.UNKNOWN_RANK())){
3087 myname
.setNotParsableTaxon(newName
);
3089 {if(atomisedMap
.get("dwc:family") != null && checkRankValidForImport(Rank
.FAMILY()) && rank
.equals(Rank
.FAMILY())){
3090 myname
.setFamily(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:family"),newName
, Rank
.FAMILY(),rank
));
3092 if(atomisedMap
.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank
.SUBFAMILY()) && rank
.equals(Rank
.SUBFAMILY())){
3093 myname
.setSubfamily(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subfamily"), newName
,Rank
.SUBFAMILY(),rank
));
3095 if(atomisedMap
.get("dwcranks:tribe") != null && checkRankValidForImport(Rank
.TRIBE()) && rank
.equals(Rank
.TRIBE())){
3096 myname
.setTribe(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:tribe"),newName
, Rank
.TRIBE(),rank
));
3098 if(atomisedMap
.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank
.SUBTRIBE()) && rank
.equals(Rank
.SUBTRIBE())){
3099 myname
.setSubtribe(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subtribe"),newName
, Rank
.SUBTRIBE(),rank
));
3101 if(atomisedMap
.get("dwc:genus") != null && checkRankValidForImport(Rank
.GENUS()) && rank
.equals(Rank
.GENUS())){
3102 myname
.setGenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:genus"),newName
, Rank
.GENUS(),rank
));
// the subgenus may be stored under either key
3104 if(atomisedMap
.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS()) && rank
.equals(Rank
.SUBGENUS())){
3105 myname
.setSubgenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subgenus"),newName
, Rank
.SUBGENUS(),rank
));
3107 if(atomisedMap
.get("dwc:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS()) && rank
.equals(Rank
.SUBGENUS())){
3108 myname
.setSubgenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:subgenus"),newName
, Rank
.SUBGENUS(),rank
));
3110 if(atomisedMap
.get("dwc:species") != null && checkRankValidForImport(Rank
.SPECIES()) && rank
.equals(Rank
.SPECIES())){
// derive the species-level name n: keep only the text in front of an infraspecific epithet
3112 if(atomisedMap
.get("dwc:infraspecificepithet") != null) {
3113 n
=newName
.split(atomisedMap
.get("dwc:infraspecificepithet"))[0];
3114 n
=n
.replace("subsp.","");
3116 if(atomisedMap
.get("dwc:subspecies") != null) {
3117 n
=newName
.split(atomisedMap
.get("dwc:subspecies"))[0];
3118 n
=n
.replace("subsp.","");
3120 if(atomisedMap
.get("dwcranks:varietyepithet") != null) {
3121 n
=newName
.split(atomisedMap
.get("dwcranks:varietyepithet"))[0];
3122 n
=n
.replace("var.","");
3123 n
=n
.replace("v.","");
3125 if(atomisedMap
.get("dwcranks:formepithet") != null) {
3127 //System.out.println("TODO FORMA");
3128 n
=newName
.split(atomisedMap
.get("dwcranks:formepithet"))[0];
3129 n
=n
.replace("forma","");
// remember the current author; it is written back after the species taxon was handled
3132 String author
= myname
.getAuthor();
// more than two blank-separated tokens: what follows the first two tokens is taken as author
3133 if(n
.split(" ").length
>2)
3135 String n2
=n
.split(" ")[0]+" "+n
.split(" ")[1];
3138 a
= n
.split(n2
)[1].trim();
3139 }catch(Exception e
){logger
.info("no author in "+n
);}
// the author set here is the one in effect while findOrCreateTaxon runs below
3140 myname
.setAuthor(a
);
3141 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3146 myname
.setSpecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:species"),n
, Rank
.SPECIES(),rank
));
3147 myname
.setAuthor(author
);
// subspecies may be stored under either key
3149 if(atomisedMap
.get("dwc:subspecies") != null && checkRankValidForImport(Rank
.SUBSPECIES()) && rank
.equals(Rank
.SUBSPECIES())){
3150 myname
.setSubspecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:subspecies"), newName
,Rank
.SUBSPECIES(),rank
));
3152 if(atomisedMap
.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank
.SUBSPECIES()) && rank
.equals(Rank
.SUBSPECIES())){
3153 myname
.setSubspecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:infraspecificepithet"),newName
, Rank
.SUBSPECIES(),rank
));
3155 if(atomisedMap
.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank
.VARIETY()) && rank
.equals(Rank
.VARIETY())){
3156 myname
.setVariety(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:varietyepithet"),newName
, Rank
.VARIETY(),rank
));
3158 if(atomisedMap
.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank
.FORM()) && rank
.equals(Rank
.FORM())){
3159 myname
.setForm(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:formepithet"), newName
,Rank
.FORM(),rank
));
3167 * @param atomisedMap
// Fills myname from the atomised parts when the name could not be parsed automatically.
// A name of unknown rank is handed to setNotParsableTaxon as a whole. Otherwise a taxon is
// looked up or created through myname.findOrCreateTaxon for every rank level that has a value
// in atomisedMap and is accepted by checkRankValidForImport. Unlike createSynonym, the level
// does not have to equal the rank of the name itself.
// rank:        rank of the name
// newName:     the name selected for import
// atomisedMap: lower-cased node name -> trimmed text
// myname:      the name object that is filled
// NOTE(review): String.split interprets the epithet as a regular expression.
3170 private void createAtomisedTaxon(Rank rank
, String newName
, HashMap
<String
, String
> atomisedMap
, MyName myname
) {
3171 logger
.info("createAtomisedTaxon "+atomisedMap
);
3172 if(rank
.equals(Rank
.UNKNOWN_RANK())){
3173 myname
.setNotParsableTaxon(newName
);
3176 if(atomisedMap
.get("dwc:family") != null && checkRankValidForImport(Rank
.FAMILY())){
3177 myname
.setFamily(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:family"),newName
, Rank
.FAMILY(),rank
));
3179 if(atomisedMap
.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank
.SUBFAMILY())){
3180 myname
.setSubfamily(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subfamily"), newName
,Rank
.SUBFAMILY(),rank
));
3182 if(atomisedMap
.get("dwcranks:tribe") != null && checkRankValidForImport(Rank
.TRIBE())){
3183 myname
.setTribe(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:tribe"),newName
, Rank
.TRIBE(),rank
));
3185 if(atomisedMap
.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank
.SUBTRIBE())){
3186 myname
.setSubtribe(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subtribe"),newName
, Rank
.SUBTRIBE(),rank
));
3188 if(atomisedMap
.get("dwc:genus") != null && checkRankValidForImport(Rank
.GENUS())){
3189 myname
.setGenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:genus"),newName
, Rank
.GENUS(),rank
));
// the subgenus may be stored under either key
3191 if(atomisedMap
.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS())){
3192 myname
.setSubgenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subgenus"),newName
, Rank
.SUBGENUS(),rank
));
3194 if(atomisedMap
.get("dwc:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS())){
3195 myname
.setSubgenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:subgenus"),newName
, Rank
.SUBGENUS(),rank
));
3197 if(atomisedMap
.get("dwc:species") != null && checkRankValidForImport(Rank
.SPECIES())){
// derive the species-level name n: keep only the text in front of an infraspecific epithet
3199 if(atomisedMap
.get("dwc:infraspecificepithet") != null) {
3200 n
=newName
.split(atomisedMap
.get("dwc:infraspecificepithet"))[0];
3201 n
=n
.replace("subsp.","");
3203 if(atomisedMap
.get("dwc:subspecies") != null) {
3204 n
=newName
.split(atomisedMap
.get("dwc:subspecies"))[0];
3205 n
=n
.replace("subsp.","");
3207 if(atomisedMap
.get("dwcranks:varietyepithet") != null) {
3208 n
=newName
.split(atomisedMap
.get("dwcranks:varietyepithet"))[0];
3209 n
=n
.replace("var.","");
3210 n
=n
.replace("v.","");
3212 if(atomisedMap
.get("dwcranks:formepithet") != null) {
3214 //System.out.println("TODO FORMA");
3215 n
=newName
.split(atomisedMap
.get("dwcranks:formepithet"))[0];
3216 n
=n
.replace("forma","");
// remember the current author; it is written back after the species taxon was handled
3219 String author
= myname
.getAuthor();
// more than two blank-separated tokens: what follows the first two tokens is taken as author
3220 if(n
.split(" ").length
>2)
3222 String n2
=n
.split(" ")[0]+" "+n
.split(" ")[1];
3225 a
= n
.split(n2
)[1].trim();
3226 }catch(Exception e
){logger
.info("no author in "+n
);}
// the author set here is the one in effect while findOrCreateTaxon runs below
3227 myname
.setAuthor(a
);
3228 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3233 myname
.setSpecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:species"),n
, Rank
.SPECIES(),rank
));
3234 myname
.setAuthor(author
);
// subspecies may be stored under either key
3236 if(atomisedMap
.get("dwc:subspecies") != null && checkRankValidForImport(Rank
.SUBSPECIES())){
3237 myname
.setSubspecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:subspecies"), newName
,Rank
.SUBSPECIES(),rank
));
3239 if(atomisedMap
.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank
.SUBSPECIES())){
3240 myname
.setSubspecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:infraspecificepithet"),newName
, Rank
.SUBSPECIES(),rank
));
3242 if(atomisedMap
.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank
.VARIETY())){
3243 myname
.setVariety(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:varietyepithet"),newName
, Rank
.VARIETY(),rank
));
3245 if(atomisedMap
.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank
.FORM())){
3246 myname
.setForm(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:formepithet"), newName
,Rank
.FORM(),rank
));
3254 private boolean checkRankValidForImport(Rank currentRank
) {
3255 //logger.info("checkRankValidForImport");
3256 return currentRank
.isLower(configState
.getConfig().getMaxRank()) || currentRank
.equals(configState
.getConfig().getMaxRank());
3262 * @param classification2
3264 public void updateClassification(Classification classification2
) {
3265 //logger.info("updateClassification");
3266 classification
= classification2
;
3271 * cast the current taxonnamebase into a botanical name or zoological or bacterial name
3272 * if errors, cast into a classic nonviralname
3273 * @param taxonnamebase2
3275 @SuppressWarnings("rawtypes")
3276 public NonViralName
<?
> castTaxonNameBase(TaxonNameBase tnb
, NonViralName
<?
> nvn
) {
3277 //logger.info("castTaxonNameBase");
3278 NonViralName
<?
> taxonnamebase2
= nvn
;
3279 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNAFP
)) {
3281 taxonnamebase2
=(BotanicalName
) tnb
;
3282 }catch(Exception e
){
3283 taxonnamebase2
= (NonViralName
<?
>) tnb
;
3286 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICZN
)) {
3288 taxonnamebase2
=(ZoologicalName
) tnb
;
3289 }catch(Exception e
){
3290 taxonnamebase2
= (NonViralName
<?
>) tnb
;
3293 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNB
)) {
3295 taxonnamebase2
=(BacterialName
) tnb
;
3296 }catch(Exception e
){
3297 taxonnamebase2
= (NonViralName
<?
>) tnb
;
3300 return taxonnamebase2
;
3305 * cast the current taxonnamebase into a botanical name or zoological or bacterial name
3306 * if errors, cast into a classic nonviralname
3307 * @param taxonnamebase2
3309 @SuppressWarnings("rawtypes")
3310 public NonViralName
<?
> castTaxonNameBase(TaxonNameBase tnb
) {
3311 //logger.info("castTaxonNameBase2");
3312 NonViralName
<?
> taxonnamebase2
= null;
3313 tnb
=CdmBase
.deproxy(tnb
, TaxonNameBase
.class);
3314 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNAFP
)) {
3316 taxonnamebase2
=(BotanicalName
) tnb
;
3317 }catch(Exception e
){
3318 taxonnamebase2
= (NonViralName
<?
>) tnb
;
3321 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICZN
)) {
3323 taxonnamebase2
=(ZoologicalName
) tnb
;
3324 }catch(Exception e
){
3325 taxonnamebase2
= (NonViralName
<?
>) tnb
;
3328 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNB
)) {
3330 taxonnamebase2
=(BacterialName
) tnb
;
3331 }catch(Exception e
){
3332 taxonnamebase2
= (NonViralName
<?
>) tnb
;
3335 return taxonnamebase2
;
3338 public class MyName
{
3342 public MyName(boolean isSynonym
) {
3344 this.isSynonym
= isSynonym
;
3347 String originalName
="";
3349 Rank rank
=Rank
.UNKNOWN_RANK();
3350 String identifier
="";
3354 NonViralName
<?
> taxonnamebase
;
3356 Reference
<?
> refMods
;
3358 Taxon family
,subfamily
,tribe
,subtribe
,genus
,subgenus
,species
,subspecies
, variety
,form
;
3359 NonViralName
<?
> familyName
, subfamilyName
, tribeName
,subtribeName
,genusName
,subgenusName
,speciesName
,subspeciesName
;
3360 String familyStr
, subfamilyStr
, tribeStr
,subtribeStr
,genusStr
,subgenusStr
,speciesStr
,subspeciesStr
,formStr
,varietyStr
;
3363 private Taxon taxon
;
3364 private Synonym syno
;
3369 public Synonym
getSyno() {
3374 public String
toString(){
3375 List
<String
> tot
=new ArrayList
<String
>();
3376 String
[] n
= {familyStr
, subfamilyStr
, tribeStr
,subtribeStr
,genusStr
,subgenusStr
,speciesStr
,subspeciesStr
,formStr
,varietyStr
};
3378 if (!StringUtils
.isEmpty(elt
)) {
3384 return StringUtils
.join(tot
," ");
3387 * @param syno the syno to set
3389 public void setSyno(Synonym syno
) {
3393 boolean isSynonym
=false;
3396 * @return the isSynonym
3398 public boolean isSynonym() {
3403 * @param isSynonym the isSynonym to set
3405 public void setSynonym(boolean isSynonym
) {
3406 this.isSynonym
= isSynonym
;
3409 public void setSource(Reference
<?
> re
){
3416 public void setFormStr(String string
) {
3417 this.formStr
=string
;
3423 public void setVarietyStr(String string
) {
3424 this.varietyStr
=string
;
3430 public void setSubspeciesStr(String string
) {
3431 this.subspeciesStr
=string
;
3437 public void setSpeciesStr(String string
) {
3438 this.speciesStr
=string
;
3444 public void setSubgenusStr(String string
) {
3445 this.subgenusStr
=string
;
3451 public void setGenusStr(String string
) {
3452 this.genusStr
=string
;
3458 public void setSubtribeStr(String string
) {
3459 this.subtribeStr
=string
;
3465 public void setTribeStr(String string
) {
3466 this.tribeStr
=string
;
3472 public void setSubfamilyStr(String string
) {
3473 this.subfamilyStr
=string
;
3479 public void setFamilyStr(String string
) {
3480 this.familyStr
=string
;
3484 * @return the familyStr
3486 public String
getFamilyStr() {
3490 * @return the subfamilyStr
3492 public String
getSubfamilyStr() {
3493 return subfamilyStr
;
3496 * @return the tribeStr
3498 public String
getTribeStr() {
3502 * @return the subtribeStr
3504 public String
getSubtribeStr() {
3508 * @return the genusStr
3510 public String
getGenusStr() {
3514 * @return the subgenusStr
3516 public String
getSubgenusStr() {
3520 * @return the speciesStr
3522 public String
getSpeciesStr() {
3526 * @return the subspeciesStr
3528 public String
getSubspeciesStr() {
3529 return subspeciesStr
;
3532 * @return the formStr
3534 public String
getFormStr() {
3538 * @return the varietyStr
3540 public String
getVarietyStr() {
3547 public void setNotParsableTaxon(String newName2
) {
3548 //takes too much time
3549 // List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3551 NomenclaturalStatusType statusType
= null;
3552 if (!getStatus().isEmpty()){
3554 statusType
= nomStatusString2NomStatus(getStatus());
3555 } catch (UnknownCdmTypeException e
) {
3556 addProblematicStatusToFile(getStatus());
3557 logger
.warn("Problem with status");
3560 List
<TaxonBase
> tmpList
= new ArrayList
<TaxonBase
>();
3562 Pager
<TaxonBase
> taxontest
= importer
.getTaxonService().findByTitle(TaxonBase
.class, newName2
, MatchMode
.BEGINNING
, null, null, null, null, null);
3563 tmpList
.addAll(taxontest
.getRecords());
3565 //logger.info("tmpList returned: "+tmpList.size());
3568 boolean foundIdentic
=false;
3569 TaxonBase
<?
> tmptaxonbase
=null;
3570 // Taxon tmpPartial=null;
3571 for (TaxonBase
<?
> tmpb
:tmpList
){
3573 TaxonNameBase
<?
,?
> tnb
= tmpb
.getName();
3576 if (tnb
.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(newName2
) ){
3577 crank
=tnb
.getRank();
3578 if (crank
!=null && rank
!=null){
3579 if (crank
.equals(rank
)){
3588 }catch(Exception e
){
3589 e
.printStackTrace();
3597 boolean statusMatch
=false;
3598 boolean appendedMatch
=false;
3599 if(tmptaxonbase
!=null && foundIdentic
){
3600 statusMatch
=compareStatus(tmptaxonbase
, statusType
);
3601 if (!getStatus().isEmpty() && ! (tmptaxonbase
.getAppendedPhrase() == null)) {
3602 appendedMatch
=tmptaxonbase
.getAppendedPhrase().equals(getStatus());
3604 if (getStatus().isEmpty() && tmptaxonbase
.getAppendedPhrase() == null) {
3609 if ((tmptaxonbase
== null || !foundIdentic
) || (tmptaxonbase
!= null && !statusMatch
) || (tmptaxonbase
!= null && !appendedMatch
&& !statusMatch
)){
3611 NonViralName
<?
> tnb
= getNonViralNameAccNomenclature();
3614 if(statusType
!= null) {
3615 tnb
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
3617 if(getStatus()!=null) {
3618 tnb
.setAppendedPhrase(getStatus());
3621 tnb
.setTitleCache(newName2
,true);
3622 tmptaxonbase
= findMatchingTaxon(tnb
,refMods
);
3623 if(tmptaxonbase
==null){
3624 tmptaxonbase
=Taxon
.NewInstance(tnb
, refMods
);
3625 if(!configState
.getConfig().doKeepOriginalSecundum()) {
3626 tmptaxonbase
.setSec(configState
.getConfig().getSecundum());
3628 // tmptaxonbase.setSec(refMods);
3630 classification
.addChildTaxon((Taxon
)tmptaxonbase
, null, null);
3631 sourceHandler
.addSource(refMods
, (Taxon
)tmptaxonbase
);
3636 tmptaxonbase
= CdmBase
.deproxy(tmptaxonbase
, Taxon
.class);
3638 tmptaxonbase
= CdmBase
.deproxy(tmptaxonbase
, Synonym
.class);
3640 if (author
!= null) {
3641 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
3642 setLSID(getIdentifier(), tmptaxonbase
);
3643 importer
.getTaxonService().saveOrUpdate(tmptaxonbase
);
3645 tmptaxonbase
= CdmBase
.deproxy(tmptaxonbase
, Taxon
.class);
3647 tmptaxonbase
= CdmBase
.deproxy(tmptaxonbase
, Synonym
.class);
3651 TaxonNameBase
<?
,?
> tnb
= CdmBase
.deproxy(tmptaxonbase
.getName(), TaxonNameBase
.class);
3654 this.taxon
=(Taxon
)tmptaxonbase
;
3656 this.syno
=(Synonym
)tmptaxonbase
;
3658 castTaxonNameBase(tnb
, taxonnamebase
);
3665 public void buildTaxon() {
3666 //System.out.println("BUILD TAXON");
3667 logger
.info("buildTaxon");
3668 NomenclaturalStatusType statusType
= null;
3669 if (!getStatus().isEmpty()){
3671 statusType
= nomStatusString2NomStatus(getStatus());
3672 taxonnamebase
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
3673 } catch (UnknownCdmTypeException e
) {
3674 addProblematicStatusToFile(getStatus());
3675 logger
.warn("Problem with status");
3678 importer
.getNameService().save(taxonnamebase
);
3680 TaxonBase
<?
> tmptaxonbase
;
3682 tmptaxonbase
=Taxon
.NewInstance(taxonnamebase
, refMods
); //sec set null
3685 tmptaxonbase
=Synonym
.NewInstance(taxonnamebase
, refMods
); //sec set null
3687 boolean exist
= false;
3688 for (TaxonNode p
: classification
.getAllNodes()){
3690 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(tmptaxonbase
.getTitleCache())) {
3691 if(compareStatus(p
.getTaxon(), statusType
)){
3694 tmptaxonbase
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
3696 tmptaxonbase
=CdmBase
.deproxy(p
.getTaxon(), Synonym
.class);
3699 }catch(Exception e
){
3700 logger
.warn("Found the same name but from another type (taxon/synonym)");
3701 TaxonNameBase
<?
,?
> existingTnb
= getTaxon().getName();
3703 tmptaxonbase
= new Synonym(existingTnb
, refMods
);
3704 importer
.getTaxonService().saveOrUpdate(tmptaxonbase
);
3705 tmptaxonbase
=CdmBase
.deproxy(tmptaxonbase
, Synonym
.class);
3709 tmptaxonbase
= new Taxon(existingTnb
, refMods
);
3714 }catch(NullPointerException n
){logger
.warn(" A taxon is either null or its titlecache is null - ignore it?");}
3718 boolean insertAsExisting
=false;
3719 List
<Taxon
> existingTaxons
=new ArrayList
<Taxon
>();
3721 existingTaxons
= getMatchingTaxon(taxonnamebase
);
3722 } catch (Exception e1
) {
3723 // TODO Auto-generated catch block
3724 e1
.printStackTrace();
3726 double similarityScore
=0.0;
3727 double similarityAuthor
=-1;
3732 for (Taxon bestMatchingTaxon
:existingTaxons
){
3733 //System.out.println("tnbase "+taxonnamebase.getTitleCache());
3734 //System.out.println("bestex "+bestMatchingTaxon.getTitleCache());
3736 if(taxonnamebase
.getAuthorshipCache()!=null) {
3737 author1
=taxonnamebase
.getAuthorshipCache();
3739 } catch (Exception e
) {
3740 // TODO Auto-generated catch block
3741 e
.printStackTrace();
3744 if(castTaxonNameBase(bestMatchingTaxon
.getName()).getAuthorshipCache()!=null) {
3745 author2
=castTaxonNameBase(bestMatchingTaxon
.getName()).getAuthorshipCache();
3747 } catch (Exception e
) {
3748 // TODO Auto-generated catch block
3749 e
.printStackTrace();
3752 t1
=taxonnamebase
.getTitleCache().split("sec.")[0].trim();
3753 if (author1
!=null && !StringUtils
.isEmpty(author1
)) {
3754 t1
=t1
.split(Pattern
.quote(author1
))[0];
3756 } catch (Exception e
) {
3757 // TODO Auto-generated catch block
3758 e
.printStackTrace();
3761 t2
=bestMatchingTaxon
.getTitleCache().split("sec.")[0].trim();
3762 if (author2
!=null && !StringUtils
.isEmpty(author2
)) {
3763 t2
=t2
.split(Pattern
.quote(author2
))[0];
3765 } catch (Exception e
) {
3766 // TODO Auto-generated catch block
3767 e
.printStackTrace();
3770 similarityScore
=similarity(t1
.trim(), t2
.trim());
3771 //System.out.println("taxonscore "+similarityScore);
3772 similarityAuthor
=similarity(author1
.trim(), author2
.trim());
3773 //System.out.println("authorscore "+similarityAuthor);
3774 insertAsExisting
= compareAndCheckTaxon(taxonnamebase
, refMods
, similarityScore
, bestMatchingTaxon
,similarityAuthor
);
3775 if(insertAsExisting
) {
3776 tmptaxonbase
=bestMatchingTaxon
;
3780 if (!insertAsExisting
){
3781 if(!configState
.getConfig().doKeepOriginalSecundum()) {
3782 tmptaxonbase
.setSec(configState
.getConfig().getSecundum());
3785 // tmptaxonbase.setSec(refMods);
3786 if (taxonnamebase
.getRank().equals(configState
.getConfig().getMaxRank())) {
3787 //System.out.println("****************************"+tmptaxonbase);
3789 classification
.addChildTaxon((Taxon
)tmptaxonbase
, refMods
, null);
3792 hierarchy
= new HashMap
<Rank
, Taxon
>();
3793 //System.out.println("LOOK FOR PARENT "+taxonnamebase.toString()+", "+tmptaxonbase.toString());
3795 lookForParentNode(taxonnamebase
,(Taxon
)tmptaxonbase
, refMods
,this);
3796 //System.out.println("HIERARCHY "+hierarchy);
3797 Taxon parent
= buildHierarchy();
3798 if(!taxonExistsInClassification(parent
,(Taxon
)tmptaxonbase
)){
3800 classification
.addParentChild(parent
, (Taxon
)tmptaxonbase
, refMods
, null);
3802 classification
.addChildTaxon((Taxon
)tmptaxonbase
, refMods
, null);
3804 importer
.getClassificationService().saveOrUpdate(classification
);
3807 // Set<TaxonNode> nodeList = classification.getAllNodes();
3808 // for(TaxonNode tn:nodeList) {
3809 // System.out.println(tn.getTaxon());
3813 importer
.getClassificationService().saveOrUpdate(classification
);
3814 // refreshTransaction();
3817 Synonym castTest
=CdmBase
.deproxy(tmptaxonbase
, Synonym
.class);
3818 }catch(Exception e
){
3819 TaxonNameBase
<?
,?
> existingTnb
= tmptaxonbase
.getName();
3820 Synonym castTest
= new Synonym(existingTnb
, refMods
);
3821 importer
.getTaxonService().saveOrUpdate(castTest
);
3822 tmptaxonbase
=CdmBase
.deproxy(castTest
, Synonym
.class);
3827 taxon
=CdmBase
.deproxy(tmptaxonbase
, Taxon
.class);
3829 syno
=CdmBase
.deproxy(tmptaxonbase
, Synonym
.class);
3840 private Taxon
buildHierarchy() {
3841 logger
.info("buildHierarchy");
3842 Taxon higherTaxon
= null;
3843 //add the maxRank as a root
3844 if(hierarchy
.containsKey(configState
.getConfig().getMaxRank())){
3845 Taxon ct
=hierarchy
.get(configState
.getConfig().getMaxRank());
3846 if(!taxonExistsInClassification(higherTaxon
, ct
)) {
3847 //System.out.println("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"+hierarchy.get(configState.getConfig().getMaxRank()));
3848 classification
.addChildTaxon(ct
, refMods
, null);
3850 higherTaxon
= hierarchy
.get(configState
.getConfig().getMaxRank());
3851 // return higherTaxon;
3853 //add the relation to the highertaxon, except if the current rank to add IS the maxRank
3854 if(hierarchy
.containsKey(Rank
.SUBFAMILY()) && !configState
.getConfig().getMaxRank().equals(Rank
.SUBFAMILY())){
3855 higherTaxon
=saveAndGetHigherTaxon(Rank
.SUBFAMILY(),higherTaxon
);
3857 if(hierarchy
.containsKey(Rank
.TRIBE())&& !configState
.getConfig().getMaxRank().equals(Rank
.TRIBE())){
3858 higherTaxon
=saveAndGetHigherTaxon(Rank
.TRIBE(),higherTaxon
);
3860 if(hierarchy
.containsKey(Rank
.SUBTRIBE())&& !configState
.getConfig().getMaxRank().equals(Rank
.SUBTRIBE())){
3861 higherTaxon
=saveAndGetHigherTaxon(Rank
.SUBTRIBE(),higherTaxon
);
3863 if(hierarchy
.containsKey(Rank
.GENUS())&& !configState
.getConfig().getMaxRank().equals(Rank
.SUBGENUS())){
3864 higherTaxon
=saveAndGetHigherTaxon(Rank
.GENUS(),higherTaxon
);
3866 if(hierarchy
.containsKey(Rank
.SUBGENUS())&& !configState
.getConfig().getMaxRank().equals(Rank
.SUBGENUS())){
3867 higherTaxon
=saveAndGetHigherTaxon(Rank
.SUBGENUS(),higherTaxon
);
3869 importer
.getClassificationService().saveOrUpdate(classification
);
// Looks up the taxon registered in `hierarchy` for rank `r` and, unless
// taxonExistsInClassification(higherTaxon, ct) already reports it, inserts it into the
// classification: below `higherTaxon` when both are non-null, or through
// classification.addChildTaxon when no higher taxon is given.
// NOTE(review): the return statement is not visible in this excerpt; buildHierarchy
// assigns the result to its running `higherTaxon` — presumably `ct` is returned, confirm.
3873 private Taxon
saveAndGetHigherTaxon(Rank r
, Taxon higherTaxon
){
3874 Taxon ct
=hierarchy
.get(r
);
3875 if(!taxonExistsInClassification(higherTaxon
,ct
)) {
// a parent is known: attach ct as its child
3876 if(higherTaxon
!= null && ct
!=null) {
3877 classification
.addParentChild(higherTaxon
, ct
, refMods
, null);
// no parent known: add ct to the classification via addChildTaxon
3879 if(higherTaxon
== null && ct
!=null) {
3880 classification
.addChildTaxon(ct
, refMods
, null);
// Checks, by case-insensitive comparison of title caches, whether `child` is already
// present in the classification.
// Two scans over classification.getAllNodes() are visible: the first looks for a node
// whose taxon title equals the parent's title and then searches that node's children for
// the child's title; the second compares every node directly against the child's title.
// NOTE(review): the conditions selecting between the two scans and the statements that
// set/return `found` are not visible in this excerpt — presumably the first scan is used
// when `parent` is non-null and the second otherwise; confirm against the full file.
3886 private boolean taxonExistsInClassification(Taxon parent
, Taxon child
){
3887 logger
.info("taxonExistsInClassification");
3888 // System.out.println("LOOK IF TAXA EXIST "+parent+", "+child);
3889 boolean found
=false;
// scan 1: locate the parent node by title, then look for the child among its child nodes
3891 for (TaxonNode p
: classification
.getAllNodes()){
3892 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
3893 for (TaxonNode c
: p
.getChildNodes()) {
3894 if (c
.getTaxon().getTitleCache().equalsIgnoreCase(child
.getTitleCache())) {
// scan 2: look for the child's title anywhere in the classification
3903 for (TaxonNode p
: classification
.getAllNodes()){
3904 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(child
.getTitleCache())) {
3910 // System.out.println("LOOK IF TAXA EXIST? "+found);
3914 * @param nameToBeFilledTest
3916 @SuppressWarnings("rawtypes")
3917 public void setParsedName(TaxonNameBase nameToBeFilledTest
) {
3918 this.taxonnamebase
= (NonViralName
<?
>) nameToBeFilledTest
;
3921 //variety dwcranks:varietyEpithet
3923 * @return the author
3925 public String
getAuthor() {
3931 public Taxon
getTaxon() {
3937 public NonViralName
<?
> getTaxonNameBase() {
3938 return taxonnamebase
;
3942 * @param form the form to set
3944 public void setForm(Taxon form
) {
3949 * @param variety the variety to set
3951 public void setVariety(Taxon variety
) {
3952 this.variety
=variety
;
3959 @SuppressWarnings("rawtypes")
3960 public Taxon
findOrCreateTaxon(String partialname
,String fullname
, Rank rank
, Rank globalrank
) {
3961 logger
.info("findOrCreateTaxon");
3962 sourceUrlRef
=CdmBase
.deproxy(sourceUrlRef
, Reference
.class);
3963 //takes too much time
3964 // List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3965 // logger.info("tmpList returned: "+tmpList.size());
3967 NomenclaturalStatusType statusType
= null;
3968 if (!getStatus().isEmpty()){
3970 statusType
= nomStatusString2NomStatus(getStatus());
3971 } catch (UnknownCdmTypeException e
) {
3972 addProblematicStatusToFile(getStatus());
3973 logger
.warn("Problem with status");
3977 List
<TaxonBase
> tmpListFiltered
= new ArrayList
<TaxonBase
>();
3979 Pager
<TaxonBase
> taxontest
= importer
.getTaxonService().findByTitle(TaxonBase
.class, fullname
, MatchMode
.BEGINNING
, null, null, null, null, null);
3981 tmpListFiltered
.addAll(taxontest
.getRecords());
3982 taxontest
= importer
.getTaxonService().findByTitle(TaxonBase
.class, partialname
, MatchMode
.BEGINNING
, null, null, null, null, null);
3983 tmpListFiltered
.addAll(taxontest
.getRecords());
3985 //logger.info("tmpListFiltered returned: "+tmpListFiltered.size());
3987 boolean nameCorrected
=false;
3988 if (fullname
.indexOf(partialname
)<0) {
3992 boolean foundIdentic
=false;
3994 // Taxon tmpPartial=null;
3995 for (TaxonBase tmpb
:tmpListFiltered
){
3997 TaxonNameBase tnb
= tmpb
.getName();
4000 // //System.out.println(tnb.getTitleCache());
4001 // if (tnb.getTitleCache().split("sec.")[0].equals(partialname) ||tnb.getTitleCache().split("sec.")[0].equals(fullname) ){
4002 if(globalrank
.equals(rank
) || (globalrank
.isLower(Rank
.SPECIES()) && rank
.equals(Rank
.SPECIES()))){
4003 if (tnb
.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(fullname
) ){
4004 crank
=tnb
.getRank();
4005 if (crank
!=null && rank
!=null){
4006 if (crank
.equals(rank
)){
4011 }catch(Exception e
){
4012 e
.printStackTrace();
4017 if(nameCorrected
){ //for corrected names such as Anochetus -- A. blf-pat
4018 if (tnb
.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname
) ){
4019 crank
=tnb
.getRank();
4020 if (crank
!=null && rank
!=null){
4021 if (crank
.equals(rank
)){
4026 }catch(Exception e
){
4027 e
.printStackTrace();
4035 if (tnb
.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname
) ){
4036 crank
=tnb
.getRank();
4037 if (crank
!=null && rank
!=null){
4038 if (crank
.equals(rank
)){
4043 }catch(Exception e
){
4044 e
.printStackTrace();
4053 boolean statusMatch
=false;
4054 boolean appendedMatch
=false;
4055 if(tmp
!=null && foundIdentic
){
4056 statusMatch
=compareStatus(tmp
, statusType
);
4057 if (!getStatus().isEmpty() && ! (tmp
.getAppendedPhrase() == null)) {
4058 appendedMatch
=tmp
.getAppendedPhrase().equals(getStatus());
4060 if (getStatus().isEmpty() && tmp
.getAppendedPhrase() == null) {
4065 if ((tmp
== null || !foundIdentic
) || (tmp
!= null && !statusMatch
) || (tmp
!= null && !appendedMatch
&& !statusMatch
)){
4067 NonViralName
<?
> tnb
= getNonViralNameAccNomenclature();
4070 if(statusType
!= null) {
4071 tnb
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
4073 if(getStatus()!=null) {
4074 tnb
.setAppendedPhrase(getStatus());
4077 if(rank
.equals(Rank
.UNKNOWN_RANK())){
4078 tnb
.setTitleCache(fullname
);
4079 // tnb.setGenusOrUninomial(fullname);
4080 tnb
.setProtectedTitleCache(true);
4082 if(rank
.isHigher(Rank
.GENUS())) {
4083 tnb
.setGenusOrUninomial(partialname
);
4086 if(rank
.isHigher(Rank
.SPECIES())) {
4087 tnb
.setTitleCache(partialname
);
4090 if (rank
.equals(globalrank
) && author
!= null) {
4091 if(fullname
.indexOf("opulifolium")>-1) {
4092 //System.out.println("AUTOR: "+author);
4094 tnb
.setCombinationAuthorTeam(findOrCreateAuthor(author
));
4095 if (getIdentifier() !=null && !getIdentifier().isEmpty()){
4096 Taxon taxonLSID
= getTaxonByLSID(getIdentifier());
4097 if (taxonLSID
!=null) {
4104 if (rank
.equals(Rank
.FAMILY())) {
4105 tmp
= buildFamily(tnb
);
4107 if (rank
.equals(Rank
.SUBFAMILY())) {
4108 tmp
= buildSubfamily(tnb
);
4110 if (rank
.equals(Rank
.TRIBE())) {
4111 tmp
= buildTribe(tnb
);
4113 if (rank
.equals(Rank
.SUBTRIBE())) {
4114 tmp
= buildSubtribe(tnb
);
4116 if (rank
.equals(Rank
.GENUS())) {
4117 tmp
= buildGenus(partialname
, tnb
);
4120 if (rank
.equals(Rank
.SUBGENUS())) {
4121 tmp
= buildSubgenus(partialname
, tnb
);
4123 if (rank
.equals(Rank
.SPECIES())) {
4124 tmp
= buildSpecies(partialname
, tnb
);
4127 if (rank
.equals(Rank
.SUBSPECIES())) {
4128 tmp
= buildSubspecies(partialname
, tnb
);
4131 if (rank
.equals(Rank
.VARIETY())) {
4132 tmp
= buildVariety(fullname
, partialname
, tnb
);
4135 if (rank
.equals(Rank
.FORM())) {
4136 tmp
= buildForm(fullname
, partialname
, tnb
);
4139 importer
.getClassificationService().saveOrUpdate(classification
);
4143 tmp
= CdmBase
.deproxy(tmp
, Taxon
.class);
4144 if (rank
.equals(globalrank
) && author
!= null) {
4145 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
4146 setLSID(getIdentifier(), tmp
);
4147 importer
.getTaxonService().saveOrUpdate(tmp
);
4148 tmp
= CdmBase
.deproxy(tmp
, Taxon
.class);
4151 TaxonNameBase tnb
= CdmBase
.deproxy(tmp
.getName(), TaxonNameBase
.class);
4154 castTaxonNameBase(tnb
, taxonnamebase
);
4161 private Taxon
buildSubfamily(NonViralName
<?
> tnb
) {
4163 // tnb.generateTitle();
4164 tmp
= findMatchingTaxon(tnb
,refMods
);
4166 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4167 if(!configState
.getConfig().doKeepOriginalSecundum()) {
4168 tmp
.setSec(configState
.getConfig().getSecundum());
4170 // tmp.setSec(refMods);
4171 // sourceHandler.addSource(refMods, tmp);
4172 if(family
!= null) {
4173 classification
.addParentChild(family
, tmp
, null, null);
4174 higherRank
=Rank
.FAMILY();
4177 //System.out.println("ADDCHILDTAXON SUBFAMILY "+tmp);
4178 classification
.addChildTaxon(tmp
, null, null);
4187 private Taxon
buildFamily(NonViralName
<?
> tnb
) {
4189 // tnb.generateTitle();
4190 tmp
= findMatchingTaxon(tnb
,refMods
);
4192 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4193 if(!configState
.getConfig().doKeepOriginalSecundum()) {
4194 tmp
.setSec(configState
.getConfig().getSecundum());
4196 // tmp.setSec(refMods);
4197 //sourceHandler.addSource(refMods, tmp);
4198 //System.out.println("ADDCHILDTAXON FAMILY "+tmp);
4199 classification
.addChildTaxon(tmp
, null, null);
4208 private Taxon
buildForm(String fullname
, String partialname
, NonViralName
<?
> tnb
) {
4210 if (genusName
!=null) {
4211 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
4213 if (subgenusName
!=null) {
4214 tnb
.setInfraGenericEpithet(subgenusName
.getInfraGenericEpithet());
4216 if(speciesName
!=null) {
4217 tnb
.setSpecificEpithet(speciesName
.getSpecificEpithet());
4219 if(subspeciesName
!= null) {
4220 tnb
.setInfraSpecificEpithet(subspeciesName
.getInfraSpecificEpithet());
4222 if(partialname
!= null) {
4223 tnb
.setInfraSpecificEpithet(partialname
);
4225 tnb
.generateTitle();
4226 //TODO how to save form??
4227 tnb
.setTitleCache(fullname
, true);
4228 tmp
= findMatchingTaxon(tnb
,refMods
);
4230 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4231 if(!configState
.getConfig().doKeepOriginalSecundum()) {
4232 tmp
.setSec(configState
.getConfig().getSecundum());
4234 // tmp.setSec(refMods);
4235 //sourceHandler.addSource(refMods, tmp);
4236 if (subspecies
!=null) {
4237 classification
.addParentChild(subspecies
, tmp
, null, null);
4238 higherRank
=Rank
.SUBSPECIES();
4239 higherTaxa
=subspecies
;
4241 if (species
!=null) {
4242 classification
.addParentChild(species
, tmp
, null, null);
4243 higherRank
=Rank
.SPECIES();
4247 // System.out.println("ADDCHILDTAXON FORM "+tmp);
4248 classification
.addChildTaxon(tmp
, null, null);
4259 private Taxon
buildVariety(String fullname
, String partialname
, NonViralName
<?
> tnb
) {
4261 if (genusName
!=null) {
4262 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
4264 if (subgenusName
!=null) {
4265 tnb
.setInfraGenericEpithet(subgenusName
.getInfraGenericEpithet());
4267 if(speciesName
!=null) {
4268 tnb
.setSpecificEpithet(speciesName
.getSpecificEpithet());
4270 if(subspeciesName
!= null) {
4271 tnb
.setInfraSpecificEpithet(subspeciesName
.getSpecificEpithet());
4273 if(partialname
!= null) {
4274 tnb
.setInfraSpecificEpithet(partialname
);
4276 //TODO how to save variety?
4277 tnb
.setTitleCache(fullname
, true);
4278 tmp
= findMatchingTaxon(tnb
,refMods
);
4280 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4281 if(!configState
.getConfig().doKeepOriginalSecundum()) {
4282 tmp
.setSec(configState
.getConfig().getSecundum());
4284 // tmp.setSec(refMods);
4285 //sourceHandler.addSource(refMods, tmp);
4286 if (subspecies
!=null) {
4287 classification
.addParentChild(subspecies
, tmp
, null, null);
4288 higherRank
=Rank
.SUBSPECIES();
4289 higherTaxa
=subspecies
;
4291 if(species
!=null) {
4292 classification
.addParentChild(species
, tmp
, null, null);
4293 higherRank
=Rank
.SPECIES();
4297 //System.out.println("ADDCHILDTAXON VARIETY "+tmp);
4298 classification
.addChildTaxon(tmp
, null, null);
4305 * @param partialname
4309 private Taxon
buildSubspecies(String partialname
, NonViralName
<?
> tnb
) {
4311 if (genusName
!=null) {
4312 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
4314 if (subgenusName
!=null) {
4315 // System.out.println("SUB:"+subgenusName.getInfraGenericEpithet());
4316 tnb
.setInfraGenericEpithet(subgenusName
.getInfraGenericEpithet());
4318 if(speciesName
!=null) {
4319 // System.out.println("SPE:"+speciesName.getSpecificEpithet());
4320 tnb
.setSpecificEpithet(speciesName
.getSpecificEpithet());
4322 tnb
.setInfraSpecificEpithet(partialname
);
4323 tnb
.generateTitle();
4324 tmp
= findMatchingTaxon(tnb
,refMods
);
4326 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4327 if(!configState
.getConfig().doKeepOriginalSecundum())
4329 tmp
.setSec(configState
.getConfig().getSecundum());
4330 // tmp.setSec(refMods);
4331 //sourceHandler.addSource(refMods, tmp);
4334 if(species
!= null) {
4335 classification
.addParentChild(species
, tmp
, null, null);
4336 higherRank
=Rank
.SPECIES();
4340 //System.out.println("ADDCHILDTAXON SUBSPECIES "+tmp);
4341 classification
.addChildTaxon(tmp
, null, null);
4347 * @param partialname
4351 private Taxon
buildSpecies(String partialname
, NonViralName
<?
> tnb
) {
4353 if (genusName
!=null) {
4354 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
4356 if (subgenusName
!=null) {
4357 tnb
.setInfraGenericEpithet(subgenusName
.getInfraGenericEpithet());
4359 tnb
.setSpecificEpithet(partialname
.toLowerCase());
4360 tnb
.generateTitle();
4361 tmp
= findMatchingTaxon(tnb
,refMods
);
4363 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4364 if(!configState
.getConfig().doKeepOriginalSecundum()) {
4365 tmp
.setSec(configState
.getConfig().getSecundum());
4367 // tmp.setSec(refMods);
4368 //sourceHandler.addSource(refMods, tmp);
4369 if (subgenus
!=null) {
4370 classification
.addParentChild(subgenus
, tmp
, null, null);
4371 higherRank
=Rank
.SUBGENUS();
4372 higherTaxa
=subgenus
;
4375 classification
.addParentChild(genus
, tmp
, null, null);
4376 higherRank
=Rank
.GENUS();
4380 //System.out.println("ADDCHILDTAXON SPECIES "+tmp);
4381 classification
.addChildTaxon(tmp
, null, null);
4388 * @param partialname
4392 private Taxon
buildSubgenus(String partialname
, NonViralName
<?
> tnb
) {
4394 tnb
.setInfraGenericEpithet(partialname
);
4395 if (genusName
!=null) {
4396 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
4398 tnb
.generateTitle();
4399 tmp
= findMatchingTaxon(tnb
,refMods
);
4401 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4402 if(!configState
.getConfig().doKeepOriginalSecundum()) {
4403 tmp
.setSec(configState
.getConfig().getSecundum());
4405 // tmp.setSec(refMods);
4406 //sourceHandler.addSource(refMods, tmp);
4408 classification
.addParentChild(genus
, tmp
, null, null);
4409 higherRank
=Rank
.GENUS();
4412 //System.out.println("ADDCHILDTAXON SUBGENUS "+tmp);
4413 classification
.addChildTaxon(tmp
, null, null);
4419 * @param partialname
4423 private Taxon
buildGenus(String partialname
, NonViralName
<?
> tnb
) {
4425 tnb
.setGenusOrUninomial(partialname
);
4426 tnb
.generateTitle();
4428 tmp
= findMatchingTaxon(tnb
,refMods
);
4430 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4431 if(!configState
.getConfig().doKeepOriginalSecundum())
4433 tmp
.setSec(configState
.getConfig().getSecundum());
4434 // tmp.setSec(refMods);
4435 //sourceHandler.addSource(refMods, tmp);
4438 if(subtribe
!= null) {
4439 classification
.addParentChild(subtribe
, tmp
, null, null);
4440 higherRank
=Rank
.SUBTRIBE();
4441 higherTaxa
=subtribe
;
4444 classification
.addParentChild(tribe
, tmp
, null, null);
4445 higherRank
=Rank
.TRIBE();
4448 if(subfamily
!=null) {
4449 classification
.addParentChild(subfamily
, tmp
, null, null);
4450 higherRank
=Rank
.SUBFAMILY();
4451 higherTaxa
=subfamily
;
4454 classification
.addParentChild(family
, tmp
, null, null);
4455 higherRank
=Rank
.FAMILY();
4459 //System.out.println("ADDCHILDTAXON GENUS "+tmp);
4460 classification
.addChildTaxon(tmp
, null, null);
4472 private Taxon
buildSubtribe(NonViralName
<?
> tnb
) {
4474 tnb
.generateTitle();
4475 tmp
= findMatchingTaxon(tnb
,refMods
);
4477 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4478 if(!configState
.getConfig().doKeepOriginalSecundum()) {
4479 tmp
.setSec(configState
.getConfig().getSecundum());
4481 // tmp.setSec(refMods);
4482 //sourceHandler.addSource(refMods, tmp);
4484 classification
.addParentChild(tribe
, tmp
, null, null);
4485 higherRank
=Rank
.TRIBE();
4488 //System.out.println("ADDCHILDTAXON SUBTRIBE "+tmp);
4489 classification
.addChildTaxon(tmp
, null, null);
4498 private Taxon
buildTribe(NonViralName
<?
> tnb
) {
4500 tnb
.generateTitle();
4501 tmp
= findMatchingTaxon(tnb
,refMods
);
4503 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
4504 if(!configState
.getConfig().doKeepOriginalSecundum()) {
4505 tmp
.setSec(configState
.getConfig().getSecundum());
4507 // tmp.setSec(refMods);
4508 //sourceHandler.addSource(refMods, tmp);
4509 if (subfamily
!=null) {
4510 classification
.addParentChild(subfamily
, tmp
, null, null);
4511 higherRank
=Rank
.SUBFAMILY();
4512 higherTaxa
=subfamily
;
4514 if(family
!= null) {
4515 classification
.addParentChild(family
, tmp
, null, null);
4516 higherRank
=Rank
.FAMILY();
4520 //System.out.println("ADDCHILDTAXON TRIBE "+tmp);
4521 classification
.addChildTaxon(tmp
, null, null);
4529 * @param identifier
// Searches the stored taxa for one whose LSID equals the LSID contained in `identifier`.
// Only the part of `identifier` before the first "__" is used; it is parsed as an LSID
// when it contains the substring "lsid". A malformed LSID is logged as a warning.
// NOTE(review): the declaration of `lsid`, the `try` keywords and the return statements
// are not visible in this excerpt; what is returned when nothing matches cannot be
// confirmed from here.
4532 @SuppressWarnings("rawtypes")
4533 private Taxon
getTaxonByLSID(String identifier
) {
4534 //logger.info("getTaxonByLSID");
4535 // boolean lsidok=false;
4536 String id
= identifier
.split("__")[0];
4537 // String source = identifier.split("__")[1];
4539 if (id
.indexOf("lsid")>-1){
4541 lsid
= new LSID(id
);
4543 } catch (MalformedLSIDException e
) {
4544 logger
.warn("Malformed LSID");
// Requests Taxon records from the taxon service (page arguments 0, 0) and compares their
// LSID strings one by one.
// NOTE(review): this looks like a full scan per call — consider a lookup by LSID instead.
4548 List
<TaxonBase
> taxons
= importer
.getTaxonService().list(Taxon
.class, 0, 0, null, null);
4549 LSID currentlsid
=null;
4550 for (TaxonBase t
:taxons
){
4551 currentlsid
= t
.getLsid();
4552 if (currentlsid
!=null){
4553 if (currentlsid
.getLsid().equals(lsid
.getLsid())){
// any exception raised in the (not visible) guarded statements is only logged
4557 catch(Exception e
){logger
.warn("Exception occurred while comparing LSIDs "+e
);}
4568 @SuppressWarnings("rawtypes")
4569 private Person
findOrCreateAuthor(String author2
) {
4570 //logger.info("findOrCreateAuthor");
4571 List
<UuidAndTitleCache
<Person
>> hiberPersons
= importer
.getAgentService().getPersonUuidAndTitleCache();
4572 for (UuidAndTitleCache
<Person
> hibernateP
:hiberPersons
){
4573 if(hibernateP
.getTitleCache().equals(author2
)) {
4574 AgentBase existing
= importer
.getAgentService().find(hibernateP
.getUuid());
4575 return CdmBase
.deproxy(existing
, Person
.class);
4578 Person p
= Person
.NewInstance();
4579 p
.setTitleCache(author2
,true);
4580 importer
.getAgentService().saveOrUpdate(p
);
4581 return CdmBase
.deproxy(p
, Person
.class);
4584 * @param author the author to set
4586 public void setAuthor(String author
) {
4587 this.author
= author
;
4591 * @return the higherTaxa
4593 public Taxon
getHigherTaxa() {
4597 * @param higherTaxa the higherTaxa to set
4599 public void setHigherTaxa(Taxon higherTaxa
) {
4600 this.higherTaxa
= higherTaxa
;
4603 * @return the higherRank
4605 public Rank
getHigherRank() {
4609 * @param higherRank the higherRank to set
4611 public void setHigherRank(Rank higherRank
) {
4612 this.higherRank
= higherRank
;
4614 public String
getName(){
4615 if (newName
.isEmpty()) {
4616 return originalName
;
4623 * @return the fullName
4625 public String
getOriginalName() {
4626 return originalName
;
4629 * @param fullName the fullName to set
4631 public void setOriginalName(String fullName
) {
4632 this.originalName
= fullName
;
4635 * @return the newName
4637 public String
getNewName() {
4641 * @param newName the newName to set
4643 public void setNewName(String newName
) {
4644 this.newName
= newName
;
4649 public Rank
getRank() {
4653 * @param rank the rank to set
4655 public void setRank(Rank rank
) {
4659 * @return the identifier
4661 public String
getIdentifier() {
4665 * @param identifier the identifier to set
4667 public void setIdentifier(String identifier
) {
4668 this.identifier
= identifier
;
4671 * @return the status
4673 public String
getStatus() {
4674 if (status
== null) {
4680 * @param status the status to set
4682 public void setStatus(String status
) {
4683 this.status
= status
;
4686 * @return the family
4688 public Taxon
getFamily() {
4692 * @param family the family to set
4694 @SuppressWarnings("rawtypes")
4695 public void setFamily(Taxon family
) {
4696 this.family
= family
;
4697 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(family
.getName(), TaxonNameBase
.class);
4698 familyName
= castTaxonNameBase(taxonNameBase
,familyName
);
4701 * @return the subfamily
4703 public Taxon
getSubfamily() {
4707 * @param subfamily the subfamily to set
4709 @SuppressWarnings("rawtypes")
4710 public void setSubfamily(Taxon subfamily
) {
4711 this.subfamily
= subfamily
;
4712 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(subfamily
.getName(), TaxonNameBase
.class);
4713 subfamilyName
= castTaxonNameBase(taxonNameBase
,subfamilyName
);
4718 public Taxon
getTribe() {
4722 * @param tribe the tribe to set
4724 @SuppressWarnings("rawtypes")
4725 public void setTribe(Taxon tribe
) {
4727 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(tribe
.getName(), TaxonNameBase
.class);
4728 tribeName
= castTaxonNameBase(taxonNameBase
,tribeName
);
4731 * @return the subtribe
4733 public Taxon
getSubtribe() {
4737 * @param subtribe the subtribe to set
4739 @SuppressWarnings("rawtypes")
4740 public void setSubtribe(Taxon subtribe
) {
4741 this.subtribe
= subtribe
;
4742 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(subtribe
.getName(), TaxonNameBase
.class);
4743 subtribeName
=castTaxonNameBase(taxonNameBase
,subtribeName
);
4748 public Taxon
getGenus() {
4752 * @param genus the genus to set
4754 @SuppressWarnings("rawtypes")
4755 public void setGenus(Taxon genus
) {
4757 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(genus
.getName(), TaxonNameBase
.class);
4758 genusName
= castTaxonNameBase(taxonNameBase
,genusName
);
4759 //System.out.println("GENUSNAME: "+genusName.toString());
4762 * @return the subgenus
4764 public Taxon
getSubgenus() {
4768 * @param subgenus the subgenus to set
4770 @SuppressWarnings("rawtypes")
4771 public void setSubgenus(Taxon subgenus
) {
4772 this.subgenus
= subgenus
;
4773 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(subgenus
.getName(), TaxonNameBase
.class);
4774 subgenusName
= castTaxonNameBase(taxonNameBase
,subgenusName
);
4777 * @return the species
4779 public Taxon
getSpecies() {
4783 * @param species the species to set
4785 public void setSpecies(Taxon species
) {
4786 this.species
= species
;
4787 @SuppressWarnings("rawtypes")
4788 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(species
.getName(), TaxonNameBase
.class);
4789 speciesName
= castTaxonNameBase(taxonNameBase
,speciesName
);
4793 * @return the subspecies
4795 public Taxon
getSubspecies() {
4799 * @param subspecies the subspecies to set
4801 @SuppressWarnings("rawtypes")
4802 public void setSubspecies(Taxon subspecies
) {
4803 this.subspecies
= subspecies
;
4804 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(subspecies
.getName(), TaxonNameBase
.class);
4805 subspeciesName
= castTaxonNameBase(taxonNameBase
,subspeciesName
);
4817 private void addProblematicStatusToFile(String status
) {
4819 FileWriter fstream
= new FileWriter("/home/pkelbert/Bureau/StatusUnknown_"+classification
.getTitleCache()+".txt",true);
4820 BufferedWriter out
= new BufferedWriter(fstream
);
4821 out
.write(status
+"\n");
4822 //Close the output stream
4824 }catch (Exception e
){//Catch exception if any
4825 System
.err
.println("Error: " + e
.getMessage());
4836 private Taxon
findMatchingTaxon(NonViralName
<?
> tnb
, Reference refMods
) {
4837 logger
.info("findMatchingTaxon");
4840 refMods
=CdmBase
.deproxy(refMods
, Reference
.class);
4841 boolean insertAsExisting
=false;
4842 List
<Taxon
> existingTaxons
= new ArrayList
<Taxon
>();
4844 existingTaxons
= getMatchingTaxon(tnb
);
4845 } catch (Exception e1
) {
4846 // TODO Auto-generated catch block
4847 e1
.printStackTrace();
4849 double similarityScore
=0.0;
4850 double similarityAuthor
=-1;
4855 for (Taxon bestMatchingTaxon
:existingTaxons
){
4856 if (!existingTaxons
.isEmpty() && configState
.getConfig().isInteractWithUser() && !insertAsExisting
) {
4857 // System.out.println("tnb "+tnb.getTitleCache());
4858 // System.out.println("ext "+bestMatchingTaxon.getTitleCache());
4860 if(tnb
.getAuthorshipCache()!=null) {
4861 author1
=tnb
.getAuthorshipCache();
4863 } catch (Exception e
) {
4864 // TODO Auto-generated catch block
4865 e
.printStackTrace();
4868 if(castTaxonNameBase(bestMatchingTaxon
.getName()).getAuthorshipCache()!=null) {
4869 author2
=castTaxonNameBase(bestMatchingTaxon
.getName()).getAuthorshipCache();
4871 } catch (Exception e
) {
4872 // TODO Auto-generated catch block
4873 e
.printStackTrace();
4876 t1
=tnb
.getTitleCache().split("sec.")[0].trim();
4877 if (author1
!=null && !StringUtils
.isEmpty(author1
)) {
4878 t1
=t1
.split(Pattern
.quote(author1
))[0];
4880 } catch (Exception e
) {
4881 // TODO Auto-generated catch block
4882 e
.printStackTrace();
4885 t2
=bestMatchingTaxon
.getTitleCache().split("sec.")[0].trim();
4886 if (author2
!=null && !StringUtils
.isEmpty(author2
)) {
4887 t2
=t2
.split(Pattern
.quote(author2
))[0];
4889 } catch (Exception e
) {
4890 // TODO Auto-generated catch block
4891 e
.printStackTrace();
4893 similarityScore
=similarity(t1
.trim(), t2
.trim());
4894 // System.out.println("taxascore: "+similarityScore);
4895 similarityAuthor
=similarity(author1
.trim(), author2
.trim());
4896 // System.out.println("authorscore: "+similarityAuthor);
4897 insertAsExisting
= compareAndCheckTaxon(tnb
, refMods
, similarityScore
, bestMatchingTaxon
,similarityAuthor
);
4899 if(insertAsExisting
) {
4900 //System.out.println("KEEP "+bestMatchingTaxon.toString());
4901 tmp
=bestMatchingTaxon
;
4902 sourceHandler
.addSource(refMods
, tmp
);
4913 * @param similarityScore
4914 * @param bestMatchingTaxon
4915 * @param similarityAuthor
4918 private boolean compareAndCheckTaxon(NonViralName
<?
> tnb
, Reference
<?
> refMods
, double similarityScore
,
4919 Taxon bestMatchingTaxon
, double similarityAuthor
) {
4920 //logger.info("compareAndCheckTaxon");
4921 boolean insertAsExisting
;
4922 // if (tnb.getTitleCache().split("sec.")[0].equalsIgnoreCase("Chenopodium") && bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium album")>-1) {
4923 // insertAsExisting=false;
4925 //a small hack/automatisation for Chenopodium only
4926 if (tnb
.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase("Chenopodium") &&
4927 bestMatchingTaxon
.getTitleCache().split("sec.")[0].indexOf("Chenopodium L.")>-1) {
4928 insertAsExisting
=true;
4930 insertAsExisting
=askIfReuseBestMatchingTaxon(tnb
, bestMatchingTaxon
, refMods
, similarityScore
,similarityAuthor
);
4934 logDecision(tnb
,bestMatchingTaxon
,insertAsExisting
, refMods
);
4935 return insertAsExisting
;
4941 @SuppressWarnings("rawtypes")
4942 private List
<Taxon
> getMatchingTaxon(TaxonNameBase tnb
) {
4943 //logger.info("getMatchingTaxon");
4944 Pager
<TaxonBase
> pager
=importer
.getTaxonService().findByTitle(TaxonBase
.class, tnb
.getTitleCache().split("sec.")[0].trim(), MatchMode
.BEGINNING
, null, null, null, null, null);
4945 List
<TaxonBase
>records
= pager
.getRecords();
4947 List
<Taxon
> existingTaxons
= new ArrayList
<Taxon
>();
4948 for (TaxonBase r
:records
){
4950 Taxon bestMatchingTaxon
= (Taxon
)r
;
4951 // System.out.println("best: "+bestMatchingTaxon.getTitleCache());
4952 if(compareTaxonNameLength(bestMatchingTaxon
.getTitleCache().split(".sec")[0],tnb
.getTitleCache().split(".sec")[0])) {
4953 existingTaxons
.add(bestMatchingTaxon
);
4955 }catch(ClassCastException e
){logger
.warn("classcast exception, might be a synonym, ignore it");}
4957 Taxon bmt
= importer
.getTaxonService().findBestMatchingTaxon(tnb
.getTitleCache());
4958 if (!existingTaxons
.contains(bmt
) && bmt
!=null) {
4959 if(compareTaxonNameLength(bmt
.getTitleCache().split(".sec")[0],tnb
.getTitleCache().split(".sec")[0])) {
4960 existingTaxons
.add(bmt
);
4963 return existingTaxons
;
4967 * Check if the found Taxon can reasonnably be the same
4968 * example: with and without author should match, but the subspecies should not be suggested for a genus
4970 private boolean compareTaxonNameLength(String f
, String o
){
4971 //logger.info("compareTaxonNameLength");
4972 boolean lengthOk
=false;
4973 int sizeF
= f
.length();
4974 int sizeO
= o
.length();
4979 if (sizeF
-sizeO
>10) {
4986 // System.out.println(lengthOk+": compare "+f+" ("+f.length()+") and "+o+" ("+o.length()+")");
4990 private double similarity(String s1
, String s2
) {
4991 //logger.info("similarity");
4992 //System.out.println("similarity *"+s1+"* vs. *"+s2+"*");
4993 if(!StringUtils
.isEmpty(s1
) && !StringUtils
.isEmpty(s2
)){
4994 String l1
=s1
.toLowerCase().trim();
4995 String l2
=s2
.toLowerCase().trim();
4996 if (l1
.length() < l2
.length()) { // s1 should always be bigger
4997 String swap
= l1
; l1
= l2
; l2
= swap
;
4999 int bigLen
= l1
.length();
5000 if (bigLen
== 0) { return 1.0; /* both strings are zero length */ }
5001 return (bigLen
- computeEditDistance(l1
, l2
)) / (double) bigLen
;
5004 if(s1
!=null && s2
!=null){
5005 if (s1
.equalsIgnoreCase(s2
)) {
5013 private int computeEditDistance(String s1
, String s2
) {
5014 //logger.info("computeEditDistance");
5015 int[] costs
= new int[s2
.length() + 1];
5016 for (int i
= 0; i
<= s1
.length(); i
++) {
5018 for (int j
= 0; j
<= s2
.length(); j
++) {
5023 int newValue
= costs
[j
- 1];
5024 if (s1
.charAt(i
- 1) != s2
.charAt(j
- 1)) {
5025 newValue
= Math
.min(Math
.min(newValue
, lastValue
),
5028 costs
[j
- 1] = lastValue
;
5029 lastValue
= newValue
;
5034 costs
[s2
.length()] = lastValue
;
5037 return costs
[s2
.length()];
// Parent taxa keyed by rank; the handle*Hierarchy methods of this class store the
// parent they resolved under the rank they resolved it for.
Map<Rank, Taxon> hierarchy = new HashMap<Rank, Taxon>();
5042 * @param taxonnamebase
5044 @SuppressWarnings("rawtypes")
5045 public void lookForParentNode(NonViralName
<?
> taxonnamebase
, Taxon tax
, Reference
<?
> ref
, MyName myName
) {
5046 logger
.info("lookForParentNode "+taxonnamebase
.getTitleCache()+" for "+myName
.toString());
5047 //System.out.println("LOOK FOR PARENT NODE "+taxonnamebase.toString()+"; "+tax.toString()+"; "+taxonnamebase.getRank());
5048 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
5049 if (taxonnamebase
.getRank().equals(Rank
.FORM())){
5050 handleFormHierarchy(ref
, myName
, parser
);
5052 if (taxonnamebase
.getRank().equals(Rank
.VARIETY())){
5053 handleVarietyHierarchy(ref
, myName
, parser
);
5055 if (taxonnamebase
.getRank().equals(Rank
.SUBSPECIES())){
5056 handleSubSpeciesHierarchy(ref
, myName
, parser
);
5058 if (taxonnamebase
.getRank().equals(Rank
.SPECIES())){
5059 handleSpeciesHierarchy(ref
, myName
, parser
);
5061 if (taxonnamebase
.getRank().equals(Rank
.SUBGENUS())){
5062 handleSubgenusHierarchy(ref
, myName
, parser
);
5065 if (taxonnamebase
.getRank().equals(Rank
.GENUS())){
5066 handleGenusHierarchy(ref
, myName
, parser
);
5068 if (taxonnamebase
.getRank().equals(Rank
.SUBTRIBE())){
5069 handleSubtribeHierarchy(ref
, myName
, parser
);
5071 if (taxonnamebase
.getRank().equals(Rank
.TRIBE())){
5072 handleTribeHierarchy(ref
, myName
, parser
);
5075 if (taxonnamebase
.getRank().equals(Rank
.SUBFAMILY())){
5076 handleSubfamilyHierarchy(ref
, myName
, parser
);
5085 private void handleSubfamilyHierarchy(Reference
<?
> ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5086 System
.out
.println("handleSubfamilyHierarchy");
5087 String parentStr
= myName
.getFamilyStr();
5088 Rank r
= Rank
.FAMILY();
5089 if(parentStr
!=null){
5091 Taxon parent
= null;
5092 Pager
<TaxonBase
> taxontest
= importer
.getTaxonService().findByTitle(TaxonBase
.class, parentStr
, MatchMode
.BEGINNING
, null, null, null, null, null);
5093 for(TaxonBase tb
:taxontest
.getRecords()){
5095 if (tb
.getName().getRank().equals(r
)) {
5096 parent
=CdmBase
.deproxy(tb
, Taxon
.class);
5099 } catch (Exception e
) {
5100 // TODO Auto-generated catch block
5101 e
.printStackTrace();
5104 if(parent
== null) {
5105 NonViralName
<?
> parentNameName
= (NonViralName
<?
>) parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
5106 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5109 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5110 importer
.getTaxonService().save(parent
);
5111 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
5115 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5118 hierarchy
.put(r
,parent
);
5127 private void handleTribeHierarchy(Reference
<?
> ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5128 String parentStr
= myName
.getSubfamilyStr();
5129 Rank r
= Rank
.SUBFAMILY();
5130 if (parentStr
== null){
5131 parentStr
= myName
.getFamilyStr();
5134 if(parentStr
!=null){
5135 NonViralName
<?
> parentNameName
= (NonViralName
<?
>) parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
5136 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
5137 // importer.getTaxonService().save(parent);
5138 // parent = CdmBase.deproxy(parent, Taxon.class);
5140 boolean parentDoesNotExists
= true;
5141 for (TaxonNode p
: classification
.getAllNodes()){
5142 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
5143 parentDoesNotExists
= false;
5144 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
5148 // if(parentDoesNotExists) {
5149 // importer.getTaxonService().save(parent);
5150 // parent = CdmBase.deproxy(parent, Taxon.class);
5151 // lookForParentNode(parentNameName, parent, ref,myName);
5153 if(parentDoesNotExists
) {
5154 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5157 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5158 importer
.getTaxonService().save(parent
);
5159 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
5163 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5166 hierarchy
.put(r
,parent
);
5175 private void handleSubtribeHierarchy(Reference
<?
> ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5176 String parentStr
= myName
.getTribeStr();
5177 Rank r
= Rank
.TRIBE();
5178 if (parentStr
== null){
5179 parentStr
= myName
.getSubfamilyStr();
5180 r
= Rank
.SUBFAMILY();
5182 if (parentStr
== null){
5183 parentStr
= myName
.getFamilyStr();
5186 if(parentStr
!=null){
5187 NonViralName
<?
> parentNameName
= (NonViralName
<?
>) parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
5188 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
5189 // importer.getTaxonService().save(parent);
5190 // parent = CdmBase.deproxy(parent, Taxon.class);
5192 boolean parentDoesNotExists
= true;
5193 for (TaxonNode p
: classification
.getAllNodes()){
5194 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
5195 parentDoesNotExists
= false;
5196 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
5201 // if(parentDoesNotExists) {
5202 // importer.getTaxonService().save(parent);
5203 // parent = CdmBase.deproxy(parent, Taxon.class);
5204 // lookForParentNode(parentNameName, parent, ref,myName);
5206 if(parentDoesNotExists
) {
5207 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5210 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5211 importer
.getTaxonService().save(parent
);
5212 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
5216 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5219 hierarchy
.put(r
,parent
);
5228 private void handleGenusHierarchy(Reference
<?
> ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5229 String parentStr
= myName
.getSubtribeStr();
5230 Rank r
= Rank
.SUBTRIBE();
5231 if (parentStr
== null){
5232 parentStr
= myName
.getTribeStr();
5235 if (parentStr
== null){
5236 parentStr
= myName
.getSubfamilyStr();
5237 r
= Rank
.SUBFAMILY();
5239 if (parentStr
== null){
5240 parentStr
= myName
.getFamilyStr();
5243 if(parentStr
!=null){
5244 NonViralName
<?
> parentNameName
= (NonViralName
<?
>) parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
5245 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
5246 // importer.getTaxonService().save(parent);
5247 // parent = CdmBase.deproxy(parent, Taxon.class);
5249 boolean parentDoesNotExists
= true;
5250 for (TaxonNode p
: classification
.getAllNodes()){
5251 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
5252 // System.out.println(p.getTaxon().getUuid());
5253 // System.out.println(parent.getUuid());
5254 parentDoesNotExists
= false;
5255 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
5259 // if(parentDoesNotExists) {
5260 // importer.getTaxonService().save(parent);
5261 // parent = CdmBase.deproxy(parent, Taxon.class);
5262 // lookForParentNode(parentNameName, parent, ref,myName);
5264 if(parentDoesNotExists
) {
5265 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5268 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5269 importer
.getTaxonService().save(parent
);
5270 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
5274 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5277 hierarchy
.put(r
,parent
);
5286 private void handleSubgenusHierarchy(Reference
<?
> ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5287 String parentStr
= myName
.getGenusStr();
5288 Rank r
= Rank
.GENUS();
5290 if(parentStr
==null){
5291 parentStr
= myName
.getSubtribeStr();
5292 r
= Rank
.SUBTRIBE();
5294 if (parentStr
== null){
5295 parentStr
= myName
.getTribeStr();
5298 if (parentStr
== null){
5299 parentStr
= myName
.getSubfamilyStr();
5300 r
= Rank
.SUBFAMILY();
5302 if (parentStr
== null){
5303 parentStr
= myName
.getFamilyStr();
5306 if(parentStr
!=null){
5307 NonViralName
<?
> parentNameName
= (NonViralName
<?
>) parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
5308 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
5309 // importer.getTaxonService().save(parent);
5310 // parent = CdmBase.deproxy(parent, Taxon.class);
5312 boolean parentDoesNotExists
= true;
5313 for (TaxonNode p
: classification
.getAllNodes()){
5314 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
5315 // System.out.println(p.getTaxon().getUuid());
5316 // System.out.println(parent.getUuid());
5317 parentDoesNotExists
= false;
5318 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
5322 // if(parentDoesNotExists) {
5323 // importer.getTaxonService().save(parent);
5324 // parent = CdmBase.deproxy(parent, Taxon.class);
5325 // lookForParentNode(parentNameName, parent, ref,myName);
5327 if(parentDoesNotExists
) {
5328 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5331 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5332 importer
.getTaxonService().save(parent
);
5333 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
5337 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5340 hierarchy
.put(r
,parent
);
5349 private void handleSpeciesHierarchy(Reference
<?
> ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5350 String parentStr
= myName
.getSubgenusStr();
5351 Rank r
= Rank
.SUBGENUS();
5353 if(parentStr
==null){
5354 parentStr
= myName
.getGenusStr();
5358 if(parentStr
==null){
5359 parentStr
= myName
.getSubtribeStr();
5360 r
= Rank
.SUBTRIBE();
5362 if (parentStr
== null){
5363 parentStr
= myName
.getTribeStr();
5366 if (parentStr
== null){
5367 parentStr
= myName
.getSubfamilyStr();
5368 r
= Rank
.SUBFAMILY();
5370 if (parentStr
== null){
5371 parentStr
= myName
.getFamilyStr();
5374 if(parentStr
!=null){
5375 Taxon parent
= handleParentName(ref
, myName
, parser
, parentStr
, r
);
5376 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5377 hierarchy
.put(r
,parent
);
5386 private void handleSubSpeciesHierarchy(Reference
<?
> ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5387 String parentStr
= myName
.getSpeciesStr();
5388 Rank r
= Rank
.SPECIES();
5391 if(parentStr
==null){
5392 parentStr
= myName
.getSubgenusStr();
5393 r
= Rank
.SUBGENUS();
5396 if(parentStr
==null){
5397 parentStr
= myName
.getGenusStr();
5401 if(parentStr
==null){
5402 parentStr
= myName
.getSubtribeStr();
5403 r
= Rank
.SUBTRIBE();
5405 if (parentStr
== null){
5406 parentStr
= myName
.getTribeStr();
5409 if (parentStr
== null){
5410 parentStr
= myName
.getSubfamilyStr();
5411 r
= Rank
.SUBFAMILY();
5413 if (parentStr
== null){
5414 parentStr
= myName
.getFamilyStr();
5417 if(parentStr
!=null){
5418 Taxon parent
= handleParentName(ref
, myName
, parser
, parentStr
, r
);
5419 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5420 hierarchy
.put(r
,parent
);
5430 private void handleFormHierarchy(Reference
<?
> ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5431 String parentStr
= myName
.getSubspeciesStr();
5432 Rank r
= Rank
.SUBSPECIES();
5435 if(parentStr
==null){
5436 parentStr
= myName
.getSpeciesStr();
5440 if(parentStr
==null){
5441 parentStr
= myName
.getSubgenusStr();
5442 r
= Rank
.SUBGENUS();
5445 if(parentStr
==null){
5446 parentStr
= myName
.getGenusStr();
5450 if(parentStr
==null){
5451 parentStr
= myName
.getSubtribeStr();
5452 r
= Rank
.SUBTRIBE();
5454 if (parentStr
== null){
5455 parentStr
= myName
.getTribeStr();
5458 if (parentStr
== null){
5459 parentStr
= myName
.getSubfamilyStr();
5460 r
= Rank
.SUBFAMILY();
5462 if (parentStr
== null){
5463 parentStr
= myName
.getFamilyStr();
5466 if(parentStr
!=null){
5467 Taxon parent
= handleParentName(ref
, myName
, parser
, parentStr
, r
);
5468 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5469 hierarchy
.put(r
,parent
);
5478 private void handleVarietyHierarchy(Reference
<?
> ref
, MyName myName
, INonViralNameParser
<?
> parser
) {
5479 String parentStr
= myName
.getSubspeciesStr();
5480 Rank r
= Rank
.SUBSPECIES();
5482 if(parentStr
==null){
5483 parentStr
= myName
.getSpeciesStr();
5487 if(parentStr
==null){
5488 parentStr
= myName
.getSubgenusStr();
5489 r
= Rank
.SUBGENUS();
5492 if(parentStr
==null){
5493 parentStr
= myName
.getGenusStr();
5497 if(parentStr
==null){
5498 parentStr
= myName
.getSubtribeStr();
5499 r
= Rank
.SUBTRIBE();
5501 if (parentStr
== null){
5502 parentStr
= myName
.getTribeStr();
5505 if (parentStr
== null){
5506 parentStr
= myName
.getSubfamilyStr();
5507 r
= Rank
.SUBFAMILY();
5509 if (parentStr
== null){
5510 parentStr
= myName
.getFamilyStr();
5513 if(parentStr
!=null){
5514 Taxon parent
= handleParentName(ref
, myName
, parser
, parentStr
, r
);
5515 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5516 hierarchy
.put(r
,parent
);
5528 private Taxon
handleParentName(Reference
<?
> ref
, MyName myName
, INonViralNameParser
<?
> parser
, String parentStr
, Rank r
) {
5529 NonViralName
<?
> parentNameName
= (NonViralName
<?
>) parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
5530 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
5531 // importer.getTaxonService().save(parent);
5532 // parent = CdmBase.deproxy(parent, Taxon.class);
5534 boolean parentDoesNotExists
= true;
5535 for (TaxonNode p
: classification
.getAllNodes()){
5536 if(p
.getTaxon().getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(parent
.getTitleCache().split("sec.")[0].trim())) {
5537 // System.out.println(p.getTaxon().getUuid());
5538 // System.out.println(parent.getUuid());
5539 parentDoesNotExists
= false;
5540 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
5544 if(parentDoesNotExists
) {
5545 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
5546 // System.out.println("FOUND PARENT "+tmp.toString()+" for "+parentNameName.toString());
5549 parent
=Taxon
.NewInstance(parentNameName
, ref
);
5550 importer
.getTaxonService().save(parent
);
5551 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
5555 lookForParentNode(parentNameName
, parent
, ref
,myName
);
5561 private void addNameDifferenceToFile(String originalname
, String atomisedname
){
5563 FileWriter fstream
= new FileWriter("/home/pkelbert/Bureau/NamesDifferent_"+classification
.getTitleCache()+".txt",true);
5564 BufferedWriter out
= new BufferedWriter(fstream
);
5565 out
.write(originalname
+" (original) versus "+replaceNull(atomisedname
)+" (atomised) \n");
5566 //Close the output stream
5568 }catch (Exception e
){//Catch exception if any
5569 System
.err
.println("Error: " + e
.getMessage());
5575 * @param nomenclaturalCode2
5578 private void addProblemNameToFile(String name
, String author
, NomenclaturalCode nomenclaturalCode2
, Rank rank
) {
5580 FileWriter fstream
= new FileWriter("/home/pkelbert/Bureau/NameNotParsed.txt",true);
5581 BufferedWriter out
= new BufferedWriter(fstream
);
5582 out
.write(name
+"\t"+replaceNull(author
)+"\t"+replaceNull(nomenclaturalCode2
)+"\t"+replaceNull(rank
)+"\n");
5583 //Close the output stream
5585 }catch (Exception e
){//Catch exception if any
5586 System
.err
.println("Error: " + e
.getMessage());
5593 * @param bestMatchingTaxon
5594 * @param insertAsExisting
5597 private void logDecision(NonViralName
<?
> tnb
, Taxon bestMatchingTaxon
, boolean insertAsExisting
, Reference refMods
) {
5599 FileWriter fstream
= new FileWriter("/home/pkelbert/Bureau/Decisions_"+classification
.toString()+".txt",true);
5600 BufferedWriter out
= new BufferedWriter(fstream
);
5601 out
.write(tnb
.getTitleCache()+" sec. "+refMods
+"\t"+bestMatchingTaxon
.getTitleCache()+"\t"+insertAsExisting
+"\n");
5602 //Close the output stream
5604 }catch (Exception e
){//Catch exception if any
5605 System
.err
.println("Error: " + e
.getMessage());
5610 @SuppressWarnings("unused")
5611 private String
replaceNull(Object in
){
5615 if (in
.getClass().equals(NomenclaturalCode
.class)) {
5616 return ((NomenclaturalCode
)in
).getTitleCache();
5618 return in
.toString();
5623 * @param nomenclaturalCode2
5626 private void addProblemNameToFile(String type
, String name
, NomenclaturalCode nomenclaturalCode2
, Rank rank
, String problems
) {
5628 FileWriter fstream
= new FileWriter("/home/pkelbert/Bureau/NameNotParsed_"+classification
.getTitleCache()+".txt",true);
5629 BufferedWriter out
= new BufferedWriter(fstream
);
5630 out
.write(type
+"\t"+name
+"\t"+replaceNull(nomenclaturalCode2
)+"\t"+replaceNull(rank
)+"\t"+problems
+"\n");
5631 //Close the output stream
5633 }catch (Exception e
){//Catch exception if any
5634 System
.err
.println("Error: " + e
.getMessage());