3 * Copyright (C) 2013 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.taxonx2013
;
12 import java
.io
.BufferedWriter
;
14 import java
.io
.FileWriter
;
15 import java
.io
.IOException
;
17 import java
.util
.ArrayList
;
18 import java
.util
.HashMap
;
19 import java
.util
.List
;
22 import java
.util
.UUID
;
23 import java
.util
.regex
.Pattern
;
25 import javax
.xml
.transform
.TransformerException
;
26 import javax
.xml
.transform
.TransformerFactoryConfigurationError
;
28 import org
.apache
.commons
.lang
.StringUtils
;
29 import org
.w3c
.dom
.Node
;
30 import org
.w3c
.dom
.NodeList
;
32 import com
.ibm
.lsid
.MalformedLSIDException
;
34 import eu
.etaxonomy
.cdm
.api
.facade
.DerivedUnitFacade
;
35 import eu
.etaxonomy
.cdm
.api
.service
.pager
.Pager
;
36 import eu
.etaxonomy
.cdm
.model
.agent
.AgentBase
;
37 import eu
.etaxonomy
.cdm
.model
.agent
.Person
;
38 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
39 import eu
.etaxonomy
.cdm
.model
.common
.DefinedTermBase
;
40 import eu
.etaxonomy
.cdm
.model
.common
.IdentifiableSource
;
41 import eu
.etaxonomy
.cdm
.model
.common
.LSID
;
42 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
43 import eu
.etaxonomy
.cdm
.model
.common
.OriginalSourceType
;
44 import eu
.etaxonomy
.cdm
.model
.common
.UuidAndTitleCache
;
45 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
46 import eu
.etaxonomy
.cdm
.model
.description
.FeatureNode
;
47 import eu
.etaxonomy
.cdm
.model
.description
.FeatureTree
;
48 import eu
.etaxonomy
.cdm
.model
.description
.IndividualsAssociation
;
49 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
50 import eu
.etaxonomy
.cdm
.model
.description
.TaxonNameDescription
;
51 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
52 import eu
.etaxonomy
.cdm
.model
.name
.BacterialName
;
53 import eu
.etaxonomy
.cdm
.model
.name
.BotanicalName
;
54 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
55 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatus
;
56 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatusType
;
57 import eu
.etaxonomy
.cdm
.model
.name
.NonViralName
;
58 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
59 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
60 import eu
.etaxonomy
.cdm
.model
.name
.ZoologicalName
;
61 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivedUnit
;
62 import eu
.etaxonomy
.cdm
.model
.occurrence
.SpecimenOrObservationType
;
63 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
64 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
65 import eu
.etaxonomy
.cdm
.model
.taxon
.Classification
;
66 import eu
.etaxonomy
.cdm
.model
.taxon
.Synonym
;
67 import eu
.etaxonomy
.cdm
.model
.taxon
.SynonymRelationshipType
;
68 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
69 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
70 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonNode
;
71 import eu
.etaxonomy
.cdm
.persistence
.query
.MatchMode
;
72 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
73 import eu
.etaxonomy
.cdm
.strategy
.parser
.INonViralNameParser
;
74 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImpl
;
81 public class TaxonXTreatmentExtractor
extends TaxonXExtractor
{
/** Feature label handed to extractSpecificFeatureNotStructured for nodes without a dedicated handler. */
private static final String notMarkedUp = "Not marked-up";
/** UUID under which buildFeatureTree looks up and creates the feature tree. */
private static final UUID proIbioTreeUUID = UUID.fromString("2c49f506-c7f7-44de-a8b9-2e695de3769c");
private static final UUID OtherUUID = UUID.fromString("6465f8aa-2175-446f-807e-7163994b120f");
private static final UUID NotMarkedUpUUID = UUID.fromString("796fe3a5-2c9c-4a89-b298-7598ca944063");
private static final boolean skippQuestion = true;

/** Nomenclatural code given to the constructor. */
private final NomenclaturalCode nomenclaturalCode;
/** Classification given to the constructor; re-read from the service by reloadClassification. */
private Classification classification;

private String treatmentMainName, originalTreatmentName;

private final HashMap<String, Map<String, String>> namesMap = new HashMap<String, Map<String, String>>();

// The two key patterns are constants, so they are compiled once per class instead of once per instance.
private static final Pattern keypattern = Pattern.compile("^(\\d+.*|-\\d+.*)");
private static final Pattern keypatternend = Pattern.compile("^.+?\\d$");

/** Gate checked by extractTreatment before most child nodes are processed and before saving. */
private boolean maxRankRespected = false;
/** Features by title cache; filled by buildFeatureTree when it is still null. */
private Map<String, Feature> featuresMap;

/** Name currently being processed; replaced by getTaxonNameBaseFromXML. */
private MyName currentMyName = new MyName();

/** Reference given to the constructor as urlSource and forwarded to the source handler. */
private final Reference<?> sourceUrlRef;

/** Helper used to attach source references to the objects created during extraction. */
private final TaxonXAddSources sourceHandler = new TaxonXAddSources();
/**
 * Creates an extractor and wires up its source handler.
 *
 * @param nomenclaturalCode the nomenclatural code kept by this extractor
 * @param classification the classification kept by this extractor
 * @param importer the importer; its agent service is passed to prepareCollectors
 * @param configState the import state, passed to prepareCollectors and to the source handler
 * @param featuresMap the feature map kept by this extractor
 * @param urlSource the reference kept as sourceUrlRef and passed to the source handler
 */
public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode, Classification classification,
        TaxonXImport importer, TaxonXImportState configState, Map<String, Feature> featuresMap,
        Reference<?> urlSource) {
    // Plain state first: everything below relies on importer and configState being set.
    this.sourceUrlRef = urlSource;
    this.featuresMap = featuresMap;
    this.configState = configState;
    this.importer = importer;
    this.classification = classification;
    this.nomenclaturalCode = nomenclaturalCode;

    prepareCollectors(configState, importer.getAgentService());

    // The source handler receives the same reference, importer and state as this extractor.
    sourceHandler.setSourceUrlRef(urlSource);
    sourceHandler.setImporter(importer);
    sourceHandler.setConfigState(configState);
}
130 * extracts all the treatment information and saves it
131 * @param treatmentnode: the XML Node
132 * @param tosave: the list of object to save into the CDM
133 * @param refMods: the reference extracted from the MODS
134 * @param sourceName: the URI of the document
// Walks the child nodes of one treatment node and hands each child to the matching extract* helper:
// "tax:nomenclature" yields the accepted taxon, "tax:ref_group" the references, "tax:div" nodes are
// dispatched on their "type" attribute, "tax:figure" is stored as a "Figure" feature, and other nodes
// (except "tax:pb") are stored under the notMarkedUp label. When maxRankRespected is set, the collected
// names and the classification are saved at the end.
// NOTE(review): the embedded original line numbers jump in several places (e.g. 148 to 153, 159 to 165),
// so some statements and all closing braces of this method are not visible in this excerpt.
136 @SuppressWarnings({ "rawtypes", "unused" })
137 protected void extractTreatment(Node treatmentnode
, List
<Object
> tosave
, Reference
<?
> refMods
, URI sourceName
) {
140 logger
.info("extractTreatment");
141 List
<TaxonNameBase
> nametosave
= new ArrayList
<TaxonNameBase
>();
142 NodeList children
= treatmentnode
.getChildNodes();
143 Taxon acceptedTaxon
=null;
144 Taxon defaultTaxon
=null;
145 boolean refgroup
=false;
// First pass: looks for a "tax:ref_group" child (the statements inside the match are not visible here).
147 for (int i
=0;i
<children
.getLength();i
++){
148 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:ref_group")) {
// Second pass: dispatch every child node by node name and, for "tax:div", by its "type" attribute.
153 for (int i
=0;i
<children
.getLength();i
++){
155 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:nomenclature")){
156 NodeList nomenclature
= children
.item(i
).getChildNodes();
157 boolean containsName
=false;
158 for(int k
=0;k
<nomenclature
.getLength();k
++){
159 if(nomenclature
.item(k
).getNodeName().equalsIgnoreCase("tax:name")){
165 reloadClassification();
166 //extract "main" the scientific name
168 acceptedTaxon
= extractNomenclature(children
.item(i
),nametosave
,refMods
);
// NOTE(review): a ClassCastException ends the whole JVM through System.exit(0), i.e. with a success status.
169 }catch(ClassCastException e
){e
.printStackTrace();System
.exit(0);}
170 // System.out.println("acceptedTaxon : "+acceptedTaxon);
173 else if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:ref_group") && maxRankRespected
){
174 reloadClassification();
175 //extract the References within the document
176 extractReferences(children
.item(i
),nametosave
,acceptedTaxon
,refMods
);
// NOTE(review): getNamedItem("type") is dereferenced without a null check here and in the branches below;
// presumably every tax:div carries a "type" attribute - confirm, otherwise this throws a NullPointerException.
178 else if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
179 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected
){
// NOTE(review): hard-coded, developer-specific absolute path; the source name is appended to this file.
180 File file
= new File("/home/pkelbert/Bureau/multipleTaxonX.txt");
183 writer
= new FileWriter(file
,true);
184 writer
.write(sourceName
+"\n");
187 } catch (IOException e1
) {
188 // TODO Auto-generated catch block
189 e1
.printStackTrace();
191 // String multiple = askMultiple(children.item(i));
// "multiple" is fixed to "Other", so of the comparisons below only the one against "other" can match.
192 String multiple
= "Other";
193 if (multiple
.equalsIgnoreCase("other")) {
194 extractSpecificFeatureNotStructured(children
.item(i
),acceptedTaxon
, defaultTaxon
,nametosave
, refMods
,multiple
);
197 if (multiple
.equalsIgnoreCase("synonyms")) {
199 extractSynonyms(children
.item(i
),acceptedTaxon
, refMods
);
200 }catch(NullPointerException e
){
201 logger
.warn("the accepted taxon is maybe null");
205 if(multiple
.equalsIgnoreCase("material examined")){
206 extractMaterials(children
.item(i
),acceptedTaxon
, refMods
, nametosave
);
209 if (multiple
.equalsIgnoreCase("distribution")){
210 extractDistribution(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
);
213 if (multiple
.equalsIgnoreCase("type status")){
214 extractDescriptionWithReference(children
.item(i
),acceptedTaxon
,defaultTaxon
,refMods
, "TypeStatus");
217 if (multiple
.equalsIgnoreCase("vernacular name")){
218 extractDescriptionWithReference(children
.item(i
),acceptedTaxon
,defaultTaxon
,refMods
, Feature
.COMMON_NAME().getTitleCache());
222 extractSpecificFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
,multiple
);
226 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
227 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected
){
228 extractFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
, nametosave
, refMods
, Feature
.BIOLOGY_ECOLOGY());
230 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
231 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("vernacularName") && maxRankRespected
){
232 extractDescriptionWithReference(children
.item(i
),acceptedTaxon
,defaultTaxon
,refMods
, Feature
.COMMON_NAME().getTitleCache());
234 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
235 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected
){
236 extractFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
, Feature
.DESCRIPTION());
238 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
239 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected
){
240 extractFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
,Feature
.DIAGNOSIS());
242 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
243 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected
){
244 extractFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
, Feature
.DISCUSSION());
// "note" divs are stored with the same DESCRIPTION feature as "description" divs.
246 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
247 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("note") && maxRankRespected
){
248 extractFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
, Feature
.DESCRIPTION());
251 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
252 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected
){
253 extractDistribution(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
);
255 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
256 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected
){
257 extractFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
,refMods
,Feature
.ETYMOLOGY());
260 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
261 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected
){
262 extractMaterials(children
.item(i
),acceptedTaxon
, refMods
, nametosave
);
264 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:figure") && maxRankRespected
){
265 extractSpecificFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
, nametosave
, refMods
, "Figure");
// Divs of type "Other" are stored under the feature name "table".
267 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
268 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") && maxRankRespected
){
269 extractSpecificFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
, nametosave
, refMods
, "table");
272 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
273 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected
){
274 //TODO IGNORE keys for the moment
275 //extractKey(children.item(i),acceptedTaxon, nametosave,source, refMods);
276 extractSpecificFeatureNotStructured(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
,"Keys - unparsed");
// Remaining node kinds other than "tax:pb" are logged and stored under the notMarkedUp label.
279 if (!children
.item(i
).getNodeName().equalsIgnoreCase("tax:pb")){
280 logger
.info("ANOTHER KIND OF NODES: "+children
.item(i
).getNodeName()+", "+children
.item(i
).getAttributes());
281 if (children
.item(i
).getAttributes() !=null) {
282 logger
.info(children
.item(i
).getAttributes().item(0));
284 extractSpecificFeatureNotStructured(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
,notMarkedUp
);
288 // logger.info("saveUpdateNames");
// Names and classification are persisted only when maxRankRespected is set.
289 if (maxRankRespected
){
290 importer
.getNameService().saveOrUpdate(nametosave
);
291 importer
.getClassificationService().saveOrUpdate(classification
);
292 logger
.info("saveUpdateNames-ok");
299 protected Map
<String
,Feature
> getFeaturesUsed(){
// Builds the feature tree identified by proIbioTreeUUID from the entries of featuresMap and stores it
// through the importer's feature tree service.
// Visible steps: look the tree up by UUID; when it is null, list the existing FeatureTrees and, if there
// is exactly one, put its distinct features into featuresMap keyed by title cache (creating the map when
// it is null); create a new FeatureTree carrying proIbioTreeUUID; remove children of the root node
// starting at the last index; add one FeatureNode per value of featuresMap; save the tree.
// NOTE(review): braces, the try opener and presumably the update of nbChildren are on original lines that
// are not visible in this excerpt - confirm the exact nesting and loop termination against the full file.
305 private void buildFeatureTree() {
306 FeatureTree proibiospheretree
= importer
.getFeatureTreeService().find(proIbioTreeUUID
);
307 if (proibiospheretree
== null){
308 List
<FeatureTree
> trees
= importer
.getFeatureTreeService().list(FeatureTree
.class, null, null, null, null);
309 if (trees
.size()==1) {
310 FeatureTree ft
= trees
.get(0);
311 if (featuresMap
==null) {
312 featuresMap
=new HashMap
<String
, Feature
>();
314 for (Feature feature
: ft
.getDistinctFeatures()){
316 featuresMap
.put(feature
.getTitleCache(), feature
);
320 proibiospheretree
= FeatureTree
.NewInstance();
321 proibiospheretree
.setUuid(proIbioTreeUUID
);
323 // FeatureNode root = proibiospheretree.getRoot();
324 FeatureNode root2
= proibiospheretree
.getRoot();
// Children are removed by index from the end; a failing removal is only logged as a warning.
326 int nbChildren
= root2
.getChildCount()-1;
327 while (nbChildren
>-1){
329 root2
.removeChild(nbChildren
);
330 }catch(Exception e
){logger
.warn("Can't remove child from FeatureTree "+e
);}
// NOTE(review): featuresMap is iterated here; it is only initialised above inside the "tree not found"
// path, so presumably callers guarantee it is non-null - confirm.
336 for (Feature feature
:featuresMap
.values()) {
337 root2
.addChild(FeatureNode
.NewInstance(feature
));
339 importer
.getFeatureTreeService().saveOrUpdate(proibiospheretree
);
346 * @param acceptedTaxon: the current acceptedTaxon
347 * @param nametosave: the list of objects to save into the CDM
348 * @param refMods: the current reference extracted from the MODS
350 /* @SuppressWarnings("rawtypes")
351 private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonNameBase> nametosave, Reference<?> refMods) {
352 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
354 NodeList children = keys.getChildNodes();
356 PolytomousKey poly = PolytomousKey.NewInstance();
357 poly.addSource(OriginalSourceType.Import, null,null,refMods,null);
358 poly.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
359 poly.addTaxonomicScope(acceptedTaxon);
360 poly.setTitleCache("bloup");
361 // poly.addCoveredTaxon(acceptedTaxon);
362 PolytomousKeyNode root = poly.getRoot();
363 PolytomousKeyNode previous = null,tmpKey=null;
365 List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
367 // String fullContent = keys.getTextContent();
368 for (int i=0;i<children.getLength();i++){
369 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
370 NodeList paragraph = children.item(i).getChildNodes();
373 for (int j=0;j<paragraph.getLength();j++){
374 if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
375 if (! paragraph.item(j).getTextContent().trim().isEmpty()){
376 key+=paragraph.item(j).getTextContent().trim();
377 // logger.info("KEY: "+j+"--"+key);
380 if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
381 taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
384 // logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
385 if (keypattern.matcher(key).matches()){
386 tmpKey = PolytomousKeyNode.NewInstance(key);
387 if (taxonKey!=null) {
388 tmpKey.setTaxon(taxonKey);
390 polyNodes.add(tmpKey);
391 if (previous == null) {
392 root.addChild(tmpKey);
394 previous.addChild(tmpKey);
398 tmpKey=PolytomousKeyNode.NewInstance(key);
399 if (taxonKey!=null) {
400 tmpKey.setTaxon(taxonKey);
402 polyNodes.add(tmpKey);
403 if (keypatternend.matcher(key).matches()) {
404 root.addChild(tmpKey);
407 previous.addChild(tmpKey);
414 importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
415 importer.getPolytomousKeyService().saveOrUpdate(poly);
419 // * @param taxons: the XML Nodegroup
420 // * @param nametosave: the list of objects to save into the CDM
421 // * @param acceptedTaxon: the current accepted Taxon
422 // * @param refMods: the current reference extracted from the MODS
424 // * @return Taxon object built
426 // @SuppressWarnings({ "rawtypes", "unchecked" })
427 // private Taxon getTaxonFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference<?> refMods) {
428 // // logger.info("getTaxonFromXML");
429 // // logger.info("acceptedTaxon: "+acceptedTaxon);
431 // // TaxonNameBase nameToBeFilled = null;
433 // currentMyName = new MyName();
434 // NomenclaturalStatusType statusType = null;
437 // currentMyName = extractScientificName(taxons);
438 // if (!currentMyName.getStatus().isEmpty()){
440 // statusType = nomStatusString2NomStatus(currentMyName.getStatus());
441 // } catch (UnknownCdmTypeException e) {
442 // addProblematicStatusToFile(currentMyName.getStatus());
443 // logger.warn("Problem with status");
447 // } catch (TransformerFactoryConfigurationError e1) {
449 // } catch (TransformerException e1) {
452 // /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
454 // nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
455 // if (nameToBeFilled.hasProblem() &&
456 // !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
457 // // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
458 // addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
459 // nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser, currentMyName.getAuthor(), currentMyName.getRank());
462 // nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
464 // TaxonNameBase nameToBeFilled = currentMyName.getTaxonNameBase();
465 // Taxon t = currentMyName.getTaxon();
466 // // importer.getNameService().saveOrUpdate(nametosave);
467 // /* Taxon t = importer.getTaxonService().findBestMatchingTaxon(nameToBeFilled.getTitleCache());
469 // boolean statusMatch=false;
471 // statusMatch=compareStatus(t, statusType);
473 // if (t ==null || (t != null && !statusMatch)){
474 // if(statusType != null) {
475 // nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
477 // t= new Taxon(nameToBeFilled,(Reference<?>) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
478 // if (t.getSec() == null) {
479 // t.setSec(refMods);
481 // if(!configState.getConfig().doKeepOriginalSecundum()) {
482 // t.setSec(configState.getConfig().getSecundum());
483 // logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
485 // t.addSource(OriginalSourceType.Import,null,null,refMods,null);
486 // t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
489 // if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
490 // setLSID(currentMyName.getIdentifier(), t);
493 // // Taxon parentTaxon = currentMyName.getHigherTaxa();
494 // // if (parentTaxon == null && !skippQuestion) {
495 // // parentTaxon = askParent(t, classification);
497 // // if (parentTaxon ==null){
498 // // while (parentTaxon == null) {
499 // // System.out.println("parent is null");
500 // // parentTaxon = createParent(t, refMods);
501 // // classification.addParentChild(parentTaxon, t, refMods, null);
504 // // classification.addParentChild(parentTaxon, t, refMods, null);
508 // t = CdmBase.deproxy(t, Taxon.class);
510 // if (!configState.getConfig().doKeepOriginalSecundum()) {
511 // t.setSec(configState.getConfig().getSecundum());
512 // logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
520 // private Taxon getTaxonFromTaxonNameBase(TaxonNameBase tnb,Reference<?> ref){
521 // Taxon taxon = null;
522 //// System.out.println(tnb.getTitleCache());
523 // Taxon cc= importer.getTaxonService().findBestMatchingTaxon(tnb.getTitleCache());
525 // if ((cc.getSec() == null || cc.getSec().toString().isEmpty()) || (cc.getSec() != null &&
526 // cc.getSec().getTitleCache().equalsIgnoreCase(ref.getTitleCache()))) {
527 // if(cc.getSec() == null || cc.getSec().toString().isEmpty()){
529 // importer.getTaxonService().saveOrUpdate(cc);
535 // // List<TaxonBase> c = importer.getTaxonService().searchTaxaByName(tnb.getTitleCache(), ref);
536 // List<TaxonBase> c = importer.getTaxonService().list(TaxonBase.class, 0, 0, null, null);
537 // for (TaxonBase b : c) {
539 // taxon = (Taxon) b;
540 // }catch(ClassCastException e){logger.warn("error while casting existing taxonnamebase");}
543 // if (taxon == null){
544 //// System.out.println("NEW TAXON HERE "+tnb.toString()+", "+ref.toString());
545 // taxon = Taxon.NewInstance(tnb, ref); //sec set null
546 // importer.getTaxonService().save(taxon);
549 // taxon = (Taxon) importer.getTaxonService().find(taxon.getUuid());
551 // boolean exist = false;
552 // for (TaxonNode p : classification.getAllNodes()){
553 // if(p.getTaxon().equals(taxon)) {
558 // taxon = (Taxon) importer.getTaxonService().find(taxon.getUuid());
559 // Taxon parentTaxon = currentMyName.getHigherTaxa();
560 // if (parentTaxon != null) {
561 // classification.addParentChild(parentTaxon, taxon, ref, null);
563 // System.out.println("HERE???");
564 // classification.addChildTaxon(taxon, ref, null);
566 // importer.getClassificationService().saveOrUpdate(classification);
567 // // refreshTransaction();
569 // taxon = CdmBase.deproxy(taxon, Taxon.class);
570 // // System.out.println("TAXON RETOURNE : "+taxon.getTitleCache());
574 * @param taxons: the XML Nodegroup
575 * @param nametosave: the list of objects to save into the CDM
576 * @param acceptedTaxon: the current accepted Taxon
577 * @param refMods: the current reference extracted from the MODS
579 * @return Taxon object built
581 @SuppressWarnings({ "rawtypes", "unused" })
582 private TaxonNameBase
getTaxonNameBaseFromXML(Node taxons
, List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
) {
583 // logger.info("getTaxonFromXML");
584 // logger.info("acceptedTaxon: "+acceptedTaxon);
586 TaxonNameBase nameToBeFilled
= null;
588 currentMyName
=new MyName();
590 NomenclaturalStatusType statusType
= null;
592 currentMyName
= extractScientificName(taxons
,refMods
);
593 } catch (TransformerFactoryConfigurationError e1
) {
595 } catch (TransformerException e1
) {
598 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
600 nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
601 if (nameToBeFilled.hasProblem() &&
602 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
603 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
604 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
605 nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
608 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
610 nameToBeFilled
= currentMyName
.getTaxonNameBase();
611 return nameToBeFilled
;
615 // @SuppressWarnings("rawtypes")
616 // private TaxonNameBase getTaxonNameBase (TaxonNameBase name, List<TaxonNameBase> nametosave, NomenclaturalStatusType statusType){
617 // List<TaxonNameBase> names = importer.getNameService().list(TaxonNameBase.class, null, null, null, null);
618 // for (TaxonNameBase tb : names){
619 // if (tb.getTitleCache().equalsIgnoreCase(name.getTitleCache())) {
620 // boolean statusMatch=false;
622 // statusMatch=compareStatus(tb, statusType);
624 // if (!statusMatch){
625 // if(statusType != null) {
626 // name.addStatus(NomenclaturalStatus.NewInstance(statusType));
630 // logger.info("TaxonNameBase FOUND"+name.getTitleCache());
631 // return CdmBase.deproxy(tb, TaxonNameBase.class);
635 // // logger.info("TaxonNameBase NOT FOUND "+name.getTitleCache());
636 // // System.out.println("add name "+name);
637 // nametosave.add(name);
638 // name = CdmBase.deproxy(name, TaxonNameBase.class);
647 // * @param statusType
650 // private boolean compareStatus(TaxonNameBase tb, NomenclaturalStatusType statusType) {
651 // boolean statusMatch=false;
653 // Set<NomenclaturalStatus> status = tb.getStatus();
654 // if (statusType!=null && status.size()>0){ //the statusType is known for both taxon
655 // for (NomenclaturalStatus st:status){
656 // NomenclaturalStatusType stype = st.getType();
657 // if (stype.toString().equalsIgnoreCase(statusType.toString())) {
663 // if(statusType == null && status.size()==0) {//there is no statusType, we can assume it's the same
667 // return statusMatch;
// Refreshes the classification field from the classification service.
// Visible statements: the classification is looked up by UUID into "cl"; the in-memory classification
// is saved and then loaded again by UUID into the field.
// NOTE(review): original lines 675-677 are not visible in this excerpt; presumably they decide between
// adopting "cl" and the save-then-reload path - confirm against the full file.
673 private void reloadClassification() {
674 Classification cl
= importer
.getClassificationService().find(classification
.getUuid());
678 importer
.getClassificationService().saveOrUpdate(classification
);
679 classification
= importer
.getClassificationService().find(classification
.getUuid());
684 // * Create a Taxon for the current NameBase, based on the current reference
685 // * @param taxonNameBase
686 // * @param refMods: the current reference extracted from the MODS
689 // @SuppressWarnings({ "unused", "rawtypes" })
690 // private Taxon getTaxon(TaxonNameBase taxonNameBase, Reference<?> refMods) {
691 // Taxon t = new Taxon(taxonNameBase,null );
692 // if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
693 // t.setSec(configState.getConfig().getSecundum());
694 // logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
696 // /*<<<<<<< .courant
697 // boolean sourceExists=false;
698 // Set<IdentifiableSource> sources = t.getSources();
699 // for (IdentifiableSource src : sources){
700 // String micro = src.getCitationMicroReference();
701 // Reference r = src.getCitation();
702 // if (r.equals(refMods) && micro == null) {
703 // sourceExists=true;
706 // if(!sourceExists) {
707 // t.addSource(null,null,refMods,null);
710 // t.addSource(OriginalSourceType.Import,null,null,refMods,null);
711 // t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
// Stores the text of a node as a description element of the feature named featureName, together with
// the bibliographic reference found inside the node.
// Visible steps: the trimmed text of all "tax:p" children is appended to s and that of all "tax:bibref"
// children to r; the feature is resolved with getFeatureObjectFromString; a generic Reference is created
// and its title cache set to r; everything is passed to setParticularDescription.
// NOTE(review): the statements guarded by "s.indexOf(r)>-1" and any condition around setTitleCache are on
// original lines that are not visible in this excerpt - confirm against the full file.
715 private void extractDescriptionWithReference(Node typestatus
, Taxon acceptedTaxon
, Taxon defaultTaxon
, Reference
<?
> refMods
,
716 String featureName
) {
717 // System.out.println("extractDescriptionWithReference !");
718 NodeList children
= typestatus
.getChildNodes();
720 Feature currentFeature
=getFeatureObjectFromString(featureName
);
// s collects the paragraph text, r the bibliographic reference text.
722 String r
="";String s
="";
723 for (int i
=0;i
<children
.getLength();i
++){
724 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
725 s
+=children
.item(i
).getTextContent().trim();
727 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:bibref")){
728 r
+= children
.item(i
).getTextContent().trim();
// NOTE(review): while r is still the empty string, indexOf(r) is 0, so this condition is true.
730 if (s
.indexOf(r
)>-1) {
735 Reference
<?
> currentref
= ReferenceFactory
.newGeneric();
737 currentref
.setTitleCache(r
);
741 setParticularDescription(s
,acceptedTaxon
,defaultTaxon
, currentref
, refMods
,currentFeature
);
746 * @param distribution: the XML node group
747 * @param acceptedTaxon: the current accepted Taxon
748 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
749 * @param refMods: the current reference extracted from the MODS
// Reads the "tax:p" paragraphs of a distribution node and attaches the result to the accepted taxon.
// Per paragraph index i: "#text" nodes go through extractText, inline "tax:name" nodes through
// extractInLine, and "tax:collection_event" nodes are converted with extractSpecimenOrObservation and
// recorded with extractTextFromSpecimenOrObservation.
// When acceptedTaxon is not null: specimens are attached with handleAssociation; texts starting with
// "Hab." or "Habitat" are passed to setParticularDescription with Feature.HABITAT(); handleTextData is
// called with Feature.DISTRIBUTION(); the description is added to the taxon, sourced and saved.
// NOTE(review): the declaration of "m", the bodies of the two keySet loops and the branch structure
// between the HABITAT call and handleTextData are on original lines not visible in this excerpt.
751 @SuppressWarnings("rawtypes")
752 private void extractDistribution(Node distribution
, Taxon acceptedTaxon
, Taxon defaultTaxon
, List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
) {
753 // logger.info("DISTRIBUTION");
754 // logger.info("acceptedTaxon: "+acceptedTaxon);
755 NodeList children
= distribution
.getChildNodes();
// Both maps are keyed by the index of the child node the content came from.
756 Map
<Integer
,List
<MySpecimenOrObservation
>> specimenOrObservations
= new HashMap
<Integer
, List
<MySpecimenOrObservation
>>();
757 Map
<Integer
,String
> descriptionsFulltext
= new HashMap
<Integer
,String
>();
759 for (int i
=0;i
<children
.getLength();i
++){
760 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
761 NodeList paragraph
= children
.item(i
).getChildNodes();
762 for (int j
=0;j
<paragraph
.getLength();j
++){
763 if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("#text")){
764 extractText(descriptionsFulltext
, i
, paragraph
.item(j
));
766 else if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
767 extractInLine(nametosave
, refMods
, descriptionsFulltext
, i
,paragraph
.item(j
));
769 else if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("tax:collection_event")){
// The instance created here is replaced by the return value two statements below;
// derivedUnitBase is passed on as null.
770 MySpecimenOrObservation specimenOrObservation
= new MySpecimenOrObservation();
771 DerivedUnit derivedUnitBase
= null;
772 specimenOrObservation
= extractSpecimenOrObservation(paragraph
.item(j
), derivedUnitBase
, SpecimenOrObservationType
.DerivedUnit
);
773 extractTextFromSpecimenOrObservation(specimenOrObservations
, descriptionsFulltext
, i
, specimenOrObservation
);
// NOTE(review): the bodies of the next two loops are not visible; presumably they determine "m",
// the upper bound used further down - confirm.
780 for (int k
:descriptionsFulltext
.keySet()) {
785 for (int k
:specimenOrObservations
.keySet()) {
792 if(acceptedTaxon
!=null){
793 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
794 Feature currentFeature
= Feature
.DISTRIBUTION();
795 // DerivedUnit derivedUnitBase=null;
// Process the collected content in ascending index order, from 0 up to and including m.
797 for (int k
=0;k
<=m
;k
++){
798 if(specimenOrObservations
.keySet().contains(k
)){
799 for (MySpecimenOrObservation soo
:specimenOrObservations
.get(k
) ) {
800 handleAssociation(acceptedTaxon
, refMods
, td
, soo
);
804 if (descriptionsFulltext
.keySet().contains(k
)){
805 if (!descriptionsFulltext
.get(k
).isEmpty() && (descriptionsFulltext
.get(k
).startsWith("Hab.") || descriptionsFulltext
.get(k
).startsWith("Habitat"))){
806 setParticularDescription(descriptionsFulltext
.get(k
),acceptedTaxon
,defaultTaxon
, refMods
, Feature
.HABITAT());
810 handleTextData(refMods
, descriptionsFulltext
, td
, currentFeature
, k
);
// The description is attached and the taxon saved when index k produced a text or a specimen.
814 if (descriptionsFulltext
.keySet().contains(k
) || specimenOrObservations
.keySet().contains(k
)){
815 acceptedTaxon
.addDescription(td
);
816 sourceHandler
.addAndSaveSource(refMods
, td
, null);
817 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
825 * @param descriptionsFulltext
827 * @param currentFeature
830 private void handleTextData(Reference
<?
> refMods
, Map
<Integer
, String
> descriptionsFulltext
, TaxonDescription td
,
831 Feature currentFeature
, int k
) {
832 TextData textData
= TextData
.NewInstance();
833 textData
.setFeature(currentFeature
);
834 textData
.putText(Language
.UNKNOWN_LANGUAGE(), descriptionsFulltext
.get(k
));
835 sourceHandler
.addSource(refMods
, textData
);
836 td
.addElement(textData
);
840 * @param acceptedTaxon
845 private void handleAssociation(Taxon acceptedTaxon
, Reference
<?
> refMods
, TaxonDescription td
, MySpecimenOrObservation soo
) {
846 String descr
=soo
.getDescr();
847 DerivedUnit derivedUnitBase
= soo
.getDerivedUnitBase();
849 sourceHandler
.addAndSaveSource(refMods
, derivedUnitBase
);
851 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
852 acceptedTaxon
.addDescription(taxonDescription
);
854 Feature feature
=null;
855 feature
= makeFeature(derivedUnitBase
);
856 if(!StringUtils
.isEmpty(descr
)) {
857 derivedUnitBase
.setTitleCache(descr
, true);
860 IndividualsAssociation indAssociation
= createIndividualAssociation(refMods
, derivedUnitBase
, feature
);
862 taxonDescription
.addElement(indAssociation
);
863 taxonDescription
.setTaxon(acceptedTaxon
);
864 sourceHandler
.addAndSaveSource(refMods
, taxonDescription
,null);
865 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
866 td
.setDescribedSpecimenOrObservation(soo
.getDerivedUnitBase());
870 * create an individualAssociation
872 * @param derivedUnitBase
876 private IndividualsAssociation
createIndividualAssociation(Reference
<?
> refMods
, DerivedUnit derivedUnitBase
,
878 IndividualsAssociation indAssociation
= IndividualsAssociation
.NewInstance();
879 indAssociation
.setAssociatedSpecimenOrObservation(derivedUnitBase
);
880 indAssociation
.setFeature(feature
);
881 indAssociation
= sourceHandler
.addSource(refMods
, indAssociation
);
882 return indAssociation
;
886 * @param specimenOrObservations
887 * @param descriptionsFulltext
889 * @param specimenOrObservation
891 private void extractTextFromSpecimenOrObservation(Map
<Integer
, List
<MySpecimenOrObservation
>> specimenOrObservations
,
892 Map
<Integer
, String
> descriptionsFulltext
, int i
, MySpecimenOrObservation specimenOrObservation
) {
893 List
<MySpecimenOrObservation
> speObsList
= specimenOrObservations
.get(i
);
894 if (speObsList
== null) {
895 speObsList
=new ArrayList
<MySpecimenOrObservation
>();
897 speObsList
.add(specimenOrObservation
);
898 specimenOrObservations
.put(i
,speObsList
);
900 String s
= specimenOrObservation
.getDerivedUnitBase().toString();
901 if (descriptionsFulltext
.get(i
) !=null){
902 s
= descriptionsFulltext
.get(i
)+" "+s
;
904 descriptionsFulltext
.put(i
, s
);
908 * Extract the text with the inline link to a taxon
911 * @param descriptionsFulltext
915 @SuppressWarnings("rawtypes")
916 private void extractInLine(List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
, Map
<Integer
, String
> descriptionsFulltext
,
917 int i
, Node paragraph
) {
918 String inLine
=getInlineText(nametosave
, refMods
, paragraph
);
919 if (descriptionsFulltext
.get(i
) !=null){
920 inLine
= descriptionsFulltext
.get(i
)+inLine
;
922 descriptionsFulltext
.put(i
, inLine
);
926 * Extract the raw text from a Node
927 * @param descriptionsFulltext
931 private void extractText(Map
<Integer
, String
> descriptionsFulltext
, int i
, Node node
) {
932 if(!node
.getTextContent().trim().isEmpty()) {
933 String s
=node
.getTextContent().trim();
934 if (descriptionsFulltext
.get(i
) !=null){
935 s
= descriptionsFulltext
.get(i
)+" "+s
;
937 descriptionsFulltext
.put(i
, s
);
943 * @param materials: the XML node group
944 * @param acceptedTaxon: the current accepted Taxon
945 * @param refMods: the current reference extracted from the MODS
// Walks the direct children of the materials node. For every "tax:p" child the text of its
// sub-nodes is accumulated in rawAssociation: "tax:name" nodes contribute their inline taxon
// link (getInlineText), every node that is neither "tax:name" nor "tax:collection_event"
// contributes its trimmed text content, and each "tax:collection_event" node is handed to
// handleDerivedUnitFacadeAndBase together with the text gathered so far.
// NOTE(review): the declaration and the updates of 'added', as well as the closing braces,
// are not visible in this excerpt, so the exact nesting of the blocks below must be checked
// against the full file.
947 @SuppressWarnings("rawtypes")
948 private void extractMaterials(Node materials
, Taxon acceptedTaxon
, Reference
<?
> refMods
,List
<TaxonNameBase
> nametosave
) {
949 // logger.info("EXTRACTMATERIALS");
950 // logger.info("acceptedTaxon: "+acceptedTaxon);
951 NodeList children
= materials
.getChildNodes();
952 NodeList events
= null;
956 for (int i
=0;i
<children
.getLength();i
++){
// the accumulated text starts empty for every child of the materials node
957 String rawAssociation
="";
959 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
960 events
= children
.item(i
).getChildNodes();
961 for(int k
=0;k
<events
.getLength();k
++){
// taxon names are replaced by their inline link markup
962 if (events
.item(k
).getNodeName().equalsIgnoreCase("tax:name")){
963 String inLine
= getInlineText(nametosave
, refMods
, events
.item(k
));
964 if(!inLine
.isEmpty()) {
965 rawAssociation
+=inLine
;
// any other node except collection events contributes its trimmed text
968 if (! events
.item(k
).getNodeName().equalsIgnoreCase("tax:name")
969 && !events
.item(k
).getNodeName().equalsIgnoreCase("tax:collection_event")){
970 rawAssociation
+= events
.item(k
).getTextContent().trim();
972 if(events
.item(k
).getNodeName().equalsIgnoreCase("tax:collection_event")){
// a placeholder text is used when the text gathered so far (semicolons removed)
// does not pass containsDistinctLetters
973 if (!containsDistinctLetters(rawAssociation
.replaceAll(";",""))) {
974 rawAssociation
="no description text";
977 handleDerivedUnitFacadeAndBase(acceptedTaxon
, refMods
, events
.item(k
), rawAssociation
);
// non-empty text with 'added' being false is stored as a MATERIALS_EXAMINED text element
979 if (!rawAssociation
.isEmpty() && !added
){
981 Feature feature
= Feature
.MATERIALS_EXAMINED();
982 featuresMap
.put(feature
.getTitleCache(),feature
);
984 TextData textData
= createTextData(rawAssociation
, refMods
, feature
);
// the element is attached only when an accepted taxon is available
986 if(! rawAssociation
.isEmpty() && (acceptedTaxon
!=null)){
987 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
988 td
.addElement(textData
);
989 acceptedTaxon
.addDescription(td
);
990 sourceHandler
.addAndSaveSource(refMods
, td
, null);
// the lines below are an earlier implementation kept as commented-out code
992 // DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
993 // derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
995 // TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
996 // acceptedTaxon.addDescription(taxonDescription);
998 // IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
1000 // Feature feature = Feature.MATERIALS_EXAMINED();
1001 // featuresMap.put(feature.getTitleCache(),feature);
1002 // if(!StringUtils.isEmpty(rawAssociation)) {
1003 // derivedUnitBase.setTitleCache(rawAssociation, true);
1005 // indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
1006 // indAssociation.setFeature(feature);
1007 // indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
1009 // /*boolean sourceExists=false;
1010 // Set<DescriptionElementSource> dsources = indAssociation.getSources();
1011 // for (DescriptionElementSource src : dsources){
1012 // String micro = src.getCitationMicroReference();
1013 // Reference r = src.getCitation();
1014 // if (r.equals(refMods) && micro == null) {
1015 // sourceExists=true;
1018 // if(!sourceExists) {
1019 // indAssociation.addSource(null, null, refMods, null);
1021 // taxonDescription.addElement(indAssociation);
1022 // taxonDescription.setTaxon(acceptedTaxon);
1023 // taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
1025 // /*sourceExists=false;
1026 // Set<IdentifiableSource> sources = taxonDescription.getSources();
1027 // for (IdentifiableSource src : sources){
1028 // String micro = src.getCitationMicroReference();
1029 // Reference r = src.getCitation();
1030 // if (r.equals(refMods) && micro == null) {
1031 // sourceExists=true;
1034 // if(!sourceExists) {
1035 // taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
1038 // importer.getDescriptionService().saveOrUpdate(taxonDescription);
1039 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1049 * @param acceptedTaxon
1052 * @param rawAssociation
// Creates the derived unit for one collection event node and associates it with the
// accepted taxon: a facade is obtained through getFacade from rawAssociation (semicolons
// removed), extractSpecimenOrObservation processes the event node with the facade's unit,
// and the resulting unit is wrapped in an IndividualsAssociation that is added to the
// taxon's description before the taxon is saved.
// NOTE(review): the declaration of 'descr' is not visible in this excerpt.
1055 private void handleDerivedUnitFacadeAndBase(Taxon acceptedTaxon
, Reference
<?
> refMods
, Node event
,
1056 String rawAssociation
) {
1058 DerivedUnit derivedUnitBase
;
1059 MySpecimenOrObservation myspecimenOrObservation
;
1060 DerivedUnitFacade derivedUnitFacade
= getFacade(rawAssociation
.replaceAll(";",""),SpecimenOrObservationType
.DerivedUnit
);
1061 derivedUnitBase
= derivedUnitFacade
.innerDerivedUnit();
// the source is registered for the facade's unit here and once more further down for the
// unit returned by extractSpecimenOrObservation
1063 sourceHandler
.addAndSaveSource(refMods
, derivedUnitBase
);
1065 myspecimenOrObservation
= extractSpecimenOrObservation(event
,derivedUnitBase
,SpecimenOrObservationType
.DerivedUnit
);
1066 derivedUnitBase
= myspecimenOrObservation
.getDerivedUnitBase();
1067 descr
=myspecimenOrObservation
.getDescr();
1069 sourceHandler
.addAndSaveSource(refMods
, derivedUnitBase
);
1071 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
1072 acceptedTaxon
.addDescription(taxonDescription
);
// the feature comes from makeFeature and is also recorded in featuresMap by title cache
1074 Feature feature
= makeFeature(derivedUnitBase
);
1075 featuresMap
.put(feature
.getTitleCache(),feature
);
// a non-empty description text replaces the title cache of the unit
1076 if(!StringUtils
.isEmpty(descr
)) {
1077 derivedUnitBase
.setTitleCache(descr
, true);
1080 IndividualsAssociation indAssociation
= createIndividualAssociation(refMods
, derivedUnitBase
, feature
);
1082 taxonDescription
.addElement(indAssociation
);
1083 taxonDescription
.setTaxon(acceptedTaxon
);
1084 sourceHandler
.addAndSaveSource(refMods
, taxonDescription
,null);
1085 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1091 * @param materials: the XML node group
1092 * @param acceptedTaxon: the current accepted Taxon
1093 * @param refMods: the current reference extracted from the MODS
// Creates an IndividualsAssociation on the accepted taxon for the unit that
// extractSpecimenOrObservation returns for the materials node, saves the taxon and
// returns the title cache of that unit.
// The 'event' argument is compared case-insensitively with "collection" when the feature
// of the association is chosen.
// NOTE(review): the declaration of 'descr' and the lines between the two feature
// assignments are not visible in this excerpt; it cannot be told from here whether
// MATERIALS_EXAMINED is assigned in an else branch or unconditionally.
1095 private String
extractMaterialsDirect(Node materials
, Taxon acceptedTaxon
, Reference
<?
> refMods
, String event
) {
1096 // logger.info("EXTRACTMATERIALS");
1097 // logger.info("acceptedTaxon: "+acceptedTaxon);
// derivedUnitBase is still null when it is passed to extractSpecimenOrObservation
1100 DerivedUnit derivedUnitBase
=null;
1101 MySpecimenOrObservation myspecimenOrObservation
= extractSpecimenOrObservation(materials
,derivedUnitBase
, SpecimenOrObservationType
.DerivedUnit
);
1102 derivedUnitBase
= myspecimenOrObservation
.getDerivedUnitBase();
1104 sourceHandler
.addAndSaveSource(refMods
, derivedUnitBase
);
1106 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
1107 acceptedTaxon
.addDescription(taxonDescription
);
1109 Feature feature
=null;
1110 if (event
.equalsIgnoreCase("collection")){
1111 feature
= makeFeature(derivedUnitBase
);
1114 feature
= Feature
.MATERIALS_EXAMINED();
1116 featuresMap
.put(feature
.getTitleCache(), feature
);
1118 descr
=myspecimenOrObservation
.getDescr();
// NOTE(review): the one-argument setTitleCache is used here while the sibling methods
// pass 'true' as second argument - confirm this difference is intended
1119 if(!StringUtils
.isEmpty(descr
)) {
1120 derivedUnitBase
.setTitleCache(descr
);
1123 IndividualsAssociation indAssociation
= createIndividualAssociation(refMods
, derivedUnitBase
, feature
);
1125 taxonDescription
.addElement(indAssociation
);
1126 taxonDescription
.setTaxon(acceptedTaxon
);
1127 sourceHandler
.addAndSaveSource(refMods
, taxonDescription
,null);
1128 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1130 return derivedUnitBase
.getTitleCache();
1136 * @param description: the XML node group
1137 * @param acceptedTaxon: the current acceptedTaxon
1138 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1139 * @param nametosave: the list of objects to save into the CDM
1140 * @param refMods: the current reference extracted from the MODS
1141 * @param featureName: the feature name
// Extracts the content of a description node for the feature named featureName (resolved
// through getFeatureObjectFromString). Non-empty direct #text children are collected in
// 'text' and stored via setParticularDescription; "tax:p" children are flattened into one
// blank-separated string (inline taxon links plus text nodes), stored the same way and
// also appended to 'text'. When featureName is "table", "tax:div" children are turned into
// table head / table row markup. The joined 'text' is returned when it is not empty.
// NOTE(review): the declarations of 'head' and 'line', the assembly of 'table' and the
// fallback return value are not visible in this excerpt.
// NOTE(review): getNamedItem returns null when an attribute is absent, so a "tax:div"
// without an "otherType" attribute would fail at getNodeValue() - confirm that the
// input always carries it.
1143 @SuppressWarnings({ "rawtypes", "null" })
1144 private String
extractSpecificFeature(Node description
, Taxon acceptedTaxon
, Taxon defaultTaxon
,
1145 List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
, String featureName
) {
1146 // System.out.println("GRUUUUuu");
1147 NodeList children
= description
.getChildNodes();
1148 NodeList insideNodes
;
1150 // String descr ="";
1151 String localdescr
="";
1152 List
<String
> blabla
=null;
1153 List
<String
> text
= new ArrayList
<String
>();
1155 String table
="<table>";
1159 Feature currentFeature
=getFeatureObjectFromString(featureName
);
1161 // String fullContent = description.getTextContent();
1162 for (int i
=0;i
<children
.getLength();i
++){
// non-empty direct text children go straight into the returned text
1164 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text") && !children
.item(i
).getTextContent().trim().isEmpty()){
1165 text
.add(children
.item(i
).getTextContent().trim());
// table handling: "thead" divs give the head, "tr" divs give one row each
1167 if (featureName
.equalsIgnoreCase("table")){
1168 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
1169 children
.item(i
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("thead")){
1170 head
= extractTableHead(children
.item(i
));
1172 line
= extractTableLine(children
.item(i
));
1173 if (!line
.equalsIgnoreCase("<tr></tr>")) {
1177 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
1178 children
.item(i
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1179 line
= extractTableLineWithColumn(children
.item(i
).getChildNodes());
1180 if(!line
.equalsIgnoreCase("<tr></tr>")) {
// paragraphs: collect inline taxon links and text nodes of the paragraph
1185 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1186 insideNodes
=children
.item(i
).getChildNodes();
1187 blabla
= new ArrayList
<String
>();
1188 for (int j
=0;j
<insideNodes
.getLength();j
++){
1189 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1190 String inlinetext
= getInlineText(nametosave
, refMods
, insideNodes
.item(j
));
1191 if (!inlinetext
.isEmpty()) {
1192 blabla
.add(inlinetext
);
1195 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("#text")) {
1196 if(!insideNodes
.item(j
).getTextContent().trim().isEmpty()){
1197 blabla
.add(insideNodes
.item(j
).getTextContent().trim());
1198 // localdescr += insideNodes.item(j).getTextContent().trim();
// the paragraph is stored as one blank-separated string
1202 if (!blabla
.isEmpty()) {
1203 String blaStr
= StringUtils
.join(blabla
," ").trim();
1204 if(!blaStr
.isEmpty()) {
1205 setParticularDescription(blaStr
,acceptedTaxon
,defaultTaxon
, refMods
,currentFeature
);
1208 text
.add(StringUtils
.join(blabla
," "));
// direct text children are additionally stored as description of their own
1210 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text")){
1211 if(!children
.item(i
).getTextContent().trim().isEmpty()){
1212 localdescr
= children
.item(i
).getTextContent().trim();
1213 setParticularDescription(localdescr
,acceptedTaxon
,defaultTaxon
, refMods
,currentFeature
);
1219 if (!table
.equalsIgnoreCase("<table></table>")){
1220 // System.out.println("TABLE : "+table);
// 'text' is initialised above, so only the emptiness check is effective here
1224 if (text
!=null && !text
.isEmpty()) {
1225 return StringUtils
.join(text
," ");
// Builds the markup of one table row: when the "otherType" attribute of the node equals
// "tr" (ignoring case), the row is produced by extractTableLineWithColumn from the node's
// children.
// NOTE(review): the initial value of 'line' and the return statement are not visible in
// this excerpt.
1237 private String
extractTableLine(Node child
) {
1240 if (child
.getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1241 line
= extractTableLineWithColumn(child
.getChildNodes());
// Builds the markup of a table head: every "tax:div" child whose "otherType" attribute
// equals "tr" (ignoring case) is converted with extractTableLineWithColumn.
// NOTE(review): how the converted lines are combined and returned is not visible in this
// excerpt.
1252 private String
extractTableHead(Node child
) {
1256 NodeList trNodes
= child
.getChildNodes();
1257 for (int k
=0;k
<trNodes
.getLength();k
++){
1258 if (trNodes
.item(k
).getNodeName().equalsIgnoreCase("tax:div")
1259 && trNodes
.item(k
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1260 line
= extractTableLineWithColumn(trNodes
.item(k
).getChildNodes());
1269 * build a html table line, with td columns
1271 * @return an html coded line
// Appends one <td>...</td> cell to 'line' for every "tax:p" node of the list, using the
// node's text content unchanged (no trimming, no escaping).
// NOTE(review): the initialisation of 'line' and the return statement are not visible in
// this excerpt; callers compare the result with "<tr></tr>", which suggests the cells are
// wrapped in a <tr> element - confirm against the full file.
1273 private String
extractTableLineWithColumn(NodeList tdNodes
) {
1276 for (int l
=0;l
<tdNodes
.getLength();l
++){
1277 if (tdNodes
.item(l
).getNodeName().equalsIgnoreCase("tax:p")){
1278 line
+="<td>"+tdNodes
.item(l
).getTextContent()+"</td>";
1286 * @param description: the XML node group
1287 * @param acceptedTaxon: the current acceptedTaxon
1288 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1289 * @param nametosave: the list of objects to save into the CDM
1290 * @param refMods: the current reference extracted from the MODS
1291 * @param featureName: the feature name
// Variant of extractSpecificFeature that does not store each paragraph separately: the
// inline taxon links and text nodes of all "tax:p" children and the non-empty direct
// #text children are collected in one list, joined with blanks, and stored once through
// setParticularDescription - unless the joined text is empty or consists only of ".",
// "," or ";".
// NOTE(review): the return statements are not visible in this excerpt.
1293 @SuppressWarnings({ "unused", "rawtypes" })
1294 private String
extractSpecificFeatureNotStructured(Node description
, Taxon acceptedTaxon
, Taxon defaultTaxon
,
1295 List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
, String featureName
) {
1296 NodeList children
= description
.getChildNodes();
1297 NodeList insideNodes
;
1298 List
<String
> blabla
= new ArrayList
<String
>();
1301 Feature currentFeature
= getFeatureObjectFromString(featureName
);
// fullContent is not read again in the visible lines of this method
1303 String fullContent
= description
.getTextContent();
1304 for (int i
=0;i
<children
.getLength();i
++){
1305 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1306 insideNodes
=children
.item(i
).getChildNodes();
1307 for (int j
=0;j
<insideNodes
.getLength();j
++){
// taxon names are added as inline link markup
1308 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1309 String inlineText
=getInlineText(nametosave
, refMods
, insideNodes
.item(j
));
1310 if(!inlineText
.isEmpty()) {
1311 blabla
.add(inlineText
);
1314 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("#text")) {
1315 if(!insideNodes
.item(j
).getTextContent().trim().isEmpty()){
1316 blabla
.add(insideNodes
.item(j
).getTextContent().trim());
// direct text children of the description node
1321 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text")){
1322 if(!children
.item(i
).getTextContent().trim().isEmpty()){
1323 String localdescr
= children
.item(i
).getTextContent().trim();
1324 if(!localdescr
.isEmpty())
1326 blabla
.add(localdescr
);
// 'blabla' is initialised above, so only the emptiness check is effective here
1332 if (blabla
!=null && !blabla
.isEmpty()) {
1333 String blaStr
= StringUtils
.join(blabla
," ").trim();
// texts made of a single punctuation mark are not stored
1334 if (!blaStr
.isEmpty() && !blaStr
.equalsIgnoreCase(".") && !blaStr
.equalsIgnoreCase(",") && !blaStr
.equalsIgnoreCase(";")) {
1335 setParticularDescription(blaStr
,acceptedTaxon
,defaultTaxon
, refMods
,currentFeature
);
1347 * @param insideNodes
// Builds the inline link markup for a taxon name node. The name is resolved through
// getTaxonNameBaseFromXML; the returned string has the form
// <cdm:taxon uuid='UUID'>NAME</cdm:taxon>, where UUID is the uuid of the taxon of
// currentMyName and NAME is the part of tnb.toString() before the first occurrence of the
// character sequence "sec" (wherever that sequence appears in the string).
// NOTE(review): callers test the result with isEmpty(), so a guard and an alternative
// return may exist in lines that are not visible in this excerpt.
1351 @SuppressWarnings({ "rawtypes" })
1352 private String
getInlineText(List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
, Node insideNode
) {
1353 TaxonNameBase tnb
= getTaxonNameBaseFromXML(insideNode
, nametosave
,refMods
);
1354 // Taxon tax = getTaxonFromTxonNameBase(tnb, refMods);
// the taxon is read from currentMyName, not derived from tnb
1355 Taxon tax
= currentMyName
.getTaxon();
1357 String linkedTaxon
= tnb
.toString().split("sec")[0];//TODO NOT IMPLEMENTED IN THE CDM YET
1358 return "<cdm:taxon uuid='"+tax
.getUuid()+"'>"+linkedTaxon
+"</cdm:taxon>";
1364 * @param featureName
1367 @SuppressWarnings("rawtypes")
1368 private Feature
getFeatureObjectFromString(String featureName
) {
1369 List
<DefinedTermBase
> features
= importer
.getTermService().list(Feature
.class, null,null,null,null);
1370 Feature currentFeature
=null;
1371 for (DefinedTermBase feature
: features
){
1372 String tmpF
= ((Feature
)feature
).getTitleCache();
1373 if (tmpF
.equalsIgnoreCase(featureName
)) {
1374 currentFeature
=(Feature
)feature
;
1375 // System.out.println("currentFeatureFromList "+currentFeature.getUuid());
1378 if (currentFeature
== null) {
1379 currentFeature
=Feature
.NewInstance(featureName
, featureName
, featureName
);
1380 if(featureName
.equalsIgnoreCase("Other")){
1381 currentFeature
.setUuid(OtherUUID
);
1383 if(featureName
.equalsIgnoreCase(notMarkedUp
)){
1384 currentFeature
.setUuid(NotMarkedUpUUID
);
1386 importer
.getTermService().saveOrUpdate(currentFeature
);
1388 return currentFeature
;
1395 * @param children: the XML node group
1396 * @param nametosave: the list of objects to save into the CDM
1397 * @param acceptedTaxon: the current acceptedTaxon
1398 * @param refMods: the current reference extracted from the MODS
1399 * @param fullContent :the parsed XML content
1400 * @return a list of description (text)
// Collects the textual content of a paragraph node and returns it as a list of strings.
// - Non-empty direct #text children are concatenated into 'descr', which is stored at the
//   end through setParticularDescription under the feature returned by
//   getNotMarkedUpFeatureObject.
// - Each "tax:p" child is flattened into one blank-separated string built from inline
//   taxon links, text nodes, title caches of "tax:bibref" references and title caches of
//   "tax:collection_event" units (created via extractMaterialsDirect).
// - "tax:figure" children and "tax:div" children of type "Other"/"table" are delegated to
//   extractSpecificFeature and the returned string is added to the result.
// NOTE(review): the declaration of 'descr' and some statements are not visible in this
// excerpt.
1402 @SuppressWarnings({ "unused", "rawtypes" })
1403 private List
<String
> parseParagraph(List
<TaxonNameBase
> nametosave
, Taxon acceptedTaxon
, Reference
<?
> refMods
, Node paragraph
, Feature feature
){
1404 List
<String
> fullDescription
= new ArrayList
<String
>();
1405 // String localdescr;
1407 NodeList insideNodes
;
1408 boolean collectionEvent
= false;
1409 List
<Node
>collectionEvents
= new ArrayList
<Node
>();
1411 NodeList children
= paragraph
.getChildNodes();
1413 for (int i
=0;i
<children
.getLength();i
++){
// text that is not wrapped in any markup is gathered in descr
1415 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text") && !children
.item(i
).getTextContent().trim().isEmpty()){
1416 descr
+= children
.item(i
).getTextContent().trim();
1418 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1419 insideNodes
=children
.item(i
).getChildNodes();
1420 List
<String
> blabla
= new ArrayList
<String
>();
1421 for (int j
=0;j
<insideNodes
.getLength();j
++){
1422 boolean nodeKnown
= false;
1423 // System.out.println("insideNodes.item(j).getNodeName() : "+insideNodes.item(j).getNodeName());
1424 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1425 String inlineText
= getInlineText(nametosave
, refMods
, insideNodes
.item(j
));
1426 if (!inlineText
.isEmpty()) {
1427 blabla
.add(inlineText
);
1431 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("#text")) {
1432 if(!insideNodes
.item(j
).getTextContent().trim().isEmpty()){
1433 blabla
.add(insideNodes
.item(j
).getTextContent().trim());
1434 // localdescr += insideNodes.item(j).getTextContent().trim();
1438 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:bibref")) {
1439 String ref
= insideNodes
.item(j
).getTextContent().trim();
// a trailing ';' of the reference text is replaced by '.'
1440 if (ref
.endsWith(";") && ((ref
.length())>1)) {
1441 ref
=ref
.substring(0, ref
.length()-1)+".";
// a generic reference is created only to obtain its title cache as text
1443 Reference
<?
> reference
= ReferenceFactory
.newGeneric();
1444 reference
.setTitleCache(ref
, true);
1445 blabla
.add(reference
.getTitleCache());
// NOTE(review): in the visible lines the string returned for a nested figure is not
// used further
1448 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:figure")){
1449 String figure
= extractSpecificFeature(insideNodes
.item(j
),acceptedTaxon
,acceptedTaxon
, nametosave
, refMods
, "figure");
// NOTE(review): in the visible lines the string returned for a nested table is not
// used further
1452 if(insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:div") &&
1453 insideNodes
.item(j
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1454 insideNodes
.item(j
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1455 String table
= extractSpecificFeature(insideNodes
.item(j
),acceptedTaxon
,acceptedTaxon
, nametosave
, refMods
, "table");
// a collection event inside a paragraph is imported directly; the title cache of the
// created unit is kept as text
1458 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1459 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1460 String titlecache
= extractMaterialsDirect(insideNodes
.item(j
), acceptedTaxon
, refMods
, "collection");
1461 blabla
.add(titlecache
);
1462 collectionEvent
=true;
1463 collectionEvents
.add(insideNodes
.item(j
));
1466 // if (!nodeKnown && !insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:pb")) {
1467 // logger.info("Node not handled yet : "+insideNodes.item(j).getNodeName());
// one entry per paragraph, parts separated by a blank
1471 if (!blabla
.isEmpty()) {
1472 fullDescription
.add(StringUtils
.join(blabla
," "));
1475 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:figure")){
1476 String figure
= extractSpecificFeature(children
.item(i
),acceptedTaxon
,acceptedTaxon
, nametosave
, refMods
, "Figure");
1477 fullDescription
.add(figure
);
1479 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
1480 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1481 children
.item(i
).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1482 String table
= extractSpecificFeature(children
.item(i
),acceptedTaxon
,acceptedTaxon
, nametosave
, refMods
, "table");
1483 fullDescription
.add(table
);
// text found outside any markup is stored under the not-marked-up feature; the accepted
// taxon is passed both as accepted and as default taxon
1487 if(descr
.length()>0){
1490 Feature currentFeature
= getNotMarkedUpFeatureObject();
1491 setParticularDescription(descr
,acceptedTaxon
,acceptedTaxon
, refMods
,currentFeature
);
1493 // if (collectionEvent) {
1494 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1495 // for (Node coll:collectionEvents){
1496 // = extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
1499 return fullDescription
;
1504 * @param description: the XML node group
1505 * @param acceptedTaxon: the current acceptedTaxon
1506 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1507 * @param nametosave: the list of objects to save into the CDM
1508 * @param refMods: the current reference extracted from the MODS
1509 * @param feature: the feature to link the data with
1511 @SuppressWarnings("rawtypes")
1512 private void extractFeature(Node description
, Taxon acceptedTaxon
, Taxon defaultTaxon
, List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
, Feature feature
){
1513 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1514 List
<String
> fullDescription
= parseParagraph( nametosave
, acceptedTaxon
, refMods
, description
,feature
);
1516 // System.out.println("Feature : "+feature.toString()+", "+fullDescription.toString());
1517 if (!fullDescription
.isEmpty()) {
1518 setParticularDescription(StringUtils
.join(fullDescription
," "),acceptedTaxon
,defaultTaxon
, refMods
,feature
);
1525 * @param descr: the XML Nodegroup to parse
1526 * @param acceptedTaxon: the current acceptedTaxon
1527 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1528 * @param refMods: the current reference extracted from the MODS
1529 * @param currentFeature: the feature name
// Stores descr as a TextData element of currentFeature.
// - The feature is recorded in featuresMap under its title cache, and the TextData is
//   created (createTextData) before descr is checked for emptiness.
// - descr non-empty and accepted taxon available: the element is added to the description
//   obtained from importer.getTaxonDescription and the accepted taxon is saved.
// - descr non-empty, no accepted taxon, default taxon available: the default taxon is
//   looked up by uuid through the taxon service and the element is added to its
//   description instead.
// currentFeature must not be null: its title cache is read in the first statement.
// NOTE(review): the try statement and the branches around 'tmp' are not visible in this
// excerpt.
1532 private void setParticularDescription(String descr
, Taxon acceptedTaxon
, Taxon defaultTaxon
, Reference
<?
> refMods
, Feature currentFeature
) {
1533 logger
.info("setParticularDescription "+currentFeature
.getTitleCache()+", \n blabla : "+descr
);
1534 // System.out.println("setParticularDescription "+currentFeature.getTitleCache()+", \n blabla : "+descr);
1535 // logger.info("acceptedTaxon: "+acceptedTaxon);
1536 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1538 featuresMap
.put(currentFeature
.getTitleCache(),currentFeature
);
1540 TextData textData
= createTextData(descr
, refMods
, currentFeature
);
// case 1: attach the text to the accepted taxon
1542 if(! descr
.isEmpty() && (acceptedTaxon
!=null)){
1543 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
1544 td
.addElement(textData
);
1545 acceptedTaxon
.addDescription(td
);
1547 sourceHandler
.addAndSaveSource(refMods
, td
, null);
1548 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
// case 2: no accepted taxon, fall back to the default taxon
1551 if(! descr
.isEmpty() && (acceptedTaxon
== null) && (defaultTaxon
!= null)){
1553 Taxon tmp
=(Taxon
) importer
.getTaxonService().find(defaultTaxon
.getUuid());
1555 defaultTaxon
=CdmBase
.deproxy(tmp
,Taxon
.class);
1557 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
// any exception of the lookup/save above is only logged at debug level
1559 }catch(Exception e
){
1560 logger
.debug("TAXON EXISTS"+defaultTaxon
);
1563 TaxonDescription td
=importer
.getTaxonDescription(defaultTaxon
, false, true);
1564 defaultTaxon
.addDescription(td
);
1565 td
.addElement(textData
);
1566 sourceHandler
.addAndSaveSource(refMods
, td
, null);
1567 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
1574 * @param currentFeature
1577 private TextData
createTextData(String descr
, Reference
<?
> refMods
, Feature currentFeature
) {
1578 TextData textData
= TextData
.NewInstance();
1579 textData
.setFeature(currentFeature
);
1580 sourceHandler
.addSource(refMods
, textData
);
1582 textData
.putText(Language
.UNKNOWN_LANGUAGE(), descr
);
1589 * @param descr: the XML Nodegroup to parse
1590 * @param acceptedTaxon: the current acceptedTaxon
1591 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1592 * @param refMods: the current reference extracted from the MODS
1593 * @param currentFeature: the feature name
// Overload of setParticularDescription with an additional reference 'currentRef', which is
// passed as third argument to sourceHandler.addAndSaveSource.
// - The feature is recorded in featuresMap under its title cache, and the TextData is
//   created with refMods before descr is checked for emptiness.
// - descr non-empty and accepted taxon available: the element is added to the accepted
//   taxon's description and the taxon is saved.
// - descr non-empty, no accepted taxon, default taxon available: the default taxon is
//   looked up by uuid and the element is added to its description instead.
// NOTE(review): the try statement and the branches around 'tmp' are not visible in this
// excerpt.
1596 private void setParticularDescription(String descr
, Taxon acceptedTaxon
, Taxon defaultTaxon
,Reference
<?
> currentRef
, Reference
<?
> refMods
, Feature currentFeature
) {
1597 // System.out.println("setParticularDescriptionSPecial "+currentFeature);
1598 // logger.info("acceptedTaxon: "+acceptedTaxon);
1599 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1601 featuresMap
.put(currentFeature
.getTitleCache(),currentFeature
);
1602 TextData textData
= createTextData(descr
, refMods
, currentFeature
);
// case 1: attach the text to the accepted taxon
1604 if(! descr
.isEmpty() && (acceptedTaxon
!=null)){
1605 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
1606 td
.addElement(textData
);
1607 acceptedTaxon
.addDescription(td
);
1609 sourceHandler
.addAndSaveSource(refMods
, td
, currentRef
);
1610 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
// case 2: no accepted taxon, fall back to the default taxon
1613 if(! descr
.isEmpty() && (acceptedTaxon
== null) && (defaultTaxon
!= null)){
1615 Taxon tmp
=(Taxon
) importer
.getTaxonService().find(defaultTaxon
.getUuid());
1617 defaultTaxon
=CdmBase
.deproxy(tmp
,Taxon
.class);
1619 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
// any exception of the lookup/save above is only logged at debug level
1621 }catch(Exception e
){
1622 logger
.debug("TAXON EXISTS"+defaultTaxon
);
1625 TaxonDescription td
=importer
.getTaxonDescription(defaultTaxon
, false, true);
1626 defaultTaxon
.addDescription(td
);
1627 td
.addElement(textData
);
// NOTE(review): currentRef is passed as first argument here, whereas the accepted-taxon
// branch above passes refMods - confirm that this difference is intended
1628 sourceHandler
.addAndSaveSource(currentRef
, td
,currentRef
);
1629 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
1636 * @param synonyms: the XML Nodegroup to parse
1637 * @param nametosave: the list of objects to save into the CDM
1638 * @param acceptedTaxon: the current acceptedTaxon
1639 * @param refMods: the current reference extracted from the MODS
// Extracts synonym names from the given node and attaches them to the accepted taxon.
// Names are read with extractScientificName from the node itself when it is a "tax:name",
// from "tax:name" nodes inside "tax:p" children and from direct "tax:name" children.
// For every entry of 'names' a Synonym is created from the name of currentMyName; a
// non-empty status is converted with nomStatusString2NomStatus and added to the name,
// a sufficiently long identifier is applied with setLSID, and the synonym is added to the
// accepted taxon when it is not already present. Finally the accepted taxon is saved.
// NOTE(review): many lines of this method are not visible in this excerpt (the try
// statements, the handling of extraction errors, where 'names' is filled, where
// 'synoExist' is set); the comments here cover the visible statements only.
1641 @SuppressWarnings({ "rawtypes" })
1642 private void extractSynonyms(Node synonyms
, Taxon acceptedTaxon
,Reference
<?
> refMods
) {
1643 // logger.info("extractSynonyms: "+acceptedTaxon);
// the accepted taxon is looked up again by uuid through the taxon service
1644 Taxon ttmp
= (Taxon
) importer
.getTaxonService().find(acceptedTaxon
.getUuid());
1646 acceptedTaxon
= CdmBase
.deproxy(ttmp
,Taxon
.class);
1649 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1651 NodeList children
= synonyms
.getChildNodes();
1652 TaxonNameBase nameToBeFilled
= null;
1653 List
<MyName
> names
= new ArrayList
<MyName
>();
// case 1: the node itself is a name
1655 if(synonyms
.getNodeName().equalsIgnoreCase("tax:name")){
1658 myName
= extractScientificName(synonyms
,refMods
);
1660 } catch (TransformerFactoryConfigurationError e
) {
1662 } catch (TransformerException e
) {
1668 for (int i
=0;i
<children
.getLength();i
++){
// case 2: names inside a paragraph
1669 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1670 NodeList tmp
= children
.item(i
).getChildNodes();
1671 // String fullContent = children.item(i).getTextContent();
1672 for (int j
=0; j
< tmp
.getLength();j
++){
1673 if(tmp
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1676 myName
= extractScientificName(tmp
.item(j
),refMods
);
1678 } catch (TransformerFactoryConfigurationError e
) {
1680 } catch (TransformerException e
) {
// case 3: names that are direct children
1687 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:name")){
1690 myName
= extractScientificName(children
.item(i
),refMods
);
1692 } catch (TransformerFactoryConfigurationError e
) {
1694 } catch (TransformerException e
) {
1700 NomenclaturalStatusType statusType
= null;
1702 for(MyName name
:names
){
1703 // System.out.println("HANDLE NAME "+name);
// the name object is taken from currentMyName
1707 nameToBeFilled
= currentMyName
.getTaxonNameBase();
1709 Synonym synonym
= null;
// a status that cannot be mapped is written to the problem file and kept as appended
// phrase of the synonym
1711 if (!name
.getStatus().isEmpty()){
1713 statusType
= nomStatusString2NomStatus(name
.getStatus());
1714 nameToBeFilled
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
1715 synonym
= Synonym
.NewInstance(nameToBeFilled
, refMods
);
1716 } catch (UnknownCdmTypeException e
) {
1717 addProblematicStatusToFile(name
.getStatus());
1718 logger
.warn("Problem with status");
1719 synonym
= Synonym
.NewInstance(nameToBeFilled
, refMods
);
1720 synonym
.setAppendedPhrase(name
.getStatus());
1724 synonym
= Synonym
.NewInstance(nameToBeFilled
, refMods
);
1726 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1727 nameToBeFilled = parser.parseFullName(name.getName(), nomenclaturalCode, name.getRank());
1728 if (nameToBeFilled.hasProblem() &&
1729 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1730 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1731 addProblemNameToFile(name.getName(),"",nomenclaturalCode,name.getRank());
1732 nameToBeFilled = solveNameProblem(name.getOriginalName(), name.getName(), parser,name.getAuthor(), name.getRank());
1734 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
1736 if (!name
.getIdentifier().isEmpty() && (name
.getIdentifier().length()>2)){
1737 setLSID(name
.getIdentifier(), synonym
);
1740 Set
<Synonym
> synonymsSet
= acceptedTaxon
.getSynonyms();
1741 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1742 boolean synoExist
= false;
1743 for (Synonym syn
: synonymsSet
){
1744 System
.out
.println(syn
.getName()+" -- "+syn
.getSec());
1745 boolean a
=syn
.getName().equals(synonym
.getName());
1746 boolean b
= syn
.getSec().equals(synonym
.getSec());
1751 if (!synonymsSet
.contains(synonym
) && ! (synoExist
)) {
1752 // System.out.println("SYNONYM");
1753 sourceHandler
.addSource(refMods
, synonym
);
1755 acceptedTaxon
.addSynonym(synonym
, SynonymRelationshipType
.SYNONYM_OF(),refMods
, null);
1759 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1764 * @param refgroup: the XML nodes
1765 * @param nametosave: the list of objects to save into the CDM
1766 * @param acceptedTaxon: the current acceptedTaxon
1767 * @param nametosave: the list of objects to save into the CDM
1768 * @param refMods: the current reference extracted from the MODS
1769 * @return the acceptedTaxon (why?)
1770 * handle cases where the bibref are inside <p> and outside
1772 @SuppressWarnings({ "rawtypes" })
1773 private Taxon
extractReferences(Node refgroup
, List
<TaxonNameBase
> nametosave
, Taxon acceptedTaxon
, Reference
<?
> refMods
) {
1774 // logger.info("extractReferences");
1775 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1777 NodeList children
= refgroup
.getChildNodes();
1778 NonViralName
<?
> nameToBeFilled
= getNonViralNameAccNomenclature();
1780 ReferenceBuilder refBuild
= new ReferenceBuilder();
1781 for (int i
=0;i
<children
.getLength();i
++){
1782 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:bibref")){
1783 String ref
= children
.item(i
).getTextContent().trim();
1784 refBuild
.builReference(ref
, treatmentMainName
, nomenclaturalCode
, acceptedTaxon
, refMods
);
1785 if (!refBuild
.isFoundBibref()){
1786 extractReferenceRawText(children
.item(i
).getChildNodes(), nameToBeFilled
, refMods
, acceptedTaxon
);
1790 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1791 NodeList references
= children
.item(i
).getChildNodes();
1793 for (int j
=0;j
<references
.getLength();j
++){
1794 if(references
.item(j
).getNodeName().equalsIgnoreCase("tax:bibref")){
1795 String ref
= references
.item(j
).getTextContent().trim();
1796 refBuild
.builReference(ref
, treatmentMainName
, nomenclaturalCode
, acceptedTaxon
, refMods
);
1799 if (references
.item(j
).getNodeName().equalsIgnoreCase("#text")
1800 && !references
.item(j
).getTextContent().trim().isEmpty()){
1801 descr
+= references
.item(j
).getTextContent().trim();
1805 if (!refBuild
.isFoundBibref()){
1806 //if it's not tagged, put it as row information.
1807 // extractReferenceRawText(references, nameToBeFilled, nametosave, refMods, acceptedTaxon);
1808 //then put it as a not markup feature if not empty
1809 if (descr
.length()>0){
1810 Feature currentFeature
= getNotMarkedUpFeatureObject();
1811 setParticularDescription(descr
,acceptedTaxon
,acceptedTaxon
, refMods
,currentFeature
);
1816 // importer.getClassificationService().saveOrUpdate(classification);
1817 return acceptedTaxon
;
1822 * get the non viral name according to the current nomenclature
1825 private NonViralName
<?
> getNonViralNameAccNomenclature() {
1826 NonViralName
<?
> nameToBeFilled
= null;
1827 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNAFP
)){
1828 nameToBeFilled
= BotanicalName
.NewInstance(null);
1830 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICZN
)){
1831 nameToBeFilled
= ZoologicalName
.NewInstance(null);
1833 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNB
)){
1834 nameToBeFilled
= BacterialName
.NewInstance(null);
1836 return nameToBeFilled
;
1840 * @return the feature object for the category "not marked up"
1842 @SuppressWarnings("rawtypes")
1843 private Feature
getNotMarkedUpFeatureObject() {
1844 List
<DefinedTermBase
> features
= importer
.getTermService().list(Feature
.class, null,null,null,null);
1845 Feature currentFeature
=null;
1846 for (DefinedTermBase feat
: features
){
1847 String tmpF
= ((Feature
)feat
).getTitleCache();
1848 if (tmpF
.equalsIgnoreCase(notMarkedUp
)) {
1849 currentFeature
=(Feature
)feat
;
1852 if (currentFeature
== null) {
1853 currentFeature
=Feature
.NewInstance(notMarkedUp
, notMarkedUp
, notMarkedUp
);
1854 currentFeature
.setUuid(NotMarkedUpUUID
);
1855 importer
.getTermService().saveOrUpdate(currentFeature
);
1857 return currentFeature
;
// Handles reference text that is not wrapped in a bibref tag.
// Visible behaviour: resets currentMyName, then walks the given nodes.
//  - "tax:name" nodes are parsed with extractScientificName into currentMyName.
//  - "#text" nodes are stored (trimmed) in refString; for non-empty text the user is asked
//    through askIfNameContained whether the line is a name (0) or a reference (1).
//  - In both cases a Synonym is built from currentMyName.getTaxonNameBase() and added to
//    acceptedTaxon as SYNONYM_OF unless an equal synonym is already attached.
//  - Identifiers longer than two characters are registered through setLSID.
//  - When the accepted taxon's title (the part before "sec") equals the current name, a generic
//    reference titled refString becomes the nomenclatural reference of the accepted name.
// NOTE(review): several try/else/closing-brace lines are missing from this copy of the method,
// so the exact nesting of the branches below has to be confirmed against the complete file.
1862 * handle cases where the bibref are inside <p> and outside
1864 @SuppressWarnings("rawtypes")
1865 private void extractReferenceRawText(NodeList references
, NonViralName
<?
> nameToBeFilled
, Reference
<?
> refMods
,
1866 Taxon acceptedTaxon
) {
1867 String refString
="";
1868 NomenclaturalStatusType statusType
= null;
1869 currentMyName
= new MyName();
1870 for (int j
=0;j
<references
.getLength();j
++){
1871 acceptedTaxon
=CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1872 //no bibref tag inside
1873 // System.out.println("references.item(j).getNodeName()"+references.item(j).getNodeName());
// a tagged scientific name: parse it and remember it as the current name
1874 if (references
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1877 currentMyName
= extractScientificName(references
.item(j
),refMods
);
1878 // if (myName.getNewName().isEmpty()) {
1879 // name=myName.getOriginalName()+"---"+myName.getRank()+"---"+myName.getIdentifier()+"---"+myName.getStatus();
1881 // name=myName.getNewName()+"---"+myName.getRank()+"---"+myName.getIdentifier()+"---"+myName.getStatus();
1883 } catch (TransformerFactoryConfigurationError e
) {
1885 } catch (TransformerException e
) {
1889 // name=name.trim();
1891 if (references
.item(j
).getNodeName().equalsIgnoreCase("#text")){
1892 refString
= references
.item(j
).getTextContent().trim();
1894 if(references
.item(j
).getNodeName().equalsIgnoreCase("#text") && !references
.item(j
).getTextContent().trim().isEmpty()){
1897 if (!currentMyName
.getStatus().isEmpty()){
1899 statusType
= nomStatusString2NomStatus(currentMyName
.getStatus());
1900 } catch (UnknownCdmTypeException e
) {
1901 addProblematicStatusToFile(currentMyName
.getStatus());
1902 logger
.warn("Problem with status");
1907 /*INonViralNameParser parser = NonViralNameParserImpl.NewInstance();*/
1908 String fullLineRefName
= references
.item(j
).getTextContent().trim();
// the initial value 2 is overwritten right away; only the answers 0 and 1 are handled below
1909 int nameOrRefOrOther
=2;
1910 nameOrRefOrOther
=askIfNameContained(fullLineRefName
);
1911 // System.out.println("NAMEORREFOR?? "+nameOrRefOrOther);
1912 if (nameOrRefOrOther
==0){
1913 /*TaxonNameBase nameTBF = parser.parseFullName(fullLineRefName, nomenclaturalCode, Rank.UNKNOWN_RANK());
1914 if (nameTBF.hasProblem() &&
1915 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1916 addProblemNameToFile(fullLineRefName,"",nomenclaturalCode,Rank.UNKNOWN_RANK());
1917 nameTBF=solveNameProblem(fullLineRefName, fullLineRefName,parser,currentMyName.getAuthor(), currentMyName.getRank());
1919 nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1921 TaxonNameBase nameTBF
= currentMyName
.getTaxonNameBase();
1922 Synonym synonym
= null;
1923 if (!currentMyName
.getStatus().isEmpty()){
1925 statusType
= nomStatusString2NomStatus(currentMyName
.getStatus());
1926 nameToBeFilled
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
1927 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
1928 } catch (UnknownCdmTypeException e
) {
1929 addProblematicStatusToFile(currentMyName
.getStatus());
1930 logger
.warn("Problem with status");
1931 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
1932 synonym
.setAppendedPhrase(currentMyName
.getStatus());
1936 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
1939 Set
<Synonym
> synonymsSet
= acceptedTaxon
.getSynonyms();
1940 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1941 boolean synoExist
= false;
1942 for (Synonym syn
: synonymsSet
){
1943 // System.out.println(syn.getName()+" -- "+syn.getSec());
1944 boolean a
=syn
.getName().equals(synonym
.getName());
1945 boolean b
= syn
.getSec().equals(synonym
.getSec());
1950 if (!synonymsSet
.contains(synonym
) && ! (synoExist
)) {
1951 sourceHandler
.addSource(refMods
, synonym
);
1953 acceptedTaxon
.addSynonym(synonym
, SynonymRelationshipType
.SYNONYM_OF(),refMods
, null);
1957 if (nameOrRefOrOther
==1){
1958 Reference
<?
> re
= ReferenceFactory
.newGeneric();
1959 re
.setTitleCache(fullLineRefName
);
1961 /* TaxonNameBase nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
1962 if (nameTBF.hasProblem() &&
1963 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1964 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
1965 nameTBF=solveNameProblem(currentMyName.getName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
1967 nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1969 TaxonNameBase nameTBF
= currentMyName
.getTaxonNameBase();
1970 Synonym synonym
= null;
1971 if (!currentMyName
.getStatus().isEmpty()){
1973 statusType
= nomStatusString2NomStatus(currentMyName
.getStatus());
1974 nameToBeFilled
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
1975 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
1976 } catch (UnknownCdmTypeException e
) {
1977 addProblematicStatusToFile(currentMyName
.getStatus());
1978 logger
.warn("Problem with status");
1979 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
1980 synonym
.setAppendedPhrase(currentMyName
.getStatus());
1984 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
1987 Set
<Synonym
> synonymsSet
= acceptedTaxon
.getSynonyms();
1988 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1989 boolean synoExist
= false;
1990 for (Synonym syn
: synonymsSet
){
1991 // System.out.println(syn.getName()+" -- "+syn.getSec());
1992 boolean a
=syn
.getName().equals(synonym
.getName());
1993 boolean b
= syn
.getSec().equals(synonym
.getSec());
1998 if (!synonymsSet
.contains(synonym
) && ! (synoExist
)) {
1999 sourceHandler
.addSource(refMods
, synonym
);
2001 acceptedTaxon
.addSynonym(synonym
, SynonymRelationshipType
.SYNONYM_OF(),re
, null);
2007 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
2008 setLSID(currentMyName
.getIdentifier(), acceptedTaxon
);
2012 if(!currentMyName
.getName().isEmpty()){
2013 logger
.info("acceptedTaxon and name: *"+acceptedTaxon
.getTitleCache()+"*, *"+currentMyName
.getName()+"*");
2014 if (acceptedTaxon
.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(currentMyName
.getName().trim())){
2015 Reference
<?
> refS
= ReferenceFactory
.newGeneric();
2016 refS
.setTitleCache(refString
, true);
2017 // TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
2018 // acceptedTaxon.addDescription(td);
2019 // acceptedTaxon.addSource(refSource);
2021 // TextData textData = TextData.NewInstance(Feature.CITATION());
2023 // textData.addSource(null, null, refS, null);
2024 // td.addElement(textData);
2025 // td.addSource(refSource);
2026 // importer.getDescriptionService().saveOrUpdate(td);
2029 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
2030 setLSID(currentMyName
.getIdentifier(), acceptedTaxon
);
2034 acceptedTaxon
.getName().setNomenclaturalReference(refS
);
2037 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2038 TaxonNameBase nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
2039 if (nameTBF.hasProblem() &&
2040 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
2041 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
2042 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
2043 nameTBF=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
2045 nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
2047 TaxonNameBase nameTBF
= currentMyName
.getTaxonNameBase();
2048 Synonym synonym
= null;
2049 if (!currentMyName
.getStatus().isEmpty()){
2051 statusType
= nomStatusString2NomStatus(currentMyName
.getStatus());
2052 nameToBeFilled
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
2053 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
2054 } catch (UnknownCdmTypeException e
) {
2055 addProblematicStatusToFile(currentMyName
.getStatus());
2056 logger
.warn("Problem with status");
2057 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
2058 synonym
.setAppendedPhrase(currentMyName
.getStatus());
2062 synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
2066 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
2067 setLSID(currentMyName
.getIdentifier(), synonym
);
2070 Set
<Synonym
> synonymsSet
= acceptedTaxon
.getSynonyms();
2071 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
2072 boolean synoExist
= false;
2073 for (Synonym syn
: synonymsSet
){
2074 // System.out.println(syn.getName()+" -- "+syn.getSec());
2075 boolean a
=syn
.getName().equals(synonym
.getName());
2076 boolean b
= syn
.getSec().equals(synonym
.getSec());
2081 if (!synonymsSet
.contains(synonym
) && ! (synoExist
)) {
2082 sourceHandler
.addSource(refMods
, synonym
);
2084 acceptedTaxon
.addSynonym(synonym
, SynonymRelationshipType
.SYNONYM_OF(),refMods
, null);
// persist the accepted taxon through the taxon service
2088 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
2096 * @param acceptedTaxon
2098 @SuppressWarnings("rawtypes")
2099 private void setLSID(String identifier
, TaxonBase
<?
> taxon
) {
2100 // boolean lsidok=false;
2101 String id
= identifier
.split("__")[0];
2102 String source
= identifier
.split("__")[1];
2103 if (id
.indexOf("lsid")>-1){
2105 LSID lsid
= new LSID(id
);
2106 taxon
.setLsid(lsid
);
2108 } catch (MalformedLSIDException e
) {
2109 logger
.warn("Malformed LSID");
2114 // if ((id.indexOf("lsid")<0) || !lsidok){
2115 //ADD ORIGINAL SOURCE ID EVEN IF LSID
2116 Reference
<?
> re
= null;
2117 List
<Reference
> references
= importer
.getReferenceService().list(Reference
.class, null, null, null, null);
2118 for (Reference
<?
> refe
: references
) {
2119 if (refe
.getTitleCache().equalsIgnoreCase(source
)) {
2125 re
= ReferenceFactory
.newGeneric();
2126 re
.setTitleCache(source
);
2127 importer
.getReferenceService().saveOrUpdate(re
);
2129 re
=CdmBase
.deproxy(re
, Reference
.class);
2131 Set
<IdentifiableSource
> sources
= taxon
.getSources();
2132 boolean lsidinsource
=false;
2133 boolean urlinsource
=false;
2134 for (IdentifiableSource src
:sources
){
2135 if (id
.equalsIgnoreCase(src
.getIdInSource()) && re
.getTitleCache().equals(src
.getCitation().getTitleCache())) {
2138 if (src
.getIdInSource() == null && re
.getTitleCache().equals(sourceUrlRef
.getTitleCache())) {
2143 taxon
.addSource(OriginalSourceType
.Import
, id
,null,re
,null);
2147 taxon
.addSource(OriginalSourceType
.Import
, null,null,sourceUrlRef
,null);
2154 * try to solve a parsing problem for a scientific name
2155 * @param original : the name from the OCR document
2156 * @param name : the tagged version
2158 * @return the corrected TaxonNameBase
2160 /* @SuppressWarnings({ "unchecked", "rawtypes" })
2161 private TaxonNameBase<?,?> solveNameProblem(String original, String name, INonViralNameParser parser, String author, Rank rank) {
2162 Map<String,String> ato = namesMap.get(original);
2164 ato = namesMap.get(original+" "+author);
2168 if (ato == null && rank.equals(Rank.UNKNOWN_RANK())){
2169 rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
2171 if (ato != null && rank.equals(Rank.UNKNOWN_RANK())){
2172 rank = getRank(ato);
2174 // TaxonNameBase<?,?> nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
2175 TaxonNameBase<?,?> nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
2176 // logger.info("RANK: "+rank);
2178 List<ParserProblem> problems = nameTBF.getParsingProblems();
2179 for (ParserProblem pb:problems) {
2180 System.out.println(pb.toString());
2182 while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
2183 addProblemNameToFile(name,author,nomenclaturalCode,rank);
2184 String fullname=name;
2185 if(! skippQuestion) {
2186 fullname = getFullReference(name,nameTBF.getParsingProblems());
2188 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2189 nameTBF = BotanicalName.NewInstance(null);
2191 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2192 nameTBF = ZoologicalName.NewInstance(null);
2194 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2195 nameTBF= BacterialName.NewInstance(null);
2197 parser.parseReferencedName(nameTBF, fullname, rank, false);
2202 if (name.indexOf(author)>-1) {
2203 nameTBF = parser.parseSimpleName(name.substring(0, name.indexOf(author)), nomenclaturalCode, rank);
2205 nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
2207 if (nameTBF.hasProblem()){
2208 if (name.indexOf(author)>-1) {
2209 addProblemNameToFile(name.substring(0, name.indexOf(author)),author,nomenclaturalCode,rank);
2211 addProblemNameToFile(name,author,nomenclaturalCode,rank);
2213 // System.out.println("TBF still has problems "+nameTBF.hasProblem());
2214 problems = nameTBF.getParsingProblems();
2215 for (ParserProblem pb:problems) {
2216 System.out.println(pb.toString());
2218 nameTBF.setFullTitleCache(name, true);
2220 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2221 ((BotanicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2223 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2224 ((ZoologicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2226 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2227 ((BacterialName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2230 // logger.info("FULL TITLE CACHE "+name);
2232 nameTBF.setFullTitleCache(name, true);
// Extracts the accepted taxon of a treatment from its nomenclature node.
// Visible behaviour:
//  - a first pass over the children reads "tax:status" and converts it with nomStatusString2NomStatus;
//  - a second pass handles "#text" (kept as freetext), "tax:collection_event" (extractMaterialsDirect),
//    "tax:name" and "tax:ref_group" (reloadClassification + extractReferences, only when maxRankRespected);
//  - the first "tax:name" is parsed with extractScientificName and defines treatmentMainName and
//    originalTreatmentName; the taxon proposed by currentMyName is reused when compareStatus accepts it,
//    otherwise a new Taxon is created from currentMyName.getTaxonNameBase(); later names are passed to
//    extractSynonyms;
//  - non-empty freetext is stored through setParticularDescription with the "not marked up" feature.
// Returns acceptedTaxon, which is null when no name was processed.
// NOTE(review): try/else/closing-brace lines are missing from this copy of the method, so the exact
// nesting of the branches has to be confirmed against the complete file.
2241 * @param nomenclatureNode: the XML nodes
2242 * @param nametosave: the list of objects to save into the CDM
2243 * @param refMods: the current reference extracted from the MODS
2246 @SuppressWarnings({ "rawtypes", "null" })
2247 private Taxon
extractNomenclature(Node nomenclatureNode
, List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
) throws ClassCastException
{
2248 // logger.info("extractNomenclature");
2249 NodeList children
= nomenclatureNode
.getChildNodes();
2251 NonViralName
<?
> nameToBeFilled
= null;
2252 Taxon acceptedTaxon
= null;
2253 // INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2255 // String fullContent = nomenclatureNode.getTextContent();
2257 NomenclaturalStatusType statusType
= null;
// first pass: pick up the nomenclatural status of the treatment, if any
2258 for (int i
=0;i
<children
.getLength();i
++){
2259 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:status")){
2260 String status
= children
.item(i
).getTextContent().trim();
2261 if (!status
.isEmpty()){
2263 statusType
= nomStatusString2NomStatus(status
);
2264 } catch (UnknownCdmTypeException e
) {
2265 addProblematicStatusToFile(currentMyName
.getStatus());
2266 logger
.warn("Problem with status");
2272 boolean containsSynonyms
=false;
// second pass: names, collection events, reference groups and free text
2273 for (int i
=0;i
<children
.getLength();i
++){
2275 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text")) {
2276 freetext
=children
.item(i
).getTextContent();
2278 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:collection_event")) {
2279 // System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
2280 extractMaterialsDirect(children
.item(i
), acceptedTaxon
, refMods
, "collection");
2282 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:name")){
2283 if(!containsSynonyms
){
2284 currentMyName
= new MyName();
2286 currentMyName
= extractScientificName(children
.item(i
),refMods
);
2287 treatmentMainName
= currentMyName
.getNewName();
2288 originalTreatmentName
= currentMyName
.getOriginalName();
2290 } catch (TransformerFactoryConfigurationError e1
) {
2292 } catch (TransformerException e1
) {
// only names of unknown rank or of a rank at or below the configured maximum are imported
2296 if (currentMyName
.getRank().equals(Rank
.UNKNOWN_RANK()) || currentMyName
.getRank().isLower(configState
.getConfig().getMaxRank()) || currentMyName
.getRank().equals(configState
.getConfig().getMaxRank())){
2297 maxRankRespected
=true;
2298 /* if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2299 nameToBeFilled = BotanicalName.NewInstance(null);
2301 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2302 nameToBeFilled = ZoologicalName.NewInstance(null);
2304 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2305 nameToBeFilled = BacterialName.NewInstance(null);
2308 nameToBeFilled
=currentMyName
.getTaxonNameBase();
2310 // acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
2311 acceptedTaxon
=currentMyName
.getTaxon();
2312 System
.out
.println("TreatmentName "+treatmentMainName
+" - "+acceptedTaxon
);
2315 boolean statusMatch
=false;
2316 if(acceptedTaxon
!=null ){
2317 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
2318 statusMatch
=compareStatus(acceptedTaxon
, statusType
);
2319 System
.out
.println("statusMatch: "+statusMatch
);
2321 if (acceptedTaxon
==null || (acceptedTaxon
!= null && !statusMatch
)){
2322 // acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2324 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2325 nameToBeFilled = (BotanicalName) parser.parseFullName(treatmentMainName, nomenclaturalCode, null);
2327 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2328 nameToBeFilled = (ZoologicalName) parser.parseFullName(treatmentMainName, nomenclaturalCode, null);
2330 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2331 nameToBeFilled = (BacterialName) parser.parseFullName(treatmentMainName, nomenclaturalCode, null);
2335 if (nameToBeFilled.hasProblem() &&
2336 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
2337 addProblemNameToFile(treatmentMainName,"",nomenclaturalCode,null);
2338 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2339 nameToBeFilled = (BotanicalName) solveNameProblem(originalTreatmentName,treatmentMainName,parser,currentMyName.getAuthor(), currentMyName.getRank());
2341 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2342 nameToBeFilled = (ZoologicalName)solveNameProblem(originalTreatmentName,treatmentMainName,parser,currentMyName.getAuthor(), currentMyName.getRank());
2344 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2345 nameToBeFilled = (BacterialName) solveNameProblem(originalTreatmentName,treatmentMainName,parser,currentMyName.getAuthor(), currentMyName.getRank());
2350 nameToBeFilled
=currentMyName
.getTaxonNameBase();
2351 if (nameToBeFilled
!=null){
2354 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2355 nameToBeFilled = (BotanicalName) getTaxonNameBase(nameToBeFilled,nametosave,statusType);
2357 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2358 nameToBeFilled = (ZoologicalName)getTaxonNameBase(nameToBeFilled,nametosave,statusType);
2360 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2361 nameToBeFilled = (BacterialName)getTaxonNameBase(nameToBeFilled,nametosave,statusType);
2363 }catch(Exception e){
2364 TaxonNameBase n = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
2365 System.out.println(n.getClass());
2366 nameToBeFilled = (NonViralName<?>) getTaxonNameBase(nameToBeFilled,nametosave,statusType);
2370 if (!originalTreatmentName
.isEmpty()) {
2371 TaxonNameDescription td
= TaxonNameDescription
.NewInstance();
2372 td
.setTitleCache(originalTreatmentName
);
2373 nameToBeFilled
.addDescription(td
);
2376 if(statusType
!= null) {
2377 nameToBeFilled
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
2379 sourceHandler
.addSource(refMods
, nameToBeFilled
);
2381 if (nameToBeFilled
.getNomenclaturalReference() == null) {
2382 acceptedTaxon
= new Taxon(nameToBeFilled
,refMods
);
2383 System
.out
.println("NEW ACCEPTED HERE "+nameToBeFilled
);
2386 acceptedTaxon
= new Taxon(nameToBeFilled
,(Reference
<?
>) nameToBeFilled
.getNomenclaturalReference() );//TODO TOFIX reference
2387 System
.out
.println("NEW ACCEPTED HERE2 "+nameToBeFilled
);
2390 sourceHandler
.addSource(refMods
, acceptedTaxon
);
2392 if(!configState
.getConfig().doKeepOriginalSecundum()) {
2393 acceptedTaxon
.setSec(configState
.getConfig().getSecundum());
2394 logger
.info("SET SECUNDUM "+configState
.getConfig().getSecundum());
2395 System
.out
.println("SET SECUNDUM "+configState
.getConfig().getSecundum());
2398 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
2399 setLSID(currentMyName
.getIdentifier(), acceptedTaxon
);
2403 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
2404 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
2406 // Taxon parentTaxon = currentMyName.getHigherTaxa();
2407 // if (parentTaxon == null && !skippQuestion) {
2408 // parentTaxon = askParent(acceptedTaxon, classification);
2410 // if (parentTaxon ==null){
2411 // while (parentTaxon == null) {
2412 // parentTaxon = createParent(acceptedTaxon, refMods);
2413 // classification.addParentChild(parentTaxon, acceptedTaxon, refMods, null);
2416 // classification.addParentChild(parentTaxon, acceptedTaxon, refMods, null);
2420 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
2421 Set
<IdentifiableSource
> sources
= acceptedTaxon
.getSources();
2422 boolean sourcelinked
=false;
2423 for (IdentifiableSource source
:sources
){
2424 if (source
.getCitation().getTitleCache().equalsIgnoreCase(refMods
.getTitleCache())) {
2428 if (!configState
.getConfig().doKeepOriginalSecundum()) {
2429 acceptedTaxon
.setSec(configState
.getConfig().getSecundum());
2430 logger
.info("SET SECUNDUM "+configState
.getConfig().getSecundum());
2431 System
.out
.println("SET SECUNDUM "+configState
.getConfig().getSecundum());
2434 sourceHandler
.addSource(refMods
, acceptedTaxon
);
2436 if (!sourcelinked
|| !configState
.getConfig().doKeepOriginalSecundum()){
2438 if (!currentMyName
.getIdentifier().isEmpty() && (currentMyName
.getIdentifier().length()>2)){
2439 setLSID(currentMyName
.getIdentifier(), acceptedTaxon
);
2441 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
2445 maxRankRespected
=false;
2447 containsSynonyms
=true;
// a further name inside the nomenclature node is handed to extractSynonyms
2450 extractSynonyms(children
.item(i
), acceptedTaxon
, refMods
);
2451 }catch(NullPointerException e
){
2452 logger
.warn("nullpointerexception, the accepted taxon might be null");
2456 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:ref_group") && maxRankRespected
){
2457 reloadClassification();
2458 //extract the References within the document
2459 extractReferences(children
.item(i
),nametosave
,acceptedTaxon
,refMods
);
2461 if(!freetext
.isEmpty()) {
2462 setParticularDescription(freetext
,acceptedTaxon
,acceptedTaxon
, refMods
,getNotMarkedUpFeatureObject());
2466 // importer.getClassificationService().saveOrUpdate(classification);
2467 return acceptedTaxon
;
2474 @SuppressWarnings("unchecked")
2475 private boolean compareStatus(Taxon t
, NomenclaturalStatusType statusType
) {
2476 boolean statusMatch
=false;
2478 Set
<NomenclaturalStatus
> status
= t
.getName().getStatus();
2479 if (statusType
!=null && status
.size()>0){ //the statusType is known for both taxon
2480 for (NomenclaturalStatus st
:status
){
2481 NomenclaturalStatusType stype
= st
.getType();
2482 if (stype
.toString().equalsIgnoreCase(statusType
.toString())) {
2488 if(statusType
== null && status
.size()==0) {//there is no statusType, we can assume it's the same
2496 * @param acceptedTaxon: the current acceptedTaxon
2497 * @param ref: the current reference extracted from the MODS
2498 * @return the parent for the current accepted taxon
2500 /* private Taxon createParent(Taxon acceptedTaxon, Reference<?> ref) {
2501 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2503 List<Rank> rankList = new ArrayList<Rank>();
2504 rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
2506 List<String> rankListStr = new ArrayList<String>();
2507 for (Rank r:rankList) {
2508 rankListStr.add(r.toString());
2511 String s = acceptedTaxon.getTitleCache();
2514 int addTaxon = askAddParent(s);
2515 logger.info("ADD TAXON: "+addTaxon);
2516 if (addTaxon == 0 ){
2517 Taxon tmp = askParent(acceptedTaxon, classification);
2519 s = askSetParent(s);
2520 r = askRank(s,rankListStr);
2522 NonViralName<?> nameToBeFilled = null;
2523 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2524 nameToBeFilled = BotanicalName.NewInstance(null);
2526 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2527 nameToBeFilled = ZoologicalName.NewInstance(null);
2529 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2530 nameToBeFilled = BacterialName.NewInstance(null);
2532 nameToBeFilled.setTitleCache(s);
2533 nameToBeFilled.setRank(getRank(r));
2535 tax = Taxon.NewInstance(nameToBeFilled, ref);
2541 createParent(tax, ref);
2542 // logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
2543 classification.addParentChild(tax, acceptedTaxon, ref, null);
2546 classification.addChildTaxon(acceptedTaxon, ref, null);
2550 classification.addChildTaxon(acceptedTaxon, ref, null);
2553 // logger.info("RETURN: "+tax );
// Builds a MyName from a tagged scientific name node.
// Visible behaviour:
//  - the status is read from the children through extractStatus;
//  - "tax:xmldata" children are scanned atom by atom: extractIdentifier collects the identifier, a
//    "dwc:taxonRank" atom is converted with getRank, every atom's text is stored in atomisedMap under its
//    lower-cased name, and addAtomisedNamesToMap fills atomisedName;
//  - a non-blank "#text" child is taken as the full (non atomised) name and passed through cleanName;
//  - the atomised map is registered in namesMap under the full name;
//  - when the full and atomised names differ, newName is taken from the atomised string or from
//    getScientificName;
//  - the name parser is tried on the atomised and then on the full name; when both report problems
//    createAtomisedTaxon is used, otherwise createAtomisedTaxonString, setParsedName and buildTaxon.
// NOTE(review): declarations (e.g. newName, tmpRank), else-branches and the final return statement
// are missing from this copy of the method; confirm the nesting against the complete file.
2562 * @throws TransformerFactoryConfigurationError
2563 * @throws TransformerException
2564 * @return a list of possible names
2566 @SuppressWarnings({ "null", "rawtypes" })
2567 private MyName
extractScientificName(Node name
, Reference
<?
> refMods
) throws TransformerFactoryConfigurationError
, TransformerException
{
2568 // System.out.println("extractScientificName");
2570 String
[] rankListToPrint_tmp
={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2571 List
<String
> rankListToPrint
= new ArrayList
<String
>();
2572 for (String r
: rankListToPrint_tmp
) {
2573 rankListToPrint
.add(r
.toLowerCase());
2576 Rank rank
= Rank
.UNKNOWN_RANK();
2577 NodeList children
= name
.getChildNodes();
2578 String fullName
= "";
2580 String identifier
="";
2581 HashMap
<String
, String
> atomisedMap
= new HashMap
<String
, String
>();
2582 List
<String
> atomisedName
= new ArrayList
<String
>();
2584 String rankStr
= "";
2587 String status
= extractStatus(children
);
// collect the atomised parts (tax:xmldata) and the plain text of the name
2589 for (int i
=0;i
<children
.getLength();i
++){
2590 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:xmldata")){
2591 NodeList atom
= children
.item(i
).getChildNodes();
2592 for (int k
=0;k
<atom
.getLength();k
++){
2593 identifier
= extractIdentifier(identifier
, atom
.item(k
));
2595 rankStr
= atom
.item(k
).getNodeName().toLowerCase();
2596 // logger.info("RANKSTR:*"+rankStr+"*");
2597 if (rankStr
.equalsIgnoreCase("dwc:taxonRank")) {
2598 rankStr
=atom
.item(k
).getTextContent().trim();
2599 tmpRank
= getRank(rankStr
);
2601 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2602 if (tmpRank
!= null){
2605 atomisedMap
.put(rankStr
.toLowerCase(),atom
.item(k
).getTextContent().trim());
2607 addAtomisedNamesToMap(rankListToPrint
, rank
, atomisedName
, atom
);
2609 if(children
.item(i
).getNodeName().equalsIgnoreCase("#text") && !StringUtils
.isBlank(children
.item(i
).getTextContent())){
2610 // logger.info("name non atomised: "+children.item(i).getTextContent());
2611 fullName
= children
.item(i
).getTextContent().trim();
2612 // logger.info("fullname: "+fullName);
2615 fullName
= cleanName(fullName
, atomisedName
);
2616 namesMap
.put(fullName
,atomisedMap
);
2618 String atomisedNameStr
= getAtomisedNameStr(atomisedName
);
// decide which spelling becomes the new name when full and atomised name differ
2620 if (fullName
!= null){
2621 // System.out.println("fullname: "+fullName);
2622 // System.out.println("atomised: "+atomisedNameStr);
2623 if (!fullName
.equalsIgnoreCase(atomisedNameStr
)) {
2625 // String defaultN = "";
2626 if (atomisedNameStr
.length()>fullName
.length()) {
2627 newName
=atomisedNameStr
;
2629 if (fullName
.length()>atomisedNameStr
.length() && (rank
.isLower(Rank
.SPECIES()) && fullName
.length()>2 && !fullName
.substring(0, 1).equals("."))) {
2630 newName
=getScientificName(fullName
,atomisedNameStr
,classification
.getTitleCache(),name
);
2636 newName
=getScientificName(fullName
,atomisedNameStr
,classification
.getTitleCache(),name
);
2643 // rank = askForRank(newName, rank, nomenclaturalCode);
2644 // System.out.println("atomised: "+atomisedMap.toString());
2646 // String[] names = new String[5];
2647 MyName myname
= new MyName();
2649 System
.out
.println("\n\nBUILD "+newName
+ "(rank: "+rank
+")");
2650 // System.out.println(atomisedMap.keySet());
2651 fullName
= extractAuthorFromNames(rank
, fullName
, atomisedMap
, myname
);
2652 myname
.setOriginalName(fullName
);
2653 myname
.setNewName(newName
);
2654 myname
.setRank(rank
);
2655 myname
.setIdentifier(identifier
);
2656 myname
.setStatus(status
);
2657 myname
.setSource(refMods
);
2659 // boolean higherAdded=false;
// try the parser on the atomised name first, then on the full name
2662 boolean parseNameManually
=false;
2663 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
2664 TaxonNameBase nameToBeFilledTest
= parser
.parseFullName(atomisedNameStr
, nomenclaturalCode
, rank
);
2665 if (nameToBeFilledTest
.hasProblem()){
2666 addProblemNameToFile("ato",atomisedNameStr
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2667 nameToBeFilledTest
= parser
.parseFullName(fullName
, nomenclaturalCode
,rank
);
2668 if (nameToBeFilledTest
.hasProblem()){
2669 addProblemNameToFile("full",fullName
,nomenclaturalCode
,rank
, nameToBeFilledTest
.getParsingProblems().toString());
2670 parseNameManually
=true;
2674 if(parseNameManually
){
2675 createAtomisedTaxon(rank
, newName
, atomisedMap
, myname
);
2678 createAtomisedTaxonString(newName
, atomisedMap
, myname
);
2679 myname
.setParsedName(nameToBeFilledTest
);
2680 myname
.buildTaxon();
2687 * @param atomisedName
2690 private String
getAtomisedNameStr(List
<String
> atomisedName
) {
2691 String atomisedNameStr
= StringUtils
.join(atomisedName
," ");
2692 while(atomisedNameStr
.contains(" ")) {
2693 atomisedNameStr
=atomisedNameStr
.replace(" ", " ");
2695 atomisedNameStr
=atomisedNameStr
.trim();
2696 return atomisedNameStr
;
2704 private String
extractStatus(NodeList children
) {
2706 for (int i
=0;i
<children
.getLength();i
++){
2707 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:status") ||
2708 (children
.item(i
).getNodeName().equalsIgnoreCase("tax:namePart") &&
2709 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("status"))){
2710 status
= children
.item(i
).getTextContent().trim();
2722 private String
extractIdentifier(String identifier
, Node atom
) {
2723 if (atom
.getNodeName().equalsIgnoreCase("tax:xid")){
2725 identifier
= atom
.getAttributes().getNamedItem("identifier").getNodeValue();
2726 }catch(Exception e
){
2727 System
.out
.println("pb with identifier, maybe empty");
2730 identifier
+="__"+atom
.getAttributes().getNamedItem("source").getNodeValue();
2731 }catch(Exception e
){
2732 System
.out
.println("pb with identifier, maybe empty");
2739 * @param rankListToPrint
2741 * @param atomisedName
2744 private void addAtomisedNamesToMap(List
<String
> rankListToPrint
, Rank rank
, List
<String
> atomisedName
, NodeList atom
) {
2745 for (int k
=0;k
<atom
.getLength();k
++){
2746 if (!atom
.item(k
).getNodeName().equalsIgnoreCase("dwc:taxonRank") ) {
2747 if (atom
.item(k
).getNodeName().equalsIgnoreCase("dwc:subgenus") || atom
.item(k
).getNodeName().equalsIgnoreCase("dwcranks:subgenus")) {
2748 atomisedName
.add("("+atom
.item(k
).getTextContent().trim()+")");
2750 if(atom
.item(k
).getNodeName().equalsIgnoreCase("dwcranks:varietyepithet") || atom
.item(k
).getNodeName().equalsIgnoreCase("dwc:Subspecies")) {
2751 if(atom
.item(k
).getNodeName().equalsIgnoreCase("dwcranks:varietyepithet")){
2752 atomisedName
.add("var. "+atom
.item(k
).getTextContent().trim());
2754 if(atom
.item(k
).getNodeName().equalsIgnoreCase("dwc:Subspecies") || atom
.item(k
).getNodeName().equalsIgnoreCase("dwc:infraspecificepithet")) {
2755 atomisedName
.add("subsp. "+atom
.item(k
).getTextContent().trim());
2759 if(rankListToPrint
.contains(atom
.item(k
).getNodeName().toLowerCase())) {
2760 atomisedName
.add(atom
.item(k
).getTextContent().trim());
2763 // System.out.println("rank : "+rank.toString());
2764 if (rank
.isHigher(Rank
.GENUS()) && (atom
.item(k
).getNodeName().indexOf("dwcranks:")>-1 || atom
.item(k
).getNodeName().indexOf("dwc:Family")>-1)) {
2765 atomisedName
.add(atom
.item(k
).getTextContent().trim());
2768 // System.out.println("on a oublie qqn "+atom.item(k).getNodeName());
2772 // System.out.println("on a oublie qqn "+atom.item(k).getNodeName());
2782 * @param atomisedName
2785 private String
cleanName(String name
, List
<String
> atomisedName
) {
2786 String fullName
=name
;
2787 if (fullName
!= null){
2788 fullName
= fullName
.replace("( ", "(");
2789 fullName
= fullName
.replace(" )",")");
2791 if (fullName
.trim().isEmpty()){
2792 fullName
=StringUtils
.join(atomisedName
," ");
2795 while(fullName
.contains(" ")) {
2796 fullName
=fullName
.replace(" ", " ");
2797 // logger.info("while");
2799 fullName
=fullName
.trim();
2807 * @param atomisedMap
2811 private String
extractAuthorFromNames(Rank rank
, String name
, HashMap
<String
, String
> atomisedMap
,
2813 String fullName
=name
;
2814 if (atomisedMap
.get("dwc:scientificnameauthorship") == null && fullName
!=null){
2815 // System.out.println("rank : "+rank.toString());
2816 if(rank
.isHigher(Rank
.SPECIES())){
2819 if(atomisedMap
.get("dwcranks:subgenus") != null) {
2820 author
= fullName
.split(atomisedMap
.get("dwcranks:subgenus"))[1].trim();
2822 if(atomisedMap
.get("dwc:subgenus") != null) {
2823 author
= fullName
.split(atomisedMap
.get("dwc:subgenus"))[1].trim();
2825 if(author
== null) {
2826 if(atomisedMap
.get("dwc:genus") != null) {
2827 author
= fullName
.split(atomisedMap
.get("dwc:genus"))[1].trim();
2831 fullName
= fullName
.substring(0, fullName
.indexOf(author
));
2832 author
=author
.replaceAll(",","").trim();
2833 myname
.setAuthor(author
);
2835 }catch(Exception e
){
2836 //could not extract the author
2839 if(rank
.equals(Rank
.SPECIES())){
2842 if(author
== null) {
2843 if(atomisedMap
.get("dwc:species") != null) {
2844 String
[] t
= fullName
.split(atomisedMap
.get("dwc:species"));
2845 // System.out.println("NB ELEMENTS "+t.length +"fullName "+fullName+", "+atomisedMap.get("dwc:species"));
2846 author
= fullName
.split(atomisedMap
.get("dwc:species"))[1].trim();
2847 // System.out.println("AUTEUR "+author);
2851 fullName
= fullName
.substring(0, fullName
.indexOf(author
));
2852 author
=author
.replaceAll(",","").trim();
2853 myname
.setAuthor(author
);
2855 }catch(Exception e
){
2856 //could not extract the author
2860 myname
.setAuthor(atomisedMap
.get("dwc:scientificnameauthorship"));
2867 * @param atomisedMap
2870 private void createAtomisedTaxonString(String newName
, HashMap
<String
, String
> atomisedMap
, MyName myname
) {
2871 if(atomisedMap
.get("dwc:family") != null && checkRankValidForImport(Rank
.FAMILY())){
2872 myname
.setFamilyStr(atomisedMap
.get("dwc:family"));
2874 if(atomisedMap
.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank
.SUBFAMILY())){
2875 myname
.setSubfamilyStr(atomisedMap
.get("dwcranks:subfamily"));
2877 if(atomisedMap
.get("dwcranks:tribe") != null && checkRankValidForImport(Rank
.TRIBE())){
2878 myname
.setTribeStr(atomisedMap
.get("dwcranks:tribe"));
2880 if(atomisedMap
.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank
.SUBTRIBE())){
2881 myname
.setSubtribeStr(atomisedMap
.get("dwcranks:subtribe"));
2883 if(atomisedMap
.get("dwc:genus") != null && checkRankValidForImport(Rank
.GENUS())){
2884 myname
.setGenusStr(atomisedMap
.get("dwc:genus"));
2886 if(atomisedMap
.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS())){
2887 myname
.setSubgenusStr(atomisedMap
.get("dwcranks:subgenus"));
2889 if(atomisedMap
.get("dwc:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS())){
2890 myname
.setSubgenusStr(atomisedMap
.get("dwc:subgenus"));
2892 if(atomisedMap
.get("dwc:species") != null && checkRankValidForImport(Rank
.SPECIES())){
2894 if(atomisedMap
.get("dwc:infraspecificepithet") != null) {
2895 n
=newName
.split(atomisedMap
.get("dwc:infraspecificepithet"))[0];
2896 n
=n
.replace("subsp.","");
2898 if(atomisedMap
.get("dwc:subspecies") != null) {
2899 n
=newName
.split(atomisedMap
.get("dwc:subspecies"))[0];
2900 n
=n
.replace("subsp.","");
2902 if(atomisedMap
.get("dwcranks:varietyepithet") != null) {
2903 n
=newName
.split(atomisedMap
.get("dwcranks:varietyepithet"))[0];
2904 n
=n
.replace("var.","");
2905 n
=n
.replace("v.","");
2907 if(atomisedMap
.get("dwcranks:formepithet") != null) {
2909 System
.out
.println("TODO FORMA");
2910 n
=newName
.split(atomisedMap
.get("dwcranks:formepithet"))[0];
2911 n
=n
.replace("forma","");
2914 String author
= myname
.getAuthor();
2915 if(n
.split(" ").length
>2)
2917 String n2
=n
.split(" ")[0]+" "+n
.split(" ")[1];
2920 a
=n
.split(n2
)[1].trim();
2921 }catch(Exception e
){logger
.info("no author in "+n
+"?");}
2923 myname
.setAuthor(a
);
2924 System
.out
.println("FINDCREATESPECIES --"+n2
+"--"+n
+"**"+a
+"##");
2929 myname
.setSpeciesStr(atomisedMap
.get("dwc:species"));
2930 myname
.setAuthor(author
);
2932 if(atomisedMap
.get("dwc:subspecies") != null && checkRankValidForImport(Rank
.SUBSPECIES())){
2933 myname
.setSubspeciesStr(atomisedMap
.get("dwc:subspecies"));
2935 if(atomisedMap
.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank
.SUBSPECIES())){
2936 myname
.setSubspeciesStr(atomisedMap
.get("dwc:infraspecificepithet"));
2938 if(atomisedMap
.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank
.VARIETY())){
2939 myname
.setVarietyStr(atomisedMap
.get("dwcranks:varietyepithet"));
2941 if(atomisedMap
.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank
.FORM())){
2942 myname
.setFormStr(atomisedMap
.get("dwcranks:formepithet"));
2949 * @param atomisedMap
2952 private void createAtomisedTaxon(Rank rank
, String newName
, HashMap
<String
, String
> atomisedMap
, MyName myname
) {
2953 if(rank
.equals(Rank
.UNKNOWN_RANK())){
2954 myname
.setNotParsableTaxon(newName
);
2957 if(atomisedMap
.get("dwc:family") != null && checkRankValidForImport(Rank
.FAMILY())){
2958 myname
.setFamily(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:family"),newName
, Rank
.FAMILY(),rank
));
2960 if(atomisedMap
.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank
.SUBFAMILY())){
2961 myname
.setSubfamily(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subfamily"), newName
,Rank
.SUBFAMILY(),rank
));
2963 if(atomisedMap
.get("dwcranks:tribe") != null && checkRankValidForImport(Rank
.TRIBE())){
2964 myname
.setTribe(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:tribe"),newName
, Rank
.TRIBE(),rank
));
2966 if(atomisedMap
.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank
.SUBTRIBE())){
2967 myname
.setSubtribe(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subtribe"),newName
, Rank
.SUBTRIBE(),rank
));
2969 if(atomisedMap
.get("dwc:genus") != null && checkRankValidForImport(Rank
.GENUS())){
2970 myname
.setGenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:genus"),newName
, Rank
.GENUS(),rank
));
2972 if(atomisedMap
.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS())){
2973 myname
.setSubgenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:subgenus"),newName
, Rank
.SUBGENUS(),rank
));
2975 if(atomisedMap
.get("dwc:subgenus") != null && checkRankValidForImport(Rank
.SUBGENUS())){
2976 myname
.setSubgenus(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:subgenus"),newName
, Rank
.SUBGENUS(),rank
));
2978 if(atomisedMap
.get("dwc:species") != null && checkRankValidForImport(Rank
.SPECIES())){
2980 if(atomisedMap
.get("dwc:infraspecificepithet") != null) {
2981 n
=newName
.split(atomisedMap
.get("dwc:infraspecificepithet"))[0];
2982 n
=n
.replace("subsp.","");
2984 if(atomisedMap
.get("dwc:subspecies") != null) {
2985 n
=newName
.split(atomisedMap
.get("dwc:subspecies"))[0];
2986 n
=n
.replace("subsp.","");
2988 if(atomisedMap
.get("dwcranks:varietyepithet") != null) {
2989 n
=newName
.split(atomisedMap
.get("dwcranks:varietyepithet"))[0];
2990 n
=n
.replace("var.","");
2991 n
=n
.replace("v.","");
2993 if(atomisedMap
.get("dwcranks:formepithet") != null) {
2995 System
.out
.println("TODO FORMA");
2996 n
=newName
.split(atomisedMap
.get("dwcranks:formepithet"))[0];
2997 n
=n
.replace("forma","");
3000 String author
= myname
.getAuthor();
3001 if(n
.split(" ").length
>2)
3003 String n2
=n
.split(" ")[0]+" "+n
.split(" ")[1];
3006 a
= n
.split(n2
)[1].trim();
3007 }catch(Exception e
){logger
.info("no author?");}
3008 myname
.setAuthor(a
);
3009 System
.out
.println("FINDCREATESPECIES --"+n2
+"--"+n
+"**"+a
+"##");
3014 myname
.setSpecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:species"),n
, Rank
.SPECIES(),rank
));
3015 myname
.setAuthor(author
);
3017 if(atomisedMap
.get("dwc:subspecies") != null && checkRankValidForImport(Rank
.SUBSPECIES())){
3018 myname
.setSubspecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:subspecies"), newName
,Rank
.SUBSPECIES(),rank
));
3020 if(atomisedMap
.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank
.SUBSPECIES())){
3021 myname
.setSubspecies(myname
.findOrCreateTaxon(atomisedMap
.get("dwc:infraspecificepithet"),newName
, Rank
.SUBSPECIES(),rank
));
3023 if(atomisedMap
.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank
.VARIETY())){
3024 myname
.setVariety(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:varietyepithet"),newName
, Rank
.VARIETY(),rank
));
3026 if(atomisedMap
.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank
.FORM())){
3027 myname
.setForm(myname
.findOrCreateTaxon(atomisedMap
.get("dwcranks:formepithet"), newName
,Rank
.FORM(),rank
));
3035 private boolean checkRankValidForImport(Rank currentRank
) {
3036 return currentRank
.isLower(configState
.getConfig().getMaxRank()) || currentRank
.equals(configState
.getConfig().getMaxRank());
3042 * @param classification2
3044 public void updateClassification(Classification classification2
) {
3045 classification
= classification2
;
3048 public class MyName
{
3049 String originalName
="";
3051 Rank rank
=Rank
.UNKNOWN_RANK();
3052 String identifier
="";
3056 NonViralName
<?
> taxonnamebase
;
3058 Reference
<?
> refMods
;
3060 Taxon family
,subfamily
,tribe
,subtribe
,genus
,subgenus
,species
,subspecies
, variety
,form
;
3061 NonViralName
<?
> familyName
, subfamilyName
, tribeName
,subtribeName
,genusName
,subgenusName
,speciesName
,subspeciesName
;
3062 String familyStr
, subfamilyStr
, tribeStr
,subtribeStr
,genusStr
,subgenusStr
,speciesStr
,subspeciesStr
,formStr
,varietyStr
;
3065 private Taxon taxon
;
3068 public void setSource(Reference
<?
> re
){
3075 public void setFormStr(String string
) {
3076 this.formStr
=string
;
3082 public void setVarietyStr(String string
) {
3083 this.varietyStr
=string
;
3089 public void setSubspeciesStr(String string
) {
3090 this.subspeciesStr
=string
;
3096 public void setSpeciesStr(String string
) {
3097 this.speciesStr
=string
;
3103 public void setSubgenusStr(String string
) {
3104 this.subgenusStr
=string
;
3110 public void setGenusStr(String string
) {
3111 this.genusStr
=string
;
3117 public void setSubtribeStr(String string
) {
3118 this.subtribeStr
=string
;
3124 public void setTribeStr(String string
) {
3125 this.tribeStr
=string
;
3131 public void setSubfamilyStr(String string
) {
3132 this.subfamilyStr
=string
;
3138 public void setFamilyStr(String string
) {
3139 this.familyStr
=string
;
3143 * @return the familyStr
3145 public String
getFamilyStr() {
3149 * @return the subfamilyStr
3151 public String
getSubfamilyStr() {
3152 return subfamilyStr
;
3155 * @return the tribeStr
3157 public String
getTribeStr() {
3161 * @return the subtribeStr
3163 public String
getSubtribeStr() {
3167 * @return the genusStr
3169 public String
getGenusStr() {
3173 * @return the subgenusStr
3175 public String
getSubgenusStr() {
3179 * @return the speciesStr
3181 public String
getSpeciesStr() {
3185 * @return the subspeciesStr
3187 public String
getSubspeciesStr() {
3188 return subspeciesStr
;
3191 * @return the formStr
3193 public String
getFormStr() {
3197 * @return the varietyStr
3199 public String
getVarietyStr() {
3206 public void setNotParsableTaxon(String newName2
) {
3207 List
<TaxonBase
> tmpList
= importer
.getTaxonService().list(Taxon
.class, 0, 0, null, null);
3209 NomenclaturalStatusType statusType
= null;
3210 if (!getStatus().isEmpty()){
3212 statusType
= nomStatusString2NomStatus(getStatus());
3213 } catch (UnknownCdmTypeException e
) {
3214 addProblematicStatusToFile(getStatus());
3215 logger
.warn("Problem with status");
3219 boolean foundIdentic
=false;
3221 // Taxon tmpPartial=null;
3222 for (TaxonBase tmpb
:tmpList
){
3224 TaxonNameBase tnb
= tmpb
.getName();
3227 if (tnb
.getTitleCache().split("sec.")[0].equals(newName2
) ){
3228 crank
=tnb
.getRank();
3229 if (crank
!=null && rank
!=null){
3230 if (crank
.equals(rank
)){
3234 }catch(Exception e
){
3235 e
.printStackTrace();
3243 boolean statusMatch
=false;
3244 boolean appendedMatch
=false;
3245 if(tmp
!=null && foundIdentic
){
3246 statusMatch
=compareStatus(tmp
, statusType
);
3247 if (!getStatus().isEmpty() && ! (tmp
.getAppendedPhrase() == null)) {
3248 appendedMatch
=tmp
.getAppendedPhrase().equals(getStatus());
3250 if (getStatus().isEmpty() && tmp
.getAppendedPhrase() == null) {
3255 if ((tmp
== null || !foundIdentic
) || (tmp
!= null && !statusMatch
) || (tmp
!= null && !appendedMatch
&& !statusMatch
)){
3257 NonViralName
<?
> tnb
= getNonViralNameAccNomenclature();
3260 if(statusType
!= null) {
3261 tnb
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
3263 if(getStatus()!=null) {
3264 tnb
.setAppendedPhrase(getStatus());
3267 tnb
.setTitleCache(newName2
,true);
3268 tmp
= findMatchingTaxon(tnb
,refMods
);
3270 tmp
=Taxon
.NewInstance(tnb
, refMods
);
3271 tmp
.setSec(refMods
);
3272 sourceHandler
.addSource(refMods
, tmp
);
3273 classification
.addChildTaxon(tmp
, null, null);
3276 tmp
= CdmBase
.deproxy(tmp
, Taxon
.class);
3277 if (author
!= null) {
3278 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
3279 setLSID(getIdentifier(), tmp
);
3280 importer
.getTaxonService().saveOrUpdate(tmp
);
3281 tmp
= CdmBase
.deproxy(tmp
, Taxon
.class);
3284 TaxonNameBase tnb
= CdmBase
.deproxy(tmp
.getName(), TaxonNameBase
.class);
3287 castTaxonNameBase(tnb
, taxonnamebase
);
3294 public void buildTaxon() {
3295 System
.out
.println("BUILD TAXON");
3297 NomenclaturalStatusType statusType
= null;
3298 if (!getStatus().isEmpty()){
3300 statusType
= nomStatusString2NomStatus(getStatus());
3301 taxonnamebase
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
3302 } catch (UnknownCdmTypeException e
) {
3303 addProblematicStatusToFile(getStatus());
3304 logger
.warn("Problem with status");
3307 importer
.getNameService().save(taxonnamebase
);
3308 Taxon tmptaxon
= Taxon
.NewInstance(taxonnamebase
, refMods
); //sec set null
3310 boolean exist
= false;
3311 for (TaxonNode p
: classification
.getAllNodes()){
3313 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(tmptaxon
.getTitleCache())) {
3314 if(compareStatus(p
.getTaxon(), statusType
)){
3315 tmptaxon
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
3319 }catch(NullPointerException n
){logger
.warn(" A taxon is either null or its titlecache is null - ignore it?");}
3323 boolean insertAsExisting
=false;
3324 List
<Taxon
> existingTaxons
= getMatchingTaxon(taxonnamebase
);
3325 double similarityScore
=0.0;
3326 for (Taxon bestMatchingTaxon
:existingTaxons
){
3327 similarityScore
=similarity(taxonnamebase
.getTitleCache().split("sec.")[0].toLowerCase().trim(), bestMatchingTaxon
.getTitleCache().split("sec.")[0].toLowerCase().trim());
3328 insertAsExisting
= compareAndCheckTaxon(taxonnamebase
, refMods
, similarityScore
, bestMatchingTaxon
);
3329 if(insertAsExisting
) {
3330 tmptaxon
=bestMatchingTaxon
;
3334 if (!insertAsExisting
){
3335 tmptaxon
.setSec(refMods
);
3336 if (taxonnamebase
.getRank().equals(configState
.getConfig().getMaxRank())) {
3337 System
.out
.println("****************************"+tmptaxon
);
3338 classification
.addChildTaxon(tmptaxon
, refMods
, null);
3340 hierarchy
= new HashMap
<Rank
, Taxon
>();
3341 System
.out
.println("LOOK FOR PARENT "+taxonnamebase
.toString()+", "+tmptaxon
.toString());
3342 lookForParentNode(taxonnamebase
,tmptaxon
, refMods
,this);
3343 System
.out
.println("HIERARCHY "+hierarchy
);
3344 Taxon parent
= buildHierarchy();
3345 if(!taxonExistsInClassification(parent
,tmptaxon
)){
3346 classification
.addParentChild(parent
, tmptaxon
, refMods
, null);
3347 importer
.getClassificationService().saveOrUpdate(classification
);
3349 // Set<TaxonNode> nodeList = classification.getAllNodes();
3350 // for(TaxonNode tn:nodeList) {
3351 // System.out.println(tn.getTaxon());
3355 importer
.getClassificationService().saveOrUpdate(classification
);
3356 // refreshTransaction();
3358 taxon
=CdmBase
.deproxy(tmptaxon
, Taxon
.class);
3366 private Taxon
buildHierarchy() {
3367 Taxon higherTaxon
= null;
3368 if(hierarchy
.containsKey(configState
.getConfig().getMaxRank())){
3369 if(!taxonExistsInClassification(higherTaxon
, hierarchy
.get(configState
.getConfig().getMaxRank()))) {
3370 System
.out
.println("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"+hierarchy
.get(configState
.getConfig().getMaxRank()));
3371 classification
.addChildTaxon(hierarchy
.get(configState
.getConfig().getMaxRank()), refMods
, null);
3373 higherTaxon
= hierarchy
.get(configState
.getConfig().getMaxRank());
3376 if(hierarchy
.containsKey(Rank
.SUBFAMILY())){
3377 if(!taxonExistsInClassification(higherTaxon
, hierarchy
.get(Rank
.SUBFAMILY()))) {
3378 classification
.addParentChild(higherTaxon
, hierarchy
.get(Rank
.SUBFAMILY()), refMods
, null);
3380 higherTaxon
=hierarchy
.get(Rank
.SUBFAMILY());
3382 if(hierarchy
.containsKey(Rank
.TRIBE())){
3383 if(!taxonExistsInClassification(higherTaxon
, hierarchy
.get(Rank
.TRIBE()))) {
3384 classification
.addParentChild(higherTaxon
, hierarchy
.get(Rank
.TRIBE()), refMods
, null);
3386 higherTaxon
=hierarchy
.get(Rank
.TRIBE());
3388 if(hierarchy
.containsKey(Rank
.SUBTRIBE())){
3389 if(!taxonExistsInClassification(higherTaxon
, hierarchy
.get(Rank
.SUBTRIBE()))) {
3390 classification
.addParentChild(higherTaxon
, hierarchy
.get(Rank
.SUBTRIBE()), refMods
, null);
3392 higherTaxon
=hierarchy
.get(Rank
.SUBTRIBE());
3394 if(hierarchy
.containsKey(Rank
.GENUS())){
3395 if(!taxonExistsInClassification(higherTaxon
, hierarchy
.get(Rank
.GENUS()))) {
3396 classification
.addParentChild(higherTaxon
, hierarchy
.get(Rank
.GENUS()), refMods
, null);
3398 higherTaxon
=hierarchy
.get(Rank
.GENUS());
3400 if(hierarchy
.containsKey(Rank
.SUBGENUS())){
3401 if(!taxonExistsInClassification(higherTaxon
, hierarchy
.get(Rank
.SUBGENUS()))) {
3402 classification
.addParentChild(higherTaxon
, hierarchy
.get(Rank
.SUBGENUS()), refMods
, null);
3404 higherTaxon
=hierarchy
.get(Rank
.SUBGENUS());
3406 importer
.getClassificationService().saveOrUpdate(classification
);
3410 private boolean taxonExistsInClassification(Taxon parent
, Taxon child
){
3411 // System.out.println("LOOK IF TAXA EXIST "+parent+", "+child);
3412 boolean found
=false;
3414 for (TaxonNode p
: classification
.getAllNodes()){
3415 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
3416 for (TaxonNode c
: p
.getChildNodes()) {
3417 if (c
.getTaxon().getTitleCache().equalsIgnoreCase(child
.getTitleCache())) {
3426 for (TaxonNode p
: classification
.getAllNodes()){
3427 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(child
.getTitleCache())) {
3433 // System.out.println("LOOK IF TAXA EXIST? "+found);
3437 * @param nameToBeFilledTest
3439 @SuppressWarnings("rawtypes")
3440 public void setParsedName(TaxonNameBase nameToBeFilledTest
) {
3441 this.taxonnamebase
= (NonViralName
<?
>) nameToBeFilledTest
;
3444 //variety dwcranks:varietyEpithet
3446 * @return the author
3448 public String
getAuthor() {
3454 public Taxon
getTaxon() {
3460 public NonViralName
<?
> getTaxonNameBase() {
3461 return taxonnamebase
;
3465 * @param findOrCreateTaxon
3467 public void setForm(Taxon form
) {
3472 * @param findOrCreateTaxon
3474 public void setVariety(Taxon variety
) {
3475 this.variety
=variety
;
3482 @SuppressWarnings("rawtypes")
3483 public Taxon
findOrCreateTaxon(String partialname
,String fullname
, Rank rank
, Rank globalrank
) {
3484 List
<TaxonBase
> tmpList
= importer
.getTaxonService().list(Taxon
.class, 0, 0, null, null);
3486 NomenclaturalStatusType statusType
= null;
3487 if (!getStatus().isEmpty()){
3489 statusType
= nomStatusString2NomStatus(getStatus());
3490 } catch (UnknownCdmTypeException e
) {
3491 addProblematicStatusToFile(getStatus());
3492 logger
.warn("Problem with status");
3496 boolean foundIdentic
=false;
3498 // Taxon tmpPartial=null;
3499 for (TaxonBase tmpb
:tmpList
){
3501 TaxonNameBase tnb
= tmpb
.getName();
3504 // System.out.println(tnb.getTitleCache());
3505 // if (tnb.getTitleCache().split("sec.")[0].equals(partialname) ||tnb.getTitleCache().split("sec.")[0].equals(fullname) ){
3506 if(globalrank
.equals(rank
) || (globalrank
.isLower(Rank
.SPECIES()) && rank
.equals(Rank
.SPECIES()))){
3507 if (tnb
.getTitleCache().split("sec.")[0].equals(fullname
) ){
3508 crank
=tnb
.getRank();
3509 if (crank
!=null && rank
!=null){
3510 if (crank
.equals(rank
)){
3514 }catch(Exception e
){
3515 e
.printStackTrace();
3520 if(fullname
.indexOf(partialname
)<0){ //for corrected names such as Anochetus -- A. blf-pat
3521 if (tnb
.getTitleCache().split("sec.")[0].equals(partialname
) ){
3522 crank
=tnb
.getRank();
3523 if (crank
!=null && rank
!=null){
3524 if (crank
.equals(rank
)){
3528 }catch(Exception e
){
3529 e
.printStackTrace();
3537 if (tnb
.getTitleCache().split("sec.")[0].equals(partialname
) ){
3538 crank
=tnb
.getRank();
3539 if (crank
!=null && rank
!=null){
3540 if (crank
.equals(rank
)){
3544 }catch(Exception e
){
3545 e
.printStackTrace();
3554 boolean statusMatch
=false;
3555 boolean appendedMatch
=false;
3556 if(tmp
!=null && foundIdentic
){
3557 statusMatch
=compareStatus(tmp
, statusType
);
3558 if (!getStatus().isEmpty() && ! (tmp
.getAppendedPhrase() == null)) {
3559 appendedMatch
=tmp
.getAppendedPhrase().equals(getStatus());
3561 if (getStatus().isEmpty() && tmp
.getAppendedPhrase() == null) {
3566 if ((tmp
== null || !foundIdentic
) || (tmp
!= null && !statusMatch
) || (tmp
!= null && !appendedMatch
&& !statusMatch
)){
3568 NonViralName
<?
> tnb
= getNonViralNameAccNomenclature();
3571 if(statusType
!= null) {
3572 tnb
.addStatus(NomenclaturalStatus
.NewInstance(statusType
));
3574 if(getStatus()!=null) {
3575 tnb
.setAppendedPhrase(getStatus());
3578 if(rank
.equals(Rank
.UNKNOWN_RANK())) {
3579 tnb
.setTitleCache(fullname
);
3582 if(rank
.isHigher(Rank
.SPECIES())) {
3583 tnb
.setTitleCache(partialname
);
3586 if (rank
.equals(globalrank
) && author
!= null) {
3587 if(fullname
.indexOf("opulifolium")>-1) {
3588 System
.out
.println("AUTOR: "+author
);
3590 tnb
.setCombinationAuthorTeam(findOrCreateAuthor(author
));
3591 if (getIdentifier() !=null && !getIdentifier().isEmpty()){
3592 Taxon taxonLSID
= getTaxonByLSID(getIdentifier());
3593 if (taxonLSID
!=null) {
3600 if (rank
.equals(Rank
.FAMILY())) {
3601 tmp
= buildFamily(tnb
);
3603 if (rank
.equals(Rank
.SUBFAMILY())) {
3604 tmp
= buildSubfamily(tnb
);
3606 if (rank
.equals(Rank
.TRIBE())) {
3607 tmp
= buildTribe(tnb
);
3609 if (rank
.equals(Rank
.SUBTRIBE())) {
3610 tmp
= buildSubtribe(tnb
);
3612 if (rank
.equals(Rank
.GENUS())) {
3613 tmp
= buildGenus(partialname
, tnb
);
3616 if (rank
.equals(Rank
.SUBGENUS())) {
3617 tmp
= buildSubgenus(partialname
, tnb
);
3619 if (rank
.equals(Rank
.SPECIES())) {
3620 tmp
= buildSpecies(partialname
, tnb
);
3623 if (rank
.equals(Rank
.SUBSPECIES())) {
3624 tmp
= buildSubspecies(partialname
, tnb
);
3627 if (rank
.equals(Rank
.VARIETY())) {
3628 tmp
= buildVariety(fullname
, partialname
, tnb
);
3631 if (rank
.equals(Rank
.FORM())) {
3632 tmp
= buildForm(fullname
, partialname
, tnb
);
3635 importer
.getClassificationService().saveOrUpdate(classification
);
3639 tmp
= CdmBase
.deproxy(tmp
, Taxon
.class);
3640 if (rank
.equals(globalrank
) && author
!= null) {
3641 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
3642 setLSID(getIdentifier(), tmp
);
3643 importer
.getTaxonService().saveOrUpdate(tmp
);
3644 tmp
= CdmBase
.deproxy(tmp
, Taxon
.class);
3647 TaxonNameBase tnb
= CdmBase
.deproxy(tmp
.getName(), TaxonNameBase
.class);
3650 castTaxonNameBase(tnb
, taxonnamebase
);
3657 private Taxon
buildSubfamily(NonViralName
<?
> tnb
) {
3659 tnb
.generateTitle();
3660 tmp
= findMatchingTaxon(tnb
,refMods
);
3662 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
3663 tmp
.setSec(refMods
);
3664 sourceHandler
.addSource(refMods
, tmp
);
3665 if(family
!= null) {
3666 classification
.addParentChild(family
, tmp
, null, null);
3667 higherRank
=Rank
.FAMILY();
3670 System
.out
.println("ADDCHILDTAXON SUBFAMILY "+tmp
);
3671 classification
.addChildTaxon(tmp
, null, null);
3680 private Taxon
buildFamily(NonViralName
<?
> tnb
) {
3682 tnb
.generateTitle();
3683 tmp
= findMatchingTaxon(tnb
,refMods
);
3685 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
3686 tmp
.setSec(refMods
);
3687 sourceHandler
.addSource(refMods
, tmp
);
3688 System
.out
.println("ADDCHILDTAXON FAMILY "+tmp
);
3689 classification
.addChildTaxon(tmp
, null, null);
3698 private Taxon
buildForm(String fullname
, String partialname
, NonViralName
<?
> tnb
) {
3700 if (genusName
!=null) {
3701 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
3703 if (subgenusName
!=null) {
3704 tnb
.setInfraGenericEpithet(subgenusName
.getInfraGenericEpithet());
3706 if(speciesName
!=null) {
3707 tnb
.setSpecificEpithet(speciesName
.getSpecificEpithet());
3709 if(subspeciesName
!= null) {
3710 tnb
.setInfraSpecificEpithet(subspeciesName
.getInfraSpecificEpithet());
3712 if(partialname
!= null) {
3713 tnb
.setInfraSpecificEpithet(partialname
);
3715 tnb
.generateTitle();
3716 //TODO how to save form??
3717 tnb
.setTitleCache(fullname
, true);
3718 tmp
= findMatchingTaxon(tnb
,refMods
);
3720 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
3721 tmp
.setSec(refMods
);
3722 sourceHandler
.addSource(refMods
, tmp
);
3723 if (subspecies
!=null) {
3724 classification
.addParentChild(subspecies
, tmp
, null, null);
3725 higherRank
=Rank
.SUBSPECIES();
3726 higherTaxa
=subspecies
;
3728 if (species
!=null) {
3729 classification
.addParentChild(species
, tmp
, null, null);
3730 higherRank
=Rank
.SPECIES();
3734 System
.out
.println("ADDCHILDTAXON FORM "+tmp
);
3735 classification
.addChildTaxon(tmp
, null, null);
// Builds the taxon for a variety from the name object tnb.
// Copies the genus, infrageneric and specific epithets from genusName, subgenusName and
// speciesName when those are set, uses partialname as the infraspecific epithet when it is
// non-null, and sets the title cache of tnb to fullname (second argument true).
// It then calls findMatchingTaxon(tnb, refMods); the visible statements also create a Taxon
// from tnb and sourceUrlRef, set refMods as its sec, register the source, and attach the taxon
// below subspecies or species, or directly to the classification.
// NOTE(review): the lines that guard these branches (declaration of tmp, null check, else,
// return) are not visible in this view - confirm against the full file.
3746 private Taxon
buildVariety(String fullname
, String partialname
, NonViralName
<?
> tnb
) {
3748 if (genusName
!=null) {
3749 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
3751 if (subgenusName
!=null) {
3752 tnb
.setInfraGenericEpithet(subgenusName
.getInfraGenericEpithet());
3754 if(speciesName
!=null) {
3755 tnb
.setSpecificEpithet(speciesName
.getSpecificEpithet());
// NOTE(review): this writes subspeciesName's *specific* epithet into the infraspecific
// epithet, whereas buildForm uses subspeciesName.getInfraSpecificEpithet() at the same
// point - looks like a copy/paste slip; confirm. The value is overwritten just below
// whenever partialname is non-null.
3757 if(subspeciesName
!= null) {
3758 tnb
.setInfraSpecificEpithet(subspeciesName
.getSpecificEpithet());
3760 if(partialname
!= null) {
3761 tnb
.setInfraSpecificEpithet(partialname
);
3763 //TODO how to save variety?
3764 tnb
.setTitleCache(fullname
, true);
3765 tmp
= findMatchingTaxon(tnb
,refMods
);
3767 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
3768 tmp
.setSec(refMods
);
3769 sourceHandler
.addSource(refMods
, tmp
);
// Preferred parent is the subspecies taxon, then the species taxon.
3770 if (subspecies
!=null) {
3771 classification
.addParentChild(subspecies
, tmp
, null, null);
3772 higherRank
=Rank
.SUBSPECIES();
3773 higherTaxa
=subspecies
;
3775 if(species
!=null) {
3776 classification
.addParentChild(species
, tmp
, null, null);
3777 higherRank
=Rank
.SPECIES();
// Fallback visible here: the taxon is added directly to the classification.
3781 System
.out
.println("ADDCHILDTAXON VARIETY "+tmp
);
3782 classification
.addChildTaxon(tmp
, null, null);
3789 * @param partialname
/**
 * Populates {@code tnb} for a subspecies and obtains the taxon that carries it.
 * <p>
 * Visible steps: copy genus, infrageneric and specific epithets from the cached names when set,
 * set {@code partialname} as infraspecific epithet, regenerate the title, ask
 * {@code findMatchingTaxon} for an existing taxon, create a new {@code Taxon} with sec
 * {@code refMods}, register its source and attach it to the species or to the classification.
 * <p>
 * NOTE(review): the declaration of {@code tmp}, the guard around the creation, the else branches
 * and the return are not visible in this extract; presumably creation only happens when no match
 * was found. Confirm against the full file.
 *
 * @param partialname infraspecific epithet of the subspecies
 * @param tnb         name object to populate
 */
3793 private Taxon
buildSubspecies(String partialname
, NonViralName
<?
> tnb
) {
3795 if (genusName
!=null) {
3796 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
3798 if (subgenusName
!=null) {
3799 // System.out.println("SUB:"+subgenusName.getInfraGenericEpithet());
3800 tnb
.setInfraGenericEpithet(subgenusName
.getInfraGenericEpithet());
3802 if(speciesName
!=null) {
3803 // System.out.println("SPE:"+speciesName.getSpecificEpithet());
3804 tnb
.setSpecificEpithet(speciesName
.getSpecificEpithet());
3806 tnb
.setInfraSpecificEpithet(partialname
);
3807 tnb
.generateTitle();
3808 tmp
= findMatchingTaxon(tnb
,refMods
);
3810 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
3811 tmp
.setSec(refMods
);
3812 sourceHandler
.addSource(refMods
, tmp
);
3814 if(species
!= null) {
3815 classification
.addParentChild(species
, tmp
, null, null);
3816 higherRank
=Rank
.SPECIES();
3820 System
.out
.println("ADDCHILDTAXON SUBSPECIES "+tmp
);
3821 classification
.addChildTaxon(tmp
, null, null);
3827 * @param partialname
/**
 * Populates {@code tnb} for a species and obtains the taxon that carries it.
 * <p>
 * Visible steps: copy genus and infrageneric epithets from the cached names when set, set the
 * lower-cased {@code partialname} as specific epithet, regenerate the title, ask
 * {@code findMatchingTaxon} for an existing taxon, create a new {@code Taxon} with sec
 * {@code refMods}, register its source and attach it to the subgenus, the genus or the
 * classification.
 * <p>
 * NOTE(review): the declaration of {@code tmp}, the guards/else branches (including the condition
 * protecting the genus branch) and the return are not visible in this extract; confirm against
 * the full file.
 *
 * @param partialname specific epithet; it is lower-cased before being stored
 * @param tnb         name object to populate
 */
3831 private Taxon
buildSpecies(String partialname
, NonViralName
<?
> tnb
) {
3833 if (genusName
!=null) {
3834 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
3836 if (subgenusName
!=null) {
3837 tnb
.setInfraGenericEpithet(subgenusName
.getInfraGenericEpithet());
3839 tnb
.setSpecificEpithet(partialname
.toLowerCase());
3840 tnb
.generateTitle();
3841 tmp
= findMatchingTaxon(tnb
,refMods
);
3843 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
3844 tmp
.setSec(refMods
);
3845 sourceHandler
.addSource(refMods
, tmp
);
3846 if (subgenus
!=null) {
3847 classification
.addParentChild(subgenus
, tmp
, null, null);
3848 higherRank
=Rank
.SUBGENUS();
3849 higherTaxa
=subgenus
;
3852 classification
.addParentChild(genus
, tmp
, null, null);
3853 higherRank
=Rank
.GENUS();
3857 System
.out
.println("ADDCHILDTAXON SPECIES "+tmp
);
3858 classification
.addChildTaxon(tmp
, null, null);
3865 * @param partialname
/**
 * Populates {@code tnb} for a subgenus and obtains the taxon that carries it.
 * <p>
 * Visible steps: set {@code partialname} as infrageneric epithet, copy the genus from the cached
 * genus name when set, regenerate the title, ask {@code findMatchingTaxon} for an existing taxon,
 * create a new {@code Taxon} with sec {@code refMods}, register its source and attach it to the
 * genus or to the classification.
 * <p>
 * NOTE(review): the declaration of {@code tmp}, the guards/else branches and the return are not
 * visible in this extract; confirm against the full file.
 *
 * @param partialname infrageneric epithet of the subgenus
 * @param tnb         name object to populate
 */
3869 private Taxon
buildSubgenus(String partialname
, NonViralName
<?
> tnb
) {
3871 tnb
.setInfraGenericEpithet(partialname
);
3872 if (genusName
!=null) {
3873 tnb
.setGenusOrUninomial(genusName
.getGenusOrUninomial());
3875 tnb
.generateTitle();
3876 tmp
= findMatchingTaxon(tnb
,refMods
);
3878 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
3879 tmp
.setSec(refMods
);
3880 sourceHandler
.addSource(refMods
, tmp
);
3882 classification
.addParentChild(genus
, tmp
, null, null);
3883 higherRank
=Rank
.GENUS();
3886 System
.out
.println("ADDCHILDTAXON SUBGENUS "+tmp
);
3887 classification
.addChildTaxon(tmp
, null, null);
3893 * @param partialname
/**
 * Populates {@code tnb} for a genus and obtains the taxon that carries it.
 * <p>
 * Visible steps: set {@code partialname} as genus, regenerate the title, ask
 * {@code findMatchingTaxon} for an existing taxon, create a new {@code Taxon} with sec
 * {@code refMods}, register its source and attach it to the subtribe, tribe, subfamily, family
 * or the classification, recording the chosen parent rank in {@code higherRank}.
 * <p>
 * NOTE(review): the declaration of {@code tmp}, the guards/else branches (including the
 * conditions protecting the tribe and family branches) and the return are not visible in this
 * extract; confirm against the full file.
 *
 * @param partialname genus name
 * @param tnb         name object to populate
 */
3897 private Taxon
buildGenus(String partialname
, NonViralName
<?
> tnb
) {
3899 tnb
.setGenusOrUninomial(partialname
);
3900 tnb
.generateTitle();
3902 tmp
= findMatchingTaxon(tnb
,refMods
);
3904 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
3905 tmp
.setSec(refMods
);
3906 sourceHandler
.addSource(refMods
, tmp
);
3908 if(subtribe
!= null) {
3909 classification
.addParentChild(subtribe
, tmp
, null, null);
3910 higherRank
=Rank
.SUBTRIBE();
3911 higherTaxa
=subtribe
;
3914 classification
.addParentChild(tribe
, tmp
, null, null);
3915 higherRank
=Rank
.TRIBE();
3918 if(subfamily
!=null) {
3919 classification
.addParentChild(subfamily
, tmp
, null, null);
3920 higherRank
=Rank
.SUBFAMILY();
3921 higherTaxa
=subfamily
;
3924 classification
.addParentChild(family
, tmp
, null, null);
3925 higherRank
=Rank
.FAMILY();
3929 System
.out
.println("ADDCHILDTAXON GENUS "+tmp
);
3930 classification
.addChildTaxon(tmp
, null, null);
/**
 * Obtains the taxon for a subtribe name.
 * <p>
 * Visible steps: regenerate the title of {@code tnb}, ask {@code findMatchingTaxon} for an
 * existing taxon, create a new {@code Taxon} with sec {@code refMods}, register its source and
 * attach it to the tribe or to the classification.
 * <p>
 * NOTE(review): the declaration of {@code tmp}, the guards/else branches and the return are not
 * visible in this extract; confirm against the full file.
 *
 * @param tnb subtribe name
 */
3942 private Taxon
buildSubtribe(NonViralName
<?
> tnb
) {
3944 tnb
.generateTitle();
3945 tmp
= findMatchingTaxon(tnb
,refMods
);
3947 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
3948 tmp
.setSec(refMods
);
3949 sourceHandler
.addSource(refMods
, tmp
);
3951 classification
.addParentChild(tribe
, tmp
, null, null);
3952 higherRank
=Rank
.TRIBE();
3955 System
.out
.println("ADDCHILDTAXON SUBTRIBE "+tmp
);
3956 classification
.addChildTaxon(tmp
, null, null);
/**
 * Obtains the taxon for a tribe name.
 * <p>
 * Visible steps: regenerate the title of {@code tnb}, ask {@code findMatchingTaxon} for an
 * existing taxon, create a new {@code Taxon} with sec {@code refMods}, register its source and
 * attach it to the subfamily, the family or the classification.
 * <p>
 * NOTE(review): the declaration of {@code tmp}, the guards/else branches and the return are not
 * visible in this extract; confirm against the full file.
 *
 * @param tnb tribe name
 */
3965 private Taxon
buildTribe(NonViralName
<?
> tnb
) {
3967 tnb
.generateTitle();
3968 tmp
= findMatchingTaxon(tnb
,refMods
);
3970 tmp
= Taxon
.NewInstance(tnb
, sourceUrlRef
);
3971 tmp
.setSec(refMods
);
3972 sourceHandler
.addSource(refMods
, tmp
);
3973 if (subfamily
!=null) {
3974 classification
.addParentChild(subfamily
, tmp
, null, null);
3975 higherRank
=Rank
.SUBFAMILY();
3976 higherTaxa
=subfamily
;
3978 if(family
!= null) {
3979 classification
.addParentChild(family
, tmp
, null, null);
3980 higherRank
=Rank
.FAMILY();
3984 System
.out
.println("ADDCHILDTAXON TRIBE "+tmp
);
3985 classification
.addChildTaxon(tmp
, null, null);
3993 * cast the current taxonnamebase into a botanical name or zoological or bacterial name
3994 * if that fails, cast it into a plain NonViralName
3995 * @param taxonnamebase2
3997 @SuppressWarnings("rawtypes")
3998 private NonViralName
<?
> castTaxonNameBase(TaxonNameBase tnb
, NonViralName
<?
> nvn
) {
3999 NonViralName
<?
> taxonnamebase2
= nvn
;
4000 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNAFP
)) {
4002 taxonnamebase2
=(BotanicalName
) tnb
;
4003 }catch(Exception e
){
4004 taxonnamebase2
= (NonViralName
<?
>) tnb
;
4007 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICZN
)) {
4009 taxonnamebase2
=(ZoologicalName
) tnb
;
4010 }catch(Exception e
){
4011 taxonnamebase2
= (NonViralName
<?
>) tnb
;
4014 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNB
)) {
4016 taxonnamebase2
=(BacterialName
) tnb
;
4017 }catch(Exception e
){
4018 taxonnamebase2
= (NonViralName
<?
>) tnb
;
4021 return taxonnamebase2
;
4024 * @param identifier2
/**
 * Looks up a taxon by the LSID contained in {@code identifier}.
 * <p>
 * Visible steps: take the part of {@code identifier} before the first {@code "__"}; when it
 * contains {@code "lsid"}, parse it into an {@code LSID} (a malformed value is logged as a
 * warning); list all {@code Taxon} records through the taxon service and compare each record's
 * LSID string with the parsed one. Exceptions raised during the comparison are logged.
 * <p>
 * NOTE(review): the declaration of {@code lsid}, the try blocks, the branch taken on a match and
 * the return statements are not visible in this extract; presumably the first taxon with an
 * equal LSID is returned and null otherwise. Confirm against the full file.
 * NOTE(review): the whole taxon table is loaded for every call (page size and start are 0).
 *
 * @param identifier identifier string whose part before "__" is examined
 */
4027 @SuppressWarnings("rawtypes")
4028 private Taxon
getTaxonByLSID(String identifier
) {
4029 // boolean lsidok=false;
4030 String id
= identifier
.split("__")[0];
4031 // String source = identifier.split("__")[1];
4033 if (id
.indexOf("lsid")>-1){
4035 lsid
= new LSID(id
);
4037 } catch (MalformedLSIDException e
) {
4038 logger
.warn("Malformed LSID");
4042 List
<TaxonBase
> taxons
= importer
.getTaxonService().list(Taxon
.class, 0, 0, null, null);
4043 LSID currentlsid
=null;
4044 for (TaxonBase t
:taxons
){
4045 currentlsid
= t
.getLsid();
4046 if (currentlsid
!=null){
4047 if (currentlsid
.getLsid().equals(lsid
.getLsid())){
4051 catch(Exception e
){logger
.warn("Exception occurred while comparing LSIDs "+e
);}
4062 @SuppressWarnings("rawtypes")
4063 private Person
findOrCreateAuthor(String author2
) {
4064 List
<UuidAndTitleCache
<Person
>> hiberPersons
= importer
.getAgentService().getPersonUuidAndTitleCache();
4065 for (UuidAndTitleCache
<Person
> hibernateP
:hiberPersons
){
4066 if(hibernateP
.getTitleCache().equals(author2
)) {
4067 AgentBase existing
= importer
.getAgentService().find(hibernateP
.getUuid());
4068 return CdmBase
.deproxy(existing
, Person
.class);
4071 Person p
= Person
.NewInstance();
4072 p
.setTitleCache(author2
,true);
4073 importer
.getAgentService().saveOrUpdate(p
);
4074 return CdmBase
.deproxy(p
, Person
.class);
4077 * @param author the author to set
4079 public void setAuthor(String author
) {
4080 this.author
= author
;
4084 * @return the higherTaxa
// Accessor for the higher taxon.
// NOTE(review): the body is not visible in this extract; presumably returns the higherTaxa field.
4086 public Taxon
getHigherTaxa() {
4090 * @param higherTaxa the higherTaxa to set
4092 public void setHigherTaxa(Taxon higherTaxa
) {
4093 this.higherTaxa
= higherTaxa
;
4096 * @return the higherRank
// Accessor for the rank of the higher taxon.
// NOTE(review): the body is not visible in this extract; presumably returns the higherRank field.
4098 public Rank
getHigherRank() {
4102 * @param higherRank the higherRank to set
4104 public void setHigherRank(Rank higherRank
) {
4105 this.higherRank
= higherRank
;
// Returns originalName when newName is empty.
// NOTE(review): the other branch is not visible in this extract; presumably newName is returned.
// NOTE(review): newName.isEmpty() throws a NullPointerException if newName is null.
4107 public String
getName(){
4108 if (newName
.isEmpty()) {
4109 return originalName
;
4116 * @return the fullName
4118 public String
getOriginalName() {
4119 return originalName
;
4122 * @param fullName the fullName to set
4124 public void setOriginalName(String fullName
) {
4125 this.originalName
= fullName
;
4128 * @return the newName
// Accessor for the new name.
// NOTE(review): the body is not visible in this extract; presumably returns the newName field.
4130 public String
getNewName() {
4134 * @param newName the newName to set
4136 public void setNewName(String newName
) {
4137 this.newName
= newName
;
// Accessor for the rank of this name.
// NOTE(review): the body is not visible in this extract; presumably returns the rank field.
4142 public Rank
getRank() {
4146 * @param rank the rank to set
// Mutator for the rank of this name.
// NOTE(review): the body is not visible in this extract; presumably assigns the rank field.
4148 public void setRank(Rank rank
) {
4152 * @return the identifier
// Accessor for the identifier.
// NOTE(review): the body is not visible in this extract; presumably returns the identifier field.
4154 public String
getIdentifier() {
4158 * @param identifier the identifier to set
4160 public void setIdentifier(String identifier
) {
4161 this.identifier
= identifier
;
4164 * @return the status
// Accessor for the status; a null status is handled specially.
// NOTE(review): what is returned for a null status, and the regular return, are not visible
// in this extract. Confirm against the full file.
4166 public String
getStatus() {
4167 if (status
== null) {
4173 * @param status the status to set
4175 public void setStatus(String status
) {
4176 this.status
= status
;
4179 * @return the family
// Accessor for the family taxon.
// NOTE(review): the body is not visible in this extract; presumably returns the family field.
4181 public Taxon
getFamily() {
4185 * @param family the family to set
4187 @SuppressWarnings("rawtypes")
4188 public void setFamily(Taxon family
) {
4189 this.family
= family
;
4190 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(family
.getName(), TaxonNameBase
.class);
4191 familyName
= castTaxonNameBase(taxonNameBase
,familyName
);
4194 * @return the subfamily
// Accessor for the subfamily taxon.
// NOTE(review): the body is not visible in this extract; presumably returns the subfamily field.
4196 public Taxon
getSubfamily() {
4200 * @param subfamily the subfamily to set
4202 @SuppressWarnings("rawtypes")
4203 public void setSubfamily(Taxon subfamily
) {
4204 this.subfamily
= subfamily
;
4205 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(subfamily
.getName(), TaxonNameBase
.class);
4206 subfamilyName
= castTaxonNameBase(taxonNameBase
,subfamilyName
);
// Accessor for the tribe taxon.
// NOTE(review): the body is not visible in this extract; presumably returns the tribe field.
4211 public Taxon
getTribe() {
4215 * @param tribe the tribe to set
// Refreshes tribeName from the given tribe taxon: the taxon's name is deproxied and passed
// through castTaxonNameBase with the previous tribeName as fallback value.
// NOTE(review): the assignment of the tribe field itself is not visible in this extract;
// presumably "this.tribe = tribe;" precedes the visible statements. Confirm.
4217 @SuppressWarnings("rawtypes")
4218 public void setTribe(Taxon tribe
) {
4220 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(tribe
.getName(), TaxonNameBase
.class);
4221 tribeName
= castTaxonNameBase(taxonNameBase
,tribeName
);
4224 * @return the subtribe
// Accessor for the subtribe taxon.
// NOTE(review): the body is not visible in this extract; presumably returns the subtribe field.
4226 public Taxon
getSubtribe() {
4230 * @param subtribe the subtribe to set
4232 @SuppressWarnings("rawtypes")
4233 public void setSubtribe(Taxon subtribe
) {
4234 this.subtribe
= subtribe
;
4235 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(subtribe
.getName(), TaxonNameBase
.class);
4236 subtribeName
=castTaxonNameBase(taxonNameBase
,subtribeName
);
// Accessor for the genus taxon.
// NOTE(review): the body is not visible in this extract; presumably returns the genus field.
4241 public Taxon
getGenus() {
4245 * @param genus the genus to set
// Refreshes genusName from the given genus taxon (deproxy the taxon's name, pass it through
// castTaxonNameBase with the previous genusName as fallback) and prints the result to stdout.
// NOTE(review): the assignment of the genus field itself is not visible in this extract;
// presumably "this.genus = genus;" precedes the visible statements. Confirm.
// NOTE(review): genusName.toString() throws a NullPointerException if the cast result is null.
4247 @SuppressWarnings("rawtypes")
4248 public void setGenus(Taxon genus
) {
4250 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(genus
.getName(), TaxonNameBase
.class);
4251 genusName
= castTaxonNameBase(taxonNameBase
,genusName
);
4252 System
.out
.println("GENUSNAME: "+genusName
.toString());
4255 * @return the subgenus
// Accessor for the subgenus taxon.
// NOTE(review): the body is not visible in this extract; presumably returns the subgenus field.
4257 public Taxon
getSubgenus() {
4261 * @param subgenus the subgenus to set
4263 @SuppressWarnings("rawtypes")
4264 public void setSubgenus(Taxon subgenus
) {
4265 this.subgenus
= subgenus
;
4266 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(subgenus
.getName(), TaxonNameBase
.class);
4267 subgenusName
= castTaxonNameBase(taxonNameBase
,subgenusName
);
4270 * @return the species
// Accessor for the species taxon.
// NOTE(review): the body is not visible in this extract; presumably returns the species field.
4272 public Taxon
getSpecies() {
4276 * @param species the species to set
4278 public void setSpecies(Taxon species
) {
4279 this.species
= species
;
4280 @SuppressWarnings("rawtypes")
4281 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(species
.getName(), TaxonNameBase
.class);
4282 speciesName
= castTaxonNameBase(taxonNameBase
,speciesName
);
4286 * @return the subspecies
// Accessor for the subspecies taxon.
// NOTE(review): the body is not visible in this extract; presumably returns the subspecies field.
4288 public Taxon
getSubspecies() {
4292 * @param subspecies the subspecies to set
4294 @SuppressWarnings("rawtypes")
4295 public void setSubspecies(Taxon subspecies
) {
4296 this.subspecies
= subspecies
;
4297 TaxonNameBase taxonNameBase
= CdmBase
.deproxy(subspecies
.getName(), TaxonNameBase
.class);
4298 subspeciesName
= castTaxonNameBase(taxonNameBase
,subspeciesName
);
4310 private void addProblematicStatusToFile(String status
) {
4312 FileWriter fstream
= new FileWriter("/home/pkelbert/Bureau/StatusUnknown_"+classification
.getTitleCache()+".txt",true);
4313 BufferedWriter out
= new BufferedWriter(fstream
);
4314 out
.write(status
+"\n");
4315 //Close the output stream
4317 }catch (Exception e
){//Catch exception if any
4318 System
.err
.println("Error: " + e
.getMessage());
/**
 * Searches the already known taxa for one that can be reused for {@code tnb}.
 * <p>
 * Visible steps: fetch the candidates with {@code getMatchingTaxon}; while user interaction is
 * enabled in the configuration and nothing has been accepted yet, compute the similarity between
 * the two title caches (part before "sec.", lower-cased and trimmed) and let
 * {@code compareAndCheckTaxon} decide; on acceptance the candidate is printed, stored in
 * {@code tmp} and {@code refMods} is added as its source.
 * <p>
 * NOTE(review): the declaration of {@code tmp}, the loop exit and the return are not visible in
 * this extract; presumably the accepted taxon (or null) is returned. Confirm.
 * NOTE(review): {@code !existingTaxons.isEmpty()} is always true inside the loop, and
 * {@code split("sec.")} takes a regular expression in which '.' matches any character.
 *
 * @param tnb     name to find a taxon for
 * @param refMods reference added as source to a reused taxon
 */
4329 private Taxon
findMatchingTaxon(NonViralName
<?
> tnb
, Reference refMods
) {
4332 boolean insertAsExisting
=false;
4333 List
<Taxon
> existingTaxons
= getMatchingTaxon(tnb
);
4334 double similarityScore
=0.0;
4335 for (Taxon bestMatchingTaxon
:existingTaxons
){
4336 if (!existingTaxons
.isEmpty() && configState
.getConfig().isInteractWithUser() && !insertAsExisting
) {
4337 similarityScore
=similarity(tnb
.getTitleCache().split("sec.")[0].toLowerCase().trim(), bestMatchingTaxon
.getTitleCache().split("sec.")[0].toLowerCase().trim());
4338 insertAsExisting
= compareAndCheckTaxon(tnb
, refMods
, similarityScore
, bestMatchingTaxon
);
4340 if(insertAsExisting
) {
4341 System
.out
.println("KEEP "+bestMatchingTaxon
.toString());
4342 tmp
=bestMatchingTaxon
;
4343 sourceHandler
.addSource(refMods
, tmp
);
4353 * @param similarityScore
4354 * @param bestMatchingTaxon
/**
 * Decides whether {@code bestMatchingTaxon} should be reused for {@code tnb}.
 * <p>
 * Two hard-coded special cases are visible for a name whose title (part before "sec.") equals
 * "Chenopodium" ignoring case: a candidate containing "Chenopodium album" is rejected, and a
 * candidate containing "Chenopodium L." is accepted. Otherwise the decision is delegated to
 * {@code askIfReuseBestMatchingTaxon}.
 * <p>
 * NOTE(review): the else keywords joining the three cases are not visible in this extract;
 * presumably they form an if / else-if / else chain. Confirm.
 * NOTE(review): {@code split("sec.")[0]} is not trimmed here, so a title such as
 * "Chenopodium sec. X" yields "Chenopodium " and does not equal "Chenopodium".
 *
 * @param tnb               name being imported
 * @param refMods           reference passed on to the interactive check
 * @param similarityScore   similarity between the two titles
 * @param bestMatchingTaxon candidate taxon
 * @return true when the candidate should be reused
 */
4357 private boolean compareAndCheckTaxon(NonViralName
<?
> tnb
, Reference refMods
, double similarityScore
,
4358 Taxon bestMatchingTaxon
) {
4359 boolean insertAsExisting
;
4360 if (tnb
.getTitleCache().split("sec.")[0].equalsIgnoreCase("Chenopodium") && bestMatchingTaxon
.getTitleCache().split("sec.")[0].indexOf("Chenopodium album")>-1) {
4361 insertAsExisting
=false;
4363 if (tnb
.getTitleCache().split("sec.")[0].equalsIgnoreCase("Chenopodium") &&
4364 bestMatchingTaxon
.getTitleCache().split("sec.")[0].indexOf("Chenopodium L.")>-1) {
4365 insertAsExisting
=true;
4367 insertAsExisting
=askIfReuseBestMatchingTaxon(tnb
, bestMatchingTaxon
, refMods
, similarityScore
);
4370 return insertAsExisting
;
4376 @SuppressWarnings("rawtypes")
4377 private List
<Taxon
> getMatchingTaxon(TaxonNameBase tnb
) {
4378 Pager
<TaxonBase
> pager
=importer
.getTaxonService().findByTitle(TaxonBase
.class, tnb
.getTitleCache().split("sec.")[0], MatchMode
.BEGINNING
, null, null, null, null, null);
4379 List
<TaxonBase
>records
= pager
.getRecords();
4381 List
<Taxon
> existingTaxons
= new ArrayList
<Taxon
>();
4382 for (TaxonBase r
:records
){
4384 Taxon bestMatchingTaxon
= (Taxon
)r
;
4385 // System.out.println("best: "+bestMatchingTaxon.getTitleCache());
4386 if(compareTaxonNameLength(bestMatchingTaxon
.getTitleCache().split(".sec")[0],tnb
.getTitleCache().split(".sec")[0])) {
4387 existingTaxons
.add(bestMatchingTaxon
);
4389 }catch(ClassCastException e
){logger
.warn("classcast exception, might be a synonym, ignore it");}
4391 Taxon bmt
= importer
.getTaxonService().findBestMatchingTaxon(tnb
.getTitleCache());
4392 if (!existingTaxons
.contains(bmt
) && bmt
!=null) {
4393 if(compareTaxonNameLength(bmt
.getTitleCache().split(".sec")[0],tnb
.getTitleCache().split(".sec")[0])) {
4394 existingTaxons
.add(bmt
);
4397 return existingTaxons
;
4401 * Check whether the found Taxon can reasonably be the same
4402 * example: with and without author should match, but the subspecies should not be suggested for a genus
// Compares the lengths of a found title (f) and the searched title (o) to decide whether the
// found taxon is a plausible match; a difference of more than 10 characters is tested.
// NOTE(review): most branches and the return are not visible in this extract, so the exact
// rule (which side may be longer, what happens above the threshold) must be confirmed.
4404 private boolean compareTaxonNameLength(String f
, String o
){
4405 boolean lengthOk
=false;
4406 int sizeF
= f
.length();
4407 int sizeO
= o
.length();
4412 if (sizeF
-sizeO
>10) {
4419 // System.out.println(lengthOk+": compare "+f+" ("+f.length()+") and "+o+" ("+o.length()+")");
4423 private double similarity(String s1
, String s2
) {
4424 if (s1
.length() < s2
.length()) { // s1 should always be bigger
4425 String swap
= s1
; s1
= s2
; s2
= swap
;
4427 int bigLen
= s1
.length();
4428 if (bigLen
== 0) { return 1.0; /* both strings are zero length */ }
4429 return (bigLen
- computeEditDistance(s1
, s2
)) / (double) bigLen
;
4432 private int computeEditDistance(String s1
, String s2
) {
4433 int[] costs
= new int[s2
.length() + 1];
4434 for (int i
= 0; i
<= s1
.length(); i
++) {
4436 for (int j
= 0; j
<= s2
.length(); j
++) {
4441 int newValue
= costs
[j
- 1];
4442 if (s1
.charAt(i
- 1) != s2
.charAt(j
- 1)) {
4443 newValue
= Math
.min(Math
.min(newValue
, lastValue
),
4446 costs
[j
- 1] = lastValue
;
4447 lastValue
= newValue
;
4452 costs
[s2
.length()] = lastValue
;
4455 return costs
[s2
.length()];
// Parent taxa resolved by the handle*Hierarchy methods, keyed by the rank they were stored under.
4458 Map
<Rank
, Taxon
> hierarchy
= new HashMap
<Rank
, Taxon
>();
4460 * @param taxonnamebase
4462 @SuppressWarnings("rawtypes")
4463 public void lookForParentNode(NonViralName
<?
> taxonnamebase
, Taxon tax
, Reference
<?
> ref
, MyName myName
) {
4464 System
.out
.println("LOOK FOR PARENT NODE "+taxonnamebase
.toString()+"; "+tax
.toString()+"; "+taxonnamebase
.getRank());
4465 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
4466 if (taxonnamebase
.getRank().equals(Rank
.FORM())){
4467 handleFormHierarchy(ref
, myName
, parser
);
4469 if (taxonnamebase
.getRank().equals(Rank
.VARIETY())){
4470 handleVarietyHierarchy(ref
, myName
, parser
);
4472 if (taxonnamebase
.getRank().equals(Rank
.SUBSPECIES())){
4473 handleSubSpeciesHierarchy(ref
, myName
, parser
);
4475 if (taxonnamebase
.getRank().equals(Rank
.SPECIES())){
4476 handleSpeciesHierarchy(ref
, myName
, parser
);
4478 if (taxonnamebase
.getRank().equals(Rank
.SUBGENUS())){
4479 handleSubgenusHierarchy(ref
, myName
, parser
);
4482 if (taxonnamebase
.getRank().equals(Rank
.GENUS())){
4483 handleGenusHierarchy(ref
, myName
, parser
);
4485 if (taxonnamebase
.getRank().equals(Rank
.SUBTRIBE())){
4486 handleSubtribeHierarchy(ref
, myName
, parser
);
4488 if (taxonnamebase
.getRank().equals(Rank
.TRIBE())){
4489 handleTribeHierarchy(ref
, myName
, parser
);
4492 if (taxonnamebase
.getRank().equals(Rank
.SUBFAMILY())){
4493 handleSubfamilyHierarchy(ref
, myName
, parser
);
/**
 * Resolves the parent of a subfamily, which is the family named in {@code myName}.
 * <p>
 * When a family string is present it is parsed at rank FAMILY and wrapped in a taxon. If a node
 * of the classification has the same taxon title cache (ignoring case), that taxon is reused.
 * Otherwise {@code findMatchingTaxon} is consulted, a new taxon is created and saved, and
 * {@code lookForParentNode} is called for the parent. The parent is stored in {@code hierarchy}
 * under rank {@code r}.
 * <p>
 * NOTE(review): the lines between the {@code findMatchingTaxon} call and the creation of the new
 * taxon, and those after the deproxy call, are not visible in this extract; presumably a new
 * taxon is only created when no match was found. Confirm against the full file.
 *
 * @param ref    reference used as sec for a newly created parent
 * @param myName parsed name parts supplying the family string
 * @param parser parser used to build the parent name
 */
4502 private void handleSubfamilyHierarchy(Reference
<?
> ref
, MyName myName
, INonViralNameParser parser
) {
4503 String parentStr
= myName
.getFamilyStr();
4504 Rank r
= Rank
.FAMILY();
4505 if(parentStr
!=null){
4506 NonViralName
<?
> parentNameName
= (NonViralName
<?
>) parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
4507 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
4508 // importer.getTaxonService().save(parent);
4509 // parent = CdmBase.deproxy(parent, Taxon.class);
4511 boolean parentDoesNotExists
= true;
4512 for (TaxonNode p
: classification
.getAllNodes()){
4513 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
4514 parentDoesNotExists
= false;
4515 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
4519 // if(parentDoesNotExists) {
4520 // importer.getTaxonService().save(parent);
4521 // parent = CdmBase.deproxy(parent, Taxon.class);
4522 // lookForParentNode(parentNameName, parent, ref,myName);
4524 if(parentDoesNotExists
) {
4525 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
4528 parent
=Taxon
.NewInstance(parentNameName
, ref
);
4529 importer
.getTaxonService().save(parent
);
4530 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
4534 lookForParentNode(parentNameName
, parent
, ref
,myName
);
4537 hierarchy
.put(r
,parent
);
/**
 * Resolves the parent of a tribe: the subfamily named in {@code myName}, falling back to the
 * family string when no subfamily is given.
 * <p>
 * The chosen string is parsed at rank {@code r} and wrapped in a taxon. If a node of the
 * classification has the same taxon title cache (ignoring case), that taxon is reused. Otherwise
 * {@code findMatchingTaxon} is consulted, a new taxon is created and saved, and
 * {@code lookForParentNode} is called for the parent. The parent is stored in {@code hierarchy}
 * under rank {@code r}.
 * <p>
 * NOTE(review): lines are missing from this extract: after the family fallback (presumably
 * {@code r} is switched to FAMILY there), between {@code findMatchingTaxon} and the creation of
 * the new taxon, and after the deproxy call. Confirm against the full file.
 *
 * @param ref    reference used as sec for a newly created parent
 * @param myName parsed name parts supplying the parent strings
 * @param parser parser used to build the parent name
 */
4546 private void handleTribeHierarchy(Reference
<?
> ref
, MyName myName
, INonViralNameParser parser
) {
4547 String parentStr
= myName
.getSubfamilyStr();
4548 Rank r
= Rank
.SUBFAMILY();
4549 if (parentStr
== null){
4550 parentStr
= myName
.getFamilyStr();
4553 if(parentStr
!=null){
4554 NonViralName
<?
> parentNameName
= (NonViralName
<?
>) parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
4555 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
4556 // importer.getTaxonService().save(parent);
4557 // parent = CdmBase.deproxy(parent, Taxon.class);
4559 boolean parentDoesNotExists
= true;
4560 for (TaxonNode p
: classification
.getAllNodes()){
4561 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
4562 parentDoesNotExists
= false;
4563 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
4567 // if(parentDoesNotExists) {
4568 // importer.getTaxonService().save(parent);
4569 // parent = CdmBase.deproxy(parent, Taxon.class);
4570 // lookForParentNode(parentNameName, parent, ref,myName);
4572 if(parentDoesNotExists
) {
4573 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
4576 parent
=Taxon
.NewInstance(parentNameName
, ref
);
4577 importer
.getTaxonService().save(parent
);
4578 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
4582 lookForParentNode(parentNameName
, parent
, ref
,myName
);
4585 hierarchy
.put(r
,parent
);
/**
 * Resolves the parent of a subtribe: the tribe named in {@code myName}, falling back to the
 * subfamily and then the family string when the closer rank is not given.
 * <p>
 * The chosen string is parsed at rank {@code r} and wrapped in a taxon. If a node of the
 * classification has the same taxon title cache (ignoring case), that taxon is reused. Otherwise
 * {@code findMatchingTaxon} is consulted, a new taxon is created and saved, and
 * {@code lookForParentNode} is called for the parent. The parent is stored in {@code hierarchy}
 * under rank {@code r}.
 * <p>
 * NOTE(review): lines are missing from this extract: after the family fallback (presumably
 * {@code r} is switched to FAMILY there), between {@code findMatchingTaxon} and the creation of
 * the new taxon, and after the deproxy call. Confirm against the full file.
 *
 * @param ref    reference used as sec for a newly created parent
 * @param myName parsed name parts supplying the parent strings
 * @param parser parser used to build the parent name
 */
4594 private void handleSubtribeHierarchy(Reference
<?
> ref
, MyName myName
, INonViralNameParser parser
) {
4595 String parentStr
= myName
.getTribeStr();
4596 Rank r
= Rank
.TRIBE();
4597 if (parentStr
== null){
4598 parentStr
= myName
.getSubfamilyStr();
4599 r
= Rank
.SUBFAMILY();
4601 if (parentStr
== null){
4602 parentStr
= myName
.getFamilyStr();
4605 if(parentStr
!=null){
4606 NonViralName
<?
> parentNameName
= (NonViralName
<?
>) parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
4607 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
4608 // importer.getTaxonService().save(parent);
4609 // parent = CdmBase.deproxy(parent, Taxon.class);
4611 boolean parentDoesNotExists
= true;
4612 for (TaxonNode p
: classification
.getAllNodes()){
4613 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
4614 parentDoesNotExists
= false;
4615 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
4620 // if(parentDoesNotExists) {
4621 // importer.getTaxonService().save(parent);
4622 // parent = CdmBase.deproxy(parent, Taxon.class);
4623 // lookForParentNode(parentNameName, parent, ref,myName);
4625 if(parentDoesNotExists
) {
4626 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
4629 parent
=Taxon
.NewInstance(parentNameName
, ref
);
4630 importer
.getTaxonService().save(parent
);
4631 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
4635 lookForParentNode(parentNameName
, parent
, ref
,myName
);
4638 hierarchy
.put(r
,parent
);
/**
 * Resolves the parent of a genus: the subtribe named in {@code myName}, falling back to the
 * tribe, subfamily and family strings in that order.
 * <p>
 * The chosen string is parsed at rank {@code r} and wrapped in a taxon. If a node of the
 * classification has the same taxon title cache (ignoring case), that taxon is reused. Otherwise
 * {@code findMatchingTaxon} is consulted, a new taxon is created and saved, and
 * {@code lookForParentNode} is called for the parent. The parent is stored in {@code hierarchy}
 * under rank {@code r}.
 * <p>
 * NOTE(review): lines are missing from this extract: after the tribe and family fallbacks
 * (presumably {@code r} is switched to TRIBE / FAMILY there), between {@code findMatchingTaxon}
 * and the creation of the new taxon, and after the deproxy call. Confirm against the full file.
 *
 * @param ref    reference used as sec for a newly created parent
 * @param myName parsed name parts supplying the parent strings
 * @param parser parser used to build the parent name
 */
4647 private void handleGenusHierarchy(Reference
<?
> ref
, MyName myName
, INonViralNameParser parser
) {
4648 String parentStr
= myName
.getSubtribeStr();
4649 Rank r
= Rank
.SUBTRIBE();
4650 if (parentStr
== null){
4651 parentStr
= myName
.getTribeStr();
4654 if (parentStr
== null){
4655 parentStr
= myName
.getSubfamilyStr();
4656 r
= Rank
.SUBFAMILY();
4658 if (parentStr
== null){
4659 parentStr
= myName
.getFamilyStr();
4662 if(parentStr
!=null){
4663 NonViralName
<?
> parentNameName
= (NonViralName
<?
>) parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
4664 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
4665 // importer.getTaxonService().save(parent);
4666 // parent = CdmBase.deproxy(parent, Taxon.class);
4668 boolean parentDoesNotExists
= true;
4669 for (TaxonNode p
: classification
.getAllNodes()){
4670 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
4671 // System.out.println(p.getTaxon().getUuid());
4672 // System.out.println(parent.getUuid());
4673 parentDoesNotExists
= false;
4674 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
4678 // if(parentDoesNotExists) {
4679 // importer.getTaxonService().save(parent);
4680 // parent = CdmBase.deproxy(parent, Taxon.class);
4681 // lookForParentNode(parentNameName, parent, ref,myName);
4683 if(parentDoesNotExists
) {
4684 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
4687 parent
=Taxon
.NewInstance(parentNameName
, ref
);
4688 importer
.getTaxonService().save(parent
);
4689 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
4693 lookForParentNode(parentNameName
, parent
, ref
,myName
);
4696 hierarchy
.put(r
,parent
);
/**
 * Resolves the parent of a subgenus: the genus named in {@code myName}, falling back to the
 * subtribe, tribe, subfamily and family strings in that order.
 * <p>
 * The chosen string is parsed at rank {@code r} and wrapped in a taxon. If a node of the
 * classification has the same taxon title cache (ignoring case), that taxon is reused. Otherwise
 * {@code findMatchingTaxon} is consulted, a new taxon is created and saved, and
 * {@code lookForParentNode} is called for the parent. The parent is stored in {@code hierarchy}
 * under rank {@code r}.
 * <p>
 * NOTE(review): lines are missing from this extract: after the tribe and family fallbacks
 * (presumably {@code r} is switched to TRIBE / FAMILY there), between {@code findMatchingTaxon}
 * and the creation of the new taxon, and after the deproxy call. Confirm against the full file.
 *
 * @param ref    reference used as sec for a newly created parent
 * @param myName parsed name parts supplying the parent strings
 * @param parser parser used to build the parent name
 */
4705 private void handleSubgenusHierarchy(Reference
<?
> ref
, MyName myName
, INonViralNameParser parser
) {
4706 String parentStr
= myName
.getGenusStr();
4707 Rank r
= Rank
.GENUS();
4709 if(parentStr
==null){
4710 parentStr
= myName
.getSubtribeStr();
4711 r
= Rank
.SUBTRIBE();
4713 if (parentStr
== null){
4714 parentStr
= myName
.getTribeStr();
4717 if (parentStr
== null){
4718 parentStr
= myName
.getSubfamilyStr();
4719 r
= Rank
.SUBFAMILY();
4721 if (parentStr
== null){
4722 parentStr
= myName
.getFamilyStr();
4725 if(parentStr
!=null){
4726 NonViralName
<?
> parentNameName
= (NonViralName
<?
>) parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
4727 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
4728 // importer.getTaxonService().save(parent);
4729 // parent = CdmBase.deproxy(parent, Taxon.class);
4731 boolean parentDoesNotExists
= true;
4732 for (TaxonNode p
: classification
.getAllNodes()){
4733 if(p
.getTaxon().getTitleCache().equalsIgnoreCase(parent
.getTitleCache())) {
4734 // System.out.println(p.getTaxon().getUuid());
4735 // System.out.println(parent.getUuid());
4736 parentDoesNotExists
= false;
4737 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
4741 // if(parentDoesNotExists) {
4742 // importer.getTaxonService().save(parent);
4743 // parent = CdmBase.deproxy(parent, Taxon.class);
4744 // lookForParentNode(parentNameName, parent, ref,myName);
4746 if(parentDoesNotExists
) {
4747 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
4750 parent
=Taxon
.NewInstance(parentNameName
, ref
);
4751 importer
.getTaxonService().save(parent
);
4752 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
4756 lookForParentNode(parentNameName
, parent
, ref
,myName
);
4759 hierarchy
.put(r
,parent
);
/**
 * Resolves the parent of a species: the subgenus named in {@code myName}, falling back to the
 * genus, subtribe, tribe, subfamily and family strings in that order.
 * <p>
 * When a parent string is found, {@code handleParentName} builds or reuses the parent taxon;
 * the result is traced on stdout and stored in {@code hierarchy} under rank {@code r}.
 * <p>
 * NOTE(review): the lines after the genus, tribe and family fallbacks are not visible in this
 * extract; presumably {@code r} is switched to GENUS / TRIBE / FAMILY there. Confirm.
 *
 * @param ref    reference handed to {@code handleParentName}
 * @param myName parsed name parts supplying the parent strings
 * @param parser parser handed to {@code handleParentName}
 */
4768 private void handleSpeciesHierarchy(Reference
<?
> ref
, MyName myName
, INonViralNameParser parser
) {
4769 String parentStr
= myName
.getSubgenusStr();
4770 Rank r
= Rank
.SUBGENUS();
4772 if(parentStr
==null){
4773 parentStr
= myName
.getGenusStr();
4777 if(parentStr
==null){
4778 parentStr
= myName
.getSubtribeStr();
4779 r
= Rank
.SUBTRIBE();
4781 if (parentStr
== null){
4782 parentStr
= myName
.getTribeStr();
4785 if (parentStr
== null){
4786 parentStr
= myName
.getSubfamilyStr();
4787 r
= Rank
.SUBFAMILY();
4789 if (parentStr
== null){
4790 parentStr
= myName
.getFamilyStr();
4793 if(parentStr
!=null){
4794 Taxon parent
= handleParentName(ref
, myName
, parser
, parentStr
, r
);
4795 System
.out
.println("PUT IN HIERARCHY "+r
+", "+parent
);
4796 hierarchy
.put(r
,parent
);
/**
 * Resolves the parent of a subspecies: the species named in {@code myName}, falling back to the
 * subgenus, genus, subtribe, tribe, subfamily and family strings in that order.
 * <p>
 * When a parent string is found, {@code handleParentName} builds or reuses the parent taxon;
 * the result is traced on stdout and stored in {@code hierarchy} under rank {@code r}.
 * <p>
 * NOTE(review): the lines after the genus, tribe and family fallbacks are not visible in this
 * extract; presumably {@code r} is switched to GENUS / TRIBE / FAMILY there. Confirm.
 *
 * @param ref    reference handed to {@code handleParentName}
 * @param myName parsed name parts supplying the parent strings
 * @param parser parser handed to {@code handleParentName}
 */
4805 private void handleSubSpeciesHierarchy(Reference
<?
> ref
, MyName myName
, INonViralNameParser parser
) {
4806 String parentStr
= myName
.getSpeciesStr();
4807 Rank r
= Rank
.SPECIES();
4810 if(parentStr
==null){
4811 parentStr
= myName
.getSubgenusStr();
4812 r
= Rank
.SUBGENUS();
4815 if(parentStr
==null){
4816 parentStr
= myName
.getGenusStr();
4820 if(parentStr
==null){
4821 parentStr
= myName
.getSubtribeStr();
4822 r
= Rank
.SUBTRIBE();
4824 if (parentStr
== null){
4825 parentStr
= myName
.getTribeStr();
4828 if (parentStr
== null){
4829 parentStr
= myName
.getSubfamilyStr();
4830 r
= Rank
.SUBFAMILY();
4832 if (parentStr
== null){
4833 parentStr
= myName
.getFamilyStr();
4836 if(parentStr
!=null){
4837 Taxon parent
= handleParentName(ref
, myName
, parser
, parentStr
, r
);
4838 System
.out
.println("PUT IN HIERARCHY "+r
+", "+parent
);
4839 hierarchy
.put(r
,parent
);
/**
 * Resolves the parent of a form: the subspecies named in {@code myName}, falling back to the
 * species, subgenus, genus, subtribe, tribe, subfamily and family strings in that order.
 * <p>
 * When a parent string is found, {@code handleParentName} builds or reuses the parent taxon;
 * the result is traced on stdout and stored in {@code hierarchy} under rank {@code r}.
 * <p>
 * NOTE(review): the lines after the species, genus, tribe and family fallbacks are not visible
 * in this extract; presumably {@code r} is switched to the matching rank there. Confirm.
 *
 * @param ref    reference handed to {@code handleParentName}
 * @param myName parsed name parts supplying the parent strings
 * @param parser parser handed to {@code handleParentName}
 */
4849 private void handleFormHierarchy(Reference
<?
> ref
, MyName myName
, INonViralNameParser parser
) {
4850 String parentStr
= myName
.getSubspeciesStr();
4851 Rank r
= Rank
.SUBSPECIES();
4854 if(parentStr
==null){
4855 parentStr
= myName
.getSpeciesStr();
4859 if(parentStr
==null){
4860 parentStr
= myName
.getSubgenusStr();
4861 r
= Rank
.SUBGENUS();
4864 if(parentStr
==null){
4865 parentStr
= myName
.getGenusStr();
4869 if(parentStr
==null){
4870 parentStr
= myName
.getSubtribeStr();
4871 r
= Rank
.SUBTRIBE();
4873 if (parentStr
== null){
4874 parentStr
= myName
.getTribeStr();
4877 if (parentStr
== null){
4878 parentStr
= myName
.getSubfamilyStr();
4879 r
= Rank
.SUBFAMILY();
4881 if (parentStr
== null){
4882 parentStr
= myName
.getFamilyStr();
4885 if(parentStr
!=null){
4886 Taxon parent
= handleParentName(ref
, myName
, parser
, parentStr
, r
);
4887 System
.out
.println("PUT IN HIERARCHY "+r
+", "+parent
);
4888 hierarchy
.put(r
,parent
);
/**
 * Resolves the parent of a variety: the subspecies named in {@code myName}, falling back to the
 * species, subgenus, genus, subtribe, tribe, subfamily and family strings in that order.
 * <p>
 * When a parent string is found, {@code handleParentName} builds or reuses the parent taxon;
 * the result is traced on stdout and stored in {@code hierarchy} under rank {@code r}.
 * <p>
 * NOTE(review): the lines after the species, genus, tribe and family fallbacks are not visible
 * in this extract; presumably {@code r} is switched to the matching rank there. Confirm.
 *
 * @param ref    reference handed to {@code handleParentName}
 * @param myName parsed name parts supplying the parent strings
 * @param parser parser handed to {@code handleParentName}
 */
4897 private void handleVarietyHierarchy(Reference
<?
> ref
, MyName myName
, INonViralNameParser parser
) {
4898 String parentStr
= myName
.getSubspeciesStr();
4899 Rank r
= Rank
.SUBSPECIES();
4901 if(parentStr
==null){
4902 parentStr
= myName
.getSpeciesStr();
4906 if(parentStr
==null){
4907 parentStr
= myName
.getSubgenusStr();
4908 r
= Rank
.SUBGENUS();
4911 if(parentStr
==null){
4912 parentStr
= myName
.getGenusStr();
4916 if(parentStr
==null){
4917 parentStr
= myName
.getSubtribeStr();
4918 r
= Rank
.SUBTRIBE();
4920 if (parentStr
== null){
4921 parentStr
= myName
.getTribeStr();
4924 if (parentStr
== null){
4925 parentStr
= myName
.getSubfamilyStr();
4926 r
= Rank
.SUBFAMILY();
4928 if (parentStr
== null){
4929 parentStr
= myName
.getFamilyStr();
4932 if(parentStr
!=null){
4933 Taxon parent
= handleParentName(ref
, myName
, parser
, parentStr
, r
);
4934 System
.out
.println("PUT IN HIERARCHY "+r
+", "+parent
);
4935 hierarchy
.put(r
,parent
);
/**
 * Builds or reuses the taxon for a parent name.
 * <p>
 * {@code parentStr} is parsed at rank {@code r} and wrapped in a taxon. If a node of the
 * classification has the same title (part before "sec.", trimmed, ignoring case), that taxon is
 * reused. Otherwise {@code findMatchingTaxon} is consulted, a new taxon is created and saved, and
 * {@code lookForParentNode} is called so that the parent's own parents are resolved as well.
 * <p>
 * NOTE(review): the lines between the {@code findMatchingTaxon} call and the creation of the new
 * taxon, those after the deproxy call, and the return are not visible in this extract;
 * presumably a new taxon is only created when no match was found and {@code parent} is
 * returned. Confirm against the full file.
 * NOTE(review): {@code split("sec.")} takes a regular expression in which '.' matches any
 * character.
 *
 * @param ref       reference used as sec for a newly created parent
 * @param myName    parsed name parts, passed on to {@code lookForParentNode}
 * @param parser    parser used to build the parent name
 * @param parentStr full name string of the parent
 * @param r         rank at which the parent string is parsed
 */
4947 private Taxon
handleParentName(Reference
<?
> ref
, MyName myName
, INonViralNameParser parser
, String parentStr
, Rank r
) {
4948 NonViralName
<?
> parentNameName
= (NonViralName
<?
>) parser
.parseFullName(parentStr
, nomenclaturalCode
, r
);
4949 Taxon parent
= Taxon
.NewInstance(parentNameName
, ref
); //sec set null
4950 // importer.getTaxonService().save(parent);
4951 // parent = CdmBase.deproxy(parent, Taxon.class);
4953 boolean parentDoesNotExists
= true;
4954 for (TaxonNode p
: classification
.getAllNodes()){
4955 if(p
.getTaxon().getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(parent
.getTitleCache().split("sec.")[0].trim())) {
4956 // System.out.println(p.getTaxon().getUuid());
4957 // System.out.println(parent.getUuid());
4958 parentDoesNotExists
= false;
4959 parent
=CdmBase
.deproxy(p
.getTaxon(), Taxon
.class);
4963 if(parentDoesNotExists
) {
4964 Taxon tmp
= findMatchingTaxon(parentNameName
,ref
);
4965 // System.out.println("FOUND PARENT "+tmp.toString()+" for "+parentNameName.toString());
4968 parent
=Taxon
.NewInstance(parentNameName
, ref
);
4969 importer
.getTaxonService().save(parent
);
4970 parent
= CdmBase
.deproxy(parent
, Taxon
.class);
4974 lookForParentNode(parentNameName
, parent
, ref
,myName
);
4983 * @param nomenclaturalCode2
4986 private void addProblemNameToFile(String name
, String author
, NomenclaturalCode nomenclaturalCode2
, Rank rank
) {
4988 FileWriter fstream
= new FileWriter("/home/pkelbert/Bureau/NameNotParsed.txt",true);
4989 BufferedWriter out
= new BufferedWriter(fstream
);
4990 out
.write(name
+"\t"+replaceNull(author
)+"\t"+replaceNull(nomenclaturalCode2
)+"\t"+replaceNull(rank
)+"\n");
4991 //Close the output stream
4993 }catch (Exception e
){//Catch exception if any
4994 System
.err
.println("Error: " + e
.getMessage());
// Renders a value for the report files: a NomenclaturalCode is rendered through its title
// cache, any other object through toString().
// NOTE(review): the lines before the class check are not visible in this extract; presumably
// they return a placeholder for a null argument. Confirm what that placeholder is.
4998 @SuppressWarnings("unused")
4999 private String
replaceNull(Object in
){
5003 if (in
.getClass().equals(NomenclaturalCode
.class)) {
5004 return ((NomenclaturalCode
)in
).getTitleCache();
5006 return in
.toString();
5011 * @param nomenclaturalCode2
5014 private void addProblemNameToFile(String type
, String name
, NomenclaturalCode nomenclaturalCode2
, Rank rank
, String problems
) {
5016 FileWriter fstream
= new FileWriter("/home/pkelbert/Bureau/NameNotParsed_"+classification
.getTitleCache()+".txt",true);
5017 BufferedWriter out
= new BufferedWriter(fstream
);
5018 out
.write(type
+"\t"+name
+"\t"+replaceNull(nomenclaturalCode2
)+"\t"+replaceNull(rank
)+"\t"+problems
+"\n");
5019 //Close the output stream
5021 }catch (Exception e
){//Catch exception if any
5022 System
.err
.println("Error: " + e
.getMessage());