3 * Copyright (C) 2013 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.taxonx2013
;
13 import java
.io
.FileWriter
;
14 import java
.io
.IOException
;
16 import java
.util
.ArrayList
;
17 import java
.util
.HashMap
;
18 import java
.util
.List
;
21 import java
.util
.regex
.Pattern
;
23 import javax
.xml
.transform
.TransformerException
;
24 import javax
.xml
.transform
.TransformerFactoryConfigurationError
;
26 import org
.apache
.commons
.lang
.StringUtils
;
27 import org
.w3c
.dom
.Node
;
28 import org
.w3c
.dom
.NodeList
;
30 import com
.ibm
.lsid
.MalformedLSIDException
;
32 import eu
.etaxonomy
.cdm
.api
.facade
.DerivedUnitFacade
;
33 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
34 import eu
.etaxonomy
.cdm
.model
.common
.DefinedTermBase
;
35 import eu
.etaxonomy
.cdm
.model
.common
.IdentifiableSource
;
36 import eu
.etaxonomy
.cdm
.model
.common
.LSID
;
37 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
38 import eu
.etaxonomy
.cdm
.model
.common
.OriginalSourceType
;
39 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
40 import eu
.etaxonomy
.cdm
.model
.description
.IndividualsAssociation
;
41 import eu
.etaxonomy
.cdm
.model
.description
.PolytomousKey
;
42 import eu
.etaxonomy
.cdm
.model
.description
.PolytomousKeyNode
;
43 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
44 import eu
.etaxonomy
.cdm
.model
.description
.TaxonNameDescription
;
45 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
46 import eu
.etaxonomy
.cdm
.model
.name
.BacterialName
;
47 import eu
.etaxonomy
.cdm
.model
.name
.BotanicalName
;
48 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
49 import eu
.etaxonomy
.cdm
.model
.name
.NonViralName
;
50 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
51 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
52 import eu
.etaxonomy
.cdm
.model
.name
.ZoologicalName
;
53 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivedUnit
;
54 import eu
.etaxonomy
.cdm
.model
.occurrence
.SpecimenOrObservationType
;
55 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
56 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
57 import eu
.etaxonomy
.cdm
.model
.taxon
.Classification
;
58 import eu
.etaxonomy
.cdm
.model
.taxon
.Synonym
;
59 import eu
.etaxonomy
.cdm
.model
.taxon
.SynonymRelationshipType
;
60 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
61 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
62 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
63 import eu
.etaxonomy
.cdm
.strategy
.parser
.INonViralNameParser
;
64 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImpl
;
65 import eu
.etaxonomy
.cdm
.strategy
.parser
.ParserProblem
;
72 public class TaxonXTreatmentExtractor
extends TaxonXExtractor
{
// Nomenclatural code passed to the name parser when scientific names are parsed.
74 private final NomenclaturalCode nomenclaturalCode
;
// Classification the extracted taxa are attached to; replaced by a freshly loaded instance in reloadClassification().
75 private Classification classification
;
// NOTE(review): presumably the names of the treatment currently being processed - confirm where they are assigned.
77 private String treatmentMainName
,originalTreatmentName
;
// NOTE(review): the meaning of the keys and of the inner map is not documented - TODO document.
79 private final HashMap
<String
,Map
<String
,String
>> namesMap
= new HashMap
<String
, Map
<String
,String
>>();
// Matches text starting with a digit, or with '-' followed by a digit; tested against key text in extractKey.
82 private final Pattern keypattern
= Pattern
.compile("^(\\d+.*|-\\d+.*)");
// Matches text of at least two characters that ends with a digit; tested against key text in extractKey.
83 private final Pattern keypatternend
= Pattern
.compile("^.+?\\d$");
// Gate read by extractTreatment: the reference and tax:div sections are only processed, and the results only saved, while this is true.
85 private boolean maxRankRespected
=false;
88 * @param nomenclaturalCode
89 * @param classification
// Creates an extractor bound to the given nomenclatural code, classification, importer and import state.
// NOTE(review): importer and configState are assigned to fields that are not declared next to the other fields
// of this class - presumably inherited from TaxonXExtractor; confirm.
93 public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode
, Classification classification
, TaxonXImport importer
,
94 TaxonXImportState configState
) {
95 this.nomenclaturalCode
=nomenclaturalCode
;
96 this.classification
= classification
;
97 this.importer
=importer
;
98 this.configState
=configState
;
// Hands the import state and the importer's agent service to prepareCollectors.
99 prepareCollectors(configState
, importer
.getAgentService());
103 * extracts all the treatment information and saves it
104 * @param treatmentnode: the XML Node
105 * @param tosave: the list of objects to save into the CDM
106 * @param refMods: the reference extracted from the MODS
107 * @param sourceName: the URI of the document
// Walks the child nodes of one treatment element and hands each recognised section to its extractor.
// The tax:nomenclature section yields the accepted taxon; the tax:ref_group and tax:div sections are only
// processed while maxRankRespected is true. The names collected in nametosave and the classification are
// saved at the end, again only while maxRankRespected is true.
109 @SuppressWarnings({ "rawtypes", "unused" })
110 protected void extractTreatment(Node treatmentnode
, List
<Object
> tosave
, Reference
<?
> refMods
, URI sourceName
) {
111 logger
.info("extractTreatment");
112 List
<TaxonNameBase
> nametosave
= new ArrayList
<TaxonNameBase
>();
113 NodeList children
= treatmentnode
.getChildNodes();
114 Taxon acceptedTaxon
=null;
115 Taxon defaultTaxon
=null;
116 boolean refgroup
=false;
// First pass over the children: checks for a tax:ref_group element.
118 for (int i
=0;i
<children
.getLength();i
++){
119 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:ref_group")) {
// Second pass: dispatch on the node name and, for tax:div elements, on the value of the "type" attribute.
124 for (int i
=0;i
<children
.getLength();i
++){
126 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:nomenclature")){
127 NodeList nomenclature
= children
.item(i
).getChildNodes();
128 boolean containsName
=false;
129 for(int k
=0;k
<nomenclature
.getLength();k
++){
130 if(nomenclature
.item(k
).getNodeName().equalsIgnoreCase("tax:name")){
136 reloadClassification();
137 //extract "main" the scientific name
138 acceptedTaxon
= extractNomenclature(children
.item(i
),nametosave
,refMods
);
141 else if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:ref_group") && maxRankRespected
){
142 reloadClassification();
143 //extract the References within the document
144 extractReferences(children
.item(i
),nametosave
,acceptedTaxon
,refMods
);
// NOTE(review): getNamedItem("type") returns null when a tax:div has no type attribute, so this chained call
// (and the identical ones in the branches below) would throw a NullPointerException - confirm every tax:div has a type.
146 else if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
147 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected
){
// NOTE(review): appends the document URI to a hard-coded, developer-specific absolute path; an IOException is only printed.
148 File file
= new File("/home/pkelbert/Bureau/multipleTaxonX.txt");
151 writer
= new FileWriter(file
,true);
152 writer
.write(sourceName
+"\n");
155 } catch (IOException e1
) {
156 // TODO Auto-generated catch block
157 e1
.printStackTrace();
// The string returned by askMultiple selects the extractor; it is also what extractSpecificFeature receives as feature name.
159 String multiple
= askMultiple(children
.item(i
));
160 if (multiple
.equalsIgnoreCase("synonyms")) {
161 extractSynonyms(children
.item(i
),nametosave
, acceptedTaxon
,refMods
);
164 if(multiple
.equalsIgnoreCase("material examined")){
165 extractMaterials(children
.item(i
),acceptedTaxon
, refMods
, nametosave
);
168 if (multiple
.equalsIgnoreCase("distribution")){
169 extractDistribution(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
);
172 extractSpecificFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
,multiple
);
175 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
176 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected
){
177 extractFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
, nametosave
, refMods
, Feature
.BIOLOGY_ECOLOGY());
179 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
180 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected
){
181 extractFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
, Feature
.DESCRIPTION());
183 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
184 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected
){
185 extractFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
,Feature
.DIAGNOSIS());
187 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
188 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected
){
189 extractFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
, Feature
.DISCUSSION());
192 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
193 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected
){
194 extractDistribution(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
);
196 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
197 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected
){
198 extractFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
,refMods
,Feature
.ETYMOLOGY());
201 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
202 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected
){
203 extractMaterials(children
.item(i
),acceptedTaxon
, refMods
, nametosave
);
206 else if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:div") &&
207 children
.item(i
).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected
){
208 //TODO IGNORE keys for the moment
209 //extractKey(children.item(i),acceptedTaxon, nametosave,source, refMods);
// Keys are currently stored as plain text under a feature named "Keys - unparsed".
210 extractSpecificFeature(children
.item(i
),acceptedTaxon
,defaultTaxon
,nametosave
, refMods
,"Keys - unparsed");
// Logs the name and attributes of the node.
213 logger
.info("ANOTHER KIND OF NODES: "+children
.item(i
).getNodeName()+", "+children
.item(i
).getAttributes());
214 if (children
.item(i
).getAttributes() !=null) {
215 logger
.info(children
.item(i
).getAttributes().item(0));
219 // logger.info("saveUpdateNames");
// Persist the collected names and the classification through the importer's services.
220 if (maxRankRespected
){
221 importer
.getNameService().saveOrUpdate(nametosave
);
222 importer
.getClassificationService().saveOrUpdate(classification
);
223 logger
.info("saveUpdateNames-ok");
230 * @param acceptedTaxon: the current acceptedTaxon
231 * @param nametosave: the list of objects to save into the CDM
232 * @param refMods: the current reference extracted from the MODS
// Builds a PolytomousKey scoped to acceptedTaxon from the tax:p paragraphs of a key section and saves
// the key and its nodes through the importer's services.
// For each paragraph the trimmed, non-empty #text children are concatenated into the node text and a
// tax:name child is resolved to a taxon via getTaxonFromXML.
// NOTE(review): the only call to this method in extractTreatment is commented out, so it looks unused at the moment.
234 @SuppressWarnings("rawtypes")
235 private void extractKey(Node keys
, Taxon acceptedTaxon
,List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
) {
236 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
238 NodeList children
= keys
.getChildNodes();
240 PolytomousKey poly
= PolytomousKey
.NewInstance();
241 poly
.addSource(OriginalSourceType
.Import
, null,null,refMods
,null);
242 poly
.addTaxonomicScope(acceptedTaxon
);
// NOTE(review): "bloup" looks like a placeholder title - confirm the intended title of the key.
243 poly
.setTitleCache("bloup");
244 // poly.addCoveredTaxon(acceptedTaxon);
245 PolytomousKeyNode root
= poly
.getRoot();
246 PolytomousKeyNode previous
= null,tmpKey
=null;
// Every node created below is collected here and saved in one call at the end.
248 List
<PolytomousKeyNode
> polyNodes
= new ArrayList
<PolytomousKeyNode
>();
250 // String fullContent = keys.getTextContent();
251 for (int i
=0;i
<children
.getLength();i
++){
252 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
253 NodeList paragraph
= children
.item(i
).getChildNodes();
256 for (int j
=0;j
<paragraph
.getLength();j
++){
257 if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("#text")){
258 if (! paragraph
.item(j
).getTextContent().trim().isEmpty()){
259 key
+=paragraph
.item(j
).getTextContent().trim();
260 // logger.info("KEY: "+j+"--"+key);
263 if(paragraph
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
264 taxonKey
=getTaxonFromXML(paragraph
.item(j
),nametosave
,refMods
);
267 // logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
// Key text that matches keypattern (leading number, optionally preceded by '-') is handled here.
268 if (keypattern
.matcher(key
).matches()){
269 tmpKey
= PolytomousKeyNode
.NewInstance(key
);
270 if (taxonKey
!=null) {
271 tmpKey
.setTaxon(taxonKey
);
273 polyNodes
.add(tmpKey
);
274 if (previous
== null) {
275 root
.addChild(tmpKey
);
277 previous
.addChild(tmpKey
);
281 tmpKey
=PolytomousKeyNode
.NewInstance(key
);
282 if (taxonKey
!=null) {
283 tmpKey
.setTaxon(taxonKey
);
285 polyNodes
.add(tmpKey
);
// Key text that matches keypatternend (ends with a digit) is attached to the root node.
286 if (keypatternend
.matcher(key
).matches()) {
287 root
.addChild(tmpKey
);
290 previous
.addChild(tmpKey
);
297 importer
.getPolytomousKeyNodeService().saveOrUpdate(polyNodes
);
298 importer
.getPolytomousKeyService().saveOrUpdate(poly
);
302 * @param taxons: the XML Nodegroup
303 * @param nametosave: the list of objects to save into the CDM
304 * @param acceptedTaxon: the current accepted Taxon
305 * @param refMods: the current reference extracted from the MODS
307 * @return Taxon object built
// Resolves the scientific name found in the given XML node to a Taxon.
// The name is parsed with NonViralNameParserImpl, matched against already known names via getTaxonNameBase
// and looked up with findBestMatchingTaxon. The code below also builds a new Taxon from the name, gives it
// a secundum and an import source, and attaches it to a parent in the classification.
309 @SuppressWarnings({ "rawtypes", "unchecked" })
310 private Taxon
getTaxonFromXML(Node taxons
, List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
) {
311 // logger.info("getTaxonFromXML");
312 // logger.info("acceptedTaxon: "+acceptedTaxon);
314 TaxonNameBase nameToBeFilled
= null;
317 String
[] enames
= null;
318 Rank rank
= Rank
.UNKNOWN_RANK();
320 String identifier
="";
// enames as used below: [1] is tested for emptiness, [2] is the rank name, [3] the identifier.
323 enames
= extractScientificName(taxons
);
324 if (enames
[1].isEmpty()) {
330 rank
= Rank
.getRankByName(enames
[2]);
331 identifier
= enames
[3];
332 } catch (TransformerFactoryConfigurationError e1
) {
334 } catch (TransformerException e1
) {
// An unknown rank name is logged and replaced by UNKNOWN_RANK.
336 } catch (UnknownCdmTypeException e
) {
337 logger
.warn("Rank problem!"+enames
[2]);
338 rank
=Rank
.UNKNOWN_RANK();
340 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
342 nameToBeFilled
= parser
.parseFullName(name
, nomenclaturalCode
, rank
);
// solveNameProblem is used for any parsing problem other than a lone CheckRank problem.
343 if (nameToBeFilled
.hasProblem() &&
344 !((nameToBeFilled
.getParsingProblems().size()==1) && nameToBeFilled
.getParsingProblems().contains(ParserProblem
.CheckRank
)) ) {
345 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
346 nameToBeFilled
=solveNameProblem(original
, name
,parser
);
// Reuse an already known name with the same title cache, otherwise queue this one for saving.
349 nameToBeFilled
= getTaxonNameBase(nameToBeFilled
,nametosave
);
351 // importer.getNameService().saveOrUpdate(nametosave);
352 Taxon t
= importer
.getTaxonService().findBestMatchingTaxon(nameToBeFilled
.getTitleCache());
354 // logger.info("BestTaxonService not the best or null");
355 t
= new Taxon(nameToBeFilled
,(Reference
<?
>) nameToBeFilled
.getNomenclaturalReference() );//TODO TOFIX reference
356 if (t
.getSec() == null) {
// Unless the original secundum is to be kept, the configured secundum is applied.
359 if(!configState
.getConfig().doKeepOriginalSecundum()) {
360 t
.setSec(configState
.getConfig().getSecundum());
361 logger
.info("SET SECUNDUM "+configState
.getConfig().getSecundum());
363 t
.addSource(OriginalSourceType
.Import
,null,null,refMods
,null);
// Identifiers of two characters or fewer are ignored; longer ones are handed to setLSID.
365 if (!identifier
.isEmpty() && (identifier
.length()>2)){
366 setLSID(identifier
, t
);
// Ask for the parent of the taxon in the classification; createParent is called until a parent is obtained.
369 Taxon parentTaxon
= askParent(t
, classification
);
370 if (parentTaxon
==null){
371 while (parentTaxon
== null) {
372 parentTaxon
= createParent(t
, refMods
);
373 classification
.addParentChild(parentTaxon
, t
, refMods
, null);
376 classification
.addParentChild(parentTaxon
, t
, refMods
, null);
380 t
= CdmBase
.deproxy(t
, Taxon
.class);
382 if (!configState
.getConfig().doKeepOriginalSecundum()) {
383 t
.setSec(configState
.getConfig().getSecundum());
384 logger
.info("SET SECUNDUM "+configState
.getConfig().getSecundum());
391 * @param taxons: the XML Nodegroup
392 * @param nametosave: the list of objects to save into the CDM
393 * @param acceptedTaxon: the current accepted Taxon
394 * @param refMods: the current reference extracted from the MODS
396 * @return the TaxonNameBase built from the XML node
// Resolves the scientific name found in the given XML node to a TaxonNameBase.
// The name is parsed with NonViralNameParserImpl and then passed through getTaxonNameBase, which returns
// the name that is finally handed back to the caller.
398 @SuppressWarnings({ "rawtypes", "unchecked" })
399 private TaxonNameBase
getTaxonNameBaseFromXML(Node taxons
, List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
) {
400 // logger.info("getTaxonFromXML");
401 // logger.info("acceptedTaxon: "+acceptedTaxon);
403 TaxonNameBase nameToBeFilled
= null;
406 String
[] enames
= null;
407 Rank rank
= Rank
.UNKNOWN_RANK();
409 String identifier
="";
// enames as used below: [1] is tested for emptiness, [2] is the rank name, [3] the identifier.
412 enames
= extractScientificName(taxons
);
413 if (enames
[1].isEmpty()) {
419 rank
= Rank
.getRankByName(enames
[2]);
420 identifier
= enames
[3];
421 } catch (TransformerFactoryConfigurationError e1
) {
423 } catch (TransformerException e1
) {
// An unknown rank name is logged and replaced by UNKNOWN_RANK.
425 } catch (UnknownCdmTypeException e
) {
426 logger
.warn("Rank problem!"+enames
[2]);
427 rank
=Rank
.UNKNOWN_RANK();
429 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
431 nameToBeFilled
= parser
.parseFullName(name
, nomenclaturalCode
, rank
);
// solveNameProblem is used for any parsing problem other than a lone CheckRank problem.
432 if (nameToBeFilled
.hasProblem() &&
433 !((nameToBeFilled
.getParsingProblems().size()==1) && nameToBeFilled
.getParsingProblems().contains(ParserProblem
.CheckRank
)) ) {
434 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
435 nameToBeFilled
=solveNameProblem(original
, name
,parser
);
// Reuse an already known name with the same title cache, otherwise queue this one for saving.
438 nameToBeFilled
= getTaxonNameBase(nameToBeFilled
,nametosave
);
439 return nameToBeFilled
;
// Looks for an already stored name whose title cache equals (ignoring case) that of the given name.
// When no stored name matches, the given name is logged as not found and appended to nametosave.
// NOTE(review): the name service is asked for the full TaxonNameBase list (all paging arguments null) on
// every call and scanned linearly - presumably expensive on large databases; consider a targeted lookup.
444 @SuppressWarnings("rawtypes")
445 private TaxonNameBase
getTaxonNameBase (TaxonNameBase name
, List
<TaxonNameBase
> nametosave
){
446 List
<TaxonNameBase
> names
= importer
.getNameService().list(TaxonNameBase
.class, null, null, null, null);
447 for (TaxonNameBase tb
: names
){
448 if (tb
.getTitleCache().equalsIgnoreCase(name
.getTitleCache())) {
449 logger
.info("TaxonNameBase FOUND"+name
.getTitleCache());
453 logger
.info("TaxonNameBase NOT FOUND "+name
.getTitleCache());
454 nametosave
.add(name
);
// Refreshes the classification field from the classification service, using the UUID of the current instance.
464 private void reloadClassification() {
465 Classification cl
= importer
.getClassificationService().find(classification
.getUuid());
// Saves the current classification and reads it back by UUID.
469 importer
.getClassificationService().saveOrUpdate(classification
);
470 classification
= importer
.getClassificationService().find(classification
.getUuid());
476 * Create a Taxon for the current NameBase, based on the current reference
477 * @param taxonNameBase
478 * @param refMods: the current reference extracted from the MODS
// Creates a new Taxon for the given name (constructed with a null secundum) and adds refMods as its import source.
481 @SuppressWarnings({ "unused", "rawtypes" })
482 private Taxon
getTaxon(TaxonNameBase taxonNameBase
, Reference
<?
> refMods
) {
483 Taxon t
= new Taxon(taxonNameBase
,null );
// The configured secundum is applied when the original one is not to be kept or when the taxon has none.
484 if (!configState
.getConfig().doKeepOriginalSecundum() || (t
.getSec() == null)) {
485 t
.setSec(configState
.getConfig().getSecundum());
486 logger
.info("SET SECUNDUM "+configState
.getConfig().getSecundum());
488 t
.addSource(OriginalSourceType
.Import
,null,null,refMods
,null);
494 * @param distribution: the XML node group
495 * @param acceptedTaxon: the current accepted Taxon
496 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
497 * @param refMods: the current reference extracted from the MODS
// Extracts a distribution section.
// Step 1 gathers, per child of the section (keyed by the child's index i), the free text of its tax:p
// paragraph and the specimens/observations found in tax:collection_event nodes.
// Step 2 stores every specimen as an IndividualsAssociation and the paragraph text as description text
// of the accepted taxon, saving through the importer's services.
499 @SuppressWarnings("rawtypes")
500 private void extractDistribution(Node distribution
, Taxon acceptedTaxon
, Taxon defaultTaxon
, List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
) {
501 // logger.info("DISTRIBUTION");
502 // logger.info("acceptedTaxon: "+acceptedTaxon);
503 NodeList children
= distribution
.getChildNodes();
// Both maps are keyed by the index of the paragraph within the section's child nodes.
504 Map
<Integer
,List
<MySpecimenOrObservation
>> specimenOrObservations
= new HashMap
<Integer
, List
<MySpecimenOrObservation
>>();
505 Map
<Integer
,String
> descriptionsFulltext
= new HashMap
<Integer
,String
>();
507 for (int i
=0;i
<children
.getLength();i
++){
508 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
509 NodeList paragraph
= children
.item(i
).getChildNodes();
510 for (int j
=0;j
<paragraph
.getLength();j
++){
// Plain text: appended (space separated) to the text already collected for this paragraph.
511 if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("#text")){
512 if(!paragraph
.item(j
).getTextContent().trim().isEmpty()) {
513 String s
=paragraph
.item(j
).getTextContent().trim();
514 if (descriptionsFulltext
.get(i
) !=null){
515 s
= descriptionsFulltext
.get(i
)+" "+s
;
517 descriptionsFulltext
.put(i
, s
);
// Name mention: the string form of the resolved name, cut before its secundum part, is appended to the text.
// NOTE(review): String.split takes a regular expression, so the '.' in "sec." matches any character;
// the other methods of this class split on "sec" - confirm which one is intended.
520 else if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
521 String s
=getTaxonNameBaseFromXML(paragraph
.item(j
),nametosave
,refMods
).toString().split("sec.")[0];
522 if (descriptionsFulltext
.get(i
) !=null){
523 s
= descriptionsFulltext
.get(i
)+" "+s
;
525 descriptionsFulltext
.put(i
, s
);
// Collection event: the extracted specimen is remembered for this paragraph and its string form is appended to the text.
527 else if (paragraph
.item(j
).getNodeName().equalsIgnoreCase("tax:collection_event")){
528 MySpecimenOrObservation specimenOrObservation
= new MySpecimenOrObservation();
529 DerivedUnit derivedUnitBase
= null;
530 specimenOrObservation
= extractSpecimenOrObservation(paragraph
.item(j
), derivedUnitBase
, SpecimenOrObservationType
.DerivedUnit
);
531 List
<MySpecimenOrObservation
> speObsList
= specimenOrObservations
.get(i
);
532 if (speObsList
== null) {
533 speObsList
=new ArrayList
<MySpecimenOrObservation
>();
535 speObsList
.add(specimenOrObservation
);
536 specimenOrObservations
.put(i
,speObsList
);
538 String s
= specimenOrObservation
.getDerivedUnitBase().toString();
539 if (descriptionsFulltext
.get(i
) !=null){
540 s
= descriptionsFulltext
.get(i
)+" "+s
;
542 descriptionsFulltext
.put(i
, s
);
550 for (int k
:descriptionsFulltext
.keySet()) {
555 for (int k
:specimenOrObservations
.keySet()) {
562 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
563 Feature currentFeature
= Feature
.DISTRIBUTION();
564 DerivedUnit derivedUnitBase
=null;
// Walk the paragraph indices from 0 to m (presumably the highest index seen above - TODO confirm).
566 for (int k
=0;k
<=m
;k
++){
567 if(specimenOrObservations
.keySet().contains(k
)){
568 for (MySpecimenOrObservation soo
:specimenOrObservations
.get(k
) ) {
569 derivedUnitBase
= soo
.getDerivedUnitBase();
570 descr
=soo
.getDescr();
572 derivedUnitBase
.addSource(OriginalSourceType
.Import
, null,null,refMods
,null);
574 importer
.getOccurrenceService().saveOrUpdate(derivedUnitBase
);
576 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
577 acceptedTaxon
.addDescription(taxonDescription
);
580 IndividualsAssociation indAssociation
= IndividualsAssociation
.NewInstance();
582 Feature feature
=null;
583 feature
= makeFeature(derivedUnitBase
);
// A non-empty description of the specimen becomes its title cache.
584 if(!StringUtils
.isEmpty(descr
)) {
585 derivedUnitBase
.setTitleCache(descr
, true);
587 indAssociation
.setAssociatedSpecimenOrObservation(derivedUnitBase
);
588 indAssociation
.setFeature(feature
);
589 indAssociation
.addSource(OriginalSourceType
.Import
, null, null, refMods
, null);
591 taxonDescription
.addElement(indAssociation
);
592 taxonDescription
.setTaxon(acceptedTaxon
);
593 taxonDescription
.addSource(OriginalSourceType
.Import
, null,null,refMods
,null);
595 importer
.getDescriptionService().saveOrUpdate(taxonDescription
);
596 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
597 td
.setDescribedSpecimenOrObservation(soo
.getDerivedUnitBase());
601 if (descriptionsFulltext
.keySet().contains(k
)){
// Paragraph text beginning with "Hab." or "Habitat" is passed to setParticularDescription with Feature.HABITAT.
602 if (!descriptionsFulltext
.get(k
).isEmpty() && (descriptionsFulltext
.get(k
).startsWith("Hab.") || descriptionsFulltext
.get(k
).startsWith("Habitat"))){
603 setParticularDescription(descriptionsFulltext
.get(k
),acceptedTaxon
,defaultTaxon
, refMods
, Feature
.HABITAT());
// Store the paragraph text as TextData with the DISTRIBUTION feature, language unknown.
607 TextData textData
= TextData
.NewInstance();
609 textData
.setFeature(currentFeature
);
610 textData
.putText(Language
.UNKNOWN_LANGUAGE(), descriptionsFulltext
.get(k
));
611 textData
.addSource(OriginalSourceType
.Import
, null, null, refMods
, null);
613 td
.addElement(textData
);
// Save the description and the taxon whenever this index contributed text or specimens.
618 if (descriptionsFulltext
.keySet().contains(k
) || specimenOrObservations
.keySet().contains(k
)){
619 td
.addSource(OriginalSourceType
.Import
, null,null,refMods
,null);
620 acceptedTaxon
.addDescription(td
);
621 importer
.getDescriptionService().saveOrUpdate(td
);
622 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
629 * @param materials: the XML node group
630 * @param acceptedTaxon: the current accepted Taxon
631 * @param refMods: the current reference extracted from the MODS
// Extracts a "materials examined" section.
// For each tax:p paragraph, the text of name mentions and of other nodes is accumulated in rawAssociation;
// each tax:collection_event is turned into a derived unit that is saved and linked to the accepted taxon
// through an IndividualsAssociation in the taxon's description.
633 @SuppressWarnings("rawtypes")
634 private void extractMaterials(Node materials
, Taxon acceptedTaxon
, Reference
<?
> refMods
,List
<TaxonNameBase
> nametosave
) {
635 // logger.info("EXTRACTMATERIALS");
636 // logger.info("acceptedTaxon: "+acceptedTaxon);
637 NodeList children
= materials
.getChildNodes();
638 NodeList events
= null;
641 DerivedUnit derivedUnitBase
=null;
642 MySpecimenOrObservation myspecimenOrObservation
= null;
644 for (int i
=0;i
<children
.getLength();i
++){
645 String rawAssociation
="";
647 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
648 events
= children
.item(i
).getChildNodes();
649 for(int k
=0;k
<events
.getLength();k
++){
// Name mention: the string form of the resolved name, cut before "sec", is appended to the raw text.
650 if (events
.item(k
).getNodeName().equalsIgnoreCase("tax:name")){
651 String linkedTaxon
= getTaxonNameBaseFromXML(events
.item(k
), nametosave
,refMods
).toString();//TODO NOT IMPLEMENTED IN THE CDM YET
652 rawAssociation
+=linkedTaxon
.split("sec")[0];
// Any node that is neither a name nor a collection event contributes its trimmed text.
654 if (! events
.item(k
).getNodeName().equalsIgnoreCase("tax:name")
655 && !events
.item(k
).getNodeName().equalsIgnoreCase("tax:collection_event")){
656 rawAssociation
+= events
.item(k
).getTextContent().trim();
658 if(events
.item(k
).getNodeName().equalsIgnoreCase("tax:collection_event")){
// Text rejected by containsDistinctLetters (semicolons removed first) is replaced by a fixed placeholder.
659 if (!containsDistinctLetters(rawAssociation
.replaceAll(";",""))) {
660 rawAssociation
="no description text";
663 DerivedUnitFacade derivedUnitFacade
= getFacade(rawAssociation
.replaceAll(";",""),SpecimenOrObservationType
.FieldUnit
);
664 derivedUnitBase
= derivedUnitFacade
.innerDerivedUnit();
665 derivedUnitBase
.addSource(OriginalSourceType
.Import
, null,null,refMods
,null);
666 importer
.getOccurrenceService().saveOrUpdate(derivedUnitBase
);
668 myspecimenOrObservation
= extractSpecimenOrObservation(events
.item(k
),derivedUnitBase
,SpecimenOrObservationType
.FieldUnit
);
669 derivedUnitBase
= myspecimenOrObservation
.getDerivedUnitBase();
670 descr
=myspecimenOrObservation
.getDescr();
672 derivedUnitBase
.addSource(OriginalSourceType
.Import
, null,null,refMods
,null);
674 importer
.getOccurrenceService().saveOrUpdate(derivedUnitBase
);
676 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
677 acceptedTaxon
.addDescription(taxonDescription
);
680 IndividualsAssociation indAssociation
= IndividualsAssociation
.NewInstance();
682 Feature feature
= makeFeature(derivedUnitBase
);
// A non-empty description of the specimen becomes its title cache.
683 if(!StringUtils
.isEmpty(descr
)) {
684 derivedUnitBase
.setTitleCache(descr
, true);
686 indAssociation
.setAssociatedSpecimenOrObservation(derivedUnitBase
);
687 indAssociation
.setFeature(feature
);
688 indAssociation
.addSource(OriginalSourceType
.Import
,null, null, refMods
, null);
690 taxonDescription
.addElement(indAssociation
);
691 taxonDescription
.setTaxon(acceptedTaxon
);
692 taxonDescription
.addSource(OriginalSourceType
.Import
,null,null,refMods
,null);
694 importer
.getDescriptionService().saveOrUpdate(taxonDescription
);
695 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
// Raw text that was not attached to a collection event is stored under the MATERIALS_EXAMINED feature.
// NOTE(review): derivedUnitBase is only assigned in the tax:collection_event branch above as far as this
// method shows - confirm it cannot still be null when setTitleCache is called below.
697 if (!rawAssociation
.isEmpty() && !added
){
698 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
699 acceptedTaxon
.addDescription(taxonDescription
);
701 IndividualsAssociation indAssociation
= IndividualsAssociation
.NewInstance();
703 Feature feature
= Feature
.MATERIALS_EXAMINED();
704 if(!StringUtils
.isEmpty(rawAssociation
)) {
705 derivedUnitBase
.setTitleCache(rawAssociation
, true);
707 indAssociation
.setAssociatedSpecimenOrObservation(derivedUnitBase
);
708 indAssociation
.setFeature(feature
);
709 indAssociation
.addSource(OriginalSourceType
.Import
, null, null, refMods
, null);
711 taxonDescription
.addElement(indAssociation
);
712 taxonDescription
.setTaxon(acceptedTaxon
);
713 taxonDescription
.addSource(OriginalSourceType
.Import
, null,null,refMods
,null);
715 importer
.getDescriptionService().saveOrUpdate(taxonDescription
);
716 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
726 * @param materials: the XML node group
727 * @param acceptedTaxon: the current accepted Taxon
728 * @param refMods: the current reference extracted from the MODS
// Turns a single materials node into a specimen/observation, saves it and links it to the accepted taxon
// through an IndividualsAssociation in the taxon's description.
// The event argument selects the feature: "collection" (case-insensitive) uses makeFeature on the derived
// unit; Feature.MATERIALS_EXAMINED() is the other value assigned below.
730 @SuppressWarnings("rawtypes")
731 private void extractMaterialsDirect(Node materials
, Taxon acceptedTaxon
, Reference
<?
> refMods
, String event
) {
732 // logger.info("EXTRACTMATERIALS");
733 // logger.info("acceptedTaxon: "+acceptedTaxon);
736 DerivedUnit derivedUnitBase
=null;
737 MySpecimenOrObservation myspecimenOrObservation
= null;
// derivedUnitBase is still null when passed in; the unit actually used is the one returned inside the result.
740 myspecimenOrObservation
= extractSpecimenOrObservation(materials
,derivedUnitBase
, SpecimenOrObservationType
.FieldUnit
);
741 derivedUnitBase
= myspecimenOrObservation
.getDerivedUnitBase();
742 descr
=myspecimenOrObservation
.getDescr();
744 derivedUnitBase
.addSource(OriginalSourceType
.Import
, null,null,refMods
,null);
746 importer
.getOccurrenceService().saveOrUpdate(derivedUnitBase
);
748 TaxonDescription taxonDescription
= importer
.getTaxonDescription(acceptedTaxon
, false, true);
749 acceptedTaxon
.addDescription(taxonDescription
);
752 IndividualsAssociation indAssociation
= IndividualsAssociation
.NewInstance();
754 Feature feature
=null;
755 if (event
.equalsIgnoreCase("collection")){
756 feature
= makeFeature(derivedUnitBase
);
759 feature
= Feature
.MATERIALS_EXAMINED();
// A non-empty description of the specimen becomes its title cache.
761 if(!StringUtils
.isEmpty(descr
)) {
762 derivedUnitBase
.setTitleCache(descr
, true);
764 indAssociation
.setAssociatedSpecimenOrObservation(derivedUnitBase
);
765 indAssociation
.setFeature(feature
);
766 indAssociation
.addSource(OriginalSourceType
.Import
, null, null, refMods
, null);
768 taxonDescription
.addElement(indAssociation
);
769 taxonDescription
.setTaxon(acceptedTaxon
);
770 taxonDescription
.addSource(OriginalSourceType
.Import
, null,null,refMods
,null);
772 importer
.getDescriptionService().saveOrUpdate(taxonDescription
);
773 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
780 * @param description: the XML node group
781 * @param acceptedTaxon: the current acceptedTaxon
782 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
783 * @param nametosave: the list of objects to save into the CDM
784 * @param refMods: the current reference extracted from the MODS
785 * @param featureName: the feature name
// Stores the content of a section under the feature whose title cache equals featureName (ignoring case).
// For every tax:p paragraph the name mentions and non-empty text pieces are collected and, when anything
// was collected, joined with spaces and handed to setParticularDescription. A feature that does not exist
// yet is created from featureName and saved through the term service.
787 private void extractSpecificFeature(Node description
, Taxon acceptedTaxon
, Taxon defaultTaxon
,
788 List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
, String featureName
) {
789 NodeList children
= description
.getChildNodes();
790 NodeList insideNodes
;
792 String localdescr
="";
794 // String fullContent = description.getTextContent();
795 for (int i
=0;i
<children
.getLength();i
++){
// Non-empty text directly under the section node is accumulated in descr.
797 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text") && !children
.item(i
).getTextContent().trim().isEmpty()){
798 descr
+= children
.item(i
).getTextContent().trim();
800 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
801 insideNodes
=children
.item(i
).getChildNodes();
// Pieces of this paragraph, in document order.
802 List
<String
> blabla
= new ArrayList
<String
>();
803 for (int j
=0;j
<insideNodes
.getLength();j
++){
// Name mention: the string form of the resolved name, cut before "sec".
804 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
805 String linkedTaxon
= getTaxonNameBaseFromXML(insideNodes
.item(j
), nametosave
,refMods
).toString();//TODO NOT IMPLEMENTED IN THE CDM YET
806 blabla
.add(linkedTaxon
.split("sec")[0]);
808 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("#text")) {
809 if(!insideNodes
.item(j
).getTextContent().trim().isEmpty()){
810 blabla
.add(insideNodes
.item(j
).getTextContent().trim());
811 localdescr
+= insideNodes
.item(j
).getTextContent().trim();
815 if (!blabla
.isEmpty()) {
// NOTE(review): the feature list is fetched from the term service for every non-empty paragraph although
// featureName does not change inside this method - the lookup could be done once.
816 List
<DefinedTermBase
> features
= importer
.getTermService().list(Feature
.class, null,null,null,null);
817 Feature currentFeature
=null;
818 for (DefinedTermBase feature
: features
){
819 String tmpF
= ((Feature
)feature
).getTitleCache();
820 if (tmpF
.equalsIgnoreCase(featureName
)) {
821 currentFeature
=(Feature
)feature
;
// No existing feature matched: create one using featureName for all three arguments and save it.
824 if (currentFeature
== null) {
825 currentFeature
=Feature
.NewInstance(featureName
, featureName
, featureName
);
826 importer
.getTermService().saveOrUpdate(currentFeature
);
828 setParticularDescription(StringUtils
.join(blabla
," "),acceptedTaxon
,defaultTaxon
, refMods
,currentFeature
);
840 * @param children: the XML node group
841 * @param nametosave: the list of objects to save into the CDM
842 * @param acceptedTaxon: the current acceptedTaxon
843 * @param refMods: the current reference extracted from the MODS
844 * @param fullContent :the parsed XML content
845 * @return a list of description (text)
// Parses the child nodes of the given paragraph node into a list of description strings.
// For every "tax:p" child, the text of its "tax:name", "#text" and "tax:bibref" children is
// collected and joined with single spaces into one entry of the returned list.
// "tax:collection_event" children are gathered separately and handed to extractMaterialsDirect.
// nametosave and refMods are passed on to getTaxonNameBaseFromXML; acceptedTaxon and refMods are
// passed on to extractMaterialsDirect; feature is only used in a log message.
// Returns fullDescription, which is created here and may be empty.
847 @SuppressWarnings("unused")
848 private List
<String
> parseParagraph(List
<TaxonNameBase
> nametosave
, Taxon acceptedTaxon
, Reference
<?
> refMods
, Node paragraph
, Feature feature
){
849 List
<String
> fullDescription
= new ArrayList
<String
>();
850 // String localdescr;
852 NodeList insideNodes
;
853 boolean collectionEvent
= false;
854 List
<Node
>collectionEvents
= new ArrayList
<Node
>();
856 NodeList children
= paragraph
.getChildNodes();
858 for (int i
=0;i
<children
.getLength();i
++){
860 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text") && !children
.item(i
).getTextContent().trim().isEmpty()){
// NOTE(review): descr is not declared in the visible lines of this method and its value is
// not read in the visible code - presumably declared on a line missing from this view; confirm.
861 descr
+= children
.item(i
).getTextContent().trim();
863 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
864 insideNodes
=children
.item(i
).getChildNodes();
// blabla collects the text fragments of the current tax:p node, in document order.
865 List
<String
> blabla
= new ArrayList
<String
>();
866 for (int j
=0;j
<insideNodes
.getLength();j
++){
867 boolean nodeKnown
= false;
868 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
// The name is resolved through getTaxonNameBaseFromXML and only its string form is kept.
869 String linkedTaxon
= getTaxonNameBaseFromXML(insideNodes
.item(j
), nametosave
,refMods
).toString();//TODO NOT IMPLEMENTED IN THE CDM YET
// Keeps the part before the first "sec".
// NOTE(review): this also cuts inside any name that itself contains "sec" - confirm.
870 blabla
.add(linkedTaxon
.split("sec")[0]);
873 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("#text")) {
874 if(!insideNodes
.item(j
).getTextContent().trim().isEmpty()){
875 blabla
.add(insideNodes
.item(j
).getTextContent().trim());
876 // localdescr += insideNodes.item(j).getTextContent().trim();
880 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:bibref")) {
881 String ref
= insideNodes
.item(j
).getTextContent().trim();
// A trailing ';' is replaced by '.' (only when the text is longer than one character).
882 if (ref
.endsWith(";") && ((ref
.length())>1)) {
883 ref
=ref
.substring(0, ref
.length()-1)+".";
// The generic reference only carries the title cache; its title cache string is what is appended.
885 Reference
<?
> reference
= ReferenceFactory
.newGeneric();
886 reference
.setTitleCache(ref
, true);
887 blabla
.add(reference
.getTitleCache());
890 if (insideNodes
.item(j
).getNodeName().equalsIgnoreCase("tax:collection_event")) {
// Collection events are not turned into text; they are remembered for extractMaterialsDirect.
891 collectionEvent
=true;
892 collectionEvents
.add(insideNodes
.item(j
));
// The same message is logged at info and at warn level.
// NOTE(review): presumably only reached for node names not handled above - the guarding
// condition is not visible here.
896 logger
.info("Node not handled yet : "+insideNodes
.item(j
).getNodeName());
897 logger
.warn("Node not handled yet : "+insideNodes
.item(j
).getNodeName());
901 if (!blabla
.isEmpty()) {
902 fullDescription
.add(StringUtils
.join(blabla
," "));
906 if (collectionEvent
) {
907 logger
.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature
.toString());
908 for (Node coll
:collectionEvents
){
909 extractMaterialsDirect(coll
, acceptedTaxon
, refMods
, "collection");
912 return fullDescription
;
917 * @param description: the XML node group
918 * @param acceptedTaxon: the current acceptedTaxon
919 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
920 * @param nametosave: the list of objects to save into the CDM
921 * @param refMods: the current reference extracted from the MODS
922 * @param feature: the feature to link the data with
924 private void extractFeature(Node description
, Taxon acceptedTaxon
, Taxon defaultTaxon
, List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
, Feature feature
){
925 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
926 List
<String
> fullDescription
= parseParagraph( nametosave
, acceptedTaxon
, refMods
, description
,feature
);
928 if (!fullDescription
.isEmpty()) {
929 setParticularDescription(StringUtils
.join(fullDescription
,"<br/>"),acceptedTaxon
,defaultTaxon
, refMods
,feature
);
936 * @param descr: the description text to store
937 * @param acceptedTaxon: the current acceptedTaxon
938 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
939 * @param refMods: the current reference extracted from the MODS
940 * @param currentFeature: the feature to link the text with
// Stores descr as a TextData element of currentFeature on a taxon description.
// The TextData holds descr + "<br/>" under Language.UNKNOWN_LANGUAGE() and gets an Import source
// citing refMods. It is attached to acceptedTaxon when that is non-null, otherwise to defaultTaxon
// when that is non-null; in both cases only if descr is not empty. The description is obtained
// from importer.getTaxonDescription(taxon, false, true), and both the description and the taxon
// are saved through the importer's services.
943 private void setParticularDescription(String descr
, Taxon acceptedTaxon
, Taxon defaultTaxon
, Reference
<?
> refMods
, Feature currentFeature
) {
944 // logger.info("setParticularDescription "+currentFeature);
945 // logger.info("acceptedTaxon: "+acceptedTaxon);
946 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
// The TextData is built before descr is checked; with an empty descr it is simply never attached.
948 TextData textData
= TextData
.NewInstance();
949 textData
.setFeature(currentFeature
);
950 textData
.addSource(OriginalSourceType
.Import
, null,null,refMods
,null);
952 textData
.putText(Language
.UNKNOWN_LANGUAGE(), descr
+"<br/>");
// Case 1: an accepted taxon exists.
954 if(! descr
.isEmpty() && (acceptedTaxon
!=null)){
955 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
956 td
.addElement(textData
);
957 td
.addSource(OriginalSourceType
.Import
,null,null,refMods
,null);
958 acceptedTaxon
.addDescription(td
);
959 importer
.getDescriptionService().saveOrUpdate(td
);
960 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
// Case 2: no accepted taxon, fall back to the default taxon.
963 if(! descr
.isEmpty() && (acceptedTaxon
== null) && (defaultTaxon
!= null)){
// Re-reads the default taxon from the taxon service by its UUID.
// NOTE(review): the conditions/try block around the next statements are not visible here.
965 Taxon tmp
=(Taxon
) importer
.getTaxonService().find(defaultTaxon
.getUuid());
967 defaultTaxon
=CdmBase
.deproxy(tmp
,Taxon
.class);
969 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
972 logger
.debug("TAXON EXISTS"+defaultTaxon
);
975 TaxonDescription td
=importer
.getTaxonDescription(defaultTaxon
, false, true);
976 defaultTaxon
.addDescription(td
);
977 td
.addElement(textData
);
978 td
.addSource(OriginalSourceType
.Import
,null,null,refMods
,null);
979 importer
.getDescriptionService().saveOrUpdate(td
);
980 importer
.getTaxonService().saveOrUpdate(defaultTaxon
);
987 * @param synonyms: the XML Nodegroup to parse
988 * @param nametosave: the list of objects to save into the CDM
989 * @param acceptedTaxon: the current acceptedTaxon
990 * @param refMods: the current reference extracted from the MODS
// Reads the synonym names of a treatment and attaches them to acceptedTaxon.
// Step 1: every "tax:name" child of every "tax:p" child of the synonyms node is run through
//         extractScientificName and encoded as "name---rank---identifier" (slot 1 of the result
//         is used as name unless it is empty, in which case slot 0 is used).
// Step 2: each encoded entry is split again, parsed with NonViralNameParserImpl, turned into a
//         Synonym citing refMods, optionally given an LSID/source id via setLSID, and added to
//         acceptedTaxon with SynonymRelationshipType.SYNONYM_OF().
// acceptedTaxon is saved through the importer's taxon service.
992 @SuppressWarnings({ "rawtypes", "unchecked" })
993 private void extractSynonyms(Node synonyms
, List
<TaxonNameBase
> nametosave
,Taxon acceptedTaxon
, Reference
<?
> refMods
) {
994 // logger.info("extractSynonyms: "+acceptedTaxon);
// Re-reads the accepted taxon by UUID; acceptedTaxon is dereferenced here without a null check.
// NOTE(review): the condition choosing between the two deproxy calls below is not visible here.
995 Taxon ttmp
= (Taxon
) importer
.getTaxonService().find(acceptedTaxon
.getUuid());
997 acceptedTaxon
= CdmBase
.deproxy(ttmp
,Taxon
.class);
1000 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1002 NodeList children
= synonyms
.getChildNodes();
1003 TaxonNameBase nameToBeFilled
= null;
1004 List
<String
> names
= new ArrayList
<String
>();
1006 String identifier
="";
1008 for (int i
=0;i
<children
.getLength();i
++){
1009 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1010 NodeList tmp
= children
.item(i
).getChildNodes();
1011 // String fullContent = children.item(i).getTextContent();
1012 for (int j
=0; j
< tmp
.getLength();j
++){
1013 if(tmp
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
// NOTE(review): the declaration of enames and the opening of the try block are not visible here.
1016 enames
= extractScientificName(tmp
.item(j
));
// Encodes name, rank (slot 2) and identifier (slot 3) into one "---"-separated string.
1017 if (enames
[1].isEmpty()) {
1018 names
.add(enames
[0]+"---"+enames
[2]+"---"+enames
[3]);
1020 names
.add(enames
[1]+"---"+enames
[2]+"---"+enames
[3]);
1022 } catch (TransformerFactoryConfigurationError e
) {
1024 } catch (TransformerException e
) {
1032 for(String name
:names
){
1033 System
.out
.println("HANDLE NAME "+name
);
// Part [1] of the encoded entry is the rank name.
1036 rank
= Rank
.getRankByName(name
.split("---")[1]);
1037 } catch (UnknownCdmTypeException e
) {
1038 logger
.warn("Rank problem!");
// Part [2] is the identifier; any failure to read it resets the identifier to "".
1042 identifier
= name
.split("---")[2];
1043 }catch(Exception e
){logger
.warn("identifier empty"); identifier
="";}
1044 name
= name
.split("---")[0];
1046 String original
= name
;
1048 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
1049 nameToBeFilled
= parser
.parseFullName(name
, nomenclaturalCode
, rank
);
// A lone CheckRank problem is tolerated; any other parsing problem goes to solveNameProblem.
1050 if (nameToBeFilled
.hasProblem() &&
1051 !((nameToBeFilled
.getParsingProblems().size()==1) && nameToBeFilled
.getParsingProblems().contains(ParserProblem
.CheckRank
)) ) {
1052 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1053 nameToBeFilled
= solveNameProblem(original
, name
, parser
);
1055 nameToBeFilled
= getTaxonNameBase(nameToBeFilled
,nametosave
);
1056 Synonym synonym
= Synonym
.NewInstance(nameToBeFilled
, refMods
);
// Identifiers of at most two characters are ignored.
1059 if (!identifier
.isEmpty() && (identifier
.length()>2)){
1060 setLSID(identifier
, synonym
);
1063 acceptedTaxon
.addSynonym(synonym
, SynonymRelationshipType
.SYNONYM_OF());
1064 System
.out
.println("SYNONYM");
1066 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1076 * @param refgroup: the XML nodes
1077 * @param nametosave: the list of objects to save into the CDM
1078 * @param acceptedTaxon: the current acceptedTaxon
1079 * @param nametosave: the list of objects to save into the CDM
1080 * @param refMods: the current reference extracted from the MODS
1081 * @return the acceptedTaxon (why?)
// Processes the reference group of a treatment and returns the (deproxied) acceptedTaxon.
// For every "tax:p" child of refgroup:
//  - "tax:bibref" children are cleaned up, turned into a generic Reference and set as the
//    nomenclatural reference of the accepted taxon's name; a CITATION TextData carrying the
//    reference is added to the taxon description and both are saved.
//  - in a second pass over the same children, "tax:name" and "#text" nodes are used to build
//    synonyms (or, when the name equals the accepted taxon's name, to set the nomenclatural
//    reference and LSID of the accepted taxon).
// NOTE(review): several lines of this method (try openings, else branches, declarations of
// enames, name and rank) are not visible here, so the exact nesting below is not fully known.
1083 @SuppressWarnings({ "null", "unused" ,"rawtypes" })
1084 private Taxon
extractReferences(Node refgroup
, List
<TaxonNameBase
> nametosave
, Taxon acceptedTaxon
, Reference
<?
> refMods
) {
1085 // logger.info("extractReferences");
1086 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1088 NodeList children
= refgroup
.getChildNodes();
1089 NonViralName
<?
> nameToBeFilled
= null;
1090 boolean accepted
=true;
1091 for (int i
=0;i
<children
.getLength();i
++){
1092 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:p")){
1093 NodeList references
= children
.item(i
).getChildNodes();
1095 boolean foundBibref
=false;
// First pass: bibliographic references.
1096 for (int j
=0;j
<references
.getLength();j
++){
1097 if(references
.item(j
).getNodeName().equalsIgnoreCase("tax:bibref")){
1099 String ref
= references
.item(j
).getTextContent().trim();
// A trailing ';' is replaced by '.'.
1100 if (ref
.endsWith(";") && ((ref
.length())>1)) {
1101 ref
=ref
.substring(0, ref
.length()-1)+".";
// Strips the treatment's main name when the reference starts with it.
// NOTE(review): String.replace removes every occurrence, not only the leading one - confirm.
1103 if (ref
.startsWith(treatmentMainName
) && !ref
.endsWith(treatmentMainName
)) {
1104 ref
=ref
.replace(treatmentMainName
, "");
// NOTE(review): triggered by a leading '.' or ',', but the replace calls remove every '.' and
// ',' in the whole reference string - confirm this is intended.
1106 while (ref
.startsWith(".") || ref
.startsWith(",")) {
1107 ref
=ref
.replace(".","").replace(",","").trim();
1111 // logger.info("Current reference :"+nbRef+", "+ref+", "+treatmentMainName+"--"+ref.indexOf(treatmentMainName));
1112 Reference
<?
> reference
= ReferenceFactory
.newGeneric();
1113 reference
.setTitleCache(ref
, true);
1115 boolean makeEmpty
= false;
1116 // Rank rank = null;
1117 // logger.info("TREATMENTMAINNAME: "+treatmentMainName);
1118 // logger.info("ref: "+ref);
1125 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
// Creates an empty name of the type matching the nomenclatural code.
// NOTE(review): for any other code nameToBeFilled stays null and the
// setNomenclaturalReference call below would fail - confirm the code is always one of these.
1126 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNAFP
)){
1127 nameToBeFilled
= BotanicalName
.NewInstance(null);
1129 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICZN
)){
1130 nameToBeFilled
= ZoologicalName
.NewInstance(null);
1132 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNB
)){
1133 nameToBeFilled
= BacterialName
.NewInstance(null);
1136 acceptedTaxon
.getName().setNomenclaturalReference(reference
);
1137 nameToBeFilled
.setNomenclaturalReference(reference
);
1138 acceptedTaxon
.addSource(OriginalSourceType
.Import
,null,null,refMods
,null);
1140 TaxonDescription td
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
1141 acceptedTaxon
.addDescription(td
);
// The same Import source is added to the accepted taxon a second time here (see 1138).
1142 acceptedTaxon
.addSource(OriginalSourceType
.Import
,null,null,refMods
,null);
// The citation element carries the reference as its source; no text is put on it in the visible code.
1144 TextData textData
= TextData
.NewInstance(Feature
.CITATION());
1146 textData
.addSource(OriginalSourceType
.Import
, null, null, reference
, null, acceptedTaxon
.getName(), ref
);
1147 td
.addElement(textData
);
1148 td
.addSource(OriginalSourceType
.Import
, null,null,refMods
,null);
1150 importer
.getDescriptionService().saveOrUpdate(td
);
1152 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1153 // logger.warn("BWAAHHHH: "+nameToBeFilled.getParsingProblems()+", "+ref);
1158 String refString
="";
1160 String identifier
="";
// Second pass: names and free text.
// NOTE(review): presumably only run when no bibref was found - the guard is not visible here.
1161 for (int j
=0;j
<references
.getLength();j
++){
1162 //no bibref tag inside
1163 logger
.info("references.item(j).getNodeName()"+references
.item(j
).getNodeName());
1164 if (references
.item(j
).getNodeName().equalsIgnoreCase("tax:name")){
1167 enames
= extractScientificName(references
.item(j
));
// name is encoded as "name---rank---identifier", i.e. three "---"-separated parts.
1168 if (enames
[1].isEmpty()) {
1169 name
=enames
[0]+"---"+enames
[2]+"---"+enames
[3];
1171 name
=enames
[1]+"---"+enames
[2]+"---"+enames
[3];
1173 } catch (TransformerFactoryConfigurationError e
) {
1175 } catch (TransformerException e
) {
1181 if (references
.item(j
).getNodeName().equalsIgnoreCase("#text")){
1182 refString
= references
.item(j
).getTextContent().trim();
// Free text without a preceding name: the whole text line is parsed as a synonym name.
1184 if(references
.item(j
).getNodeName().equalsIgnoreCase("#text") && name
.isEmpty() && !references
.item(j
).getTextContent().trim().isEmpty()){
// name is empty in this branch, so the split cannot provide index 3; the catch then resets
// the identifier to "".
1186 identifier
= name
.split("---")[3];
1187 }catch(Exception e
){logger
.warn("no identifier");identifier
="";}
1188 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
1189 String fullLineRefName
= references
.item(j
).getTextContent().trim();
1190 TaxonNameBase nameTBF
= parser
.parseFullName(fullLineRefName
, nomenclaturalCode
, Rank
.UNKNOWN_RANK());
1191 if (nameTBF
.hasProblem() &&
1192 !((nameTBF
.getParsingProblems().size()==1) && nameTBF
.getParsingProblems().contains(ParserProblem
.CheckRank
)) ) {
1193 nameTBF
=solveNameProblem(fullLineRefName
, fullLineRefName
,parser
);
1195 nameTBF
= getTaxonNameBase(nameTBF
,nametosave
);
1196 Synonym synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
// NOTE(review): the identifier is set on acceptedTaxon although a synonym was just created;
// extractSynonyms passes the synonym to setLSID - confirm which target is intended.
1199 if (!identifier
.isEmpty() && (identifier
.length()>2)){
1200 setLSID(identifier
, acceptedTaxon
);
1203 acceptedTaxon
.addSynonym(synonym
, SynonymRelationshipType
.SYNONYM_OF());
1204 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1208 if(!name
.isEmpty()){
1209 logger
.info("acceptedTaxon and name: *"+acceptedTaxon
.getTitleCache()+"*, *"+name
+"*");
// The name equals the accepted taxon's title (up to the first "sec"): treat the text as its
// nomenclatural reference.
1210 if (acceptedTaxon
.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(name
.split("---")[0].trim())){
// NOTE(review): name is assembled above from three "---"-separated parts, so index 3 looks out
// of range here (extractSynonyms reads the identifier from index 2), and no enclosing try is
// visible - confirm.
1211 identifier
= name
.split("---")[3];
1212 Reference
<?
> refS
= ReferenceFactory
.newGeneric();
1213 refS
.setTitleCache(refString
, true);
1214 // TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1215 // acceptedTaxon.addDescription(td);
1216 // acceptedTaxon.addSource(refSource);
1218 // TextData textData = TextData.NewInstance(Feature.CITATION());
1220 // textData.addSource(null, null, refS, null);
1221 // td.addElement(textData);
1222 // td.addSource(refSource);
1223 // importer.getDescriptionService().saveOrUpdate(td);
1226 if (!identifier
.isEmpty() && (identifier
.length()>2)){
1227 setLSID(identifier
, acceptedTaxon
);
1231 acceptedTaxon
.getName().setNomenclaturalReference(refS
);
1232 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
// Otherwise the name is handled as a synonym (presumably the else branch - not visible here).
1237 rank
= Rank
.getRankByName(name
.split("---")[1]);
1238 } catch (Exception e
) {
1239 logger
.warn("Rank or name problem!");
// name is overwritten with its first part plus the free text before the identifier is read.
// NOTE(review): after this the "---" parts are gone, so reading index 3 below looks like it
// can only fail - confirm.
1242 name
= name
.split("---")[0].trim() + refString
;
1243 String original
= name
;
1245 identifier
= name
.split("---")[3];
1248 logger
.warn("no identifier");
1251 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
1252 TaxonNameBase nameTBF
= parser
.parseFullName(name
, nomenclaturalCode
, rank
);
1253 if (nameTBF
.hasProblem() &&
1254 !((nameTBF
.getParsingProblems().size()==1) && nameTBF
.getParsingProblems().contains(ParserProblem
.CheckRank
)) ) {
1255 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1256 nameTBF
=solveNameProblem(original
, name
,parser
);
1258 nameTBF
= getTaxonNameBase(nameTBF
,nametosave
);
1259 Synonym synonym
= Synonym
.NewInstance(nameTBF
, refMods
);
// Inline variant of the LSID/source-id handling that setLSID also implements.
// NOTE(review): split("__")[1] fails when the identifier has no "__source" part - confirm.
1262 if (!identifier
.isEmpty() && (identifier
.length()>2)){
1263 String id
= identifier
.split("__")[0];
1264 String source
= identifier
.split("__")[1];
1265 if (id
.indexOf("lsid")>-1){
1267 LSID lsid
= new LSID(id
);
1268 synonym
.setLsid(lsid
);
1269 } catch (MalformedLSIDException e
) {
1270 // TODO Auto-generated catch block
1271 e
.printStackTrace();
1276 //TODO ADD ORIGINAL SOURCE ID
1277 IdentifiableSource os
= IdentifiableSource
.NewInstance(OriginalSourceType
.Import
);
1278 os
.setIdInSource(id
);
1279 Reference
<?
> re
= ReferenceFactory
.newGeneric();
1280 re
.setTitle(source
);
1282 synonym
.addSource(os
);
1286 acceptedTaxon
.addSynonym(synonym
, SynonymRelationshipType
.SYNONYM_OF());
1287 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
1293 // importer.getClassificationService().saveOrUpdate(classification);
1294 return acceptedTaxon
;
1300 * @param taxon: the taxon or synonym that receives the LSID or the original source id
1302 private void setLSID(String identifier
, TaxonBase
<?
> taxon
) {
1303 boolean lsidok
=false;
1304 String id
= identifier
.split("__")[0];
1305 String source
= identifier
.split("__")[1];
1306 if (id
.indexOf("lsid")>-1){
1308 LSID lsid
= new LSID(id
);
1309 taxon
.setLsid(lsid
);
1311 } catch (MalformedLSIDException e
) {
1312 logger
.warn("Malformed LSID");
1316 if ((id
.indexOf("lsid")<0) || !lsidok
){
1317 //ADD ORIGINAL SOURCE ID
1318 IdentifiableSource os
= IdentifiableSource
.NewInstance(OriginalSourceType
.Import
);
1319 os
.setIdInSource(id
);
1320 Reference
<?
> re
= ReferenceFactory
.newGeneric();
1321 re
.setTitle(source
);
1323 taxon
.addSource(os
);
1329 * try to solve a parsing problem for a scientific name
1330 * @param original : the name from the OCR document
1331 * @param name : the tagged version
1333 * @return the corrected TaxonNameBase
// Tries to repair a scientific name that the parser reported problems for.
// The atomised parts recorded for `original` are looked up in namesMap and used to determine the
// rank (askForRank or getRank(ato)); the name is then parsed again with that rank. While the
// result still has problems other than a lone CheckRank (and retry < 1), a fresh empty name of
// the type matching nomenclaturalCode is created and filled through parser.parseReferencedName
// with the text returned by getFullReference. Finally the full title cache is set to `name`.
// NOTE(review): the declaration/increment of retry, the condition choosing between askForRank
// and getRank(ato), and the return statement are not visible here.
1335 @SuppressWarnings({ "unchecked", "rawtypes" })
1336 private TaxonNameBase
<?
,?
> solveNameProblem(String original
, String name
, INonViralNameParser parser
) {
// namesMap is filled by extractScientificName (full name -> atomised parts).
1337 Map
<String
,String
> ato
= namesMap
.get(original
);
1338 Rank rank
=Rank
.UNKNOWN_RANK();
1341 rank
=askForRank(original
, Rank
.UNKNOWN_RANK(), nomenclaturalCode
);
1343 rank
= getRank(ato
);
1345 TaxonNameBase
<?
,?
> nameTBF
= parser
.parseFullName(name
, nomenclaturalCode
, rank
);
1346 // logger.info("RANK: "+rank);
// A lone CheckRank problem is tolerated and ends the loop.
1348 while (nameTBF
.hasProblem() && (retry
<1) && !((nameTBF
.getParsingProblems().size()==1) && nameTBF
.getParsingProblems().contains(ParserProblem
.CheckRank
))){
1349 String fullname
= getFullReference(name
,nameTBF
.getParsingProblems());
// Replaces the problematic name by an empty one of the type matching the nomenclatural code.
1350 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNAFP
)){
1351 nameTBF
= BotanicalName
.NewInstance(null);
1353 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICZN
)){
1354 nameTBF
= ZoologicalName
.NewInstance(null);
1356 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNB
)){
1357 nameTBF
= BacterialName
.NewInstance(null);
1359 parser
.parseReferencedName(nameTBF
, fullname
, rank
, false);
// NOTE(review): presumably only applied when problems remain after the retry - the guard is
// not visible here.
1363 nameTBF
.setFullTitleCache(name
, true);
1364 // logger.info("FULL TITLE CACHE "+name);
1370 * @param nomenclatureNode: the XML nodes
1371 * @param nametosave: the list of objects to save into the CDM
1372 * @param refMods: the current reference extracted from the MODS
// Reads the nomenclature node of a treatment and returns the accepted taxon it describes.
// For each child of nomenclatureNode:
//  - "#text": the text is stored in freetext;
//  - "tax:collection_event": handed to extractMaterialsDirect;
//  - "tax:name": extractScientificName supplies the original name [0], the treatment main
//    name [1], the rank name [2] and the identifier [3].
// When the rank is unknown or lower than the configured maximum rank, maxRankRespected is set
// to true and the taxon is looked up with findBestMatchingTaxon(treatmentMainName). If none is
// found, a new name and Taxon are created, identified, saved and placed in the classification
// under a parent; the statements after that update an existing taxon (sec, source, LSID).
// Otherwise maxRankRespected is set to false.
// NOTE(review): try openings, else branches and the declarations of freetext and names are not
// visible here, so the exact nesting below is not fully known.
1375 @SuppressWarnings({ "rawtypes", "unused" })
1376 private Taxon
extractNomenclature(Node nomenclatureNode
, List
<TaxonNameBase
> nametosave
, Reference
<?
> refMods
) {
1377 // logger.info("extractNomenclature");
1378 NodeList children
= nomenclatureNode
.getChildNodes();
1380 TaxonNameBase nameToBeFilled
= null;
1381 Taxon acceptedTaxon
= null;
1382 INonViralNameParser parser
= NonViralNameParserImpl
.NewInstance();
1383 String identifier
="";
1385 Rank rank
= Rank
.UNKNOWN_RANK();
1386 // String fullContent = nomenclatureNode.getTextContent();
1387 for (int i
=0;i
<children
.getLength();i
++){
1388 if (children
.item(i
).getNodeName().equalsIgnoreCase("#text")) {
1389 freetext
=children
.item(i
).getTextContent();
1391 if (children
.item(i
).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1392 System
.out
.println("COLLECTION EVENT INSIDE NOMENCLATURE");
// NOTE(review): acceptedTaxon is still null here when the collection event precedes the
// tax:name node - confirm extractMaterialsDirect copes with that.
1393 extractMaterialsDirect(children
.item(i
), acceptedTaxon
, refMods
, "collection");
1395 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:name")){
1398 names
= extractScientificName(children
.item(i
));
// treatmentMainName and originalTreatmentName are not declared in this method; they are
// assigned here and read again by other methods (e.g. extractReferences).
1399 treatmentMainName
= names
[1];
1400 originalTreatmentName
= names
[0];
1401 rank
= Rank
.getRankByName(names
[2]);
1402 identifier
=names
[3];
1404 } catch (TransformerFactoryConfigurationError e1
) {
1406 } catch (TransformerException e1
) {
1408 } catch (UnknownCdmTypeException e
) {
1412 if (rank
.equals(Rank
.UNKNOWN_RANK()) || rank
.isLower(configState
.getConfig().getMaxRank())){
1413 maxRankRespected
=true;
// Empty name of the type matching the nomenclatural code; when no taxon is found below it is
// replaced by the parser result.
1414 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNAFP
)){
1415 nameToBeFilled
= BotanicalName
.NewInstance(null);
1417 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICZN
)){
1418 nameToBeFilled
= ZoologicalName
.NewInstance(null);
1420 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNB
)){
1421 nameToBeFilled
= BacterialName
.NewInstance(null);
1423 acceptedTaxon
= importer
.getTaxonService().findBestMatchingTaxon(treatmentMainName
);
// No matching taxon: create name and taxon from the treatment main name.
1424 if (acceptedTaxon
==null ){
1425 nameToBeFilled
= parser
.parseFullName(treatmentMainName
, nomenclaturalCode
, null);
1426 if (nameToBeFilled
.hasProblem() &&
1427 !((nameToBeFilled
.getParsingProblems().size()==1) && nameToBeFilled
.getParsingProblems().contains(ParserProblem
.CheckRank
)) ) {
1428 nameToBeFilled
= solveNameProblem(originalTreatmentName
,treatmentMainName
,parser
);
1430 nameToBeFilled
= getTaxonNameBase(nameToBeFilled
,nametosave
);
// The original (unmodified) treatment name is kept as the title cache of a name description.
1431 if (!originalTreatmentName
.isEmpty()) {
1432 TaxonNameDescription td
= TaxonNameDescription
.NewInstance();
1433 td
.setTitleCache(originalTreatmentName
);
1434 nameToBeFilled
.addDescription(td
);
1436 nameToBeFilled
.addSource(OriginalSourceType
.Import
,null,null,refMods
,null);
1437 acceptedTaxon
= new Taxon(nameToBeFilled
,(Reference
<?
>) nameToBeFilled
.getNomenclaturalReference() );//TODO TOFIX reference
// Unless the original secundum is to be kept, the configured secundum replaces it.
1438 if(!configState
.getConfig().doKeepOriginalSecundum()) {
1439 acceptedTaxon
.setSec(configState
.getConfig().getSecundum());
1440 logger
.info("SET SECUNDUM "+configState
.getConfig().getSecundum());
// Inline variant of the LSID/source-id handling that setLSID also implements.
// NOTE(review): split("__")[1] fails when the identifier has no "__source" part - confirm.
1444 if (!identifier
.isEmpty() && (identifier
.length()>2)){
1445 boolean lsidok
=false;
1446 String id
= identifier
.split("__")[0];
1447 String source
= identifier
.split("__")[1];
1448 if (id
.indexOf("lsid")>-1){
1450 LSID lsid
= new LSID(id
);
1451 acceptedTaxon
.setLsid(lsid
);
1453 } catch (MalformedLSIDException e
) {
1454 logger
.warn("Malformed LSID");
1458 if ((id
.indexOf("lsid")<0) || !lsidok
){
1459 //TODO ADD ORIGINAL SOURCE ID
1460 IdentifiableSource os
= IdentifiableSource
.NewInstance(OriginalSourceType
.Import
);
1461 os
.setIdInSource(id
);
1462 Reference
<?
> re
= ReferenceFactory
.newGeneric();
1463 re
.setTitle(source
);
1465 acceptedTaxon
.addSource(os
);
1469 acceptedTaxon
.addSource(OriginalSourceType
.Import
, null,null,refMods
,null);
1470 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
// Places the new taxon in the classification below a parent obtained from askParent, or
// created through createParent when askParent returns null.
1472 Taxon parentTaxon
= askParent(acceptedTaxon
, classification
);
1473 if (parentTaxon
==null){
1474 while (parentTaxon
== null) {
1475 parentTaxon
= createParent(acceptedTaxon
, refMods
);
1476 classification
.addParentChild(parentTaxon
, acceptedTaxon
, refMods
, null);
1479 classification
.addParentChild(parentTaxon
, acceptedTaxon
, refMods
, null);
// The statements from here on update a taxon that already exists
// (presumably the else branch of the null check above - not visible here).
1482 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
1483 Set
<IdentifiableSource
> sources
= acceptedTaxon
.getSources();
1484 boolean sourcelinked
=false;
// Looks for an existing source whose citation title equals the title cache of refMods.
// NOTE(review): getCitation() or getTitle() returning null would fail here - confirm.
1485 for (IdentifiableSource source
:sources
){
1486 if (source
.getCitation().getTitle().equalsIgnoreCase(refMods
.getTitleCache())) {
1490 if (!configState
.getConfig().doKeepOriginalSecundum()) {
1491 acceptedTaxon
.setSec(configState
.getConfig().getSecundum());
1492 logger
.info("SET SECUNDUM "+configState
.getConfig().getSecundum());
1495 acceptedTaxon
.addSource(OriginalSourceType
.Import
, null, null, refMods
, null);
1497 if (!sourcelinked
|| !configState
.getConfig().doKeepOriginalSecundum()){
1499 if (!identifier
.isEmpty() && (identifier
.length()>2)){
1500 setLSID(identifier
, acceptedTaxon
);
1502 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
// Rank check failed (presumably the else branch of the rank condition above).
1506 maxRankRespected
=false;
1510 // importer.getClassificationService().saveOrUpdate(classification);
1511 return acceptedTaxon
;
1515 * @param acceptedTaxon: the current acceptedTaxon
1516 * @param ref: the current reference extracted from the MODS
1517 * @return the parent for the current accepted taxon
// Determines a parent taxon for acceptedTaxon and links both in the classification.
// The rank names known to the term service are collected as strings, then the helpers
// askAddParent, askParent, askSetParent and askRank supply the decision, the parent name and
// its rank. A new parent name of the type matching nomenclaturalCode is created with that title
// cache and rank, wrapped in a Taxon citing ref, and linked with classification.addParentChild;
// createParent is called recursively for the new parent. The other visible outcome adds
// acceptedTaxon as a child taxon of the classification without a parent.
// NOTE(review): the branch conditions, the declarations of r and tax, and the return statement
// are not visible here.
1519 private Taxon
createParent(Taxon acceptedTaxon
, Reference
<?
> ref
) {
1520 acceptedTaxon
= CdmBase
.deproxy(acceptedTaxon
, Taxon
.class);
// The freshly created list is immediately replaced by the list from the term service.
1522 List
<Rank
> rankList
= new ArrayList
<Rank
>();
1523 rankList
= importer
.getTermService().listByTermClass(Rank
.class, null, null, null, null);
1525 List
<String
> rankListStr
= new ArrayList
<String
>();
1526 for (Rank r
:rankList
) {
1527 rankListStr
.add(r
.toString());
1530 String s
= acceptedTaxon
.getTitleCache();
1533 int addTaxon
= askAddParent(s
);
1534 logger
.info("ADD TAXON: "+addTaxon
);
1536 Taxon tmp
= askParent(acceptedTaxon
, classification
);
// s is reused: from here on it holds the value returned by askSetParent, which becomes the
// title cache of the new parent name.
1538 s
= askSetParent(s
);
1539 r
= askRank(s
,rankListStr
);
1541 NonViralName
<?
> nameToBeFilled
= null;
// NOTE(review): for any other nomenclatural code nameToBeFilled stays null and the
// setTitleCache call below would fail - confirm the code is always one of these.
1542 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNAFP
)){
1543 nameToBeFilled
= BotanicalName
.NewInstance(null);
1545 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICZN
)){
1546 nameToBeFilled
= ZoologicalName
.NewInstance(null);
1548 if (nomenclaturalCode
.equals(NomenclaturalCode
.ICNB
)){
1549 nameToBeFilled
= BacterialName
.NewInstance(null);
1551 nameToBeFilled
.setTitleCache(s
);
1552 nameToBeFilled
.setRank(getRank(r
));
1554 tax
= Taxon
.NewInstance(nameToBeFilled
, ref
);
// Recursion: the new parent gets its own parent first.
1560 createParent(tax
, ref
);
1561 // logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
1562 classification
.addParentChild(tax
, acceptedTaxon
, ref
, null);
1565 classification
.addChildTaxon(acceptedTaxon
, ref
, null);
1568 // logger.info("RETURN: "+tax );
1577 * @throws TransformerFactoryConfigurationError
1578 * @throws TransformerException
1579 * @return a list of possible names
// Extracts the scientific name information from a "tax:name" node.
// From "tax:xmldata" children: a "tax:xid" node supplies the identifier, built as the value of
// its "identifier" attribute followed by "__" and the value of its "source" attribute; the text
// of the other atom nodes is appended to atomisedName and stored in atomisedMap.
// From non-blank "#text" children: the text becomes fullName.
// fullName is normalised, registered in namesMap together with atomisedMap, compared with the
// joined atomised name (getScientificName is consulted when they differ) and a rank is
// requested through askForRank.
// The result is a String[4]: [2] is rank.toString() and [3] the identifier.
// NOTE(review): the assignments of slots [0] and [1] and the return statement are not visible
// here; callers in this file read [0] as the original name and [1] as the name to use.
1581 private String
[] extractScientificName(Node name
) throws TransformerFactoryConfigurationError
, TransformerException
{
1582 // System.out.println("extractScientificName");
1583 Rank rank
= Rank
.UNKNOWN_RANK();
1584 NodeList children
= name
.getChildNodes();
1585 String fullName
= "";
1587 String identifier
="";
1588 HashMap
<String
, String
> atomisedMap
= new HashMap
<String
, String
>();
1589 List
<String
> atomisedName
= new ArrayList
<String
>();
1591 String rankStr
= "";
1593 for (int i
=0;i
<children
.getLength();i
++){
1594 if(children
.item(i
).getNodeName().equalsIgnoreCase("tax:xmldata")){
1595 NodeList atom
= children
.item(i
).getChildNodes();
1596 for (int k
=0;k
<atom
.getLength();k
++){
1597 if (atom
.item(k
).getNodeName().equalsIgnoreCase("tax:xid")){
// A missing "identifier" attribute is reported on stdout only.
1599 identifier
= atom
.item(k
).getAttributes().getNamedItem("identifier").getNodeValue();
1600 }catch(Exception e
){
1601 System
.out
.println("pb with identifier, maybe empty");
// A missing "source" attribute is reported on stdout only; the identifier then has no
// "__source" suffix.
1604 identifier
+="__"+atom
.item(k
).getAttributes().getNamedItem("source").getNodeValue();
1605 }catch(Exception e
){
1606 System
.out
.println("pb with identifier, maybe empty");
// For the other atom nodes the lower-cased node name is used as key, except for
// dwc:taxonRank, whose text content replaces it and is resolved to a rank.
1610 rankStr
= atom
.item(k
).getNodeName().toLowerCase();
1611 // logger.info("RANKSTR:*"+rankStr+"*");
1612 if (rankStr
.equalsIgnoreCase("dwc:taxonRank")) {
1613 rankStr
=atom
.item(k
).getTextContent().trim();
1614 tmpRank
= getRank(rankStr
);
1616 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
// NOTE(review): the body of this check is not visible here - presumably it assigns rank.
1617 if (tmpRank
!= null){
1621 atomisedMap
.put(rankStr
.toLowerCase(),atom
.item(k
).getTextContent().trim());
1622 atomisedName
.add(atom
.item(k
).getTextContent().trim());
1625 if(children
.item(i
).getNodeName().equalsIgnoreCase("#text") && !StringUtils
.isBlank(children
.item(i
).getTextContent())){
1626 // logger.info("name non atomised: "+children.item(i).getTextContent());
1627 fullName
= children
.item(i
).getTextContent().trim();
1628 // logger.info("fullname: "+fullName);
// Removes the blank after "(" and before ")".
1631 if (fullName
!= null){
1632 fullName
= fullName
.replace("( ", "(");
1633 fullName
= fullName
.replace(" )",")");
// Without free text, the atomised parts joined by spaces serve as the full name.
1636 if (fullName
.trim().isEmpty()){
1637 fullName
=StringUtils
.join(atomisedName
," ");
// NOTE(review): presumably collapses runs of blanks; as shown here the searched literal and the
// replacement look identical - confirm against the original file.
1640 while(fullName
.contains(" ")) {
1641 fullName
=fullName
.replace(" ", " ");
1642 // logger.info("while");
// Registers the atomised parts under the full name; solveNameProblem reads them back.
1645 namesMap
.put(fullName
,atomisedMap
);
1646 String atomisedNameStr
= StringUtils
.join(atomisedName
," ");
1647 while(atomisedNameStr
.contains(" ")) {
1648 atomisedNameStr
=atomisedNameStr
.replace(" ", " ");
1649 // logger.info("atomisedNameStr: "+atomisedNameStr);
1651 atomisedNameStr
=atomisedNameStr
.trim();
1653 if (fullName
!= null){
// Free text and atomised name disagree: getScientificName supplies the name to use.
1654 if (!fullName
.equalsIgnoreCase(atomisedNameStr
)) {
1655 newName
=getScientificName(fullName
,atomisedNameStr
,classification
.getTitleCache(),name
);
1660 rank
= askForRank(newName
, rank
, nomenclaturalCode
);
1661 String
[] names
= new String
[4];
1664 names
[2]=rank
.toString();
1665 names
[3]=identifier
;
1671 * @param classification2
// Replaces the classification used by this extractor with the given one.
1673 public void updateClassification(Classification classification2
) {
1674 classification
= classification2
;