minor
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / taxonx2013 / TaxonXTreatmentExtractor.java
1 // $Id$
2 /**
3 * Copyright (C) 2013 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10 package eu.etaxonomy.cdm.io.taxonx2013;
11
12 import java.io.File;
13 import java.io.FileWriter;
14 import java.io.IOException;
15 import java.net.URI;
16 import java.util.ArrayList;
17 import java.util.HashMap;
18 import java.util.List;
19 import java.util.Map;
20 import java.util.Set;
21 import java.util.regex.Pattern;
22
23 import javax.xml.transform.TransformerException;
24 import javax.xml.transform.TransformerFactoryConfigurationError;
25
26 import org.apache.commons.lang.StringUtils;
27 import org.w3c.dom.Node;
28 import org.w3c.dom.NodeList;
29
30 import com.ibm.lsid.MalformedLSIDException;
31
32 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
33 import eu.etaxonomy.cdm.model.common.CdmBase;
34 import eu.etaxonomy.cdm.model.common.DefinedTermBase;
35 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
36 import eu.etaxonomy.cdm.model.common.LSID;
37 import eu.etaxonomy.cdm.model.common.Language;
38 import eu.etaxonomy.cdm.model.common.OriginalSourceType;
39 import eu.etaxonomy.cdm.model.description.Feature;
40 import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
41 import eu.etaxonomy.cdm.model.description.PolytomousKey;
42 import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
43 import eu.etaxonomy.cdm.model.description.TaxonDescription;
44 import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
45 import eu.etaxonomy.cdm.model.description.TextData;
46 import eu.etaxonomy.cdm.model.name.BacterialName;
47 import eu.etaxonomy.cdm.model.name.BotanicalName;
48 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
49 import eu.etaxonomy.cdm.model.name.NonViralName;
50 import eu.etaxonomy.cdm.model.name.Rank;
51 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
52 import eu.etaxonomy.cdm.model.name.ZoologicalName;
53 import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
54 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
55 import eu.etaxonomy.cdm.model.reference.Reference;
56 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
57 import eu.etaxonomy.cdm.model.taxon.Classification;
58 import eu.etaxonomy.cdm.model.taxon.Synonym;
59 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
60 import eu.etaxonomy.cdm.model.taxon.Taxon;
61 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
62 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
63 import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
64 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
65 import eu.etaxonomy.cdm.strategy.parser.ParserProblem;
66
67 /**
68 * @author pkelbert
69 * @date 2 avr. 2013
70 *
71 */
72 public class TaxonXTreatmentExtractor extends TaxonXExtractor{
73
74 private final NomenclaturalCode nomenclaturalCode;
75 private Classification classification;
76
77 private String treatmentMainName,originalTreatmentName;
78
79 private final HashMap<String,Map<String,String>> namesMap = new HashMap<String, Map<String,String>>();
80
81
82 private final Pattern keypattern = Pattern.compile("^(\\d+.*|-\\d+.*)");
83 private final Pattern keypatternend = Pattern.compile("^.+?\\d$");
84
85 private boolean maxRankRespected =false;
86
/**
 * Builds an extractor working on the given classification and prepares the collectors
 * through prepareCollectors, using the agent service of the importer.
 *
 * @param nomenclaturalCode the code used when names are parsed
 * @param classification the classification taxa are added to
 * @param importer the running import, used to reach the CDM services
 * @param configState the state of the running import
 */
public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode, Classification classification, TaxonXImport importer,
        TaxonXImportState configState) {
    this.importer = importer;
    this.configState = configState;
    this.classification = classification;
    this.nomenclaturalCode = nomenclaturalCode;
    prepareCollectors(configState, importer.getAgentService());
}
101
102 /**
103 * extracts all the treament information and save them
104 * @param treatmentnode: the XML Node
105 * @param tosave: the list of object to save into the CDM
106 * @param refMods: the reference extracted from the MODS
107 * @param sourceName: the URI of the document
108 */
109 @SuppressWarnings({ "rawtypes", "unused" })
110 protected void extractTreatment(Node treatmentnode, List<Object> tosave, Reference<?> refMods, URI sourceName) {
111 logger.info("extractTreatment");
112 List<TaxonNameBase> nametosave = new ArrayList<TaxonNameBase>();
113 NodeList children = treatmentnode.getChildNodes();
114 Taxon acceptedTaxon =null;
115 Taxon defaultTaxon =null;
116 boolean refgroup=false;
117
118 for (int i=0;i<children.getLength();i++){
119 if(children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group")) {
120 refgroup=true;
121 }
122 }
123
124 for (int i=0;i<children.getLength();i++){
125
126 if (children.item(i).getNodeName().equalsIgnoreCase("tax:nomenclature")){
127 NodeList nomenclature = children.item(i).getChildNodes();
128 boolean containsName=false;
129 for(int k=0;k<nomenclature.getLength();k++){
130 if(nomenclature.item(k).getNodeName().equalsIgnoreCase("tax:name")){
131 containsName=true;
132 break;
133 }
134 }
135 if (containsName){
136 reloadClassification();
137 //extract "main" the scientific name
138 acceptedTaxon = extractNomenclature(children.item(i),nametosave,refMods);
139 }
140 }
141 else if (children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group") && maxRankRespected){
142 reloadClassification();
143 //extract the References within the document
144 extractReferences(children.item(i),nametosave,acceptedTaxon,refMods);
145 }
146 else if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
147 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected){
148 File file = new File("/home/pkelbert/Bureau/multipleTaxonX.txt");
149 FileWriter writer;
150 try {
151 writer = new FileWriter(file ,true);
152 writer.write(sourceName+"\n");
153 writer.flush();
154 writer.close();
155 } catch (IOException e1) {
156 // TODO Auto-generated catch block
157 e1.printStackTrace();
158 }
159 String multiple = askMultiple(children.item(i));
160 if (multiple.equalsIgnoreCase("synonyms")) {
161 extractSynonyms(children.item(i),nametosave, acceptedTaxon,refMods);
162 }
163 else
164 if(multiple.equalsIgnoreCase("material examined")){
165 extractMaterials(children.item(i),acceptedTaxon, refMods, nametosave);
166 }
167 else
168 if (multiple.equalsIgnoreCase("distribution")){
169 extractDistribution(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods);
170 }
171 else {
172 extractSpecificFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,multiple);
173 }
174 }
175 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
176 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected){
177 extractFeature(children.item(i),acceptedTaxon,defaultTaxon, nametosave, refMods, Feature.BIOLOGY_ECOLOGY());
178 }
179 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
180 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected){
181 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods, Feature.DESCRIPTION());
182 }
183 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
184 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected){
185 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,Feature.DIAGNOSIS());
186 }
187 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
188 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected){
189 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods, Feature.DISCUSSION());
190 }
191
192 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
193 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected){
194 extractDistribution(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods);
195 }
196 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
197 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected){
198 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave,refMods,Feature.ETYMOLOGY());
199 }
200
201 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
202 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected){
203 extractMaterials(children.item(i),acceptedTaxon, refMods, nametosave);
204 }
205
206 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
207 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected){
208 //TODO IGNORE keys for the moment
209 //extractKey(children.item(i),acceptedTaxon, nametosave,source, refMods);
210 extractSpecificFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,"Keys - unparsed");
211 }
212 else{
213 logger.info("ANOTHER KIND OF NODES: "+children.item(i).getNodeName()+", "+children.item(i).getAttributes());
214 if (children.item(i).getAttributes() !=null) {
215 logger.info(children.item(i).getAttributes().item(0));
216 }
217 }
218 }
219 // logger.info("saveUpdateNames");
220 if (maxRankRespected){
221 importer.getNameService().saveOrUpdate(nametosave);
222 importer.getClassificationService().saveOrUpdate(classification);
223 logger.info("saveUpdateNames-ok");
224 }
225 }
226
227
228 /**
229 * @param keys
230 * @param acceptedTaxon: the current acceptedTaxon
231 * @param nametosave: the list of objects to save into the CDM
232 * @param refMods: the current reference extracted from the MODS
233 */
234 @SuppressWarnings("rawtypes")
235 private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonNameBase> nametosave, Reference<?> refMods) {
236 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
237
238 NodeList children = keys.getChildNodes();
239 String key="";
240 PolytomousKey poly = PolytomousKey.NewInstance();
241 poly.addSource(OriginalSourceType.Import, null,null,refMods,null);
242 poly.addTaxonomicScope(acceptedTaxon);
243 poly.setTitleCache("bloup");
244 // poly.addCoveredTaxon(acceptedTaxon);
245 PolytomousKeyNode root = poly.getRoot();
246 PolytomousKeyNode previous = null,tmpKey=null;
247 Taxon taxonKey=null;
248 List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
249
250 // String fullContent = keys.getTextContent();
251 for (int i=0;i<children.getLength();i++){
252 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
253 NodeList paragraph = children.item(i).getChildNodes();
254 key="";
255 taxonKey=null;
256 for (int j=0;j<paragraph.getLength();j++){
257 if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
258 if (! paragraph.item(j).getTextContent().trim().isEmpty()){
259 key+=paragraph.item(j).getTextContent().trim();
260 // logger.info("KEY: "+j+"--"+key);
261 }
262 }
263 if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
264 taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
265 }
266 }
267 // logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
268 if (keypattern.matcher(key).matches()){
269 tmpKey = PolytomousKeyNode.NewInstance(key);
270 if (taxonKey!=null) {
271 tmpKey.setTaxon(taxonKey);
272 }
273 polyNodes.add(tmpKey);
274 if (previous == null) {
275 root.addChild(tmpKey);
276 } else {
277 previous.addChild(tmpKey);
278 }
279 }else{
280 if (!key.isEmpty()){
281 tmpKey=PolytomousKeyNode.NewInstance(key);
282 if (taxonKey!=null) {
283 tmpKey.setTaxon(taxonKey);
284 }
285 polyNodes.add(tmpKey);
286 if (keypatternend.matcher(key).matches()) {
287 root.addChild(tmpKey);
288 previous=tmpKey;
289 } else{
290 previous.addChild(tmpKey);
291 }
292
293 }
294 }
295 }
296 }
297 importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
298 importer.getPolytomousKeyService().saveOrUpdate(poly);
299 }
300
301 /**
302 * @param taxons: the XML Nodegroup
303 * @param nametosave: the list of objects to save into the CDM
304 * @param acceptedTaxon: the current accepted Taxon
305 * @param refMods: the current reference extracted from the MODS
306 *
307 * @return Taxon object built
308 */
309 @SuppressWarnings({ "rawtypes", "unchecked" })
310 private Taxon getTaxonFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference<?> refMods) {
311 // logger.info("getTaxonFromXML");
312 // logger.info("acceptedTaxon: "+acceptedTaxon);
313
314 TaxonNameBase nameToBeFilled = null;
315 String name="";
316
317 String[] enames = null;
318 Rank rank = Rank.UNKNOWN_RANK();
319 String original="";
320 String identifier="";
321
322 try {
323 enames = extractScientificName(taxons);
324 if (enames[1].isEmpty()) {
325 name=enames[0];
326 } else {
327 name=enames[1];
328 }
329 original=enames[0];
330 rank = Rank.getRankByName(enames[2]);
331 identifier = enames[3];
332 } catch (TransformerFactoryConfigurationError e1) {
333 logger.warn(e1);
334 } catch (TransformerException e1) {
335 logger.warn(e1);
336 } catch (UnknownCdmTypeException e) {
337 logger.warn("Rank problem!"+enames[2]);
338 rank=Rank.UNKNOWN_RANK();
339 }
340 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
341
342 nameToBeFilled = parser.parseFullName(name, nomenclaturalCode, rank);
343 if (nameToBeFilled.hasProblem() &&
344 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
345 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
346 nameToBeFilled=solveNameProblem(original, name,parser);
347 }
348
349 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave);
350
351 // importer.getNameService().saveOrUpdate(nametosave);
352 Taxon t = importer.getTaxonService().findBestMatchingTaxon(nameToBeFilled.getTitleCache());
353 if (t ==null){
354 // logger.info("BestTaxonService not the best or null");
355 t= new Taxon(nameToBeFilled,(Reference<?>) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
356 if (t.getSec() == null) {
357 t.setSec(refMods);
358 }
359 if(!configState.getConfig().doKeepOriginalSecundum()) {
360 t.setSec(configState.getConfig().getSecundum());
361 logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
362 }
363 t.addSource(OriginalSourceType.Import,null,null,refMods,null);
364
365 if (!identifier.isEmpty() && (identifier.length()>2)){
366 setLSID(identifier, t);
367 }
368
369 Taxon parentTaxon = askParent(t, classification);
370 if (parentTaxon ==null){
371 while (parentTaxon == null) {
372 parentTaxon = createParent(t, refMods);
373 classification.addParentChild(parentTaxon, t, refMods, null);
374 }
375 }else{
376 classification.addParentChild(parentTaxon, t, refMods, null);
377 }
378 }
379 else{
380 t = CdmBase.deproxy(t, Taxon.class);
381 }
382 if (!configState.getConfig().doKeepOriginalSecundum()) {
383 t.setSec(configState.getConfig().getSecundum());
384 logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
385 }
386 return t;
387 }
388
389
390 /**
391 * @param taxons: the XML Nodegroup
392 * @param nametosave: the list of objects to save into the CDM
393 * @param acceptedTaxon: the current accepted Taxon
394 * @param refMods: the current reference extracted from the MODS
395 *
396 * @return Taxon object built
397 */
398 @SuppressWarnings({ "rawtypes", "unchecked" })
399 private TaxonNameBase getTaxonNameBaseFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference<?> refMods) {
400 // logger.info("getTaxonFromXML");
401 // logger.info("acceptedTaxon: "+acceptedTaxon);
402
403 TaxonNameBase nameToBeFilled = null;
404 String name="";
405
406 String[] enames = null;
407 Rank rank = Rank.UNKNOWN_RANK();
408 String original="";
409 String identifier="";
410
411 try {
412 enames = extractScientificName(taxons);
413 if (enames[1].isEmpty()) {
414 name=enames[0];
415 } else {
416 name=enames[1];
417 }
418 original=enames[0];
419 rank = Rank.getRankByName(enames[2]);
420 identifier = enames[3];
421 } catch (TransformerFactoryConfigurationError e1) {
422 logger.warn(e1);
423 } catch (TransformerException e1) {
424 logger.warn(e1);
425 } catch (UnknownCdmTypeException e) {
426 logger.warn("Rank problem!"+enames[2]);
427 rank=Rank.UNKNOWN_RANK();
428 }
429 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
430
431 nameToBeFilled = parser.parseFullName(name, nomenclaturalCode, rank);
432 if (nameToBeFilled.hasProblem() &&
433 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
434 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
435 nameToBeFilled=solveNameProblem(original, name,parser);
436 }
437
438 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave);
439 return nameToBeFilled;
440
441 }
442
443
444 @SuppressWarnings("rawtypes")
445 private TaxonNameBase getTaxonNameBase (TaxonNameBase name, List<TaxonNameBase> nametosave){
446 List<TaxonNameBase> names = importer.getNameService().list(TaxonNameBase.class, null, null, null, null);
447 for (TaxonNameBase tb : names){
448 if (tb.getTitleCache().equalsIgnoreCase(name.getTitleCache())) {
449 logger.info("TaxonNameBase FOUND"+name.getTitleCache());
450 return tb;
451 }
452 }
453 logger.info("TaxonNameBase NOT FOUND "+name.getTitleCache());
454 nametosave.add(name);
455 return name;
456
457 }
458
459
460
461 /**
462 *
463 */
464 private void reloadClassification() {
465 Classification cl = importer.getClassificationService().find(classification.getUuid());
466 if (cl != null){
467 classification=cl;
468 }else{
469 importer.getClassificationService().saveOrUpdate(classification);
470 classification = importer.getClassificationService().find(classification.getUuid());
471 }
472
473 }
474
475 /**
476 * Create a Taxon for the current NameBase, based on the current reference
477 * @param taxonNameBase
478 * @param refMods: the current reference extracted from the MODS
479 * @return Taxon
480 */
481 @SuppressWarnings({ "unused", "rawtypes" })
482 private Taxon getTaxon(TaxonNameBase taxonNameBase, Reference<?> refMods) {
483 Taxon t = new Taxon(taxonNameBase,null );
484 if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
485 t.setSec(configState.getConfig().getSecundum());
486 logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
487 }
488 t.addSource(OriginalSourceType.Import,null,null,refMods,null);
489 return t;
490 }
491
492 /**
493 * @param nametosave
494 * @param distribution: the XML node group
495 * @param acceptedTaxon: the current accepted Taxon
496 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
497 * @param refMods: the current reference extracted from the MODS
498 */
499 @SuppressWarnings("rawtypes")
500 private void extractDistribution(Node distribution, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave, Reference<?> refMods) {
501 // logger.info("DISTRIBUTION");
502 // logger.info("acceptedTaxon: "+acceptedTaxon);
503 NodeList children = distribution.getChildNodes();
504 Map<Integer,List<MySpecimenOrObservation>> specimenOrObservations = new HashMap<Integer, List<MySpecimenOrObservation>>();
505 Map<Integer,String> descriptionsFulltext = new HashMap<Integer,String>();
506
507 for (int i=0;i<children.getLength();i++){
508 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
509 NodeList paragraph = children.item(i).getChildNodes();
510 for (int j=0;j<paragraph.getLength();j++){
511 if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
512 if(!paragraph.item(j).getTextContent().trim().isEmpty()) {
513 String s =paragraph.item(j).getTextContent().trim();
514 if (descriptionsFulltext.get(i) !=null){
515 s = descriptionsFulltext.get(i)+" "+s;
516 }
517 descriptionsFulltext.put(i, s);
518 }
519 }
520 else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
521 String s =getTaxonNameBaseFromXML(paragraph.item(j),nametosave,refMods).toString().split("sec.")[0];
522 if (descriptionsFulltext.get(i) !=null){
523 s = descriptionsFulltext.get(i)+" "+s;
524 }
525 descriptionsFulltext.put(i, s);
526 }
527 else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")){
528 MySpecimenOrObservation specimenOrObservation = new MySpecimenOrObservation();
529 DerivedUnit derivedUnitBase = null;
530 specimenOrObservation = extractSpecimenOrObservation(paragraph.item(j), derivedUnitBase, SpecimenOrObservationType.DerivedUnit);
531 List<MySpecimenOrObservation> speObsList = specimenOrObservations.get(i);
532 if (speObsList == null) {
533 speObsList=new ArrayList<MySpecimenOrObservation>();
534 }
535 speObsList.add(specimenOrObservation);
536 specimenOrObservations.put(i,speObsList);
537
538 String s = specimenOrObservation.getDerivedUnitBase().toString();
539 if (descriptionsFulltext.get(i) !=null){
540 s = descriptionsFulltext.get(i)+" "+s;
541 }
542 descriptionsFulltext.put(i, s);
543 }
544
545 }
546 }
547 }
548
549 int m=0;
550 for (int k:descriptionsFulltext.keySet()) {
551 if (k>m) {
552 m=k;
553 }
554 }
555 for (int k:specimenOrObservations.keySet()) {
556 if (k>m) {
557 m=k;
558 }
559 }
560
561
562 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
563 Feature currentFeature = Feature.DISTRIBUTION();
564 DerivedUnit derivedUnitBase=null;
565 String descr="";
566 for (int k=0;k<=m;k++){
567 if(specimenOrObservations.keySet().contains(k)){
568 for (MySpecimenOrObservation soo:specimenOrObservations.get(k) ) {
569 derivedUnitBase = soo.getDerivedUnitBase();
570 descr=soo.getDescr();
571
572 derivedUnitBase.addSource(OriginalSourceType.Import, null,null,refMods,null);
573
574 importer.getOccurrenceService().saveOrUpdate(derivedUnitBase);
575
576 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
577 acceptedTaxon.addDescription(taxonDescription);
578
579
580 IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
581
582 Feature feature=null;
583 feature = makeFeature(derivedUnitBase);
584 if(!StringUtils.isEmpty(descr)) {
585 derivedUnitBase.setTitleCache(descr, true);
586 }
587 indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
588 indAssociation.setFeature(feature);
589 indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
590
591 taxonDescription.addElement(indAssociation);
592 taxonDescription.setTaxon(acceptedTaxon);
593 taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
594
595 importer.getDescriptionService().saveOrUpdate(taxonDescription);
596 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
597 td.setDescribedSpecimenOrObservation(soo.getDerivedUnitBase());
598 }
599 }
600
601 if (descriptionsFulltext.keySet().contains(k)){
602 if (!descriptionsFulltext.get(k).isEmpty() && (descriptionsFulltext.get(k).startsWith("Hab.") || descriptionsFulltext.get(k).startsWith("Habitat"))){
603 setParticularDescription(descriptionsFulltext.get(k),acceptedTaxon,defaultTaxon, refMods, Feature.HABITAT());
604 break;
605 }
606 else{
607 TextData textData = TextData.NewInstance();
608
609 textData.setFeature(currentFeature);
610 textData.putText(Language.UNKNOWN_LANGUAGE(), descriptionsFulltext.get(k));
611 textData.addSource(OriginalSourceType.Import, null, null, refMods, null);
612
613 td.addElement(textData);
614 }
615 }
616
617
618 if (descriptionsFulltext.keySet().contains(k) || specimenOrObservations.keySet().contains(k)){
619 td.addSource(OriginalSourceType.Import, null,null,refMods,null);
620 acceptedTaxon.addDescription(td);
621 importer.getDescriptionService().saveOrUpdate(td);
622 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
623 }
624 }
625 }
626
627
628 /**
629 * @param materials: the XML node group
630 * @param acceptedTaxon: the current accepted Taxon
631 * @param refMods: the current reference extracted from the MODS
632 */
633 @SuppressWarnings("rawtypes")
634 private void extractMaterials(Node materials, Taxon acceptedTaxon, Reference<?> refMods,List<TaxonNameBase> nametosave) {
635 // logger.info("EXTRACTMATERIALS");
636 // logger.info("acceptedTaxon: "+acceptedTaxon);
637 NodeList children = materials.getChildNodes();
638 NodeList events = null;
639 String descr="";
640
641 DerivedUnit derivedUnitBase=null;
642 MySpecimenOrObservation myspecimenOrObservation = null;
643
644 for (int i=0;i<children.getLength();i++){
645 String rawAssociation="";
646 boolean added=false;
647 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
648 events = children.item(i).getChildNodes();
649 for(int k=0;k<events.getLength();k++){
650 if (events.item(k).getNodeName().equalsIgnoreCase("tax:name")){
651 String linkedTaxon = getTaxonNameBaseFromXML(events.item(k), nametosave,refMods).toString();//TODO NOT IMPLEMENTED IN THE CDM YET
652 rawAssociation+=linkedTaxon.split("sec")[0];
653 }
654 if (! events.item(k).getNodeName().equalsIgnoreCase("tax:name")
655 && !events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
656 rawAssociation+= events.item(k).getTextContent().trim();
657 }
658 if(events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
659 if (!containsDistinctLetters(rawAssociation.replaceAll(";",""))) {
660 rawAssociation="no description text";
661 }
662 added=true;
663 DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.FieldUnit);
664 derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
665 derivedUnitBase.addSource(OriginalSourceType.Import, null,null,refMods,null);
666 importer.getOccurrenceService().saveOrUpdate(derivedUnitBase);
667
668 myspecimenOrObservation = extractSpecimenOrObservation(events.item(k),derivedUnitBase,SpecimenOrObservationType.FieldUnit);
669 derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
670 descr=myspecimenOrObservation.getDescr();
671
672 derivedUnitBase.addSource(OriginalSourceType.Import, null,null,refMods,null);
673
674 importer.getOccurrenceService().saveOrUpdate(derivedUnitBase);
675
676 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
677 acceptedTaxon.addDescription(taxonDescription);
678
679
680 IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
681
682 Feature feature = makeFeature(derivedUnitBase);
683 if(!StringUtils.isEmpty(descr)) {
684 derivedUnitBase.setTitleCache(descr, true);
685 }
686 indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
687 indAssociation.setFeature(feature);
688 indAssociation.addSource(OriginalSourceType.Import,null, null, refMods, null);
689
690 taxonDescription.addElement(indAssociation);
691 taxonDescription.setTaxon(acceptedTaxon);
692 taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
693
694 importer.getDescriptionService().saveOrUpdate(taxonDescription);
695 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
696 }
697 if (!rawAssociation.isEmpty() && !added){
698 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
699 acceptedTaxon.addDescription(taxonDescription);
700
701 IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
702
703 Feature feature = Feature.MATERIALS_EXAMINED();
704 if(!StringUtils.isEmpty(rawAssociation)) {
705 derivedUnitBase.setTitleCache(rawAssociation, true);
706 }
707 indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
708 indAssociation.setFeature(feature);
709 indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
710
711 taxonDescription.addElement(indAssociation);
712 taxonDescription.setTaxon(acceptedTaxon);
713 taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
714
715 importer.getDescriptionService().saveOrUpdate(taxonDescription);
716 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
717
718 rawAssociation="";
719 }
720 }
721 }
722 }
723 }
724
725 /**
726 * @param materials: the XML node group
727 * @param acceptedTaxon: the current accepted Taxon
728 * @param refMods: the current reference extracted from the MODS
729 */
730 @SuppressWarnings("rawtypes")
731 private void extractMaterialsDirect(Node materials, Taxon acceptedTaxon, Reference<?> refMods, String event) {
732 // logger.info("EXTRACTMATERIALS");
733 // logger.info("acceptedTaxon: "+acceptedTaxon);
734 String descr="";
735
736 DerivedUnit derivedUnitBase=null;
737 MySpecimenOrObservation myspecimenOrObservation = null;
738
739
740 myspecimenOrObservation = extractSpecimenOrObservation(materials,derivedUnitBase, SpecimenOrObservationType.FieldUnit);
741 derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
742 descr=myspecimenOrObservation.getDescr();
743
744 derivedUnitBase.addSource(OriginalSourceType.Import, null,null,refMods,null);
745
746 importer.getOccurrenceService().saveOrUpdate(derivedUnitBase);
747
748 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
749 acceptedTaxon.addDescription(taxonDescription);
750
751
752 IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
753
754 Feature feature=null;
755 if (event.equalsIgnoreCase("collection")){
756 feature = makeFeature(derivedUnitBase);
757 }
758 else{
759 feature = Feature.MATERIALS_EXAMINED();
760 }
761 if(!StringUtils.isEmpty(descr)) {
762 derivedUnitBase.setTitleCache(descr, true);
763 }
764 indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
765 indAssociation.setFeature(feature);
766 indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
767
768 taxonDescription.addElement(indAssociation);
769 taxonDescription.setTaxon(acceptedTaxon);
770 taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
771
772 importer.getDescriptionService().saveOrUpdate(taxonDescription);
773 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
774
775
776 }
777
778
779 /**
780 * @param description: the XML node group
781 * @param acceptedTaxon: the current acceptedTaxon
782 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
783 * @param nametosave: the list of objects to save into the CDM
784 * @param refMods: the current reference extracted from the MODS
785 * @param featureName: the feature name
786 */
787 private void extractSpecificFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
788 List<TaxonNameBase> nametosave, Reference<?> refMods, String featureName ) {
789 NodeList children = description.getChildNodes();
790 NodeList insideNodes ;
791 String descr ="";
792 String localdescr="";
793
794 // String fullContent = description.getTextContent();
795 for (int i=0;i<children.getLength();i++){
796 localdescr="";
797 if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
798 descr += children.item(i).getTextContent().trim();
799 }
800 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
801 insideNodes=children.item(i).getChildNodes();
802 List<String> blabla= new ArrayList<String>();
803 for (int j=0;j<insideNodes.getLength();j++){
804 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
805 String linkedTaxon = getTaxonNameBaseFromXML(insideNodes.item(j), nametosave,refMods).toString();//TODO NOT IMPLEMENTED IN THE CDM YET
806 blabla.add(linkedTaxon.split("sec")[0]);
807 }
808 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
809 if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
810 blabla.add(insideNodes.item(j).getTextContent().trim());
811 localdescr += insideNodes.item(j).getTextContent().trim();
812 }
813 }
814 }
815 if (!blabla.isEmpty()) {
816 List<DefinedTermBase> features = importer.getTermService().list(Feature.class, null,null,null,null);
817 Feature currentFeature=null;
818 for (DefinedTermBase feature: features){
819 String tmpF = ((Feature)feature).getTitleCache();
820 if (tmpF.equalsIgnoreCase(featureName)) {
821 currentFeature=(Feature)feature;
822 }
823 }
824 if (currentFeature == null) {
825 currentFeature=Feature.NewInstance(featureName, featureName, featureName);
826 importer.getTermService().saveOrUpdate(currentFeature);
827 }
828 setParticularDescription(StringUtils.join(blabla," "),acceptedTaxon,defaultTaxon, refMods,currentFeature);
829 }
830 }
831
832 }
833
834 }
835
836
837
838
839 /**
840 * @param children: the XML node group
841 * @param nametosave: the list of objects to save into the CDM
842 * @param acceptedTaxon: the current acceptedTaxon
843 * @param refMods: the current reference extracted from the MODS
844 * @param fullContent :the parsed XML content
845 * @return a list of description (text)
846 */
847 @SuppressWarnings("unused")
848 private List<String> parseParagraph(List<TaxonNameBase> nametosave, Taxon acceptedTaxon, Reference<?> refMods, Node paragraph, Feature feature){
849 List<String> fullDescription= new ArrayList<String>();
850 // String localdescr;
851 String descr="";
852 NodeList insideNodes ;
853 boolean collectionEvent = false;
854 List<Node>collectionEvents = new ArrayList<Node>();
855
856 NodeList children = paragraph.getChildNodes();
857
858 for (int i=0;i<children.getLength();i++){
859 // localdescr="";
860 if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
861 descr += children.item(i).getTextContent().trim();
862 }
863 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
864 insideNodes=children.item(i).getChildNodes();
865 List<String> blabla= new ArrayList<String>();
866 for (int j=0;j<insideNodes.getLength();j++){
867 boolean nodeKnown = false;
868 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
869 String linkedTaxon = getTaxonNameBaseFromXML(insideNodes.item(j), nametosave,refMods).toString();//TODO NOT IMPLEMENTED IN THE CDM YET
870 blabla.add(linkedTaxon.split("sec")[0]);
871 nodeKnown=true;
872 }
873 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
874 if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
875 blabla.add(insideNodes.item(j).getTextContent().trim());
876 // localdescr += insideNodes.item(j).getTextContent().trim();
877 }
878 nodeKnown=true;
879 }
880 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:bibref")) {
881 String ref = insideNodes.item(j).getTextContent().trim();
882 if (ref.endsWith(";") && ((ref.length())>1)) {
883 ref=ref.substring(0, ref.length()-1)+".";
884 }
885 Reference<?> reference = ReferenceFactory.newGeneric();
886 reference.setTitleCache(ref, true);
887 blabla.add(reference.getTitleCache());
888 nodeKnown=true;
889 }
890 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")) {
891 collectionEvent=true;
892 collectionEvents.add(insideNodes.item(j));
893 nodeKnown=true;
894 }
895 if (!nodeKnown) {
896 logger.info("Node not handled yet : "+insideNodes.item(j).getNodeName());
897 logger.warn("Node not handled yet : "+insideNodes.item(j).getNodeName());
898 }
899
900 }
901 if (!blabla.isEmpty()) {
902 fullDescription.add(StringUtils.join(blabla," "));
903 }
904 }
905 }
906 if (collectionEvent) {
907 logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
908 for (Node coll:collectionEvents){
909 extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
910 }
911 }
912 return fullDescription;
913 }
914
915
916 /**
917 * @param description: the XML node group
918 * @param acceptedTaxon: the current acceptedTaxon
919 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
920 * @param nametosave: the list of objects to save into the CDM
921 * @param refMods: the current reference extracted from the MODS
922 * @param feature: the feature to link the data with
923 */
924 private void extractFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave, Reference<?> refMods, Feature feature){
925 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
926 List<String> fullDescription= parseParagraph( nametosave, acceptedTaxon, refMods, description,feature);
927
928 if (!fullDescription.isEmpty()) {
929 setParticularDescription(StringUtils.join(fullDescription,"<br/>"),acceptedTaxon,defaultTaxon, refMods,feature);
930 }
931
932 }
933
934
935 /**
936 * @param descr: the XML Nodegroup to parse
937 * @param acceptedTaxon: the current acceptedTaxon
938 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
939 * @param refMods: the current reference extracted from the MODS
940 * @param currentFeature: the feature name
941 * @return
942 */
943 private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon, Reference<?> refMods, Feature currentFeature) {
944 // logger.info("setParticularDescription "+currentFeature);
945 // logger.info("acceptedTaxon: "+acceptedTaxon);
946 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
947
948 TextData textData = TextData.NewInstance();
949 textData.setFeature(currentFeature);
950 textData.addSource(OriginalSourceType.Import, null,null,refMods,null);
951
952 textData.putText(Language.UNKNOWN_LANGUAGE(), descr+"<br/>");
953
954 if(! descr.isEmpty() && (acceptedTaxon!=null)){
955 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
956 td.addElement(textData);
957 td.addSource(OriginalSourceType.Import,null,null,refMods,null);
958 acceptedTaxon.addDescription(td);
959 importer.getDescriptionService().saveOrUpdate(td);
960 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
961 }
962
963 if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
964 try{
965 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
966 if (tmp!=null) {
967 defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
968 }else{
969 importer.getTaxonService().saveOrUpdate(defaultTaxon);
970 }
971 }catch(Exception e){
972 logger.debug("TAXON EXISTS"+defaultTaxon);
973 }
974
975 TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
976 defaultTaxon.addDescription(td);
977 td.addElement(textData);
978 td.addSource(OriginalSourceType.Import,null,null,refMods,null);
979 importer.getDescriptionService().saveOrUpdate(td);
980 importer.getTaxonService().saveOrUpdate(defaultTaxon);
981 }
982 }
983
984
985
986 /**
987 * @param synonyms: the XML Nodegroup to parse
988 * @param nametosave: the list of objects to save into the CDM
989 * @param acceptedTaxon: the current acceptedTaxon
990 * @param refMods: the current reference extracted from the MODS
991 */
992 @SuppressWarnings({ "rawtypes", "unchecked" })
993 private void extractSynonyms(Node synonyms, List<TaxonNameBase> nametosave,Taxon acceptedTaxon, Reference<?> refMods) {
994 // logger.info("extractSynonyms: "+acceptedTaxon);
995 Taxon ttmp = (Taxon) importer.getTaxonService().find(acceptedTaxon.getUuid());
996 if (ttmp != null) {
997 acceptedTaxon = CdmBase.deproxy(ttmp,Taxon.class);
998 }
999 else{
1000 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1001 }
1002 NodeList children = synonyms.getChildNodes();
1003 TaxonNameBase nameToBeFilled = null;
1004 List<String> names = new ArrayList<String>();
1005
1006 String identifier="";
1007
1008 for (int i=0;i<children.getLength();i++){
1009 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1010 NodeList tmp = children.item(i).getChildNodes();
1011 // String fullContent = children.item(i).getTextContent();
1012 for (int j=0; j< tmp.getLength();j++){
1013 if(tmp.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1014 String[] enames;
1015 try {
1016 enames = extractScientificName(tmp.item(j));
1017 if (enames[1].isEmpty()) {
1018 names.add(enames[0]+"---"+enames[2]+"---"+enames[3]);
1019 } else {
1020 names.add(enames[1]+"---"+enames[2]+"---"+enames[3]);
1021 }
1022 } catch (TransformerFactoryConfigurationError e) {
1023 logger.warn(e);
1024 } catch (TransformerException e) {
1025 logger.warn(e);
1026 }
1027
1028 }
1029 }
1030 }
1031 }
1032 for(String name:names){
1033 System.out.println("HANDLE NAME "+name);
1034 Rank rank;
1035 try {
1036 rank = Rank.getRankByName(name.split("---")[1]);
1037 } catch (UnknownCdmTypeException e) {
1038 logger.warn("Rank problem!");
1039 rank=null;
1040 }
1041 try{
1042 identifier = name.split("---")[2];
1043 }catch(Exception e){logger.warn("identifier empty"); identifier="";}
1044 name = name.split("---")[0];
1045
1046 String original = name;
1047
1048 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1049 nameToBeFilled = parser.parseFullName(name, nomenclaturalCode, rank);
1050 if (nameToBeFilled.hasProblem() &&
1051 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1052 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1053 nameToBeFilled = solveNameProblem(original, name, parser);
1054 }
1055 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave);
1056 Synonym synonym = Synonym.NewInstance(nameToBeFilled, refMods);
1057
1058
1059 if (!identifier.isEmpty() && (identifier.length()>2)){
1060 setLSID(identifier, synonym);
1061 }
1062
1063 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF());
1064 System.out.println("SYNONYM");
1065
1066 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1067 }
1068
1069 }
1070
1071
1072
1073
1074
1075 /**
1076 * @param refgroup: the XML nodes
1077 * @param nametosave: the list of objects to save into the CDM
1078 * @param acceptedTaxon: the current acceptedTaxon
1079 * @param nametosave: the list of objects to save into the CDM
1080 * @param refMods: the current reference extracted from the MODS
1081 * @return the acceptedTaxon (why?)
1082 */
1083 @SuppressWarnings({ "null", "unused" ,"rawtypes" })
1084 private Taxon extractReferences(Node refgroup, List<TaxonNameBase> nametosave, Taxon acceptedTaxon, Reference<?> refMods) {
1085 // logger.info("extractReferences");
1086 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1087
1088 NodeList children = refgroup.getChildNodes();
1089 NonViralName<?> nameToBeFilled = null;
1090 boolean accepted=true;
1091 for (int i=0;i<children.getLength();i++){
1092 if(children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1093 NodeList references = children.item(i).getChildNodes();
1094 int nbRef=0;
1095 boolean foundBibref=false;
1096 for (int j=0;j<references.getLength();j++){
1097 if(references.item(j).getNodeName().equalsIgnoreCase("tax:bibref")){
1098 foundBibref=true;
1099 String ref = references.item(j).getTextContent().trim();
1100 if (ref.endsWith(";") && ((ref.length())>1)) {
1101 ref=ref.substring(0, ref.length()-1)+".";
1102 }
1103 if (ref.startsWith(treatmentMainName) && !ref.endsWith(treatmentMainName)) {
1104 ref=ref.replace(treatmentMainName, "");
1105 ref=ref.trim();
1106 while (ref.startsWith(".") || ref.startsWith(",")) {
1107 ref=ref.replace(".","").replace(",","").trim();
1108 }
1109 }
1110
1111 // logger.info("Current reference :"+nbRef+", "+ref+", "+treatmentMainName+"--"+ref.indexOf(treatmentMainName));
1112 Reference<?> reference = ReferenceFactory.newGeneric();
1113 reference.setTitleCache(ref, true);
1114
1115 boolean makeEmpty = false;
1116 // Rank rank = null;
1117 // logger.info("TREATMENTMAINNAME: "+treatmentMainName);
1118 // logger.info("ref: "+ref);
1119 if (nbRef==0) {
1120 accepted=true;
1121 } else {
1122 accepted=false;
1123 }
1124
1125 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1126 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1127 nameToBeFilled = BotanicalName.NewInstance(null);
1128 }
1129 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1130 nameToBeFilled = ZoologicalName.NewInstance(null);
1131 }
1132 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1133 nameToBeFilled = BacterialName.NewInstance(null);
1134 }
1135 if (accepted){
1136 acceptedTaxon.getName().setNomenclaturalReference(reference);
1137 nameToBeFilled.setNomenclaturalReference(reference);
1138 acceptedTaxon.addSource(OriginalSourceType.Import,null,null,refMods,null);
1139 }else{
1140 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1141 acceptedTaxon.addDescription(td);
1142 acceptedTaxon.addSource(OriginalSourceType.Import,null,null,refMods,null);
1143
1144 TextData textData = TextData.NewInstance(Feature.CITATION());
1145
1146 textData.addSource(OriginalSourceType.Import, null, null, reference, null, acceptedTaxon.getName(), ref);
1147 td.addElement(textData);
1148 td.addSource(OriginalSourceType.Import, null,null,refMods,null);
1149
1150 importer.getDescriptionService().saveOrUpdate(td);
1151 }
1152 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1153 // logger.warn("BWAAHHHH: "+nameToBeFilled.getParsingProblems()+", "+ref);
1154 nbRef++;
1155 }
1156 }
1157 if (!foundBibref){
1158 String refString="";
1159 String name="";
1160 String identifier="";
1161 for (int j=0;j<references.getLength();j++){
1162 //no bibref tag inside
1163 logger.info("references.item(j).getNodeName()"+references.item(j).getNodeName());
1164 if (references.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1165 String[] enames;
1166 try {
1167 enames = extractScientificName(references.item(j));
1168 if (enames[1].isEmpty()) {
1169 name=enames[0]+"---"+enames[2]+"---"+enames[3];
1170 } else {
1171 name=enames[1]+"---"+enames[2]+"---"+enames[3];
1172 }
1173 } catch (TransformerFactoryConfigurationError e) {
1174 logger.warn(e);
1175 } catch (TransformerException e) {
1176 logger.warn(e);
1177 }
1178
1179 name=name.trim();
1180 }
1181 if (references.item(j).getNodeName().equalsIgnoreCase("#text")){
1182 refString = references.item(j).getTextContent().trim();
1183 }
1184 if(references.item(j).getNodeName().equalsIgnoreCase("#text") && name.isEmpty() && !references.item(j).getTextContent().trim().isEmpty()){
1185 try{
1186 identifier = name.split("---")[3];
1187 }catch(Exception e ){logger.warn("no identifier");identifier="";}
1188 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1189 String fullLineRefName = references.item(j).getTextContent().trim();
1190 TaxonNameBase nameTBF = parser.parseFullName(fullLineRefName, nomenclaturalCode, Rank.UNKNOWN_RANK());
1191 if (nameTBF.hasProblem() &&
1192 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1193 nameTBF=solveNameProblem(fullLineRefName, fullLineRefName,parser);
1194 }
1195 nameTBF = getTaxonNameBase(nameTBF,nametosave);
1196 Synonym synonym = Synonym.NewInstance(nameTBF, refMods);
1197
1198
1199 if (!identifier.isEmpty() && (identifier.length()>2)){
1200 setLSID(identifier, acceptedTaxon);
1201 }
1202
1203 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF());
1204 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1205 }
1206 }
1207
1208 if(!name.isEmpty()){
1209 logger.info("acceptedTaxon and name: *"+acceptedTaxon.getTitleCache()+"*, *"+name+"*");
1210 if (acceptedTaxon.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(name.split("---")[0].trim())){
1211 identifier = name.split("---")[3];
1212 Reference<?> refS = ReferenceFactory.newGeneric();
1213 refS.setTitleCache(refString, true);
1214 // TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1215 // acceptedTaxon.addDescription(td);
1216 // acceptedTaxon.addSource(refSource);
1217 //
1218 // TextData textData = TextData.NewInstance(Feature.CITATION());
1219 //
1220 // textData.addSource(null, null, refS, null);
1221 // td.addElement(textData);
1222 // td.addSource(refSource);
1223 // importer.getDescriptionService().saveOrUpdate(td);
1224
1225
1226 if (!identifier.isEmpty() && (identifier.length()>2)){
1227 setLSID(identifier, acceptedTaxon);
1228
1229 }
1230
1231 acceptedTaxon.getName().setNomenclaturalReference(refS);
1232 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1233 }
1234 else{
1235 Rank rank;
1236 try {
1237 rank = Rank.getRankByName(name.split("---")[1]);
1238 } catch (Exception e) {
1239 logger.warn("Rank or name problem!");
1240 rank=null;
1241 }
1242 name = name.split("---")[0].trim() + refString;
1243 String original = name;
1244 try{
1245 identifier = name.split("---")[3];
1246 }
1247 catch(Exception e){
1248 logger.warn("no identifier");
1249 identifier="";
1250 }
1251 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1252 TaxonNameBase nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
1253 if (nameTBF.hasProblem() &&
1254 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1255 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1256 nameTBF=solveNameProblem(original, name,parser);
1257 }
1258 nameTBF = getTaxonNameBase(nameTBF,nametosave);
1259 Synonym synonym = Synonym.NewInstance(nameTBF, refMods);
1260
1261
1262 if (!identifier.isEmpty() && (identifier.length()>2)){
1263 String id = identifier.split("__")[0];
1264 String source = identifier.split("__")[1];
1265 if (id.indexOf("lsid")>-1){
1266 try {
1267 LSID lsid = new LSID(id);
1268 synonym.setLsid(lsid);
1269 } catch (MalformedLSIDException e) {
1270 // TODO Auto-generated catch block
1271 e.printStackTrace();
1272 }
1273
1274 }
1275 else{
1276 //TODO ADD ORIGINAL SOURCE ID
1277 IdentifiableSource os = IdentifiableSource.NewInstance(OriginalSourceType.Import);
1278 os.setIdInSource(id);
1279 Reference<?> re = ReferenceFactory.newGeneric();
1280 re.setTitle(source);
1281 os.setCitation(re);
1282 synonym.addSource(os);
1283 }
1284 }
1285
1286 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF());
1287 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1288 }
1289 }
1290 }
1291 }
1292 }
1293 // importer.getClassificationService().saveOrUpdate(classification);
1294 return acceptedTaxon;
1295
1296 }
1297
1298 /**
1299 * @param identifier
1300 * @param acceptedTaxon
1301 */
1302 private void setLSID(String identifier, TaxonBase<?> taxon) {
1303 boolean lsidok=false;
1304 String id = identifier.split("__")[0];
1305 String source = identifier.split("__")[1];
1306 if (id.indexOf("lsid")>-1){
1307 try {
1308 LSID lsid = new LSID(id);
1309 taxon.setLsid(lsid);
1310 lsidok=true;
1311 } catch (MalformedLSIDException e) {
1312 logger.warn("Malformed LSID");
1313 }
1314
1315 }
1316 if ((id.indexOf("lsid")<0) || !lsidok){
1317 //ADD ORIGINAL SOURCE ID
1318 IdentifiableSource os = IdentifiableSource.NewInstance(OriginalSourceType.Import);
1319 os.setIdInSource(id);
1320 Reference<?> re = ReferenceFactory.newGeneric();
1321 re.setTitle(source);
1322 os.setCitation(re);
1323 taxon.addSource(os);
1324 }
1325
1326 }
1327
1328 /**
1329 * try to solve a parsing problem for a scientific name
1330 * @param original : the name from the OCR document
1331 * @param name : the tagged version
1332 * @param parser
1333 * @return the corrected TaxonNameBase
1334 */
1335 @SuppressWarnings({ "unchecked", "rawtypes" })
1336 private TaxonNameBase<?,?> solveNameProblem(String original, String name, INonViralNameParser parser) {
1337 Map<String,String> ato = namesMap.get(original);
1338 Rank rank=Rank.UNKNOWN_RANK();
1339
1340 if (ato == null){
1341 rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
1342 }else{
1343 rank = getRank(ato);
1344 }
1345 TaxonNameBase<?,?> nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
1346 // logger.info("RANK: "+rank);
1347 int retry=0;
1348 while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
1349 String fullname = getFullReference(name,nameTBF.getParsingProblems());
1350 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1351 nameTBF = BotanicalName.NewInstance(null);
1352 }
1353 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1354 nameTBF = ZoologicalName.NewInstance(null);
1355 }
1356 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1357 nameTBF= BacterialName.NewInstance(null);
1358 }
1359 parser.parseReferencedName(nameTBF, fullname, rank, false);
1360 retry++;
1361 }
1362 if (retry == 1){
1363 nameTBF.setFullTitleCache(name, true);
1364 // logger.info("FULL TITLE CACHE "+name);
1365 }
1366 return nameTBF;
1367 }
1368
1369 /**
1370 * @param nomenclatureNode: the XML nodes
1371 * @param nametosave: the list of objects to save into the CDM
1372 * @param refMods: the current reference extracted from the MODS
1373 * @return
1374 */
1375 @SuppressWarnings({ "rawtypes", "unused" })
1376 private Taxon extractNomenclature(Node nomenclatureNode, List<TaxonNameBase> nametosave, Reference<?> refMods) {
1377 // logger.info("extractNomenclature");
1378 NodeList children = nomenclatureNode.getChildNodes();
1379 String freetext;
1380 TaxonNameBase nameToBeFilled = null;
1381 Taxon acceptedTaxon = null;
1382 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1383 String identifier="";
1384
1385 Rank rank = Rank.UNKNOWN_RANK();
1386 // String fullContent = nomenclatureNode.getTextContent();
1387 for (int i=0;i<children.getLength();i++){
1388 if (children.item(i).getNodeName().equalsIgnoreCase("#text")) {
1389 freetext=children.item(i).getTextContent();
1390 }
1391 if (children.item(i).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1392 System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
1393 extractMaterialsDirect(children.item(i), acceptedTaxon, refMods, "collection");
1394 }
1395 if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
1396 String[] names;
1397 try {
1398 names = extractScientificName(children.item(i));
1399 treatmentMainName = names[1];
1400 originalTreatmentName = names[0];
1401 rank = Rank.getRankByName(names[2]);
1402 identifier=names[3];
1403
1404 } catch (TransformerFactoryConfigurationError e1) {
1405 logger.warn(e1);
1406 } catch (TransformerException e1) {
1407 logger.warn(e1);
1408 } catch (UnknownCdmTypeException e) {
1409 logger.warn(e);
1410 }
1411
1412 if (rank.equals(Rank.UNKNOWN_RANK()) || rank.isLower(configState.getConfig().getMaxRank())){
1413 maxRankRespected=true;
1414 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1415 nameToBeFilled = BotanicalName.NewInstance(null);
1416 }
1417 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1418 nameToBeFilled = ZoologicalName.NewInstance(null);
1419 }
1420 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1421 nameToBeFilled = BacterialName.NewInstance(null);
1422 }
1423 acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
1424 if (acceptedTaxon ==null ){
1425 nameToBeFilled = parser.parseFullName(treatmentMainName, nomenclaturalCode, null);
1426 if (nameToBeFilled.hasProblem() &&
1427 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1428 nameToBeFilled = solveNameProblem(originalTreatmentName,treatmentMainName,parser);
1429 }
1430 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave);
1431 if (!originalTreatmentName.isEmpty()) {
1432 TaxonNameDescription td = TaxonNameDescription.NewInstance();
1433 td.setTitleCache(originalTreatmentName);
1434 nameToBeFilled.addDescription(td);
1435 }
1436 nameToBeFilled.addSource(OriginalSourceType.Import,null,null,refMods,null);
1437 acceptedTaxon= new Taxon(nameToBeFilled,(Reference<?>) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
1438 if(!configState.getConfig().doKeepOriginalSecundum()) {
1439 acceptedTaxon.setSec(configState.getConfig().getSecundum());
1440 logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
1441 }
1442
1443
1444 if (!identifier.isEmpty() && (identifier.length()>2)){
1445 boolean lsidok=false;
1446 String id = identifier.split("__")[0];
1447 String source = identifier.split("__")[1];
1448 if (id.indexOf("lsid")>-1){
1449 try {
1450 LSID lsid = new LSID(id);
1451 acceptedTaxon.setLsid(lsid);
1452 lsidok=true;
1453 } catch (MalformedLSIDException e) {
1454 logger.warn("Malformed LSID");
1455 }
1456
1457 }
1458 if ((id.indexOf("lsid")<0) || !lsidok){
1459 //TODO ADD ORIGINAL SOURCE ID
1460 IdentifiableSource os = IdentifiableSource.NewInstance(OriginalSourceType.Import);
1461 os.setIdInSource(id);
1462 Reference<?> re = ReferenceFactory.newGeneric();
1463 re.setTitle(source);
1464 os.setCitation(re);
1465 acceptedTaxon.addSource(os);
1466 }
1467 }
1468
1469 acceptedTaxon.addSource(OriginalSourceType.Import, null,null,refMods,null);
1470 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1471
1472 Taxon parentTaxon = askParent(acceptedTaxon, classification);
1473 if (parentTaxon ==null){
1474 while (parentTaxon == null) {
1475 parentTaxon = createParent(acceptedTaxon, refMods);
1476 classification.addParentChild(parentTaxon, acceptedTaxon, refMods, null);
1477 }
1478 }else{
1479 classification.addParentChild(parentTaxon, acceptedTaxon, refMods, null);
1480 }
1481 }else{
1482 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1483 Set<IdentifiableSource> sources = acceptedTaxon.getSources();
1484 boolean sourcelinked=false;
1485 for (IdentifiableSource source:sources){
1486 if (source.getCitation().getTitle().equalsIgnoreCase(refMods.getTitleCache())) {
1487 sourcelinked=true;
1488 }
1489 }
1490 if (!configState.getConfig().doKeepOriginalSecundum()) {
1491 acceptedTaxon.setSec(configState.getConfig().getSecundum());
1492 logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
1493 }
1494 if (!sourcelinked){
1495 acceptedTaxon.addSource(OriginalSourceType.Import, null, null, refMods, null);
1496 }
1497 if (!sourcelinked || !configState.getConfig().doKeepOriginalSecundum()){
1498
1499 if (!identifier.isEmpty() && (identifier.length()>2)){
1500 setLSID(identifier, acceptedTaxon);
1501 }
1502 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1503 }
1504 }
1505 }else{
1506 maxRankRespected=false;
1507 }
1508 }
1509 }
1510 // importer.getClassificationService().saveOrUpdate(classification);
1511 return acceptedTaxon;
1512 }
1513
1514 /**
1515 * @param acceptedTaxon: the current acceptedTaxon
1516 * @param ref: the current reference extracted from the MODS
1517 * @return the parent for the current accepted taxon
1518 */
1519 private Taxon createParent(Taxon acceptedTaxon, Reference<?> ref) {
1520 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1521
1522 List<Rank> rankList = new ArrayList<Rank>();
1523 rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
1524
1525 List<String> rankListStr = new ArrayList<String>();
1526 for (Rank r:rankList) {
1527 rankListStr.add(r.toString());
1528 }
1529 String r="";
1530 String s = acceptedTaxon.getTitleCache();
1531 Taxon tax = null;
1532
1533 int addTaxon = askAddParent(s);
1534 logger.info("ADD TAXON: "+addTaxon);
1535 if (addTaxon == 0){
1536 Taxon tmp = askParent(acceptedTaxon, classification);
1537 if (tmp == null){
1538 s = askSetParent(s);
1539 r = askRank(s,rankListStr);
1540
1541 NonViralName<?> nameToBeFilled = null;
1542 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1543 nameToBeFilled = BotanicalName.NewInstance(null);
1544 }
1545 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1546 nameToBeFilled = ZoologicalName.NewInstance(null);
1547 }
1548 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1549 nameToBeFilled = BacterialName.NewInstance(null);
1550 }
1551 nameToBeFilled.setTitleCache(s);
1552 nameToBeFilled.setRank(getRank(r));
1553
1554 tax = Taxon.NewInstance(nameToBeFilled, ref);
1555 }
1556 else{
1557 tax=tmp;
1558 }
1559
1560 createParent(tax, ref);
1561 // logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
1562 classification.addParentChild(tax, acceptedTaxon, ref, null);
1563 }
1564 else{
1565 classification.addChildTaxon(acceptedTaxon, ref, null);
1566 tax=acceptedTaxon;
1567 }
1568 // logger.info("RETURN: "+tax );
1569 return tax;
1570
1571 }
1572
1573
1574
1575 /**
1576 * @param name
1577 * @throws TransformerFactoryConfigurationError
1578 * @throws TransformerException
1579 * @return a list of possible names
1580 */
1581 private String[] extractScientificName(Node name) throws TransformerFactoryConfigurationError, TransformerException {
1582 // System.out.println("extractScientificName");
1583 Rank rank = Rank.UNKNOWN_RANK();
1584 NodeList children = name.getChildNodes();
1585 String fullName = "";
1586 String newName="";
1587 String identifier="";
1588 HashMap<String, String> atomisedMap = new HashMap<String, String>();
1589 List<String> atomisedName= new ArrayList<String>();
1590
1591 String rankStr = "";
1592 Rank tmpRank ;
1593 for (int i=0;i<children.getLength();i++){
1594 if(children.item(i).getNodeName().equalsIgnoreCase("tax:xmldata")){
1595 NodeList atom = children.item(i).getChildNodes();
1596 for (int k=0;k<atom.getLength();k++){
1597 if (atom.item(k).getNodeName().equalsIgnoreCase("tax:xid")){
1598 try{
1599 identifier = atom.item(k).getAttributes().getNamedItem("identifier").getNodeValue();
1600 }catch(Exception e){
1601 System.out.println("pb with identifier, maybe empty");
1602 }
1603 try{
1604 identifier+="__"+atom.item(k).getAttributes().getNamedItem("source").getNodeValue();
1605 }catch(Exception e){
1606 System.out.println("pb with identifier, maybe empty");
1607 }
1608 }
1609 tmpRank = null;
1610 rankStr = atom.item(k).getNodeName().toLowerCase();
1611 // logger.info("RANKSTR:*"+rankStr+"*");
1612 if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
1613 rankStr=atom.item(k).getTextContent().trim();
1614 tmpRank = getRank(rankStr);
1615 }
1616 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
1617 if (tmpRank != null){
1618 rank=tmpRank;
1619 }
1620
1621 atomisedMap.put(rankStr.toLowerCase(),atom.item(k).getTextContent().trim());
1622 atomisedName.add(atom.item(k).getTextContent().trim());
1623 }
1624 }
1625 if(children.item(i).getNodeName().equalsIgnoreCase("#text") && !StringUtils.isBlank(children.item(i).getTextContent())){
1626 // logger.info("name non atomised: "+children.item(i).getTextContent());
1627 fullName = children.item(i).getTextContent().trim();
1628 // logger.info("fullname: "+fullName);
1629 }
1630 }
1631 if (fullName != null){
1632 fullName = fullName.replace("( ", "(");
1633 fullName = fullName.replace(" )",")");
1634
1635 }
1636 if (fullName.trim().isEmpty()){
1637 fullName=StringUtils.join(atomisedName," ");
1638 }
1639
1640 while(fullName.contains(" ")) {
1641 fullName=fullName.replace(" ", " ");
1642 // logger.info("while");
1643 }
1644
1645 namesMap.put(fullName,atomisedMap);
1646 String atomisedNameStr = StringUtils.join(atomisedName," ");
1647 while(atomisedNameStr.contains(" ")) {
1648 atomisedNameStr=atomisedNameStr.replace(" ", " ");
1649 // logger.info("atomisedNameStr: "+atomisedNameStr);
1650 }
1651 atomisedNameStr=atomisedNameStr.trim();
1652
1653 if (fullName != null){
1654 if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
1655 newName=getScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
1656 } else {
1657 newName=fullName;
1658 }
1659 }
1660 rank = askForRank(newName, rank, nomenclaturalCode);
1661 String[] names = new String[4];
1662 names[0]=fullName;
1663 names[1]=newName;
1664 names[2]=rank.toString();
1665 names[3]=identifier;
1666 return names;
1667
1668 }
1669
1670 /**
1671 * @param classification2
1672 */
1673 public void updateClassification(Classification classification2) {
1674 classification = classification2;
1675 }
1676
1677
1678 }