ref #6368 remove TaxonNameBase subclasses
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / taxonx2013 / TaxonXTreatmentExtractor.java
1 /**
2 * Copyright (C) 2013 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.io.taxonx2013;
10
11 import java.io.BufferedWriter;
12 import java.io.File;
13 import java.io.FileWriter;
14 import java.io.IOException;
15 import java.net.URI;
16 import java.util.ArrayList;
17 import java.util.Arrays;
18 import java.util.HashMap;
19 import java.util.List;
20 import java.util.Map;
21 import java.util.Set;
22 import java.util.UUID;
23 import java.util.regex.Matcher;
24 import java.util.regex.Pattern;
25
26 import javax.xml.transform.TransformerException;
27 import javax.xml.transform.TransformerFactoryConfigurationError;
28
29 import org.apache.commons.lang.StringUtils;
30 import org.apache.log4j.Logger;
31 import org.w3c.dom.Node;
32 import org.w3c.dom.NodeList;
33
34 import com.ibm.lsid.MalformedLSIDException;
35
36 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
37 import eu.etaxonomy.cdm.api.service.pager.Pager;
38 import eu.etaxonomy.cdm.model.agent.AgentBase;
39 import eu.etaxonomy.cdm.model.agent.Person;
40 import eu.etaxonomy.cdm.model.common.CdmBase;
41 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
42 import eu.etaxonomy.cdm.model.common.LSID;
43 import eu.etaxonomy.cdm.model.common.Language;
44 import eu.etaxonomy.cdm.model.common.OriginalSourceType;
45 import eu.etaxonomy.cdm.model.description.Feature;
46 import eu.etaxonomy.cdm.model.description.FeatureNode;
47 import eu.etaxonomy.cdm.model.description.FeatureTree;
48 import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
49 import eu.etaxonomy.cdm.model.description.TaxonDescription;
50 import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
51 import eu.etaxonomy.cdm.model.description.TextData;
52 import eu.etaxonomy.cdm.model.name.INonViralName;
53 import eu.etaxonomy.cdm.model.name.ITaxonNameBase;
54 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
55 import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
56 import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
57 import eu.etaxonomy.cdm.model.name.Rank;
58 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
59 import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
60 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
61 import eu.etaxonomy.cdm.model.reference.Reference;
62 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
63 import eu.etaxonomy.cdm.model.taxon.Classification;
64 import eu.etaxonomy.cdm.model.taxon.Synonym;
65 import eu.etaxonomy.cdm.model.taxon.SynonymType;
66 import eu.etaxonomy.cdm.model.taxon.Taxon;
67 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
68 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
69 import eu.etaxonomy.cdm.persistence.dto.UuidAndTitleCache;
70 import eu.etaxonomy.cdm.persistence.query.MatchMode;
71 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
72 import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
73 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
74 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImplRegExBase;
75
76 /**
77 * @author pkelbert
78 * @date 2 avr. 2013
79 *
80 */
81 public class TaxonXTreatmentExtractor extends TaxonXExtractor{
82
83 private static final String PUBLICATION_YEAR = "publicationYear";
84
85 private static final Logger logger = Logger.getLogger(TaxonXTreatmentExtractor.class);
86
87 private static final String notMarkedUp = "Not marked-up";
88 private static final UUID proIbioTreeUUID = UUID.fromString("2c49f506-c7f7-44de-a8b9-2e695de3769c");
89 private static final UUID OtherUUID = UUID.fromString("6465f8aa-2175-446f-807e-7163994b120f");
90 private static final UUID NotMarkedUpUUID = UUID.fromString("796fe3a5-2c9c-4a89-b298-7598ca944063");
91 private static final boolean skippQuestion = true;
92
93 private final NomenclaturalCode nomenclaturalCode;
94 private Classification classification;
95
96 private String treatmentMainName,originalTreatmentName;
97
98 private final HashMap<String,Map<String,String>> namesMap = new HashMap<String, Map<String,String>>();
99
100
101 private final Pattern keypattern = Pattern.compile("^(\\d+.*|-\\d+.*)");
102 private final Pattern keypatternend = Pattern.compile("^.+?\\d$");
103
104 private boolean maxRankRespected =false;
105 private Map<String, Feature> featuresMap;
106
107 private MyName currentMyName;
108
109 private Reference sourceUrlRef;
110
111 private String followingText; //text element immediately following a tax:name in tax:nomenclature TODO move do state
112 private String usedFollowingTextPrefix; //the part of the following text which has been used during taxon name creation
113
114 private final TaxonXAddSources sourceHandler = new TaxonXAddSources();
115
/**
 * Creates an extractor and wires the source handler with the given import context.
 *
 * @param nomenclaturalCode the nomenclatural code stored for this extractor
 * @param classification the classification that is reloaded/saved during extraction
 * @param importer the running import; also used to obtain the agent service for the collectors
 * @param configState the import state, stored and handed to the source handler
 * @param featuresMap the features keyed by title cache
 * @param urlSource the reference for the source URL, stored and handed to the source handler
 */
public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode, Classification classification,
        TaxonXImport importer, TaxonXImportState configState, Map<String, Feature> featuresMap,
        Reference urlSource) {
    this.nomenclaturalCode = nomenclaturalCode;
    this.classification = classification;
    this.importer = importer;
    this.state2 = configState;
    this.featuresMap = featuresMap;
    this.sourceUrlRef = urlSource;

    prepareCollectors(configState, importer.getAgentService());

    // the source handler gets the same context as this extractor
    sourceHandler.setSourceUrlRef(urlSource);
    sourceHandler.setImporter(importer);
    sourceHandler.setConfigState(configState);
}
135
136 /**
137 * extracts all the treament information and save them
138 * @param treatmentnode: the XML Node
139 * @param tosave: the list of object to save into the CDM
140 * @param refMods: the reference extracted from the MODS
141 * @param sourceName: the URI of the document
142 */
143 @SuppressWarnings({ "rawtypes", "unused" })
144
145 protected void extractTreatment(Node treatmentnode, Reference refMods, URI sourceName) { logger.info("extractTreatment");
146 List<TaxonNameBase> namesToSave = new ArrayList<TaxonNameBase>();
147 NodeList children = treatmentnode.getChildNodes();
148 Taxon acceptedTaxon =null;
149 boolean hasRefgroup=false;
150
151 //needed?
152 for (int i=0;i<children.getLength();i++){
153 if(children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group")) {
154 hasRefgroup=true;
155 }
156 }
157
158 for (int i=0;i<children.getLength();i++){
159 Node child = children.item(i);
160 acceptedTaxon = handleSingleNode(refMods, sourceName, namesToSave, child, acceptedTaxon);
161 }
162 // logger.info("saveUpdateNames");
163 if (maxRankRespected){
164 importer.getNameService().saveOrUpdate(namesToSave);
165 importer.getClassificationService().saveOrUpdate(classification);
166 //logger.info("saveUpdateNames-ok");
167 }
168
169 buildFeatureTree();
170 }
171
172 private Taxon handleSingleNode(Reference refMods, URI sourceName,
173 List<TaxonNameBase> namesToSave, Node child, Taxon acceptedTaxon) {
174 Taxon defaultTaxon =null;
175
176 String nodeName = child.getNodeName();
177 if (nodeName.equalsIgnoreCase("tax:nomenclature")){
178 NodeList nomenclatureChildren = child.getChildNodes();
179 boolean containsName = false;
180 for(int k=0; k<nomenclatureChildren.getLength(); k++){
181 if(nomenclatureChildren.item(k).getNodeName().equalsIgnoreCase("tax:name")){
182 containsName=true;
183 break;
184 }
185 }
186 if (containsName){
187 reloadClassification();
188 //extract "main" the scientific name
189 try{
190 acceptedTaxon = extractNomenclature(child, namesToSave, refMods);
191 }catch(ClassCastException e){
192 //FIXME exception handling
193 e.printStackTrace();
194 }
195 // System.out.println("acceptedTaxon : "+acceptedTaxon);
196 }
197 }else if (nodeName.equalsIgnoreCase("tax:ref_group") && maxRankRespected){
198 reloadClassification();
199 //extract the References within the document
200 extractReferences(child, namesToSave ,acceptedTaxon,refMods);
201 }else if (nodeName.equalsIgnoreCase("tax:div") &&
202 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected){
203 File file = new File(TaxonXImport.LOG_FOLDER + "multipleTaxonX.txt");
204 FileWriter writer;
205 try {
206 writer = new FileWriter(file ,true);
207 writer.write(sourceName+"\n");
208 writer.flush();
209 writer.close();
210 } catch (IOException e1) {
211 // TODO Auto-generated catch block
212 logger.error(e1.getMessage());
213 }
214 // String multiple = askMultiple(children.item(i));
215 String multiple = "Other";
216 if (multiple.equalsIgnoreCase("other")) {
217 extractSpecificFeatureNotStructured(child,acceptedTaxon, defaultTaxon,namesToSave, refMods,multiple);
218 }else if (multiple.equalsIgnoreCase("synonyms")) {
219 try{
220 extractSynonyms(child,acceptedTaxon, refMods, null);
221 }catch(NullPointerException e){
222 logger.warn("the accepted taxon is maybe null");
223 }
224 }else if(multiple.equalsIgnoreCase("material examined")){
225 extractMaterials(child, acceptedTaxon, refMods, namesToSave);
226 }else if (multiple.equalsIgnoreCase("distribution")){
227 extractDistribution(child, acceptedTaxon, defaultTaxon, namesToSave, refMods);
228 }else if (multiple.equalsIgnoreCase("type status")){
229 extractDescriptionWithReference(child, acceptedTaxon, defaultTaxon,refMods, "TypeStatus");
230 }else if (multiple.equalsIgnoreCase("vernacular name")){
231 extractDescriptionWithReference(child, acceptedTaxon, defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
232 }else{
233 extractSpecificFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,multiple);
234 }
235 }
236 else if(nodeName.equalsIgnoreCase("tax:div") &&
237 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected){
238 extractFeature(child,acceptedTaxon,defaultTaxon, namesToSave, refMods, Feature.BIOLOGY_ECOLOGY());
239 }
240 else if(nodeName.equalsIgnoreCase("tax:div") &&
241 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("vernacularName") && maxRankRespected){
242 extractDescriptionWithReference(child, acceptedTaxon,defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
243 }
244 else if(nodeName.equalsIgnoreCase("tax:div") &&
245 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected){
246 extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DESCRIPTION());
247 }
248 else if(nodeName.equalsIgnoreCase("tax:div") &&
249 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected){
250 extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,Feature.DIAGNOSIS());
251 }
252 else if(nodeName.equalsIgnoreCase("tax:div") &&
253 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected){
254 extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DISCUSSION());
255 }
256 else if(nodeName.equalsIgnoreCase("tax:div") &&
257 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("note") && maxRankRespected){
258 extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DESCRIPTION());
259 }
260 else if(nodeName.equalsIgnoreCase("tax:div") &&
261 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected){
262 extractDistribution(child,acceptedTaxon,defaultTaxon,namesToSave, refMods);
263 }
264 else if(nodeName.equalsIgnoreCase("tax:div") &&
265 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected){
266 extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave,refMods,Feature.ETYMOLOGY());
267 }
268 else if(nodeName.equalsIgnoreCase("tax:div") &&
269 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected){
270 extractMaterials(child,acceptedTaxon, refMods, namesToSave);
271 }
272 else if(nodeName.equalsIgnoreCase("tax:figure") && maxRankRespected){
273 extractSpecificFeature(child,acceptedTaxon,defaultTaxon, namesToSave, refMods, "Figure");
274 }
275 else if(nodeName.equalsIgnoreCase("tax:div") &&
276 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") && maxRankRespected){
277 extractSpecificFeature(child, acceptedTaxon,defaultTaxon, namesToSave, refMods, "table");
278 }else if(nodeName.equalsIgnoreCase("tax:div") &&
279 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected){
280 //TODO IGNORE keys for the moment
281 //extractKey(children.item(i),acceptedTaxon, nameToSave,source, refMods);
282 extractSpecificFeatureNotStructured(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,"Keys - unparsed");
283 }
284 else{
285 if (! nodeName.equalsIgnoreCase("tax:pb")){
286 //logger.info("ANOTHER KIND OF NODES: "+children.item(i).getNodeName()+", "+children.item(i).getAttributes());
287 if (child.getAttributes() !=null) {
288 logger.info("First Attribute: " + child.getAttributes().item(0));
289 }
290 extractSpecificFeatureNotStructured(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, notMarkedUp);
291 }else{
292 //FIXME
293 logger.warn("Unhandled");
294 }
295 }
296 return acceptedTaxon;
297 }
298
299
300 protected Map<String,Feature> getFeaturesUsed(){
301 return featuresMap;
302 }
303 /**
304 *
305 */
306 private void buildFeatureTree() {
307 logger.info("buildFeatureTree");
308 FeatureTree proibiospheretree = importer.getFeatureTreeService().find(proIbioTreeUUID);
309 if (proibiospheretree == null){
310 List<FeatureTree> trees = importer.getFeatureTreeService().list(FeatureTree.class, null, null, null, null);
311 if (trees.size()==1) {
312 FeatureTree ft = trees.get(0);
313 if (featuresMap==null) {
314 featuresMap=new HashMap<String, Feature>();
315 }
316 for (Feature feature: ft.getDistinctFeatures()){
317 if(feature!=null) {
318 featuresMap.put(feature.getTitleCache(), feature);
319 }
320 }
321 }
322 proibiospheretree = FeatureTree.NewInstance();
323 proibiospheretree.setUuid(proIbioTreeUUID);
324 }
325 // FeatureNode root = proibiospheretree.getRoot();
326 FeatureNode root2 = proibiospheretree.getRoot();
327 if (root2 != null){
328 int nbChildren = root2.getChildCount()-1;
329 while (nbChildren>-1){
330 try{
331 root2.removeChild(nbChildren);
332 }catch(Exception e){logger.warn("Can't remove child from FeatureTree "+e);}
333 nbChildren --;
334 }
335
336 }
337
338 for (Feature feature:featuresMap.values()) {
339 root2.addChild(FeatureNode.NewInstance(feature));
340 }
341 importer.getFeatureTreeService().saveOrUpdate(proibiospheretree);
342
343 }
344
345
346 /**
347 * @param keys
348 * @param acceptedTaxon: the current acceptedTaxon
349 * @param nametosave: the list of objects to save into the CDM
350 * @param refMods: the current reference extracted from the MODS
351 */
352 /* @SuppressWarnings("rawtypes")
353 private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonNameBase> nametosave, Reference refMods) {
354 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
355
356 NodeList children = keys.getChildNodes();
357 String key="";
358 PolytomousKey poly = PolytomousKey.NewInstance();
359 poly.addSource(OriginalSourceType.Import, null,null,refMods,null);
360 poly.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
361 poly.addTaxonomicScope(acceptedTaxon);
362 poly.setTitleCache("bloup", true);
363 // poly.addCoveredTaxon(acceptedTaxon);
364 PolytomousKeyNode root = poly.getRoot();
365 PolytomousKeyNode previous = null,tmpKey=null;
366 Taxon taxonKey=null;
367 List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
368
369 // String fullContent = keys.getTextContent();
370 for (int i=0;i<children.getLength();i++){
371 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
372 NodeList paragraph = children.item(i).getChildNodes();
373 key="";
374 taxonKey=null;
375 for (int j=0;j<paragraph.getLength();j++){
376 if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
377 if (! paragraph.item(j).getTextContent().trim().isEmpty()){
378 key+=paragraph.item(j).getTextContent().trim();
379 // logger.info("KEY: "+j+"--"+key);
380 }
381 }
382 if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
383 taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
384 }
385 }
386 // logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
387 if (keypattern.matcher(key).matches()){
388 tmpKey = PolytomousKeyNode.NewInstance(key);
389 if (taxonKey!=null) {
390 tmpKey.setTaxon(taxonKey);
391 }
392 polyNodes.add(tmpKey);
393 if (previous == null) {
394 root.addChild(tmpKey);
395 } else {
396 previous.addChild(tmpKey);
397 }
398 }else{
399 if (!key.isEmpty()){
400 tmpKey=PolytomousKeyNode.NewInstance(key);
401 if (taxonKey!=null) {
402 tmpKey.setTaxon(taxonKey);
403 }
404 polyNodes.add(tmpKey);
405 if (keypatternend.matcher(key).matches()) {
406 root.addChild(tmpKey);
407 previous=tmpKey;
408 } else{
409 previous.addChild(tmpKey);
410 }
411
412 }
413 }
414 }
415 }
416 importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
417 importer.getPolytomousKeyService().saveOrUpdate(poly);
418 }
419 */
420
421
422 /**
423 * @param taxons: the XML Nodegroup
424 * @param nametosave: the list of objects to save into the CDM
425 * @param acceptedTaxon: the current accepted Taxon
426 * @param refMods: the current reference extracted from the MODS
427 *
428 * @return Taxon object built
429 */
430 @SuppressWarnings({ "rawtypes", "unused" })
431 private TaxonNameBase getTaxonNameBaseFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference refMods, boolean isSynonym) {
432 // logger.info("getTaxonFromXML");
433 // logger.info("acceptedTaxon: "+acceptedTaxon);
434 logger.info("getTaxonNameBaseFromXML");
435 TaxonNameBase nameToBeFilled = null;
436
437 currentMyName=new MyName(isSynonym);
438
439 NomenclaturalStatusType statusType = null;
440 try {
441 String followingText = null; //needs to be checked if following text is possible
442 currentMyName = extractScientificName(taxons,refMods, null);
443 } catch (TransformerFactoryConfigurationError e1) {
444 logger.warn(e1);
445 } catch (TransformerException e1) {
446 logger.warn(e1);
447 }
448 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
449
450 nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
451 if (nameToBeFilled.hasProblem() &&
452 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
453 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
454 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
455 nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
456 }
457
458 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
459 */
460 nameToBeFilled = currentMyName.getTaxonNameBase();
461 return nameToBeFilled;
462
463 }
464
465
466 /**
467 *
468 */
469 private void reloadClassification() {
470 logger.info("reloadClassification");
471 Classification cl = importer.getClassificationService().find(classification.getUuid());
472 if (cl != null){
473 classification = cl;
474 }else{
475 importer.getClassificationService().saveOrUpdate(classification);
476 classification = importer.getClassificationService().find(classification.getUuid());
477 }
478 }
479
480 // /**
481 // * Create a Taxon for the current NameBase, based on the current reference
482 // * @param taxonNameBase
483 // * @param refMods: the current reference extracted from the MODS
484 // * @return Taxon
485 // */
486 // @SuppressWarnings({ "unused", "rawtypes" })
487 // private Taxon getTaxon(TaxonNameBase taxonNameBase, Reference refMods) {
488 // Taxon t = new Taxon(taxonNameBase,null );
489 // if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
490 // t.setSec(configState.getConfig().getSecundum());
491 // logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
492 // }
493 // /*<<<<<<< .courant
494 // boolean sourceExists=false;
495 // Set<IdentifiableSource> sources = t.getSources();
496 // for (IdentifiableSource src : sources){
497 // String micro = src.getCitationMicroReference();
498 // Reference r = src.getCitation();
499 // if (r.equals(refMods) && micro == null) {
500 // sourceExists=true;
501 // }
502 // }
503 // if(!sourceExists) {
504 // t.addSource(null,null,refMods,null);
505 // }
506 //=======*/
507 // t.addSource(OriginalSourceType.Import,null,null,refMods,null);
508 // t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
509 // return t;
510 // }
511
512 private void extractDescriptionWithReference(Node typestatus, Taxon acceptedTaxon, Taxon defaultTaxon, Reference refMods,
513 String featureName) {
514 // System.out.println("extractDescriptionWithReference !");
515 logger.info("extractDescriptionWithReference");
516 NodeList children = typestatus.getChildNodes();
517
518 Feature currentFeature=getFeatureObjectFromString(featureName);
519
520 String r="";String s="";
521 for (int i=0;i<children.getLength();i++){
522 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
523 s+=children.item(i).getTextContent().trim();
524 }
525 if (children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
526 r+= children.item(i).getTextContent().trim();
527 }
528 if (s.indexOf(r)>-1) {
529 s=s.split(r)[0];
530 }
531 }
532
533 Reference currentref = ReferenceFactory.newGeneric();
534 if(!r.isEmpty()) {
535 currentref.setTitleCache(r, true);
536 } else {
537 currentref=refMods;
538 }
539 setParticularDescription(s,acceptedTaxon,defaultTaxon, currentref, refMods,currentFeature);
540 }
541
542 /**
543 * @param nametosave
544 * @param distribution: the XML node group
545 * @param acceptedTaxon: the current accepted Taxon
546 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
547 * @param refMods: the current reference extracted from the MODS
548 */
549 @SuppressWarnings("rawtypes")
550 private void extractDistribution(Node distribution, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave, Reference refMods) {
551 logger.info("extractDistribution");
552 // logger.info("acceptedTaxon: "+acceptedTaxon);
553 NodeList children = distribution.getChildNodes();
554 Map<Integer,List<MySpecimenOrObservation>> specimenOrObservations = new HashMap<Integer, List<MySpecimenOrObservation>>();
555 Map<Integer,String> descriptionsFulltext = new HashMap<Integer,String>();
556
557 for (int i=0;i<children.getLength();i++){
558 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
559 NodeList paragraph = children.item(i).getChildNodes();
560 for (int j=0;j<paragraph.getLength();j++){
561 if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
562 extractText(descriptionsFulltext, i, paragraph.item(j));
563 }
564 else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
565 extractInLine(nametosave, refMods, descriptionsFulltext, i,paragraph.item(j));
566 }
567 else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")){
568 MySpecimenOrObservation specimenOrObservation = new MySpecimenOrObservation();
569 DerivedUnit derivedUnitBase = null;
570 specimenOrObservation = extractSpecimenOrObservation(paragraph.item(j), derivedUnitBase, SpecimenOrObservationType.DerivedUnit, null);
571 extractTextFromSpecimenOrObservation(specimenOrObservations, descriptionsFulltext, i, specimenOrObservation);
572 }
573 }
574 }
575 }
576
577 int m=0;
578 for (int k:descriptionsFulltext.keySet()) {
579 if (k>m) {
580 m=k;
581 }
582 }
583 for (int k:specimenOrObservations.keySet()) {
584 if (k>m) {
585 m=k;
586 }
587 }
588
589
590 if(acceptedTaxon!=null){
591 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
592 Feature currentFeature = Feature.DISTRIBUTION();
593 // DerivedUnit derivedUnitBase=null;
594 // String descr="";
595 for (int k=0;k<=m;k++){
596 if(specimenOrObservations.keySet().contains(k)){
597 for (MySpecimenOrObservation soo:specimenOrObservations.get(k) ) {
598 handleAssociation(acceptedTaxon, refMods, td, soo);
599 }
600 }
601
602 if (descriptionsFulltext.keySet().contains(k)){
603 if (!stringIsEmpty(descriptionsFulltext.get(k).trim()) && (descriptionsFulltext.get(k).startsWith("Hab.") || descriptionsFulltext.get(k).startsWith("Habitat"))){
604 setParticularDescription(descriptionsFulltext.get(k),acceptedTaxon,defaultTaxon, refMods, Feature.HABITAT());
605 break;
606 }
607 else{
608 handleTextData(refMods, descriptionsFulltext, td, currentFeature, k);
609 }
610 }
611
612 if (descriptionsFulltext.keySet().contains(k) || specimenOrObservations.keySet().contains(k)){
613 acceptedTaxon.addDescription(td);
614 sourceHandler.addAndSaveSource(refMods, td, null);
615 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
616 }
617 }
618 }
619 }
620
621 /**
622 * @param refMods
623 * @param descriptionsFulltext
624 * @param td
625 * @param currentFeature
626 * @param k
627 */
628 private void handleTextData(Reference refMods, Map<Integer, String> descriptionsFulltext, TaxonDescription td,
629 Feature currentFeature, int k) {
630 //logger.info("handleTextData");
631 TextData textData = TextData.NewInstance();
632 textData.setFeature(currentFeature);
633 textData.putText(Language.UNKNOWN_LANGUAGE(), descriptionsFulltext.get(k));
634 sourceHandler.addSource(refMods, textData);
635 td.addElement(textData);
636 }
637
638 /**
639 * @param acceptedTaxon
640 * @param refMods
641 * @param td
642 * @param soo
643 */
644 private void handleAssociation(Taxon acceptedTaxon, Reference refMods, TaxonDescription td, MySpecimenOrObservation soo) {
645 logger.info("handleAssociation");
646 String descr=soo.getDescr();
647 DerivedUnit derivedUnitBase = soo.getDerivedUnitBase();
648
649 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
650
651 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
652
653 Feature feature=null;
654 feature = makeFeature(derivedUnitBase);
655 if(!StringUtils.isEmpty(descr)) {
656 derivedUnitBase.setTitleCache(descr, true);
657 }
658
659 IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
660
661 taxonDescription.addElement(indAssociation);
662 sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
663 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
664 td.setDescribedSpecimenOrObservation(soo.getDerivedUnitBase());
665 }
666
667 /**
668 * create an individualAssociation
669 * @param refMods
670 * @param derivedUnitBase
671 * @param feature
672 * @return
673 */
674 private IndividualsAssociation createIndividualAssociation(Reference refMods, DerivedUnit derivedUnitBase,
675 Feature feature) {
676 logger.info("createIndividualAssociation");
677 IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
678 indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
679 indAssociation.setFeature(feature);
680 indAssociation = sourceHandler.addSource(refMods, indAssociation);
681 return indAssociation;
682 }
683
684 /**
685 * @param specimenOrObservations
686 * @param descriptionsFulltext
687 * @param i
688 * @param specimenOrObservation
689 */
690 private void extractTextFromSpecimenOrObservation(Map<Integer, List<MySpecimenOrObservation>> specimenOrObservations,
691 Map<Integer, String> descriptionsFulltext, int i, MySpecimenOrObservation specimenOrObservation) {
692 logger.info("extractTextFromSpecimenOrObservation");
693 List<MySpecimenOrObservation> speObsList = specimenOrObservations.get(i);
694 if (speObsList == null) {
695 speObsList=new ArrayList<MySpecimenOrObservation>();
696 }
697 speObsList.add(specimenOrObservation);
698 specimenOrObservations.put(i,speObsList);
699
700 String s = specimenOrObservation.getDerivedUnitBase().toString();
701 if (descriptionsFulltext.get(i) !=null){
702 s = descriptionsFulltext.get(i)+" "+s;
703 }
704 descriptionsFulltext.put(i, s);
705 }
706
707 /**
708 * Extract the text with the inline link to a taxon
709 * @param nametosave
710 * @param refMods
711 * @param descriptionsFulltext
712 * @param i
713 * @param paragraph
714 */
715 @SuppressWarnings("rawtypes")
716 private void extractInLine(List<TaxonNameBase> nametosave, Reference refMods, Map<Integer, String> descriptionsFulltext,
717 int i, Node paragraph) {
718 //logger.info("extractInLine");
719 String inLine=getInlineTextForName(nametosave, refMods, paragraph);
720 if (descriptionsFulltext.get(i) !=null){
721 inLine = descriptionsFulltext.get(i)+inLine;
722 }
723 descriptionsFulltext.put(i, inLine);
724 }
725
726 /**
727 * Extract the raw text from a Node
728 * @param descriptionsFulltext
729 * @param node
730 * @param j
731 */
732 private void extractText(Map<Integer, String> descriptionsFulltext, int i, Node node) {
733 //logger.info("extractText");
734 if(!node.getTextContent().trim().isEmpty()) {
735 String s =node.getTextContent().trim();
736 if (descriptionsFulltext.get(i) !=null){
737 s = descriptionsFulltext.get(i)+" "+s;
738 }
739 descriptionsFulltext.put(i, s);
740 }
741 }
742
743
744 /**
745 * @param materials: the XML node group
746 * @param acceptedTaxon: the current accepted Taxon
747 * @param refMods: the current reference extracted from the MODS
748 */
749 @SuppressWarnings("rawtypes")
750 private void extractMaterials(Node materials, Taxon acceptedTaxon, Reference refMods,List<TaxonNameBase> nametosave) {
751 logger.info("EXTRACTMATERIALS");
752 // logger.info("acceptedTaxon: "+acceptedTaxon);
753 NodeList children = materials.getChildNodes();
754 NodeList events = null;
755 // String descr="";
756
757
758 for (int i=0;i<children.getLength();i++){
759 String rawAssociation="";
760 boolean added=false;
761 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
762 events = children.item(i).getChildNodes();
763 for(int k=0;k<events.getLength();k++){
764 if (events.item(k).getNodeName().equalsIgnoreCase("tax:name")){
765 String inLine= getInlineTextForName(nametosave, refMods, events.item(k));
766 if(!inLine.isEmpty()) {
767 rawAssociation+=inLine;
768 }
769 }
770 if (! events.item(k).getNodeName().equalsIgnoreCase("tax:name")
771 && !events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
772 rawAssociation+= events.item(k).getTextContent().trim();
773 }
774 if(events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
775 if (!containsDistinctLetters(rawAssociation.replaceAll(";",""))) {
776 rawAssociation="no description text";
777 }
778 added=true;
779 handleDerivedUnitFacadeAndBase(acceptedTaxon, refMods, events.item(k), rawAssociation);
780 }
781 if (!rawAssociation.isEmpty() && !added){
782
783 Feature feature = Feature.MATERIALS_EXAMINED();
784 featuresMap.put(feature.getTitleCache(),feature);
785
786 TextData textData = createTextData(rawAssociation, refMods, feature);
787
788 if(! rawAssociation.isEmpty() && (acceptedTaxon!=null)){
789 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
790 td.addElement(textData);
791 acceptedTaxon.addDescription(td);
792 sourceHandler.addAndSaveSource(refMods, td, null);
793 }
794 // DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
795 // derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
796 //
797 // TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
798 // acceptedTaxon.addDescription(taxonDescription);
799 //
800 // IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
801 //
802 // Feature feature = Feature.MATERIALS_EXAMINED();
803 // featuresMap.put(feature.getTitleCache(),feature);
804 // if(!StringUtils.isEmpty(rawAssociation)) {
805 // derivedUnitBase.setTitleCache(rawAssociation, true);
806 // }
807 // indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
808 // indAssociation.setFeature(feature);
809 // indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
810 //
811 // /*boolean sourceExists=false;
812 // Set<DescriptionElementSource> dsources = indAssociation.getSources();
813 // for (DescriptionElementSource src : dsources){
814 // String micro = src.getCitationMicroReference();
815 // Reference r = src.getCitation();
816 // if (r.equals(refMods) && micro == null) {
817 // sourceExists=true;
818 // }
819 // }
820 // if(!sourceExists) {
821 // indAssociation.addSource(null, null, refMods, null);
822 // }*/
823 // taxonDescription.addElement(indAssociation);
824 // taxonDescription.setTaxon(acceptedTaxon);
825 // taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
826 //
827 // /*sourceExists=false;
828 // Set<IdentifiableSource> sources = taxonDescription.getSources();
829 // for (IdentifiableSource src : sources){
830 // String micro = src.getCitationMicroReference();
831 // Reference r = src.getCitation();
832 // if (r.equals(refMods) && micro == null) {
833 // sourceExists=true;
834 // }
835 // }
836 // if(!sourceExists) {
837 // taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
838 // }*/
839 //
840 // importer.getDescriptionService().saveOrUpdate(taxonDescription);
841 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
842
843 rawAssociation="";
844 }
845 }
846 }
847 }
848 }
849
850 /**
851 * @param acceptedTaxon
852 * @param refMods
853 * @param events
854 * @param rawAssociation
855 * @param k
856 */
857 private void handleDerivedUnitFacadeAndBase(Taxon acceptedTaxon, Reference refMods, Node event,
858 String rawAssociation) {
859 logger.info("handleDerivedUnitFacadeAndBase");
860 String descr;
861 DerivedUnit derivedUnitBase;
862 MySpecimenOrObservation myspecimenOrObservation;
863 DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
864 derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
865
866 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
867
868 //TODO this may not always be correct, ask user
869 TaxonNameBase<?,?> typifiableName = acceptedTaxon != null ? acceptedTaxon.getName() : null;
870 myspecimenOrObservation = extractSpecimenOrObservation(event,derivedUnitBase,SpecimenOrObservationType.DerivedUnit, typifiableName);
871 derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
872 descr=myspecimenOrObservation.getDescr();
873
874 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
875
876 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
877
878 Feature feature = makeFeature(derivedUnitBase);
879 featuresMap.put(feature.getTitleCache(),feature);
880 if(!StringUtils.isEmpty(descr)) {
881 derivedUnitBase.setTitleCache(descr, true);
882 }
883
884 IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
885
886 taxonDescription.addElement(indAssociation);
887 sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
888 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
889 }
890
891
892
893 /**
894 * @param currentName
895 * @param materials: the XML node group
896 * @param acceptedTaxon: the current accepted Taxon
897 * @param refMods: the current reference extracted from the MODS
898 */
899 private String extractMaterialsDirect(Node materials, Taxon acceptedTaxon, Reference refMods, String event, TaxonNameBase<?,?> currentName) {
900 logger.info("extractMaterialsDirect");
901 // logger.info("acceptedTaxon: "+acceptedTaxon);
902 String descr="";
903
904 DerivedUnit derivedUnitBase=null;
905 MySpecimenOrObservation myspecimenOrObservation = extractSpecimenOrObservation(materials,derivedUnitBase, SpecimenOrObservationType.DerivedUnit, currentName);
906 derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
907
908 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
909
910 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
911
912 Feature feature=null;
913 if (event.equalsIgnoreCase("collection")){
914 feature = makeFeature(derivedUnitBase);
915 }
916 else{
917 feature = Feature.MATERIALS_EXAMINED();
918 }
919 featuresMap.put(feature.getTitleCache(), feature);
920
921 descr=myspecimenOrObservation.getDescr();
922 if(!StringUtils.isEmpty(descr)) {
923 derivedUnitBase.setTitleCache(descr, true);
924 }
925
926 IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
927
928 taxonDescription.addElement(indAssociation);
929 sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
930 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
931
932 return derivedUnitBase.getTitleCache();
933
934 }
935
936
937 /**
938 * @param description: the XML node group
939 * @param acceptedTaxon: the current acceptedTaxon
940 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
941 * @param nametosave: the list of objects to save into the CDM
942 * @param refMods: the current reference extracted from the MODS
943 * @param featureName: the feature name
944 */
945 @SuppressWarnings({ "rawtypes"})
946 private String extractSpecificFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
947 List<TaxonNameBase> nametosave, Reference refMods, String featureName ) {
948 logger.info("extractSpecificFeature "+featureName);
949 // System.out.println("GRUUUUuu");
950 NodeList children = description.getChildNodes();
951 NodeList insideNodes ;
952 NodeList trNodes;
953 // String descr ="";
954 String localdescr="";
955 List<String> blabla=null;
956 List<String> text = new ArrayList<String>();
957
958 String table="<table>";
959 String head="";
960 String line="";
961
962 Feature currentFeature=getFeatureObjectFromString(featureName);
963
964 // String fullContent = description.getTextContent();
965 for (int i=0;i<children.getLength();i++){
966 // localdescr="";
967 if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
968 text.add(children.item(i).getTextContent().trim());
969 }
970 if (featureName.equalsIgnoreCase("table")){
971 if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
972 children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("thead")){
973 head = extractTableHead(children.item(i));
974 table+=head;
975 line = extractTableLine(children.item(i));
976 if (!line.equalsIgnoreCase("<tr></tr>")) {
977 table+=line;
978 }
979 }
980 if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
981 children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
982 line = extractTableLineWithColumn(children.item(i).getChildNodes());
983 if(!line.equalsIgnoreCase("<tr></tr>")) {
984 table+=line;
985 }
986 }
987 }
988 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
989 insideNodes=children.item(i).getChildNodes();
990 blabla= new ArrayList<String>();
991 for (int j=0;j<insideNodes.getLength();j++){
992 Node insideNode = insideNodes.item(j);
993 if (insideNode.getNodeName().equalsIgnoreCase("tax:name")){
994 String inlinetext = getInlineTextForName(nametosave, refMods, insideNode);
995 if (!inlinetext.isEmpty()) {
996 blabla.add(inlinetext);
997 }
998 }
999 else if (insideNode.getNodeName().equalsIgnoreCase("#text")) {
1000 if(!insideNode.getTextContent().trim().isEmpty()){
1001 blabla.add(insideNode.getTextContent().trim());
1002 // localdescr += insideNodes.item(j).getTextContent().trim();
1003 }
1004 }
1005 }
1006 if (!blabla.isEmpty()) {
1007 String blaStr = StringUtils.join(blabla," ").trim();
1008 if(!stringIsEmpty(blaStr)) {
1009 setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1010 text.add(blaStr);
1011 }
1012 }
1013
1014 }
1015 if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1016 if(!children.item(i).getTextContent().trim().isEmpty()){
1017 localdescr = children.item(i).getTextContent().trim();
1018 if(!stringIsEmpty(localdescr)) {
1019 setParticularDescription(localdescr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1020 }
1021 }
1022 }
1023 }
1024
1025 table+="</table>";
1026 if (!table.equalsIgnoreCase("<table></table>")){
1027 // System.out.println("TABLE : "+table);
1028 text.add(table);
1029 }
1030
1031 if (text !=null && !text.isEmpty()) {
1032 return StringUtils.join(text," ");
1033 } else {
1034 return "";
1035 }
1036
1037 }
1038
1039 /**
1040 * @param children
1041 * @param i
1042 * @return
1043 */
1044 private String extractTableLine(Node child) {
1045 //logger.info("extractTableLine");
1046 String line;
1047 line="<tr>";
1048 if (child.getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1049 line = extractTableLineWithColumn(child.getChildNodes());
1050 }
1051 line+="</tr>";
1052 return line;
1053 }
1054
1055 /**
1056 * @param children
1057 * @param i
1058 * @return
1059 */
1060 private String extractTableHead(Node child) {
1061 //logger.info("extractTableHead");
1062 String head;
1063 String line;
1064 head="<th>";
1065 NodeList trNodes = child.getChildNodes();
1066 for (int k=0;k<trNodes.getLength();k++){
1067 if (trNodes.item(k).getNodeName().equalsIgnoreCase("tax:div")
1068 && trNodes.item(k).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1069 line = extractTableLineWithColumn(trNodes.item(k).getChildNodes());
1070 head+=line;
1071 }
1072 }
1073 head+="</th>";
1074 return head;
1075 }
1076
1077 /**
1078 * build a html table line, with td columns
1079 * @param tdNodes
1080 * @return an html coded line
1081 */
1082 private String extractTableLineWithColumn(NodeList tdNodes) {
1083 //logger.info("extractTableLineWithColumn");
1084 String line;
1085 line="<tr>";
1086 for (int l=0;l<tdNodes.getLength();l++){
1087 if (tdNodes.item(l).getNodeName().equalsIgnoreCase("tax:p")){
1088 line+="<td>"+tdNodes.item(l).getTextContent()+"</td>";
1089 }
1090 }
1091 line+="</tr>";
1092 return line;
1093 }
1094
1095 /**
1096 * @param description: the XML node group
1097 * @param acceptedTaxon: the current acceptedTaxon
1098 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1099 * @param nametosave: the list of objects to save into the CDM
1100 * @param refMods: the current reference extracted from the MODS
1101 * @param featureName: the feature name
1102 */
1103 @SuppressWarnings({ "unused", "rawtypes" })
1104 private String extractSpecificFeatureNotStructured(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
1105 List<TaxonNameBase> nameToSave, Reference refMods, String featureName ) {
1106 logger.info("extractSpecificFeatureNotStructured " + featureName);
1107 NodeList children = description.getChildNodes();
1108 NodeList insideNodes ;
1109 List<String> blabla= new ArrayList<String>();
1110
1111
1112 Feature currentFeature = getFeatureObjectFromString(featureName);
1113
1114 String fullContent = description.getTextContent();
1115 for (int i=0;i<children.getLength();i++){
1116 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1117 insideNodes=children.item(i).getChildNodes();
1118 for (int j=0;j<insideNodes.getLength();j++){
1119 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1120 String inlineText =getInlineTextForName(nameToSave, refMods, insideNodes.item(j));
1121 if(!inlineText.isEmpty()) {
1122 blabla.add(inlineText);
1123 }
1124 }
1125 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1126 if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1127 blabla.add(insideNodes.item(j).getTextContent().trim());
1128 }
1129 }
1130 }
1131 }
1132 if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1133 if(!children.item(i).getTextContent().trim().isEmpty()){
1134 String localdescr = children.item(i).getTextContent().trim();
1135 if(!localdescr.isEmpty())
1136 {
1137 blabla.add(localdescr);
1138 }
1139 }
1140 }
1141 }
1142
1143 if (blabla !=null && !blabla.isEmpty()) {
1144 String blaStr = StringUtils.join(blabla," ").trim();
1145 if (! stringIsEmpty(blaStr)) {
1146 setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1147 return blaStr;
1148 } else {
1149 return "";
1150 }
1151 } else {
1152 return "";
1153 }
1154
1155 }
1156
1157 /**
1158 * @param blaStr
1159 * @return
1160 */
1161 private boolean stringIsEmpty(String blaStr) {
1162 if (blaStr.matches("(\\.|,|;|\\.-)?")){
1163 return true;
1164 }else{
1165 return false;
1166 }
1167 }
1168
1169 /**
1170 * @param nametosave
1171 * @param refMods
1172 * @param insideNodes
1173 * @param blabla
1174 * @param j
1175 */
1176 @SuppressWarnings({ "rawtypes" })
1177 private String getInlineTextForName(List<TaxonNameBase> nametosave, Reference refMods, Node insideNode) {
1178 if (true){
1179 NodeList children = insideNode.getChildNodes();
1180 String result = "";
1181 for (int i=0;i<children.getLength();i++){
1182 Node nameChild = children.item(i);
1183 if(nameChild.getNodeName().equalsIgnoreCase("#text")){
1184 result += nameChild.getTextContent();
1185 }else{
1186 //do nothing
1187 }
1188 }
1189 return result.replace("\n", "").trim();
1190 }else{
1191 TaxonNameBase tnb = getTaxonNameBaseFromXML(insideNode, nametosave,refMods,false);
1192 // Taxon tax = getTaxonFromTxonNameBase(tnb, refMods);
1193 Taxon tax = currentMyName.getTaxon();
1194 if(tnb !=null && tax != null){
1195 String linkedTaxon = tnb.getTitleCache().split("sec")[0];//TODO NOT IMPLEMENTED IN THE CDM YET
1196 return "<cdm:taxon uuid='"+tax.getUuid()+"'>"+linkedTaxon+"</cdm:taxon>";
1197 }else if (tnb != null && tax == null){
1198 //TODO
1199 return "<cdm:taxonName uuid='" + tnb.getUuid() +"'>" + tnb.getTitleCache().split("sec")[0] +"</cdm:taxonName>";
1200 }else{
1201 logger.warn("Inline text has no content yet");
1202 }
1203 return "";
1204 }
1205 }
1206
1207 /**
1208 * @param featureName
1209 * @return
1210 */
1211 @SuppressWarnings("rawtypes")
1212 private Feature getFeatureObjectFromString(String featureName) {
1213 logger.info("getFeatureObjectFromString");
1214 List<Feature> features = importer.getTermService().list(Feature.class, null,null,null,null);
1215 Feature currentFeature=null;
1216 for (Feature feature: features){
1217 String tmpF = feature.getTitleCache();
1218 if (tmpF.equalsIgnoreCase(featureName)) {
1219 currentFeature=feature;
1220 // System.out.println("currentFeatureFromList "+currentFeature.getUuid());
1221 }
1222 }
1223 if (currentFeature == null) {
1224 currentFeature=Feature.NewInstance(featureName, featureName, featureName);
1225 if(featureName.equalsIgnoreCase("Other")){
1226 currentFeature.setUuid(OtherUUID);
1227 }
1228 if(featureName.equalsIgnoreCase(notMarkedUp)){
1229 currentFeature.setUuid(NotMarkedUpUUID);
1230 }
1231 importer.getTermService().saveOrUpdate(currentFeature);
1232 }
1233 return currentFeature;
1234 }
1235
1236
1237
1238
1239 /**
1240 * @param children: the XML node group
1241 * @param nametosave: the list of objects to save into the CDM
1242 * @param acceptedTaxon: the current acceptedTaxon
1243 * @param refMods: the current reference extracted from the MODS
1244 * @param fullContent :the parsed XML content
1245 * @return a list of description (text)
1246 */
1247 @SuppressWarnings({ "unused", "rawtypes" })
1248 private List<String> parseParagraph(List<TaxonNameBase> namesToSave, Taxon acceptedTaxon, Reference refMods, Node paragraph, Feature feature){
1249 logger.info("parseParagraph "+feature.toString());
1250 List<String> fullDescription= new ArrayList<String>();
1251 // String localdescr;
1252 String descr="";
1253 NodeList insideNodes ;
1254 boolean collectionEvent = false;
1255 List<Node>collectionEvents = new ArrayList<Node>();
1256
1257 NodeList children = paragraph.getChildNodes();
1258
1259 for (int i=0;i<children.getLength();i++){
1260 // localdescr="";
1261 if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
1262 descr += children.item(i).getTextContent().trim();
1263 }
1264 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1265 insideNodes=children.item(i).getChildNodes();
1266 List<String> blabla= new ArrayList<String>();
1267 for (int j=0;j<insideNodes.getLength();j++){
1268 boolean nodeKnown = false;
1269 // System.out.println("insideNodes.item(j).getNodeName() : "+insideNodes.item(j).getNodeName());
1270 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1271 String inlineText = getInlineTextForName(namesToSave, refMods, insideNodes.item(j));
1272 if (!inlineText.isEmpty()) {
1273 blabla.add(inlineText);
1274 }
1275 nodeKnown=true;
1276 }
1277 else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1278 if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1279 blabla.add(insideNodes.item(j).getTextContent().trim());
1280 // localdescr += insideNodes.item(j).getTextContent().trim();
1281 }
1282 nodeKnown=true;
1283 }
1284 else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:bibref")) {
1285 String ref = insideNodes.item(j).getTextContent().trim();
1286 if (ref.endsWith(";") && ((ref.length())>1)) {
1287 ref=ref.substring(0, ref.length()-1)+".";
1288 }
1289 Reference reference = ReferenceFactory.newGeneric();
1290 reference.setTitleCache(ref, true);
1291 blabla.add(reference.getTitleCache());
1292 nodeKnown=true;
1293 }
1294 else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:figure")){
1295 String figure = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "figure");
1296 blabla.add(figure);
1297 }
1298 else if(insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:div") &&
1299 insideNodes.item(j).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1300 insideNodes.item(j).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1301 String table = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "table");
1302 blabla.add(table);
1303 }
1304 else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1305 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1306 String titlecache = extractMaterialsDirect(insideNodes.item(j), acceptedTaxon, refMods, "collection", null);
1307 blabla.add(titlecache);
1308 collectionEvent=true;
1309 collectionEvents.add(insideNodes.item(j));
1310 nodeKnown=true;
1311 }else{
1312 logger.warn("node not handled yet: " + insideNodes.item(j).getNodeName());
1313 }
1314
1315 }
1316 if (!StringUtils.isBlank(StringUtils.join(blabla," "))) {
1317 fullDescription.add(StringUtils.join(blabla," "));
1318 }
1319 }
1320 if (children.item(i).getNodeName().equalsIgnoreCase("tax:figure")){
1321 String figure = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "Figure");
1322 fullDescription.add(figure);
1323 }
1324 if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1325 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1326 children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1327 String table = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "table");
1328 fullDescription.add(table);
1329 }
1330 }
1331
1332 if( !stringIsEmpty(descr.trim())){
1333 Feature currentFeature= getNotMarkedUpFeatureObject();
1334 setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1335 }
1336 // if (collectionEvent) {
1337 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1338 // for (Node coll:collectionEvents){
1339 // = extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
1340 // }
1341 // }
1342 return fullDescription;
1343 }
1344
1345
1346 /**
1347 * @param description: the XML node group
1348 * @param acceptedTaxon: the current acceptedTaxon
1349 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1350 * @param nametosave: the list of objects to save into the CDM
1351 * @param refMods: the current reference extracted from the MODS
1352 * @param feature: the feature to link the data with
1353 */
1354 @SuppressWarnings("rawtypes")
1355 private void extractFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> namesToSave, Reference refMods, Feature feature){
1356 logger.info("EXTRACT FEATURE "+feature.toString());
1357 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1358 List<String> fullDescription= parseParagraph( namesToSave, acceptedTaxon, refMods, description,feature);
1359
1360 // System.out.println("Feature : "+feature.toString()+", "+fullDescription.toString());
1361 if (!fullDescription.isEmpty() &&!stringIsEmpty(StringUtils.join(fullDescription,"\n").trim())) {
1362 setParticularDescription(StringUtils.join(fullDescription,"\n").trim(),acceptedTaxon,defaultTaxon, refMods,feature);
1363 }
1364
1365 }
1366
1367
1368 /**
1369 * @param descr: the XML Nodegroup to parse
1370 * @param acceptedTaxon: the current acceptedTaxon
1371 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1372 * @param refMods: the current reference extracted from the MODS
1373 * @param currentFeature: the feature name
1374 * @return
1375 */
1376 private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon, Reference refMods, Feature currentFeature) {
1377 logger.info("setParticularDescription " + currentFeature.getTitleCache()+", \n blabla : "+descr);
1378
1379 //remove redundant feature title
1380 String featureStr = currentFeature.getTitleCache();
1381 if (!descr.isEmpty() && descr.toLowerCase().startsWith(featureStr.toLowerCase())){
1382 descr = descr.replaceAll("(?i)" + featureStr + "\\.\\s*", "");
1383 }
1384
1385
1386 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1387 featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1388
1389 TextData textData = createTextData(descr, refMods, currentFeature);
1390
1391 if(acceptedTaxon!=null){
1392 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1393 td.addElement(textData);
1394 acceptedTaxon.addDescription(td);
1395
1396 sourceHandler.addAndSaveSource(refMods, td, null);
1397 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1398 }
1399
1400 if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1401 try{
1402 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1403 if (tmp!=null) {
1404 defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1405 }else{
1406 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1407 }
1408 }catch(Exception e){
1409 logger.debug("TAXON EXISTS"+defaultTaxon);
1410 }
1411
1412 TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1413 defaultTaxon.addDescription(td);
1414 td.addElement(textData);
1415 sourceHandler.addAndSaveSource(refMods, td, null);
1416 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1417 }
1418 }
1419
1420 /**
1421 * @param descr
1422 * @param refMods
1423 * @param currentFeature
1424 * @return
1425 */
1426 private TextData createTextData(String descr, Reference refMods, Feature currentFeature) {
1427 //logger.info("createTextData");
1428 TextData textData = TextData.NewInstance();
1429 textData.setFeature(currentFeature);
1430 sourceHandler.addSource(refMods, textData);
1431
1432 textData.putText(Language.UNKNOWN_LANGUAGE(), descr);
1433 return textData;
1434 }
1435
1436
1437
1438 /**
1439 * @param descr: the XML Nodegroup to parse
1440 * @param acceptedTaxon: the current acceptedTaxon
1441 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1442 * @param refMods: the current reference extracted from the MODS
1443 * @param currentFeature: the feature name
1444 * @return
1445 */
1446 private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon,Reference currentRef, Reference refMods, Feature currentFeature) {
1447 // System.out.println("setParticularDescriptionSPecial "+currentFeature);
1448 // logger.info("acceptedTaxon: "+acceptedTaxon);
1449 logger.info("setParticularDescription");
1450 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1451
1452 featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1453 TextData textData = createTextData(descr, refMods, currentFeature);
1454
1455 if(! descr.isEmpty() && (acceptedTaxon!=null)){
1456 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1457 td.addElement(textData);
1458 acceptedTaxon.addDescription(td);
1459
1460 sourceHandler.addAndSaveSource(refMods, td, currentRef);
1461 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1462 }
1463
1464 if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1465 try{
1466 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1467 if (tmp!=null) {
1468 defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1469 }else{
1470 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1471 }
1472 }catch(Exception e){
1473 logger.debug("TAXON EXISTS"+defaultTaxon);
1474 }
1475
1476 TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1477 defaultTaxon.addDescription(td);
1478 td.addElement(textData);
1479 sourceHandler.addAndSaveSource(currentRef, td,currentRef);
1480 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1481 }
1482 }
1483
1484
1485
1486 /**
1487 * @param synonyms: the XML Nodegroup to parse
1488 * @param nametosave: the list of objects to save into the CDM
1489 * @param acceptedTaxon: the current acceptedTaxon
1490 * @param refMods: the current reference extracted from the MODS
1491 */
1492 @SuppressWarnings({ "rawtypes" })
1493 private void extractSynonyms(Node synonymsNode, Taxon acceptedTaxon,Reference refMods, String followingText) {
1494 logger.info("extractSynonyms");
1495 //System.out.println("extractSynonyms for: "+acceptedTaxon);
1496 Taxon ttmp = (Taxon) importer.getTaxonService().find(acceptedTaxon.getUuid());
1497 if (ttmp != null) {
1498 acceptedTaxon = CdmBase.deproxy(ttmp,Taxon.class);
1499 }
1500 else{
1501 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1502 }
1503 NodeList children = synonymsNode.getChildNodes();
1504 List<MyName> names = new ArrayList<MyName>();
1505
1506 if(synonymsNode.getNodeName().equalsIgnoreCase("tax:name")){
1507 try {
1508 MyName myName = extractScientificNameSynonym(synonymsNode, refMods, followingText);
1509 names.add(myName);
1510 } catch (TransformerFactoryConfigurationError e) {
1511 logger.warn(e);
1512 } catch (TransformerException e) {
1513 logger.warn(e);
1514 }
1515 }
1516
1517
1518 for (int i=0;i<children.getLength();i++){
1519 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1520 NodeList tmp = children.item(i).getChildNodes();
1521 // String fullContent = children.item(i).getTextContent();
1522 for (int j=0; j< tmp.getLength();j++){
1523 if(tmp.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1524 try {
1525 MyName myName = extractScientificNameSynonym(tmp.item(j),refMods, followingText);
1526 names.add(myName);
1527 } catch (TransformerFactoryConfigurationError e) {
1528 logger.warn(e);
1529 } catch (TransformerException e) {
1530 logger.warn(e);
1531 }
1532 }
1533 }
1534 }
1535 if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
1536 try {
1537 MyName myName = extractScientificNameSynonym(children.item(i),refMods, followingText);
1538 names.add(myName);
1539 } catch (TransformerFactoryConfigurationError e) {
1540 logger.warn(e);
1541 } catch (TransformerException e) {
1542 logger.warn(e);
1543 }
1544
1545 }
1546 }
1547
1548 for(MyName name:names){
1549 TaxonNameBase nameToBeFilled = name.getTaxonNameBase();
1550 Synonym synonym = name.getSyno();
1551 addFollowingTextToName(nameToBeFilled, followingText);
1552
1553 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1554 nameToBeFilled = parser.parseFullName(name.getName(), nomenclaturalCode, name.getRank());
1555 if (nameToBeFilled.hasProblem() &&
1556 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1557 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1558 addProblemNameToFile(name.getName(),"",nomenclaturalCode,name.getRank());
1559 nameToBeFilled = solveNameProblem(name.getOriginalName(), name.getName(), parser,name.getAuthor(), name.getRank());
1560 }
1561 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
1562 */
1563 if (!name.getIdentifier().isEmpty() && (name.getIdentifier().length()>2)){
1564 setLSID(name.getIdentifier(), synonym);
1565 }
1566
1567 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1568 boolean synoExist = false;
1569 for (Synonym syn: synonymsSet){
1570
1571 boolean a =syn.getName().equals(synonym.getName());
1572 boolean b = syn.getSec().equals(synonym.getSec());
1573 if (a && b) {
1574 synoExist=true;
1575 }
1576 }
1577 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1578 sourceHandler.addSource(refMods, synonym);
1579 acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1580 }
1581 }
1582 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1583 }
1584
1585
1586 private boolean addFollowingTextToName(ITaxonNameBase nameToBeFilled, String followingText) {
1587 if (nameToBeFilled != null && StringUtils.isNotBlank(followingText)){
1588 if (! followingText.matches("\\d\\.?")){
1589
1590 if (followingText.startsWith(",")){
1591 followingText = followingText.substring(1).trim();
1592 }
1593 nameToBeFilled.setFullTitleCache(nameToBeFilled.getFullTitleCache()+ "," +followingText , true);
1594 }
1595 return true;
1596 }
1597 return false;
1598
1599 }
1600
1601 /**
1602 * @param refgroup: the XML nodes
1603 * @param nametosave: the list of objects to save into the CDM
1604 * @param acceptedTaxon: the current acceptedTaxon
1605 * @param nametosave: the list of objects to save into the CDM
1606 * @param refMods: the current reference extracted from the MODS
1607 * @return the acceptedTaxon (why?)
1608 * handle cases where the bibref are inside <p> and outside
1609 */
1610 @SuppressWarnings({ "rawtypes" })
1611 private Taxon extractReferences(Node refgroup, List<TaxonNameBase> nametosave, Taxon acceptedTaxon, Reference refMods) {
1612 logger.info("extractReferences");
1613 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1614
1615 NodeList children = refgroup.getChildNodes();
1616 INonViralName nameToBeFilled = getNonViralNameAccNomenclature();
1617
1618 ReferenceBuilder refBuild = new ReferenceBuilder(sourceHandler);
1619 for (int i=0;i<children.getLength();i++){
1620 if(children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
1621 String ref = children.item(i).getTextContent().trim();
1622 refBuild.builReference(ref, treatmentMainName, nomenclaturalCode, acceptedTaxon, refMods);
1623 if (!refBuild.isFoundBibref()){
1624 extractReferenceRawText(children.item(i).getChildNodes(), nameToBeFilled, refMods, acceptedTaxon);
1625 }
1626 }
1627
1628 if(children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1629 NodeList references = children.item(i).getChildNodes();
1630 String descr="";
1631 for (int j=0;j<references.getLength();j++){
1632 if(references.item(j).getNodeName().equalsIgnoreCase("tax:bibref")){
1633 String ref = references.item(j).getTextContent().trim();
1634 refBuild.builReference(ref, treatmentMainName, nomenclaturalCode, acceptedTaxon, refMods);
1635 }
1636 else
1637 if (references.item(j).getNodeName().equalsIgnoreCase("#text")
1638 && !references.item(j).getTextContent().trim().isEmpty()){
1639 descr += references.item(j).getTextContent().trim();
1640 }
1641
1642 }
1643 if (!refBuild.isFoundBibref()){
1644 //if it's not tagged, put it as row information.
1645 // extractReferenceRawText(references, nameToBeFilled, nametosave, refMods, acceptedTaxon);
1646 //then put it as a not markup feature if not empty
1647 if (!stringIsEmpty(descr.trim())){
1648 Feature currentFeature= getNotMarkedUpFeatureObject();
1649 setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1650 }
1651 }
1652 }
1653 }
1654 // importer.getClassificationService().saveOrUpdate(classification);
1655 return acceptedTaxon;
1656
1657 }
1658
1659 /**
1660 * get the non viral name according to the current nomenclature
1661 * @return
1662 */
1663
1664 private INonViralName getNonViralNameAccNomenclature() {
1665 return nomenclaturalCode.getNewTaxonNameInstance(null);
1666 }
1667
1668 /**
1669 * @return the feature object for the category "not marked up"
1670 */
1671 private Feature getNotMarkedUpFeatureObject() {
1672 // FIXME use getFeature(uuid ....)
1673 logger.info("getNotMarkedUpFeatureObject");
1674 Feature currentFeature = (Feature)importer.getTermService().find(NotMarkedUpUUID);
1675 if (currentFeature == null) {
1676 currentFeature=Feature.NewInstance(notMarkedUp, notMarkedUp, notMarkedUp);
1677 currentFeature.setUuid(NotMarkedUpUUID);
1678 //TODO use userDefined Feature Vocabulary
1679 Feature.DISTRIBUTION().getVocabulary().addTerm(currentFeature);
1680 // importer.getTermService().saveOrUpdate(currentFeature);
1681 importer.getVocabularyService().saveOrUpdate(currentFeature.getVocabulary());
1682 }
1683 return currentFeature;
1684 }
1685
1686 /**
1687 * @param references
1688 * handle cases where the bibref are inside <p> and outside
1689 */
1690 @SuppressWarnings("rawtypes")
1691 private void extractReferenceRawText(NodeList references, INonViralName nameToBeFilled, Reference refMods,
1692 Taxon acceptedTaxon) {
1693 logger.info("extractReferenceRawText");
1694 String refString="";
1695 currentMyName= new MyName(true);
1696 for (int j=0;j<references.getLength();j++){
1697 acceptedTaxon=CdmBase.deproxy(acceptedTaxon, Taxon.class);
1698 //no bibref tag inside
1699 // System.out.println("references.item(j).getNodeName()"+references.item(j).getNodeName());
1700 if (references.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1701
1702 try {
1703 String followingText = null; //needs to be checked if follText is possible
1704 //TODO create or not create?
1705 currentMyName = extractScientificName(references.item(j), refMods, followingText);
1706 } catch (TransformerFactoryConfigurationError e) {
1707 logger.warn(e);
1708 } catch (TransformerException e) {
1709 logger.warn(e);
1710 }
1711
1712 // name=name.trim();
1713 }
1714 if (references.item(j).getNodeName().equalsIgnoreCase("#text")){
1715 refString = references.item(j).getTextContent().trim();
1716 }
1717 if(references.item(j).getNodeName().equalsIgnoreCase("#text") && !references.item(j).getTextContent().trim().isEmpty()){
1718 //
1719 if (!currentMyName.getStatus().isEmpty()){
1720 String nomNovStatus = this.newNameStatus(currentMyName.getStatus());
1721 if (nomNovStatus != null){
1722 nameToBeFilled.setAppendedPhrase(nomNovStatus);
1723 }else{
1724 try {
1725 NomenclaturalStatusType statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1726 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1727 } catch (UnknownCdmTypeException e) {
1728 addProblematicStatusToFile(currentMyName.getStatus());
1729 logger.warn("Problem with status");
1730 }
1731 }
1732 }
1733
1734 String fullLineRefName = references.item(j).getTextContent().trim();
1735 int nameOrRefOrOther=2;
1736 nameOrRefOrOther=askIfNameContained(fullLineRefName);
1737 if (nameOrRefOrOther==0){
1738 TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1739 Synonym synonym = Synonym.NewInstance(nameTBF, refMods);
1740
1741 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1742 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1743 boolean synoExist = false;
1744 for (Synonym syn: synonymsSet){
1745 // System.out.println(syn.getName()+" -- "+syn.getSec());
1746 boolean a =syn.getName().equals(synonym.getName());
1747 boolean b = syn.getSec().equals(synonym.getSec());
1748 if (a && b) {
1749 synoExist=true;
1750 }
1751 }
1752 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1753 sourceHandler.addSource(refMods, synonym);
1754
1755 acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1756 }
1757 }
1758
1759 if (nameOrRefOrOther==1){
1760 Reference re = ReferenceFactory.newGeneric();
1761 re.setTitleCache(fullLineRefName, true);
1762
1763 /* TaxonNameBase nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
1764 if (nameTBF.hasProblem() &&
1765 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1766 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
1767 nameTBF=solveNameProblem(currentMyName.getName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
1768 }
1769 nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1770 */
1771 TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1772 Synonym synonym = Synonym.NewInstance(nameTBF, re);
1773
1774 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1775 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1776 boolean synoExist = false;
1777 for (Synonym syn: synonymsSet){
1778 // System.out.println(syn.getName()+" -- "+syn.getSec());
1779 boolean a =syn.getName().equals(synonym.getName());
1780 boolean b = syn.getSec().equals(synonym.getSec());
1781 if (a && b) {
1782 synoExist=true;
1783 }
1784 }
1785 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1786 sourceHandler.addSource(refMods, synonym);
1787
1788 acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1789 }
1790
1791 }
1792
1793
1794 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1795 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
1796 }
1797 }
1798
1799 if(!currentMyName.getName().isEmpty()){
1800 //logger.info("acceptedTaxon and name: *"+acceptedTaxon.getTitleCache()+"*, *"+currentMyName.getName()+"*");
1801 if (acceptedTaxon.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(currentMyName.getName().trim())){
1802 Reference refS = ReferenceFactory.newGeneric();
1803 refS.setTitleCache(refString, true);
1804 // TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1805 // acceptedTaxon.addDescription(td);
1806 // acceptedTaxon.addSource(refSource);
1807 //
1808 // TextData textData = TextData.NewInstance(Feature.CITATION());
1809 //
1810 // textData.addSource(null, null, refS, null);
1811 // td.addElement(textData);
1812 // td.addSource(refSource);
1813 // importer.getDescriptionService().saveOrUpdate(td);
1814
1815
1816 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1817 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
1818
1819 }
1820
1821 acceptedTaxon.getName().setNomenclaturalReference(refS);
1822 }else{
1823 TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1824 Synonym synonym = null;
1825 if (! currentMyName.getStatus().isEmpty()){
1826 String nomNovStatus = this.newNameStatus(currentMyName.getStatus());
1827 if (nomNovStatus != null){
1828 nameToBeFilled.setAppendedPhrase(nomNovStatus);
1829 }else{
1830 try {
1831 NomenclaturalStatusType statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1832 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1833 synonym = Synonym.NewInstance(nameTBF, refMods);
1834 } catch (UnknownCdmTypeException e) {
1835 addProblematicStatusToFile(currentMyName.getStatus());
1836 logger.warn("Problem with status");
1837 synonym = Synonym.NewInstance(nameTBF, refMods);
1838 synonym.setAppendedPhrase(currentMyName.getStatus());
1839 }
1840 }
1841 }else{
1842 synonym = Synonym.NewInstance(nameTBF, refMods);
1843 }
1844
1845
1846 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1847 setLSID(currentMyName.getIdentifier(), synonym);
1848 }
1849
1850 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1851 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1852 boolean synoExist = false;
1853 for (Synonym syn: synonymsSet){
1854 // System.out.println(syn.getName()+" -- "+syn.getSec());
1855 boolean a =syn.getName().equals(synonym.getName());
1856 boolean b = syn.getSec().equals(synonym.getSec());
1857 if (a && b) {
1858 synoExist=true;
1859 }
1860 }
1861 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1862 sourceHandler.addSource(refMods, synonym);
1863
1864 acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1865 }
1866 }
1867 }
1868 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1869 }
1870 }
1871
1872
1873
1874 /**
1875 * @param identifier
1876 * @param acceptedTaxon
1877 */
1878 @SuppressWarnings("rawtypes")
1879 private void setLSID(String identifier, TaxonBase<?> taxon) {
1880 //logger.info("setLSID");
1881 // boolean lsidok=false;
1882 String id = identifier.split("__")[0];
1883 String source = identifier.split("__")[1];
1884 if (id.indexOf("lsid")>-1){
1885 try {
1886 LSID lsid = new LSID(id);
1887 taxon.setLsid(lsid);
1888 // lsidok=true;
1889 } catch (MalformedLSIDException e) {
1890 logger.warn("Malformed LSID");
1891 }
1892
1893 }
1894
1895 //logger.info("search reference for LSID");
1896 // if ((id.indexOf("lsid")<0) || !lsidok){
1897 //ADD ORIGINAL SOURCE ID EVEN IF LSID
1898 Reference re = null;
1899 Pager<Reference> references = importer.getReferenceService().findByTitle(Reference.class, source, MatchMode.EXACT, null, 1, null, null, null);
1900 if( references !=null && references.getCount()>0){
1901 re=references.getRecords().get(0);
1902 }
1903 //logger.info("search reference for LSID-end");
1904 if(re == null){
1905 re = ReferenceFactory.newGeneric();
1906 re.setTitleCache(source, true);
1907 importer.getReferenceService().saveOrUpdate(re);
1908 }
1909 re=CdmBase.deproxy(re, Reference.class);
1910
1911 //logger.info("search source for LSID");
1912 Set<IdentifiableSource> sources = taxon.getSources();
1913 boolean lsidinsource=false;
1914 boolean urlinsource=false;
1915 for (IdentifiableSource src:sources){
1916 if (id.equalsIgnoreCase(src.getIdInSource()) && re.getTitleCache().equals(src.getCitation().getTitleCache())) {
1917 lsidinsource=true;
1918 }
1919 if (src.getIdInSource() == null && re.getTitleCache().equals(sourceUrlRef.getTitleCache())) {
1920 urlinsource=true;
1921 }
1922 }
1923 if(!lsidinsource) {
1924 taxon.addSource(OriginalSourceType.Import, id,null,re,null);
1925 }
1926 if(!urlinsource)
1927 {
1928 sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
1929 taxon.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
1930 // }
1931 }
1932
1933 }
1934
1935 /**
1936 * try to solve a parsing problem for a scientific name
1937 * @param original : the name from the OCR document
1938 * @param name : the tagged version
1939 * @param parser
1940 * @return the corrected TaxonNameBase
1941 */
1942 /* @SuppressWarnings({ "unchecked", "rawtypes" })
1943 private TaxonNameBase<?,?> solveNameProblem(String original, String name, INonViralNameParser parser, String author, Rank rank) {
1944 Map<String,String> ato = namesMap.get(original);
1945 if (ato == null) {
1946 ato = namesMap.get(original+" "+author);
1947 }
1948
1949
1950 if (ato == null && rank.equals(Rank.UNKNOWN_RANK())){
1951 rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
1952 }
1953 if (ato != null && rank.equals(Rank.UNKNOWN_RANK())){
1954 rank = getRank(ato);
1955 }
1956 // TaxonNameBase<?,?> nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
1957 TaxonNameBase<?,?> nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1958 // logger.info("RANK: "+rank);
1959 int retry=0;
1960 List<ParserProblem> problems = nameTBF.getParsingProblems();
1961 for (ParserProblem pb:problems) {
1962 System.out.println(pb.toString());
1963 }
1964 while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
1965 addProblemNameToFile(name,author,nomenclaturalCode,rank);
1966 String fullname=name;
1967 if(! skippQuestion) {
1968 fullname = getFullReference(name,nameTBF.getParsingProblems());
1969 }
1970 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1971 nameTBF = TaxonNameFactory.NewBotanicalInstance(null);
1972 }
1973 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1974 nameTBF = TaxonNameFactory.NewZoologicalInstance(null);
1975 }
1976 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1977 nameTBF= TaxonNameFactory.NewBacterialInstance(null);
1978 }
1979 parser.parseReferencedName(nameTBF, fullname, rank, false);
1980 retry++;
1981 }
1982 if (retry == 1){
1983 if(author != null){
1984 if (name.indexOf(author)>-1) {
1985 nameTBF = parser.parseSimpleName(name.substring(0, name.indexOf(author)), nomenclaturalCode, rank);
1986 } else {
1987 nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1988 }
1989 if (nameTBF.hasProblem()){
1990 if (name.indexOf(author)>-1) {
1991 addProblemNameToFile(name.substring(0, name.indexOf(author)),author,nomenclaturalCode,rank);
1992 } else {
1993 addProblemNameToFile(name,author,nomenclaturalCode,rank);
1994 }
1995 // System.out.println("TBF still has problems "+nameTBF.hasProblem());
1996 problems = nameTBF.getParsingProblems();
1997 for (ParserProblem pb:problems) {
1998 System.out.println(pb.toString());
1999 }
2000 nameTBF.setFullTitleCache(name, true);
2001 }else{
2002 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2003 ((BotanicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2004 }
2005 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2006 ((ZoologicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2007 }
2008 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2009 ((BacterialName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2010 }
2011 }
2012 // logger.info("FULL TITLE CACHE "+name);
2013 }else{
2014 nameTBF.setFullTitleCache(name, true);
2015 }
2016 }
2017 return nameTBF;
2018 }
2019
2020 */
2021
2022 /**
2023 * @param nomenclatureNode: the XML nodes
2024 * @param nametosave: the list of objects to save into the CDM
2025 * @param refMods: the current reference extracted from the MODS
2026 * @return
2027 */
2028 @SuppressWarnings({ "rawtypes" })
2029 private Taxon extractNomenclature(Node nomenclatureNode, List<TaxonNameBase> nametosave, Reference refMods) throws ClassCastException{
2030 refMods=CdmBase.deproxy(refMods, Reference.class);
2031
2032 logger.info("extractNomenclature");
2033 NodeList children = nomenclatureNode.getChildNodes();
2034 String freetext="";
2035 Taxon acceptedTaxon = null;
2036 // INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2037
2038 // String fullContent = nomenclatureNode.getTextContent();
2039
2040 NomenclaturalStatusType statusType = null;
2041 String newNameStatus = null;
2042 //TODO
2043 for (int i=0;i<children.getLength();i++){
2044 if(children.item(i).getNodeName().equalsIgnoreCase("tax:status")){
2045 String status = children.item(i).getTextContent().trim();
2046
2047 if (!status.isEmpty()){
2048 if (newNameStatus(status) != null){
2049 newNameStatus = newNameStatus(status);
2050 }else{
2051 try {
2052 statusType = nomStatusString2NomStatus(status);
2053 } catch (UnknownCdmTypeException e) {
2054 // nomNovStatus;
2055 addProblematicStatusToFile(status);
2056 logger.warn("Problem with status: " + status);
2057 }
2058 }
2059 }
2060 }
2061 }
2062
2063 boolean containsSynonyms=false;
2064 boolean wasSynonym = false;
2065 usedFollowingTextPrefix = null; //reset
2066
2067 for (int i=0; i<children.getLength(); i++){
2068 Node childNode = children.item(i);
2069 String childName = childNode.getNodeName();
2070
2071
2072 //following text
2073 followingText = null;
2074 if ( i + 1 < children.getLength()){
2075 Node followingTextNode = children.item(i +1);
2076 if (followingTextNode.getNodeName().equals("#text") && !followingTextNode.getTextContent().matches("\\s*") ){
2077 followingText = followingTextNode.getTextContent();
2078 }
2079 }
2080
2081 //traverse nodes
2082 if (childName.equalsIgnoreCase("#text")) {
2083 freetext = childNode.getTextContent().trim();
2084 if (usedFollowingTextPrefix != null && freetext.startsWith(usedFollowingTextPrefix)){
2085 freetext = freetext.substring(usedFollowingTextPrefix.length());
2086 }
2087 usedFollowingTextPrefix = null; //reset
2088 }else if (childName.equalsIgnoreCase("tax:collection_event")) {
2089 // System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
2090 extractMaterialsDirect(childNode, acceptedTaxon, refMods, "collection", currentMyName.getTaxonNameBase());
2091 }else if(childName.equalsIgnoreCase("tax:name")){
2092 INonViralName nameToBeFilled;
2093 //System.out.println("HANDLE FIRST NAME OF THE LIST");
2094 if(!containsSynonyms){
2095 wasSynonym = false;
2096
2097 //System.out.println("I : "+i);
2098 currentMyName = new MyName(false);
2099 try {
2100 currentMyName = extractScientificName(childNode, refMods, followingText);
2101 treatmentMainName = currentMyName.getNewName();
2102 originalTreatmentName = currentMyName.getOriginalName();
2103
2104 } catch (TransformerFactoryConfigurationError e1) {
2105 throw new RuntimeException(e1);
2106 } catch (TransformerException e1) {
2107 throw new RuntimeException(e1);
2108 }
2109
2110 if (currentMyName.getRank().equals(Rank.UNKNOWN_RANK()) || currentMyName.getRank().isLower(state2.getConfig().getMaxRank()) || currentMyName.getRank().equals(state2.getConfig().getMaxRank())){
2111 maxRankRespected=true;
2112
2113 nameToBeFilled=currentMyName.getTaxonNameBase();
2114
2115 // acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
2116 acceptedTaxon=currentMyName.getTaxon();
2117 //System.out.println("TreatmentName "+treatmentMainName+" - "+acceptedTaxon);
2118
2119
2120 boolean statusMatch=false;
2121 if(acceptedTaxon !=null ){
2122 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2123 statusMatch=compareStatus(acceptedTaxon, statusType);
2124 //System.out.println("statusMatch: "+statusMatch);
2125 }
2126 if (acceptedTaxon ==null || (acceptedTaxon != null && !statusMatch)){
2127
2128 nameToBeFilled=currentMyName.getTaxonNameBase();
2129 if (nameToBeFilled != null){
2130 if (!originalTreatmentName.isEmpty()) {
2131 TaxonNameDescription td = TaxonNameDescription.NewInstance();
2132 td.setTitleCache(originalTreatmentName, true);
2133 nameToBeFilled.addDescription(td);
2134 }
2135
2136 if(statusType != null) {
2137 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
2138 }
2139 if(newNameStatus != null){
2140 nameToBeFilled.setAppendedPhrase(newNameStatus);
2141 }
2142 sourceHandler.addSource(refMods, nameToBeFilled);
2143
2144 if (nameToBeFilled.getNomenclaturalReference() == null) {
2145 acceptedTaxon= Taxon.NewInstance(nameToBeFilled,refMods);
2146 //System.out.println("NEW ACCEPTED HERE "+nameToBeFilled);
2147 }
2148 else {
2149 acceptedTaxon= Taxon.NewInstance(nameToBeFilled,(Reference) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
2150 //System.out.println("NEW ACCEPTED HERE2 "+nameToBeFilled);
2151 }
2152
2153 sourceHandler.addSource(refMods, acceptedTaxon);
2154
2155 if(!state2.getConfig().doKeepOriginalSecundum()) {
2156 acceptedTaxon.setSec(state2.getConfig().getSecundum());
2157 //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2158 //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2159 }
2160
2161 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2162 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2163 }
2164
2165
2166 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2167 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2168 }
2169
2170 }else{
2171 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2172 Set<IdentifiableSource> sources = acceptedTaxon.getSources();
2173 boolean sourcelinked=false;
2174 for (IdentifiableSource source:sources){
2175 if (source.getCitation().getTitleCache().equalsIgnoreCase(refMods.getTitleCache())) {
2176 sourcelinked=true;
2177 }
2178 }
2179 if (!state2.getConfig().doKeepOriginalSecundum()) {
2180 acceptedTaxon.setSec(state2.getConfig().getSecundum());
2181 //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2182 //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2183 }
2184 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2185
2186 if (!sourcelinked){
2187 sourceHandler.addSource(refMods, acceptedTaxon);
2188 }
2189 if (!sourcelinked || !state2.getConfig().doKeepOriginalSecundum()){
2190
2191 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2192 //FIXME are these identifiers really related to taxa, not names? Exiting LSIDs come from Zoobank, urn:lsid:biosci.ohio-state.edu:osuc_concepts:134826 (Ants)
2193 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2194 }
2195 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2196 }
2197 }
2198 }else{
2199 maxRankRespected=false;
2200 }
2201 containsSynonyms=true; //all folowing names are handled as synonyms
2202 }else{
2203 try{
2204 extractSynonyms(childNode, acceptedTaxon, refMods, followingText);
2205 wasSynonym = true;
2206
2207 }catch(NullPointerException e){
2208 logger.warn("null pointer exception, the accepted taxon might be null");
2209 }
2210 }
2211 containsSynonyms=true;
2212 }else if (childName.equalsIgnoreCase("tax:ref_group") && maxRankRespected){
2213 reloadClassification();
2214 //extract the References within the document
2215 extractReferences(childNode,nametosave,acceptedTaxon,refMods);
2216 }else if (childName.equalsIgnoreCase("tax:bibref")){
2217 logger.warn(childName + " still preliminary");
2218
2219 INonViralName currentName = currentMyName == null ? null : currentMyName.getTaxonNameBase();
2220 boolean handled = addFollowingTextToName (currentName, childNode.getTextContent() );
2221 if (! handled){
2222 setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject());
2223 }
2224 }else{
2225 logger.warn(childName + " not yet handled");
2226 }
2227 if(!stringIsEmpty(freetext.trim())) {;
2228 if (! freetext.matches("\\d\\.?")){
2229 INonViralName currentName = currentMyName == null ? null : currentMyName.getTaxonNameBase();
2230 boolean handled = false;
2231 if (currentName != null && !wasSynonym){
2232 handled = addFollowingTextToName (currentName, childNode.getTextContent() );
2233 }
2234 if (! handled){
2235 setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject());
2236 }
2237 }
2238
2239 freetext = "";
2240 }
2241
2242 }
2243 //importer.getClassificationService().saveOrUpdate(classification);
2244 return acceptedTaxon;
2245 }
2246
2247
2248
2249
2250 /**
2251 * @return
2252 */
2253
2254 private boolean compareStatus(TaxonBase<?> t, NomenclaturalStatusType statusType) {
2255 //logger.info("compareStatus");
2256 boolean statusMatch=false;
2257 //found one taxon
2258 Set<NomenclaturalStatus> status = t.getName().getStatus();
2259 if (statusType!=null && status.size()>0){ //the statusType is known for both taxon
2260 for (NomenclaturalStatus st:status){
2261 NomenclaturalStatusType stype = st.getType();
2262 if (stype.toString().equalsIgnoreCase(statusType.toString())) {
2263 statusMatch=true;
2264 }
2265 }
2266 }
2267 else{
2268 if(statusType == null && status.size()==0) {//there is no statusType, we can assume it's the same
2269 statusMatch=true;
2270 }
2271 }
2272 return statusMatch;
2273 }
2274
2275 /**
2276 * @param acceptedTaxon: the current acceptedTaxon
2277 * @param ref: the current reference extracted from the MODS
2278 * @return the parent for the current accepted taxon
2279 */
2280 /* private Taxon createParent(Taxon acceptedTaxon, Reference ref) {
2281 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2282
2283 List<Rank> rankList = new ArrayList<Rank>();
2284 rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
2285
2286 List<String> rankListStr = new ArrayList<String>();
2287 for (Rank r:rankList) {
2288 rankListStr.add(r.toString());
2289 }
2290 String r="";
2291 String s = acceptedTaxon.getTitleCache();
2292 Taxon tax = null;
2293 if(!skippQuestion){
2294 int addTaxon = askAddParent(s);
2295 logger.info("ADD TAXON: "+addTaxon);
2296 if (addTaxon == 0 ){
2297 Taxon tmp = askParent(acceptedTaxon, classification);
2298 if (tmp == null){
2299 s = askSetParent(s);
2300 r = askRank(s,rankListStr);
2301
2302 TaxonNameBase<?,?> nameToBeFilled = null;
2303 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2304 nameToBeFilled = TaxonNameFactory.NewBotanicalInstance(null);
2305 }
2306 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2307 nameToBeFilled = TaxonNameFactory.NewZoologicalInstance(null);
2308 }
2309 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2310 nameToBeFilled = TaxonNameFactory.NewBacterialInstance(null);
2311 }
2312 nameToBeFilled.setTitleCache(s, true);
2313 nameToBeFilled.setRank(getRank(r), true);
2314
2315 tax = Taxon.NewInstance(nameToBeFilled, ref);
2316 }
2317 else{
2318 tax=tmp;
2319 }
2320
2321 createParent(tax, ref);
2322 // logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
2323 classification.addParentChild(tax, acceptedTaxon, ref, null);
2324 }
2325 else{
2326 classification.addChildTaxon(acceptedTaxon, ref, null);
2327 tax=acceptedTaxon;
2328 }
2329 } else{
2330 classification.addChildTaxon(acceptedTaxon, ref, null);
2331 tax=acceptedTaxon;
2332 }
2333 // logger.info("RETURN: "+tax );
2334 return tax;
2335
2336 }
2337
2338 */
2339
2340
2341 private MyName extractScientificNameSynonym(Node name, Reference refMods, String followingText) throws TransformerFactoryConfigurationError, TransformerException {
2342 //System.out.println("extractScientificNameSynonym");
2343 logger.info("extractScientificNameSynonym");
2344 String[] rankListToPrint_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2345 List<String> rankListToPrint = new ArrayList<String>();
2346 for (String r : rankListToPrint_tmp) {
2347 rankListToPrint.add(r.toLowerCase());
2348 }
2349
2350 Rank rank = Rank.UNKNOWN_RANK();
2351 NodeList children = name.getChildNodes();
2352 String originalName="";
2353 String fullName = "";
2354 String newName="";
2355 String identifier="";
2356 HashMap<String, String> atomisedMap = new HashMap<String, String>();
2357 List<String> atomisedName= new ArrayList<String>();
2358
2359 String rankStr = "";
2360 Rank tmpRank ;
2361
2362 String status= extractStatus(children);
2363
2364 for (int i=0;i<children.getLength();i++){
2365 if(children.item(i).getNodeName().equalsIgnoreCase("tax:xmldata")){
2366 NodeList atom = children.item(i).getChildNodes();
2367 for (int k=0;k<atom.getLength();k++){
2368 identifier = extractIdentifier(identifier, atom.item(k));
2369 tmpRank = null;
2370 rankStr = atom.item(k).getNodeName().toLowerCase();
2371 // logger.info("RANKSTR:*"+rankStr+"*");
2372 if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2373 rankStr=atom.item(k).getTextContent().trim();
2374 tmpRank = getRank(rankStr);
2375 }
2376 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2377 if (tmpRank != null){
2378 rank=tmpRank;
2379 }
2380 atomisedMap.put(rankStr.toLowerCase(),atom.item(k).getTextContent().trim());
2381 }
2382 addAtomisedNamesToMap(rankListToPrint, rank, atomisedName, atom);
2383 }
2384 if(children.item(i).getNodeName().equalsIgnoreCase("#text") && !StringUtils.isBlank(children.item(i).getTextContent())){
2385 // logger.info("name non atomised: "+children.item(i).getTextContent());
2386 fullName = children.item(i).getTextContent().trim();
2387 // logger.info("fullname: "+fullName);
2388 }
2389 }
2390 originalName=fullName;
2391 fullName = cleanName(fullName, atomisedName);
2392 namesMap.put(fullName,atomisedMap);
2393
2394 String atomisedNameStr = getAtomisedNameStr(atomisedName);
2395
2396 if (fullName != null){
2397 // System.out.println("fullname: "+fullName);
2398 // System.out.println("atomised: "+atomisedNameStr);
2399 if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2400 if (skippQuestion){
2401 // String defaultN = "";
2402 if (atomisedNameStr.length()>fullName.length()) {
2403 newName=atomisedNameStr;
2404 } else {
2405 if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2406 newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2407 } else {
2408 newName=fullName;
2409 }
2410 }
2411 } else {
2412 newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2413 }
2414 } else {
2415 newName=fullName;
2416 }
2417 }
2418 //not really needed
2419 // rank = askForRank(newName, rank, nomenclaturalCode);
2420 // System.out.println("atomised: "+atomisedMap.toString());
2421
2422 // String[] names = new String[5];
2423 MyName myname = new MyName(true);
2424
2425 //System.out.println("Handle "+newName+ "(rank: "+rank+")");
2426 // System.out.println(atomisedMap.keySet());
2427 fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2428 myname.setOriginalName(fullName);
2429 myname.setNewName(newName);
2430 myname.setRank(rank);
2431 myname.setIdentifier(identifier);
2432 myname.setStatus(status);
2433 myname.setSource(refMods);
2434
2435 // boolean higherAdded=false;
2436
2437
2438 boolean parseNameManually=false;
2439 INonViralNameParser<?> parser = NonViralNameParserImpl.NewInstance();
2440 ITaxonNameBase nameToBeFilledTest ;
2441
2442 //if selected the atomised version
2443 if(newName==atomisedNameStr){
2444 nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2445 if (nameToBeFilledTest.hasProblem()){
2446 addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2447 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode, rank);
2448 if (nameToBeFilledTest.hasProblem()){
2449 addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2450 parseNameManually=true;
2451 }
2452 }
2453 }else{
2454 nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2455 if (nameToBeFilledTest.hasProblem()){
2456 addProblemNameToFile("fullversion",fullName, nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2457 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2458 parseNameManually=true;
2459 if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2460 addNameDifferenceToFile(originalName,atomisedNameStr);
2461 }
2462 }
2463 }
2464
2465 if(parseNameManually){
2466 //System.out.println("DO IT MANUALLY");
2467 if (this.state2.getConfig().isUseOldUnparsedSynonymExtraction()){
2468 createUnparsedSynonym(rank, newName, atomisedMap, myname);
2469 }else{
2470 createUnparsedSynonymNew(rank, newName, atomisedMap, myname, refMods);;
2471 }
2472 } else{
2473 //System.out.println("AUTOMATIC!");
2474 // createAtomisedTaxonString(newName, atomisedMap, myname);
2475 myname.setParsedName(nameToBeFilledTest);
2476 myname.buildTaxon();
2477 }
2478 //System.out.println("RETURN SYNONYM "+myname.getSyno().toString());
2479 return myname;
2480 }
2481
2482
2483 /**
2484 * @param name
2485 * @throws TransformerFactoryConfigurationError
2486 * @throws TransformerException
2487 * @return a list of possible names
2488 */
2489 @SuppressWarnings({"rawtypes" })
2490 private MyName extractScientificName(Node name, Reference refMods, String followingText) throws TransformerFactoryConfigurationError, TransformerException {
2491 logger.info("extractScientificName");
2492
2493 String[] rankListToPrintLowerCase_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2494 List<String> rankListToPrint = Arrays.asList(rankListToPrintLowerCase_tmp);
2495
2496 Rank rank = Rank.UNKNOWN_RANK();
2497 NodeList children = name.getChildNodes();
2498 String originalName = "";
2499 String fullName = "";
2500 String newName = "";
2501 String identifier = "";
2502 HashMap<String, String> atomisedMap = new HashMap<String, String>();
2503 List<String> atomisedNameList= new ArrayList<String>();
2504
2505 String status= extractStatus(children);
2506
2507 for (int i=0;i<children.getLength();i++){
2508 Node nameChild = children.item(i);
2509 if(nameChild.getNodeName().equalsIgnoreCase("tax:xmldata")){
2510 NodeList xmlDataChildren = nameChild.getChildNodes();
2511 for (int k=0;k<xmlDataChildren.getLength();k++){
2512 Node xmlDataChild = xmlDataChildren.item(k);
2513 identifier = extractIdentifier(identifier, xmlDataChild);
2514 String rankStr = xmlDataChild.getNodeName().toLowerCase();
2515 if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2516 rankStr=xmlDataChild.getTextContent().trim();
2517 Rank tmpRank = getRank(rankStr);
2518 if (tmpRank != null){
2519 rank=tmpRank;
2520 }
2521 }
2522 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2523
2524 atomisedMap.put(rankStr.toLowerCase(),xmlDataChild.getTextContent().trim());
2525 }
2526 addAtomisedNamesToMap(rankListToPrint, rank, atomisedNameList, xmlDataChildren);
2527 }
2528 else if(nameChild.getNodeName().equalsIgnoreCase("#text") && ! nameChild.getTextContent().matches("\\s*")){
2529 // logger.info("name non atomised: "+children.item(i).getTextContent());
2530 fullName = nameChild.getTextContent().trim();
2531 // logger.info("fullname: "+fullName);
2532 }
2533 }
2534 originalName=fullName;
2535 fullName = cleanName(fullName, atomisedNameList);
2536 namesMap.put(fullName,atomisedMap);
2537
2538 String atomisedNameStr = getAtomisedNameStr(atomisedNameList);
2539
2540 if (fullName != null){
2541 if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2542 if (skippQuestion){
2543 if (atomisedNameStr.length()>fullName.length()) {
2544 newName = atomisedNameStr;
2545 } else {
2546 if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2547 newName = askWhichScientificName(fullName, atomisedNameStr, classification.getTitleCache(), name);
2548 } else {
2549 newName = fullName;
2550 }
2551 }
2552 } else {
2553 newName=askWhichScientificName(fullName, atomisedNameStr, classification.getTitleCache(), name);
2554 }
2555 } else {
2556 newName=fullName;
2557 }
2558 }
2559 //not really needed
2560 // rank = askForRank(newName, rank, nomenclaturalCode);
2561 // System.out.println("atomised: "+atomisedMap.toString());
2562
2563 // String[] names = new String[5];
2564 MyName myname = new MyName(false);
2565
2566 //System.out.println("\n\nBUILD "+newName+ "(rank: "+rank+")");
2567 // System.out.println(atomisedMap.keySet());
2568 fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2569 myname.setOriginalName(fullName);
2570 myname.setNewName(newName);
2571
2572 myname.setRank(rank);
2573 myname.setIdentifier(identifier);
2574 myname.setStatus(status);
2575 myname.setSource(refMods);
2576
2577 // boolean higherAdded=false;
2578
2579
2580 boolean parseNameManually=false;
2581 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2582 ITaxonNameBase nameToBeFilledTest = null;
2583
2584 //if selected the atomised version
2585 if(newName==atomisedNameStr){
2586 nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2587 if (nameToBeFilledTest.hasProblem()){
2588 addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2589 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2590 if (nameToBeFilledTest.hasProblem()){
2591 addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2592 parseNameManually=true;
2593 }
2594 }
2595 }else{
2596 nameToBeFilledTest = parseWithExtension(parser, fullName , rank, followingText, atomisedMap);
2597 if (nameToBeFilledTest.hasProblem()){
2598 addProblemNameToFile("fullversion",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2599 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2600 parseNameManually=true;
2601 if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2602 addNameDifferenceToFile(originalName,atomisedNameStr);
2603 }
2604 }
2605 }
2606
2607 //System.out.println("parseNameManually: "+parseNameManually);
2608 if(parseNameManually){
2609 createAtomisedTaxon(rank, newName, atomisedMap, myname);
2610 }
2611 else{
2612 createAtomisedTaxonString(newName, atomisedMap, myname);
2613 myname.setParsedName(nameToBeFilledTest);
2614 //TODO correct handling of createIfNotExists
2615 myname.buildTaxon();
2616 }
2617 return myname;
2618
2619 }
2620
2621 private ITaxonNameBase parseWithExtension(INonViralNameParser parser, String atomisedNameStr, Rank rank, String followingText, HashMap<String, String> atomisedMap) {
2622 Object[] nameExtensionResult = getPossibleExtension(followingText, atomisedMap, nomenclaturalCode);
2623
2624 TaxonNameBase name = (TaxonNameBase)parser.parseFullName(atomisedNameStr, nomenclaturalCode, rank);
2625 if (nameExtensionResult != null && nameExtensionResult[0] != null){
2626 String ext = (String)nameExtensionResult[0];
2627 TaxonNameBase extName = (TaxonNameBase)parser.parseFullName(atomisedNameStr + " " + ext, nomenclaturalCode, rank);
2628 if (! extName.hasProblem()){
2629 name = extName;
2630 this.usedFollowingTextPrefix = ext;
2631 //TODO do we need to fill the atomisedMap at all?
2632 if ((Boolean)(nameExtensionResult[1])){
2633 //TODO
2634 }
2635 if ((Boolean)(nameExtensionResult[2])){
2636 //TODO BasionymYear etc.
2637 Integer origYear = name.getPublicationYear();
2638 if (origYear != null){
2639 atomisedMap.put(PUBLICATION_YEAR, origYear.toString());
2640 }
2641 }
2642 }
2643 }
2644 return name;
2645 }
2646
2647 private Object[] getPossibleExtension(String followingText, HashMap<String, String> atomisedMap, NomenclaturalCode nomenclaturalCode) {
2648 if (StringUtils.isBlank(followingText)){
2649 return null;
2650 }
2651
2652 boolean includeAuthor = true;
2653 boolean includeYear = false;
2654 if (atomisedMap.containsKey("dwc:scientificnameauthorship")){
2655 includeAuthor = false;
2656 }
2657 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2658 includeYear = true;
2659 }
2660 String patternStr = "";
2661 if (includeAuthor){
2662 patternStr += NonViralNameParserImplRegExBase.capitalWord;
2663 }
2664 if (includeYear){
2665 patternStr += "\\s*(,|\\s+)\\s*" + "(17|18|19|20)" + "\\d{2}" ;
2666 }
2667 String match = null;
2668 if (! patternStr.isEmpty()){
2669 Pattern pattern = Pattern.compile("^" + patternStr);
2670 Matcher matcher = pattern.matcher(followingText.trim());
2671 if (matcher.find()){
2672 match = matcher.group();
2673 }
2674 }
2675
2676 return new Object[]{match, includeAuthor, includeYear};
2677 }
2678
2679 /**
2680 * @param atomisedName
2681 * @return
2682 */
2683 private String getAtomisedNameStr(List<String> atomisedName) {
2684 //logger.info("getAtomisedNameStr");
2685 String atomisedNameStr = StringUtils.join(atomisedName," ");
2686 while(atomisedNameStr.contains(" ")) {
2687 atomisedNameStr=atomisedNameStr.replace(" ", " ");
2688 }
2689 atomisedNameStr=atomisedNameStr.trim();
2690 return atomisedNameStr;
2691 }
2692
2693 /**
2694 * @param children
2695 * @param status
2696 * @return
2697 */
2698 private String extractStatus(NodeList children) {
2699 logger.info("extractStatus");
2700 String status="";
2701 for (int i=0;i<children.getLength();i++){
2702 if(children.item(i).getNodeName().equalsIgnoreCase("tax:status") ||
2703 (children.item(i).getNodeName().equalsIgnoreCase("tax:namePart") &&
2704 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("status"))){
2705 status = children.item(i).getTextContent().trim();
2706 }
2707 }
2708 return status;
2709 }
2710
2711 /**
2712 * @param identifier
2713 * @param atom
2714 * @param k
2715 * @return
2716 */
2717 private String extractIdentifier(String identifier, Node atom) {
2718 //logger.info("extractIdentifier");
2719 if (atom.getNodeName().equalsIgnoreCase("tax:xid")){
2720 try{
2721 identifier = atom.getAttributes().getNamedItem("identifier").getNodeValue();
2722 }catch(Exception e){
2723 System.out.println("pb with identifier, maybe empty");
2724 }
2725 try{
2726 identifier+="__"+atom.getAttributes().getNamedItem("source").getNodeValue();
2727 }catch(Exception e){
2728 System.out.println("pb with identifier, maybe empty");
2729 }
2730 }
2731 return identifier;
2732 }
2733
2734 /**
2735 * @param rankListToPrint
2736 * @param rank
2737 * @param atomisedName
2738 * @param atom
2739 */
2740 private void addAtomisedNamesToMap(List<String> rankListToPrint, Rank rank, List<String> atomisedName, NodeList atom) {
2741 logger.info("addAtomisedNamesToMap");
2742 for (int k=0;k<atom.getLength();k++){
2743 Node node = atom.item(k);
2744 String nodeName = node.getNodeName();
2745 if (! nodeName.equalsIgnoreCase("dwc:taxonRank") ) { //rank has been handled in higher method
2746 if (nodeName.equalsIgnoreCase("dwc:subgenus") || nodeName.equalsIgnoreCase("dwcranks:subgenus")) {
2747 atomisedName.add("("+ node.getTextContent().trim()+")");
2748 } else if(nodeName.equalsIgnoreCase("dwcranks:varietyepithet") || nodeName.equalsIgnoreCase("dwc:Subspecies") || nodeName.equalsIgnoreCase("dwc:infraspecificepithet")) {
2749 if(nodeName.equalsIgnoreCase("dwcranks:varietyepithet")){
2750 atomisedName.add("var. "+node.getTextContent().trim());
2751 }else if(nodeName.equalsIgnoreCase("dwc:Subspecies") || nodeName.equalsIgnoreCase("dwc:infraspecificepithet")) {
2752 atomisedName.add("subsp. "+atom.item(k).getTextContent().trim());
2753 }
2754 } else if(rankListToPrint.contains(nodeName.toLowerCase())) {
2755 atomisedName.add(node.getTextContent().trim());
2756 } else{
2757 if (rank.isHigher(Rank.GENUS()) && (nodeName.indexOf("dwcranks:")>-1 || nodeName.indexOf("dwc:Family")>-1)) {
2758 atomisedName.add(node.getTextContent().trim());
2759 }else if (nodeName.equals("#text")){
2760 String text = node.getTextContent();
2761 if (StringUtils.isNotBlank(text)){
2762 //TODO handle text
2763 logger.warn("name xmldata contains text. This is unhandled");
2764 }
2765 }else if (nodeName.matches("(?i)(dwc:Kingdom|dwc:Class|dwc:Order|dwc:Family)")){
2766 //we currently do not use higher ranks information
2767 }else{
2768 //TODO handle unhandled node
2769 logger.warn("Unhandled node: " + nodeName);
2770 }
2771 }
2772 }
2773 }
2774 }
2775
2776 /**
2777 * @param fullName
2778 * @param atomisedName
2779 * @return
2780 */
2781 private String cleanName(String name, List<String> atomisedName) {
2782 //logger.info("cleanName");
2783 String fullName =name;
2784 if (fullName != null){
2785 fullName = fullName.replace("( ", "(");
2786 fullName = fullName.replace(" )",")");
2787
2788 if (fullName.trim().isEmpty()){
2789 fullName=StringUtils.join(atomisedName," ");
2790 }
2791
2792 while(fullName.contains(" ")) {
2793 fullName=fullName.replace(" ", " ");
2794 // logger.info("while");
2795 }
2796 fullName=fullName.trim();
2797 }
2798 return fullName;
2799 }
2800
2801 /**
2802 * @param rank
2803 * @param fullName
2804 * @param atomisedMap
2805 * @param myname
2806 * @return
2807 */
2808 private String extractAuthorFromNames(Rank rank, String name, HashMap<String, String> atomisedMap, MyName myname) {
2809 logger.info("extractAuthorFromNames");
2810 String fullName=name;
2811 if (atomisedMap.get("dwc:scientificnameauthorship") == null && fullName!=null){
2812 // System.out.println("rank : "+rank.toString());
2813 if(rank.isHigher(Rank.SPECIES())){
2814 try{
2815 String author=null;
2816 if(atomisedMap.get("dwcranks:subgenus") != null) {
2817 author = fullName.split(atomisedMap.get("dwcranks:subgenus"))[1].trim();
2818 }
2819 if(atomisedMap.get("dwc:subgenus") != null) {
2820 author = fullName.split(atomisedMap.get("dwc:subgenus"))[1].trim();
2821 }
2822 if(author == null) {
2823 if(atomisedMap.get("dwc:genus") != null) {
2824 author = fullName.split(atomisedMap.get("dwc:genus"))[1].trim();
2825 }
2826 }
2827 if(author != null){
2828 fullName = fullName.substring(0, fullName.indexOf(author));
2829 author=author.replaceAll(",","").trim();
2830 myname.setAuthor(author);
2831 }
2832 }catch(Exception e){
2833 //could not extract the author
2834 }
2835 }
2836 if(rank.equals(Rank.SPECIES())){
2837 try{
2838 String author=null;
2839 if(author == null) {
2840 if(atomisedMap.get("dwc:species") != null) {
2841 String[] t = fullName.split(atomisedMap.get("dwc:species"));
2842 // System.out.println("NB ELEMENTS "+t.length +"fullName "+fullName+", "+atomisedMap.get("dwc:species"));
2843 author = fullName.split(atomisedMap.get("dwc:species"))[1].trim();
2844 // System.out.println("AUTEUR "+author);
2845 }
2846 }
2847 if(author != null){
2848 fullName = fullName.substring(0, fullName.indexOf(author));
2849 author=author.replaceAll(",","").trim();
2850 myname.setAuthor(author);
2851 }
2852 }catch(Exception e){
2853 //could not extract the author
2854 }
2855 }
2856 }else{
2857 myname.setAuthor(atomisedMap.get("dwc:scientificnameauthorship"));
2858 }
2859 return fullName;
2860 }
2861
2862 /**
2863 * @param newName
2864 * @param atomisedMap
2865 * @param myname
2866 */
2867 private void createAtomisedTaxonString(String newName, HashMap<String, String> atomisedMap, MyName myname) {
2868 logger.info("createAtomisedTaxonString "+atomisedMap);
2869 if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
2870 myname.setFamilyStr(atomisedMap.get("dwc:family"));
2871 }
2872 if(atomisedMap.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank.SUBFAMILY())){
2873 myname.setSubfamilyStr(atomisedMap.get("dwcranks:subfamily"));
2874 }
2875 if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
2876 myname.setTribeStr(atomisedMap.get("dwcranks:tribe"));
2877 }
2878 if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
2879 myname.setSubtribeStr(atomisedMap.get("dwcranks:subtribe"));
2880 }
2881 if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
2882 myname.setGenusStr(atomisedMap.get("dwc:genus"));
2883 }
2884 if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2885 myname.setSubgenusStr(atomisedMap.get("dwcranks:subgenus"));
2886 }
2887 if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2888 myname.setSubgenusStr(atomisedMap.get("dwc:subgenus"));
2889 }
2890 if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
2891 String n=newName;
2892 if(atomisedMap.get("dwc:infraspecificepithet") != null) {
2893 n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
2894 n=n.replace("subsp.","");
2895 }
2896 if(atomisedMap.get("dwc:subspecies") != null) {
2897 n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
2898 n=n.replace("subsp.","");
2899 }
2900 if(atomisedMap.get("dwcranks:varietyepithet") != null) {
2901 n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
2902 n=n.replace("var.","");
2903 n=n.replace("v.","");
2904 }
2905 if(atomisedMap.get("dwcranks:formepithet") != null) {
2906 //TODO
2907 System.out.println("TODO FORMA");
2908 n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
2909 n=n.replace("forma","");
2910 }
2911 n=n.trim();
2912 String author = myname.getAuthor();
2913 if(n.split(" ").length>2){
2914
2915 String n2=n.split(" ")[0]+" "+n.split(" ")[1];
2916 String a= "";
2917 try{
2918 a=n.split(n2)[1].trim();
2919 }catch(Exception e){
2920 logger.info("no author in "+n+"?");}
2921
2922 myname.setAuthor(a);
2923 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
2924 n=n2;
2925
2926 }
2927
2928 myname.setSpeciesStr(atomisedMap.get("dwc:species"));
2929 myname.setAuthor(author);
2930 }
2931 if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
2932 myname.setSubspeciesStr(atomisedMap.get("dwc:subspecies"));
2933 }
2934 if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
2935 myname.setSubspeciesStr(atomisedMap.get("dwc:infraspecificepithet"));
2936 }
2937 if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
2938 myname.setVarietyStr(atomisedMap.get("dwcranks:varietyepithet"));
2939 }
2940 if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
2941 myname.setFormStr(atomisedMap.get("dwcranks:formepithet"));
2942 }
2943 if (atomisedMap.get(PUBLICATION_YEAR) != null){
2944 myname.setPublicationYear(Integer.valueOf(atomisedMap.get(PUBLICATION_YEAR)));
2945 }
2946 }
2947
2948 /**
2949 * @see #createUnparsedSynonymNew(Rank, String, HashMap, MyName)
2950 * @param rank
2951 * @param newName
2952 * @param atomisedMap
2953 * @param myname
2954 */
2955 private void createUnparsedSynonym(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
2956 logger.info("createSynonym");
2957 //System.out.println("createsynonym");
2958 if(rank.equals(Rank.UNKNOWN_RANK())){
2959 myname.setNotParsableTaxon(newName);
2960 }else{
2961 if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY()) && rank.equals(Rank.FAMILY())){
2962 myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
2963 }
2964 if(atomisedMap.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank.SUBFAMILY()) && rank.equals(Rank.SUBFAMILY())){
2965 myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
2966 }
2967 if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE()) && rank.equals(Rank.TRIBE())){
2968 myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
2969 }
2970 if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE()) && rank.equals(Rank.SUBTRIBE())){
2971 myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
2972 }
2973 if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS()) && rank.equals(Rank.GENUS())){
2974 myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
2975 }
2976 if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
2977 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
2978 }
2979 if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
2980 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
2981 }
2982 if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES()) && rank.equals(Rank.SPECIES())){
2983 String n=newName;
2984 if(atomisedMap.get("dwc:infraspecificepithet") != null) {
2985 n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
2986 n=n.replace("subsp.","");
2987 }
2988 if(atomisedMap.get("dwc:subspecies") != null) {
2989 n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
2990 n=n.replace("subsp.","");
2991 }
2992 if(atomisedMap.get("dwcranks:varietyepithet") != null) {
2993 n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
2994 n=n.replace("var.","");
2995 n=n.replace("v.","");
2996 }
2997 if(atomisedMap.get("dwcranks:formepithet") != null) {
2998 //TODO
2999 //System.out.println("TODO FORMA");
3000 n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3001 n=n.replace("forma","");
3002 }
3003 n=n.trim();
3004 String author = myname.getAuthor();
3005 if(n.split(" ").length>2){
3006
3007 String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3008 String a="";
3009 try{
3010 a= n.split(n2)[1].trim();
3011 }catch(Exception e){logger.info("no author in "+n);}
3012 myname.setAuthor(a);
3013 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3014 n=n2;
3015
3016 }
3017 Taxon species = myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank);
3018 myname.setSpecies(species);
3019 myname.setAuthor(author);
3020 }
3021 if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3022 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3023 }
3024 if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3025 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3026 }
3027 if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY()) && rank.equals(Rank.VARIETY())){
3028 myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3029 }
3030 if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM()) && rank.equals(Rank.FORM())){
3031 myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3032 }
3033 }
3034
3035 }
3036
3037
3038 /**
3039 * @param refMods
3040 * @see #createUnparsedSynonym(Rank, String, HashMap, MyName)
3041 * the original TaxonXImport extracted Synonyms by creating acc Taxa with partial names
3042 * I (AM) do not understand this but don't want to destroy code which maybe works in some cases) there
3043 * I created this switch for old
3044 * for Spiders the new version is preferred
3045 */
3046 private void createUnparsedSynonymNew(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname, Reference refMods) {
3047 logger.info("createSynonym");
3048
3049 INonViralName nameToBeFilled = this.getNonViralNameAccNomenclature();
3050 //System.out.println("createsynonym");
3051 if(rank.equals(Rank.UNKNOWN_RANK())){
3052 //TODO
3053 myname.setNotParsableTaxon(newName);
3054
3055 nameToBeFilled.setTitleCache(newName, true);
3056 }else{
3057 if(atomisedMap.get("dwc:genus") != null ){
3058 nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwc:genus"));
3059 }
3060 if (rank.isSupraGeneric()){
3061 if (atomisedMap.get("dwcranks:subtribe") != null ){
3062 nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subtribe"));
3063 }else if (atomisedMap.get("dwcranks:subtribe") != null ){
3064 nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subtribe"));
3065 }else if (atomisedMap.get("dwcranks:tribe") != null ){
3066 nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:tribe"));
3067 }else if (atomisedMap.get("dwcranks:subfamily") != null ){
3068 nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subfamily"));
3069 }else if (atomisedMap.get("dwc:family") != null ){
3070 nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwc:family"));
3071 }else{
3072 logger.warn("Supra generic rank not yet handled or atomisation not available");
3073 }
3074 }
3075 if (atomisedMap.get("dwcranks:subgenus") != null){
3076 nameToBeFilled.setInfraGenericEpithet(atomisedMap.get("dwcranks:subgenus"));
3077 }
3078 if (atomisedMap.get("dwc:subgenus") != null){
3079 nameToBeFilled.setInfraGenericEpithet(atomisedMap.get("dwc:subgenus"));
3080 }
3081 if (atomisedMap.get("dwc:species") != null){
3082 nameToBeFilled.setSpecificEpithet(atomisedMap.get("dwc:species"));
3083 }
3084 if (atomisedMap.get("dwcranks:formepithet") != null){
3085 nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwcranks:formepithet"));
3086 }else if (atomisedMap.get("dwcranks:varietyepithet") != null){
3087 nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwcranks:varietyepithet"));
3088 }else if (atomisedMap.get("dwc:infraspecificepithet") != null){
3089 nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwc:infraspecificepithet"));
3090 }else if (atomisedMap.get("dwc:subspecies") != null){
3091 nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwc:subspecies"));
3092 }
3093 Reference sec = sourceUrlRef;
3094 if(!state2.getConfig().doKeepOriginalSecundum()){
3095 sec = state2.getConfig().getSecundum();
3096 }
3097 Synonym syn = Synonym.NewInstance(nameToBeFilled, sec);
3098 // sourceHandler.addSource(refMods, syn);
3099 myname.setSyno(syn);
3100 myname.setSynonym(true);
3101 }
3102 }
3103
3104 /**
3105 * @param rank
3106 * @param newName
3107 * @param atomisedMap
3108 * @param myname
3109 */
3110 private void createAtomisedTaxon(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
3111 logger.info("createAtomisedTaxon "+atomisedMap);
3112 if(rank.equals(Rank.UNKNOWN_RANK())){
3113 myname.setNotParsableTaxon(newName);
3114 }
3115 else{
3116 if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
3117 myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
3118 }
3119 if(atomisedMap.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank.SUBFAMILY())){
3120 myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
3121 }
3122 if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
3123 myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
3124 }
3125 if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
3126 myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
3127 }
3128 if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
3129 myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
3130 }
3131 if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3132 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
3133 }
3134 if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3135 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
3136 }
3137 if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
3138 String n=newName;
3139 if(atomisedMap.get("dwc:infraspecificepithet") != null) {
3140 n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
3141 n=n.replace("subsp.","");
3142 }
3143 if(atomisedMap.get("dwc:subspecies") != null) {
3144 n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
3145 n=n.replace("subsp.","");
3146 }
3147 if(atomisedMap.get("dwcranks:varietyepithet") != null) {
3148 n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
3149 n=n.replace("var.","");
3150 n=n.replace("v.","");
3151 }
3152 if(atomisedMap.get("dwcranks:formepithet") != null) {
3153 //TODO
3154 //System.out.println("TODO FORMA");
3155 n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3156 n=n.replace("forma","");
3157 }
3158 n=n.trim();
3159 String author = myname.getAuthor();
3160 if(n.split(" ").length>2){
3161 String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3162 String a="";
3163 try{
3164 a= n.split(n2)[1].trim();
3165 }catch(Exception e){logger.info("no author in "+n);}
3166 myname.setAuthor(a);
3167 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3168 n=n2;
3169
3170 }
3171
3172 myname.setSpecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank));
3173 myname.setAuthor(author);
3174 }
3175 if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3176 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3177 }
3178 if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3179 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3180 }
3181 if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
3182 myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3183 }
3184 if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
3185 myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3186 }
3187 }
3188 }
3189
3190 /**
3191 * @return
3192 */
3193 private boolean checkRankValidForImport(Rank currentRank) {
3194 //logger.info("checkRankValidForImport");
3195 return currentRank.isLower(state2.getConfig().getMaxRank()) || currentRank.equals(state2.getConfig().getMaxRank());
3196 }
3197
3198
3199
3200 /**
3201 * @param classification2
3202 */
3203 public void updateClassification(Classification classification2) {
3204 //logger.info("updateClassification");
3205 classification = classification2;
3206 }
3207
3208
3209
3210 public class MyName {
/**
 * Creates an empty name holder.
 *
 * @param isSynonym <code>true</code> if the name is handled as synonym
 */
public MyName(boolean isSynonym) {
    this.isSynonym = isSynonym;
}
3218
3219 String originalName="";
3220 String newName="";
3221 Rank rank=Rank.UNKNOWN_RANK();
3222 String identifier="";
3223 String status="";
3224 String author=null;
3225
3226 TaxonNameBase<?,?> taxonNameBase;
3227
3228 Reference refMods ;
3229
3230 Taxon family,subfamily,tribe,subtribe,genus,subgenus,species,subspecies, variety,form;
3231 INonViralName familyName, subfamilyName, tribeName,subtribeName,genusName,subgenusName,speciesName,subspeciesName;
3232 String familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr;
3233 Integer publicationYear;
3234
3235
3236 Taxon higherTaxa;
3237 Rank higherRank;
3238 private Taxon taxon;
3239 private Synonym syno;
3240
3241 /**
3242 * @return the syno
3243 */
3244 public Synonym getSyno() {
3245 return syno;
3246 }
3247
3248 @Override
3249 public String toString(){
3250 List<String> tot=new ArrayList<String>();
3251 String[] n= {familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr};
3252 for (String elt:n){
3253 if (!StringUtils.isEmpty(elt)) {
3254 tot.add(elt);
3255 } else {
3256 tot.add("*");
3257 }
3258 }
3259 return StringUtils.join(tot," ");
3260 }
3261 /**
3262 * @param syno the syno to set
3263 */
3264 public void setSyno(Synonym syno) {
3265 this.syno = syno;
3266 }
3267
3268 boolean isSynonym=false;
3269
3270 /**
3271 * @return the isSynonym
3272 */
3273 public boolean isSynonym() {
3274 return isSynonym;
3275 }
3276
3277 /**
3278 * @param isSynonym the isSynonym to set
3279 */
3280 public void setSynonym(boolean isSynonym) {
3281 this.isSynonym = isSynonym;
3282 }
3283
3284 public void setSource(Reference re){
3285 refMods=re;
3286 }
3287
3288 /**
3289 * @param string
3290 */
3291 public void setFormStr(String string) {
3292 this.formStr=string;
3293
3294 }
3295 /**
3296 * @param string
3297 */
3298 public void setVarietyStr(String string) {
3299 this.varietyStr=string;
3300
3301 }
3302 /**
3303 * @param string
3304 */
3305 public void setSubspeciesStr(String string) {
3306 this.subspeciesStr=string;
3307
3308 }
3309 /**
3310 * @param string
3311 */
3312 public void setSpeciesStr(String string) {
3313 this.speciesStr=string;
3314
3315 }
3316 /**
3317 * @param string
3318 */
3319 public void setSubgenusStr(String string) {
3320 this.subgenusStr=string;
3321
3322 }
3323 /**
3324 * @param string
3325 */
3326 public void setGenusStr(String string) {
3327 this.genusStr=string;
3328
3329 }
3330 /**
3331 * @param string
3332 */
3333 public void setSubtribeStr(String string) {
3334 this.subtribeStr=string;
3335
3336 }
3337 /**
3338 * @param string
3339 */
3340 public void setTribeStr(String string) {
3341 this.tribeStr=string;
3342
3343 }
3344 /**
3345 * @param string
3346 */
3347 public void setSubfamilyStr(String string) {
3348 this.subfamilyStr=string;
3349
3350 }
3351 /**
3352 * @param string
3353 */
3354 public void setFamilyStr(String string) {
3355 this.familyStr=string;
3356
3357 }
3358 /**
3359 * @return the familyStr
3360 */
3361 public String getFamilyStr() {
3362 return familyStr;
3363 }
3364 /**
3365 * @return the subfamilyStr
3366 */
3367 public String getSubfamilyStr() {
3368 return subfamilyStr;
3369 }
3370 /**
3371 * @return the tribeStr
3372 */
3373 public String getTribeStr() {
3374 return tribeStr;
3375 }
3376 /**
3377 * @return the subtribeStr
3378 */
3379 public String getSubtribeStr() {
3380 return subtribeStr;
3381 }
3382 /**
3383 * @return the genusStr
3384 */
3385 public String getGenusStr() {
3386 return genusStr;
3387 }
3388 /**
3389 * @return the subgenusStr
3390 */
3391 public String getSubgenusStr() {
3392 return subgenusStr;
3393 }
3394 /**
3395 * @return the speciesStr
3396 */
3397 public String getSpeciesStr() {
3398 return speciesStr;
3399 }
3400 /**
3401 * @return the subspeciesStr
3402 */
3403 public String getSubspeciesStr() {
3404 return subspeciesStr;
3405 }
3406 /**
3407 * @return the formStr
3408 */
3409 public String getFormStr() {
3410 return formStr;
3411 }
3412 /**
3413 * @return the varietyStr
3414 */
3415 public String getVarietyStr() {
3416 return varietyStr;
3417 }
3418
3419 public Integer getPublicationYear() {
3420 return publicationYear;
3421 }
3422
3423 public void setPublicationYear(Integer publicationYear) {
3424 this.publicationYear = publicationYear;
3425 }
3426
3427 /**
3428 * @param newName2
3429 */
3430 public void setNotParsableTaxon(String newName2) {
3431 //takes too much time
3432 // List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3433
3434 NomenclaturalStatusType statusType = null;
3435 if (!getStatus().isEmpty()){
3436 try {
3437 statusType = nomStatusString2NomStatus(getStatus());
3438 } catch (UnknownCdmTypeException e) {
3439 addProblematicStatusToFile(getStatus());
3440 logger.warn("Problem with status");
3441 }
3442 }
3443 List<TaxonBase> tmpList = new ArrayList<>();
3444
3445 Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, newName2, MatchMode.BEGINNING, null, null, null, null, null);
3446 tmpList.addAll(taxontest.getRecords());
3447
3448 //logger.info("tmpList returned: "+tmpList.size());
3449
3450
3451 INonViralName identicName = null;
3452 boolean foundIdentic=false;
3453 TaxonBase<?> tmpTaxonBase=null;
3454 // Taxon tmpPartial=null;
3455 for (TaxonBase<?> tmpb:tmpList){
3456 if(tmpb !=null){
3457 TaxonNameBase<?,?> tnb = tmpb.getName();
3458 Rank crank=null;
3459 if (tnb != null){
3460 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(newName2) ){
3461 crank =tnb.getRank();
3462 if (crank !=null && rank !=null){
3463 if (crank.equals(rank)){
3464 identicName = tnb;
3465 if (isSynonym && tmpb.isInstanceOf(Synonym.class) || !isSynonym && tmpb.isInstanceOf(Taxon.class)){
3466 foundIdentic=true;
3467 tmpTaxonBase=tmpb;
3468 break;
3469 }
3470 }
3471 }
3472 }
3473 }
3474 }
3475 }
3476 boolean statusMatch=false;
3477 boolean appendedMatch=false;
3478 if(tmpTaxonBase !=null && foundIdentic){
3479 statusMatch=compareStatus(tmpTaxonBase, statusType);
3480 if (!getStatus().isEmpty() && ! (tmpTaxonBase.getAppendedPhrase() == null)) {
3481 appendedMatch=tmpTaxonBase.getAppendedPhrase().equals(getStatus());
3482 }
3483 if (getStatus().isEmpty() && tmpTaxonBase.getAppendedPhrase() == null) {
3484 appendedMatch=true;
3485 }
3486
3487 }
3488 if ((tmpTaxonBase == null || !foundIdentic) || (tmpTaxonBase != null && !statusMatch) || (tmpTaxonBase != null && !appendedMatch && !statusMatch)){
3489
3490 INonViralName tnb;
3491 if (identicName == null){
3492 tnb = getNonViralNameAccNomenclature();
3493 tnb.setRank(rank);
3494
3495 if(statusType != null) {
3496 tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3497 }
3498 if(StringUtils.isNotBlank(getStatus())) {
3499 tnb.setAppendedPhrase(getStatus());
3500 }
3501 tnb.setTitleCache(newName2,true);
3502 tmpTaxonBase = findMatchingTaxon(tnb,refMods);
3503 }else{
3504 tnb = identicName;
3505 }
3506
3507 if(tmpTaxonBase==null){
3508 tmpTaxonBase = isSynonym ? Synonym.NewInstance(tnb, refMods) : Taxon.NewInstance(tnb, refMods);
3509 if(!state2.getConfig().doKeepOriginalSecundum()) {
3510 tmpTaxonBase.setSec(state2.getConfig().getSecundum());
3511 }
3512 //tmptaxonbase.setSec(refMods);
3513 if(!isSynonym) {
3514 classification.addChildTaxon((Taxon)tmpTaxonBase, null, null);
3515 sourceHandler.addSource(refMods, (Taxon)tmpTaxonBase);
3516 }
3517 }
3518 }
3519
3520 tmpTaxonBase = CdmBase.deproxy(tmpTaxonBase, TaxonBase.class);
3521 if (author != null) {
3522 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
3523 setLSID(getIdentifier(), tmpTaxonBase);
3524 importer.getTaxonService().saveOrUpdate(tmpTaxonBase);
3525 tmpTaxonBase = CdmBase.deproxy(tmpTaxonBase, TaxonBase.class);
3526 }
3527 }
3528 TaxonNameBase<?,?> tnb = CdmBase.deproxy(tmpTaxonBase.getName(), TaxonNameBase.class);
3529
3530 if(!isSynonym) {
3531 this.taxon=(Taxon)tmpTaxonBase;
3532 } else {
3533 if (tmpTaxonBase instanceof Taxon){
3534 logger.warn("Incorrect status");
3535 }
3536 this.syno=(Synonym)tmpTaxonBase;
3537 }
3538
3539 taxonNameBase = tnb;
3540
3541 }
3542
3543 /**
3544 *
3545 */
3546 public void buildTaxon() {
3547 //System.out.println("BUILD TAXON");
3548 logger.info("buildTaxon");
3549 NomenclaturalStatusType statusType = null;
3550 if (!getStatus().isEmpty()){
3551 status = getStatus();
3552 String newNameStatus = newNameStatus(status);
3553 if (newNameStatus != null){
3554 taxonNameBase.setAppendedPhrase(newNameStatus);
3555 }else{
3556 try {
3557 statusType = nomStatusString2NomStatus(getStatus());
3558 taxonNameBase.addStatus(NomenclaturalStatus.NewInstance(statusType));
3559 } catch (UnknownCdmTypeException e) {
3560 addProblematicStatusToFile(getStatus());
3561 logger.warn("Problem with status");
3562 }
3563 }
3564 }
3565 importer.getNameService().save(taxonNameBase);
3566
3567 TaxonBase<?> tmpTaxonBase;
3568 if (!isSynonym) {
3569 tmpTaxonBase =Taxon.NewInstance(taxonNameBase, refMods); //sec set null
3570 }
3571 else {
3572 tmpTaxonBase =Synonym.NewInstance(taxonNameBase, refMods); //sec set null
3573 }
3574 boolean exist = false;
3575 if (!isSynonym){
3576 for (TaxonNode node : classification.getAllNodes()){
3577 try{
3578 Taxon nodeTaxon = node.getTaxon();
3579 boolean titleMatches = nodeTaxon.getTitleCache().equalsIgnoreCase(tmpTaxonBase.getTitleCache());
3580 boolean nomStatusMatches = compareStatus(node.getTaxon(), statusType);
3581 boolean nodeNameReplaceable = checkNodeNameReplaceable(nodeTaxon, tmpTaxonBase);
3582 if(titleMatches && nomStatusMatches) {
3583 if (!isSynonym) {
3584 tmpTaxonBase=CdmBase.deproxy(nodeTaxon, TaxonBase.class);
3585 exist =true;
3586 } else {
3587 logger.info("Found the same name but from another type (taxon/synonym)");
3588 TaxonNameBase<?,?> existingTnb = getTaxon().getName();
3589 tmpTaxonBase = Synonym.NewInstance(existingTnb, refMods);
3590 importer.getTaxonService().saveOrUpdate(tmpTaxonBase);
3591 exist =true;
3592 }
3593 }else if (nodeNameReplaceable){
3594 nodeTaxon.setName(tmpTaxonBase.getName());
3595 tmpTaxonBase = nodeTaxon;
3596 exist = true;
3597 }
3598 }catch(NullPointerException n){logger.warn(" A taxon is either null or its titlecache is null - ignore it?");}
3599 }
3600 }
3601 if (!exist){
3602
3603 boolean insertAsExisting =false;
3604 List<Taxon> existingTaxons=new ArrayList<Taxon>();
3605 try {
3606 existingTaxons = getMatchingTaxa(taxonNameBase);
3607 } catch (Exception e1) {
3608 e1.printStackTrace();
3609 }
3610 double similarityScore=0.0;
3611 double similarityAuthor=-1;
3612 String author1="";
3613 String author2="";
3614 String t1="";
3615 String t2="";
3616 for (Taxon bestMatchingTaxon : existingTaxons){
3617 //System.out.println("tnbase "+taxonnamebase.getTitleCache());
3618 //System.out.println("bestex "+bestMatchingTaxon.getTitleCache());
3619 if(taxonNameBase.getAuthorshipCache()!=null) {
3620 author1=taxonNameBase.getAuthorshipCache();
3621 }
3622 try {
3623 if(bestMatchingTaxon.getName().getAuthorshipCache()!=null) {
3624 author2=bestMatchingTaxon.getName().getAuthorshipCache();
3625 }
3626 } catch (Exception e) {
3627 // TODO Auto-generated catch block
3628 e.printStackTrace();
3629 }
3630 try {
3631 t1=taxonNameBase.getTitleCache();
3632 if (author1!=null && !StringUtils.isEmpty(author1)) {
3633 t1=t1.split(Pattern.quote(author1))[0];
3634 }
3635 } catch (Exception e) {
3636 // TODO Auto-generated catch block
3637 e.printStackTrace();
3638 }
3639 try {
3640 t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
3641 if (author2!=null && !StringUtils.isEmpty(author2)) {
3642 t2=t2.split(Pattern.quote(author2))[0];
3643 }
3644 } catch (Exception e) {
3645 // TODO Auto-generated catch block
3646 e.printStackTrace();
3647 }
3648
3649 similarityScore=similarity(t1.trim(), t2.trim());
3650 //System.out.println("taxonscore "+similarityScore);
3651 similarityAuthor=similarity(author1.trim(), author2.trim());
3652 //System.out.println("authorscore "+similarityAuthor);
3653 insertAsExisting = compareAndCheckTaxon(taxonNameBase, refMods, similarityScore, bestMatchingTaxon, similarityAuthor);
3654 if(insertAsExisting) {
3655 tmpTaxonBase=bestMatchingTaxon;
3656 break;
3657 }
3658 }
3659 if ( !insertAsExisting ){
3660 if(!state2.getConfig().doKeepOriginalSecundum()) {
3661 tmpTaxonBase.setSec(state2.getConfig().getSecundum());
3662 }
3663
3664 // tmptaxonbase.setSec(refMods);
3665 if (taxonNameBase.getRank().equals(state2.getConfig().getMaxRank())) {
3666 //System.out.println("****************************"+tmptaxonbase);
3667 if (!isSynonym) {
3668 classification.addChildTaxon((Taxon)tmpTaxonBase, refMods, null);
3669 }
3670 } else{
3671 hierarchy = new HashMap<Rank, Taxon>();
3672 //System.out.println("LOOK FOR PARENT "+taxonnamebase.toString()+", "+tmptaxonbase.toString());
3673 if (!isSynonym){
3674 lookForParentNode(taxonNameBase,(Taxon)tmpTaxonBase, refMods,this);
3675 //System.out.println("HIERARCHY "+hierarchy);
3676 Taxon parent = buildHierarchy();
3677 if(!taxonExistsInClassification(parent,(Taxon)tmpTaxonBase)){
3678 if(parent !=null) {
3679 classification.addParentChild(parent, (Taxon)tmpTaxonBase, refMods, null);
3680 } else {
3681 classification.addChildTaxon((Taxon)tmpTaxonBase, refMods, null);
3682 }
3683 importer.getClassificationService().saveOrUpdate(classification);
3684 }
3685 }
3686 // Set<TaxonNode> nodeList = classification.getAllNodes();
3687 // for(TaxonNode tn:nodeList) {
3688 // System.out.println(tn.getTaxon());
3689 // }
3690 }
3691 }
3692 importer.getClassificationService().saveOrUpdate(classification);
3693 if(isSynonym) {
3694 try{
3695 Synonym castTest=CdmBase.deproxy(tmpTaxonBase, Synonym.class);
3696 }catch(Exception e){
3697 TaxonNameBase<?,?> existingTnb = tmpTaxonBase.getName();
3698 Synonym castTest = Synonym.NewInstance(existingTnb, refMods);
3699 importer.getTaxonService().saveOrUpdate(castTest);
3700 tmpTaxonBase=CdmBase.deproxy(castTest, Synonym.class);
3701 }
3702 }
3703 }
3704 if(!isSynonym) {
3705 taxon=CdmBase.deproxy(tmpTaxonBase, Taxon.class);
3706 } else {
3707 syno=CdmBase.deproxy(tmpTaxonBase, Synonym.class);
3708 }
3709
3710 }
3711
3712 private boolean checkNodeNameReplaceable(Taxon nodeTaxon, TaxonBase<?> newTaxon) {
3713 //TODO preliminary check
3714 if (newTaxon.isInstanceOf(Synonym.class)){
3715 return false;
3716 }
3717 INonViralName nodeName = nodeTaxon.getName();
3718 INonViralName newName = newTaxon.getName();
3719 if (nodeTaxon.getName() == null || newName == null){
3720 return false;
3721 }
3722 if (nodeTaxon.getDescriptions().size() > 0 || nodeName.getDescriptions().size() > 0 || nodeName.getTypeDesignations().size() > 0 ){
3723 return false;
3724 }
3725 boolean compare = true;
3726 for (NomenclaturalStatus status : newName.getStatus() ){
3727 compare &= compareStatus(nodeTaxon, status.getType());
3728 }
3729 if (! compare){
3730 return false;
3731 }
3732
3733 if (nodeName.getNameCache() != null && nodeName.getNameCache().equals(newName.getNameCache())){
3734 if (nodeName.getNameCache().equals(nodeName.getTitleCache())){
3735 if (newName.getNameCache().length() < newName.getTitleCache().length()){
3736 logger.warn("We still need to check, if node was automatically created via hierarchy creation: " + nodeName.getNameCache());
3737 return true;
3738 }
3739 }
3740 }
3741
3742 return false;
3743 }
3744
3745 /**
3746 *
3747 */
3748 private Taxon buildHierarchy() {
3749 logger.info("buildHierarchy");
3750 Taxon higherTaxon = null;
3751 //add the maxRank as a root
3752 if(hierarchy.containsKey(state2.getConfig().getMaxRank())){
3753 Taxon ct=hierarchy.get(state2.getConfig().getMaxRank());
3754 if(!taxonExistsInClassification(higherTaxon, ct)) {
3755 classification.addChildTaxon(ct, refMods, null);
3756 }
3757 higherTaxon = hierarchy.get(state2.getConfig().getMaxRank());
3758 // return higherTaxon;
3759 }
3760 //add the relation to the highertaxon, except if the current rank to add IS the maxRank
3761
3762 //TODO higher Ranks
3763
3764 if(hierarchy.containsKey(Rank.FAMILY()) && !state2.getConfig().getMaxRank().equals(Rank.FAMILY())){
3765 higherTaxon=saveAndGetHigherTaxon(Rank.FAMILY(),higherTaxon);
3766 }
3767 if(hierarchy.containsKey(Rank.SUBFAMILY()) && !state2.getConfig().getMaxRank().equals(Rank.SUBFAMILY())){
3768 higherTaxon=saveAndGetHigherTaxon(Rank.SUBFAMILY(),higherTaxon);
3769 }
3770 if(hierarchy.containsKey(Rank.TRIBE())&& !state2.getConfig().getMaxRank().equals(Rank.TRIBE())){
3771 higherTaxon=saveAndGetHigherTaxon(Rank.TRIBE(),higherTaxon);
3772 }
3773 if(hierarchy.containsKey(Rank.SUBTRIBE())&& !state2.getConfig().getMaxRank().equals(Rank.SUBTRIBE())){
3774 higherTaxon=saveAndGetHigherTaxon(Rank.SUBTRIBE(),higherTaxon);
3775 }
3776 if(hierarchy.containsKey(Rank.GENUS())&& !state2.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3777 higherTaxon=saveAndGetHigherTaxon(Rank.GENUS(),higherTaxon);
3778 }
3779 if(hierarchy.containsKey(Rank.SUBGENUS())&& !state2.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3780 higherTaxon=saveAndGetHigherTaxon(Rank.SUBGENUS(),higherTaxon);
3781 }
3782 importer.getClassificationService().saveOrUpdate(classification);
3783 return higherTaxon;
3784 }
3785
3786 private Taxon saveAndGetHigherTaxon(Rank r, Taxon higherTaxon){
3787 Taxon ct=hierarchy.get(r);
3788 if(!taxonExistsInClassification(higherTaxon,ct )) {
3789 if(higherTaxon != null && ct!=null) {
3790 classification.addParentChild(higherTaxon, ct, refMods, null);
3791 } else
3792 if(higherTaxon == null && ct !=null) {
3793 classification.addChildTaxon(ct, refMods, null);
3794 }
3795 }
3796 return ct;
3797 }
3798
3799 private boolean taxonExistsInClassification(Taxon parent, Taxon child){
3800 logger.info("taxonExistsInClassification");
3801 // System.out.println("LOOK IF TAXA EXIST "+parent+", "+child);
3802 boolean found=false;
3803 if(parent !=null){
3804 for (TaxonNode p : classification.getAllNodes()){
3805 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
3806 for (TaxonNode c : p.getChildNodes()) {
3807 if (c.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3808 found=true;
3809 break;
3810 }
3811 }
3812 }
3813 }
3814 }
3815 else{
3816 for (TaxonNode p : classification.getAllNodes()){
3817 if(p.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3818 found=true;
3819 break;
3820 }
3821 }
3822 }
3823 // System.out.println("LOOK IF TAXA EXIST? "+found);
3824 return found;
3825 }
3826 /**
3827 * @param nameToBeFilledTest
3828 */
3829 public void setParsedName(ITaxonNameBase nameToBeFilledTest) {
3830 this.taxonNameBase = TaxonNameBase.castAndDeproxy(nameToBeFilledTest);
3831
3832 }
3833 //variety dwcranks:varietyEpithet
3834 /**
3835 * @return the author
3836 */
3837 public String getAuthor() {
3838 return author;
3839 }
3840 /**
3841 * @return
3842 */
3843 public Taxon getTaxon() {
3844 return taxon;
3845 }
3846 /**
3847 * @return
3848 */
3849 public TaxonNameBase<?,?> getTaxonNameBase() {
3850 return taxonNameBase;
3851 }
3852
3853 /**
3854 * @param findOrCreateTaxon
3855 */
3856 public void setForm(Taxon form) {
3857 this.form=form;
3858
3859 }
3860 /**
3861 * @param findOrCreateTaxon
3862 */
3863 public void setVariety(Taxon variety) {
3864 this.variety=variety;
3865
3866 }
3867 /**
3868 * @param string
3869 * @return
3870 */
3871 @SuppressWarnings("rawtypes")
3872 public Taxon findOrCreateTaxon(String partialname,String fullname, Rank rank, Rank globalrank) {
3873 logger.info("findOrCreateTaxon");
3874 sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
3875 //takes too much time
3876 // List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3877 // logger.info("tmpList returned: "+tmpList.size());
3878
3879 NomenclaturalStatusType statusType = null;
3880 if (!getStatus().isEmpty()){
3881 try {
3882 statusType = nomStatusString2NomStatus(getStatus());
3883 } catch (UnknownCdmTypeException e) {
3884 addProblematicStatusToFile(getStatus());
3885 logger.warn("Problem with status");
3886 }
3887 }
3888
3889 List<TaxonBase> tmpListFiltered = new ArrayList<TaxonBase>();
3890
3891 Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, fullname, MatchMode.BEGINNING, null, null, null, null, null);
3892
3893 tmpListFiltered.addAll(taxontest.getRecords());
3894 taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, partialname, MatchMode.BEGINNING, null, null, null, null, null);
3895 tmpListFiltered.addAll(taxontest.getRecords());
3896
3897 //logger.info("tmpListFiltered returned: "+tmpListFiltered.size());
3898
3899 boolean nameCorrected=false;
3900 if (fullname.indexOf(partialname)<0) {
3901 nameCorrected=true;
3902 }
3903
3904 boolean foundIdentic=false;
3905 Taxon tmp=null;
3906 for (TaxonBase tmpb:tmpListFiltered){
3907 if(tmpb !=null){
3908 TaxonNameBase tnb = tmpb.getName();
3909 Rank crank=null;
3910 if (tnb != null){
3911 if(globalrank.equals(rank) || (globalrank.isLower(Rank.SPECIES()) && rank.equals(Rank.SPECIES()))){
3912 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(fullname) ){
3913 crank =tnb.getRank();
3914 if (crank !=null && rank !=null){
3915 if (crank.equals(rank)){
3916 foundIdentic=true;
3917 try{
3918 tmp=(Taxon)tmpb;
3919 break;
3920 }catch(Exception e){
3921 e.printStackTrace();
3922 }
3923 }
3924 }
3925 }
3926 if(nameCorrected){ //for corrected names such as Anochetus -- A. blf-pat
3927 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
3928 crank =tnb.getRank();
3929 if (crank !=null && rank !=null){
3930 if (crank.equals(rank)){
3931 foundIdentic=true;
3932 try{
3933 tmp=(Taxon)tmpb;
3934 break;
3935 }catch(Exception e){
3936 e.printStackTrace();
3937 }
3938 }
3939 }
3940 }
3941 }
3942 }
3943 else{
3944 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
3945 crank =tnb.getRank();
3946 if (crank !=null && rank !=null){
3947 if (crank.equals(rank)){
3948 foundIdentic=true;
3949 try{
3950 tmp=(Taxon)tmpb;
3951 break;
3952 }catch(Exception e){
3953 e.printStackTrace();
3954 }
3955 }
3956 }
3957 }
3958 }
3959 }
3960 }
3961 }
3962 boolean statusMatch=false;
3963 boolean appendedMatch=false;
3964 if(tmp !=null && foundIdentic){
3965 statusMatch=compareStatus(tmp, statusType);
3966 if (!getStatus().isEmpty() && ! (tmp.getAppendedPhrase() == null)) {
3967 appendedMatch=tmp.getAppendedPhrase().equals(getStatus());
3968 }
3969 if (getStatus().isEmpty() && tmp.getAppendedPhrase() == null) {
3970 appendedMatch=true;
3971 }
3972
3973 }
3974 if ((tmp == null || !foundIdentic) || (tmp != null && !statusMatch) || (tmp != null && !appendedMatch && !statusMatch)){
3975
3976 INonViralName tnb = getNonViralNameAccNomenclature();
3977 tnb.setRank(rank);
3978
3979 if(statusType != null) {
3980 tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3981 }
3982 if(StringUtils.isNotBlank(getStatus())) {
3983 tnb.setAppendedPhrase(getStatus());
3984 }
3985
3986 if(rank.equals(Rank.UNKNOWN_RANK())){
3987 tnb.setTitleCache(fullname, true);
3988 // tnb.setGenusOrUninomial(fullname);
3989 }
3990 if(rank.isHigher(Rank.GENUS())) {
3991 tnb.setGenusOrUninomial(partialname);
3992 }
3993
3994 if(rank.isHigher(Rank.SPECIES())) {
3995 tnb.setTitleCache(partialname, true);
3996 }
3997
3998 if (rank.equals(globalrank) && author != null) {
3999
4000 tnb.setCombinationAuthorship(findOrCreateAuthor(author));
4001 if (getIdentifier() !=null && !getIdentifier().isEmpty()){
4002 Taxon taxonLSID = getTaxonByLSID(getIdentifier());
4003 if (taxonLSID !=null) {
4004 tmp=taxonLSID;
4005 }
4006 }
4007 }
4008
4009 if(tmp == null){
4010 if (rank.equals(Rank.FAMILY())) {
4011 tmp = buildFamily(tnb);
4012 }
4013 if (rank.equals(Rank.SUBFAMILY())) {
4014 tmp = buildSubfamily(tnb);
4015 }
4016 if (rank.equals(Rank.TRIBE())) {
4017 tmp = buildTribe(tnb);
4018 }
4019 if (rank.equals(Rank.SUBTRIBE())) {
4020 tmp = buildSubtribe(tnb);
4021 }
4022 if (rank.equals(Rank.GENUS())) {
4023 tmp = buildGenus(partialname, tnb);
4024 }
4025
4026 if (rank.equals(Rank.SUBGENUS())) {
4027 tmp = buildSubgenus(partialname, tnb);
4028 }
4029 if (rank.equals(Rank.SPECIES())) {
4030 tmp = buildSpecies(partialname, tnb);
4031 }
4032
4033 if (rank.equals(Rank.SUBSPECIES())) {
4034 tmp = buildSubspecies(partialname, tnb);
4035 }
4036
4037 if (rank.equals(Rank.VARIETY())) {
4038 tmp = buildVariety(fullname, partialname, tnb);
4039 }
4040
4041 if (rank.equals(Rank.FORM())) {
4042 tmp = buildForm(fullname, partialname, tnb);
4043 }
4044 if (tmp != null){
4045 TaxonXTreatmentExtractor.this.sourceHandler.addSource(refMods, tmp);
4046 }
4047
4048 importer.getClassificationService().saveOrUpdate(classification);
4049 }
4050
4051 }
4052
4053 tmp = CdmBase.deproxy(tmp, Taxon.class);
4054 if (rank.equals(globalrank) && author != null) {
4055 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
4056 setLSID(getIdentifier(), tmp);
4057 importer.getTaxonService().saveOrUpdate(tmp);
4058 tmp = CdmBase.deproxy(tmp, Taxon.class);
4059 }
4060 }
4061
4062 this.taxon=tmp;
4063
4064 return tmp;
4065 }
4066
4067 /**
4068 * @param tnb
4069 * @return
4070 */
4071 private Taxon buildSubfamily(INonViralName tnb) {
4072 Taxon tmp;
4073 // tnb.generateTitle();
4074 tmp = findMatchingTaxon(tnb,refMods);
4075 if(tmp ==null){
4076 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4077 if(!state2.getConfig().doKeepOriginalSecundum()) {
4078 tmp.setSec(state2.getConfig().getSecundum());
4079 }
4080 // tmp.setSec(refMods);
4081 // sourceHandler.addSource(refMods, tmp);
4082 if(family != null) {
4083 classification.addParentChild(family, tmp, null, null);
4084 higherRank=Rank.FAMILY();
4085 higherTaxa=family;
4086 } else {
4087 //System.out.println("ADDCHILDTAXON SUBFAMILY "+tmp);
4088 classification.addChildTaxon(tmp, null, null);
4089 }
4090 }
4091 return tmp;
4092 }
4093 /**
4094 * @param tnb
4095 * @return
4096 */
4097 private Taxon buildFamily(INonViralName tnb) {
4098 Taxon tmp;
4099 // tnb.generateTitle();
4100 tmp = findMatchingTaxon(tnb,refMods);
4101 if(tmp ==null){
4102 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4103 if(!state2.getConfig().doKeepOriginalSecundum()) {
4104 tmp.setSec(state2.getConfig().getSecundum());
4105 }
4106 // tmp.setSec(refMods);
4107 //sourceHandler.addSource(refMods, tmp);
4108 //System.out.println("ADDCHILDTAXON FAMILY "+tmp);
4109 classification.addChildTaxon(tmp, null, null);
4110 }
4111 return tmp;
4112 }
4113 /**
4114 * @param fullname
4115 * @param tnb
4116 * @return
4117 */
4118 private Taxon buildForm(String fullname, String partialname, INonViralName tnb) {
4119 if (genusName !=null) {
4120 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4121 }
4122 if (subgenusName !=null) {
4123 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4124 }
4125 if(speciesName !=null) {
4126 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4127 }
4128 if(subspeciesName != null) {
4129 tnb.setInfraSpecificEpithet(subspeciesName.getInfraSpecificEpithet());
4130 }
4131 if(partialname!= null) {
4132 tnb.setInfraSpecificEpithet(partialname);
4133 }
4134 //TODO how to save form??
4135 tnb.setTitleCache(fullname, true);
4136 Taxon tmp = findMatchingTaxon(tnb,refMods);
4137 if(tmp ==null){
4138 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4139 if(!state2.getConfig().doKeepOriginalSecundum()) {
4140 tmp.setSec(state2.getConfig().getSecundum());
4141 }
4142 // tmp.setSec(refMods);
4143 //sourceHandler.addSource(refMods, tmp);
4144 if (subspecies !=null) {
4145 classification.addParentChild(subspecies, tmp, null, null);
4146 higherRank=Rank.SUBSPECIES();
4147 higherTaxa=subspecies;
4148 } else {
4149 if (species !=null) {
4150 classification.addParentChild(species, tmp, null, null);
4151 higherRank=Rank.SPECIES();
4152 higherTaxa=species;
4153 }
4154 else{
4155 // System.out.println("ADDCHILDTAXON FORM "+tmp);
4156 classification.addChildTaxon(tmp, null, null);
4157 }
4158 }
4159 }
4160 return tmp;
4161 }
4162 /**
4163 * @param fullname
4164 * @param tnb
4165 * @return
4166 */
4167 private Taxon buildVariety(String fullname, String partialname, INonViralName tnb) {
4168 Taxon tmp;
4169 if (genusName !=null) {
4170 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4171 }
4172 if (subgenusName !=null) {
4173 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4174 }
4175 if(speciesName !=null) {
4176 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4177 }
4178 if(subspeciesName != null) {
4179 tnb.setInfraSpecificEpithet(subspeciesName.getSpecificEpithet());
4180 }
4181 if(partialname != null) {
4182 tnb.setInfraSpecificEpithet(partialname);
4183 }
4184 //TODO how to save variety?
4185 tnb.setTitleCache(fullname, true);
4186 tmp = findMatchingTaxon(tnb,refMods);
4187 if(tmp ==null){
4188 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4189 if(!state2.getConfig().doKeepOriginalSecundum()) {
4190 tmp.setSec(state2.getConfig().getSecundum());
4191 }
4192 // tmp.setSec(refMods);
4193 //sourceHandler.addSource(refMods, tmp);
4194 if (subspecies !=null) {
4195 classification.addParentChild(subspecies, tmp, null, null);
4196 higherRank=Rank.SUBSPECIES();
4197 higherTaxa=subspecies;
4198 } else {
4199 if(species !=null) {
4200 classification.addParentChild(species, tmp, null, null);
4201 higherRank=Rank.SPECIES();
4202 higherTaxa=species;
4203 }
4204 else{
4205 //System.out.println("ADDCHILDTAXON VARIETY "+tmp);
4206 classification.addChildTaxon(tmp, null, null);
4207 }
4208 }
4209 }
4210 return tmp;
4211 }
4212 /**
4213 * @param partialname
4214 * @param tnb
4215 * @return
4216 */
4217 private Taxon buildSubspecies(String partialname, INonViralName tnb) {
4218 if (genusName !=null) {
4219 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4220 }
4221 if (subgenusName !=null) {
4222 // System.out.println("SUB:"+subgenusName.getInfraGenericEpithet());
4223 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4224 }
4225 if(speciesName !=null) {
4226 // System.out.println("SPE:"+speciesName.getSpecificEpithet());
4227 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4228 }
4229 tnb.setInfraSpecificEpithet(partialname);
4230 Taxon tmp = findMatchingTaxon(tnb,refMods);
4231 if(tmp ==null){
4232 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4233 if(!state2.getConfig().doKeepOriginalSecundum())
4234 {
4235 tmp.setSec(state2.getConfig().getSecundum());
4236 // tmp.setSec(refMods);
4237 //sourceHandler.addSource(refMods, tmp);
4238 }
4239
4240 if(species != null) {
4241 classification.addParentChild(species, tmp, null, null);
4242 higherRank=Rank.SPECIES();
4243 higherTaxa=species;
4244 }
4245 else{
4246 //System.out.println("ADDCHILDTAXON SUBSPECIES "+tmp);
4247 classification.addChildTaxon(tmp, null, null);
4248 }
4249 }
4250 return tmp;
4251 }
4252 /**
4253 * @param partialname
4254 * @param tnb
4255 * @return
4256 */
4257 private Taxon buildSpecies(String partialname, INonViralName tnb) {
4258 if (genusName !=null) {
4259 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4260 }
4261 if (subgenusName !=null) {
4262 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4263 }
4264 tnb.setSpecificEpithet(partialname.toLowerCase());
4265 Taxon tmp = findMatchingTaxon(tnb,refMods);
4266 if(tmp ==null){
4267 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4268 if(!state2.getConfig().doKeepOriginalSecundum()) {
4269 tmp.setSec(state2.getConfig().getSecundum());
4270 }
4271 // tmp.setSec(refMods);
4272 //sourceHandler.addSource(refMods, tmp);
4273 if (subgenus !=null) {
4274 classification.addParentChild(subgenus, tmp, null, null);
4275 higherRank=Rank.SUBGENUS();
4276 higherTaxa=subgenus;
4277 } else {
4278 if (genus !=null) {
4279 classification.addParentChild(genus, tmp, null, null);
4280 higherRank=Rank.GENUS();
4281 higherTaxa=genus;
4282 }
4283 else{
4284 //System.out.println("ADDCHILDTAXON SPECIES "+tmp);
4285 classification.addChildTaxon(tmp, null, null);
4286 }
4287 }
4288 }
4289 return tmp;
4290 }
4291 /**
4292 * @param partialname
4293 * @param tnb
4294 * @return
4295 */
4296 private Taxon buildSubgenus(String partialname, INonViralName tnb) {
4297 tnb.setInfraGenericEpithet(partialname);
4298 if (genusName !=null) {
4299 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4300 }
4301 Taxon tmp = findMatchingTaxon(tnb,refMods);
4302 if(tmp ==null){
4303 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4304 if(!state2.getConfig().doKeepOriginalSecundum()) {
4305 tmp.setSec(state2.getConfig().getSecundum());
4306 }
4307 // tmp.setSec(refMods);
4308 //sourceHandler.addSource(refMods, tmp);
4309 if(genus != null) {
4310 classification.addParentChild(genus, tmp, null, null);
4311 higherRank=Rank.GENUS();
4312 higherTaxa=genus;
4313 } else{
4314 //System.out.println("ADDCHILDTAXON SUBGENUS "+tmp);
4315 classification.addChildTaxon(tmp, null, null);
4316 }
4317 }
4318 return tmp;
4319 }
4320 /**
4321 * @param partialname
4322 * @param tnb
4323 * @return
4324 */
4325 private Taxon buildGenus(String partialname, INonViralName tnb) {
4326 Taxon tmp;
4327 tnb.setGenusOrUninomial(partialname);
4328
4329
4330 tmp = findMatchingTaxon(tnb,refMods);
4331 if(tmp ==null){
4332 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4333 if(!state2.getConfig().doKeepOriginalSecundum())
4334 {
4335 tmp.setSec(state2.getConfig().getSecundum());
4336 // tmp.setSec(refMods);
4337 //sourceHandler.addSource(refMods, tmp);
4338 }
4339
4340 if(subtribe != null) {
4341 classification.addParentChild(subtribe, tmp, null, null);
4342 higherRank=Rank.SUBTRIBE();
4343 higherTaxa=subtribe;
4344 } else{
4345 if(tribe !=null) {
4346 classification.addParentChild(tribe, tmp, null, null);
4347 higherRank=Rank.TRIBE();
4348 higherTaxa=tribe;
4349 } else{
4350 if(subfamily !=null) {
4351 classification.addParentChild(subfamily, tmp, null, null);
4352 higherRank=Rank.SUBFAMILY();
4353 higherTaxa=subfamily;
4354 } else
4355 if(family !=null) {
4356 classification.addParentChild(family, tmp, null, null);
4357 higherRank=Rank.FAMILY();
4358 higherTaxa=family;
4359 }
4360 else{
4361 //System.out.println("ADDCHILDTAXON GENUS "+tmp);
4362 classification.addChildTaxon(tmp, null, null);
4363 }
4364 }
4365 }
4366 }
4367 return tmp;
4368 }
4369
4370 /**
4371 * @param tnb
4372 * @return
4373 */
4374 private Taxon buildSubtribe(INonViralName tnb) {
4375 Taxon tmp = findMatchingTaxon(tnb,refMods);
4376 if(tmp==null){
4377 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4378 if(!state2.getConfig().doKeepOriginalSecundum()) {
4379 tmp.setSec(state2.getConfig().getSecundum());
4380 }
4381 // tmp.setSec(refMods);
4382 //sourceHandler.addSource(refMods, tmp);
4383 if(tribe != null) {
4384 classification.addParentChild(tribe, tmp, null, null);
4385 higherRank=Rank.TRIBE();
4386 higherTaxa=tribe;
4387 } else{
4388 //System.out.println("ADDCHILDTAXON SUBTRIBE "+tmp);
4389 classification.addChildTaxon(tmp, null, null);
4390 }
4391 }
4392 return tmp;
4393 }
4394 /**
4395 * @param tnb
4396 * @return
4397 */
4398 private Taxon buildTribe(INonViralName tnb) {
4399 Taxon tmp = findMatchingTaxon(tnb,refMods);
4400 if(tmp==null){
4401 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4402 if(!state2.getConfig().doKeepOriginalSecundum()) {
4403 tmp.setSec(state2.getConfig().getSecundum());
4404 }
4405 // tmp.setSec(refMods);
4406 //sourceHandler.addSource(refMods, tmp);
4407 if (subfamily !=null) {
4408 classification.addParentChild(subfamily, tmp, null, null);
4409 higherRank=Rank.SUBFAMILY();
4410 higherTaxa=subfamily;
4411 } else {
4412 if(family != null) {
4413 classification.addParentChild(family, tmp, null, null);
4414 higherRank=Rank.FAMILY();
4415 higherTaxa=family;
4416 }
4417 else{
4418 //System.out.println("ADDCHILDTAXON TRIBE "+tmp);
4419 classification.addChildTaxon(tmp, null, null);
4420 }
4421 }
4422 }
4423 return tmp;
4424 }
4425
4426 /**
4427 * @param identifier2
4428 * @return
4429 */
4430 @SuppressWarnings("rawtypes")
4431 private Taxon getTaxonByLSID(String identifier) {
4432 //logger.info("getTaxonByLSID");
4433 // boolean lsidok=false;
4434 String id = identifier.split("__")[0];
4435 // String source = identifier.split("__")[1];
4436 LSID lsid = null;
4437 if (id.indexOf("lsid")>-1){
4438 try {
4439 lsid = new LSID(id);
4440 // lsidok=true;
4441 } catch (MalformedLSIDException e) {
4442 logger.warn("Malformed LSID");
4443 }
4444 }
4445 if (lsid !=null){
4446 List<Taxon> taxa = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
4447 LSID currentlsid=null;
4448 for (Taxon t:taxa){
4449 currentlsid = t.getLsid();
4450 if (currentlsid !=null){
4451 if (currentlsid.getLsid().equals(lsid.getLsid())){
4452 try{
4453 return t;
4454 }
4455 catch(Exception e){logger.warn("Exception occurred while comparing LSIDs "+e );}
4456 }
4457 }
4458 }
4459 }
4460 return null;
4461 }
4462 /**
4463 * @param author2
4464 * @return
4465 */
4466 @SuppressWarnings("rawtypes")
4467 private Person findOrCreateAuthor(String author2) {
4468 //logger.info("findOrCreateAuthor");
4469 List<UuidAndTitleCache<Person>> hiberPersons = importer.getAgentService().getPersonUuidAndTitleCache();
4470 for (UuidAndTitleCache<Person> hibernateP:hiberPersons){
4471 if(hibernateP.getTitleCache().equals(author2)) {
4472 AgentBase existing = importer.getAgentService().find(hibernateP.getUuid());
4473 return CdmBase.deproxy(existing, Person.class);
4474 }
4475 }
4476 Person p = Person.NewInstance();
4477 p.setTitleCache(author2,true);
4478 importer.getAgentService().saveOrUpdate(p);
4479 return CdmBase.deproxy(p, Person.class);
4480 }
4481 /**
4482 * @param author the author to set
4483 */
4484 public void setAuthor(String author) {
4485 this.author = author;
4486 }
4487
4488 /**
4489 * @return the higherTaxa
4490 */
4491 public Taxon getHigherTaxa() {
4492 return higherTaxa;
4493 }
4494 /**
4495 * @param higherTaxa the higherTaxa to set
4496 */
4497 public void setHigherTaxa(Taxon higherTaxa) {
4498 this.higherTaxa = higherTaxa;
4499 }
4500 /**
4501 * @return the higherRank
4502 */
4503 public Rank getHigherRank() {
4504 return higherRank;
4505 }
4506 /**
4507 * @param higherRank the higherRank to set
4508 */
4509 public void setHigherRank(Rank higherRank) {
4510 this.higherRank = higherRank;
4511 }
4512 public String getName(){
4513 if (newName.isEmpty()) {
4514 return originalName;
4515 } else {
4516 return newName;
4517 }
4518
4519 }
4520 /**
4521 * @return the fullName
4522 */
4523 public String getOriginalName() {
4524 return originalName;
4525 }
4526 /**
4527 * @param fullName the fullName to set
4528 */
4529 public void setOriginalName(String fullName) {
4530 this.originalName = fullName;
4531 }
4532 /**
4533 * @return the newName
4534 */
4535 public String getNewName() {
4536 return newName;
4537 }
4538 /**
4539 * @param newName the newName to set
4540 */
4541 public void setNewName(String newName) {
4542 this.newName = newName;
4543 }
4544 /**
4545 * @return the rank
4546 */
4547 public Rank getRank() {
4548 return rank;
4549 }
4550 /**
4551 * @param rank the rank to set
4552 */
4553 public void setRank(Rank rank) {
4554 this.rank = rank;
4555 }
4556 /**
4557 * @return the idenfitiger
4558 */
4559 public String getIdentifier() {
4560 return identifier;
4561 }
4562 /**
4563 * @param idenfitiger the idenfitiger to set
4564 */
4565 public void setIdentifier(String identifier) {
4566 this.identifier = identifier;
4567 }
4568 /**
4569 * @return the status
4570 */
4571 public String getStatus() {
4572 if (status == null) {
4573 return "";
4574 }
4575 return status;
4576 }
4577 /**
4578 * @param status the status to set
4579 */
4580 public void setStatus(String status) {
4581 this.status = status;
4582 }
4583 /**
4584 * @return the family
4585 */
4586 public Taxon getFamily() {
4587 return family;
4588 }
4589 /**
4590 * @param family the family to set
4591 */
4592 @SuppressWarnings("rawtypes")
4593 public void setFamily(Taxon family) {
4594 this.family = family;
4595 familyName = CdmBase.deproxy(family.getName());
4596 }
4597 /**
4598 * @return the subfamily
4599 */
4600 public Taxon getSubfamily() {
4601 return subfamily;
4602 }
4603 /**
4604 * @param subfamily the subfamily to set
4605 */
4606 @SuppressWarnings("rawtypes")
4607 public void setSubfamily(Taxon subfamily) {
4608 this.subfamily = subfamily;
4609 subfamilyName = CdmBase.deproxy(subfamily.getName());
4610 }
4611 /**
4612 * @return the tribe
4613 */
4614 public Taxon getTribe() {
4615 return tribe;
4616 }
4617 /**
4618 * @param tribe the tribe to set
4619 */
4620 @SuppressWarnings("rawtypes")
4621 public void setTribe(Taxon tribe) {
4622 this.tribe = tribe;
4623 tribeName = CdmBase.deproxy(tribe.getName());
4624 }
4625 /**
4626 * @return the subtribe
4627 */
4628 public Taxon getSubtribe() {
4629 return subtribe;
4630 }
4631 /**
4632 * @param subtribe the subtribe to set
4633 */
4634 @SuppressWarnings("rawtypes")
4635 public void setSubtribe(Taxon subtribe) {
4636 this.subtribe = subtribe;
4637 subtribeName =CdmBase.deproxy(subtribe.getName());
4638 }
4639 /**
4640 * @return the genus
4641 */
4642 public Taxon getGenus() {
4643 return genus;
4644 }
4645 /**
4646 * @param genus the genus to set
4647 */
4648 @SuppressWarnings("rawtypes")
4649 public void setGenus(Taxon genus) {
4650 if (genus != null){
4651 this.genus = genus;
4652 genusName = CdmBase.deproxy(genus.getName());
4653 }
4654 }
4655 /**
4656 * @return the subgenus
4657 */
4658 public Taxon getSubgenus() {
4659 return subgenus;
4660 }
4661 /**
4662 * @param subgenus the subgenus to set
4663 */
4664 @SuppressWarnings("rawtypes")
4665 public void setSubgenus(Taxon subgenus) {
4666 this.subgenus = subgenus;
4667 subgenusName = CdmBase.deproxy(subgenus.getName());
4668 }
4669 /**
4670 * @return the species
4671 */
4672 public Taxon getSpecies() {
4673 return species;
4674 }
4675 /**
4676 * @param species the species to set
4677 */
4678 public void setSpecies(Taxon species) {
4679 if (species != null){
4680 this.species = species;
4681 speciesName = CdmBase.deproxy(species.getName());
4682 }
4683 }
4684 /**
4685 * @return the subspecies
4686 */
4687 public Taxon getSubspecies() {
4688 return subspecies;
4689 }
4690 /**
4691 * @param subspecies the subspecies to set
4692 */
4693 @SuppressWarnings("rawtypes")
4694 public void setSubspecies(Taxon subspecies) {
4695 this.subspecies = subspecies;
4696 subspeciesName = CdmBase.deproxy(subspecies.getName());
4697
4698 }
4699
4700
4701
4702 }
4703
4704
4705 /**
4706 * @param status
4707 */
4708 private void addProblematicStatusToFile(String status) {
4709 try{
4710 FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "StatusUnknown_"+classification.getTitleCache()+".txt",true);
4711 BufferedWriter out = new BufferedWriter(fstream);
4712 out.write(status+"\n");
4713 //Close the output stream
4714 out.close();
4715 }catch (Exception e){//Catch exception if any
4716 System.err.println("Error: " + e.getMessage());
4717 }
4718
4719 }
4720
4721
4722
4723 /**
4724 * @param tnb
4725 * @return
4726 */
4727 private Taxon findMatchingTaxon(INonViralName tnb, Reference refMods) {
4728 logger.info("findMatchingTaxon");
4729 Taxon tmp=null;
4730
4731 refMods=CdmBase.deproxy(refMods, Reference.class);
4732 boolean insertAsExisting =false;
4733 List<Taxon> existingTaxa = new ArrayList<Taxon>();
4734 try {
4735 existingTaxa = getMatchingTaxa(tnb);
4736 } catch (Exception e1) {
4737 // TODO Auto-generated catch block
4738 e1.printStackTrace();
4739 }
4740 double similarityScore=0.0;
4741 double similarityAuthor=-1;
4742 String author1="";
4743 String author2="";
4744 String t1="";
4745 String t2="";
4746 for (Taxon bestMatchingTaxon : existingTaxa){
4747 if (!existingTaxa.isEmpty() && state2.getConfig().isInteractWithUser() && !insertAsExisting) {
4748 // System.out.println("tnb "+tnb.getTitleCache());
4749 // System.out.println("ext "+bestMatchingTaxon.getTitleCache());
4750 try {
4751 if(tnb.getAuthorshipCache()!=null) {
4752 author1=tnb.getAuthorshipCache();
4753 }
4754 } catch (Exception e) {
4755 // TODO Auto-generated catch block
4756 e.printStackTrace();
4757 }
4758 try {
4759 if(bestMatchingTaxon.getName().getAuthorshipCache()!=null) {
4760 author2=bestMatchingTaxon.getName().getAuthorshipCache();
4761 }
4762 } catch (Exception e) {
4763 // TODO Auto-generated catch block
4764 e.printStackTrace();
4765 }
4766 try {
4767 t1=tnb.getTitleCache().split("sec.")[0].trim();
4768 if (author1!=null && !StringUtils.isEmpty(author1)) {
4769 t1=t1.split(Pattern.quote(author1))[0];
4770 }
4771 } catch (Exception e) {
4772 // TODO Auto-generated catch block
4773 e.printStackTrace();
4774 }
4775 try {
4776 t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
4777 if (author2!=null && !StringUtils.isEmpty(author2)) {
4778 t2=t2.split(Pattern.quote(author2))[0];
4779 }
4780 } catch (Exception e) {
4781 // TODO Auto-generated catch block
4782 e.printStackTrace();
4783 }
4784 similarityScore=similarity(t1.trim(), t2.trim());
4785 // System.out.println("taxascore: "+similarityScore);
4786 similarityAuthor=similarity(author1.trim(), author2.trim());
4787 // System.out.println("authorscore: "+similarityAuthor);
4788 insertAsExisting = compareAndCheckTaxon(tnb, refMods, similarityScore, bestMatchingTaxon,similarityAuthor);
4789 }
4790 if(insertAsExisting) {
4791 //System.out.println("KEEP "+bestMatchingTaxon.toString());
4792 tmp=bestMatchingTaxon;
4793 sourceHandler.addSource(refMods, tmp);
4794 return tmp;
4795 }
4796 }
4797 return tmp;
4798 }
4799
4800
4801 /**
4802 * @param tnb
4803 * @param refMods
4804 * @param similarityScore
4805 * @param bestMatchingTaxon
4806 * @param similarityAuthor
4807 * @return
4808 */
4809 private boolean compareAndCheckTaxon(INonViralName tnb, Reference refMods, double similarityScore,
4810 Taxon bestMatchingTaxon, double similarityAuthor) {
4811 //logger.info("compareAndCheckTaxon");
4812 boolean insertAsExisting;
4813 // if (tnb.getTitleCache().split("sec.")[0].equalsIgnoreCase("Chenopodium") && bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium album")>-1) {
4814 // insertAsExisting=false;
4815 // } else{
4816 //a small hack/automatisation for Chenopodium only
4817 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase("Chenopodium") &&
4818 bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium L.")>-1) {
4819 insertAsExisting=true;
4820 } else {
4821 insertAsExisting=askIfReuseBestMatchingTaxon(tnb, bestMatchingTaxon, refMods, similarityScore,similarityAuthor);
4822 }
4823 // }
4824
4825 logDecision(tnb, bestMatchingTaxon, insertAsExisting, refMods);
4826 return insertAsExisting;
4827 }
4828
4829 /**
4830 * @return
4831 */
4832 @SuppressWarnings("rawtypes")
4833 private List<Taxon> getMatchingTaxa(ITaxonNameBase tnb) {
4834 //logger.info("getMatchingTaxon");
4835 if (tnb.getTitleCache() == null){
4836 tnb.setTitleCache(tnb.toString(), tnb.isProtectedTitleCache());
4837 }
4838
4839 Pager<TaxonBase> pager=importer.getTaxonService().findByTitle(TaxonBase.class, tnb.getTitleCache().split("sec.")[0].trim(), MatchMode.BEGINNING, null, null, null, null, null);
4840 List<TaxonBase>records = pager.getRecords();
4841
4842 List<Taxon> existingTaxons = new ArrayList<Taxon>();
4843 for (TaxonBase r:records){
4844 try{
4845 Taxon bestMatchingTaxon = (Taxon)r;
4846 // System.out.println("best: "+bestMatchingTaxon.getTitleCache());
4847 if(compareTaxonNameLength(bestMatchingTaxon.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4848 existingTaxons.add(bestMatchingTaxon);
4849 }
4850 }catch(ClassCastException e){logger.warn("classcast exception, might be a synonym, ignore it");}
4851 }
4852 Taxon bmt = importer.getTaxonService().findBestMatchingTaxon(tnb.getTitleCache());
4853 if (!existingTaxons.contains(bmt) && bmt!=null) {
4854 if(compareTaxonNameLength(bmt.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4855 existingTaxons.add(bmt);
4856 }
4857 }
4858 return existingTaxons;
4859 }
4860
4861 /**
4862 * Check if the found Taxon can reasonnably be the same
4863 * example: with and without author should match, but the subspecies should not be suggested for a genus
4864 * */
4865 private boolean compareTaxonNameLength(String f, String o){
4866 //logger.info("compareTaxonNameLength");
4867 boolean lengthOk=false;
4868 int sizeF = f.length();
4869 int sizeO = o.length();
4870 if (sizeO>=sizeF) {
4871 lengthOk=true;
4872 }
4873 if(sizeF>sizeO) {
4874 if (sizeF-sizeO>10) {
4875 lengthOk=false;
4876 } else {
4877 lengthOk=true;
4878 }
4879 }
4880
4881 // System.out.println(lengthOk+": compare "+f+" ("+f.length()+") and "+o+" ("+o.length()+")");
4882 return lengthOk;
4883 }
4884
4885 private double similarity(String s1, String s2) {
4886 //logger.info("similarity");
4887 //System.out.println("similarity *"+s1+"* vs. *"+s2+"*");
4888 if(!StringUtils.isEmpty(s1) && !StringUtils.isEmpty(s2)){
4889 String l1=s1.toLowerCase().trim();
4890 String l2=s2.toLowerCase().trim();
4891 if (l1.length() < l2.length()) { // s1 should always be bigger
4892 String swap = l1; l1 = l2; l2 = swap;
4893 }
4894 int bigLen = l1.length();
4895 if (bigLen == 0) { return 1.0; /* both strings are zero length */ }
4896 return (bigLen - computeEditDistance(l1, l2)) / (double) bigLen;
4897 }
4898 else{
4899 if(s1!=null && s2!=null){
4900 if (s1.equalsIgnoreCase(s2)) {
4901 return 1;
4902 }
4903 }
4904 return -1;
4905 }
4906 }
4907
4908 private int computeEditDistance(String s1, String s2) {
4909 //logger.info("computeEditDistance");
4910 int[] costs = new int[s2.length() + 1];
4911 for (int i = 0; i <= s1.length(); i++) {
4912 int lastValue = i;
4913 for (int j = 0; j <= s2.length(); j++) {
4914 if (i == 0) {
4915 costs[j] = j;
4916 } else {
4917 if (j > 0) {
4918 int newValue = costs[j - 1];
4919 if (s1.charAt(i - 1) != s2.charAt(j - 1)) {
4920 newValue = Math.min(Math.min(newValue, lastValue),
4921 costs[j]) + 1;
4922 }
4923 costs[j - 1] = lastValue;
4924 lastValue = newValue;
4925 }
4926 }
4927 }
4928 if (i > 0) {
4929 costs[s2.length()] = lastValue;
4930 }
4931 }
4932 return costs[s2.length()];
4933 }
4934
// Parent taxa resolved by the handle*Hierarchy methods below, keyed by the parent's rank.
Map<Rank, Taxon> hierarchy = new HashMap<Rank, Taxon>();
4936 /**
4937 * @param taxonNameBase
4938 */
4939 @SuppressWarnings("rawtypes")
4940 public void lookForParentNode(INonViralName taxonNameBase, Taxon tax, Reference ref, MyName myName) {
4941 logger.info("lookForParentNode "+taxonNameBase.getTitleCache()+" for "+myName.toString());
4942 //System.out.println("LOOK FOR PARENT NODE "+taxonnamebase.toString()+"; "+tax.toString()+"; "+taxonnamebase.getRank());
4943 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
4944 if (taxonNameBase.getRank().equals(Rank.FORM())){
4945 handleFormHierarchy(ref, myName, parser);
4946 }
4947 else if (taxonNameBase.getRank().equals(Rank.VARIETY())){
4948 handleVarietyHierarchy(ref, myName, parser);
4949 }
4950 else if (taxonNameBase.getRank().equals(Rank.SUBSPECIES())){
4951 handleSubSpeciesHierarchy(ref, myName, parser);
4952 }
4953 else if (taxonNameBase.getRank().equals(Rank.SPECIES())){
4954 handleSpeciesHierarchy(ref, myName, parser);
4955 }
4956 else if (taxonNameBase.getRank().equals(Rank.SUBGENUS())){
4957 handleSubgenusHierarchy(ref, myName, parser);
4958 }
4959
4960 if (taxonNameBase.getRank().equals(Rank.GENUS())){
4961 handleGenusHierarchy(ref, myName, parser);
4962 }
4963 if (taxonNameBase.getRank().equals(Rank.SUBTRIBE())){
4964 handleSubtribeHierarchy(ref, myName, parser);
4965 }
4966 if (taxonNameBase.getRank().equals(Rank.TRIBE())){
4967 handleTribeHierarchy(ref, myName, parser);
4968 }
4969
4970 if (taxonNameBase.getRank().equals(Rank.SUBFAMILY())){
4971 handleSubfamilyHierarchy(ref, myName, parser);
4972 }
4973 }
4974
4975 /**
4976 * @param ref
4977 * @param myName
4978 * @param parser
4979 */
4980 private void handleSubfamilyHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
4981 System.out.println("handleSubfamilyHierarchy");
4982 String parentStr = myName.getFamilyStr();
4983 Rank r = Rank.FAMILY();
4984 if(parentStr!=null){
4985
4986 Taxon parent = null;
4987 Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, parentStr, MatchMode.BEGINNING, null, null, null, null, null);
4988 for(TaxonBase tb:taxontest.getRecords()){
4989 try {
4990 if (tb.getName().getRank().equals(r)) {
4991 parent=CdmBase.deproxy(tb, Taxon.class);
4992 }
4993 break;
4994 } catch (Exception e) {
4995 // TODO Auto-generated catch block
4996 e.printStackTrace();
4997 }
4998 }
4999 if(parent == null) {
5000 INonViralName parentNameName = parser.parseFullName(parentStr, nomenclaturalCode, r);
5001 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5002 if(tmp ==null)
5003 {
5004 parent=Taxon.NewInstance(parentNameName, ref);
5005 importer.getTaxonService().save(parent);
5006 parent = CdmBase.deproxy(parent, Taxon.class);
5007 } else {
5008 parent=tmp;
5009 }
5010 lookForParentNode(parentNameName, parent, ref,myName);
5011
5012 }
5013 hierarchy.put(r,parent);
5014 }
5015 }
5016
5017 /**
5018 * @param ref
5019 * @param myName
5020 * @param parser
5021 */
5022 private void handleTribeHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5023 String parentStr = myName.getSubfamilyStr();
5024 Rank r = Rank.SUBFAMILY();
5025 if (parentStr == null){
5026 parentStr = myName.getFamilyStr();
5027 r = Rank.FAMILY();
5028 }
5029 if(parentStr!=null){
5030 INonViralName parentNameName = parser.parseFullName(parentStr, nomenclaturalCode, r);
5031 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5032 // importer.getTaxonService().save(parent);
5033 // parent = CdmBase.deproxy(parent, Taxon.class);
5034
5035 boolean parentDoesNotExists = true;
5036 for (TaxonNode p : classification.getAllNodes()){
5037 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5038 parentDoesNotExists = false;
5039 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5040 break;
5041 }
5042 }
5043 // if(parentDoesNotExists) {
5044 // importer.getTaxonService().save(parent);
5045 // parent = CdmBase.deproxy(parent, Taxon.class);
5046 // lookForParentNode(parentNameName, parent, ref,myName);
5047 // }
5048 if(parentDoesNotExists) {
5049 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5050 if(tmp ==null)
5051 {
5052 parent=Taxon.NewInstance(parentNameName, ref);
5053 importer.getTaxonService().save(parent);
5054 parent = CdmBase.deproxy(parent, Taxon.class);
5055 } else {
5056 parent=tmp;
5057 }
5058 lookForParentNode(parentNameName, parent, ref,myName);
5059
5060 }
5061 hierarchy.put(r,parent);
5062 }
5063 }
5064
5065 /**
5066 * @param ref
5067 * @param myName
5068 * @param parser
5069 */
5070 private void handleSubtribeHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5071 String parentStr = myName.getTribeStr();
5072 Rank r = Rank.TRIBE();
5073 if (parentStr == null){
5074 parentStr = myName.getSubfamilyStr();
5075 r = Rank.SUBFAMILY();
5076 }
5077 if (parentStr == null){
5078 parentStr = myName.getFamilyStr();
5079 r = Rank.FAMILY();
5080 }
5081 if(parentStr!=null){
5082 INonViralName parentNameName = parser.parseFullName(parentStr, nomenclaturalCode, r);
5083 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5084 // importer.getTaxonService().save(parent);
5085 // parent = CdmBase.deproxy(parent, Taxon.class);
5086
5087 boolean parentDoesNotExists = true;
5088 for (TaxonNode p : classification.getAllNodes()){
5089 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5090 parentDoesNotExists = false;
5091 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5092
5093 break;
5094 }
5095 }
5096 // if(parentDoesNotExists) {
5097 // importer.getTaxonService().save(parent);
5098 // parent = CdmBase.deproxy(parent, Taxon.class);
5099 // lookForParentNode(parentNameName, parent, ref,myName);
5100 // }
5101 if(parentDoesNotExists) {
5102 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5103 if(tmp ==null)
5104 {
5105 parent=Taxon.NewInstance(parentNameName, ref);
5106 importer.getTaxonService().save(parent);
5107 parent = CdmBase.deproxy(parent, Taxon.class);
5108 } else {
5109 parent=tmp;
5110 }
5111 lookForParentNode(parentNameName, parent, ref,myName);
5112
5113 }
5114 hierarchy.put(r,parent);
5115 }
5116 }
5117
5118 /**
5119 * @param ref
5120 * @param myName
5121 * @param parser
5122 */
5123 private void handleGenusHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5124 String parentStr = myName.getSubtribeStr();
5125 Rank r = Rank.SUBTRIBE();
5126 if (parentStr == null){
5127 parentStr = myName.getTribeStr();
5128 r = Rank.TRIBE();
5129 }
5130 if (parentStr == null){
5131 parentStr = myName.getSubfamilyStr();
5132 r = Rank.SUBFAMILY();
5133 }
5134 if (parentStr == null){
5135 parentStr = myName.getFamilyStr();
5136 r = Rank.FAMILY();
5137 }
5138 if(parentStr!=null){
5139 INonViralName parentNameName = parser.parseFullName(parentStr, nomenclaturalCode, r);
5140 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5141 // importer.getTaxonService().save(parent);
5142 // parent = CdmBase.deproxy(parent, Taxon.class);
5143
5144 boolean parentDoesNotExist = true;
5145 for (TaxonNode p : classification.getAllNodes()){
5146 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5147 // System.out.println(p.getTaxon().getUuid());
5148 // System.out.println(parent.getUuid());
5149 parentDoesNotExist = false;
5150 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5151 break;
5152 }
5153 }
5154 // if(parentDoesNotExists) {
5155 // importer.getTaxonService().save(parent);
5156 // parent = CdmBase.deproxy(parent, Taxon.class);
5157 // lookForParentNode(parentNameName, parent, ref,myName);
5158 // }
5159 if(parentDoesNotExist) {
5160 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5161 if(tmp ==null){
5162
5163 parent=Taxon.NewInstance(parentNameName, ref);
5164 importer.getTaxonService().save(parent);
5165 parent = CdmBase.deproxy(parent, Taxon.class);
5166 } else {
5167 parent=tmp;
5168 }
5169 lookForParentNode(parentNameName, parent, ref,myName);
5170
5171 }
5172 hierarchy.put(r,parent);
5173 }
5174 }
5175
5176 /**
5177 * @param ref
5178 * @param myName
5179 * @param parser
5180 */
5181 private void handleSubgenusHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5182 String parentStr = myName.getGenusStr();
5183 Rank r = Rank.GENUS();
5184
5185 if(parentStr==null){
5186 parentStr = myName.getSubtribeStr();
5187 r = Rank.SUBTRIBE();
5188 }
5189 if (parentStr == null){
5190 parentStr = myName.getTribeStr();
5191 r = Rank.TRIBE();
5192 }
5193 if (parentStr == null){
5194 parentStr = myName.getSubfamilyStr();
5195 r = Rank.SUBFAMILY();
5196 }
5197 if (parentStr == null){
5198 parentStr = myName.getFamilyStr();
5199 r = Rank.FAMILY();
5200 }
5201 if(parentStr!=null){
5202 INonViralName parentNameName = parser.parseFullName(parentStr, nomenclaturalCode, r);
5203 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5204 // importer.getTaxonService().save(parent);
5205 // parent = CdmBase.deproxy(parent, Taxon.class);
5206
5207 boolean parentDoesNotExists = true;
5208 for (TaxonNode p : classification.getAllNodes()){
5209 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5210 // System.out.println(p.getTaxon().getUuid());
5211 // System.out.println(parent.getUuid());
5212 parentDoesNotExists = false;
5213 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5214 break;
5215 }
5216 }
5217 // if(parentDoesNotExists) {
5218 // importer.getTaxonService().save(parent);
5219 // parent = CdmBase.deproxy(parent, Taxon.class);
5220 // lookForParentNode(parentNameName, parent, ref,myName);
5221 // }
5222 if(parentDoesNotExists) {
5223 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5224 if(tmp ==null)
5225 {
5226 parent=Taxon.NewInstance(parentNameName, ref);
5227 importer.getTaxonService().save(parent);
5228 parent = CdmBase.deproxy(parent, Taxon.class);
5229 } else {
5230 parent=tmp;
5231 }
5232 lookForParentNode(parentNameName, parent, ref,myName);
5233
5234 }
5235 hierarchy.put(r,parent);
5236 }
5237 }
5238
5239 /**
5240 * @param ref
5241 * @param myName
5242 * @param parser
5243 */
5244 private void handleSpeciesHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5245 String parentStr = myName.getSubgenusStr();
5246 Rank r = Rank.SUBGENUS();
5247
5248 if(parentStr==null){
5249 parentStr = myName.getGenusStr();
5250 r = Rank.GENUS();
5251 }
5252
5253 if(parentStr==null){
5254 parentStr = myName.getSubtribeStr();
5255 r = Rank.SUBTRIBE();
5256 }
5257 if (parentStr == null){
5258 parentStr = myName.getTribeStr();
5259 r = Rank.TRIBE();
5260 }
5261 if (parentStr == null){
5262 parentStr = myName.getSubfamilyStr();
5263 r = Rank.SUBFAMILY();
5264 }
5265 if (parentStr == null){
5266 parentStr = myName.getFamilyStr();
5267 r = Rank.FAMILY();
5268 }
5269 if(parentStr!=null){
5270 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5271 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5272 hierarchy.put(r,parent);
5273 }
5274 }
5275
5276 /**
5277 * @param ref
5278 * @param myName
5279 * @param parser
5280 */
5281 private void handleSubSpeciesHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5282 String parentStr = myName.getSpeciesStr();
5283 Rank r = Rank.SPECIES();
5284
5285
5286 if(parentStr==null){
5287 parentStr = myName.getSubgenusStr();
5288 r = Rank.SUBGENUS();
5289 }
5290
5291 if(parentStr==null){
5292 parentStr = myName.getGenusStr();
5293 r = Rank.GENUS();
5294 }
5295
5296 if(parentStr==null){
5297 parentStr = myName.getSubtribeStr();
5298 r = Rank.SUBTRIBE();
5299 }
5300 if (parentStr == null){
5301 parentStr = myName.getTribeStr();
5302 r = Rank.TRIBE();
5303 }
5304 if (parentStr == null){
5305 parentStr = myName.getSubfamilyStr();
5306 r = Rank.SUBFAMILY();
5307 }
5308 if (parentStr == null){
5309 parentStr = myName.getFamilyStr();
5310 r = Rank.FAMILY();
5311 }
5312 if(parentStr!=null){
5313 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5314 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5315 hierarchy.put(r,parent);
5316 }
5317 }
5318
5319
5320 /**
5321 * @param ref
5322 * @param myName
5323 * @param parser
5324 */
5325 private void handleFormHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5326 String parentStr = myName.getSubspeciesStr();
5327 Rank r = Rank.SUBSPECIES();
5328
5329
5330 if(parentStr==null){
5331 parentStr = myName.getSpeciesStr();
5332 r = Rank.SPECIES();
5333 }
5334
5335 if(parentStr==null){
5336 parentStr = myName.getSubgenusStr();
5337 r = Rank.SUBGENUS();
5338 }
5339
5340 if(parentStr==null){
5341 parentStr = myName.getGenusStr();
5342 r = Rank.GENUS();
5343 }
5344
5345 if(parentStr==null){
5346 parentStr = myName.getSubtribeStr();
5347 r = Rank.SUBTRIBE();
5348 }
5349 if (parentStr == null){
5350 parentStr = myName.getTribeStr();
5351 r = Rank.TRIBE();
5352 }
5353 if (parentStr == null){
5354 parentStr = myName.getSubfamilyStr();
5355 r = Rank.SUBFAMILY();
5356 }
5357 if (parentStr == null){
5358 parentStr = myName.getFamilyStr();
5359 r = Rank.FAMILY();
5360 }
5361 if(parentStr!=null){
5362 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5363 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5364 hierarchy.put(r,parent);
5365 }
5366 }
5367
5368 /**
5369 * @param ref
5370 * @param myName
5371 * @param parser
5372 */
5373 private void handleVarietyHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5374 String parentStr = myName.getSubspeciesStr();
5375 Rank r = Rank.SUBSPECIES();
5376
5377 if(parentStr==null){
5378 parentStr = myName.getSpeciesStr();
5379 r = Rank.SPECIES();
5380 }
5381
5382 if(parentStr==null){
5383 parentStr = myName.getSubgenusStr();
5384 r = Rank.SUBGENUS();
5385 }
5386
5387 if(parentStr==null){
5388 parentStr = myName.getGenusStr();
5389 r = Rank.GENUS();
5390 }
5391
5392 if(parentStr==null){
5393 parentStr = myName.getSubtribeStr();
5394 r = Rank.SUBTRIBE();
5395 }
5396 if (parentStr == null){
5397 parentStr = myName.getTribeStr();
5398 r = Rank.TRIBE();
5399 }
5400 if (parentStr == null){
5401 parentStr = myName.getSubfamilyStr();
5402 r = Rank.SUBFAMILY();
5403 }
5404 if (parentStr == null){
5405 parentStr = myName.getFamilyStr();
5406 r = Rank.FAMILY();
5407 }
5408 if(parentStr!=null){
5409 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5410 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5411 hierarchy.put(r,parent);
5412 }
5413 }
5414
5415 /**
5416 * @param ref
5417 * @param myName
5418 * @param parser
5419 * @param parentStr
5420 * @param r
5421 * @return
5422 */
5423 private Taxon handleParentName(Reference ref, MyName myName, INonViralNameParser<?> parser, String parentStr, Rank r) {
5424 INonViralName parentNameName = parser.parseFullName(parentStr, nomenclaturalCode, r);
5425 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5426 // importer.getTaxonService().save(parent);
5427 // parent = CdmBase.deproxy(parent, Taxon.class);
5428
5429 boolean parentDoesNotExists = true;
5430 for (TaxonNode p : classification.getAllNodes()){
5431 if(p.getTaxon().getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(parent.getTitleCache().split("sec.")[0].trim())) {
5432 // System.out.println(p.getTaxon().getUuid());
5433 // System.out.println(parent.getUuid());
5434 parentDoesNotExists = false;
5435 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5436 break;
5437 }
5438 }
5439 if(parentDoesNotExists) {
5440 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5441 // System.out.println("FOUND PARENT "+tmp.toString()+" for "+parentNameName.toString());
5442 if(tmp ==null){
5443
5444 parent=Taxon.NewInstance(parentNameName, ref);
5445 importer.getTaxonService().save(parent);
5446
5447 } else {
5448 parent=tmp;
5449 }
5450 lookForParentNode(parentNameName, parent, ref,myName);
5451
5452 }
5453 return parent;
5454 }
5455
5456 private void addNameDifferenceToFile(String originalname, String atomisedname){
5457 try{
5458 FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NamesDifferent_"+classification.getTitleCache()+".txt",true);
5459 BufferedWriter out = new BufferedWriter(fstream);
5460 out.write(originalname+" (original) versus "+replaceNull(atomisedname)+" (atomised) \n");
5461 //Close the output stream
5462 out.close();
5463 }catch (Exception e){//Catch exception if any
5464 System.err.println("Error: " + e.getMessage());
5465 }
5466 }
5467 /**
5468 * @param name
5469 * @param author
5470 * @param nomenclaturalCode2
5471 * @param rank
5472 */
5473 private void addProblemNameToFile(String name, String author, NomenclaturalCode nomenclaturalCode2, Rank rank) {
5474 try{
5475 FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NameNotParsed.txt",true);
5476 BufferedWriter out = new BufferedWriter(fstream);
5477 out.write(name+"\t"+replaceNull(author)+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\n");
5478 //Close the output stream
5479 out.close();
5480 }catch (Exception e){//Catch exception if any
5481 System.err.println("Error: " + e.getMessage());
5482 }
5483 }
5484
5485
5486 /**
5487 * @param tnb
5488 * @param bestMatchingTaxon
5489 * @param insertAsExisting
5490 * @param refMods
5491 */
5492 private void logDecision(INonViralName tnb, Taxon bestMatchingTaxon, boolean insertAsExisting, Reference refMods) {
5493 try{
5494 FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "Decisions_"+classification.toString()+".txt", true);
5495 BufferedWriter out = new BufferedWriter(fstream);
5496 out.write(tnb.getTitleCache() + " sec. " + refMods + "\t" + bestMatchingTaxon.getTitleCache() + "\t" + insertAsExisting + "\n");
5497 //Close the output stream
5498 out.close();
5499 }catch (Exception e){//Catch exception if any
5500 System.err.println("Error: " + e.getMessage());
5501 }
5502 }
5503
5504
5505 @SuppressWarnings("unused")
5506 private String replaceNull(Object in){
5507 if (in == null) {
5508 return "";
5509 }
5510 if (in.getClass().equals(NomenclaturalCode.class)) {
5511 return ((NomenclaturalCode)in).getTitleCache();
5512 }
5513 return in.toString();
5514 }
5515
5516 /**
5517 * @param fullName
5518 * @param nomenclaturalCode2
5519 * @param rank
5520 */
5521 private void addProblemNameToFile(String type, String name, NomenclaturalCode nomenclaturalCode2, Rank rank, String problems) {
5522 try{
5523 FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NameNotParsed_"+classification.getTitleCache()+".txt",true);
5524 BufferedWriter out = new BufferedWriter(fstream);
5525 out.write(type+"\t"+name+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\t"+problems+"\n");
5526 //Close the output stream
5527 out.close();
5528 }catch (Exception e){//Catch exception if any
5529 System.err.println("Error: " + e.getMessage());
5530 }
5531
5532 }
5533
5534 }
5535
5536
5537