Remove generics from Reference in cdmlib (except for cdmlib-model) #5830
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / taxonx2013 / TaxonXTreatmentExtractor.java
1 // $Id$
2 /**
3 * Copyright (C) 2013 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10 package eu.etaxonomy.cdm.io.taxonx2013;
11
12 import java.io.BufferedWriter;
13 import java.io.File;
14 import java.io.FileWriter;
15 import java.io.IOException;
16 import java.net.URI;
17 import java.util.ArrayList;
18 import java.util.Arrays;
19 import java.util.HashMap;
20 import java.util.List;
21 import java.util.Map;
22 import java.util.Set;
23 import java.util.UUID;
24 import java.util.regex.Matcher;
25 import java.util.regex.Pattern;
26
27 import javax.xml.transform.TransformerException;
28 import javax.xml.transform.TransformerFactoryConfigurationError;
29
30 import org.apache.commons.lang.StringUtils;
31 import org.apache.log4j.Logger;
32 import org.w3c.dom.Node;
33 import org.w3c.dom.NodeList;
34
35 import com.ibm.lsid.MalformedLSIDException;
36
37 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
38 import eu.etaxonomy.cdm.api.service.pager.Pager;
39 import eu.etaxonomy.cdm.model.agent.AgentBase;
40 import eu.etaxonomy.cdm.model.agent.Person;
41 import eu.etaxonomy.cdm.model.common.CdmBase;
42 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
43 import eu.etaxonomy.cdm.model.common.LSID;
44 import eu.etaxonomy.cdm.model.common.Language;
45 import eu.etaxonomy.cdm.model.common.OriginalSourceType;
46 import eu.etaxonomy.cdm.model.description.Feature;
47 import eu.etaxonomy.cdm.model.description.FeatureNode;
48 import eu.etaxonomy.cdm.model.description.FeatureTree;
49 import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
50 import eu.etaxonomy.cdm.model.description.TaxonDescription;
51 import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
52 import eu.etaxonomy.cdm.model.description.TextData;
53 import eu.etaxonomy.cdm.model.name.BacterialName;
54 import eu.etaxonomy.cdm.model.name.BotanicalName;
55 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
56 import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
57 import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
58 import eu.etaxonomy.cdm.model.name.NonViralName;
59 import eu.etaxonomy.cdm.model.name.Rank;
60 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
61 import eu.etaxonomy.cdm.model.name.ZoologicalName;
62 import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
63 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
64 import eu.etaxonomy.cdm.model.reference.Reference;
65 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
66 import eu.etaxonomy.cdm.model.taxon.Classification;
67 import eu.etaxonomy.cdm.model.taxon.Synonym;
68 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
69 import eu.etaxonomy.cdm.model.taxon.Taxon;
70 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
71 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
72 import eu.etaxonomy.cdm.persistence.dto.UuidAndTitleCache;
73 import eu.etaxonomy.cdm.persistence.query.MatchMode;
74 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
75 import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
76 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
77 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImplRegExBase;
78
79 /**
80 * @author pkelbert
81 * @date 2 avr. 2013
82 *
83 */
84 public class TaxonXTreatmentExtractor extends TaxonXExtractor{
85
86 private static final String PUBLICATION_YEAR = "publicationYear";
87
88 private static final Logger logger = Logger.getLogger(TaxonXTreatmentExtractor.class);
89
90 private static final String notMarkedUp = "Not marked-up";
91 private static final UUID proIbioTreeUUID = UUID.fromString("2c49f506-c7f7-44de-a8b9-2e695de3769c");
92 private static final UUID OtherUUID = UUID.fromString("6465f8aa-2175-446f-807e-7163994b120f");
93 private static final UUID NotMarkedUpUUID = UUID.fromString("796fe3a5-2c9c-4a89-b298-7598ca944063");
94 private static final boolean skippQuestion = true;
95
96 private final NomenclaturalCode nomenclaturalCode;
97 private Classification classification;
98
99 private String treatmentMainName,originalTreatmentName;
100
101 private final HashMap<String,Map<String,String>> namesMap = new HashMap<String, Map<String,String>>();
102
103
104 private final Pattern keypattern = Pattern.compile("^(\\d+.*|-\\d+.*)");
105 private final Pattern keypatternend = Pattern.compile("^.+?\\d$");
106
107 private boolean maxRankRespected =false;
108 private Map<String, Feature> featuresMap;
109
110 private MyName currentMyName;
111
112 private Reference sourceUrlRef;
113
114 private String followingText; //text element immediately following a tax:name in tax:nomenclature TODO move do state
115 private String usedFollowingTextPrefix; //the part of the following text which has been used during taxon name creation
116
117 private final TaxonXAddSources sourceHandler = new TaxonXAddSources();
118
/**
 * Creates an extractor and hands the importer, the import state and the source URL
 * reference on to the internal source handler.
 *
 * @param nomenclaturalCode the nomenclatural code kept by this extractor
 * @param classification the classification kept by this extractor
 * @param importer the running import; its agent service is passed to prepareCollectors
 * @param configState the import state
 * @param featuresMap the features map, returned later by {@link #getFeaturesUsed()}
 * @param urlSource the reference stored as source URL reference
 */
public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode, Classification classification, TaxonXImport importer,
        TaxonXImportState configState, Map<String, Feature> featuresMap, Reference urlSource) {
    this.nomenclaturalCode = nomenclaturalCode;
    this.classification = classification;
    this.importer = importer;
    this.state2 = configState;
    this.featuresMap = featuresMap;
    this.sourceUrlRef = urlSource;

    prepareCollectors(configState, importer.getAgentService());

    // the source handler works with the same reference, importer and state as this extractor
    sourceHandler.setSourceUrlRef(urlSource);
    sourceHandler.setImporter(importer);
    sourceHandler.setConfigState(configState);
}
138
139 /**
140 * extracts all the treament information and save them
141 * @param treatmentnode: the XML Node
142 * @param tosave: the list of object to save into the CDM
143 * @param refMods: the reference extracted from the MODS
144 * @param sourceName: the URI of the document
145 */
146 @SuppressWarnings({ "rawtypes", "unused" })
147
148 protected void extractTreatment(Node treatmentnode, Reference refMods, URI sourceName) { logger.info("extractTreatment");
149 List<TaxonNameBase> namesToSave = new ArrayList<TaxonNameBase>();
150 NodeList children = treatmentnode.getChildNodes();
151 Taxon acceptedTaxon =null;
152 boolean hasRefgroup=false;
153
154 //needed?
155 for (int i=0;i<children.getLength();i++){
156 if(children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group")) {
157 hasRefgroup=true;
158 }
159 }
160
161 for (int i=0;i<children.getLength();i++){
162 Node child = children.item(i);
163 acceptedTaxon = handleSingleNode(refMods, sourceName, namesToSave, child, acceptedTaxon);
164 }
165 // logger.info("saveUpdateNames");
166 if (maxRankRespected){
167 importer.getNameService().saveOrUpdate(namesToSave);
168 importer.getClassificationService().saveOrUpdate(classification);
169 //logger.info("saveUpdateNames-ok");
170 }
171
172 buildFeatureTree();
173 }
174
175 private Taxon handleSingleNode(Reference refMods, URI sourceName,
176 List<TaxonNameBase> namesToSave, Node child, Taxon acceptedTaxon) {
177 Taxon defaultTaxon =null;
178
179 String nodeName = child.getNodeName();
180 if (nodeName.equalsIgnoreCase("tax:nomenclature")){
181 NodeList nomenclatureChildren = child.getChildNodes();
182 boolean containsName = false;
183 for(int k=0; k<nomenclatureChildren.getLength(); k++){
184 if(nomenclatureChildren.item(k).getNodeName().equalsIgnoreCase("tax:name")){
185 containsName=true;
186 break;
187 }
188 }
189 if (containsName){
190 reloadClassification();
191 //extract "main" the scientific name
192 try{
193 acceptedTaxon = extractNomenclature(child, namesToSave, refMods);
194 }catch(ClassCastException e){
195 //FIXME exception handling
196 e.printStackTrace();
197 }
198 // System.out.println("acceptedTaxon : "+acceptedTaxon);
199 }
200 }else if (nodeName.equalsIgnoreCase("tax:ref_group") && maxRankRespected){
201 reloadClassification();
202 //extract the References within the document
203 extractReferences(child, namesToSave ,acceptedTaxon,refMods);
204 }else if (nodeName.equalsIgnoreCase("tax:div") &&
205 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected){
206 File file = new File(TaxonXImport.LOG_FOLDER + "multipleTaxonX.txt");
207 FileWriter writer;
208 try {
209 writer = new FileWriter(file ,true);
210 writer.write(sourceName+"\n");
211 writer.flush();
212 writer.close();
213 } catch (IOException e1) {
214 // TODO Auto-generated catch block
215 logger.error(e1.getMessage());
216 }
217 // String multiple = askMultiple(children.item(i));
218 String multiple = "Other";
219 if (multiple.equalsIgnoreCase("other")) {
220 extractSpecificFeatureNotStructured(child,acceptedTaxon, defaultTaxon,namesToSave, refMods,multiple);
221 }else if (multiple.equalsIgnoreCase("synonyms")) {
222 try{
223 extractSynonyms(child,acceptedTaxon, refMods, null);
224 }catch(NullPointerException e){
225 logger.warn("the accepted taxon is maybe null");
226 }
227 }else if(multiple.equalsIgnoreCase("material examined")){
228 extractMaterials(child, acceptedTaxon, refMods, namesToSave);
229 }else if (multiple.equalsIgnoreCase("distribution")){
230 extractDistribution(child, acceptedTaxon, defaultTaxon, namesToSave, refMods);
231 }else if (multiple.equalsIgnoreCase("type status")){
232 extractDescriptionWithReference(child, acceptedTaxon, defaultTaxon,refMods, "TypeStatus");
233 }else if (multiple.equalsIgnoreCase("vernacular name")){
234 extractDescriptionWithReference(child, acceptedTaxon, defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
235 }else{
236 extractSpecificFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,multiple);
237 }
238 }
239 else if(nodeName.equalsIgnoreCase("tax:div") &&
240 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected){
241 extractFeature(child,acceptedTaxon,defaultTaxon, namesToSave, refMods, Feature.BIOLOGY_ECOLOGY());
242 }
243 else if(nodeName.equalsIgnoreCase("tax:div") &&
244 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("vernacularName") && maxRankRespected){
245 extractDescriptionWithReference(child, acceptedTaxon,defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
246 }
247 else if(nodeName.equalsIgnoreCase("tax:div") &&
248 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected){
249 extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DESCRIPTION());
250 }
251 else if(nodeName.equalsIgnoreCase("tax:div") &&
252 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected){
253 extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,Feature.DIAGNOSIS());
254 }
255 else if(nodeName.equalsIgnoreCase("tax:div") &&
256 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected){
257 extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DISCUSSION());
258 }
259 else if(nodeName.equalsIgnoreCase("tax:div") &&
260 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("note") && maxRankRespected){
261 extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DESCRIPTION());
262 }
263 else if(nodeName.equalsIgnoreCase("tax:div") &&
264 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected){
265 extractDistribution(child,acceptedTaxon,defaultTaxon,namesToSave, refMods);
266 }
267 else if(nodeName.equalsIgnoreCase("tax:div") &&
268 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected){
269 extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave,refMods,Feature.ETYMOLOGY());
270 }
271 else if(nodeName.equalsIgnoreCase("tax:div") &&
272 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected){
273 extractMaterials(child,acceptedTaxon, refMods, namesToSave);
274 }
275 else if(nodeName.equalsIgnoreCase("tax:figure") && maxRankRespected){
276 extractSpecificFeature(child,acceptedTaxon,defaultTaxon, namesToSave, refMods, "Figure");
277 }
278 else if(nodeName.equalsIgnoreCase("tax:div") &&
279 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") && maxRankRespected){
280 extractSpecificFeature(child, acceptedTaxon,defaultTaxon, namesToSave, refMods, "table");
281 }else if(nodeName.equalsIgnoreCase("tax:div") &&
282 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected){
283 //TODO IGNORE keys for the moment
284 //extractKey(children.item(i),acceptedTaxon, nameToSave,source, refMods);
285 extractSpecificFeatureNotStructured(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,"Keys - unparsed");
286 }
287 else{
288 if (! nodeName.equalsIgnoreCase("tax:pb")){
289 //logger.info("ANOTHER KIND OF NODES: "+children.item(i).getNodeName()+", "+children.item(i).getAttributes());
290 if (child.getAttributes() !=null) {
291 logger.info("First Attribute: " + child.getAttributes().item(0));
292 }
293 extractSpecificFeatureNotStructured(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, notMarkedUp);
294 }else{
295 //FIXME
296 logger.warn("Unhandled");
297 }
298 }
299 return acceptedTaxon;
300 }
301
302
303 protected Map<String,Feature> getFeaturesUsed(){
304 return featuresMap;
305 }
306 /**
307 *
308 */
309 private void buildFeatureTree() {
310 logger.info("buildFeatureTree");
311 FeatureTree proibiospheretree = importer.getFeatureTreeService().find(proIbioTreeUUID);
312 if (proibiospheretree == null){
313 List<FeatureTree> trees = importer.getFeatureTreeService().list(FeatureTree.class, null, null, null, null);
314 if (trees.size()==1) {
315 FeatureTree ft = trees.get(0);
316 if (featuresMap==null) {
317 featuresMap=new HashMap<String, Feature>();
318 }
319 for (Feature feature: ft.getDistinctFeatures()){
320 if(feature!=null) {
321 featuresMap.put(feature.getTitleCache(), feature);
322 }
323 }
324 }
325 proibiospheretree = FeatureTree.NewInstance();
326 proibiospheretree.setUuid(proIbioTreeUUID);
327 }
328 // FeatureNode root = proibiospheretree.getRoot();
329 FeatureNode root2 = proibiospheretree.getRoot();
330 if (root2 != null){
331 int nbChildren = root2.getChildCount()-1;
332 while (nbChildren>-1){
333 try{
334 root2.removeChild(nbChildren);
335 }catch(Exception e){logger.warn("Can't remove child from FeatureTree "+e);}
336 nbChildren --;
337 }
338
339 }
340
341 for (Feature feature:featuresMap.values()) {
342 root2.addChild(FeatureNode.NewInstance(feature));
343 }
344 importer.getFeatureTreeService().saveOrUpdate(proibiospheretree);
345
346 }
347
348
349 /**
350 * @param keys
351 * @param acceptedTaxon: the current acceptedTaxon
352 * @param nametosave: the list of objects to save into the CDM
353 * @param refMods: the current reference extracted from the MODS
354 */
355 /* @SuppressWarnings("rawtypes")
356 private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonNameBase> nametosave, Reference refMods) {
357 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
358
359 NodeList children = keys.getChildNodes();
360 String key="";
361 PolytomousKey poly = PolytomousKey.NewInstance();
362 poly.addSource(OriginalSourceType.Import, null,null,refMods,null);
363 poly.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
364 poly.addTaxonomicScope(acceptedTaxon);
365 poly.setTitleCache("bloup", true);
366 // poly.addCoveredTaxon(acceptedTaxon);
367 PolytomousKeyNode root = poly.getRoot();
368 PolytomousKeyNode previous = null,tmpKey=null;
369 Taxon taxonKey=null;
370 List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
371
372 // String fullContent = keys.getTextContent();
373 for (int i=0;i<children.getLength();i++){
374 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
375 NodeList paragraph = children.item(i).getChildNodes();
376 key="";
377 taxonKey=null;
378 for (int j=0;j<paragraph.getLength();j++){
379 if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
380 if (! paragraph.item(j).getTextContent().trim().isEmpty()){
381 key+=paragraph.item(j).getTextContent().trim();
382 // logger.info("KEY: "+j+"--"+key);
383 }
384 }
385 if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
386 taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
387 }
388 }
389 // logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
390 if (keypattern.matcher(key).matches()){
391 tmpKey = PolytomousKeyNode.NewInstance(key);
392 if (taxonKey!=null) {
393 tmpKey.setTaxon(taxonKey);
394 }
395 polyNodes.add(tmpKey);
396 if (previous == null) {
397 root.addChild(tmpKey);
398 } else {
399 previous.addChild(tmpKey);
400 }
401 }else{
402 if (!key.isEmpty()){
403 tmpKey=PolytomousKeyNode.NewInstance(key);
404 if (taxonKey!=null) {
405 tmpKey.setTaxon(taxonKey);
406 }
407 polyNodes.add(tmpKey);
408 if (keypatternend.matcher(key).matches()) {
409 root.addChild(tmpKey);
410 previous=tmpKey;
411 } else{
412 previous.addChild(tmpKey);
413 }
414
415 }
416 }
417 }
418 }
419 importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
420 importer.getPolytomousKeyService().saveOrUpdate(poly);
421 }
422 */
423
424
425 /**
426 * @param taxons: the XML Nodegroup
427 * @param nametosave: the list of objects to save into the CDM
428 * @param acceptedTaxon: the current accepted Taxon
429 * @param refMods: the current reference extracted from the MODS
430 *
431 * @return Taxon object built
432 */
433 @SuppressWarnings({ "rawtypes", "unused" })
434 private TaxonNameBase getTaxonNameBaseFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference refMods, boolean isSynonym) {
435 // logger.info("getTaxonFromXML");
436 // logger.info("acceptedTaxon: "+acceptedTaxon);
437 logger.info("getTaxonNameBaseFromXML");
438 TaxonNameBase nameToBeFilled = null;
439
440 currentMyName=new MyName(isSynonym);
441
442 NomenclaturalStatusType statusType = null;
443 try {
444 String followingText = null; //needs to be checked if following text is possible
445 currentMyName = extractScientificName(taxons,refMods, null);
446 } catch (TransformerFactoryConfigurationError e1) {
447 logger.warn(e1);
448 } catch (TransformerException e1) {
449 logger.warn(e1);
450 }
451 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
452
453 nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
454 if (nameToBeFilled.hasProblem() &&
455 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
456 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
457 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
458 nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
459 }
460
461 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
462 */
463 nameToBeFilled = currentMyName.getTaxonNameBase();
464 return nameToBeFilled;
465
466 }
467
468
469 /**
470 *
471 */
472 private void reloadClassification() {
473 logger.info("reloadClassification");
474 Classification cl = importer.getClassificationService().find(classification.getUuid());
475 if (cl != null){
476 classification = cl;
477 }else{
478 importer.getClassificationService().saveOrUpdate(classification);
479 classification = importer.getClassificationService().find(classification.getUuid());
480 }
481 }
482
483 // /**
484 // * Create a Taxon for the current NameBase, based on the current reference
485 // * @param taxonNameBase
486 // * @param refMods: the current reference extracted from the MODS
487 // * @return Taxon
488 // */
489 // @SuppressWarnings({ "unused", "rawtypes" })
490 // private Taxon getTaxon(TaxonNameBase taxonNameBase, Reference refMods) {
491 // Taxon t = new Taxon(taxonNameBase,null );
492 // if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
493 // t.setSec(configState.getConfig().getSecundum());
494 // logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
495 // }
496 // /*<<<<<<< .courant
497 // boolean sourceExists=false;
498 // Set<IdentifiableSource> sources = t.getSources();
499 // for (IdentifiableSource src : sources){
500 // String micro = src.getCitationMicroReference();
501 // Reference r = src.getCitation();
502 // if (r.equals(refMods) && micro == null) {
503 // sourceExists=true;
504 // }
505 // }
506 // if(!sourceExists) {
507 // t.addSource(null,null,refMods,null);
508 // }
509 //=======*/
510 // t.addSource(OriginalSourceType.Import,null,null,refMods,null);
511 // t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
512 // return t;
513 // }
514
515 private void extractDescriptionWithReference(Node typestatus, Taxon acceptedTaxon, Taxon defaultTaxon, Reference refMods,
516 String featureName) {
517 // System.out.println("extractDescriptionWithReference !");
518 logger.info("extractDescriptionWithReference");
519 NodeList children = typestatus.getChildNodes();
520
521 Feature currentFeature=getFeatureObjectFromString(featureName);
522
523 String r="";String s="";
524 for (int i=0;i<children.getLength();i++){
525 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
526 s+=children.item(i).getTextContent().trim();
527 }
528 if (children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
529 r+= children.item(i).getTextContent().trim();
530 }
531 if (s.indexOf(r)>-1) {
532 s=s.split(r)[0];
533 }
534 }
535
536 Reference currentref = ReferenceFactory.newGeneric();
537 if(!r.isEmpty()) {
538 currentref.setTitleCache(r, true);
539 } else {
540 currentref=refMods;
541 }
542 setParticularDescription(s,acceptedTaxon,defaultTaxon, currentref, refMods,currentFeature);
543 }
544
545 /**
546 * @param nametosave
547 * @param distribution: the XML node group
548 * @param acceptedTaxon: the current accepted Taxon
549 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
550 * @param refMods: the current reference extracted from the MODS
551 */
552 @SuppressWarnings("rawtypes")
553 private void extractDistribution(Node distribution, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave, Reference refMods) {
554 logger.info("extractDistribution");
555 // logger.info("acceptedTaxon: "+acceptedTaxon);
556 NodeList children = distribution.getChildNodes();
557 Map<Integer,List<MySpecimenOrObservation>> specimenOrObservations = new HashMap<Integer, List<MySpecimenOrObservation>>();
558 Map<Integer,String> descriptionsFulltext = new HashMap<Integer,String>();
559
560 for (int i=0;i<children.getLength();i++){
561 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
562 NodeList paragraph = children.item(i).getChildNodes();
563 for (int j=0;j<paragraph.getLength();j++){
564 if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
565 extractText(descriptionsFulltext, i, paragraph.item(j));
566 }
567 else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
568 extractInLine(nametosave, refMods, descriptionsFulltext, i,paragraph.item(j));
569 }
570 else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")){
571 MySpecimenOrObservation specimenOrObservation = new MySpecimenOrObservation();
572 DerivedUnit derivedUnitBase = null;
573 specimenOrObservation = extractSpecimenOrObservation(paragraph.item(j), derivedUnitBase, SpecimenOrObservationType.DerivedUnit, null);
574 extractTextFromSpecimenOrObservation(specimenOrObservations, descriptionsFulltext, i, specimenOrObservation);
575 }
576 }
577 }
578 }
579
580 int m=0;
581 for (int k:descriptionsFulltext.keySet()) {
582 if (k>m) {
583 m=k;
584 }
585 }
586 for (int k:specimenOrObservations.keySet()) {
587 if (k>m) {
588 m=k;
589 }
590 }
591
592
593 if(acceptedTaxon!=null){
594 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
595 Feature currentFeature = Feature.DISTRIBUTION();
596 // DerivedUnit derivedUnitBase=null;
597 // String descr="";
598 for (int k=0;k<=m;k++){
599 if(specimenOrObservations.keySet().contains(k)){
600 for (MySpecimenOrObservation soo:specimenOrObservations.get(k) ) {
601 handleAssociation(acceptedTaxon, refMods, td, soo);
602 }
603 }
604
605 if (descriptionsFulltext.keySet().contains(k)){
606 if (!stringIsEmpty(descriptionsFulltext.get(k).trim()) && (descriptionsFulltext.get(k).startsWith("Hab.") || descriptionsFulltext.get(k).startsWith("Habitat"))){
607 setParticularDescription(descriptionsFulltext.get(k),acceptedTaxon,defaultTaxon, refMods, Feature.HABITAT());
608 break;
609 }
610 else{
611 handleTextData(refMods, descriptionsFulltext, td, currentFeature, k);
612 }
613 }
614
615 if (descriptionsFulltext.keySet().contains(k) || specimenOrObservations.keySet().contains(k)){
616 acceptedTaxon.addDescription(td);
617 sourceHandler.addAndSaveSource(refMods, td, null);
618 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
619 }
620 }
621 }
622 }
623
624 /**
625 * @param refMods
626 * @param descriptionsFulltext
627 * @param td
628 * @param currentFeature
629 * @param k
630 */
631 private void handleTextData(Reference refMods, Map<Integer, String> descriptionsFulltext, TaxonDescription td,
632 Feature currentFeature, int k) {
633 //logger.info("handleTextData");
634 TextData textData = TextData.NewInstance();
635 textData.setFeature(currentFeature);
636 textData.putText(Language.UNKNOWN_LANGUAGE(), descriptionsFulltext.get(k));
637 sourceHandler.addSource(refMods, textData);
638 td.addElement(textData);
639 }
640
641 /**
642 * @param acceptedTaxon
643 * @param refMods
644 * @param td
645 * @param soo
646 */
647 private void handleAssociation(Taxon acceptedTaxon, Reference refMods, TaxonDescription td, MySpecimenOrObservation soo) {
648 logger.info("handleAssociation");
649 String descr=soo.getDescr();
650 DerivedUnit derivedUnitBase = soo.getDerivedUnitBase();
651
652 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
653
654 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
655
656 Feature feature=null;
657 feature = makeFeature(derivedUnitBase);
658 if(!StringUtils.isEmpty(descr)) {
659 derivedUnitBase.setTitleCache(descr, true);
660 }
661
662 IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
663
664 taxonDescription.addElement(indAssociation);
665 sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
666 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
667 td.setDescribedSpecimenOrObservation(soo.getDerivedUnitBase());
668 }
669
670 /**
671 * create an individualAssociation
672 * @param refMods
673 * @param derivedUnitBase
674 * @param feature
675 * @return
676 */
677 private IndividualsAssociation createIndividualAssociation(Reference refMods, DerivedUnit derivedUnitBase,
678 Feature feature) {
679 logger.info("createIndividualAssociation");
680 IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
681 indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
682 indAssociation.setFeature(feature);
683 indAssociation = sourceHandler.addSource(refMods, indAssociation);
684 return indAssociation;
685 }
686
687 /**
688 * @param specimenOrObservations
689 * @param descriptionsFulltext
690 * @param i
691 * @param specimenOrObservation
692 */
693 private void extractTextFromSpecimenOrObservation(Map<Integer, List<MySpecimenOrObservation>> specimenOrObservations,
694 Map<Integer, String> descriptionsFulltext, int i, MySpecimenOrObservation specimenOrObservation) {
695 logger.info("extractTextFromSpecimenOrObservation");
696 List<MySpecimenOrObservation> speObsList = specimenOrObservations.get(i);
697 if (speObsList == null) {
698 speObsList=new ArrayList<MySpecimenOrObservation>();
699 }
700 speObsList.add(specimenOrObservation);
701 specimenOrObservations.put(i,speObsList);
702
703 String s = specimenOrObservation.getDerivedUnitBase().toString();
704 if (descriptionsFulltext.get(i) !=null){
705 s = descriptionsFulltext.get(i)+" "+s;
706 }
707 descriptionsFulltext.put(i, s);
708 }
709
710 /**
711 * Extract the text with the inline link to a taxon
712 * @param nametosave
713 * @param refMods
714 * @param descriptionsFulltext
715 * @param i
716 * @param paragraph
717 */
718 @SuppressWarnings("rawtypes")
719 private void extractInLine(List<TaxonNameBase> nametosave, Reference refMods, Map<Integer, String> descriptionsFulltext,
720 int i, Node paragraph) {
721 //logger.info("extractInLine");
722 String inLine=getInlineTextForName(nametosave, refMods, paragraph);
723 if (descriptionsFulltext.get(i) !=null){
724 inLine = descriptionsFulltext.get(i)+inLine;
725 }
726 descriptionsFulltext.put(i, inLine);
727 }
728
729 /**
730 * Extract the raw text from a Node
731 * @param descriptionsFulltext
732 * @param node
733 * @param j
734 */
735 private void extractText(Map<Integer, String> descriptionsFulltext, int i, Node node) {
736 //logger.info("extractText");
737 if(!node.getTextContent().trim().isEmpty()) {
738 String s =node.getTextContent().trim();
739 if (descriptionsFulltext.get(i) !=null){
740 s = descriptionsFulltext.get(i)+" "+s;
741 }
742 descriptionsFulltext.put(i, s);
743 }
744 }
745
746
747 /**
748 * @param materials: the XML node group
749 * @param acceptedTaxon: the current accepted Taxon
750 * @param refMods: the current reference extracted from the MODS
751 */
752 @SuppressWarnings("rawtypes")
753 private void extractMaterials(Node materials, Taxon acceptedTaxon, Reference refMods,List<TaxonNameBase> nametosave) {
754 logger.info("EXTRACTMATERIALS");
755 // logger.info("acceptedTaxon: "+acceptedTaxon);
756 NodeList children = materials.getChildNodes();
757 NodeList events = null;
758 // String descr="";
759
760
761 for (int i=0;i<children.getLength();i++){
762 String rawAssociation="";
763 boolean added=false;
764 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
765 events = children.item(i).getChildNodes();
766 for(int k=0;k<events.getLength();k++){
767 if (events.item(k).getNodeName().equalsIgnoreCase("tax:name")){
768 String inLine= getInlineTextForName(nametosave, refMods, events.item(k));
769 if(!inLine.isEmpty()) {
770 rawAssociation+=inLine;
771 }
772 }
773 if (! events.item(k).getNodeName().equalsIgnoreCase("tax:name")
774 && !events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
775 rawAssociation+= events.item(k).getTextContent().trim();
776 }
777 if(events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
778 if (!containsDistinctLetters(rawAssociation.replaceAll(";",""))) {
779 rawAssociation="no description text";
780 }
781 added=true;
782 handleDerivedUnitFacadeAndBase(acceptedTaxon, refMods, events.item(k), rawAssociation);
783 }
784 if (!rawAssociation.isEmpty() && !added){
785
786 Feature feature = Feature.MATERIALS_EXAMINED();
787 featuresMap.put(feature.getTitleCache(),feature);
788
789 TextData textData = createTextData(rawAssociation, refMods, feature);
790
791 if(! rawAssociation.isEmpty() && (acceptedTaxon!=null)){
792 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
793 td.addElement(textData);
794 acceptedTaxon.addDescription(td);
795 sourceHandler.addAndSaveSource(refMods, td, null);
796 }
797 // DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
798 // derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
799 //
800 // TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
801 // acceptedTaxon.addDescription(taxonDescription);
802 //
803 // IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
804 //
805 // Feature feature = Feature.MATERIALS_EXAMINED();
806 // featuresMap.put(feature.getTitleCache(),feature);
807 // if(!StringUtils.isEmpty(rawAssociation)) {
808 // derivedUnitBase.setTitleCache(rawAssociation, true);
809 // }
810 // indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
811 // indAssociation.setFeature(feature);
812 // indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
813 //
814 // /*boolean sourceExists=false;
815 // Set<DescriptionElementSource> dsources = indAssociation.getSources();
816 // for (DescriptionElementSource src : dsources){
817 // String micro = src.getCitationMicroReference();
818 // Reference r = src.getCitation();
819 // if (r.equals(refMods) && micro == null) {
820 // sourceExists=true;
821 // }
822 // }
823 // if(!sourceExists) {
824 // indAssociation.addSource(null, null, refMods, null);
825 // }*/
826 // taxonDescription.addElement(indAssociation);
827 // taxonDescription.setTaxon(acceptedTaxon);
828 // taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
829 //
830 // /*sourceExists=false;
831 // Set<IdentifiableSource> sources = taxonDescription.getSources();
832 // for (IdentifiableSource src : sources){
833 // String micro = src.getCitationMicroReference();
834 // Reference r = src.getCitation();
835 // if (r.equals(refMods) && micro == null) {
836 // sourceExists=true;
837 // }
838 // }
839 // if(!sourceExists) {
840 // taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
841 // }*/
842 //
843 // importer.getDescriptionService().saveOrUpdate(taxonDescription);
844 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
845
846 rawAssociation="";
847 }
848 }
849 }
850 }
851 }
852
853 /**
854 * @param acceptedTaxon
855 * @param refMods
856 * @param events
857 * @param rawAssociation
858 * @param k
859 */
860 private void handleDerivedUnitFacadeAndBase(Taxon acceptedTaxon, Reference refMods, Node event,
861 String rawAssociation) {
862 logger.info("handleDerivedUnitFacadeAndBase");
863 String descr;
864 DerivedUnit derivedUnitBase;
865 MySpecimenOrObservation myspecimenOrObservation;
866 DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
867 derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
868
869 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
870
871 //TODO this may not always be correct, ask user
872 TaxonNameBase<?,?> typifiableName = acceptedTaxon != null ? acceptedTaxon.getName() : null;
873 myspecimenOrObservation = extractSpecimenOrObservation(event,derivedUnitBase,SpecimenOrObservationType.DerivedUnit, typifiableName);
874 derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
875 descr=myspecimenOrObservation.getDescr();
876
877 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
878
879 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
880
881 Feature feature = makeFeature(derivedUnitBase);
882 featuresMap.put(feature.getTitleCache(),feature);
883 if(!StringUtils.isEmpty(descr)) {
884 derivedUnitBase.setTitleCache(descr, true);
885 }
886
887 IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
888
889 taxonDescription.addElement(indAssociation);
890 sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
891 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
892 }
893
894
895
896 /**
897 * @param currentName
898 * @param materials: the XML node group
899 * @param acceptedTaxon: the current accepted Taxon
900 * @param refMods: the current reference extracted from the MODS
901 */
902 private String extractMaterialsDirect(Node materials, Taxon acceptedTaxon, Reference refMods, String event, NonViralName<?> currentName) {
903 logger.info("extractMaterialsDirect");
904 // logger.info("acceptedTaxon: "+acceptedTaxon);
905 String descr="";
906
907 DerivedUnit derivedUnitBase=null;
908 MySpecimenOrObservation myspecimenOrObservation = extractSpecimenOrObservation(materials,derivedUnitBase, SpecimenOrObservationType.DerivedUnit, currentName);
909 derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
910
911 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
912
913 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
914
915 Feature feature=null;
916 if (event.equalsIgnoreCase("collection")){
917 feature = makeFeature(derivedUnitBase);
918 }
919 else{
920 feature = Feature.MATERIALS_EXAMINED();
921 }
922 featuresMap.put(feature.getTitleCache(), feature);
923
924 descr=myspecimenOrObservation.getDescr();
925 if(!StringUtils.isEmpty(descr)) {
926 derivedUnitBase.setTitleCache(descr, true);
927 }
928
929 IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
930
931 taxonDescription.addElement(indAssociation);
932 sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
933 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
934
935 return derivedUnitBase.getTitleCache();
936
937 }
938
939
940 /**
941 * @param description: the XML node group
942 * @param acceptedTaxon: the current acceptedTaxon
943 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
944 * @param nametosave: the list of objects to save into the CDM
945 * @param refMods: the current reference extracted from the MODS
946 * @param featureName: the feature name
947 */
948 @SuppressWarnings({ "rawtypes"})
949 private String extractSpecificFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
950 List<TaxonNameBase> nametosave, Reference refMods, String featureName ) {
951 logger.info("extractSpecificFeature "+featureName);
952 // System.out.println("GRUUUUuu");
953 NodeList children = description.getChildNodes();
954 NodeList insideNodes ;
955 NodeList trNodes;
956 // String descr ="";
957 String localdescr="";
958 List<String> blabla=null;
959 List<String> text = new ArrayList<String>();
960
961 String table="<table>";
962 String head="";
963 String line="";
964
965 Feature currentFeature=getFeatureObjectFromString(featureName);
966
967 // String fullContent = description.getTextContent();
968 for (int i=0;i<children.getLength();i++){
969 // localdescr="";
970 if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
971 text.add(children.item(i).getTextContent().trim());
972 }
973 if (featureName.equalsIgnoreCase("table")){
974 if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
975 children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("thead")){
976 head = extractTableHead(children.item(i));
977 table+=head;
978 line = extractTableLine(children.item(i));
979 if (!line.equalsIgnoreCase("<tr></tr>")) {
980 table+=line;
981 }
982 }
983 if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
984 children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
985 line = extractTableLineWithColumn(children.item(i).getChildNodes());
986 if(!line.equalsIgnoreCase("<tr></tr>")) {
987 table+=line;
988 }
989 }
990 }
991 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
992 insideNodes=children.item(i).getChildNodes();
993 blabla= new ArrayList<String>();
994 for (int j=0;j<insideNodes.getLength();j++){
995 Node insideNode = insideNodes.item(j);
996 if (insideNode.getNodeName().equalsIgnoreCase("tax:name")){
997 String inlinetext = getInlineTextForName(nametosave, refMods, insideNode);
998 if (!inlinetext.isEmpty()) {
999 blabla.add(inlinetext);
1000 }
1001 }
1002 else if (insideNode.getNodeName().equalsIgnoreCase("#text")) {
1003 if(!insideNode.getTextContent().trim().isEmpty()){
1004 blabla.add(insideNode.getTextContent().trim());
1005 // localdescr += insideNodes.item(j).getTextContent().trim();
1006 }
1007 }
1008 }
1009 if (!blabla.isEmpty()) {
1010 String blaStr = StringUtils.join(blabla," ").trim();
1011 if(!stringIsEmpty(blaStr)) {
1012 setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1013 text.add(blaStr);
1014 }
1015 }
1016
1017 }
1018 if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1019 if(!children.item(i).getTextContent().trim().isEmpty()){
1020 localdescr = children.item(i).getTextContent().trim();
1021 if(!stringIsEmpty(localdescr)) {
1022 setParticularDescription(localdescr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1023 }
1024 }
1025 }
1026 }
1027
1028 table+="</table>";
1029 if (!table.equalsIgnoreCase("<table></table>")){
1030 // System.out.println("TABLE : "+table);
1031 text.add(table);
1032 }
1033
1034 if (text !=null && !text.isEmpty()) {
1035 return StringUtils.join(text," ");
1036 } else {
1037 return "";
1038 }
1039
1040 }
1041
1042 /**
1043 * @param children
1044 * @param i
1045 * @return
1046 */
1047 private String extractTableLine(Node child) {
1048 //logger.info("extractTableLine");
1049 String line;
1050 line="<tr>";
1051 if (child.getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1052 line = extractTableLineWithColumn(child.getChildNodes());
1053 }
1054 line+="</tr>";
1055 return line;
1056 }
1057
1058 /**
1059 * @param children
1060 * @param i
1061 * @return
1062 */
1063 private String extractTableHead(Node child) {
1064 //logger.info("extractTableHead");
1065 String head;
1066 String line;
1067 head="<th>";
1068 NodeList trNodes = child.getChildNodes();
1069 for (int k=0;k<trNodes.getLength();k++){
1070 if (trNodes.item(k).getNodeName().equalsIgnoreCase("tax:div")
1071 && trNodes.item(k).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1072 line = extractTableLineWithColumn(trNodes.item(k).getChildNodes());
1073 head+=line;
1074 }
1075 }
1076 head+="</th>";
1077 return head;
1078 }
1079
1080 /**
1081 * build a html table line, with td columns
1082 * @param tdNodes
1083 * @return an html coded line
1084 */
1085 private String extractTableLineWithColumn(NodeList tdNodes) {
1086 //logger.info("extractTableLineWithColumn");
1087 String line;
1088 line="<tr>";
1089 for (int l=0;l<tdNodes.getLength();l++){
1090 if (tdNodes.item(l).getNodeName().equalsIgnoreCase("tax:p")){
1091 line+="<td>"+tdNodes.item(l).getTextContent()+"</td>";
1092 }
1093 }
1094 line+="</tr>";
1095 return line;
1096 }
1097
1098 /**
1099 * @param description: the XML node group
1100 * @param acceptedTaxon: the current acceptedTaxon
1101 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1102 * @param nametosave: the list of objects to save into the CDM
1103 * @param refMods: the current reference extracted from the MODS
1104 * @param featureName: the feature name
1105 */
1106 @SuppressWarnings({ "unused", "rawtypes" })
1107 private String extractSpecificFeatureNotStructured(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
1108 List<TaxonNameBase> nameToSave, Reference refMods, String featureName ) {
1109 logger.info("extractSpecificFeatureNotStructured " + featureName);
1110 NodeList children = description.getChildNodes();
1111 NodeList insideNodes ;
1112 List<String> blabla= new ArrayList<String>();
1113
1114
1115 Feature currentFeature = getFeatureObjectFromString(featureName);
1116
1117 String fullContent = description.getTextContent();
1118 for (int i=0;i<children.getLength();i++){
1119 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1120 insideNodes=children.item(i).getChildNodes();
1121 for (int j=0;j<insideNodes.getLength();j++){
1122 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1123 String inlineText =getInlineTextForName(nameToSave, refMods, insideNodes.item(j));
1124 if(!inlineText.isEmpty()) {
1125 blabla.add(inlineText);
1126 }
1127 }
1128 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1129 if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1130 blabla.add(insideNodes.item(j).getTextContent().trim());
1131 }
1132 }
1133 }
1134 }
1135 if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1136 if(!children.item(i).getTextContent().trim().isEmpty()){
1137 String localdescr = children.item(i).getTextContent().trim();
1138 if(!localdescr.isEmpty())
1139 {
1140 blabla.add(localdescr);
1141 }
1142 }
1143 }
1144 }
1145
1146 if (blabla !=null && !blabla.isEmpty()) {
1147 String blaStr = StringUtils.join(blabla," ").trim();
1148 if (! stringIsEmpty(blaStr)) {
1149 setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1150 return blaStr;
1151 } else {
1152 return "";
1153 }
1154 } else {
1155 return "";
1156 }
1157
1158 }
1159
1160 /**
1161 * @param blaStr
1162 * @return
1163 */
1164 private boolean stringIsEmpty(String blaStr) {
1165 if (blaStr.matches("(\\.|,|;|\\.-)?")){
1166 return true;
1167 }else{
1168 return false;
1169 }
1170 }
1171
1172 /**
1173 * @param nametosave
1174 * @param refMods
1175 * @param insideNodes
1176 * @param blabla
1177 * @param j
1178 */
1179 @SuppressWarnings({ "rawtypes" })
1180 private String getInlineTextForName(List<TaxonNameBase> nametosave, Reference refMods, Node insideNode) {
1181 if (true){
1182 NodeList children = insideNode.getChildNodes();
1183 String result = "";
1184 for (int i=0;i<children.getLength();i++){
1185 Node nameChild = children.item(i);
1186 if(nameChild.getNodeName().equalsIgnoreCase("#text")){
1187 result += nameChild.getTextContent();
1188 }else{
1189 //do nothing
1190 }
1191 }
1192 return result.replace("\n", "").trim();
1193 }else{
1194 TaxonNameBase tnb = getTaxonNameBaseFromXML(insideNode, nametosave,refMods,false);
1195 // Taxon tax = getTaxonFromTxonNameBase(tnb, refMods);
1196 Taxon tax = currentMyName.getTaxon();
1197 if(tnb !=null && tax != null){
1198 String linkedTaxon = tnb.getTitleCache().split("sec")[0];//TODO NOT IMPLEMENTED IN THE CDM YET
1199 return "<cdm:taxon uuid='"+tax.getUuid()+"'>"+linkedTaxon+"</cdm:taxon>";
1200 }else if (tnb != null && tax == null){
1201 //TODO
1202 return "<cdm:taxonName uuid='" + tnb.getUuid() +"'>" + tnb.getTitleCache().split("sec")[0] +"</cdm:taxonName>";
1203 }else{
1204 logger.warn("Inline text has no content yet");
1205 }
1206 return "";
1207 }
1208 }
1209
1210 /**
1211 * @param featureName
1212 * @return
1213 */
1214 @SuppressWarnings("rawtypes")
1215 private Feature getFeatureObjectFromString(String featureName) {
1216 logger.info("getFeatureObjectFromString");
1217 List<Feature> features = importer.getTermService().list(Feature.class, null,null,null,null);
1218 Feature currentFeature=null;
1219 for (Feature feature: features){
1220 String tmpF = feature.getTitleCache();
1221 if (tmpF.equalsIgnoreCase(featureName)) {
1222 currentFeature=feature;
1223 // System.out.println("currentFeatureFromList "+currentFeature.getUuid());
1224 }
1225 }
1226 if (currentFeature == null) {
1227 currentFeature=Feature.NewInstance(featureName, featureName, featureName);
1228 if(featureName.equalsIgnoreCase("Other")){
1229 currentFeature.setUuid(OtherUUID);
1230 }
1231 if(featureName.equalsIgnoreCase(notMarkedUp)){
1232 currentFeature.setUuid(NotMarkedUpUUID);
1233 }
1234 importer.getTermService().saveOrUpdate(currentFeature);
1235 }
1236 return currentFeature;
1237 }
1238
1239
1240
1241
1242 /**
1243 * @param children: the XML node group
1244 * @param nametosave: the list of objects to save into the CDM
1245 * @param acceptedTaxon: the current acceptedTaxon
1246 * @param refMods: the current reference extracted from the MODS
1247 * @param fullContent :the parsed XML content
1248 * @return a list of description (text)
1249 */
1250 @SuppressWarnings({ "unused", "rawtypes" })
1251 private List<String> parseParagraph(List<TaxonNameBase> namesToSave, Taxon acceptedTaxon, Reference refMods, Node paragraph, Feature feature){
1252 logger.info("parseParagraph "+feature.toString());
1253 List<String> fullDescription= new ArrayList<String>();
1254 // String localdescr;
1255 String descr="";
1256 NodeList insideNodes ;
1257 boolean collectionEvent = false;
1258 List<Node>collectionEvents = new ArrayList<Node>();
1259
1260 NodeList children = paragraph.getChildNodes();
1261
1262 for (int i=0;i<children.getLength();i++){
1263 // localdescr="";
1264 if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
1265 descr += children.item(i).getTextContent().trim();
1266 }
1267 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1268 insideNodes=children.item(i).getChildNodes();
1269 List<String> blabla= new ArrayList<String>();
1270 for (int j=0;j<insideNodes.getLength();j++){
1271 boolean nodeKnown = false;
1272 // System.out.println("insideNodes.item(j).getNodeName() : "+insideNodes.item(j).getNodeName());
1273 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1274 String inlineText = getInlineTextForName(namesToSave, refMods, insideNodes.item(j));
1275 if (!inlineText.isEmpty()) {
1276 blabla.add(inlineText);
1277 }
1278 nodeKnown=true;
1279 }
1280 else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1281 if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1282 blabla.add(insideNodes.item(j).getTextContent().trim());
1283 // localdescr += insideNodes.item(j).getTextContent().trim();
1284 }
1285 nodeKnown=true;
1286 }
1287 else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:bibref")) {
1288 String ref = insideNodes.item(j).getTextContent().trim();
1289 if (ref.endsWith(";") && ((ref.length())>1)) {
1290 ref=ref.substring(0, ref.length()-1)+".";
1291 }
1292 Reference reference = ReferenceFactory.newGeneric();
1293 reference.setTitleCache(ref, true);
1294 blabla.add(reference.getTitleCache());
1295 nodeKnown=true;
1296 }
1297 else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:figure")){
1298 String figure = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "figure");
1299 blabla.add(figure);
1300 }
1301 else if(insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:div") &&
1302 insideNodes.item(j).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1303 insideNodes.item(j).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1304 String table = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "table");
1305 blabla.add(table);
1306 }
1307 else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1308 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1309 String titlecache = extractMaterialsDirect(insideNodes.item(j), acceptedTaxon, refMods, "collection", null);
1310 blabla.add(titlecache);
1311 collectionEvent=true;
1312 collectionEvents.add(insideNodes.item(j));
1313 nodeKnown=true;
1314 }else{
1315 logger.warn("node not handled yet: " + insideNodes.item(j).getNodeName());
1316 }
1317
1318 }
1319 if (!StringUtils.isBlank(StringUtils.join(blabla," "))) {
1320 fullDescription.add(StringUtils.join(blabla," "));
1321 }
1322 }
1323 if (children.item(i).getNodeName().equalsIgnoreCase("tax:figure")){
1324 String figure = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "Figure");
1325 fullDescription.add(figure);
1326 }
1327 if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1328 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1329 children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1330 String table = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "table");
1331 fullDescription.add(table);
1332 }
1333 }
1334
1335 if( !stringIsEmpty(descr.trim())){
1336 Feature currentFeature= getNotMarkedUpFeatureObject();
1337 setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1338 }
1339 // if (collectionEvent) {
1340 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1341 // for (Node coll:collectionEvents){
1342 // = extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
1343 // }
1344 // }
1345 return fullDescription;
1346 }
1347
1348
1349 /**
1350 * @param description: the XML node group
1351 * @param acceptedTaxon: the current acceptedTaxon
1352 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1353 * @param nametosave: the list of objects to save into the CDM
1354 * @param refMods: the current reference extracted from the MODS
1355 * @param feature: the feature to link the data with
1356 */
1357 @SuppressWarnings("rawtypes")
1358 private void extractFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> namesToSave, Reference refMods, Feature feature){
1359 logger.info("EXTRACT FEATURE "+feature.toString());
1360 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1361 List<String> fullDescription= parseParagraph( namesToSave, acceptedTaxon, refMods, description,feature);
1362
1363 // System.out.println("Feature : "+feature.toString()+", "+fullDescription.toString());
1364 if (!fullDescription.isEmpty() &&!stringIsEmpty(StringUtils.join(fullDescription,"\n").trim())) {
1365 setParticularDescription(StringUtils.join(fullDescription,"\n").trim(),acceptedTaxon,defaultTaxon, refMods,feature);
1366 }
1367
1368 }
1369
1370
1371 /**
1372 * @param descr: the XML Nodegroup to parse
1373 * @param acceptedTaxon: the current acceptedTaxon
1374 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1375 * @param refMods: the current reference extracted from the MODS
1376 * @param currentFeature: the feature name
1377 * @return
1378 */
1379 private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon, Reference refMods, Feature currentFeature) {
1380 logger.info("setParticularDescription " + currentFeature.getTitleCache()+", \n blabla : "+descr);
1381
1382 //remove redundant feature title
1383 String featureStr = currentFeature.getTitleCache();
1384 if (!descr.isEmpty() && descr.toLowerCase().startsWith(featureStr.toLowerCase())){
1385 descr = descr.replaceAll("(?i)" + featureStr + "\\.\\s*", "");
1386 }
1387
1388
1389 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1390 featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1391
1392 TextData textData = createTextData(descr, refMods, currentFeature);
1393
1394 if(acceptedTaxon!=null){
1395 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1396 td.addElement(textData);
1397 acceptedTaxon.addDescription(td);
1398
1399 sourceHandler.addAndSaveSource(refMods, td, null);
1400 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1401 }
1402
1403 if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1404 try{
1405 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1406 if (tmp!=null) {
1407 defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1408 }else{
1409 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1410 }
1411 }catch(Exception e){
1412 logger.debug("TAXON EXISTS"+defaultTaxon);
1413 }
1414
1415 TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1416 defaultTaxon.addDescription(td);
1417 td.addElement(textData);
1418 sourceHandler.addAndSaveSource(refMods, td, null);
1419 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1420 }
1421 }
1422
1423 /**
1424 * @param descr
1425 * @param refMods
1426 * @param currentFeature
1427 * @return
1428 */
1429 private TextData createTextData(String descr, Reference refMods, Feature currentFeature) {
1430 //logger.info("createTextData");
1431 TextData textData = TextData.NewInstance();
1432 textData.setFeature(currentFeature);
1433 sourceHandler.addSource(refMods, textData);
1434
1435 textData.putText(Language.UNKNOWN_LANGUAGE(), descr);
1436 return textData;
1437 }
1438
1439
1440
1441 /**
1442 * @param descr: the XML Nodegroup to parse
1443 * @param acceptedTaxon: the current acceptedTaxon
1444 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1445 * @param refMods: the current reference extracted from the MODS
1446 * @param currentFeature: the feature name
1447 * @return
1448 */
1449 private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon,Reference currentRef, Reference refMods, Feature currentFeature) {
1450 // System.out.println("setParticularDescriptionSPecial "+currentFeature);
1451 // logger.info("acceptedTaxon: "+acceptedTaxon);
1452 logger.info("setParticularDescription");
1453 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1454
1455 featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1456 TextData textData = createTextData(descr, refMods, currentFeature);
1457
1458 if(! descr.isEmpty() && (acceptedTaxon!=null)){
1459 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1460 td.addElement(textData);
1461 acceptedTaxon.addDescription(td);
1462
1463 sourceHandler.addAndSaveSource(refMods, td, currentRef);
1464 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1465 }
1466
1467 if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1468 try{
1469 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1470 if (tmp!=null) {
1471 defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1472 }else{
1473 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1474 }
1475 }catch(Exception e){
1476 logger.debug("TAXON EXISTS"+defaultTaxon);
1477 }
1478
1479 TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1480 defaultTaxon.addDescription(td);
1481 td.addElement(textData);
1482 sourceHandler.addAndSaveSource(currentRef, td,currentRef);
1483 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1484 }
1485 }
1486
1487
1488
1489 /**
1490 * @param synonyms: the XML Nodegroup to parse
1491 * @param nametosave: the list of objects to save into the CDM
1492 * @param acceptedTaxon: the current acceptedTaxon
1493 * @param refMods: the current reference extracted from the MODS
1494 */
1495 @SuppressWarnings({ "rawtypes" })
1496 private void extractSynonyms(Node synonymsNode, Taxon acceptedTaxon,Reference refMods, String followingText) {
1497 logger.info("extractSynonyms");
1498 //System.out.println("extractSynonyms for: "+acceptedTaxon);
1499 Taxon ttmp = (Taxon) importer.getTaxonService().find(acceptedTaxon.getUuid());
1500 if (ttmp != null) {
1501 acceptedTaxon = CdmBase.deproxy(ttmp,Taxon.class);
1502 }
1503 else{
1504 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1505 }
1506 NodeList children = synonymsNode.getChildNodes();
1507 List<MyName> names = new ArrayList<MyName>();
1508
1509 if(synonymsNode.getNodeName().equalsIgnoreCase("tax:name")){
1510 try {
1511 MyName myName = extractScientificNameSynonym(synonymsNode, refMods, followingText);
1512 names.add(myName);
1513 } catch (TransformerFactoryConfigurationError e) {
1514 logger.warn(e);
1515 } catch (TransformerException e) {
1516 logger.warn(e);
1517 }
1518 }
1519
1520
1521 for (int i=0;i<children.getLength();i++){
1522 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1523 NodeList tmp = children.item(i).getChildNodes();
1524 // String fullContent = children.item(i).getTextContent();
1525 for (int j=0; j< tmp.getLength();j++){
1526 if(tmp.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1527 try {
1528 MyName myName = extractScientificNameSynonym(tmp.item(j),refMods, followingText);
1529 names.add(myName);
1530 } catch (TransformerFactoryConfigurationError e) {
1531 logger.warn(e);
1532 } catch (TransformerException e) {
1533 logger.warn(e);
1534 }
1535 }
1536 }
1537 }
1538 if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
1539 try {
1540 MyName myName = extractScientificNameSynonym(children.item(i),refMods, followingText);
1541 names.add(myName);
1542 } catch (TransformerFactoryConfigurationError e) {
1543 logger.warn(e);
1544 } catch (TransformerException e) {
1545 logger.warn(e);
1546 }
1547
1548 }
1549 }
1550
1551 for(MyName name:names){
1552 TaxonNameBase nameToBeFilled = name.getTaxonNameBase();
1553 Synonym synonym = name.getSyno();
1554 addFollowingTextToName(nameToBeFilled, followingText);
1555
1556 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1557 nameToBeFilled = parser.parseFullName(name.getName(), nomenclaturalCode, name.getRank());
1558 if (nameToBeFilled.hasProblem() &&
1559 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1560 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1561 addProblemNameToFile(name.getName(),"",nomenclaturalCode,name.getRank());
1562 nameToBeFilled = solveNameProblem(name.getOriginalName(), name.getName(), parser,name.getAuthor(), name.getRank());
1563 }
1564 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
1565 */
1566 if (!name.getIdentifier().isEmpty() && (name.getIdentifier().length()>2)){
1567 setLSID(name.getIdentifier(), synonym);
1568 }
1569
1570 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1571 boolean synoExist = false;
1572 for (Synonym syn: synonymsSet){
1573
1574 boolean a =syn.getName().equals(synonym.getName());
1575 boolean b = syn.getSec().equals(synonym.getSec());
1576 if (a && b) {
1577 synoExist=true;
1578 }
1579 }
1580 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1581 sourceHandler.addSource(refMods, synonym);
1582 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),refMods, null);
1583 }
1584 }
1585 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1586 }
1587
1588
1589 private boolean addFollowingTextToName(TaxonNameBase nameToBeFilled, String followingText) {
1590 if (nameToBeFilled != null && StringUtils.isNotBlank(followingText)){
1591 if (! followingText.matches("\\d\\.?")){
1592
1593 if (followingText.startsWith(",")){
1594 followingText = followingText.substring(1).trim();
1595 }
1596 nameToBeFilled.setFullTitleCache(nameToBeFilled.getFullTitleCache()+ "," +followingText , true);
1597 }
1598 return true;
1599 }
1600 return false;
1601
1602 }
1603
1604 /**
1605 * @param refgroup: the XML nodes
1606 * @param nametosave: the list of objects to save into the CDM
1607 * @param acceptedTaxon: the current acceptedTaxon
1608 * @param nametosave: the list of objects to save into the CDM
1609 * @param refMods: the current reference extracted from the MODS
1610 * @return the acceptedTaxon (why?)
1611 * handle cases where the bibref are inside <p> and outside
1612 */
1613 @SuppressWarnings({ "rawtypes" })
1614 private Taxon extractReferences(Node refgroup, List<TaxonNameBase> nametosave, Taxon acceptedTaxon, Reference refMods) {
1615 logger.info("extractReferences");
1616 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1617
1618 NodeList children = refgroup.getChildNodes();
1619 NonViralName<?> nameToBeFilled = getNonViralNameAccNomenclature();
1620
1621 ReferenceBuilder refBuild = new ReferenceBuilder(sourceHandler);
1622 for (int i=0;i<children.getLength();i++){
1623 if(children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
1624 String ref = children.item(i).getTextContent().trim();
1625 refBuild.builReference(ref, treatmentMainName, nomenclaturalCode, acceptedTaxon, refMods);
1626 if (!refBuild.isFoundBibref()){
1627 extractReferenceRawText(children.item(i).getChildNodes(), nameToBeFilled, refMods, acceptedTaxon);
1628 }
1629 }
1630
1631 if(children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1632 NodeList references = children.item(i).getChildNodes();
1633 String descr="";
1634 for (int j=0;j<references.getLength();j++){
1635 if(references.item(j).getNodeName().equalsIgnoreCase("tax:bibref")){
1636 String ref = references.item(j).getTextContent().trim();
1637 refBuild.builReference(ref, treatmentMainName, nomenclaturalCode, acceptedTaxon, refMods);
1638 }
1639 else
1640 if (references.item(j).getNodeName().equalsIgnoreCase("#text")
1641 && !references.item(j).getTextContent().trim().isEmpty()){
1642 descr += references.item(j).getTextContent().trim();
1643 }
1644
1645 }
1646 if (!refBuild.isFoundBibref()){
1647 //if it's not tagged, put it as row information.
1648 // extractReferenceRawText(references, nameToBeFilled, nametosave, refMods, acceptedTaxon);
1649 //then put it as a not markup feature if not empty
1650 if (!stringIsEmpty(descr.trim())){
1651 Feature currentFeature= getNotMarkedUpFeatureObject();
1652 setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1653 }
1654 }
1655 }
1656 }
1657 // importer.getClassificationService().saveOrUpdate(classification);
1658 return acceptedTaxon;
1659
1660 }
1661
1662 /**
1663 * get the non viral name according to the current nomenclature
1664 * @return
1665 */
1666
1667 private NonViralName<?> getNonViralNameAccNomenclature() {
1668 return (NonViralName<?>)nomenclaturalCode.getNewTaxonNameInstance(null);
1669 }
1670
1671 /**
1672 * @return the feature object for the category "not marked up"
1673 */
1674 private Feature getNotMarkedUpFeatureObject() {
1675 // FIXME use getFeature(uuid ....)
1676 logger.info("getNotMarkedUpFeatureObject");
1677 Feature currentFeature = (Feature)importer.getTermService().find(NotMarkedUpUUID);
1678 if (currentFeature == null) {
1679 currentFeature=Feature.NewInstance(notMarkedUp, notMarkedUp, notMarkedUp);
1680 currentFeature.setUuid(NotMarkedUpUUID);
1681 //TODO use userDefined Feature Vocabulary
1682 Feature.DISTRIBUTION().getVocabulary().addTerm(currentFeature);
1683 // importer.getTermService().saveOrUpdate(currentFeature);
1684 importer.getVocabularyService().saveOrUpdate(currentFeature.getVocabulary());
1685 }
1686 return currentFeature;
1687 }
1688
1689 /**
1690 * @param references
1691 * handle cases where the bibref are inside <p> and outside
1692 */
1693 @SuppressWarnings("rawtypes")
1694 private void extractReferenceRawText(NodeList references, NonViralName<?> nameToBeFilled, Reference refMods,
1695 Taxon acceptedTaxon) {
1696 logger.info("extractReferenceRawText");
1697 String refString="";
1698 currentMyName= new MyName(true);
1699 for (int j=0;j<references.getLength();j++){
1700 acceptedTaxon=CdmBase.deproxy(acceptedTaxon, Taxon.class);
1701 //no bibref tag inside
1702 // System.out.println("references.item(j).getNodeName()"+references.item(j).getNodeName());
1703 if (references.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1704
1705 try {
1706 String followingText = null; //needs to be checked if follText is possible
1707 //TODO create or not create?
1708 currentMyName = extractScientificName(references.item(j), refMods, followingText);
1709 } catch (TransformerFactoryConfigurationError e) {
1710 logger.warn(e);
1711 } catch (TransformerException e) {
1712 logger.warn(e);
1713 }
1714
1715 // name=name.trim();
1716 }
1717 if (references.item(j).getNodeName().equalsIgnoreCase("#text")){
1718 refString = references.item(j).getTextContent().trim();
1719 }
1720 if(references.item(j).getNodeName().equalsIgnoreCase("#text") && !references.item(j).getTextContent().trim().isEmpty()){
1721 //
1722 if (!currentMyName.getStatus().isEmpty()){
1723 String nomNovStatus = this.newNameStatus(currentMyName.getStatus());
1724 if (nomNovStatus != null){
1725 nameToBeFilled.setAppendedPhrase(nomNovStatus);
1726 }else{
1727 try {
1728 NomenclaturalStatusType statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1729 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1730 } catch (UnknownCdmTypeException e) {
1731 addProblematicStatusToFile(currentMyName.getStatus());
1732 logger.warn("Problem with status");
1733 }
1734 }
1735 }
1736
1737 String fullLineRefName = references.item(j).getTextContent().trim();
1738 int nameOrRefOrOther=2;
1739 nameOrRefOrOther=askIfNameContained(fullLineRefName);
1740 if (nameOrRefOrOther==0){
1741 TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1742 Synonym synonym = Synonym.NewInstance(nameTBF, refMods);
1743
1744 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1745 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1746 boolean synoExist = false;
1747 for (Synonym syn: synonymsSet){
1748 // System.out.println(syn.getName()+" -- "+syn.getSec());
1749 boolean a =syn.getName().equals(synonym.getName());
1750 boolean b = syn.getSec().equals(synonym.getSec());
1751 if (a && b) {
1752 synoExist=true;
1753 }
1754 }
1755 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1756 sourceHandler.addSource(refMods, synonym);
1757
1758 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),refMods, null);
1759 }
1760 }
1761
1762 if (nameOrRefOrOther==1){
1763 Reference re = ReferenceFactory.newGeneric();
1764 re.setTitleCache(fullLineRefName, true);
1765
1766 /* TaxonNameBase nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
1767 if (nameTBF.hasProblem() &&
1768 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1769 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
1770 nameTBF=solveNameProblem(currentMyName.getName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
1771 }
1772 nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1773 */
1774 TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1775 Synonym synonym = Synonym.NewInstance(nameTBF, refMods);
1776
1777 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1778 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1779 boolean synoExist = false;
1780 for (Synonym syn: synonymsSet){
1781 // System.out.println(syn.getName()+" -- "+syn.getSec());
1782 boolean a =syn.getName().equals(synonym.getName());
1783 boolean b = syn.getSec().equals(synonym.getSec());
1784 if (a && b) {
1785 synoExist=true;
1786 }
1787 }
1788 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1789 sourceHandler.addSource(refMods, synonym);
1790
1791 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),re, null);
1792 }
1793
1794 }
1795
1796
1797 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1798 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
1799 }
1800 }
1801
1802 if(!currentMyName.getName().isEmpty()){
1803 //logger.info("acceptedTaxon and name: *"+acceptedTaxon.getTitleCache()+"*, *"+currentMyName.getName()+"*");
1804 if (acceptedTaxon.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(currentMyName.getName().trim())){
1805 Reference refS = ReferenceFactory.newGeneric();
1806 refS.setTitleCache(refString, true);
1807 // TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1808 // acceptedTaxon.addDescription(td);
1809 // acceptedTaxon.addSource(refSource);
1810 //
1811 // TextData textData = TextData.NewInstance(Feature.CITATION());
1812 //
1813 // textData.addSource(null, null, refS, null);
1814 // td.addElement(textData);
1815 // td.addSource(refSource);
1816 // importer.getDescriptionService().saveOrUpdate(td);
1817
1818
1819 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1820 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
1821
1822 }
1823
1824 acceptedTaxon.getName().setNomenclaturalReference(refS);
1825 }else{
1826 TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1827 Synonym synonym = null;
1828 if (! currentMyName.getStatus().isEmpty()){
1829 String nomNovStatus = this.newNameStatus(currentMyName.getStatus());
1830 if (nomNovStatus != null){
1831 nameToBeFilled.setAppendedPhrase(nomNovStatus);
1832 }else{
1833 try {
1834 NomenclaturalStatusType statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1835 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1836 synonym = Synonym.NewInstance(nameTBF, refMods);
1837 } catch (UnknownCdmTypeException e) {
1838 addProblematicStatusToFile(currentMyName.getStatus());
1839 logger.warn("Problem with status");
1840 synonym = Synonym.NewInstance(nameTBF, refMods);
1841 synonym.setAppendedPhrase(currentMyName.getStatus());
1842 }
1843 }
1844 }else{
1845 synonym = Synonym.NewInstance(nameTBF, refMods);
1846 }
1847
1848
1849 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1850 setLSID(currentMyName.getIdentifier(), synonym);
1851 }
1852
1853 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1854 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1855 boolean synoExist = false;
1856 for (Synonym syn: synonymsSet){
1857 // System.out.println(syn.getName()+" -- "+syn.getSec());
1858 boolean a =syn.getName().equals(synonym.getName());
1859 boolean b = syn.getSec().equals(synonym.getSec());
1860 if (a && b) {
1861 synoExist=true;
1862 }
1863 }
1864 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1865 sourceHandler.addSource(refMods, synonym);
1866
1867 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),refMods, null);
1868 }
1869 }
1870 }
1871 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1872 }
1873 }
1874
1875
1876
1877 /**
1878 * @param identifier
1879 * @param acceptedTaxon
1880 */
1881 @SuppressWarnings("rawtypes")
1882 private void setLSID(String identifier, TaxonBase<?> taxon) {
1883 //logger.info("setLSID");
1884 // boolean lsidok=false;
1885 String id = identifier.split("__")[0];
1886 String source = identifier.split("__")[1];
1887 if (id.indexOf("lsid")>-1){
1888 try {
1889 LSID lsid = new LSID(id);
1890 taxon.setLsid(lsid);
1891 // lsidok=true;
1892 } catch (MalformedLSIDException e) {
1893 logger.warn("Malformed LSID");
1894 }
1895
1896 }
1897
1898 //logger.info("search reference for LSID");
1899 // if ((id.indexOf("lsid")<0) || !lsidok){
1900 //ADD ORIGINAL SOURCE ID EVEN IF LSID
1901 Reference re = null;
1902 Pager<Reference> references = importer.getReferenceService().findByTitle(Reference.class, source, MatchMode.EXACT, null, 1, null, null, null);
1903 if( references !=null && references.getCount()>0){
1904 re=references.getRecords().get(0);
1905 }
1906 //logger.info("search reference for LSID-end");
1907 if(re == null){
1908 re = ReferenceFactory.newGeneric();
1909 re.setTitleCache(source, true);
1910 importer.getReferenceService().saveOrUpdate(re);
1911 }
1912 re=CdmBase.deproxy(re, Reference.class);
1913
1914 //logger.info("search source for LSID");
1915 Set<IdentifiableSource> sources = taxon.getSources();
1916 boolean lsidinsource=false;
1917 boolean urlinsource=false;
1918 for (IdentifiableSource src:sources){
1919 if (id.equalsIgnoreCase(src.getIdInSource()) && re.getTitleCache().equals(src.getCitation().getTitleCache())) {
1920 lsidinsource=true;
1921 }
1922 if (src.getIdInSource() == null && re.getTitleCache().equals(sourceUrlRef.getTitleCache())) {
1923 urlinsource=true;
1924 }
1925 }
1926 if(!lsidinsource) {
1927 taxon.addSource(OriginalSourceType.Import, id,null,re,null);
1928 }
1929 if(!urlinsource)
1930 {
1931 sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
1932 taxon.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
1933 // }
1934 }
1935
1936 }
1937
1938 /**
1939 * try to solve a parsing problem for a scientific name
1940 * @param original : the name from the OCR document
1941 * @param name : the tagged version
1942 * @param parser
1943 * @return the corrected TaxonNameBase
1944 */
1945 /* @SuppressWarnings({ "unchecked", "rawtypes" })
1946 private TaxonNameBase<?,?> solveNameProblem(String original, String name, INonViralNameParser parser, String author, Rank rank) {
1947 Map<String,String> ato = namesMap.get(original);
1948 if (ato == null) {
1949 ato = namesMap.get(original+" "+author);
1950 }
1951
1952
1953 if (ato == null && rank.equals(Rank.UNKNOWN_RANK())){
1954 rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
1955 }
1956 if (ato != null && rank.equals(Rank.UNKNOWN_RANK())){
1957 rank = getRank(ato);
1958 }
1959 // TaxonNameBase<?,?> nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
1960 TaxonNameBase<?,?> nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1961 // logger.info("RANK: "+rank);
1962 int retry=0;
1963 List<ParserProblem> problems = nameTBF.getParsingProblems();
1964 for (ParserProblem pb:problems) {
1965 System.out.println(pb.toString());
1966 }
1967 while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
1968 addProblemNameToFile(name,author,nomenclaturalCode,rank);
1969 String fullname=name;
1970 if(! skippQuestion) {
1971 fullname = getFullReference(name,nameTBF.getParsingProblems());
1972 }
1973 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1974 nameTBF = BotanicalName.NewInstance(null);
1975 }
1976 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1977 nameTBF = ZoologicalName.NewInstance(null);
1978 }
1979 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1980 nameTBF= BacterialName.NewInstance(null);
1981 }
1982 parser.parseReferencedName(nameTBF, fullname, rank, false);
1983 retry++;
1984 }
1985 if (retry == 1){
1986 if(author != null){
1987 if (name.indexOf(author)>-1) {
1988 nameTBF = parser.parseSimpleName(name.substring(0, name.indexOf(author)), nomenclaturalCode, rank);
1989 } else {
1990 nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1991 }
1992 if (nameTBF.hasProblem()){
1993 if (name.indexOf(author)>-1) {
1994 addProblemNameToFile(name.substring(0, name.indexOf(author)),author,nomenclaturalCode,rank);
1995 } else {
1996 addProblemNameToFile(name,author,nomenclaturalCode,rank);
1997 }
1998 // System.out.println("TBF still has problems "+nameTBF.hasProblem());
1999 problems = nameTBF.getParsingProblems();
2000 for (ParserProblem pb:problems) {
2001 System.out.println(pb.toString());
2002 }
2003 nameTBF.setFullTitleCache(name, true);
2004 }else{
2005 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2006 ((BotanicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2007 }
2008 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2009 ((ZoologicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2010 }
2011 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2012 ((BacterialName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2013 }
2014 }
2015 // logger.info("FULL TITLE CACHE "+name);
2016 }else{
2017 nameTBF.setFullTitleCache(name, true);
2018 }
2019 }
2020 return nameTBF;
2021 }
2022
2023 */
2024
2025 /**
2026 * @param nomenclatureNode: the XML nodes
2027 * @param nametosave: the list of objects to save into the CDM
2028 * @param refMods: the current reference extracted from the MODS
2029 * @return
2030 */
2031 @SuppressWarnings({ "rawtypes" })
2032 private Taxon extractNomenclature(Node nomenclatureNode, List<TaxonNameBase> nametosave, Reference refMods) throws ClassCastException{
2033 refMods=CdmBase.deproxy(refMods, Reference.class);
2034
2035 logger.info("extractNomenclature");
2036 NodeList children = nomenclatureNode.getChildNodes();
2037 String freetext="";
2038 Taxon acceptedTaxon = null;
2039 // INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2040
2041 // String fullContent = nomenclatureNode.getTextContent();
2042
2043 NomenclaturalStatusType statusType = null;
2044 String newNameStatus = null;
2045 //TODO
2046 for (int i=0;i<children.getLength();i++){
2047 if(children.item(i).getNodeName().equalsIgnoreCase("tax:status")){
2048 String status = children.item(i).getTextContent().trim();
2049
2050 if (!status.isEmpty()){
2051 if (newNameStatus(status) != null){
2052 newNameStatus = newNameStatus(status);
2053 }else{
2054 try {
2055 statusType = nomStatusString2NomStatus(status);
2056 } catch (UnknownCdmTypeException e) {
2057 // nomNovStatus;
2058 addProblematicStatusToFile(status);
2059 logger.warn("Problem with status: " + status);
2060 }
2061 }
2062 }
2063 }
2064 }
2065
2066 boolean containsSynonyms=false;
2067 boolean wasSynonym = false;
2068 usedFollowingTextPrefix = null; //reset
2069
2070 for (int i=0; i<children.getLength(); i++){
2071 Node childNode = children.item(i);
2072 String childName = childNode.getNodeName();
2073
2074
2075 //following text
2076 followingText = null;
2077 if ( i + 1 < children.getLength()){
2078 Node followingTextNode = children.item(i +1);
2079 if (followingTextNode.getNodeName().equals("#text") && !followingTextNode.getTextContent().matches("\\s*") ){
2080 followingText = followingTextNode.getTextContent();
2081 }
2082 }
2083
2084 //traverse nodes
2085 if (childName.equalsIgnoreCase("#text")) {
2086 freetext = childNode.getTextContent().trim();
2087 if (usedFollowingTextPrefix != null && freetext.startsWith(usedFollowingTextPrefix)){
2088 freetext = freetext.substring(usedFollowingTextPrefix.length());
2089 }
2090 usedFollowingTextPrefix = null; //reset
2091 }else if (childName.equalsIgnoreCase("tax:collection_event")) {
2092 // System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
2093 extractMaterialsDirect(childNode, acceptedTaxon, refMods, "collection", currentMyName.getTaxonNameBase());
2094 }else if(childName.equalsIgnoreCase("tax:name")){
2095 NonViralName<?> nameToBeFilled;
2096 //System.out.println("HANDLE FIRST NAME OF THE LIST");
2097 if(!containsSynonyms){
2098 wasSynonym = false;
2099
2100 //System.out.println("I : "+i);
2101 currentMyName = new MyName(false);
2102 try {
2103 currentMyName = extractScientificName(childNode, refMods, followingText);
2104 treatmentMainName = currentMyName.getNewName();
2105 originalTreatmentName = currentMyName.getOriginalName();
2106
2107 } catch (TransformerFactoryConfigurationError e1) {
2108 throw new RuntimeException(e1);
2109 } catch (TransformerException e1) {
2110 throw new RuntimeException(e1);
2111 }
2112
2113 if (currentMyName.getRank().equals(Rank.UNKNOWN_RANK()) || currentMyName.getRank().isLower(state2.getConfig().getMaxRank()) || currentMyName.getRank().equals(state2.getConfig().getMaxRank())){
2114 maxRankRespected=true;
2115
2116 nameToBeFilled=currentMyName.getTaxonNameBase();
2117
2118 // acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
2119 acceptedTaxon=currentMyName.getTaxon();
2120 //System.out.println("TreatmentName "+treatmentMainName+" - "+acceptedTaxon);
2121
2122
2123 boolean statusMatch=false;
2124 if(acceptedTaxon !=null ){
2125 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2126 statusMatch=compareStatus(acceptedTaxon, statusType);
2127 //System.out.println("statusMatch: "+statusMatch);
2128 }
2129 if (acceptedTaxon ==null || (acceptedTaxon != null && !statusMatch)){
2130
2131 nameToBeFilled=currentMyName.getTaxonNameBase();
2132 if (nameToBeFilled != null){
2133 if (!originalTreatmentName.isEmpty()) {
2134 TaxonNameDescription td = TaxonNameDescription.NewInstance();
2135 td.setTitleCache(originalTreatmentName, true);
2136 nameToBeFilled.addDescription(td);
2137 }
2138
2139 if(statusType != null) {
2140 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
2141 }
2142 if(newNameStatus != null){
2143 nameToBeFilled.setAppendedPhrase(newNameStatus);
2144 }
2145 sourceHandler.addSource(refMods, nameToBeFilled);
2146
2147 if (nameToBeFilled.getNomenclaturalReference() == null) {
2148 acceptedTaxon= new Taxon(nameToBeFilled,refMods);
2149 //System.out.println("NEW ACCEPTED HERE "+nameToBeFilled);
2150 }
2151 else {
2152 acceptedTaxon= new Taxon(nameToBeFilled,(Reference) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
2153 //System.out.println("NEW ACCEPTED HERE2 "+nameToBeFilled);
2154 }
2155
2156 sourceHandler.addSource(refMods, acceptedTaxon);
2157
2158 if(!state2.getConfig().doKeepOriginalSecundum()) {
2159 acceptedTaxon.setSec(state2.getConfig().getSecundum());
2160 //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2161 //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2162 }
2163
2164 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2165 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2166 }
2167
2168
2169 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2170 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2171 }
2172
2173 }else{
2174 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2175 Set<IdentifiableSource> sources = acceptedTaxon.getSources();
2176 boolean sourcelinked=false;
2177 for (IdentifiableSource source:sources){
2178 if (source.getCitation().getTitleCache().equalsIgnoreCase(refMods.getTitleCache())) {
2179 sourcelinked=true;
2180 }
2181 }
2182 if (!state2.getConfig().doKeepOriginalSecundum()) {
2183 acceptedTaxon.setSec(state2.getConfig().getSecundum());
2184 //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2185 //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2186 }
2187 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2188
2189 if (!sourcelinked){
2190 sourceHandler.addSource(refMods, acceptedTaxon);
2191 }
2192 if (!sourcelinked || !state2.getConfig().doKeepOriginalSecundum()){
2193
2194 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2195 //FIXME are these identifiers really related to taxa, not names? Exiting LSIDs come from Zoobank, urn:lsid:biosci.ohio-state.edu:osuc_concepts:134826 (Ants)
2196 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2197 }
2198 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2199 }
2200 }
2201 }else{
2202 maxRankRespected=false;
2203 }
2204 containsSynonyms=true; //all folowing names are handled as synonyms
2205 }else{
2206 try{
2207 extractSynonyms(childNode, acceptedTaxon, refMods, followingText);
2208 wasSynonym = true;
2209
2210 }catch(NullPointerException e){
2211 logger.warn("null pointer exception, the accepted taxon might be null");
2212 }
2213 }
2214 containsSynonyms=true;
2215 }else if (childName.equalsIgnoreCase("tax:ref_group") && maxRankRespected){
2216 reloadClassification();
2217 //extract the References within the document
2218 extractReferences(childNode,nametosave,acceptedTaxon,refMods);
2219 }else if (childName.equalsIgnoreCase("tax:bibref")){
2220 logger.warn(childName + " still preliminary");
2221
2222 NonViralName<?> currentName = currentMyName == null ? null : currentMyName.getTaxonNameBase();
2223 boolean handled = addFollowingTextToName (currentName, childNode.getTextContent() );
2224 if (! handled){
2225 setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject());
2226 }
2227 }else{
2228 logger.warn(childName + " not yet handled");
2229 }
2230 if(!stringIsEmpty(freetext.trim())) {;
2231 if (! freetext.matches("\\d\\.?")){
2232 NonViralName<?> currentName = currentMyName == null ? null : currentMyName.getTaxonNameBase();
2233 boolean handled = false;
2234 if (currentName != null && !wasSynonym){
2235 handled = addFollowingTextToName (currentName, childNode.getTextContent() );
2236 }
2237 if (! handled){
2238 setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject());
2239 }
2240 }
2241
2242 freetext = "";
2243 }
2244
2245 }
2246 //importer.getClassificationService().saveOrUpdate(classification);
2247 return acceptedTaxon;
2248 }
2249
2250
2251
2252
2253 /**
2254 * @return
2255 */
2256
2257 private boolean compareStatus(TaxonBase<?> t, NomenclaturalStatusType statusType) {
2258 //logger.info("compareStatus");
2259 boolean statusMatch=false;
2260 //found one taxon
2261 Set<NomenclaturalStatus> status = t.getName().getStatus();
2262 if (statusType!=null && status.size()>0){ //the statusType is known for both taxon
2263 for (NomenclaturalStatus st:status){
2264 NomenclaturalStatusType stype = st.getType();
2265 if (stype.toString().equalsIgnoreCase(statusType.toString())) {
2266 statusMatch=true;
2267 }
2268 }
2269 }
2270 else{
2271 if(statusType == null && status.size()==0) {//there is no statusType, we can assume it's the same
2272 statusMatch=true;
2273 }
2274 }
2275 return statusMatch;
2276 }
2277
2278 /**
2279 * @param acceptedTaxon: the current acceptedTaxon
2280 * @param ref: the current reference extracted from the MODS
2281 * @return the parent for the current accepted taxon
2282 */
2283 /* private Taxon createParent(Taxon acceptedTaxon, Reference ref) {
2284 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2285
2286 List<Rank> rankList = new ArrayList<Rank>();
2287 rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
2288
2289 List<String> rankListStr = new ArrayList<String>();
2290 for (Rank r:rankList) {
2291 rankListStr.add(r.toString());
2292 }
2293 String r="";
2294 String s = acceptedTaxon.getTitleCache();
2295 Taxon tax = null;
2296 if(!skippQuestion){
2297 int addTaxon = askAddParent(s);
2298 logger.info("ADD TAXON: "+addTaxon);
2299 if (addTaxon == 0 ){
2300 Taxon tmp = askParent(acceptedTaxon, classification);
2301 if (tmp == null){
2302 s = askSetParent(s);
2303 r = askRank(s,rankListStr);
2304
2305 NonViralName<?> nameToBeFilled = null;
2306 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2307 nameToBeFilled = BotanicalName.NewInstance(null);
2308 }
2309 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2310 nameToBeFilled = ZoologicalName.NewInstance(null);
2311 }
2312 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2313 nameToBeFilled = BacterialName.NewInstance(null);
2314 }
2315 nameToBeFilled.setTitleCache(s, true);
2316 nameToBeFilled.setRank(getRank(r), true);
2317
2318 tax = Taxon.NewInstance(nameToBeFilled, ref);
2319 }
2320 else{
2321 tax=tmp;
2322 }
2323
2324 createParent(tax, ref);
2325 // logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
2326 classification.addParentChild(tax, acceptedTaxon, ref, null);
2327 }
2328 else{
2329 classification.addChildTaxon(acceptedTaxon, ref, null);
2330 tax=acceptedTaxon;
2331 }
2332 } else{
2333 classification.addChildTaxon(acceptedTaxon, ref, null);
2334 tax=acceptedTaxon;
2335 }
2336 // logger.info("RETURN: "+tax );
2337 return tax;
2338
2339 }
2340
2341 */
2342
2343
2344 private MyName extractScientificNameSynonym(Node name, Reference refMods, String followingText) throws TransformerFactoryConfigurationError, TransformerException {
2345 //System.out.println("extractScientificNameSynonym");
2346 logger.info("extractScientificNameSynonym");
2347 String[] rankListToPrint_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2348 List<String> rankListToPrint = new ArrayList<String>();
2349 for (String r : rankListToPrint_tmp) {
2350 rankListToPrint.add(r.toLowerCase());
2351 }
2352
2353 Rank rank = Rank.UNKNOWN_RANK();
2354 NodeList children = name.getChildNodes();
2355 String originalName="";
2356 String fullName = "";
2357 String newName="";
2358 String identifier="";
2359 HashMap<String, String> atomisedMap = new HashMap<String, String>();
2360 List<String> atomisedName= new ArrayList<String>();
2361
2362 String rankStr = "";
2363 Rank tmpRank ;
2364
2365 String status= extractStatus(children);
2366
2367 for (int i=0;i<children.getLength();i++){
2368 if(children.item(i).getNodeName().equalsIgnoreCase("tax:xmldata")){
2369 NodeList atom = children.item(i).getChildNodes();
2370 for (int k=0;k<atom.getLength();k++){
2371 identifier = extractIdentifier(identifier, atom.item(k));
2372 tmpRank = null;
2373 rankStr = atom.item(k).getNodeName().toLowerCase();
2374 // logger.info("RANKSTR:*"+rankStr+"*");
2375 if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2376 rankStr=atom.item(k).getTextContent().trim();
2377 tmpRank = getRank(rankStr);
2378 }
2379 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2380 if (tmpRank != null){
2381 rank=tmpRank;
2382 }
2383 atomisedMap.put(rankStr.toLowerCase(),atom.item(k).getTextContent().trim());
2384 }
2385 addAtomisedNamesToMap(rankListToPrint, rank, atomisedName, atom);
2386 }
2387 if(children.item(i).getNodeName().equalsIgnoreCase("#text") && !StringUtils.isBlank(children.item(i).getTextContent())){
2388 // logger.info("name non atomised: "+children.item(i).getTextContent());
2389 fullName = children.item(i).getTextContent().trim();
2390 // logger.info("fullname: "+fullName);
2391 }
2392 }
2393 originalName=fullName;
2394 fullName = cleanName(fullName, atomisedName);
2395 namesMap.put(fullName,atomisedMap);
2396
2397 String atomisedNameStr = getAtomisedNameStr(atomisedName);
2398
2399 if (fullName != null){
2400 // System.out.println("fullname: "+fullName);
2401 // System.out.println("atomised: "+atomisedNameStr);
2402 if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2403 if (skippQuestion){
2404 // String defaultN = "";
2405 if (atomisedNameStr.length()>fullName.length()) {
2406 newName=atomisedNameStr;
2407 } else {
2408 if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2409 newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2410 } else {
2411 newName=fullName;
2412 }
2413 }
2414 } else {
2415 newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2416 }
2417 } else {
2418 newName=fullName;
2419 }
2420 }
2421 //not really needed
2422 // rank = askForRank(newName, rank, nomenclaturalCode);
2423 // System.out.println("atomised: "+atomisedMap.toString());
2424
2425 // String[] names = new String[5];
2426 MyName myname = new MyName(true);
2427
2428 //System.out.println("Handle "+newName+ "(rank: "+rank+")");
2429 // System.out.println(atomisedMap.keySet());
2430 fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2431 myname.setOriginalName(fullName);
2432 myname.setNewName(newName);
2433 myname.setRank(rank);
2434 myname.setIdentifier(identifier);
2435 myname.setStatus(status);
2436 myname.setSource(refMods);
2437
2438 // boolean higherAdded=false;
2439
2440
2441 boolean parseNameManually=false;
2442 INonViralNameParser<?> parser = NonViralNameParserImpl.NewInstance();
2443 TaxonNameBase<?,?> nameToBeFilledTest ;
2444
2445 //if selected the atomised version
2446 if(newName==atomisedNameStr){
2447 nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2448 if (nameToBeFilledTest.hasProblem()){
2449 addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2450 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2451 if (nameToBeFilledTest.hasProblem()){
2452 addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2453 parseNameManually=true;
2454 }
2455 }
2456 }else{
2457 nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2458 if (nameToBeFilledTest.hasProblem()){
2459 addProblemNameToFile("fullversion",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2460 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2461 parseNameManually=true;
2462 if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2463 addNameDifferenceToFile(originalName,atomisedNameStr);
2464 }
2465 }
2466 }
2467
2468 if(parseNameManually){
2469 //System.out.println("DO IT MANUALLY");
2470 if (this.state2.getConfig().isUseOldUnparsedSynonymExtraction()){
2471 createUnparsedSynonym(rank, newName, atomisedMap, myname);
2472 }else{
2473 createUnparsedSynonymNew(rank, newName, atomisedMap, myname, refMods);;
2474 }
2475 } else{
2476 //System.out.println("AUTOMATIC!");
2477 // createAtomisedTaxonString(newName, atomisedMap, myname);
2478 myname.setParsedName(nameToBeFilledTest);
2479 myname.buildTaxon();
2480 }
2481 //System.out.println("RETURN SYNONYM "+myname.getSyno().toString());
2482 return myname;
2483 }
2484
2485
2486 /**
2487 * @param name
2488 * @throws TransformerFactoryConfigurationError
2489 * @throws TransformerException
2490 * @return a list of possible names
2491 */
2492 @SuppressWarnings({"rawtypes" })
2493 private MyName extractScientificName(Node name, Reference refMods, String followingText) throws TransformerFactoryConfigurationError, TransformerException {
2494 logger.info("extractScientificName");
2495
2496 String[] rankListToPrintLowerCase_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2497 List<String> rankListToPrint = Arrays.asList(rankListToPrintLowerCase_tmp);
2498
2499 Rank rank = Rank.UNKNOWN_RANK();
2500 NodeList children = name.getChildNodes();
2501 String originalName = "";
2502 String fullName = "";
2503 String newName = "";
2504 String identifier = "";
2505 HashMap<String, String> atomisedMap = new HashMap<String, String>();
2506 List<String> atomisedNameList= new ArrayList<String>();
2507
2508 String status= extractStatus(children);
2509
2510 for (int i=0;i<children.getLength();i++){
2511 Node nameChild = children.item(i);
2512 if(nameChild.getNodeName().equalsIgnoreCase("tax:xmldata")){
2513 NodeList xmlDataChildren = nameChild.getChildNodes();
2514 for (int k=0;k<xmlDataChildren.getLength();k++){
2515 Node xmlDataChild = xmlDataChildren.item(k);
2516 identifier = extractIdentifier(identifier, xmlDataChild);
2517 String rankStr = xmlDataChild.getNodeName().toLowerCase();
2518 if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2519 rankStr=xmlDataChild.getTextContent().trim();
2520 Rank tmpRank = getRank(rankStr);
2521 if (tmpRank != null){
2522 rank=tmpRank;
2523 }
2524 }
2525 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2526
2527 atomisedMap.put(rankStr.toLowerCase(),xmlDataChild.getTextContent().trim());
2528 }
2529 addAtomisedNamesToMap(rankListToPrint, rank, atomisedNameList, xmlDataChildren);
2530 }
2531 else if(nameChild.getNodeName().equalsIgnoreCase("#text") && ! nameChild.getTextContent().matches("\\s*")){
2532 // logger.info("name non atomised: "+children.item(i).getTextContent());
2533 fullName = nameChild.getTextContent().trim();
2534 // logger.info("fullname: "+fullName);
2535 }
2536 }
2537 originalName=fullName;
2538 fullName = cleanName(fullName, atomisedNameList);
2539 namesMap.put(fullName,atomisedMap);
2540
2541 String atomisedNameStr = getAtomisedNameStr(atomisedNameList);
2542
2543 if (fullName != null){
2544 if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2545 if (skippQuestion){
2546 if (atomisedNameStr.length()>fullName.length()) {
2547 newName = atomisedNameStr;
2548 } else {
2549 if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2550 newName = askWhichScientificName(fullName, atomisedNameStr, classification.getTitleCache(), name);
2551 } else {
2552 newName = fullName;
2553 }
2554 }
2555 } else {
2556 newName=askWhichScientificName(fullName, atomisedNameStr, classification.getTitleCache(), name);
2557 }
2558 } else {
2559 newName=fullName;
2560 }
2561 }
2562 //not really needed
2563 // rank = askForRank(newName, rank, nomenclaturalCode);
2564 // System.out.println("atomised: "+atomisedMap.toString());
2565
2566 // String[] names = new String[5];
2567 MyName myname = new MyName(false);
2568
2569 //System.out.println("\n\nBUILD "+newName+ "(rank: "+rank+")");
2570 // System.out.println(atomisedMap.keySet());
2571 fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2572 myname.setOriginalName(fullName);
2573 myname.setNewName(newName);
2574
2575 myname.setRank(rank);
2576 myname.setIdentifier(identifier);
2577 myname.setStatus(status);
2578 myname.setSource(refMods);
2579
2580 // boolean higherAdded=false;
2581
2582
2583 boolean parseNameManually=false;
2584 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2585 TaxonNameBase nameToBeFilledTest = null;
2586
2587 //if selected the atomised version
2588 if(newName==atomisedNameStr){
2589 nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2590 if (nameToBeFilledTest.hasProblem()){
2591 addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2592 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2593 if (nameToBeFilledTest.hasProblem()){
2594 addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2595 parseNameManually=true;
2596 }
2597 }
2598 }else{
2599 nameToBeFilledTest = parseWithExtension(parser, fullName , rank, followingText, atomisedMap);
2600 if (nameToBeFilledTest.hasProblem()){
2601 addProblemNameToFile("fullversion",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2602 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2603 parseNameManually=true;
2604 if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2605 addNameDifferenceToFile(originalName,atomisedNameStr);
2606 }
2607 }
2608 }
2609
2610 //System.out.println("parseNameManually: "+parseNameManually);
2611 if(parseNameManually){
2612 createAtomisedTaxon(rank, newName, atomisedMap, myname);
2613 }
2614 else{
2615 createAtomisedTaxonString(newName, atomisedMap, myname);
2616 myname.setParsedName(nameToBeFilledTest);
2617 //TODO correct handling of createIfNotExists
2618 myname.buildTaxon();
2619 }
2620 return myname;
2621
2622 }
2623
2624 private TaxonNameBase<?,?> parseWithExtension(INonViralNameParser parser, String atomisedNameStr, Rank rank, String followingText, HashMap<String, String> atomisedMap) {
2625 Object[] nameExtensionResult = getPossibleExtension(followingText, atomisedMap, nomenclaturalCode);
2626
2627 TaxonNameBase<?,?> name = parser.parseFullName(atomisedNameStr, nomenclaturalCode, rank);
2628 if (nameExtensionResult != null && nameExtensionResult[0] != null){
2629 String ext = (String)nameExtensionResult[0];
2630 TaxonNameBase<?,?> extName =parser.parseFullName(atomisedNameStr + " " + ext, nomenclaturalCode, rank);
2631 if (! extName.hasProblem()){
2632 name = extName;
2633 this.usedFollowingTextPrefix = ext;
2634 //TODO do we need to fill the atomisedMap at all?
2635 if ((Boolean)(nameExtensionResult[1])){
2636 //TODO
2637 }
2638 if ((Boolean)(nameExtensionResult[2])){
2639 //TODO BasionymYear etc.
2640 Integer origYear = ((ZoologicalName)name).getPublicationYear();
2641 if (origYear != null){
2642 atomisedMap.put(PUBLICATION_YEAR, origYear.toString());
2643 }
2644 }
2645 }
2646 }
2647 return name;
2648 }
2649
2650 private Object[] getPossibleExtension(String followingText, HashMap<String, String> atomisedMap, NomenclaturalCode nomenclaturalCode) {
2651 if (StringUtils.isBlank(followingText)){
2652 return null;
2653 }
2654
2655 boolean includeAuthor = true;
2656 boolean includeYear = false;
2657 if (atomisedMap.containsKey("dwc:scientificnameauthorship")){
2658 includeAuthor = false;
2659 }
2660 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2661 includeYear = true;
2662 }
2663 String patternStr = "";
2664 if (includeAuthor){
2665 patternStr += NonViralNameParserImplRegExBase.capitalWord;
2666 }
2667 if (includeYear){
2668 patternStr += "\\s*(,|\\s+)\\s*" + "(17|18|19|20)" + "\\d{2}" ;
2669 }
2670 String match = null;
2671 if (! patternStr.isEmpty()){
2672 Pattern pattern = Pattern.compile("^" + patternStr);
2673 Matcher matcher = pattern.matcher(followingText.trim());
2674 if (matcher.find()){
2675 match = matcher.group();
2676 }
2677 }
2678
2679 return new Object[]{match, includeAuthor, includeYear};
2680 }
2681
2682 /**
2683 * @param atomisedName
2684 * @return
2685 */
2686 private String getAtomisedNameStr(List<String> atomisedName) {
2687 //logger.info("getAtomisedNameStr");
2688 String atomisedNameStr = StringUtils.join(atomisedName," ");
2689 while(atomisedNameStr.contains(" ")) {
2690 atomisedNameStr=atomisedNameStr.replace(" ", " ");
2691 }
2692 atomisedNameStr=atomisedNameStr.trim();
2693 return atomisedNameStr;
2694 }
2695
2696 /**
2697 * @param children
2698 * @param status
2699 * @return
2700 */
2701 private String extractStatus(NodeList children) {
2702 logger.info("extractStatus");
2703 String status="";
2704 for (int i=0;i<children.getLength();i++){
2705 if(children.item(i).getNodeName().equalsIgnoreCase("tax:status") ||
2706 (children.item(i).getNodeName().equalsIgnoreCase("tax:namePart") &&
2707 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("status"))){
2708 status = children.item(i).getTextContent().trim();
2709 }
2710 }
2711 return status;
2712 }
2713
2714 /**
2715 * @param identifier
2716 * @param atom
2717 * @param k
2718 * @return
2719 */
2720 private String extractIdentifier(String identifier, Node atom) {
2721 //logger.info("extractIdentifier");
2722 if (atom.getNodeName().equalsIgnoreCase("tax:xid")){
2723 try{
2724 identifier = atom.getAttributes().getNamedItem("identifier").getNodeValue();
2725 }catch(Exception e){
2726 System.out.println("pb with identifier, maybe empty");
2727 }
2728 try{
2729 identifier+="__"+atom.getAttributes().getNamedItem("source").getNodeValue();
2730 }catch(Exception e){
2731 System.out.println("pb with identifier, maybe empty");
2732 }
2733 }
2734 return identifier;
2735 }
2736
2737 /**
2738 * @param rankListToPrint
2739 * @param rank
2740 * @param atomisedName
2741 * @param atom
2742 */
2743 private void addAtomisedNamesToMap(List<String> rankListToPrint, Rank rank, List<String> atomisedName, NodeList atom) {
2744 logger.info("addAtomisedNamesToMap");
2745 for (int k=0;k<atom.getLength();k++){
2746 Node node = atom.item(k);
2747 String nodeName = node.getNodeName();
2748 if (! nodeName.equalsIgnoreCase("dwc:taxonRank") ) { //rank has been handled in higher method
2749 if (nodeName.equalsIgnoreCase("dwc:subgenus") || nodeName.equalsIgnoreCase("dwcranks:subgenus")) {
2750 atomisedName.add("("+ node.getTextContent().trim()+")");
2751 } else if(nodeName.equalsIgnoreCase("dwcranks:varietyepithet") || nodeName.equalsIgnoreCase("dwc:Subspecies") || nodeName.equalsIgnoreCase("dwc:infraspecificepithet")) {
2752 if(nodeName.equalsIgnoreCase("dwcranks:varietyepithet")){
2753 atomisedName.add("var. "+node.getTextContent().trim());
2754 }else if(nodeName.equalsIgnoreCase("dwc:Subspecies") || nodeName.equalsIgnoreCase("dwc:infraspecificepithet")) {
2755 atomisedName.add("subsp. "+atom.item(k).getTextContent().trim());
2756 }
2757 } else if(rankListToPrint.contains(nodeName.toLowerCase())) {
2758 atomisedName.add(node.getTextContent().trim());
2759 } else{
2760 if (rank.isHigher(Rank.GENUS()) && (nodeName.indexOf("dwcranks:")>-1 || nodeName.indexOf("dwc:Family")>-1)) {
2761 atomisedName.add(node.getTextContent().trim());
2762 }else if (nodeName.equals("#text")){
2763 String text = node.getTextContent();
2764 if (StringUtils.isNotBlank(text)){
2765 //TODO handle text
2766 logger.warn("name xmldata contains text. This is unhandled");
2767 }
2768 }else if (nodeName.matches("(?i)(dwc:Kingdom|dwc:Class|dwc:Order|dwc:Family)")){
2769 //we currently do not use higher ranks information
2770 }else{
2771 //TODO handle unhandled node
2772 logger.warn("Unhandled node: " + nodeName);
2773 }
2774 }
2775 }
2776 }
2777 }
2778
2779 /**
2780 * @param fullName
2781 * @param atomisedName
2782 * @return
2783 */
2784 private String cleanName(String name, List<String> atomisedName) {
2785 //logger.info("cleanName");
2786 String fullName =name;
2787 if (fullName != null){
2788 fullName = fullName.replace("( ", "(");
2789 fullName = fullName.replace(" )",")");
2790
2791 if (fullName.trim().isEmpty()){
2792 fullName=StringUtils.join(atomisedName," ");
2793 }
2794
2795 while(fullName.contains(" ")) {
2796 fullName=fullName.replace(" ", " ");
2797 // logger.info("while");
2798 }
2799 fullName=fullName.trim();
2800 }
2801 return fullName;
2802 }
2803
2804 /**
2805 * @param rank
2806 * @param fullName
2807 * @param atomisedMap
2808 * @param myname
2809 * @return
2810 */
2811 private String extractAuthorFromNames(Rank rank, String name, HashMap<String, String> atomisedMap, MyName myname) {
2812 logger.info("extractAuthorFromNames");
2813 String fullName=name;
2814 if (atomisedMap.get("dwc:scientificnameauthorship") == null && fullName!=null){
2815 // System.out.println("rank : "+rank.toString());
2816 if(rank.isHigher(Rank.SPECIES())){
2817 try{
2818 String author=null;
2819 if(atomisedMap.get("dwcranks:subgenus") != null) {
2820 author = fullName.split(atomisedMap.get("dwcranks:subgenus"))[1].trim();
2821 }
2822 if(atomisedMap.get("dwc:subgenus") != null) {
2823 author = fullName.split(atomisedMap.get("dwc:subgenus"))[1].trim();
2824 }
2825 if(author == null) {
2826 if(atomisedMap.get("dwc:genus") != null) {
2827 author = fullName.split(atomisedMap.get("dwc:genus"))[1].trim();
2828 }
2829 }
2830 if(author != null){
2831 fullName = fullName.substring(0, fullName.indexOf(author));
2832 author=author.replaceAll(",","").trim();
2833 myname.setAuthor(author);
2834 }
2835 }catch(Exception e){
2836 //could not extract the author
2837 }
2838 }
2839 if(rank.equals(Rank.SPECIES())){
2840 try{
2841 String author=null;
2842 if(author == null) {
2843 if(atomisedMap.get("dwc:species") != null) {
2844 String[] t = fullName.split(atomisedMap.get("dwc:species"));
2845 // System.out.println("NB ELEMENTS "+t.length +"fullName "+fullName+", "+atomisedMap.get("dwc:species"));
2846 author = fullName.split(atomisedMap.get("dwc:species"))[1].trim();
2847 // System.out.println("AUTEUR "+author);
2848 }
2849 }
2850 if(author != null){
2851 fullName = fullName.substring(0, fullName.indexOf(author));
2852 author=author.replaceAll(",","").trim();
2853 myname.setAuthor(author);
2854 }
2855 }catch(Exception e){
2856 //could not extract the author
2857 }
2858 }
2859 }else{
2860 myname.setAuthor(atomisedMap.get("dwc:scientificnameauthorship"));
2861 }
2862 return fullName;
2863 }
2864
2865 /**
2866 * @param newName
2867 * @param atomisedMap
2868 * @param myname
2869 */
2870 private void createAtomisedTaxonString(String newName, HashMap<String, String> atomisedMap, MyName myname) {
2871 logger.info("createAtomisedTaxonString "+atomisedMap);
2872 if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
2873 myname.setFamilyStr(atomisedMap.get("dwc:family"));
2874 }
2875 if(atomisedMap.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank.SUBFAMILY())){
2876 myname.setSubfamilyStr(atomisedMap.get("dwcranks:subfamily"));
2877 }
2878 if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
2879 myname.setTribeStr(atomisedMap.get("dwcranks:tribe"));
2880 }
2881 if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
2882 myname.setSubtribeStr(atomisedMap.get("dwcranks:subtribe"));
2883 }
2884 if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
2885 myname.setGenusStr(atomisedMap.get("dwc:genus"));
2886 }
2887 if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2888 myname.setSubgenusStr(atomisedMap.get("dwcranks:subgenus"));
2889 }
2890 if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2891 myname.setSubgenusStr(atomisedMap.get("dwc:subgenus"));
2892 }
2893 if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
2894 String n=newName;
2895 if(atomisedMap.get("dwc:infraspecificepithet") != null) {
2896 n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
2897 n=n.replace("subsp.","");
2898 }
2899 if(atomisedMap.get("dwc:subspecies") != null) {
2900 n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
2901 n=n.replace("subsp.","");
2902 }
2903 if(atomisedMap.get("dwcranks:varietyepithet") != null) {
2904 n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
2905 n=n.replace("var.","");
2906 n=n.replace("v.","");
2907 }
2908 if(atomisedMap.get("dwcranks:formepithet") != null) {
2909 //TODO
2910 System.out.println("TODO FORMA");
2911 n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
2912 n=n.replace("forma","");
2913 }
2914 n=n.trim();
2915 String author = myname.getAuthor();
2916 if(n.split(" ").length>2){
2917
2918 String n2=n.split(" ")[0]+" "+n.split(" ")[1];
2919 String a= "";
2920 try{
2921 a=n.split(n2)[1].trim();
2922 }catch(Exception e){
2923 logger.info("no author in "+n+"?");}
2924
2925 myname.setAuthor(a);
2926 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
2927 n=n2;
2928
2929 }
2930
2931 myname.setSpeciesStr(atomisedMap.get("dwc:species"));
2932 myname.setAuthor(author);
2933 }
2934 if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
2935 myname.setSubspeciesStr(atomisedMap.get("dwc:subspecies"));
2936 }
2937 if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
2938 myname.setSubspeciesStr(atomisedMap.get("dwc:infraspecificepithet"));
2939 }
2940 if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
2941 myname.setVarietyStr(atomisedMap.get("dwcranks:varietyepithet"));
2942 }
2943 if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
2944 myname.setFormStr(atomisedMap.get("dwcranks:formepithet"));
2945 }
2946 if (atomisedMap.get(PUBLICATION_YEAR) != null){
2947 myname.setPublicationYear(Integer.valueOf(atomisedMap.get(PUBLICATION_YEAR)));
2948 }
2949 }
2950
2951 /**
2952 * @see #createUnparsedSynonymNew(Rank, String, HashMap, MyName)
2953 * @param rank
2954 * @param newName
2955 * @param atomisedMap
2956 * @param myname
2957 */
2958 private void createUnparsedSynonym(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
2959 logger.info("createSynonym");
2960 //System.out.println("createsynonym");
2961 if(rank.equals(Rank.UNKNOWN_RANK())){
2962 myname.setNotParsableTaxon(newName);
2963 }else{
2964 if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY()) && rank.equals(Rank.FAMILY())){
2965 myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
2966 }
2967 if(atomisedMap.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank.SUBFAMILY()) && rank.equals(Rank.SUBFAMILY())){
2968 myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
2969 }
2970 if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE()) && rank.equals(Rank.TRIBE())){
2971 myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
2972 }
2973 if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE()) && rank.equals(Rank.SUBTRIBE())){
2974 myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
2975 }
2976 if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS()) && rank.equals(Rank.GENUS())){
2977 myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
2978 }
2979 if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
2980 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
2981 }
2982 if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
2983 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
2984 }
2985 if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES()) && rank.equals(Rank.SPECIES())){
2986 String n=newName;
2987 if(atomisedMap.get("dwc:infraspecificepithet") != null) {
2988 n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
2989 n=n.replace("subsp.","");
2990 }
2991 if(atomisedMap.get("dwc:subspecies") != null) {
2992 n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
2993 n=n.replace("subsp.","");
2994 }
2995 if(atomisedMap.get("dwcranks:varietyepithet") != null) {
2996 n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
2997 n=n.replace("var.","");
2998 n=n.replace("v.","");
2999 }
3000 if(atomisedMap.get("dwcranks:formepithet") != null) {
3001 //TODO
3002 //System.out.println("TODO FORMA");
3003 n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3004 n=n.replace("forma","");
3005 }
3006 n=n.trim();
3007 String author = myname.getAuthor();
3008 if(n.split(" ").length>2){
3009
3010 String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3011 String a="";
3012 try{
3013 a= n.split(n2)[1].trim();
3014 }catch(Exception e){logger.info("no author in "+n);}
3015 myname.setAuthor(a);
3016 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3017 n=n2;
3018
3019 }
3020 Taxon species = myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank);
3021 myname.setSpecies(species);
3022 myname.setAuthor(author);
3023 }
3024 if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3025 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3026 }
3027 if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3028 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3029 }
3030 if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY()) && rank.equals(Rank.VARIETY())){
3031 myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3032 }
3033 if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM()) && rank.equals(Rank.FORM())){
3034 myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3035 }
3036 }
3037
3038 }
3039
3040
3041 /**
3042 * @param refMods
3043 * @see #createUnparsedSynonym(Rank, String, HashMap, MyName)
3044 * the original TaxonXImport extracted Synonyms by creating acc Taxa with partial names
3045 * I (AM) do not understand this but don't want to destroy code which maybe works in some cases) there
3046 * I created this switch for old
3047 * for Spiders the new version is preferred
3048 */
3049 private void createUnparsedSynonymNew(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname, Reference refMods) {
3050 logger.info("createSynonym");
3051
3052 NonViralName<?> nameToBeFilled = this.getNonViralNameAccNomenclature();
3053 //System.out.println("createsynonym");
3054 if(rank.equals(Rank.UNKNOWN_RANK())){
3055 //TODO
3056 myname.setNotParsableTaxon(newName);
3057
3058 nameToBeFilled.setTitleCache(newName, true);
3059 }else{
3060 if(atomisedMap.get("dwc:genus") != null ){
3061 nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwc:genus"));
3062 }
3063 if (rank.isSupraGeneric()){
3064 if (atomisedMap.get("dwcranks:subtribe") != null ){
3065 nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subtribe"));
3066 }else if (atomisedMap.get("dwcranks:subtribe") != null ){
3067 nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subtribe"));
3068 }else if (atomisedMap.get("dwcranks:tribe") != null ){
3069 nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:tribe"));
3070 }else if (atomisedMap.get("dwcranks:subfamily") != null ){
3071 nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subfamily"));
3072 }else if (atomisedMap.get("dwc:family") != null ){
3073 nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwc:family"));
3074 }else{
3075 logger.warn("Supra generic rank not yet handled or atomisation not available");
3076 }
3077 }
3078 if (atomisedMap.get("dwcranks:subgenus") != null){
3079 nameToBeFilled.setInfraGenericEpithet(atomisedMap.get("dwcranks:subgenus"));
3080 }
3081 if (atomisedMap.get("dwc:subgenus") != null){
3082 nameToBeFilled.setInfraGenericEpithet(atomisedMap.get("dwc:subgenus"));
3083 }
3084 if (atomisedMap.get("dwc:species") != null){
3085 nameToBeFilled.setSpecificEpithet(atomisedMap.get("dwc:species"));
3086 }
3087 if (atomisedMap.get("dwcranks:formepithet") != null){
3088 nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwcranks:formepithet"));
3089 }else if (atomisedMap.get("dwcranks:varietyepithet") != null){
3090 nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwcranks:varietyepithet"));
3091 }else if (atomisedMap.get("dwc:infraspecificepithet") != null){
3092 nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwc:infraspecificepithet"));
3093 }else if (atomisedMap.get("dwc:subspecies") != null){
3094 nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwc:subspecies"));
3095 }
3096 Reference sec = sourceUrlRef;
3097 if(!state2.getConfig().doKeepOriginalSecundum()){
3098 sec = state2.getConfig().getSecundum();
3099 }
3100 Synonym syn = Synonym.NewInstance(nameToBeFilled, sec);
3101 // sourceHandler.addSource(refMods, syn);
3102 myname.setSyno(syn);
3103 myname.setSynonym(true);
3104 }
3105 }
3106
3107 /**
3108 * @param rank
3109 * @param newName
3110 * @param atomisedMap
3111 * @param myname
3112 */
3113 private void createAtomisedTaxon(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
3114 logger.info("createAtomisedTaxon "+atomisedMap);
3115 if(rank.equals(Rank.UNKNOWN_RANK())){
3116 myname.setNotParsableTaxon(newName);
3117 }
3118 else{
3119 if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
3120 myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
3121 }
3122 if(atomisedMap.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank.SUBFAMILY())){
3123 myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
3124 }
3125 if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
3126 myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
3127 }
3128 if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
3129 myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
3130 }
3131 if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
3132 myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
3133 }
3134 if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3135 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
3136 }
3137 if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3138 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
3139 }
3140 if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
3141 String n=newName;
3142 if(atomisedMap.get("dwc:infraspecificepithet") != null) {
3143 n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
3144 n=n.replace("subsp.","");
3145 }
3146 if(atomisedMap.get("dwc:subspecies") != null) {
3147 n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
3148 n=n.replace("subsp.","");
3149 }
3150 if(atomisedMap.get("dwcranks:varietyepithet") != null) {
3151 n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
3152 n=n.replace("var.","");
3153 n=n.replace("v.","");
3154 }
3155 if(atomisedMap.get("dwcranks:formepithet") != null) {
3156 //TODO
3157 //System.out.println("TODO FORMA");
3158 n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3159 n=n.replace("forma","");
3160 }
3161 n=n.trim();
3162 String author = myname.getAuthor();
3163 if(n.split(" ").length>2){
3164 String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3165 String a="";
3166 try{
3167 a= n.split(n2)[1].trim();
3168 }catch(Exception e){logger.info("no author in "+n);}
3169 myname.setAuthor(a);
3170 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3171 n=n2;
3172
3173 }
3174
3175 myname.setSpecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank));
3176 myname.setAuthor(author);
3177 }
3178 if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3179 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3180 }
3181 if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3182 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3183 }
3184 if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
3185 myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3186 }
3187 if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
3188 myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3189 }
3190 }
3191 }
3192
3193 /**
3194 * @return
3195 */
3196 private boolean checkRankValidForImport(Rank currentRank) {
3197 //logger.info("checkRankValidForImport");
3198 return currentRank.isLower(state2.getConfig().getMaxRank()) || currentRank.equals(state2.getConfig().getMaxRank());
3199 }
3200
3201
3202
3203 /**
3204 * @param classification2
3205 */
3206 public void updateClassification(Classification classification2) {
3207 //logger.info("updateClassification");
3208 classification = classification2;
3209 }
3210
3211 /**
3212 * @param tnb
3213 * cast the current taxonnamebase into a botanical name or zoological or bacterial name
3214 * if errors, cast into a classis nonviralname
3215 * @param taxonnamebase2
3216 */
3217 @SuppressWarnings("rawtypes")
3218 public NonViralName<?> castTaxonNameBase(TaxonNameBase tnb, NonViralName<?> nvn) {
3219
3220 //logger.info("castTaxonNameBase");
3221 NonViralName<?> taxonnamebase2 = nvn;
3222 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
3223 try{
3224 taxonnamebase2=(BotanicalName) tnb;
3225 }catch(Exception e){
3226 taxonnamebase2= (NonViralName<?>) tnb;
3227 }
3228 }
3229 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
3230 try{
3231 taxonnamebase2=(ZoologicalName) tnb;
3232 }catch(Exception e){
3233 taxonnamebase2= (NonViralName<?>) tnb;
3234 }
3235 }
3236 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
3237 try{
3238 taxonnamebase2=(BacterialName) tnb;
3239 }catch(Exception e){
3240 taxonnamebase2= (NonViralName<?>) tnb;
3241 }
3242 }
3243 return taxonnamebase2;
3244 }
3245
3246 /**
3247 * @param tnb
3248 * cast the current taxonnamebase into a botanical name or zoological or bacterial name
3249 * if errors, cast into a classis nonviralname
3250 * @param taxonnamebase2
3251 */
3252 @SuppressWarnings("rawtypes")
3253 public NonViralName<?> castTaxonNameBase(TaxonNameBase tnb) {
3254 //logger.info("castTaxonNameBase2");
3255 NonViralName<?> taxonnamebase2 = null;
3256 tnb=CdmBase.deproxy(tnb, TaxonNameBase.class);
3257 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
3258 try{
3259 taxonnamebase2=(BotanicalName) tnb;
3260 }catch(Exception e){
3261 taxonnamebase2= (NonViralName<?>) tnb;
3262 }
3263 }
3264 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
3265 try{
3266 taxonnamebase2=(ZoologicalName) tnb;
3267 }catch(Exception e){
3268 taxonnamebase2= (NonViralName<?>) tnb;
3269 }
3270 }
3271 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
3272 try{
3273 taxonnamebase2=(BacterialName) tnb;
3274 }catch(Exception e){
3275 taxonnamebase2= (NonViralName<?>) tnb;
3276 }
3277 }
3278 return taxonnamebase2;
3279 }
3280
3281 public class MyName {
/**
 * Creates an empty name holder.
 *
 * @param isSynonym whether the name held by this object is handled as a synonym
 */
public MyName(boolean isSynonym) {
    this.isSynonym = isSynonym;
}
3289
3290 String originalName="";
3291 String newName="";
3292 Rank rank=Rank.UNKNOWN_RANK();
3293 String identifier="";
3294 String status="";
3295 String author=null;
3296
3297 NonViralName<?> taxonNameBase;
3298
3299 Reference refMods ;
3300
3301 Taxon family,subfamily,tribe,subtribe,genus,subgenus,species,subspecies, variety,form;
3302 NonViralName<?> familyName, subfamilyName, tribeName,subtribeName,genusName,subgenusName,speciesName,subspeciesName;
3303 String familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr;
3304 Integer publicationYear;
3305
3306
3307 Taxon higherTaxa;
3308 Rank higherRank;
3309 private Taxon taxon;
3310 private Synonym syno;
3311
3312 /**
3313 * @return the syno
3314 */
3315 public Synonym getSyno() {
3316 return syno;
3317 }
3318
3319 @Override
3320 public String toString(){
3321 List<String> tot=new ArrayList<String>();
3322 String[] n= {familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr};
3323 for (String elt:n){
3324 if (!StringUtils.isEmpty(elt)) {
3325 tot.add(elt);
3326 } else {
3327 tot.add("*");
3328 }
3329 }
3330 return StringUtils.join(tot," ");
3331 }
3332 /**
3333 * @param syno the syno to set
3334 */
3335 public void setSyno(Synonym syno) {
3336 this.syno = syno;
3337 }
3338
3339 boolean isSynonym=false;
3340
3341 /**
3342 * @return the isSynonym
3343 */
3344 public boolean isSynonym() {
3345 return isSynonym;
3346 }
3347
3348 /**
3349 * @param isSynonym the isSynonym to set
3350 */
3351 public void setSynonym(boolean isSynonym) {
3352 this.isSynonym = isSynonym;
3353 }
3354
3355 public void setSource(Reference re){
3356 refMods=re;
3357 }
3358
3359 /**
3360 * @param string
3361 */
3362 public void setFormStr(String string) {
3363 this.formStr=string;
3364
3365 }
3366 /**
3367 * @param string
3368 */
3369 public void setVarietyStr(String string) {
3370 this.varietyStr=string;
3371
3372 }
3373 /**
3374 * @param string
3375 */
3376 public void setSubspeciesStr(String string) {
3377 this.subspeciesStr=string;
3378
3379 }
3380 /**
3381 * @param string
3382 */
3383 public void setSpeciesStr(String string) {
3384 this.speciesStr=string;
3385
3386 }
3387 /**
3388 * @param string
3389 */
3390 public void setSubgenusStr(String string) {
3391 this.subgenusStr=string;
3392
3393 }
3394 /**
3395 * @param string
3396 */
3397 public void setGenusStr(String string) {
3398 this.genusStr=string;
3399
3400 }
3401 /**
3402 * @param string
3403 */
3404 public void setSubtribeStr(String string) {
3405 this.subtribeStr=string;
3406
3407 }
3408 /**
3409 * @param string
3410 */
3411 public void setTribeStr(String string) {
3412 this.tribeStr=string;
3413
3414 }
3415 /**
3416 * @param string
3417 */
3418 public void setSubfamilyStr(String string) {
3419 this.subfamilyStr=string;
3420
3421 }
3422 /**
3423 * @param string
3424 */
3425 public void setFamilyStr(String string) {
3426 this.familyStr=string;
3427
3428 }
3429 /**
3430 * @return the familyStr
3431 */
3432 public String getFamilyStr() {
3433 return familyStr;
3434 }
3435 /**
3436 * @return the subfamilyStr
3437 */
3438 public String getSubfamilyStr() {
3439 return subfamilyStr;
3440 }
3441 /**
3442 * @return the tribeStr
3443 */
3444 public String getTribeStr() {
3445 return tribeStr;
3446 }
3447 /**
3448 * @return the subtribeStr
3449 */
3450 public String getSubtribeStr() {
3451 return subtribeStr;
3452 }
3453 /**
3454 * @return the genusStr
3455 */
3456 public String getGenusStr() {
3457 return genusStr;
3458 }
3459 /**
3460 * @return the subgenusStr
3461 */
3462 public String getSubgenusStr() {
3463 return subgenusStr;
3464 }
3465 /**
3466 * @return the speciesStr
3467 */
3468 public String getSpeciesStr() {
3469 return speciesStr;
3470 }
3471 /**
3472 * @return the subspeciesStr
3473 */
3474 public String getSubspeciesStr() {
3475 return subspeciesStr;
3476 }
3477 /**
3478 * @return the formStr
3479 */
3480 public String getFormStr() {
3481 return formStr;
3482 }
3483 /**
3484 * @return the varietyStr
3485 */
3486 public String getVarietyStr() {
3487 return varietyStr;
3488 }
3489
3490 public Integer getPublicationYear() {
3491 return publicationYear;
3492 }
3493
3494 public void setPublicationYear(Integer publicationYear) {
3495 this.publicationYear = publicationYear;
3496 }
3497
3498 /**
3499 * @param newName2
3500 */
3501 public void setNotParsableTaxon(String newName2) {
3502 //takes too much time
3503 // List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3504
3505 NomenclaturalStatusType statusType = null;
3506 if (!getStatus().isEmpty()){
3507 try {
3508 statusType = nomStatusString2NomStatus(getStatus());
3509 } catch (UnknownCdmTypeException e) {
3510 addProblematicStatusToFile(getStatus());
3511 logger.warn("Problem with status");
3512 }
3513 }
3514 List<TaxonBase> tmpList = new ArrayList<TaxonBase>();
3515
3516 Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, newName2, MatchMode.BEGINNING, null, null, null, null, null);
3517 tmpList.addAll(taxontest.getRecords());
3518
3519 //logger.info("tmpList returned: "+tmpList.size());
3520
3521
3522 NonViralName<?> identicName = null;
3523 boolean foundIdentic=false;
3524 TaxonBase<?> tmpTaxonBase=null;
3525 // Taxon tmpPartial=null;
3526 for (TaxonBase<?> tmpb:tmpList){
3527 if(tmpb !=null){
3528 TaxonNameBase<?,?> tnb = tmpb.getName();
3529 Rank crank=null;
3530 if (tnb != null){
3531 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(newName2) ){
3532 crank =tnb.getRank();
3533 if (crank !=null && rank !=null){
3534 if (crank.equals(rank)){
3535 identicName = CdmBase.deproxy(tnb, NonViralName.class);
3536 if (isSynonym && tmpb.isInstanceOf(Synonym.class) || !isSynonym && tmpb.isInstanceOf(Taxon.class)){
3537 foundIdentic=true;
3538 tmpTaxonBase=tmpb;
3539 break;
3540 }
3541 }
3542 }
3543 }
3544 }
3545 }
3546 }
3547 boolean statusMatch=false;
3548 boolean appendedMatch=false;
3549 if(tmpTaxonBase !=null && foundIdentic){
3550 statusMatch=compareStatus(tmpTaxonBase, statusType);
3551 if (!getStatus().isEmpty() && ! (tmpTaxonBase.getAppendedPhrase() == null)) {
3552 appendedMatch=tmpTaxonBase.getAppendedPhrase().equals(getStatus());
3553 }
3554 if (getStatus().isEmpty() && tmpTaxonBase.getAppendedPhrase() == null) {
3555 appendedMatch=true;
3556 }
3557
3558 }
3559 if ((tmpTaxonBase == null || !foundIdentic) || (tmpTaxonBase != null && !statusMatch) || (tmpTaxonBase != null && !appendedMatch && !statusMatch)){
3560
3561 NonViralName<?> tnb;
3562 if (identicName == null){
3563 tnb = getNonViralNameAccNomenclature();
3564 tnb.setRank(rank);
3565
3566 if(statusType != null) {
3567 tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3568 }
3569 if(StringUtils.isNotBlank(getStatus())) {
3570 tnb.setAppendedPhrase(getStatus());
3571 }
3572 tnb.setTitleCache(newName2,true);
3573 tmpTaxonBase = findMatchingTaxon(tnb,refMods);
3574 }else{
3575 tnb = identicName;
3576 }
3577
3578 if(tmpTaxonBase==null){
3579 tmpTaxonBase = isSynonym ? Synonym.NewInstance(tnb, refMods) : Taxon.NewInstance(tnb, refMods);
3580 if(!state2.getConfig().doKeepOriginalSecundum()) {
3581 tmpTaxonBase.setSec(state2.getConfig().getSecundum());
3582 }
3583 //tmptaxonbase.setSec(refMods);
3584 if(!isSynonym) {
3585 classification.addChildTaxon((Taxon)tmpTaxonBase, null, null);
3586 sourceHandler.addSource(refMods, (Taxon)tmpTaxonBase);
3587 }
3588 }
3589 }
3590
3591 tmpTaxonBase = CdmBase.deproxy(tmpTaxonBase, TaxonBase.class);
3592 if (author != null) {
3593 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
3594 setLSID(getIdentifier(), tmpTaxonBase);
3595 importer.getTaxonService().saveOrUpdate(tmpTaxonBase);
3596 tmpTaxonBase = CdmBase.deproxy(tmpTaxonBase, TaxonBase.class);
3597 }
3598 }
3599 TaxonNameBase<?,?> tnb = CdmBase.deproxy(tmpTaxonBase.getName(), TaxonNameBase.class);
3600
3601 if(!isSynonym) {
3602 this.taxon=(Taxon)tmpTaxonBase;
3603 } else {
3604 if (tmpTaxonBase instanceof Taxon){
3605 logger.warn("Incorrect status");
3606 }
3607 this.syno=(Synonym)tmpTaxonBase;
3608 }
3609
3610 taxonNameBase = castTaxonNameBase(tnb, taxonNameBase);
3611
3612 }
3613
3614 /**
3615 *
3616 */
3617 public void buildTaxon() {
3618 //System.out.println("BUILD TAXON");
3619 logger.info("buildTaxon");
3620 NomenclaturalStatusType statusType = null;
3621 if (!getStatus().isEmpty()){
3622 status = getStatus();
3623 String newNameStatus = newNameStatus(status);
3624 if (newNameStatus != null){
3625 taxonNameBase.setAppendedPhrase(newNameStatus);
3626 }else{
3627 try {
3628 statusType = nomStatusString2NomStatus(getStatus());
3629 taxonNameBase.addStatus(NomenclaturalStatus.NewInstance(statusType));
3630 } catch (UnknownCdmTypeException e) {
3631 addProblematicStatusToFile(getStatus());
3632 logger.warn("Problem with status");
3633 }
3634 }
3635 }
3636 importer.getNameService().save(taxonNameBase);
3637
3638 TaxonBase<?> tmpTaxonBase;
3639 if (!isSynonym) {
3640 tmpTaxonBase =Taxon.NewInstance(taxonNameBase, refMods); //sec set null
3641 }
3642 else {
3643 tmpTaxonBase =Synonym.NewInstance(taxonNameBase, refMods); //sec set null
3644 }
3645 boolean exist = false;
3646 if (!isSynonym){
3647 for (TaxonNode node : classification.getAllNodes()){
3648 try{
3649 Taxon nodeTaxon = node.getTaxon();
3650 boolean titleMatches = nodeTaxon.getTitleCache().equalsIgnoreCase(tmpTaxonBase.getTitleCache());
3651 boolean nomStatusMatches = compareStatus(node.getTaxon(), statusType);
3652 boolean nodeNameReplaceable = checkNodeNameReplaceable(nodeTaxon, tmpTaxonBase);
3653 if(titleMatches && nomStatusMatches) {
3654 if (!isSynonym) {
3655 tmpTaxonBase=CdmBase.deproxy(nodeTaxon, TaxonBase.class);
3656 exist =true;
3657 } else {
3658 logger.info("Found the same name but from another type (taxon/synonym)");
3659 TaxonNameBase<?,?> existingTnb = getTaxon().getName();
3660 tmpTaxonBase = new Synonym(existingTnb, refMods);
3661 importer.getTaxonService().saveOrUpdate(tmpTaxonBase);
3662 exist =true;
3663 }
3664 }else if (nodeNameReplaceable){
3665 nodeTaxon.setName(tmpTaxonBase.getName());
3666 tmpTaxonBase = nodeTaxon;
3667 exist = true;
3668 }
3669 }catch(NullPointerException n){logger.warn(" A taxon is either null or its titlecache is null - ignore it?");}
3670 }
3671 }
3672 if (!exist){
3673
3674 boolean insertAsExisting =false;
3675 List<Taxon> existingTaxons=new ArrayList<Taxon>();
3676 try {
3677 existingTaxons = getMatchingTaxa(taxonNameBase);
3678 } catch (Exception e1) {
3679 e1.printStackTrace();
3680 }
3681 double similarityScore=0.0;
3682 double similarityAuthor=-1;
3683 String author1="";
3684 String author2="";
3685 String t1="";
3686 String t2="";
3687 for (Taxon bestMatchingTaxon : existingTaxons){
3688 //System.out.println("tnbase "+taxonnamebase.getTitleCache());
3689 //System.out.println("bestex "+bestMatchingTaxon.getTitleCache());
3690 if(taxonNameBase.getAuthorshipCache()!=null) {
3691 author1=taxonNameBase.getAuthorshipCache();
3692 }
3693 try {
3694 if(castTaxonNameBase(bestMatchingTaxon.getName()).getAuthorshipCache()!=null) {
3695 author2=castTaxonNameBase(bestMatchingTaxon.getName()).getAuthorshipCache();
3696 }
3697 } catch (Exception e) {
3698 // TODO Auto-generated catch block
3699 e.printStackTrace();
3700 }
3701 try {
3702 t1=taxonNameBase.getTitleCache();
3703 if (author1!=null && !StringUtils.isEmpty(author1)) {
3704 t1=t1.split(Pattern.quote(author1))[0];
3705 }
3706 } catch (Exception e) {
3707 // TODO Auto-generated catch block
3708 e.printStackTrace();
3709 }
3710 try {
3711 t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
3712 if (author2!=null && !StringUtils.isEmpty(author2)) {
3713 t2=t2.split(Pattern.quote(author2))[0];
3714 }
3715 } catch (Exception e) {
3716 // TODO Auto-generated catch block
3717 e.printStackTrace();
3718 }
3719
3720 similarityScore=similarity(t1.trim(), t2.trim());
3721 //System.out.println("taxonscore "+similarityScore);
3722 similarityAuthor=similarity(author1.trim(), author2.trim());
3723 //System.out.println("authorscore "+similarityAuthor);
3724 insertAsExisting = compareAndCheckTaxon(taxonNameBase, refMods, similarityScore, bestMatchingTaxon, similarityAuthor);
3725 if(insertAsExisting) {
3726 tmpTaxonBase=bestMatchingTaxon;
3727 break;
3728 }
3729 }
3730 if ( !insertAsExisting ){
3731 if(!state2.getConfig().doKeepOriginalSecundum()) {
3732 tmpTaxonBase.setSec(state2.getConfig().getSecundum());
3733 }
3734
3735 // tmptaxonbase.setSec(refMods);
3736 if (taxonNameBase.getRank().equals(state2.getConfig().getMaxRank())) {
3737 //System.out.println("****************************"+tmptaxonbase);
3738 if (!isSynonym) {
3739 classification.addChildTaxon((Taxon)tmpTaxonBase, refMods, null);
3740 }
3741 } else{
3742 hierarchy = new HashMap<Rank, Taxon>();
3743 //System.out.println("LOOK FOR PARENT "+taxonnamebase.toString()+", "+tmptaxonbase.toString());
3744 if (!isSynonym){
3745 lookForParentNode(taxonNameBase,(Taxon)tmpTaxonBase, refMods,this);
3746 //System.out.println("HIERARCHY "+hierarchy);
3747 Taxon parent = buildHierarchy();
3748 if(!taxonExistsInClassification(parent,(Taxon)tmpTaxonBase)){
3749 if(parent !=null) {
3750 classification.addParentChild(parent, (Taxon)tmpTaxonBase, refMods, null);
3751 } else {
3752 classification.addChildTaxon((Taxon)tmpTaxonBase, refMods, null);
3753 }
3754 importer.getClassificationService().saveOrUpdate(classification);
3755 }
3756 }
3757 // Set<TaxonNode> nodeList = classification.getAllNodes();
3758 // for(TaxonNode tn:nodeList) {
3759 // System.out.println(tn.getTaxon());
3760 // }
3761 }
3762 }
3763 importer.getClassificationService().saveOrUpdate(classification);
3764 if(isSynonym) {
3765 try{
3766 Synonym castTest=CdmBase.deproxy(tmpTaxonBase, Synonym.class);
3767 }catch(Exception e){
3768 TaxonNameBase<?,?> existingTnb = tmpTaxonBase.getName();
3769 Synonym castTest = new Synonym(existingTnb, refMods);
3770 importer.getTaxonService().saveOrUpdate(castTest);
3771 tmpTaxonBase=CdmBase.deproxy(castTest, Synonym.class);
3772 }
3773 }
3774 }
3775 if(!isSynonym) {
3776 taxon=CdmBase.deproxy(tmpTaxonBase, Taxon.class);
3777 } else {
3778 syno=CdmBase.deproxy(tmpTaxonBase, Synonym.class);
3779 }
3780
3781 }
3782
3783 private boolean checkNodeNameReplaceable(Taxon nodeTaxon, TaxonBase<?> newTaxon) {
3784 //TODO preliminary check
3785 if (newTaxon.isInstanceOf(Synonym.class)){
3786 return false;
3787 }
3788 NonViralName<?> nodeName = CdmBase.deproxy(nodeTaxon.getName(), NonViralName.class);
3789 NonViralName<?> newName = CdmBase.deproxy(newTaxon.getName(), NonViralName.class);
3790 if (nodeTaxon.getName() == null || newName == null){
3791 return false;
3792 }
3793 if (nodeTaxon.getDescriptions().size() > 0 || nodeName.getDescriptions().size() > 0 || nodeName.getTypeDesignations().size() > 0 ){
3794 return false;
3795 }
3796 boolean compare = true;
3797 for (NomenclaturalStatus status : newName.getStatus() ){
3798 compare &= compareStatus(nodeTaxon, status.getType());
3799 }
3800 if (! compare){
3801 return false;
3802 }
3803
3804 if (nodeName.getNameCache() != null && nodeName.getNameCache().equals(newName.getNameCache())){
3805 if (nodeName.getNameCache().equals(nodeName.getTitleCache())){
3806 if (newName.getNameCache().length() < newName.getTitleCache().length()){
3807 logger.warn("We still need to check, if node was automatically created via hierarchy creation: " + nodeName.getNameCache());
3808 return true;
3809 }
3810 }
3811 }
3812
3813 return false;
3814 }
3815
3816 /**
3817 *
3818 */
3819 private Taxon buildHierarchy() {
3820 logger.info("buildHierarchy");
3821 Taxon higherTaxon = null;
3822 //add the maxRank as a root
3823 if(hierarchy.containsKey(state2.getConfig().getMaxRank())){
3824 Taxon ct=hierarchy.get(state2.getConfig().getMaxRank());
3825 if(!taxonExistsInClassification(higherTaxon, ct)) {
3826 classification.addChildTaxon(ct, refMods, null);
3827 }
3828 higherTaxon = hierarchy.get(state2.getConfig().getMaxRank());
3829 // return higherTaxon;
3830 }
3831 //add the relation to the highertaxon, except if the current rank to add IS the maxRank
3832
3833 //TODO higher Ranks
3834
3835 if(hierarchy.containsKey(Rank.FAMILY()) && !state2.getConfig().getMaxRank().equals(Rank.FAMILY())){
3836 higherTaxon=saveAndGetHigherTaxon(Rank.FAMILY(),higherTaxon);
3837 }
3838 if(hierarchy.containsKey(Rank.SUBFAMILY()) && !state2.getConfig().getMaxRank().equals(Rank.SUBFAMILY())){
3839 higherTaxon=saveAndGetHigherTaxon(Rank.SUBFAMILY(),higherTaxon);
3840 }
3841 if(hierarchy.containsKey(Rank.TRIBE())&& !state2.getConfig().getMaxRank().equals(Rank.TRIBE())){
3842 higherTaxon=saveAndGetHigherTaxon(Rank.TRIBE(),higherTaxon);
3843 }
3844 if(hierarchy.containsKey(Rank.SUBTRIBE())&& !state2.getConfig().getMaxRank().equals(Rank.SUBTRIBE())){
3845 higherTaxon=saveAndGetHigherTaxon(Rank.SUBTRIBE(),higherTaxon);
3846 }
3847 if(hierarchy.containsKey(Rank.GENUS())&& !state2.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3848 higherTaxon=saveAndGetHigherTaxon(Rank.GENUS(),higherTaxon);
3849 }
3850 if(hierarchy.containsKey(Rank.SUBGENUS())&& !state2.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3851 higherTaxon=saveAndGetHigherTaxon(Rank.SUBGENUS(),higherTaxon);
3852 }
3853 importer.getClassificationService().saveOrUpdate(classification);
3854 return higherTaxon;
3855 }
3856
3857 private Taxon saveAndGetHigherTaxon(Rank r, Taxon higherTaxon){
3858 Taxon ct=hierarchy.get(r);
3859 if(!taxonExistsInClassification(higherTaxon,ct )) {
3860 if(higherTaxon != null && ct!=null) {
3861 classification.addParentChild(higherTaxon, ct, refMods, null);
3862 } else
3863 if(higherTaxon == null && ct !=null) {
3864 classification.addChildTaxon(ct, refMods, null);
3865 }
3866 }
3867 return ct;
3868 }
3869
3870 private boolean taxonExistsInClassification(Taxon parent, Taxon child){
3871 logger.info("taxonExistsInClassification");
3872 // System.out.println("LOOK IF TAXA EXIST "+parent+", "+child);
3873 boolean found=false;
3874 if(parent !=null){
3875 for (TaxonNode p : classification.getAllNodes()){
3876 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
3877 for (TaxonNode c : p.getChildNodes()) {
3878 if (c.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3879 found=true;
3880 break;
3881 }
3882 }
3883 }
3884 }
3885 }
3886 else{
3887 for (TaxonNode p : classification.getAllNodes()){
3888 if(p.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3889 found=true;
3890 break;
3891 }
3892 }
3893 }
3894 // System.out.println("LOOK IF TAXA EXIST? "+found);
3895 return found;
3896 }
3897 /**
3898 * @param nameToBeFilledTest
3899 */
3900 @SuppressWarnings("rawtypes")
3901 public void setParsedName(TaxonNameBase nameToBeFilledTest) {
3902 this.taxonNameBase = (NonViralName<?>) nameToBeFilledTest;
3903
3904 }
3905 //variety dwcranks:varietyEpithet
3906 /**
3907 * @return the author
3908 */
3909 public String getAuthor() {
3910 return author;
3911 }
3912 /**
3913 * @return
3914 */
3915 public Taxon getTaxon() {
3916 return taxon;
3917 }
3918 /**
3919 * @return
3920 */
3921 public NonViralName<?> getTaxonNameBase() {
3922 return taxonNameBase;
3923 }
3924
3925 /**
3926 * @param findOrCreateTaxon
3927 */
3928 public void setForm(Taxon form) {
3929 this.form=form;
3930
3931 }
3932 /**
3933 * @param findOrCreateTaxon
3934 */
3935 public void setVariety(Taxon variety) {
3936 this.variety=variety;
3937
3938 }
3939 /**
3940 * @param string
3941 * @return
3942 */
3943 @SuppressWarnings("rawtypes")
3944 public Taxon findOrCreateTaxon(String partialname,String fullname, Rank rank, Rank globalrank) {
3945 logger.info("findOrCreateTaxon");
3946 sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
3947 //takes too much time
3948 // List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3949 // logger.info("tmpList returned: "+tmpList.size());
3950
3951 NomenclaturalStatusType statusType = null;
3952 if (!getStatus().isEmpty()){
3953 try {
3954 statusType = nomStatusString2NomStatus(getStatus());
3955 } catch (UnknownCdmTypeException e) {
3956 addProblematicStatusToFile(getStatus());
3957 logger.warn("Problem with status");
3958 }
3959 }
3960
3961 List<TaxonBase> tmpListFiltered = new ArrayList<TaxonBase>();
3962
3963 Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, fullname, MatchMode.BEGINNING, null, null, null, null, null);
3964
3965 tmpListFiltered.addAll(taxontest.getRecords());
3966 taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, partialname, MatchMode.BEGINNING, null, null, null, null, null);
3967 tmpListFiltered.addAll(taxontest.getRecords());
3968
3969 //logger.info("tmpListFiltered returned: "+tmpListFiltered.size());
3970
3971 boolean nameCorrected=false;
3972 if (fullname.indexOf(partialname)<0) {
3973 nameCorrected=true;
3974 }
3975
3976 boolean foundIdentic=false;
3977 Taxon tmp=null;
3978 for (TaxonBase tmpb:tmpListFiltered){
3979 if(tmpb !=null){
3980 TaxonNameBase tnb = tmpb.getName();
3981 Rank crank=null;
3982 if (tnb != null){
3983 if(globalrank.equals(rank) || (globalrank.isLower(Rank.SPECIES()) && rank.equals(Rank.SPECIES()))){
3984 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(fullname) ){
3985 crank =tnb.getRank();
3986 if (crank !=null && rank !=null){
3987 if (crank.equals(rank)){
3988 foundIdentic=true;
3989 try{
3990 tmp=(Taxon)tmpb;
3991 break;
3992 }catch(Exception e){
3993 e.printStackTrace();
3994 }
3995 }
3996 }
3997 }
3998 if(nameCorrected){ //for corrected names such as Anochetus -- A. blf-pat
3999 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
4000 crank =tnb.getRank();
4001 if (crank !=null && rank !=null){
4002 if (crank.equals(rank)){
4003 foundIdentic=true;
4004 try{
4005 tmp=(Taxon)tmpb;
4006 break;
4007 }catch(Exception e){
4008 e.printStackTrace();
4009 }
4010 }
4011 }
4012 }
4013 }
4014 }
4015 else{
4016 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
4017 crank =tnb.getRank();
4018 if (crank !=null && rank !=null){
4019 if (crank.equals(rank)){
4020 foundIdentic=true;
4021 try{
4022 tmp=(Taxon)tmpb;
4023 break;
4024 }catch(Exception e){
4025 e.printStackTrace();
4026 }
4027 }
4028 }
4029 }
4030 }
4031 }
4032 }
4033 }
4034 boolean statusMatch=false;
4035 boolean appendedMatch=false;
4036 if(tmp !=null && foundIdentic){
4037 statusMatch=compareStatus(tmp, statusType);
4038 if (!getStatus().isEmpty() && ! (tmp.getAppendedPhrase() == null)) {
4039 appendedMatch=tmp.getAppendedPhrase().equals(getStatus());
4040 }
4041 if (getStatus().isEmpty() && tmp.getAppendedPhrase() == null) {
4042 appendedMatch=true;
4043 }
4044
4045 }
4046 if ((tmp == null || !foundIdentic) || (tmp != null && !statusMatch) || (tmp != null && !appendedMatch && !statusMatch)){
4047
4048 NonViralName<?> tnb = getNonViralNameAccNomenclature();
4049 tnb.setRank(rank);
4050
4051 if(statusType != null) {
4052 tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
4053 }
4054 if(StringUtils.isNotBlank(getStatus())) {
4055 tnb.setAppendedPhrase(getStatus());
4056 }
4057
4058 if(rank.equals(Rank.UNKNOWN_RANK())){
4059 tnb.setTitleCache(fullname, true);
4060 // tnb.setGenusOrUninomial(fullname);
4061 }
4062 if(rank.isHigher(Rank.GENUS())) {
4063 tnb.setGenusOrUninomial(partialname);
4064 }
4065
4066 if(rank.isHigher(Rank.SPECIES())) {
4067 tnb.setTitleCache(partialname, true);
4068 }
4069
4070 if (rank.equals(globalrank) && author != null) {
4071
4072 tnb.setCombinationAuthorship(findOrCreateAuthor(author));
4073 if (getIdentifier() !=null && !getIdentifier().isEmpty()){
4074 Taxon taxonLSID = getTaxonByLSID(getIdentifier());
4075 if (taxonLSID !=null) {
4076 tmp=taxonLSID;
4077 }
4078 }
4079 }
4080
4081 if(tmp == null){
4082 if (rank.equals(Rank.FAMILY())) {
4083 tmp = buildFamily(tnb);
4084 }
4085 if (rank.equals(Rank.SUBFAMILY())) {
4086 tmp = buildSubfamily(tnb);
4087 }
4088 if (rank.equals(Rank.TRIBE())) {
4089 tmp = buildTribe(tnb);
4090 }
4091 if (rank.equals(Rank.SUBTRIBE())) {
4092 tmp = buildSubtribe(tnb);
4093 }
4094 if (rank.equals(Rank.GENUS())) {
4095 tmp = buildGenus(partialname, tnb);
4096 }
4097
4098 if (rank.equals(Rank.SUBGENUS())) {
4099 tmp = buildSubgenus(partialname, tnb);
4100 }
4101 if (rank.equals(Rank.SPECIES())) {
4102 tmp = buildSpecies(partialname, tnb);
4103 }
4104
4105 if (rank.equals(Rank.SUBSPECIES())) {
4106 tmp = buildSubspecies(partialname, tnb);
4107 }
4108
4109 if (rank.equals(Rank.VARIETY())) {
4110 tmp = buildVariety(fullname, partialname, tnb);
4111 }
4112
4113 if (rank.equals(Rank.FORM())) {
4114 tmp = buildForm(fullname, partialname, tnb);
4115 }
4116 if (tmp != null){
4117 TaxonXTreatmentExtractor.this.sourceHandler.addSource(refMods, tmp);
4118 }
4119
4120 importer.getClassificationService().saveOrUpdate(classification);
4121 }
4122
4123 }
4124
4125 tmp = CdmBase.deproxy(tmp, Taxon.class);
4126 if (rank.equals(globalrank) && author != null) {
4127 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
4128 setLSID(getIdentifier(), tmp);
4129 importer.getTaxonService().saveOrUpdate(tmp);
4130 tmp = CdmBase.deproxy(tmp, Taxon.class);
4131 }
4132 }
4133
4134 this.taxon=tmp;
4135
4136 return tmp;
4137 }
4138
4139 /**
4140 * @param tnb
4141 * @return
4142 */
4143 private Taxon buildSubfamily(NonViralName<?> tnb) {
4144 Taxon tmp;
4145 // tnb.generateTitle();
4146 tmp = findMatchingTaxon(tnb,refMods);
4147 if(tmp ==null){
4148 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4149 if(!state2.getConfig().doKeepOriginalSecundum()) {
4150 tmp.setSec(state2.getConfig().getSecundum());
4151 }
4152 // tmp.setSec(refMods);
4153 // sourceHandler.addSource(refMods, tmp);
4154 if(family != null) {
4155 classification.addParentChild(family, tmp, null, null);
4156 higherRank=Rank.FAMILY();
4157 higherTaxa=family;
4158 } else {
4159 //System.out.println("ADDCHILDTAXON SUBFAMILY "+tmp);
4160 classification.addChildTaxon(tmp, null, null);
4161 }
4162 }
4163 return tmp;
4164 }
4165 /**
4166 * @param tnb
4167 * @return
4168 */
4169 private Taxon buildFamily(NonViralName<?> tnb) {
4170 Taxon tmp;
4171 // tnb.generateTitle();
4172 tmp = findMatchingTaxon(tnb,refMods);
4173 if(tmp ==null){
4174 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4175 if(!state2.getConfig().doKeepOriginalSecundum()) {
4176 tmp.setSec(state2.getConfig().getSecundum());
4177 }
4178 // tmp.setSec(refMods);
4179 //sourceHandler.addSource(refMods, tmp);
4180 //System.out.println("ADDCHILDTAXON FAMILY "+tmp);
4181 classification.addChildTaxon(tmp, null, null);
4182 }
4183 return tmp;
4184 }
4185 /**
4186 * @param fullname
4187 * @param tnb
4188 * @return
4189 */
4190 private Taxon buildForm(String fullname, String partialname, NonViralName<?> tnb) {
4191 if (genusName !=null) {
4192 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4193 }
4194 if (subgenusName !=null) {
4195 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4196 }
4197 if(speciesName !=null) {
4198 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4199 }
4200 if(subspeciesName != null) {
4201 tnb.setInfraSpecificEpithet(subspeciesName.getInfraSpecificEpithet());
4202 }
4203 if(partialname!= null) {
4204 tnb.setInfraSpecificEpithet(partialname);
4205 }
4206 //TODO how to save form??
4207 tnb.setTitleCache(fullname, true);
4208 Taxon tmp = findMatchingTaxon(tnb,refMods);
4209 if(tmp ==null){
4210 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4211 if(!state2.getConfig().doKeepOriginalSecundum()) {
4212 tmp.setSec(state2.getConfig().getSecundum());
4213 }
4214 // tmp.setSec(refMods);
4215 //sourceHandler.addSource(refMods, tmp);
4216 if (subspecies !=null) {
4217 classification.addParentChild(subspecies, tmp, null, null);
4218 higherRank=Rank.SUBSPECIES();
4219 higherTaxa=subspecies;
4220 } else {
4221 if (species !=null) {
4222 classification.addParentChild(species, tmp, null, null);
4223 higherRank=Rank.SPECIES();
4224 higherTaxa=species;
4225 }
4226 else{
4227 // System.out.println("ADDCHILDTAXON FORM "+tmp);
4228 classification.addChildTaxon(tmp, null, null);
4229 }
4230 }
4231 }
4232 return tmp;
4233 }
4234 /**
4235 * @param fullname
4236 * @param tnb
4237 * @return
4238 */
4239 private Taxon buildVariety(String fullname, String partialname, NonViralName<?> tnb) {
4240 Taxon tmp;
4241 if (genusName !=null) {
4242 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4243 }
4244 if (subgenusName !=null) {
4245 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4246 }
4247 if(speciesName !=null) {
4248 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4249 }
4250 if(subspeciesName != null) {
4251 tnb.setInfraSpecificEpithet(subspeciesName.getSpecificEpithet());
4252 }
4253 if(partialname != null) {
4254 tnb.setInfraSpecificEpithet(partialname);
4255 }
4256 //TODO how to save variety?
4257 tnb.setTitleCache(fullname, true);
4258 tmp = findMatchingTaxon(tnb,refMods);
4259 if(tmp ==null){
4260 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4261 if(!state2.getConfig().doKeepOriginalSecundum()) {
4262 tmp.setSec(state2.getConfig().getSecundum());
4263 }
4264 // tmp.setSec(refMods);
4265 //sourceHandler.addSource(refMods, tmp);
4266 if (subspecies !=null) {
4267 classification.addParentChild(subspecies, tmp, null, null);
4268 higherRank=Rank.SUBSPECIES();
4269 higherTaxa=subspecies;
4270 } else {
4271 if(species !=null) {
4272 classification.addParentChild(species, tmp, null, null);
4273 higherRank=Rank.SPECIES();
4274 higherTaxa=species;
4275 }
4276 else{
4277 //System.out.println("ADDCHILDTAXON VARIETY "+tmp);
4278 classification.addChildTaxon(tmp, null, null);
4279 }
4280 }
4281 }
4282 return tmp;
4283 }
4284 /**
4285 * @param partialname
4286 * @param tnb
4287 * @return
4288 */
4289 private Taxon buildSubspecies(String partialname, NonViralName<?> tnb) {
4290 if (genusName !=null) {
4291 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4292 }
4293 if (subgenusName !=null) {
4294 // System.out.println("SUB:"+subgenusName.getInfraGenericEpithet());
4295 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4296 }
4297 if(speciesName !=null) {
4298 // System.out.println("SPE:"+speciesName.getSpecificEpithet());
4299 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4300 }
4301 tnb.setInfraSpecificEpithet(partialname);
4302 Taxon tmp = findMatchingTaxon(tnb,refMods);
4303 if(tmp ==null){
4304 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4305 if(!state2.getConfig().doKeepOriginalSecundum())
4306 {
4307 tmp.setSec(state2.getConfig().getSecundum());
4308 // tmp.setSec(refMods);
4309 //sourceHandler.addSource(refMods, tmp);
4310 }
4311
4312 if(species != null) {
4313 classification.addParentChild(species, tmp, null, null);
4314 higherRank=Rank.SPECIES();
4315 higherTaxa=species;
4316 }
4317 else{
4318 //System.out.println("ADDCHILDTAXON SUBSPECIES "+tmp);
4319 classification.addChildTaxon(tmp, null, null);
4320 }
4321 }
4322 return tmp;
4323 }
4324 /**
4325 * @param partialname
4326 * @param tnb
4327 * @return
4328 */
4329 private Taxon buildSpecies(String partialname, NonViralName<?> tnb) {
4330 if (genusName !=null) {
4331 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4332 }
4333 if (subgenusName !=null) {
4334 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4335 }
4336 tnb.setSpecificEpithet(partialname.toLowerCase());
4337 Taxon tmp = findMatchingTaxon(tnb,refMods);
4338 if(tmp ==null){
4339 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4340 if(!state2.getConfig().doKeepOriginalSecundum()) {
4341 tmp.setSec(state2.getConfig().getSecundum());
4342 }
4343 // tmp.setSec(refMods);
4344 //sourceHandler.addSource(refMods, tmp);
4345 if (subgenus !=null) {
4346 classification.addParentChild(subgenus, tmp, null, null);
4347 higherRank=Rank.SUBGENUS();
4348 higherTaxa=subgenus;
4349 } else {
4350 if (genus !=null) {
4351 classification.addParentChild(genus, tmp, null, null);
4352 higherRank=Rank.GENUS();
4353 higherTaxa=genus;
4354 }
4355 else{
4356 //System.out.println("ADDCHILDTAXON SPECIES "+tmp);
4357 classification.addChildTaxon(tmp, null, null);
4358 }
4359 }
4360 }
4361 return tmp;
4362 }
4363 /**
4364 * @param partialname
4365 * @param tnb
4366 * @return
4367 */
4368 private Taxon buildSubgenus(String partialname, NonViralName<?> tnb) {
4369 tnb.setInfraGenericEpithet(partialname);
4370 if (genusName !=null) {
4371 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4372 }
4373 Taxon tmp = findMatchingTaxon(tnb,refMods);
4374 if(tmp ==null){
4375 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4376 if(!state2.getConfig().doKeepOriginalSecundum()) {
4377 tmp.setSec(state2.getConfig().getSecundum());
4378 }
4379 // tmp.setSec(refMods);
4380 //sourceHandler.addSource(refMods, tmp);
4381 if(genus != null) {
4382 classification.addParentChild(genus, tmp, null, null);
4383 higherRank=Rank.GENUS();
4384 higherTaxa=genus;
4385 } else{
4386 //System.out.println("ADDCHILDTAXON SUBGENUS "+tmp);
4387 classification.addChildTaxon(tmp, null, null);
4388 }
4389 }
4390 return tmp;
4391 }
4392 /**
4393 * @param partialname
4394 * @param tnb
4395 * @return
4396 */
4397 private Taxon buildGenus(String partialname, NonViralName<?> tnb) {
4398 Taxon tmp;
4399 tnb.setGenusOrUninomial(partialname);
4400
4401
4402 tmp = findMatchingTaxon(tnb,refMods);
4403 if(tmp ==null){
4404 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4405 if(!state2.getConfig().doKeepOriginalSecundum())
4406 {
4407 tmp.setSec(state2.getConfig().getSecundum());
4408 // tmp.setSec(refMods);
4409 //sourceHandler.addSource(refMods, tmp);
4410 }
4411
4412 if(subtribe != null) {
4413 classification.addParentChild(subtribe, tmp, null, null);
4414 higherRank=Rank.SUBTRIBE();
4415 higherTaxa=subtribe;
4416 } else{
4417 if(tribe !=null) {
4418 classification.addParentChild(tribe, tmp, null, null);
4419 higherRank=Rank.TRIBE();
4420 higherTaxa=tribe;
4421 } else{
4422 if(subfamily !=null) {
4423 classification.addParentChild(subfamily, tmp, null, null);
4424 higherRank=Rank.SUBFAMILY();
4425 higherTaxa=subfamily;
4426 } else
4427 if(family !=null) {
4428 classification.addParentChild(family, tmp, null, null);
4429 higherRank=Rank.FAMILY();
4430 higherTaxa=family;
4431 }
4432 else{
4433 //System.out.println("ADDCHILDTAXON GENUS "+tmp);
4434 classification.addChildTaxon(tmp, null, null);
4435 }
4436 }
4437 }
4438 }
4439 return tmp;
4440 }
4441
4442 /**
4443 * @param tnb
4444 * @return
4445 */
4446 private Taxon buildSubtribe(NonViralName<?> tnb) {
4447 Taxon tmp = findMatchingTaxon(tnb,refMods);
4448 if(tmp==null){
4449 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4450 if(!state2.getConfig().doKeepOriginalSecundum()) {
4451 tmp.setSec(state2.getConfig().getSecundum());
4452 }
4453 // tmp.setSec(refMods);
4454 //sourceHandler.addSource(refMods, tmp);
4455 if(tribe != null) {
4456 classification.addParentChild(tribe, tmp, null, null);
4457 higherRank=Rank.TRIBE();
4458 higherTaxa=tribe;
4459 } else{
4460 //System.out.println("ADDCHILDTAXON SUBTRIBE "+tmp);
4461 classification.addChildTaxon(tmp, null, null);
4462 }
4463 }
4464 return tmp;
4465 }
4466 /**
4467 * @param tnb
4468 * @return
4469 */
4470 private Taxon buildTribe(NonViralName<?> tnb) {
4471 Taxon tmp = findMatchingTaxon(tnb,refMods);
4472 if(tmp==null){
4473 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4474 if(!state2.getConfig().doKeepOriginalSecundum()) {
4475 tmp.setSec(state2.getConfig().getSecundum());
4476 }
4477 // tmp.setSec(refMods);
4478 //sourceHandler.addSource(refMods, tmp);
4479 if (subfamily !=null) {
4480 classification.addParentChild(subfamily, tmp, null, null);
4481 higherRank=Rank.SUBFAMILY();
4482 higherTaxa=subfamily;
4483 } else {
4484 if(family != null) {
4485 classification.addParentChild(family, tmp, null, null);
4486 higherRank=Rank.FAMILY();
4487 higherTaxa=family;
4488 }
4489 else{
4490 //System.out.println("ADDCHILDTAXON TRIBE "+tmp);
4491 classification.addChildTaxon(tmp, null, null);
4492 }
4493 }
4494 }
4495 return tmp;
4496 }
4497
4498 /**
4499 * @param identifier2
4500 * @return
4501 */
4502 @SuppressWarnings("rawtypes")
4503 private Taxon getTaxonByLSID(String identifier) {
4504 //logger.info("getTaxonByLSID");
4505 // boolean lsidok=false;
4506 String id = identifier.split("__")[0];
4507 // String source = identifier.split("__")[1];
4508 LSID lsid = null;
4509 if (id.indexOf("lsid")>-1){
4510 try {
4511 lsid = new LSID(id);
4512 // lsidok=true;
4513 } catch (MalformedLSIDException e) {
4514 logger.warn("Malformed LSID");
4515 }
4516 }
4517 if (lsid !=null){
4518 List<Taxon> taxa = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
4519 LSID currentlsid=null;
4520 for (Taxon t:taxa){
4521 currentlsid = t.getLsid();
4522 if (currentlsid !=null){
4523 if (currentlsid.getLsid().equals(lsid.getLsid())){
4524 try{
4525 return t;
4526 }
4527 catch(Exception e){logger.warn("Exception occurred while comparing LSIDs "+e );}
4528 }
4529 }
4530 }
4531 }
4532 return null;
4533 }
4534 /**
4535 * @param author2
4536 * @return
4537 */
4538 @SuppressWarnings("rawtypes")
4539 private Person findOrCreateAuthor(String author2) {
4540 //logger.info("findOrCreateAuthor");
4541 List<UuidAndTitleCache<Person>> hiberPersons = importer.getAgentService().getPersonUuidAndTitleCache();
4542 for (UuidAndTitleCache<Person> hibernateP:hiberPersons){
4543 if(hibernateP.getTitleCache().equals(author2)) {
4544 AgentBase existing = importer.getAgentService().find(hibernateP.getUuid());
4545 return CdmBase.deproxy(existing, Person.class);
4546 }
4547 }
4548 Person p = Person.NewInstance();
4549 p.setTitleCache(author2,true);
4550 importer.getAgentService().saveOrUpdate(p);
4551 return CdmBase.deproxy(p, Person.class);
4552 }
4553 /**
4554 * @param author the author to set
4555 */
4556 public void setAuthor(String author) {
4557 this.author = author;
4558 }
4559
4560 /**
4561 * @return the higherTaxa
4562 */
4563 public Taxon getHigherTaxa() {
4564 return higherTaxa;
4565 }
4566 /**
4567 * @param higherTaxa the higherTaxa to set
4568 */
4569 public void setHigherTaxa(Taxon higherTaxa) {
4570 this.higherTaxa = higherTaxa;
4571 }
4572 /**
4573 * @return the higherRank
4574 */
4575 public Rank getHigherRank() {
4576 return higherRank;
4577 }
4578 /**
4579 * @param higherRank the higherRank to set
4580 */
4581 public void setHigherRank(Rank higherRank) {
4582 this.higherRank = higherRank;
4583 }
4584 public String getName(){
4585 if (newName.isEmpty()) {
4586 return originalName;
4587 } else {
4588 return newName;
4589 }
4590
4591 }
4592 /**
4593 * @return the fullName
4594 */
4595 public String getOriginalName() {
4596 return originalName;
4597 }
4598 /**
4599 * @param fullName the fullName to set
4600 */
4601 public void setOriginalName(String fullName) {
4602 this.originalName = fullName;
4603 }
4604 /**
4605 * @return the newName
4606 */
4607 public String getNewName() {
4608 return newName;
4609 }
4610 /**
4611 * @param newName the newName to set
4612 */
4613 public void setNewName(String newName) {
4614 this.newName = newName;
4615 }
4616 /**
4617 * @return the rank
4618 */
4619 public Rank getRank() {
4620 return rank;
4621 }
4622 /**
4623 * @param rank the rank to set
4624 */
4625 public void setRank(Rank rank) {
4626 this.rank = rank;
4627 }
4628 /**
4629 * @return the idenfitiger
4630 */
4631 public String getIdentifier() {
4632 return identifier;
4633 }
4634 /**
4635 * @param idenfitiger the idenfitiger to set
4636 */
4637 public void setIdentifier(String identifier) {
4638 this.identifier = identifier;
4639 }
4640 /**
4641 * @return the status
4642 */
4643 public String getStatus() {
4644 if (status == null) {
4645 return "";
4646 }
4647 return status;
4648 }
4649 /**
4650 * @param status the status to set
4651 */
4652 public void setStatus(String status) {
4653 this.status = status;
4654 }
4655 /**
4656 * @return the family
4657 */
4658 public Taxon getFamily() {
4659 return family;
4660 }
4661 /**
4662 * @param family the family to set
4663 */
4664 @SuppressWarnings("rawtypes")
4665 public void setFamily(Taxon family) {
4666 this.family = family;
4667 TaxonNameBase taxonNameBase = CdmBase.deproxy(family.getName(), TaxonNameBase.class);
4668 familyName = castTaxonNameBase(taxonNameBase,familyName);
4669 }
4670 /**
4671 * @return the subfamily
4672 */
4673 public Taxon getSubfamily() {
4674 return subfamily;
4675 }
4676 /**
4677 * @param subfamily the subfamily to set
4678 */
4679 @SuppressWarnings("rawtypes")
4680 public void setSubfamily(Taxon subfamily) {
4681 this.subfamily = subfamily;
4682 TaxonNameBase taxonNameBase = CdmBase.deproxy(subfamily.getName(), TaxonNameBase.class);
4683 subfamilyName = castTaxonNameBase(taxonNameBase,subfamilyName);
4684 }
4685 /**
4686 * @return the tribe
4687 */
4688 public Taxon getTribe() {
4689 return tribe;
4690 }
4691 /**
4692 * @param tribe the tribe to set
4693 */
4694 @SuppressWarnings("rawtypes")
4695 public void setTribe(Taxon tribe) {
4696 this.tribe = tribe;
4697 TaxonNameBase taxonNameBase = CdmBase.deproxy(tribe.getName(), TaxonNameBase.class);
4698 tribeName = castTaxonNameBase(taxonNameBase,tribeName);
4699 }
4700 /**
4701 * @return the subtribe
4702 */
4703 public Taxon getSubtribe() {
4704 return subtribe;
4705 }
4706 /**
4707 * @param subtribe the subtribe to set
4708 */
4709 @SuppressWarnings("rawtypes")
4710 public void setSubtribe(Taxon subtribe) {
4711 this.subtribe = subtribe;
4712 TaxonNameBase taxonNameBase = CdmBase.deproxy(subtribe.getName(), TaxonNameBase.class);
4713 subtribeName =castTaxonNameBase(taxonNameBase,subtribeName);
4714 }
4715 /**
4716 * @return the genus
4717 */
4718 public Taxon getGenus() {
4719 return genus;
4720 }
4721 /**
4722 * @param genus the genus to set
4723 */
4724 @SuppressWarnings("rawtypes")
4725 public void setGenus(Taxon genus) {
4726 if (genus != null){
4727 this.genus = genus;
4728 TaxonNameBase taxonNameBase = CdmBase.deproxy(genus.getName(), TaxonNameBase.class);
4729 genusName = castTaxonNameBase(taxonNameBase,genusName);
4730 }
4731 }
4732 /**
4733 * @return the subgenus
4734 */
4735 public Taxon getSubgenus() {
4736 return subgenus;
4737 }
4738 /**
4739 * @param subgenus the subgenus to set
4740 */
4741 @SuppressWarnings("rawtypes")
4742 public void setSubgenus(Taxon subgenus) {
4743 this.subgenus = subgenus;
4744 TaxonNameBase taxonNameBase = CdmBase.deproxy(subgenus.getName(), TaxonNameBase.class);
4745 subgenusName = castTaxonNameBase(taxonNameBase,subgenusName);
4746 }
4747 /**
4748 * @return the species
4749 */
4750 public Taxon getSpecies() {
4751 return species;
4752 }
4753 /**
4754 * @param species the species to set
4755 */
4756 public void setSpecies(Taxon species) {
4757 if (species != null){
4758 this.species = species;
4759 @SuppressWarnings("rawtypes")
4760 TaxonNameBase taxonNameBase = CdmBase.deproxy(species.getName(), TaxonNameBase.class);
4761 speciesName = castTaxonNameBase(taxonNameBase,speciesName);
4762 }
4763 }
4764 /**
4765 * @return the subspecies
4766 */
4767 public Taxon getSubspecies() {
4768 return subspecies;
4769 }
4770 /**
4771 * @param subspecies the subspecies to set
4772 */
4773 @SuppressWarnings("rawtypes")
4774 public void setSubspecies(Taxon subspecies) {
4775 this.subspecies = subspecies;
4776 TaxonNameBase taxonNameBase = CdmBase.deproxy(subspecies.getName(), TaxonNameBase.class);
4777 subspeciesName = castTaxonNameBase(taxonNameBase,subspeciesName);
4778
4779 }
4780
4781
4782
4783 }
4784
4785
4786 /**
4787 * @param status
4788 */
4789 private void addProblematicStatusToFile(String status) {
4790 try{
4791 FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "StatusUnknown_"+classification.getTitleCache()+".txt",true);
4792 BufferedWriter out = new BufferedWriter(fstream);
4793 out.write(status+"\n");
4794 //Close the output stream
4795 out.close();
4796 }catch (Exception e){//Catch exception if any
4797 System.err.println("Error: " + e.getMessage());
4798 }
4799
4800 }
4801
4802
4803
4804 /**
4805 * @param tnb
4806 * @return
4807 */
4808 private Taxon findMatchingTaxon(NonViralName<?> tnb, Reference refMods) {
4809 logger.info("findMatchingTaxon");
4810 Taxon tmp=null;
4811
4812 refMods=CdmBase.deproxy(refMods, Reference.class);
4813 boolean insertAsExisting =false;
4814 List<Taxon> existingTaxa = new ArrayList<Taxon>();
4815 try {
4816 existingTaxa = getMatchingTaxa(tnb);
4817 } catch (Exception e1) {
4818 // TODO Auto-generated catch block
4819 e1.printStackTrace();
4820 }
4821 double similarityScore=0.0;
4822 double similarityAuthor=-1;
4823 String author1="";
4824 String author2="";
4825 String t1="";
4826 String t2="";
4827 for (Taxon bestMatchingTaxon : existingTaxa){
4828 if (!existingTaxa.isEmpty() && state2.getConfig().isInteractWithUser() && !insertAsExisting) {
4829 // System.out.println("tnb "+tnb.getTitleCache());
4830 // System.out.println("ext "+bestMatchingTaxon.getTitleCache());
4831 try {
4832 if(tnb.getAuthorshipCache()!=null) {
4833 author1=tnb.getAuthorshipCache();
4834 }
4835 } catch (Exception e) {
4836 // TODO Auto-generated catch block
4837 e.printStackTrace();
4838 }
4839 try {
4840 if(castTaxonNameBase(bestMatchingTaxon.getName()).getAuthorshipCache()!=null) {
4841 author2=castTaxonNameBase(bestMatchingTaxon.getName()).getAuthorshipCache();
4842 }
4843 } catch (Exception e) {
4844 // TODO Auto-generated catch block
4845 e.printStackTrace();
4846 }
4847 try {
4848 t1=tnb.getTitleCache().split("sec.")[0].trim();
4849 if (author1!=null && !StringUtils.isEmpty(author1)) {
4850 t1=t1.split(Pattern.quote(author1))[0];
4851 }
4852 } catch (Exception e) {
4853 // TODO Auto-generated catch block
4854 e.printStackTrace();
4855 }
4856 try {
4857 t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
4858 if (author2!=null && !StringUtils.isEmpty(author2)) {
4859 t2=t2.split(Pattern.quote(author2))[0];
4860 }
4861 } catch (Exception e) {
4862 // TODO Auto-generated catch block
4863 e.printStackTrace();
4864 }
4865 similarityScore=similarity(t1.trim(), t2.trim());
4866 // System.out.println("taxascore: "+similarityScore);
4867 similarityAuthor=similarity(author1.trim(), author2.trim());
4868 // System.out.println("authorscore: "+similarityAuthor);
4869 insertAsExisting = compareAndCheckTaxon(tnb, refMods, similarityScore, bestMatchingTaxon,similarityAuthor);
4870 }
4871 if(insertAsExisting) {
4872 //System.out.println("KEEP "+bestMatchingTaxon.toString());
4873 tmp=bestMatchingTaxon;
4874 sourceHandler.addSource(refMods, tmp);
4875 return tmp;
4876 }
4877 }
4878 return tmp;
4879 }
4880
4881
4882 /**
4883 * @param tnb
4884 * @param refMods
4885 * @param similarityScore
4886 * @param bestMatchingTaxon
4887 * @param similarityAuthor
4888 * @return
4889 */
4890 private boolean compareAndCheckTaxon(NonViralName<?> tnb, Reference refMods, double similarityScore,
4891 Taxon bestMatchingTaxon, double similarityAuthor) {
4892 //logger.info("compareAndCheckTaxon");
4893 boolean insertAsExisting;
4894 // if (tnb.getTitleCache().split("sec.")[0].equalsIgnoreCase("Chenopodium") && bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium album")>-1) {
4895 // insertAsExisting=false;
4896 // } else{
4897 //a small hack/automatisation for Chenopodium only
4898 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase("Chenopodium") &&
4899 bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium L.")>-1) {
4900 insertAsExisting=true;
4901 } else {
4902 insertAsExisting=askIfReuseBestMatchingTaxon(tnb, bestMatchingTaxon, refMods, similarityScore,similarityAuthor);
4903 }
4904 // }
4905
4906 logDecision(tnb,bestMatchingTaxon,insertAsExisting, refMods);
4907 return insertAsExisting;
4908 }
4909
4910 /**
4911 * @return
4912 */
4913 @SuppressWarnings("rawtypes")
4914 private List<Taxon> getMatchingTaxa(TaxonNameBase tnb) {
4915 //logger.info("getMatchingTaxon");
4916 if (tnb.getTitleCache() == null){
4917 tnb.setTitleCache(tnb.toString(), tnb.isProtectedTitleCache());
4918 }
4919
4920 Pager<TaxonBase> pager=importer.getTaxonService().findByTitle(TaxonBase.class, tnb.getTitleCache().split("sec.")[0].trim(), MatchMode.BEGINNING, null, null, null, null, null);
4921 List<TaxonBase>records = pager.getRecords();
4922
4923 List<Taxon> existingTaxons = new ArrayList<Taxon>();
4924 for (TaxonBase r:records){
4925 try{
4926 Taxon bestMatchingTaxon = (Taxon)r;
4927 // System.out.println("best: "+bestMatchingTaxon.getTitleCache());
4928 if(compareTaxonNameLength(bestMatchingTaxon.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4929 existingTaxons.add(bestMatchingTaxon);
4930 }
4931 }catch(ClassCastException e){logger.warn("classcast exception, might be a synonym, ignore it");}
4932 }
4933 Taxon bmt = importer.getTaxonService().findBestMatchingTaxon(tnb.getTitleCache());
4934 if (!existingTaxons.contains(bmt) && bmt!=null) {
4935 if(compareTaxonNameLength(bmt.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4936 existingTaxons.add(bmt);
4937 }
4938 }
4939 return existingTaxons;
4940 }
4941
4942 /**
4943 * Check if the found Taxon can reasonnably be the same
4944 * example: with and without author should match, but the subspecies should not be suggested for a genus
4945 * */
4946 private boolean compareTaxonNameLength(String f, String o){
4947 //logger.info("compareTaxonNameLength");
4948 boolean lengthOk=false;
4949 int sizeF = f.length();
4950 int sizeO = o.length();
4951 if (sizeO>=sizeF) {
4952 lengthOk=true;
4953 }
4954 if(sizeF>sizeO) {
4955 if (sizeF-sizeO>10) {
4956 lengthOk=false;
4957 } else {
4958 lengthOk=true;
4959 }
4960 }
4961
4962 // System.out.println(lengthOk+": compare "+f+" ("+f.length()+") and "+o+" ("+o.length()+")");
4963 return lengthOk;
4964 }
4965
4966 private double similarity(String s1, String s2) {
4967 //logger.info("similarity");
4968 //System.out.println("similarity *"+s1+"* vs. *"+s2+"*");
4969 if(!StringUtils.isEmpty(s1) && !StringUtils.isEmpty(s2)){
4970 String l1=s1.toLowerCase().trim();
4971 String l2=s2.toLowerCase().trim();
4972 if (l1.length() < l2.length()) { // s1 should always be bigger
4973 String swap = l1; l1 = l2; l2 = swap;
4974 }
4975 int bigLen = l1.length();
4976 if (bigLen == 0) { return 1.0; /* both strings are zero length */ }
4977 return (bigLen - computeEditDistance(l1, l2)) / (double) bigLen;
4978 }
4979 else{
4980 if(s1!=null && s2!=null){
4981 if (s1.equalsIgnoreCase(s2)) {
4982 return 1;
4983 }
4984 }
4985 return -1;
4986 }
4987 }
4988
4989 private int computeEditDistance(String s1, String s2) {
4990 //logger.info("computeEditDistance");
4991 int[] costs = new int[s2.length() + 1];
4992 for (int i = 0; i <= s1.length(); i++) {
4993 int lastValue = i;
4994 for (int j = 0; j <= s2.length(); j++) {
4995 if (i == 0) {
4996 costs[j] = j;
4997 } else {
4998 if (j > 0) {
4999 int newValue = costs[j - 1];
5000 if (s1.charAt(i - 1) != s2.charAt(j - 1)) {
5001 newValue = Math.min(Math.min(newValue, lastValue),
5002 costs[j]) + 1;
5003 }
5004 costs[j - 1] = lastValue;
5005 lastValue = newValue;
5006 }
5007 }
5008 }
5009 if (i > 0) {
5010 costs[s2.length()] = lastValue;
5011 }
5012 }
5013 return costs[s2.length()];
5014 }
5015
5016 Map<Rank, Taxon> hierarchy = new HashMap<Rank, Taxon>();
5017 /**
5018 * @param taxonNameBase
5019 */
5020 @SuppressWarnings("rawtypes")
5021 public void lookForParentNode(NonViralName<?> taxonNameBase, Taxon tax, Reference ref, MyName myName) {
5022 logger.info("lookForParentNode "+taxonNameBase.getTitleCache()+" for "+myName.toString());
5023 //System.out.println("LOOK FOR PARENT NODE "+taxonnamebase.toString()+"; "+tax.toString()+"; "+taxonnamebase.getRank());
5024 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
5025 if (taxonNameBase.getRank().equals(Rank.FORM())){
5026 handleFormHierarchy(ref, myName, parser);
5027 }
5028 else if (taxonNameBase.getRank().equals(Rank.VARIETY())){
5029 handleVarietyHierarchy(ref, myName, parser);
5030 }
5031 else if (taxonNameBase.getRank().equals(Rank.SUBSPECIES())){
5032 handleSubSpeciesHierarchy(ref, myName, parser);
5033 }
5034 else if (taxonNameBase.getRank().equals(Rank.SPECIES())){
5035 handleSpeciesHierarchy(ref, myName, parser);
5036 }
5037 else if (taxonNameBase.getRank().equals(Rank.SUBGENUS())){
5038 handleSubgenusHierarchy(ref, myName, parser);
5039 }
5040
5041 if (taxonNameBase.getRank().equals(Rank.GENUS())){
5042 handleGenusHierarchy(ref, myName, parser);
5043 }
5044 if (taxonNameBase.getRank().equals(Rank.SUBTRIBE())){
5045 handleSubtribeHierarchy(ref, myName, parser);
5046 }
5047 if (taxonNameBase.getRank().equals(Rank.TRIBE())){
5048 handleTribeHierarchy(ref, myName, parser);
5049 }
5050
5051 if (taxonNameBase.getRank().equals(Rank.SUBFAMILY())){
5052 handleSubfamilyHierarchy(ref, myName, parser);
5053 }
5054 }
5055
5056 /**
5057 * @param ref
5058 * @param myName
5059 * @param parser
5060 */
5061 private void handleSubfamilyHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5062 System.out.println("handleSubfamilyHierarchy");
5063 String parentStr = myName.getFamilyStr();
5064 Rank r = Rank.FAMILY();
5065 if(parentStr!=null){
5066
5067 Taxon parent = null;
5068 Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, parentStr, MatchMode.BEGINNING, null, null, null, null, null);
5069 for(TaxonBase tb:taxontest.getRecords()){
5070 try {
5071 if (tb.getName().getRank().equals(r)) {
5072 parent=CdmBase.deproxy(tb, Taxon.class);
5073 }
5074 break;
5075 } catch (Exception e) {
5076 // TODO Auto-generated catch block
5077 e.printStackTrace();
5078 }
5079 }
5080 if(parent == null) {
5081 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5082 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5083 if(tmp ==null)
5084 {
5085 parent=Taxon.NewInstance(parentNameName, ref);
5086 importer.getTaxonService().save(parent);
5087 parent = CdmBase.deproxy(parent, Taxon.class);
5088 } else {
5089 parent=tmp;
5090 }
5091 lookForParentNode(parentNameName, parent, ref,myName);
5092
5093 }
5094 hierarchy.put(r,parent);
5095 }
5096 }
5097
5098 /**
5099 * @param ref
5100 * @param myName
5101 * @param parser
5102 */
5103 private void handleTribeHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5104 String parentStr = myName.getSubfamilyStr();
5105 Rank r = Rank.SUBFAMILY();
5106 if (parentStr == null){
5107 parentStr = myName.getFamilyStr();
5108 r = Rank.FAMILY();
5109 }
5110 if(parentStr!=null){
5111 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5112 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5113 // importer.getTaxonService().save(parent);
5114 // parent = CdmBase.deproxy(parent, Taxon.class);
5115
5116 boolean parentDoesNotExists = true;
5117 for (TaxonNode p : classification.getAllNodes()){
5118 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5119 parentDoesNotExists = false;
5120 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5121 break;
5122 }
5123 }
5124 // if(parentDoesNotExists) {
5125 // importer.getTaxonService().save(parent);
5126 // parent = CdmBase.deproxy(parent, Taxon.class);
5127 // lookForParentNode(parentNameName, parent, ref,myName);
5128 // }
5129 if(parentDoesNotExists) {
5130 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5131 if(tmp ==null)
5132 {
5133 parent=Taxon.NewInstance(parentNameName, ref);
5134 importer.getTaxonService().save(parent);
5135 parent = CdmBase.deproxy(parent, Taxon.class);
5136 } else {
5137 parent=tmp;
5138 }
5139 lookForParentNode(parentNameName, parent, ref,myName);
5140
5141 }
5142 hierarchy.put(r,parent);
5143 }
5144 }
5145
5146 /**
5147 * @param ref
5148 * @param myName
5149 * @param parser
5150 */
5151 private void handleSubtribeHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5152 String parentStr = myName.getTribeStr();
5153 Rank r = Rank.TRIBE();
5154 if (parentStr == null){
5155 parentStr = myName.getSubfamilyStr();
5156 r = Rank.SUBFAMILY();
5157 }
5158 if (parentStr == null){
5159 parentStr = myName.getFamilyStr();
5160 r = Rank.FAMILY();
5161 }
5162 if(parentStr!=null){
5163 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5164 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5165 // importer.getTaxonService().save(parent);
5166 // parent = CdmBase.deproxy(parent, Taxon.class);
5167
5168 boolean parentDoesNotExists = true;
5169 for (TaxonNode p : classification.getAllNodes()){
5170 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5171 parentDoesNotExists = false;
5172 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5173
5174 break;
5175 }
5176 }
5177 // if(parentDoesNotExists) {
5178 // importer.getTaxonService().save(parent);
5179 // parent = CdmBase.deproxy(parent, Taxon.class);
5180 // lookForParentNode(parentNameName, parent, ref,myName);
5181 // }
5182 if(parentDoesNotExists) {
5183 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5184 if(tmp ==null)
5185 {
5186 parent=Taxon.NewInstance(parentNameName, ref);
5187 importer.getTaxonService().save(parent);
5188 parent = CdmBase.deproxy(parent, Taxon.class);
5189 } else {
5190 parent=tmp;
5191 }
5192 lookForParentNode(parentNameName, parent, ref,myName);
5193
5194 }
5195 hierarchy.put(r,parent);
5196 }
5197 }
5198
5199 /**
5200 * @param ref
5201 * @param myName
5202 * @param parser
5203 */
5204 private void handleGenusHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5205 String parentStr = myName.getSubtribeStr();
5206 Rank r = Rank.SUBTRIBE();
5207 if (parentStr == null){
5208 parentStr = myName.getTribeStr();
5209 r = Rank.TRIBE();
5210 }
5211 if (parentStr == null){
5212 parentStr = myName.getSubfamilyStr();
5213 r = Rank.SUBFAMILY();
5214 }
5215 if (parentStr == null){
5216 parentStr = myName.getFamilyStr();
5217 r = Rank.FAMILY();
5218 }
5219 if(parentStr!=null){
5220 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5221 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5222 // importer.getTaxonService().save(parent);
5223 // parent = CdmBase.deproxy(parent, Taxon.class);
5224
5225 boolean parentDoesNotExist = true;
5226 for (TaxonNode p : classification.getAllNodes()){
5227 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5228 // System.out.println(p.getTaxon().getUuid());
5229 // System.out.println(parent.getUuid());
5230 parentDoesNotExist = false;
5231 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5232 break;
5233 }
5234 }
5235 // if(parentDoesNotExists) {
5236 // importer.getTaxonService().save(parent);
5237 // parent = CdmBase.deproxy(parent, Taxon.class);
5238 // lookForParentNode(parentNameName, parent, ref,myName);
5239 // }
5240 if(parentDoesNotExist) {
5241 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5242 if(tmp ==null){
5243
5244 parent=Taxon.NewInstance(parentNameName, ref);
5245 importer.getTaxonService().save(parent);
5246 parent = CdmBase.deproxy(parent, Taxon.class);
5247 } else {
5248 parent=tmp;
5249 }
5250 lookForParentNode(parentNameName, parent, ref,myName);
5251
5252 }
5253 hierarchy.put(r,parent);
5254 }
5255 }
5256
5257 /**
5258 * @param ref
5259 * @param myName
5260 * @param parser
5261 */
5262 private void handleSubgenusHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5263 String parentStr = myName.getGenusStr();
5264 Rank r = Rank.GENUS();
5265
5266 if(parentStr==null){
5267 parentStr = myName.getSubtribeStr();
5268 r = Rank.SUBTRIBE();
5269 }
5270 if (parentStr == null){
5271 parentStr = myName.getTribeStr();
5272 r = Rank.TRIBE();
5273 }
5274 if (parentStr == null){
5275 parentStr = myName.getSubfamilyStr();
5276 r = Rank.SUBFAMILY();
5277 }
5278 if (parentStr == null){
5279 parentStr = myName.getFamilyStr();
5280 r = Rank.FAMILY();
5281 }
5282 if(parentStr!=null){
5283 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5284 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5285 // importer.getTaxonService().save(parent);
5286 // parent = CdmBase.deproxy(parent, Taxon.class);
5287
5288 boolean parentDoesNotExists = true;
5289 for (TaxonNode p : classification.getAllNodes()){
5290 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5291 // System.out.println(p.getTaxon().getUuid());
5292 // System.out.println(parent.getUuid());
5293 parentDoesNotExists = false;
5294 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5295 break;
5296 }
5297 }
5298 // if(parentDoesNotExists) {
5299 // importer.getTaxonService().save(parent);
5300 // parent = CdmBase.deproxy(parent, Taxon.class);
5301 // lookForParentNode(parentNameName, parent, ref,myName);
5302 // }
5303 if(parentDoesNotExists) {
5304 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5305 if(tmp ==null)
5306 {
5307 parent=Taxon.NewInstance(parentNameName, ref);
5308 importer.getTaxonService().save(parent);
5309 parent = CdmBase.deproxy(parent, Taxon.class);
5310 } else {
5311 parent=tmp;
5312 }
5313 lookForParentNode(parentNameName, parent, ref,myName);
5314
5315 }
5316 hierarchy.put(r,parent);
5317 }
5318 }
5319
5320 /**
5321 * @param ref
5322 * @param myName
5323 * @param parser
5324 */
5325 private void handleSpeciesHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5326 String parentStr = myName.getSubgenusStr();
5327 Rank r = Rank.SUBGENUS();
5328
5329 if(parentStr==null){
5330 parentStr = myName.getGenusStr();
5331 r = Rank.GENUS();
5332 }
5333
5334 if(parentStr==null){
5335 parentStr = myName.getSubtribeStr();
5336 r = Rank.SUBTRIBE();
5337 }
5338 if (parentStr == null){
5339 parentStr = myName.getTribeStr();
5340 r = Rank.TRIBE();
5341 }
5342 if (parentStr == null){
5343 parentStr = myName.getSubfamilyStr();
5344 r = Rank.SUBFAMILY();
5345 }
5346 if (parentStr == null){
5347 parentStr = myName.getFamilyStr();
5348 r = Rank.FAMILY();
5349 }
5350 if(parentStr!=null){
5351 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5352 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5353 hierarchy.put(r,parent);
5354 }
5355 }
5356
5357 /**
5358 * @param ref
5359 * @param myName
5360 * @param parser
5361 */
5362 private void handleSubSpeciesHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5363 String parentStr = myName.getSpeciesStr();
5364 Rank r = Rank.SPECIES();
5365
5366
5367 if(parentStr==null){
5368 parentStr = myName.getSubgenusStr();
5369 r = Rank.SUBGENUS();
5370 }
5371
5372 if(parentStr==null){
5373 parentStr = myName.getGenusStr();
5374 r = Rank.GENUS();
5375 }
5376
5377 if(parentStr==null){
5378 parentStr = myName.getSubtribeStr();
5379 r = Rank.SUBTRIBE();
5380 }
5381 if (parentStr == null){
5382 parentStr = myName.getTribeStr();
5383 r = Rank.TRIBE();
5384 }
5385 if (parentStr == null){
5386 parentStr = myName.getSubfamilyStr();
5387 r = Rank.SUBFAMILY();
5388 }
5389 if (parentStr == null){
5390 parentStr = myName.getFamilyStr();
5391 r = Rank.FAMILY();
5392 }
5393 if(parentStr!=null){
5394 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5395 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5396 hierarchy.put(r,parent);
5397 }
5398 }
5399
5400
5401 /**
5402 * @param ref
5403 * @param myName
5404 * @param parser
5405 */
5406 private void handleFormHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5407 String parentStr = myName.getSubspeciesStr();
5408 Rank r = Rank.SUBSPECIES();
5409
5410
5411 if(parentStr==null){
5412 parentStr = myName.getSpeciesStr();
5413 r = Rank.SPECIES();
5414 }
5415
5416 if(parentStr==null){
5417 parentStr = myName.getSubgenusStr();
5418 r = Rank.SUBGENUS();
5419 }
5420
5421 if(parentStr==null){
5422 parentStr = myName.getGenusStr();
5423 r = Rank.GENUS();
5424 }
5425
5426 if(parentStr==null){
5427 parentStr = myName.getSubtribeStr();
5428 r = Rank.SUBTRIBE();
5429 }
5430 if (parentStr == null){
5431 parentStr = myName.getTribeStr();
5432 r = Rank.TRIBE();
5433 }
5434 if (parentStr == null){
5435 parentStr = myName.getSubfamilyStr();
5436 r = Rank.SUBFAMILY();
5437 }
5438 if (parentStr == null){
5439 parentStr = myName.getFamilyStr();
5440 r = Rank.FAMILY();
5441 }
5442 if(parentStr!=null){
5443 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5444 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5445 hierarchy.put(r,parent);
5446 }
5447 }
5448
5449 /**
5450 * @param ref
5451 * @param myName
5452 * @param parser
5453 */
5454 private void handleVarietyHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5455 String parentStr = myName.getSubspeciesStr();
5456 Rank r = Rank.SUBSPECIES();
5457
5458 if(parentStr==null){
5459 parentStr = myName.getSpeciesStr();
5460 r = Rank.SPECIES();
5461 }
5462
5463 if(parentStr==null){
5464 parentStr = myName.getSubgenusStr();
5465 r = Rank.SUBGENUS();
5466 }
5467
5468 if(parentStr==null){
5469 parentStr = myName.getGenusStr();
5470 r = Rank.GENUS();
5471 }
5472
5473 if(parentStr==null){
5474 parentStr = myName.getSubtribeStr();
5475 r = Rank.SUBTRIBE();
5476 }
5477 if (parentStr == null){
5478 parentStr = myName.getTribeStr();
5479 r = Rank.TRIBE();
5480 }
5481 if (parentStr == null){
5482 parentStr = myName.getSubfamilyStr();
5483 r = Rank.SUBFAMILY();
5484 }
5485 if (parentStr == null){
5486 parentStr = myName.getFamilyStr();
5487 r = Rank.FAMILY();
5488 }
5489 if(parentStr!=null){
5490 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5491 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5492 hierarchy.put(r,parent);
5493 }
5494 }
5495
5496 /**
5497 * @param ref
5498 * @param myName
5499 * @param parser
5500 * @param parentStr
5501 * @param r
5502 * @return
5503 */
5504 private Taxon handleParentName(Reference ref, MyName myName, INonViralNameParser<?> parser, String parentStr, Rank r) {
5505 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5506 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5507 // importer.getTaxonService().save(parent);
5508 // parent = CdmBase.deproxy(parent, Taxon.class);
5509
5510 boolean parentDoesNotExists = true;
5511 for (TaxonNode p : classification.getAllNodes()){
5512 if(p.getTaxon().getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(parent.getTitleCache().split("sec.")[0].trim())) {
5513 // System.out.println(p.getTaxon().getUuid());
5514 // System.out.println(parent.getUuid());
5515 parentDoesNotExists = false;
5516 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5517 break;
5518 }
5519 }
5520 if(parentDoesNotExists) {
5521 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5522 // System.out.println("FOUND PARENT "+tmp.toString()+" for "+parentNameName.toString());
5523 if(tmp ==null){
5524
5525 parent=Taxon.NewInstance(parentNameName, ref);
5526 importer.getTaxonService().save(parent);
5527
5528 } else {
5529 parent=tmp;
5530 }
5531 lookForParentNode(parentNameName, parent, ref,myName);
5532
5533 }
5534 return parent;
5535 }
5536
5537 private void addNameDifferenceToFile(String originalname, String atomisedname){
5538 try{
5539 FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NamesDifferent_"+classification.getTitleCache()+".txt",true);
5540 BufferedWriter out = new BufferedWriter(fstream);
5541 out.write(originalname+" (original) versus "+replaceNull(atomisedname)+" (atomised) \n");
5542 //Close the output stream
5543 out.close();
5544 }catch (Exception e){//Catch exception if any
5545 System.err.println("Error: " + e.getMessage());
5546 }
5547 }
5548 /**
5549 * @param name
5550 * @param author
5551 * @param nomenclaturalCode2
5552 * @param rank
5553 */
5554 private void addProblemNameToFile(String name, String author, NomenclaturalCode nomenclaturalCode2, Rank rank) {
5555 try{
5556 FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NameNotParsed.txt",true);
5557 BufferedWriter out = new BufferedWriter(fstream);
5558 out.write(name+"\t"+replaceNull(author)+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\n");
5559 //Close the output stream
5560 out.close();
5561 }catch (Exception e){//Catch exception if any
5562 System.err.println("Error: " + e.getMessage());
5563 }
5564 }
5565
5566
5567 /**
5568 * @param tnb
5569 * @param bestMatchingTaxon
5570 * @param insertAsExisting
5571 * @param refMods
5572 */
5573 private void logDecision(NonViralName<?> tnb, Taxon bestMatchingTaxon, boolean insertAsExisting, Reference refMods) {
5574 try{
5575 FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "Decisions_"+classification.toString()+".txt",true);
5576 BufferedWriter out = new BufferedWriter(fstream);
5577 out.write(tnb.getTitleCache()+" sec. "+refMods+"\t"+bestMatchingTaxon.getTitleCache()+"\t"+insertAsExisting+"\n");
5578 //Close the output stream
5579 out.close();
5580 }catch (Exception e){//Catch exception if any
5581 System.err.println("Error: " + e.getMessage());
5582 }
5583 }
5584
5585
5586 @SuppressWarnings("unused")
5587 private String replaceNull(Object in){
5588 if (in == null) {
5589 return "";
5590 }
5591 if (in.getClass().equals(NomenclaturalCode.class)) {
5592 return ((NomenclaturalCode)in).getTitleCache();
5593 }
5594 return in.toString();
5595 }
5596
5597 /**
5598 * @param fullName
5599 * @param nomenclaturalCode2
5600 * @param rank
5601 */
5602 private void addProblemNameToFile(String type, String name, NomenclaturalCode nomenclaturalCode2, Rank rank, String problems) {
5603 try{
5604 FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NameNotParsed_"+classification.getTitleCache()+".txt",true);
5605 BufferedWriter out = new BufferedWriter(fstream);
5606 out.write(type+"\t"+name+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\t"+problems+"\n");
5607 //Close the output stream
5608 out.close();
5609 }catch (Exception e){//Catch exception if any
5610 System.err.println("Error: " + e.getMessage());
5611 }
5612
5613 }
5614
5615 }
5616
5617
5618