ref #8162 move FeatureTree and FeatureNode to term package
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / taxonx2013 / TaxonXTreatmentExtractor.java
1 /**
2 * Copyright (C) 2013 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.io.taxonx2013;
10
11 import java.io.BufferedWriter;
12 import java.io.File;
13 import java.io.FileWriter;
14 import java.io.IOException;
15 import java.net.URI;
16 import java.util.ArrayList;
17 import java.util.Arrays;
18 import java.util.HashMap;
19 import java.util.List;
20 import java.util.Map;
21 import java.util.Set;
22 import java.util.UUID;
23 import java.util.regex.Matcher;
24 import java.util.regex.Pattern;
25
26 import javax.xml.transform.TransformerException;
27 import javax.xml.transform.TransformerFactoryConfigurationError;
28
29 import org.apache.commons.lang.StringUtils;
30 import org.apache.log4j.Logger;
31 import org.w3c.dom.Node;
32 import org.w3c.dom.NodeList;
33
34 import com.ibm.lsid.MalformedLSIDException;
35
36 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
37 import eu.etaxonomy.cdm.api.service.pager.Pager;
38 import eu.etaxonomy.cdm.model.agent.AgentBase;
39 import eu.etaxonomy.cdm.model.agent.Person;
40 import eu.etaxonomy.cdm.model.common.CdmBase;
41 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
42 import eu.etaxonomy.cdm.model.common.LSID;
43 import eu.etaxonomy.cdm.model.common.Language;
44 import eu.etaxonomy.cdm.model.description.Feature;
45 import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
46 import eu.etaxonomy.cdm.model.description.TaxonDescription;
47 import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
48 import eu.etaxonomy.cdm.model.description.TextData;
49 import eu.etaxonomy.cdm.model.name.INonViralName;
50 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
51 import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
52 import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
53 import eu.etaxonomy.cdm.model.name.Rank;
54 import eu.etaxonomy.cdm.model.name.TaxonName;
55 import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
56 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
57 import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
58 import eu.etaxonomy.cdm.model.reference.Reference;
59 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
60 import eu.etaxonomy.cdm.model.taxon.Classification;
61 import eu.etaxonomy.cdm.model.taxon.Synonym;
62 import eu.etaxonomy.cdm.model.taxon.SynonymType;
63 import eu.etaxonomy.cdm.model.taxon.Taxon;
64 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
65 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
66 import eu.etaxonomy.cdm.model.term.FeatureNode;
67 import eu.etaxonomy.cdm.model.term.FeatureTree;
68 import eu.etaxonomy.cdm.persistence.dto.UuidAndTitleCache;
69 import eu.etaxonomy.cdm.persistence.query.MatchMode;
70 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
71 import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
72 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
73 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImplRegExBase;
74
75 /**
76 * @author pkelbert
77 * @since 2 avr. 2013
78 *
79 */
80 public class TaxonXTreatmentExtractor extends TaxonXExtractor{
81
82 private static final String PUBLICATION_YEAR = "publicationYear";
83
84 private static final Logger logger = Logger.getLogger(TaxonXTreatmentExtractor.class);
85
86 private static final String notMarkedUp = "Not marked-up";
87 private static final UUID proIbioTreeUUID = UUID.fromString("2c49f506-c7f7-44de-a8b9-2e695de3769c");
88 private static final UUID OtherUUID = UUID.fromString("6465f8aa-2175-446f-807e-7163994b120f");
89 private static final UUID NotMarkedUpUUID = UUID.fromString("796fe3a5-2c9c-4a89-b298-7598ca944063");
90 private static final boolean skippQuestion = true;
91
92 private final NomenclaturalCode nomenclaturalCode;
93 private Classification classification;
94
95 private String treatmentMainName,originalTreatmentName;
96
97 private final HashMap<String,Map<String,String>> namesMap = new HashMap<String, Map<String,String>>();
98
99
100 private final Pattern keypattern = Pattern.compile("^(\\d+.*|-\\d+.*)");
101 private final Pattern keypatternend = Pattern.compile("^.+?\\d$");
102
103 private boolean maxRankRespected =false;
104 private Map<String, Feature> featuresMap;
105
106 private MyName currentMyName;
107
108 private Reference sourceUrlRef;
109
110 private String followingText; //text element immediately following a tax:name in tax:nomenclature TODO move do state
111 private String usedFollowingTextPrefix; //the part of the following text which has been used during taxon name creation
112
113 private final TaxonXAddSources sourceHandler = new TaxonXAddSources();
114
/**
 * Creates an extractor and wires up its source handler.
 *
 * @param nomenclaturalCode the nomenclatural code kept by this extractor
 * @param classification the classification kept by this extractor
 * @param importer the importer whose services are used; also handed to the source handler
 * @param configState the import state; also handed to the source handler
 * @param featuresMap the initial map of features, keyed by name
 * @param urlSource the reference kept as source URL reference; also handed to the source handler
 */
public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode, Classification classification,
        TaxonXImport importer, TaxonXImportState configState, Map<String, Feature> featuresMap,
        Reference urlSource) {
    // plain state first
    this.nomenclaturalCode = nomenclaturalCode;
    this.classification = classification;
    this.importer = importer;
    this.state2 = configState;
    this.featuresMap = featuresMap;
    this.sourceUrlRef = urlSource;

    prepareCollectors(configState, importer.getAgentService());

    // the source handler needs the same reference, importer and state
    sourceHandler.setSourceUrlRef(this.sourceUrlRef);
    sourceHandler.setImporter(importer);
    sourceHandler.setConfigState(configState);
}
134
135 /**
136 * extracts all the treament information and save them
137 * @param treatmentnode: the XML Node
138 * @param tosave: the list of object to save into the CDM
139 * @param refMods: the reference extracted from the MODS
140 * @param sourceName: the URI of the document
141 */
142 @SuppressWarnings({ "rawtypes", "unused" })
143
144 protected void extractTreatment(Node treatmentnode, Reference refMods, URI sourceName) { logger.info("extractTreatment");
145 List<TaxonName> namesToSave = new ArrayList<TaxonName>();
146 NodeList children = treatmentnode.getChildNodes();
147 Taxon acceptedTaxon =null;
148 boolean hasRefgroup=false;
149
150 //needed?
151 for (int i=0;i<children.getLength();i++){
152 if(children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group")) {
153 hasRefgroup=true;
154 }
155 }
156
157 for (int i=0;i<children.getLength();i++){
158 Node child = children.item(i);
159 acceptedTaxon = handleSingleNode(refMods, sourceName, namesToSave, child, acceptedTaxon);
160 }
161 // logger.info("saveUpdateNames");
162 if (maxRankRespected){
163 importer.getNameService().saveOrUpdate(namesToSave);
164 importer.getClassificationService().saveOrUpdate(classification);
165 //logger.info("saveUpdateNames-ok");
166 }
167
168 buildFeatureTree();
169 }
170
171 private Taxon handleSingleNode(Reference refMods, URI sourceName,
172 List<TaxonName> namesToSave, Node child, Taxon acceptedTaxon) {
173 Taxon defaultTaxon =null;
174
175 String nodeName = child.getNodeName();
176 if (nodeName.equalsIgnoreCase("tax:nomenclature")){
177 NodeList nomenclatureChildren = child.getChildNodes();
178 boolean containsName = false;
179 for(int k=0; k<nomenclatureChildren.getLength(); k++){
180 if(nomenclatureChildren.item(k).getNodeName().equalsIgnoreCase("tax:name")){
181 containsName=true;
182 break;
183 }
184 }
185 if (containsName){
186 reloadClassification();
187 //extract "main" the scientific name
188 try{
189 acceptedTaxon = extractNomenclature(child, namesToSave, refMods);
190 }catch(ClassCastException e){
191 //FIXME exception handling
192 e.printStackTrace();
193 }
194 // System.out.println("acceptedTaxon : "+acceptedTaxon);
195 }
196 }else if (nodeName.equalsIgnoreCase("tax:ref_group") && maxRankRespected){
197 reloadClassification();
198 //extract the References within the document
199 extractReferences(child, namesToSave ,acceptedTaxon,refMods);
200 }else if (nodeName.equalsIgnoreCase("tax:div") &&
201 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected){
202 File file = new File(TaxonXImport.LOG_FOLDER + "multipleTaxonX.txt");
203 FileWriter writer;
204 try {
205 writer = new FileWriter(file ,true);
206 writer.write(sourceName+"\n");
207 writer.flush();
208 writer.close();
209 } catch (IOException e1) {
210 // TODO Auto-generated catch block
211 logger.error(e1.getMessage());
212 }
213 // String multiple = askMultiple(children.item(i));
214 String multiple = "Other";
215 if (multiple.equalsIgnoreCase("other")) {
216 extractSpecificFeatureNotStructured(child,acceptedTaxon, defaultTaxon,namesToSave, refMods,multiple);
217 }else if (multiple.equalsIgnoreCase("synonyms")) {
218 try{
219 extractSynonyms(child,acceptedTaxon, refMods, null);
220 }catch(NullPointerException e){
221 logger.warn("the accepted taxon is maybe null");
222 }
223 }else if(multiple.equalsIgnoreCase("material examined")){
224 extractMaterials(child, acceptedTaxon, refMods, namesToSave);
225 }else if (multiple.equalsIgnoreCase("distribution")){
226 extractDistribution(child, acceptedTaxon, defaultTaxon, namesToSave, refMods);
227 }else if (multiple.equalsIgnoreCase("type status")){
228 extractDescriptionWithReference(child, acceptedTaxon, defaultTaxon,refMods, "TypeStatus");
229 }else if (multiple.equalsIgnoreCase("vernacular name")){
230 extractDescriptionWithReference(child, acceptedTaxon, defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
231 }else{
232 extractSpecificFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,multiple);
233 }
234 }
235 else if(nodeName.equalsIgnoreCase("tax:div") &&
236 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected){
237 extractFeature(child,acceptedTaxon,defaultTaxon, namesToSave, refMods, Feature.BIOLOGY_ECOLOGY());
238 }
239 else if(nodeName.equalsIgnoreCase("tax:div") &&
240 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("vernacularName") && maxRankRespected){
241 extractDescriptionWithReference(child, acceptedTaxon,defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
242 }
243 else if(nodeName.equalsIgnoreCase("tax:div") &&
244 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected){
245 extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DESCRIPTION());
246 }
247 else if(nodeName.equalsIgnoreCase("tax:div") &&
248 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected){
249 extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,Feature.DIAGNOSIS());
250 }
251 else if(nodeName.equalsIgnoreCase("tax:div") &&
252 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected){
253 extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DISCUSSION());
254 }
255 else if(nodeName.equalsIgnoreCase("tax:div") &&
256 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("note") && maxRankRespected){
257 extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DESCRIPTION());
258 }
259 else if(nodeName.equalsIgnoreCase("tax:div") &&
260 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected){
261 extractDistribution(child,acceptedTaxon,defaultTaxon,namesToSave, refMods);
262 }
263 else if(nodeName.equalsIgnoreCase("tax:div") &&
264 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected){
265 extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave,refMods,Feature.ETYMOLOGY());
266 }
267 else if(nodeName.equalsIgnoreCase("tax:div") &&
268 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected){
269 extractMaterials(child,acceptedTaxon, refMods, namesToSave);
270 }
271 else if(nodeName.equalsIgnoreCase("tax:figure") && maxRankRespected){
272 extractSpecificFeature(child,acceptedTaxon,defaultTaxon, namesToSave, refMods, "Figure");
273 }
274 else if(nodeName.equalsIgnoreCase("tax:div") &&
275 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") && maxRankRespected){
276 extractSpecificFeature(child, acceptedTaxon,defaultTaxon, namesToSave, refMods, "table");
277 }else if(nodeName.equalsIgnoreCase("tax:div") &&
278 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected){
279 //TODO IGNORE keys for the moment
280 //extractKey(children.item(i),acceptedTaxon, nameToSave,source, refMods);
281 extractSpecificFeatureNotStructured(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,"Keys - unparsed");
282 }
283 else{
284 if (! nodeName.equalsIgnoreCase("tax:pb")){
285 //logger.info("ANOTHER KIND OF NODES: "+children.item(i).getNodeName()+", "+children.item(i).getAttributes());
286 if (child.getAttributes() !=null) {
287 logger.info("First Attribute: " + child.getAttributes().item(0));
288 }
289 extractSpecificFeatureNotStructured(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, notMarkedUp);
290 }else{
291 //FIXME
292 logger.warn("Unhandled");
293 }
294 }
295 return acceptedTaxon;
296 }
297
298
299 protected Map<String,Feature> getFeaturesUsed(){
300 return featuresMap;
301 }
302 /**
303 *
304 */
305 private void buildFeatureTree() {
306 logger.info("buildFeatureTree");
307 FeatureTree proibiospheretree = importer.getFeatureTreeService().find(proIbioTreeUUID);
308 if (proibiospheretree == null){
309 List<FeatureTree> trees = importer.getFeatureTreeService().list(FeatureTree.class, null, null, null, null);
310 if (trees.size()==1) {
311 FeatureTree<Feature> ft = trees.get(0);
312 if (featuresMap==null) {
313 featuresMap=new HashMap<String, Feature>();
314 }
315 for (Feature feature: ft.getDistinctFeatures()){
316 if(feature!=null) {
317 featuresMap.put(feature.getTitleCache(), feature);
318 }
319 }
320 }
321 proibiospheretree = FeatureTree.NewInstance();
322 proibiospheretree.setUuid(proIbioTreeUUID);
323 }
324 // FeatureNode root = proibiospheretree.getRoot();
325 FeatureNode root2 = proibiospheretree.getRoot();
326 if (root2 != null){
327 int nbChildren = root2.getChildCount()-1;
328 while (nbChildren>-1){
329 try{
330 root2.removeChild(nbChildren);
331 }catch(Exception e){logger.warn("Can't remove child from FeatureTree "+e);}
332 nbChildren --;
333 }
334
335 }
336
337 for (Feature feature:featuresMap.values()) {
338 root2.addChild(FeatureNode.NewInstance(feature));
339 }
340 importer.getFeatureTreeService().saveOrUpdate(proibiospheretree);
341
342 }
343
344
345 /**
346 * @param keys
347 * @param acceptedTaxon: the current acceptedTaxon
348 * @param nametosave: the list of objects to save into the CDM
349 * @param refMods: the current reference extracted from the MODS
350 */
351 /* @SuppressWarnings("rawtypes")
352 private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonName> nametosave, Reference refMods) {
353 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
354
355 NodeList children = keys.getChildNodes();
356 String key="";
357 PolytomousKey poly = PolytomousKey.NewInstance();
358 poly.addSource(OriginalSourceType.Import, null,null,refMods,null);
359 poly.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
360 poly.addTaxonomicScope(acceptedTaxon);
361 poly.setTitleCache("bloup", true);
362 // poly.addCoveredTaxon(acceptedTaxon);
363 PolytomousKeyNode root = poly.getRoot();
364 PolytomousKeyNode previous = null,tmpKey=null;
365 Taxon taxonKey=null;
366 List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
367
368 // String fullContent = keys.getTextContent();
369 for (int i=0;i<children.getLength();i++){
370 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
371 NodeList paragraph = children.item(i).getChildNodes();
372 key="";
373 taxonKey=null;
374 for (int j=0;j<paragraph.getLength();j++){
375 if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
376 if (! paragraph.item(j).getTextContent().trim().isEmpty()){
377 key+=paragraph.item(j).getTextContent().trim();
378 // logger.info("KEY: "+j+"--"+key);
379 }
380 }
381 if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
382 taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
383 }
384 }
385 // logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
386 if (keypattern.matcher(key).matches()){
387 tmpKey = PolytomousKeyNode.NewInstance(key);
388 if (taxonKey!=null) {
389 tmpKey.setTaxon(taxonKey);
390 }
391 polyNodes.add(tmpKey);
392 if (previous == null) {
393 root.addChild(tmpKey);
394 } else {
395 previous.addChild(tmpKey);
396 }
397 }else{
398 if (!key.isEmpty()){
399 tmpKey=PolytomousKeyNode.NewInstance(key);
400 if (taxonKey!=null) {
401 tmpKey.setTaxon(taxonKey);
402 }
403 polyNodes.add(tmpKey);
404 if (keypatternend.matcher(key).matches()) {
405 root.addChild(tmpKey);
406 previous=tmpKey;
407 } else{
408 previous.addChild(tmpKey);
409 }
410
411 }
412 }
413 }
414 }
415 importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
416 importer.getPolytomousKeyService().saveOrUpdate(poly);
417 }
418 */
419
420
421 /**
422 * @param taxons: the XML Nodegroup
423 * @param nametosave: the list of objects to save into the CDM
424 * @param acceptedTaxon: the current accepted Taxon
425 * @param refMods: the current reference extracted from the MODS
426 *
427 * @return Taxon object built
428 */
429 @SuppressWarnings({ "rawtypes", "unused" })
430 private TaxonName getTaxonNameFromXML(Node taxons, List<TaxonName> nametosave, Reference refMods, boolean isSynonym) {
431 // logger.info("getTaxonFromXML");
432 // logger.info("acceptedTaxon: "+acceptedTaxon);
433 logger.info("getTaxonNameFromXML");
434 TaxonName nameToBeFilled = null;
435
436 currentMyName=new MyName(isSynonym);
437
438 NomenclaturalStatusType statusType = null;
439 try {
440 String followingText = null; //needs to be checked if following text is possible
441 currentMyName = extractScientificName(taxons,refMods, null);
442 } catch (TransformerFactoryConfigurationError e1) {
443 logger.warn(e1);
444 } catch (TransformerException e1) {
445 logger.warn(e1);
446 }
447 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
448
449 nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
450 if (nameToBeFilled.hasProblem() &&
451 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
452 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
453 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
454 nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
455 }
456
457 nameToBeFilled = getTaxonName(nameToBeFilled,nametosave,statusType);
458 */
459 nameToBeFilled = currentMyName.getTaxonName();
460 return nameToBeFilled;
461
462 }
463
464
465 /**
466 *
467 */
468 private void reloadClassification() {
469 logger.info("reloadClassification");
470 Classification cl = importer.getClassificationService().find(classification.getUuid());
471 if (cl != null){
472 classification = cl;
473 }else{
474 importer.getClassificationService().saveOrUpdate(classification);
475 classification = importer.getClassificationService().find(classification.getUuid());
476 }
477 }
478
479 // /**
480 // * Create a Taxon for the current NameBase, based on the current reference
481 // * @param taxonName
482 // * @param refMods: the current reference extracted from the MODS
483 // * @return Taxon
484 // */
485 // @SuppressWarnings({ "unused", "rawtypes" })
486 // private Taxon getTaxon(TaxonName taxonName, Reference refMods) {
487 // Taxon t = new Taxon(taxonName,null );
488 // if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
489 // t.setSec(configState.getConfig().getSecundum());
490 // logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
491 // }
492 // /*<<<<<<< .courant
493 // boolean sourceExists=false;
494 // Set<IdentifiableSource> sources = t.getSources();
495 // for (IdentifiableSource src : sources){
496 // String micro = src.getCitationMicroReference();
497 // Reference r = src.getCitation();
498 // if (r.equals(refMods) && micro == null) {
499 // sourceExists=true;
500 // }
501 // }
502 // if(!sourceExists) {
503 // t.addSource(null,null,refMods,null);
504 // }
505 //=======*/
506 // t.addSource(OriginalSourceType.Import,null,null,refMods,null);
507 // t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
508 // return t;
509 // }
510
511 private void extractDescriptionWithReference(Node typestatus, Taxon acceptedTaxon, Taxon defaultTaxon, Reference refMods,
512 String featureName) {
513 // System.out.println("extractDescriptionWithReference !");
514 logger.info("extractDescriptionWithReference");
515 NodeList children = typestatus.getChildNodes();
516
517 Feature currentFeature=getFeatureObjectFromString(featureName);
518
519 String r="";String s="";
520 for (int i=0;i<children.getLength();i++){
521 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
522 s+=children.item(i).getTextContent().trim();
523 }
524 if (children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
525 r+= children.item(i).getTextContent().trim();
526 }
527 if (s.indexOf(r)>-1) {
528 s=s.split(r)[0];
529 }
530 }
531
532 Reference currentref = ReferenceFactory.newGeneric();
533 if(!r.isEmpty()) {
534 currentref.setTitleCache(r, true);
535 } else {
536 currentref=refMods;
537 }
538 setParticularDescription(s,acceptedTaxon,defaultTaxon, currentref, refMods,currentFeature);
539 }
540
541 /**
542 * @param nametosave
543 * @param distribution: the XML node group
544 * @param acceptedTaxon: the current accepted Taxon
545 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
546 * @param refMods: the current reference extracted from the MODS
547 */
548 @SuppressWarnings("rawtypes")
549 private void extractDistribution(Node distribution, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonName> nametosave, Reference refMods) {
550 logger.info("extractDistribution");
551 // logger.info("acceptedTaxon: "+acceptedTaxon);
552 NodeList children = distribution.getChildNodes();
553 Map<Integer,List<MySpecimenOrObservation>> specimenOrObservations = new HashMap<Integer, List<MySpecimenOrObservation>>();
554 Map<Integer,String> descriptionsFulltext = new HashMap<Integer,String>();
555
556 for (int i=0;i<children.getLength();i++){
557 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
558 NodeList paragraph = children.item(i).getChildNodes();
559 for (int j=0;j<paragraph.getLength();j++){
560 if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
561 extractText(descriptionsFulltext, i, paragraph.item(j));
562 }
563 else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
564 extractInLine(nametosave, refMods, descriptionsFulltext, i,paragraph.item(j));
565 }
566 else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")){
567 MySpecimenOrObservation specimenOrObservation = new MySpecimenOrObservation();
568 DerivedUnit derivedUnitBase = null;
569 specimenOrObservation = extractSpecimenOrObservation(paragraph.item(j), derivedUnitBase, SpecimenOrObservationType.DerivedUnit, null);
570 extractTextFromSpecimenOrObservation(specimenOrObservations, descriptionsFulltext, i, specimenOrObservation);
571 }
572 }
573 }
574 }
575
576 int m=0;
577 for (int k:descriptionsFulltext.keySet()) {
578 if (k>m) {
579 m=k;
580 }
581 }
582 for (int k:specimenOrObservations.keySet()) {
583 if (k>m) {
584 m=k;
585 }
586 }
587
588
589 if(acceptedTaxon!=null){
590 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
591 Feature currentFeature = Feature.DISTRIBUTION();
592 // DerivedUnit derivedUnitBase=null;
593 // String descr="";
594 for (int k=0;k<=m;k++){
595 if(specimenOrObservations.keySet().contains(k)){
596 for (MySpecimenOrObservation soo:specimenOrObservations.get(k) ) {
597 handleAssociation(acceptedTaxon, refMods, td, soo);
598 }
599 }
600
601 if (descriptionsFulltext.keySet().contains(k)){
602 if (!stringIsEmpty(descriptionsFulltext.get(k).trim()) && (descriptionsFulltext.get(k).startsWith("Hab.") || descriptionsFulltext.get(k).startsWith("Habitat"))){
603 setParticularDescription(descriptionsFulltext.get(k),acceptedTaxon,defaultTaxon, refMods, Feature.HABITAT());
604 break;
605 }
606 else{
607 handleTextData(refMods, descriptionsFulltext, td, currentFeature, k);
608 }
609 }
610
611 if (descriptionsFulltext.keySet().contains(k) || specimenOrObservations.keySet().contains(k)){
612 acceptedTaxon.addDescription(td);
613 sourceHandler.addAndSaveSource(refMods, td, null);
614 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
615 }
616 }
617 }
618 }
619
620 /**
621 * @param refMods
622 * @param descriptionsFulltext
623 * @param td
624 * @param currentFeature
625 * @param k
626 */
627 private void handleTextData(Reference refMods, Map<Integer, String> descriptionsFulltext, TaxonDescription td,
628 Feature currentFeature, int k) {
629 //logger.info("handleTextData");
630 TextData textData = TextData.NewInstance();
631 textData.setFeature(currentFeature);
632 textData.putText(Language.UNKNOWN_LANGUAGE(), descriptionsFulltext.get(k));
633 sourceHandler.addSource(refMods, textData);
634 td.addElement(textData);
635 }
636
637 /**
638 * @param acceptedTaxon
639 * @param refMods
640 * @param td
641 * @param soo
642 */
643 private void handleAssociation(Taxon acceptedTaxon, Reference refMods, TaxonDescription td, MySpecimenOrObservation soo) {
644 logger.info("handleAssociation");
645 String descr=soo.getDescr();
646 DerivedUnit derivedUnitBase = soo.getDerivedUnitBase();
647
648 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
649
650 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
651
652 Feature feature=null;
653 feature = makeFeature(derivedUnitBase);
654 if(!StringUtils.isEmpty(descr)) {
655 derivedUnitBase.setTitleCache(descr, true);
656 }
657
658 IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
659
660 taxonDescription.addElement(indAssociation);
661 sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
662 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
663 td.setDescribedSpecimenOrObservation(soo.getDerivedUnitBase());
664 }
665
666 /**
667 * create an individualAssociation
668 * @param refMods
669 * @param derivedUnitBase
670 * @param feature
671 * @return
672 */
673 private IndividualsAssociation createIndividualAssociation(Reference refMods, DerivedUnit derivedUnitBase,
674 Feature feature) {
675 logger.info("createIndividualAssociation");
676 IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
677 indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
678 indAssociation.setFeature(feature);
679 indAssociation = sourceHandler.addSource(refMods, indAssociation);
680 return indAssociation;
681 }
682
683 /**
684 * @param specimenOrObservations
685 * @param descriptionsFulltext
686 * @param i
687 * @param specimenOrObservation
688 */
689 private void extractTextFromSpecimenOrObservation(Map<Integer, List<MySpecimenOrObservation>> specimenOrObservations,
690 Map<Integer, String> descriptionsFulltext, int i, MySpecimenOrObservation specimenOrObservation) {
691 logger.info("extractTextFromSpecimenOrObservation");
692 List<MySpecimenOrObservation> speObsList = specimenOrObservations.get(i);
693 if (speObsList == null) {
694 speObsList=new ArrayList<MySpecimenOrObservation>();
695 }
696 speObsList.add(specimenOrObservation);
697 specimenOrObservations.put(i,speObsList);
698
699 String s = specimenOrObservation.getDerivedUnitBase().toString();
700 if (descriptionsFulltext.get(i) !=null){
701 s = descriptionsFulltext.get(i)+" "+s;
702 }
703 descriptionsFulltext.put(i, s);
704 }
705
706 /**
707 * Extract the text with the inline link to a taxon
708 * @param nametosave
709 * @param refMods
710 * @param descriptionsFulltext
711 * @param i
712 * @param paragraph
713 */
714 @SuppressWarnings("rawtypes")
715 private void extractInLine(List<TaxonName> nametosave, Reference refMods, Map<Integer, String> descriptionsFulltext,
716 int i, Node paragraph) {
717 //logger.info("extractInLine");
718 String inLine=getInlineTextForName(nametosave, refMods, paragraph);
719 if (descriptionsFulltext.get(i) !=null){
720 inLine = descriptionsFulltext.get(i)+inLine;
721 }
722 descriptionsFulltext.put(i, inLine);
723 }
724
725 /**
726 * Extract the raw text from a Node
727 * @param descriptionsFulltext
728 * @param node
729 * @param j
730 */
731 private void extractText(Map<Integer, String> descriptionsFulltext, int i, Node node) {
732 //logger.info("extractText");
733 if(!node.getTextContent().trim().isEmpty()) {
734 String s =node.getTextContent().trim();
735 if (descriptionsFulltext.get(i) !=null){
736 s = descriptionsFulltext.get(i)+" "+s;
737 }
738 descriptionsFulltext.put(i, s);
739 }
740 }
741
742
743 /**
744 * @param materials: the XML node group
745 * @param acceptedTaxon: the current accepted Taxon
746 * @param refMods: the current reference extracted from the MODS
747 */
748 @SuppressWarnings("rawtypes")
749 private void extractMaterials(Node materials, Taxon acceptedTaxon, Reference refMods,List<TaxonName> nametosave) {
750 logger.info("EXTRACTMATERIALS");
751 // logger.info("acceptedTaxon: "+acceptedTaxon);
752 NodeList children = materials.getChildNodes();
753 NodeList events = null;
754 // String descr="";
755
756
757 for (int i=0;i<children.getLength();i++){
758 String rawAssociation="";
759 boolean added=false;
760 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
761 events = children.item(i).getChildNodes();
762 for(int k=0;k<events.getLength();k++){
763 if (events.item(k).getNodeName().equalsIgnoreCase("tax:name")){
764 String inLine= getInlineTextForName(nametosave, refMods, events.item(k));
765 if(!inLine.isEmpty()) {
766 rawAssociation+=inLine;
767 }
768 }
769 if (! events.item(k).getNodeName().equalsIgnoreCase("tax:name")
770 && !events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
771 rawAssociation+= events.item(k).getTextContent().trim();
772 }
773 if(events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
774 if (!containsDistinctLetters(rawAssociation.replaceAll(";",""))) {
775 rawAssociation="no description text";
776 }
777 added=true;
778 handleDerivedUnitFacadeAndBase(acceptedTaxon, refMods, events.item(k), rawAssociation);
779 }
780 if (!rawAssociation.isEmpty() && !added){
781
782 Feature feature = Feature.MATERIALS_EXAMINED();
783 featuresMap.put(feature.getTitleCache(),feature);
784
785 TextData textData = createTextData(rawAssociation, refMods, feature);
786
787 if(! rawAssociation.isEmpty() && (acceptedTaxon!=null)){
788 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
789 td.addElement(textData);
790 acceptedTaxon.addDescription(td);
791 sourceHandler.addAndSaveSource(refMods, td, null);
792 }
793 // DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
794 // derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
795 //
796 // TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
797 // acceptedTaxon.addDescription(taxonDescription);
798 //
799 // IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
800 //
801 // Feature feature = Feature.MATERIALS_EXAMINED();
802 // featuresMap.put(feature.getTitleCache(),feature);
803 // if(!StringUtils.isEmpty(rawAssociation)) {
804 // derivedUnitBase.setTitleCache(rawAssociation, true);
805 // }
806 // indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
807 // indAssociation.setFeature(feature);
808 // indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
809 //
810 // /*boolean sourceExists=false;
811 // Set<DescriptionElementSource> dsources = indAssociation.getSources();
812 // for (DescriptionElementSource src : dsources){
813 // String micro = src.getCitationMicroReference();
814 // Reference r = src.getCitation();
815 // if (r.equals(refMods) && micro == null) {
816 // sourceExists=true;
817 // }
818 // }
819 // if(!sourceExists) {
820 // indAssociation.addSource(null, null, refMods, null);
821 // }*/
822 // taxonDescription.addElement(indAssociation);
823 // taxonDescription.setTaxon(acceptedTaxon);
824 // taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
825 //
826 // /*sourceExists=false;
827 // Set<IdentifiableSource> sources = taxonDescription.getSources();
828 // for (IdentifiableSource src : sources){
829 // String micro = src.getCitationMicroReference();
830 // Reference r = src.getCitation();
831 // if (r.equals(refMods) && micro == null) {
832 // sourceExists=true;
833 // }
834 // }
835 // if(!sourceExists) {
836 // taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
837 // }*/
838 //
839 // importer.getDescriptionService().saveOrUpdate(taxonDescription);
840 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
841
842 rawAssociation="";
843 }
844 }
845 }
846 }
847 }
848
849 /**
850 * @param acceptedTaxon
851 * @param refMods
852 * @param events
853 * @param rawAssociation
854 * @param k
855 */
856 private void handleDerivedUnitFacadeAndBase(Taxon acceptedTaxon, Reference refMods, Node event,
857 String rawAssociation) {
858 logger.info("handleDerivedUnitFacadeAndBase");
859 String descr;
860 DerivedUnit derivedUnitBase;
861 MySpecimenOrObservation myspecimenOrObservation;
862 DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
863 derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
864
865 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
866
867 //TODO this may not always be correct, ask user
868 TaxonName typifiableName = acceptedTaxon != null ? acceptedTaxon.getName() : null;
869 myspecimenOrObservation = extractSpecimenOrObservation(event,derivedUnitBase,SpecimenOrObservationType.DerivedUnit, typifiableName);
870 derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
871 descr=myspecimenOrObservation.getDescr();
872
873 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
874
875 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
876
877 Feature feature = makeFeature(derivedUnitBase);
878 featuresMap.put(feature.getTitleCache(),feature);
879 if(!StringUtils.isEmpty(descr)) {
880 derivedUnitBase.setTitleCache(descr, true);
881 }
882
883 IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
884
885 taxonDescription.addElement(indAssociation);
886 sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
887 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
888 }
889
890
891
892 /**
893 * @param currentName
894 * @param materials: the XML node group
895 * @param acceptedTaxon: the current accepted Taxon
896 * @param refMods: the current reference extracted from the MODS
897 */
898 private String extractMaterialsDirect(Node materials, Taxon acceptedTaxon, Reference refMods, String event, TaxonName currentName) {
899 logger.info("extractMaterialsDirect");
900 // logger.info("acceptedTaxon: "+acceptedTaxon);
901 String descr="";
902
903 DerivedUnit derivedUnitBase=null;
904 MySpecimenOrObservation myspecimenOrObservation = extractSpecimenOrObservation(materials,derivedUnitBase, SpecimenOrObservationType.DerivedUnit, currentName);
905 derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
906
907 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
908
909 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
910
911 Feature feature=null;
912 if (event.equalsIgnoreCase("collection")){
913 feature = makeFeature(derivedUnitBase);
914 }
915 else{
916 feature = Feature.MATERIALS_EXAMINED();
917 }
918 featuresMap.put(feature.getTitleCache(), feature);
919
920 descr=myspecimenOrObservation.getDescr();
921 if(!StringUtils.isEmpty(descr)) {
922 derivedUnitBase.setTitleCache(descr, true);
923 }
924
925 IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
926
927 taxonDescription.addElement(indAssociation);
928 sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
929 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
930
931 return derivedUnitBase.getTitleCache();
932
933 }
934
935
936 /**
937 * @param description: the XML node group
938 * @param acceptedTaxon: the current acceptedTaxon
939 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
940 * @param nametosave: the list of objects to save into the CDM
941 * @param refMods: the current reference extracted from the MODS
942 * @param featureName: the feature name
943 */
944 @SuppressWarnings({ "rawtypes"})
945 private String extractSpecificFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
946 List<TaxonName> nametosave, Reference refMods, String featureName ) {
947 logger.info("extractSpecificFeature "+featureName);
948 // System.out.println("GRUUUUuu");
949 NodeList children = description.getChildNodes();
950 NodeList insideNodes ;
951 NodeList trNodes;
952 // String descr ="";
953 String localdescr="";
954 List<String> blabla=null;
955 List<String> text = new ArrayList<String>();
956
957 String table="<table>";
958 String head="";
959 String line="";
960
961 Feature currentFeature=getFeatureObjectFromString(featureName);
962
963 // String fullContent = description.getTextContent();
964 for (int i=0;i<children.getLength();i++){
965 // localdescr="";
966 if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
967 text.add(children.item(i).getTextContent().trim());
968 }
969 if (featureName.equalsIgnoreCase("table")){
970 if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
971 children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("thead")){
972 head = extractTableHead(children.item(i));
973 table+=head;
974 line = extractTableLine(children.item(i));
975 if (!line.equalsIgnoreCase("<tr></tr>")) {
976 table+=line;
977 }
978 }
979 if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
980 children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
981 line = extractTableLineWithColumn(children.item(i).getChildNodes());
982 if(!line.equalsIgnoreCase("<tr></tr>")) {
983 table+=line;
984 }
985 }
986 }
987 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
988 insideNodes=children.item(i).getChildNodes();
989 blabla= new ArrayList<String>();
990 for (int j=0;j<insideNodes.getLength();j++){
991 Node insideNode = insideNodes.item(j);
992 if (insideNode.getNodeName().equalsIgnoreCase("tax:name")){
993 String inlinetext = getInlineTextForName(nametosave, refMods, insideNode);
994 if (!inlinetext.isEmpty()) {
995 blabla.add(inlinetext);
996 }
997 }
998 else if (insideNode.getNodeName().equalsIgnoreCase("#text")) {
999 if(!insideNode.getTextContent().trim().isEmpty()){
1000 blabla.add(insideNode.getTextContent().trim());
1001 // localdescr += insideNodes.item(j).getTextContent().trim();
1002 }
1003 }
1004 }
1005 if (!blabla.isEmpty()) {
1006 String blaStr = StringUtils.join(blabla," ").trim();
1007 if(!stringIsEmpty(blaStr)) {
1008 setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1009 text.add(blaStr);
1010 }
1011 }
1012
1013 }
1014 if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1015 if(!children.item(i).getTextContent().trim().isEmpty()){
1016 localdescr = children.item(i).getTextContent().trim();
1017 if(!stringIsEmpty(localdescr)) {
1018 setParticularDescription(localdescr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1019 }
1020 }
1021 }
1022 }
1023
1024 table+="</table>";
1025 if (!table.equalsIgnoreCase("<table></table>")){
1026 // System.out.println("TABLE : "+table);
1027 text.add(table);
1028 }
1029
1030 if (text !=null && !text.isEmpty()) {
1031 return StringUtils.join(text," ");
1032 } else {
1033 return "";
1034 }
1035
1036 }
1037
1038 /**
1039 * @param children
1040 * @param i
1041 * @return
1042 */
1043 private String extractTableLine(Node child) {
1044 //logger.info("extractTableLine");
1045 String line;
1046 line="<tr>";
1047 if (child.getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1048 line = extractTableLineWithColumn(child.getChildNodes());
1049 }
1050 line+="</tr>";
1051 return line;
1052 }
1053
1054 /**
1055 * @param children
1056 * @param i
1057 * @return
1058 */
1059 private String extractTableHead(Node child) {
1060 //logger.info("extractTableHead");
1061 String head;
1062 String line;
1063 head="<th>";
1064 NodeList trNodes = child.getChildNodes();
1065 for (int k=0;k<trNodes.getLength();k++){
1066 if (trNodes.item(k).getNodeName().equalsIgnoreCase("tax:div")
1067 && trNodes.item(k).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1068 line = extractTableLineWithColumn(trNodes.item(k).getChildNodes());
1069 head+=line;
1070 }
1071 }
1072 head+="</th>";
1073 return head;
1074 }
1075
1076 /**
1077 * build a html table line, with td columns
1078 * @param tdNodes
1079 * @return an html coded line
1080 */
1081 private String extractTableLineWithColumn(NodeList tdNodes) {
1082 //logger.info("extractTableLineWithColumn");
1083 String line;
1084 line="<tr>";
1085 for (int l=0;l<tdNodes.getLength();l++){
1086 if (tdNodes.item(l).getNodeName().equalsIgnoreCase("tax:p")){
1087 line+="<td>"+tdNodes.item(l).getTextContent()+"</td>";
1088 }
1089 }
1090 line+="</tr>";
1091 return line;
1092 }
1093
1094 /**
1095 * @param description: the XML node group
1096 * @param acceptedTaxon: the current acceptedTaxon
1097 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1098 * @param nametosave: the list of objects to save into the CDM
1099 * @param refMods: the current reference extracted from the MODS
1100 * @param featureName: the feature name
1101 */
1102 @SuppressWarnings({ "unused", "rawtypes" })
1103 private String extractSpecificFeatureNotStructured(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
1104 List<TaxonName> nameToSave, Reference refMods, String featureName ) {
1105 logger.info("extractSpecificFeatureNotStructured " + featureName);
1106 NodeList children = description.getChildNodes();
1107 NodeList insideNodes ;
1108 List<String> blabla= new ArrayList<String>();
1109
1110
1111 Feature currentFeature = getFeatureObjectFromString(featureName);
1112
1113 String fullContent = description.getTextContent();
1114 for (int i=0;i<children.getLength();i++){
1115 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1116 insideNodes=children.item(i).getChildNodes();
1117 for (int j=0;j<insideNodes.getLength();j++){
1118 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1119 String inlineText =getInlineTextForName(nameToSave, refMods, insideNodes.item(j));
1120 if(!inlineText.isEmpty()) {
1121 blabla.add(inlineText);
1122 }
1123 }
1124 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1125 if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1126 blabla.add(insideNodes.item(j).getTextContent().trim());
1127 }
1128 }
1129 }
1130 }
1131 if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1132 if(!children.item(i).getTextContent().trim().isEmpty()){
1133 String localdescr = children.item(i).getTextContent().trim();
1134 if(!localdescr.isEmpty())
1135 {
1136 blabla.add(localdescr);
1137 }
1138 }
1139 }
1140 }
1141
1142 if (blabla !=null && !blabla.isEmpty()) {
1143 String blaStr = StringUtils.join(blabla," ").trim();
1144 if (! stringIsEmpty(blaStr)) {
1145 setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1146 return blaStr;
1147 } else {
1148 return "";
1149 }
1150 } else {
1151 return "";
1152 }
1153
1154 }
1155
1156 /**
1157 * @param blaStr
1158 * @return
1159 */
1160 private boolean stringIsEmpty(String blaStr) {
1161 if (blaStr.matches("(\\.|,|;|\\.-)?")){
1162 return true;
1163 }else{
1164 return false;
1165 }
1166 }
1167
1168 /**
1169 * @param nametosave
1170 * @param refMods
1171 * @param insideNodes
1172 * @param blabla
1173 * @param j
1174 */
1175 @SuppressWarnings({ "rawtypes" })
1176 private String getInlineTextForName(List<TaxonName> nametosave, Reference refMods, Node insideNode) {
1177 if (true){
1178 NodeList children = insideNode.getChildNodes();
1179 String result = "";
1180 for (int i=0;i<children.getLength();i++){
1181 Node nameChild = children.item(i);
1182 if(nameChild.getNodeName().equalsIgnoreCase("#text")){
1183 result += nameChild.getTextContent();
1184 }else{
1185 //do nothing
1186 }
1187 }
1188 return result.replace("\n", "").trim();
1189 }else{
1190 TaxonName tnb = getTaxonNameFromXML(insideNode, nametosave,refMods,false);
1191 // Taxon tax = getTaxonFromTxonNameBase(tnb, refMods);
1192 Taxon tax = currentMyName.getTaxon();
1193 if(tnb !=null && tax != null){
1194 String linkedTaxon = tnb.getTitleCache().split("sec")[0];//TODO NOT IMPLEMENTED IN THE CDM YET
1195 return "<cdm:taxon uuid='"+tax.getUuid()+"'>"+linkedTaxon+"</cdm:taxon>";
1196 }else if (tnb != null && tax == null){
1197 //TODO
1198 return "<cdm:taxonName uuid='" + tnb.getUuid() +"'>" + tnb.getTitleCache().split("sec")[0] +"</cdm:taxonName>";
1199 }else{
1200 logger.warn("Inline text has no content yet");
1201 }
1202 return "";
1203 }
1204 }
1205
1206 /**
1207 * @param featureName
1208 * @return
1209 */
1210 @SuppressWarnings("rawtypes")
1211 private Feature getFeatureObjectFromString(String featureName) {
1212 logger.info("getFeatureObjectFromString");
1213 List<Feature> features = importer.getTermService().list(Feature.class, null,null,null,null);
1214 Feature currentFeature=null;
1215 for (Feature feature: features){
1216 String tmpF = feature.getTitleCache();
1217 if (tmpF.equalsIgnoreCase(featureName)) {
1218 currentFeature=feature;
1219 // System.out.println("currentFeatureFromList "+currentFeature.getUuid());
1220 }
1221 }
1222 if (currentFeature == null) {
1223 currentFeature=Feature.NewInstance(featureName, featureName, featureName);
1224 if(featureName.equalsIgnoreCase("Other")){
1225 currentFeature.setUuid(OtherUUID);
1226 }
1227 if(featureName.equalsIgnoreCase(notMarkedUp)){
1228 currentFeature.setUuid(NotMarkedUpUUID);
1229 }
1230 importer.getTermService().saveOrUpdate(currentFeature);
1231 }
1232 return currentFeature;
1233 }
1234
1235
1236
1237
1238 /**
1239 * @param children: the XML node group
1240 * @param nametosave: the list of objects to save into the CDM
1241 * @param acceptedTaxon: the current acceptedTaxon
1242 * @param refMods: the current reference extracted from the MODS
1243 * @param fullContent :the parsed XML content
1244 * @return a list of description (text)
1245 */
1246 @SuppressWarnings({ "unused", "rawtypes" })
1247 private List<String> parseParagraph(List<TaxonName> namesToSave, Taxon acceptedTaxon, Reference refMods, Node paragraph, Feature feature){
1248 logger.info("parseParagraph "+feature.toString());
1249 List<String> fullDescription= new ArrayList<String>();
1250 // String localdescr;
1251 String descr="";
1252 NodeList insideNodes ;
1253 boolean collectionEvent = false;
1254 List<Node>collectionEvents = new ArrayList<Node>();
1255
1256 NodeList children = paragraph.getChildNodes();
1257
1258 for (int i=0;i<children.getLength();i++){
1259 // localdescr="";
1260 if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
1261 descr += children.item(i).getTextContent().trim();
1262 }
1263 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1264 insideNodes=children.item(i).getChildNodes();
1265 List<String> blabla= new ArrayList<String>();
1266 for (int j=0;j<insideNodes.getLength();j++){
1267 boolean nodeKnown = false;
1268 // System.out.println("insideNodes.item(j).getNodeName() : "+insideNodes.item(j).getNodeName());
1269 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1270 String inlineText = getInlineTextForName(namesToSave, refMods, insideNodes.item(j));
1271 if (!inlineText.isEmpty()) {
1272 blabla.add(inlineText);
1273 }
1274 nodeKnown=true;
1275 }
1276 else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1277 if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1278 blabla.add(insideNodes.item(j).getTextContent().trim());
1279 // localdescr += insideNodes.item(j).getTextContent().trim();
1280 }
1281 nodeKnown=true;
1282 }
1283 else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:bibref")) {
1284 String ref = insideNodes.item(j).getTextContent().trim();
1285 if (ref.endsWith(";") && ((ref.length())>1)) {
1286 ref=ref.substring(0, ref.length()-1)+".";
1287 }
1288 Reference reference = ReferenceFactory.newGeneric();
1289 reference.setTitleCache(ref, true);
1290 blabla.add(reference.getTitleCache());
1291 nodeKnown=true;
1292 }
1293 else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:figure")){
1294 String figure = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "figure");
1295 blabla.add(figure);
1296 }
1297 else if(insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:div") &&
1298 insideNodes.item(j).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1299 insideNodes.item(j).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1300 String table = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "table");
1301 blabla.add(table);
1302 }
1303 else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1304 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1305 String titlecache = extractMaterialsDirect(insideNodes.item(j), acceptedTaxon, refMods, "collection", null);
1306 blabla.add(titlecache);
1307 collectionEvent=true;
1308 collectionEvents.add(insideNodes.item(j));
1309 nodeKnown=true;
1310 }else{
1311 logger.warn("node not handled yet: " + insideNodes.item(j).getNodeName());
1312 }
1313
1314 }
1315 if (!StringUtils.isBlank(StringUtils.join(blabla," "))) {
1316 fullDescription.add(StringUtils.join(blabla," "));
1317 }
1318 }
1319 if (children.item(i).getNodeName().equalsIgnoreCase("tax:figure")){
1320 String figure = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "Figure");
1321 fullDescription.add(figure);
1322 }
1323 if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1324 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1325 children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1326 String table = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "table");
1327 fullDescription.add(table);
1328 }
1329 }
1330
1331 if( !stringIsEmpty(descr.trim())){
1332 Feature currentFeature= getNotMarkedUpFeatureObject();
1333 setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1334 }
1335 // if (collectionEvent) {
1336 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1337 // for (Node coll:collectionEvents){
1338 // = extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
1339 // }
1340 // }
1341 return fullDescription;
1342 }
1343
1344
1345 /**
1346 * @param description: the XML node group
1347 * @param acceptedTaxon: the current acceptedTaxon
1348 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1349 * @param nametosave: the list of objects to save into the CDM
1350 * @param refMods: the current reference extracted from the MODS
1351 * @param feature: the feature to link the data with
1352 */
1353 @SuppressWarnings("rawtypes")
1354 private void extractFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonName> namesToSave, Reference refMods, Feature feature){
1355 logger.info("EXTRACT FEATURE "+feature.toString());
1356 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1357 List<String> fullDescription= parseParagraph( namesToSave, acceptedTaxon, refMods, description,feature);
1358
1359 // System.out.println("Feature : "+feature.toString()+", "+fullDescription.toString());
1360 if (!fullDescription.isEmpty() &&!stringIsEmpty(StringUtils.join(fullDescription,"\n").trim())) {
1361 setParticularDescription(StringUtils.join(fullDescription,"\n").trim(),acceptedTaxon,defaultTaxon, refMods,feature);
1362 }
1363
1364 }
1365
1366
1367 /**
1368 * @param descr: the XML Nodegroup to parse
1369 * @param acceptedTaxon: the current acceptedTaxon
1370 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1371 * @param refMods: the current reference extracted from the MODS
1372 * @param currentFeature: the feature name
1373 * @return
1374 */
1375 private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon, Reference refMods, Feature currentFeature) {
1376 logger.info("setParticularDescription " + currentFeature.getTitleCache()+", \n blabla : "+descr);
1377
1378 //remove redundant feature title
1379 String featureStr = currentFeature.getTitleCache();
1380 if (!descr.isEmpty() && descr.toLowerCase().startsWith(featureStr.toLowerCase())){
1381 descr = descr.replaceAll("(?i)" + featureStr + "\\.\\s*", "");
1382 }
1383
1384
1385 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1386 featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1387
1388 TextData textData = createTextData(descr, refMods, currentFeature);
1389
1390 if(acceptedTaxon!=null){
1391 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1392 td.addElement(textData);
1393 acceptedTaxon.addDescription(td);
1394
1395 sourceHandler.addAndSaveSource(refMods, td, null);
1396 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1397 }
1398
1399 if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1400 try{
1401 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1402 if (tmp!=null) {
1403 defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1404 }else{
1405 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1406 }
1407 }catch(Exception e){
1408 logger.debug("TAXON EXISTS"+defaultTaxon);
1409 }
1410
1411 TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1412 defaultTaxon.addDescription(td);
1413 td.addElement(textData);
1414 sourceHandler.addAndSaveSource(refMods, td, null);
1415 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1416 }
1417 }
1418
1419 /**
1420 * @param descr
1421 * @param refMods
1422 * @param currentFeature
1423 * @return
1424 */
1425 private TextData createTextData(String descr, Reference refMods, Feature currentFeature) {
1426 //logger.info("createTextData");
1427 TextData textData = TextData.NewInstance();
1428 textData.setFeature(currentFeature);
1429 sourceHandler.addSource(refMods, textData);
1430
1431 textData.putText(Language.UNKNOWN_LANGUAGE(), descr);
1432 return textData;
1433 }
1434
1435
1436
1437 /**
1438 * @param descr: the XML Nodegroup to parse
1439 * @param acceptedTaxon: the current acceptedTaxon
1440 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1441 * @param refMods: the current reference extracted from the MODS
1442 * @param currentFeature: the feature name
1443 * @return
1444 */
1445 private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon,Reference currentRef, Reference refMods, Feature currentFeature) {
1446 // System.out.println("setParticularDescriptionSPecial "+currentFeature);
1447 // logger.info("acceptedTaxon: "+acceptedTaxon);
1448 logger.info("setParticularDescription");
1449 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1450
1451 featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1452 TextData textData = createTextData(descr, refMods, currentFeature);
1453
1454 if(! descr.isEmpty() && (acceptedTaxon!=null)){
1455 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1456 td.addElement(textData);
1457 acceptedTaxon.addDescription(td);
1458
1459 sourceHandler.addAndSaveSource(refMods, td, currentRef);
1460 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1461 }
1462
1463 if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1464 try{
1465 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1466 if (tmp!=null) {
1467 defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1468 }else{
1469 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1470 }
1471 }catch(Exception e){
1472 logger.debug("TAXON EXISTS"+defaultTaxon);
1473 }
1474
1475 TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1476 defaultTaxon.addDescription(td);
1477 td.addElement(textData);
1478 sourceHandler.addAndSaveSource(currentRef, td,currentRef);
1479 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1480 }
1481 }
1482
1483
1484
1485 /**
1486 * @param synonyms: the XML Nodegroup to parse
1487 * @param nametosave: the list of objects to save into the CDM
1488 * @param acceptedTaxon: the current acceptedTaxon
1489 * @param refMods: the current reference extracted from the MODS
1490 */
1491 @SuppressWarnings({ "rawtypes" })
1492 private void extractSynonyms(Node synonymsNode, Taxon acceptedTaxon,Reference refMods, String followingText) {
1493 logger.info("extractSynonyms");
1494 //System.out.println("extractSynonyms for: "+acceptedTaxon);
1495 Taxon ttmp = (Taxon) importer.getTaxonService().find(acceptedTaxon.getUuid());
1496 if (ttmp != null) {
1497 acceptedTaxon = CdmBase.deproxy(ttmp,Taxon.class);
1498 }
1499 else{
1500 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1501 }
1502 NodeList children = synonymsNode.getChildNodes();
1503 List<MyName> names = new ArrayList<MyName>();
1504
1505 if(synonymsNode.getNodeName().equalsIgnoreCase("tax:name")){
1506 try {
1507 MyName myName = extractScientificNameSynonym(synonymsNode, refMods, followingText);
1508 names.add(myName);
1509 } catch (TransformerFactoryConfigurationError e) {
1510 logger.warn(e);
1511 } catch (TransformerException e) {
1512 logger.warn(e);
1513 }
1514 }
1515
1516
1517 for (int i=0;i<children.getLength();i++){
1518 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1519 NodeList tmp = children.item(i).getChildNodes();
1520 // String fullContent = children.item(i).getTextContent();
1521 for (int j=0; j< tmp.getLength();j++){
1522 if(tmp.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1523 try {
1524 MyName myName = extractScientificNameSynonym(tmp.item(j),refMods, followingText);
1525 names.add(myName);
1526 } catch (TransformerFactoryConfigurationError e) {
1527 logger.warn(e);
1528 } catch (TransformerException e) {
1529 logger.warn(e);
1530 }
1531 }
1532 }
1533 }
1534 if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
1535 try {
1536 MyName myName = extractScientificNameSynonym(children.item(i),refMods, followingText);
1537 names.add(myName);
1538 } catch (TransformerFactoryConfigurationError e) {
1539 logger.warn(e);
1540 } catch (TransformerException e) {
1541 logger.warn(e);
1542 }
1543
1544 }
1545 }
1546
1547 for(MyName name:names){
1548 TaxonName nameToBeFilled = name.getTaxonName();
1549 Synonym synonym = name.getSyno();
1550 addFollowingTextToName(nameToBeFilled, followingText);
1551
1552 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1553 nameToBeFilled = parser.parseFullName(name.getName(), nomenclaturalCode, name.getRank());
1554 if (nameToBeFilled.hasProblem() &&
1555 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1556 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1557 addProblemNameToFile(name.getName(),"",nomenclaturalCode,name.getRank());
1558 nameToBeFilled = solveNameProblem(name.getOriginalName(), name.getName(), parser,name.getAuthor(), name.getRank());
1559 }
1560 nameToBeFilled = getTaxonName(nameToBeFilled,nametosave,statusType);
1561 */
1562 if (!name.getIdentifier().isEmpty() && (name.getIdentifier().length()>2)){
1563 setLSID(name.getIdentifier(), synonym);
1564 }
1565
1566 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1567 boolean synoExist = false;
1568 for (Synonym syn: synonymsSet){
1569
1570 boolean a =syn.getName().equals(synonym.getName());
1571 boolean b = syn.getSec().equals(synonym.getSec());
1572 if (a && b) {
1573 synoExist=true;
1574 }
1575 }
1576 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1577 sourceHandler.addSource(refMods, synonym);
1578 acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1579 }
1580 }
1581 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1582 }
1583
1584
1585 private boolean addFollowingTextToName(TaxonName nameToBeFilled, String followingText) {
1586 if (nameToBeFilled != null && StringUtils.isNotBlank(followingText)){
1587 if (! followingText.matches("\\d\\.?")){
1588
1589 if (followingText.startsWith(",")){
1590 followingText = followingText.substring(1).trim();
1591 }
1592 nameToBeFilled.setFullTitleCache(nameToBeFilled.getFullTitleCache()+ "," +followingText , true);
1593 }
1594 return true;
1595 }
1596 return false;
1597
1598 }
1599
1600 /**
1601 * @param refgroup: the XML nodes
1602 * @param nametosave: the list of objects to save into the CDM
1603 * @param acceptedTaxon: the current acceptedTaxon
1604 * @param nametosave: the list of objects to save into the CDM
1605 * @param refMods: the current reference extracted from the MODS
1606 * @return the acceptedTaxon (why?)
1607 * handle cases where the bibref are inside <p> and outside
1608 */
1609 @SuppressWarnings({ "rawtypes" })
1610 private Taxon extractReferences(Node refgroup, List<TaxonName> nametosave, Taxon acceptedTaxon, Reference refMods) {
1611 logger.info("extractReferences");
1612 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1613
1614 NodeList children = refgroup.getChildNodes();
1615 INonViralName nameToBeFilled = getNonViralNameAccNomenclature();
1616
1617 ReferenceBuilder refBuild = new ReferenceBuilder(sourceHandler);
1618 for (int i=0;i<children.getLength();i++){
1619 if(children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
1620 String ref = children.item(i).getTextContent().trim();
1621 refBuild.builReference(ref, treatmentMainName, nomenclaturalCode, acceptedTaxon, refMods);
1622 if (!refBuild.isFoundBibref()){
1623 extractReferenceRawText(children.item(i).getChildNodes(), nameToBeFilled, refMods, acceptedTaxon);
1624 }
1625 }
1626
1627 if(children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1628 NodeList references = children.item(i).getChildNodes();
1629 String descr="";
1630 for (int j=0;j<references.getLength();j++){
1631 if(references.item(j).getNodeName().equalsIgnoreCase("tax:bibref")){
1632 String ref = references.item(j).getTextContent().trim();
1633 refBuild.builReference(ref, treatmentMainName, nomenclaturalCode, acceptedTaxon, refMods);
1634 }
1635 else
1636 if (references.item(j).getNodeName().equalsIgnoreCase("#text")
1637 && !references.item(j).getTextContent().trim().isEmpty()){
1638 descr += references.item(j).getTextContent().trim();
1639 }
1640
1641 }
1642 if (!refBuild.isFoundBibref()){
1643 //if it's not tagged, put it as row information.
1644 // extractReferenceRawText(references, nameToBeFilled, nametosave, refMods, acceptedTaxon);
1645 //then put it as a not markup feature if not empty
1646 if (!stringIsEmpty(descr.trim())){
1647 Feature currentFeature= getNotMarkedUpFeatureObject();
1648 setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1649 }
1650 }
1651 }
1652 }
1653 // importer.getClassificationService().saveOrUpdate(classification);
1654 return acceptedTaxon;
1655
1656 }
1657
1658 /**
1659 * get the non viral name according to the current nomenclature
1660 * @return
1661 */
1662
1663 private INonViralName getNonViralNameAccNomenclature() {
1664 return nomenclaturalCode.getNewTaxonNameInstance(null);
1665 }
1666
1667 /**
1668 * @return the feature object for the category "not marked up"
1669 */
1670 private Feature getNotMarkedUpFeatureObject() {
1671 // FIXME use getFeature(uuid ....)
1672 logger.info("getNotMarkedUpFeatureObject");
1673 Feature currentFeature = (Feature)importer.getTermService().find(NotMarkedUpUUID);
1674 if (currentFeature == null) {
1675 currentFeature=Feature.NewInstance(notMarkedUp, notMarkedUp, notMarkedUp);
1676 currentFeature.setUuid(NotMarkedUpUUID);
1677 //TODO use userDefined Feature Vocabulary
1678 Feature.DISTRIBUTION().getVocabulary().addTerm(currentFeature);
1679 // importer.getTermService().saveOrUpdate(currentFeature);
1680 importer.getVocabularyService().saveOrUpdate(currentFeature.getVocabulary());
1681 }
1682 return currentFeature;
1683 }
1684
1685 /**
1686 * @param references
1687 * handle cases where the bibref are inside <p> and outside
1688 */
1689 @SuppressWarnings("rawtypes")
1690 private void extractReferenceRawText(NodeList references, INonViralName nameToBeFilled, Reference refMods,
1691 Taxon acceptedTaxon) {
1692 logger.info("extractReferenceRawText");
1693 String refString="";
1694 currentMyName= new MyName(true);
1695 for (int j=0;j<references.getLength();j++){
1696 acceptedTaxon=CdmBase.deproxy(acceptedTaxon, Taxon.class);
1697 //no bibref tag inside
1698 // System.out.println("references.item(j).getNodeName()"+references.item(j).getNodeName());
1699 if (references.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1700
1701 try {
1702 String followingText = null; //needs to be checked if follText is possible
1703 //TODO create or not create?
1704 currentMyName = extractScientificName(references.item(j), refMods, followingText);
1705 } catch (TransformerFactoryConfigurationError e) {
1706 logger.warn(e);
1707 } catch (TransformerException e) {
1708 logger.warn(e);
1709 }
1710
1711 // name=name.trim();
1712 }
1713 if (references.item(j).getNodeName().equalsIgnoreCase("#text")){
1714 refString = references.item(j).getTextContent().trim();
1715 }
1716 if(references.item(j).getNodeName().equalsIgnoreCase("#text") && !references.item(j).getTextContent().trim().isEmpty()){
1717 //
1718 if (!currentMyName.getStatus().isEmpty()){
1719 String nomNovStatus = this.newNameStatus(currentMyName.getStatus());
1720 if (nomNovStatus != null){
1721 nameToBeFilled.setAppendedPhrase(nomNovStatus);
1722 }else{
1723 try {
1724 NomenclaturalStatusType statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1725 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1726 } catch (UnknownCdmTypeException e) {
1727 addProblematicStatusToFile(currentMyName.getStatus());
1728 logger.warn("Problem with status");
1729 }
1730 }
1731 }
1732
1733 String fullLineRefName = references.item(j).getTextContent().trim();
1734 int nameOrRefOrOther=2;
1735 nameOrRefOrOther=askIfNameContained(fullLineRefName);
1736 if (nameOrRefOrOther==0){
1737 TaxonName nameTBF = currentMyName.getTaxonName();
1738 Synonym synonym = Synonym.NewInstance(nameTBF, refMods);
1739
1740 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1741 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1742 boolean synoExist = false;
1743 for (Synonym syn: synonymsSet){
1744 // System.out.println(syn.getName()+" -- "+syn.getSec());
1745 boolean a =syn.getName().equals(synonym.getName());
1746 boolean b = syn.getSec().equals(synonym.getSec());
1747 if (a && b) {
1748 synoExist=true;
1749 }
1750 }
1751 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1752 sourceHandler.addSource(refMods, synonym);
1753
1754 acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1755 }
1756 }
1757
1758 if (nameOrRefOrOther==1){
1759 Reference re = ReferenceFactory.newGeneric();
1760 re.setTitleCache(fullLineRefName, true);
1761
1762 /* TaxonName nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
1763 if (nameTBF.hasProblem() &&
1764 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1765 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
1766 nameTBF=solveNameProblem(currentMyName.getName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
1767 }
1768 nameTBF = getTaxonName(nameTBF,nametosave,statusType);
1769 */
1770 TaxonName nameTBF = currentMyName.getTaxonName();
1771 Synonym synonym = Synonym.NewInstance(nameTBF, re);
1772
1773 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1774 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1775 boolean synoExist = false;
1776 for (Synonym syn: synonymsSet){
1777 // System.out.println(syn.getName()+" -- "+syn.getSec());
1778 boolean a =syn.getName().equals(synonym.getName());
1779 boolean b = syn.getSec().equals(synonym.getSec());
1780 if (a && b) {
1781 synoExist=true;
1782 }
1783 }
1784 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1785 sourceHandler.addSource(refMods, synonym);
1786
1787 acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1788 }
1789
1790 }
1791
1792
1793 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1794 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
1795 }
1796 }
1797
1798 if(!currentMyName.getName().isEmpty()){
1799 //logger.info("acceptedTaxon and name: *"+acceptedTaxon.getTitleCache()+"*, *"+currentMyName.getName()+"*");
1800 if (acceptedTaxon.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(currentMyName.getName().trim())){
1801 Reference refS = ReferenceFactory.newGeneric();
1802 refS.setTitleCache(refString, true);
1803 // TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1804 // acceptedTaxon.addDescription(td);
1805 // acceptedTaxon.addSource(refSource);
1806 //
1807 // TextData textData = TextData.NewInstance(Feature.CITATION());
1808 //
1809 // textData.addSource(null, null, refS, null);
1810 // td.addElement(textData);
1811 // td.addSource(refSource);
1812 // importer.getDescriptionService().saveOrUpdate(td);
1813
1814
1815 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1816 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
1817
1818 }
1819
1820 acceptedTaxon.getName().setNomenclaturalReference(refS);
1821 }else{
1822 TaxonName nameTBF = currentMyName.getTaxonName();
1823 Synonym synonym = null;
1824 if (! currentMyName.getStatus().isEmpty()){
1825 String nomNovStatus = this.newNameStatus(currentMyName.getStatus());
1826 if (nomNovStatus != null){
1827 nameToBeFilled.setAppendedPhrase(nomNovStatus);
1828 }else{
1829 try {
1830 NomenclaturalStatusType statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1831 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1832 synonym = Synonym.NewInstance(nameTBF, refMods);
1833 } catch (UnknownCdmTypeException e) {
1834 addProblematicStatusToFile(currentMyName.getStatus());
1835 logger.warn("Problem with status");
1836 synonym = Synonym.NewInstance(nameTBF, refMods);
1837 synonym.setAppendedPhrase(currentMyName.getStatus());
1838 }
1839 }
1840 }else{
1841 synonym = Synonym.NewInstance(nameTBF, refMods);
1842 }
1843
1844
1845 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1846 setLSID(currentMyName.getIdentifier(), synonym);
1847 }
1848
1849 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1850 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1851 boolean synoExist = false;
1852 for (Synonym syn: synonymsSet){
1853 // System.out.println(syn.getName()+" -- "+syn.getSec());
1854 boolean a =syn.getName().equals(synonym.getName());
1855 boolean b = syn.getSec().equals(synonym.getSec());
1856 if (a && b) {
1857 synoExist=true;
1858 }
1859 }
1860 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1861 sourceHandler.addSource(refMods, synonym);
1862
1863 acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1864 }
1865 }
1866 }
1867 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1868 }
1869 }
1870
1871
1872
1873 /**
1874 * @param identifier
1875 * @param acceptedTaxon
1876 */
1877 @SuppressWarnings("rawtypes")
1878 private void setLSID(String identifier, TaxonBase<?> taxon) {
1879 //logger.info("setLSID");
1880 // boolean lsidok=false;
1881 String id = identifier.split("__")[0];
1882 String source = identifier.split("__")[1];
1883 if (id.indexOf("lsid")>-1){
1884 try {
1885 LSID lsid = new LSID(id);
1886 taxon.setLsid(lsid);
1887 // lsidok=true;
1888 } catch (MalformedLSIDException e) {
1889 logger.warn("Malformed LSID");
1890 }
1891
1892 }
1893
1894 //logger.info("search reference for LSID");
1895 // if ((id.indexOf("lsid")<0) || !lsidok){
1896 //ADD ORIGINAL SOURCE ID EVEN IF LSID
1897 Reference re = null;
1898 Pager<Reference> references = importer.getReferenceService().findByTitleWithRestrictions(Reference.class, source, MatchMode.EXACT, null, 1, null, null, null);
1899 if( references !=null && references.getCount()>0){
1900 re=references.getRecords().get(0);
1901 }
1902 //logger.info("search reference for LSID-end");
1903 if(re == null){
1904 re = ReferenceFactory.newGeneric();
1905 re.setTitleCache(source, true);
1906 importer.getReferenceService().saveOrUpdate(re);
1907 }
1908 re=CdmBase.deproxy(re, Reference.class);
1909
1910 //logger.info("search source for LSID");
1911 Set<IdentifiableSource> sources = taxon.getSources();
1912 boolean lsidinsource=false;
1913 boolean urlinsource=false;
1914 for (IdentifiableSource src:sources){
1915 if (id.equalsIgnoreCase(src.getIdInSource()) && re.getTitleCache().equals(src.getCitation().getTitleCache())) {
1916 lsidinsource=true;
1917 }
1918 if (src.getIdInSource() == null && re.getTitleCache().equals(sourceUrlRef.getTitleCache())) {
1919 urlinsource=true;
1920 }
1921 }
1922 if(!lsidinsource) {
1923 taxon.addSource(OriginalSourceType.Import, id,null,re,null);
1924 }
1925 if(!urlinsource)
1926 {
1927 sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
1928 taxon.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
1929 // }
1930 }
1931
1932 }
1933
1934 /**
1935 * try to solve a parsing problem for a scientific name
1936 * @param original : the name from the OCR document
1937 * @param name : the tagged version
1938 * @param parser
1939 * @return the corrected TaxonName
1940 */
1941 /* @SuppressWarnings({ "unchecked", "rawtypes" })
1942 private TaxonName solveNameProblem(String original, String name, INonViralNameParser parser, String author, Rank rank) {
1943 Map<String,String> ato = namesMap.get(original);
1944 if (ato == null) {
1945 ato = namesMap.get(original+" "+author);
1946 }
1947
1948
1949 if (ato == null && rank.equals(Rank.UNKNOWN_RANK())){
1950 rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
1951 }
1952 if (ato != null && rank.equals(Rank.UNKNOWN_RANK())){
1953 rank = getRank(ato);
1954 }
1955 // TaxonName nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
1956 TaxonName nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1957 // logger.info("RANK: "+rank);
1958 int retry=0;
1959 List<ParserProblem> problems = nameTBF.getParsingProblems();
1960 for (ParserProblem pb:problems) {
1961 System.out.println(pb.toString());
1962 }
1963 while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
1964 addProblemNameToFile(name,author,nomenclaturalCode,rank);
1965 String fullname=name;
1966 if(! skippQuestion) {
1967 fullname = getFullReference(name,nameTBF.getParsingProblems());
1968 }
1969 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1970 nameTBF = TaxonNameFactory.NewBotanicalInstance(null);
1971 }
1972 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1973 nameTBF = TaxonNameFactory.NewZoologicalInstance(null);
1974 }
1975 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1976 nameTBF= TaxonNameFactory.NewBacterialInstance(null);
1977 }
1978 parser.parseReferencedName(nameTBF, fullname, rank, false);
1979 retry++;
1980 }
1981 if (retry == 1){
1982 if(author != null){
1983 if (name.indexOf(author)>-1) {
1984 nameTBF = parser.parseSimpleName(name.substring(0, name.indexOf(author)), nomenclaturalCode, rank);
1985 } else {
1986 nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1987 }
1988 if (nameTBF.hasProblem()){
1989 if (name.indexOf(author)>-1) {
1990 addProblemNameToFile(name.substring(0, name.indexOf(author)),author,nomenclaturalCode,rank);
1991 } else {
1992 addProblemNameToFile(name,author,nomenclaturalCode,rank);
1993 }
1994 // System.out.println("TBF still has problems "+nameTBF.hasProblem());
1995 problems = nameTBF.getParsingProblems();
1996 for (ParserProblem pb:problems) {
1997 System.out.println(pb.toString());
1998 }
1999 nameTBF.setFullTitleCache(name, true);
2000 }else{
2001 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2002 ((BotanicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2003 }
2004 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2005 ((ZoologicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2006 }
2007 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2008 ((BacterialName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2009 }
2010 }
2011 // logger.info("FULL TITLE CACHE "+name);
2012 }else{
2013 nameTBF.setFullTitleCache(name, true);
2014 }
2015 }
2016 return nameTBF;
2017 }
2018
2019 */
2020
2021 /**
2022 * @param nomenclatureNode: the XML nodes
2023 * @param nametosave: the list of objects to save into the CDM
2024 * @param refMods: the current reference extracted from the MODS
2025 * @return
2026 */
2027 @SuppressWarnings({ "rawtypes" })
2028 private Taxon extractNomenclature(Node nomenclatureNode, List<TaxonName> nametosave, Reference refMods) throws ClassCastException{
2029 refMods=CdmBase.deproxy(refMods, Reference.class);
2030
2031 logger.info("extractNomenclature");
2032 NodeList children = nomenclatureNode.getChildNodes();
2033 String freetext="";
2034 Taxon acceptedTaxon = null;
2035 // INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2036
2037 // String fullContent = nomenclatureNode.getTextContent();
2038
2039 NomenclaturalStatusType statusType = null;
2040 String newNameStatus = null;
2041 //TODO
2042 for (int i=0;i<children.getLength();i++){
2043 if(children.item(i).getNodeName().equalsIgnoreCase("tax:status")){
2044 String status = children.item(i).getTextContent().trim();
2045
2046 if (!status.isEmpty()){
2047 if (newNameStatus(status) != null){
2048 newNameStatus = newNameStatus(status);
2049 }else{
2050 try {
2051 statusType = nomStatusString2NomStatus(status);
2052 } catch (UnknownCdmTypeException e) {
2053 // nomNovStatus;
2054 addProblematicStatusToFile(status);
2055 logger.warn("Problem with status: " + status);
2056 }
2057 }
2058 }
2059 }
2060 }
2061
2062 boolean containsSynonyms=false;
2063 boolean wasSynonym = false;
2064 usedFollowingTextPrefix = null; //reset
2065
2066 for (int i=0; i<children.getLength(); i++){
2067 Node childNode = children.item(i);
2068 String childName = childNode.getNodeName();
2069
2070
2071 //following text
2072 followingText = null;
2073 if ( i + 1 < children.getLength()){
2074 Node followingTextNode = children.item(i +1);
2075 if (followingTextNode.getNodeName().equals("#text") && !followingTextNode.getTextContent().matches("\\s*") ){
2076 followingText = followingTextNode.getTextContent();
2077 }
2078 }
2079
2080 //traverse nodes
2081 if (childName.equalsIgnoreCase("#text")) {
2082 freetext = childNode.getTextContent().trim();
2083 if (usedFollowingTextPrefix != null && freetext.startsWith(usedFollowingTextPrefix)){
2084 freetext = freetext.substring(usedFollowingTextPrefix.length());
2085 }
2086 usedFollowingTextPrefix = null; //reset
2087 }else if (childName.equalsIgnoreCase("tax:collection_event")) {
2088 // System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
2089 extractMaterialsDirect(childNode, acceptedTaxon, refMods, "collection", currentMyName.getTaxonName());
2090 }else if(childName.equalsIgnoreCase("tax:name")){
2091 INonViralName nameToBeFilled;
2092 //System.out.println("HANDLE FIRST NAME OF THE LIST");
2093 if(!containsSynonyms){
2094 wasSynonym = false;
2095
2096 //System.out.println("I : "+i);
2097 currentMyName = new MyName(false);
2098 try {
2099 currentMyName = extractScientificName(childNode, refMods, followingText);
2100 treatmentMainName = currentMyName.getNewName();
2101 originalTreatmentName = currentMyName.getOriginalName();
2102
2103 } catch (TransformerFactoryConfigurationError e1) {
2104 throw new RuntimeException(e1);
2105 } catch (TransformerException e1) {
2106 throw new RuntimeException(e1);
2107 }
2108
2109 if (currentMyName.getRank().equals(Rank.UNKNOWN_RANK()) || currentMyName.getRank().isLower(state2.getConfig().getMaxRank()) || currentMyName.getRank().equals(state2.getConfig().getMaxRank())){
2110 maxRankRespected=true;
2111
2112 nameToBeFilled=currentMyName.getTaxonName();
2113
2114 // acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
2115 acceptedTaxon=currentMyName.getTaxon();
2116 //System.out.println("TreatmentName "+treatmentMainName+" - "+acceptedTaxon);
2117
2118
2119 boolean statusMatch=false;
2120 if(acceptedTaxon !=null ){
2121 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2122 statusMatch=compareStatus(acceptedTaxon, statusType);
2123 //System.out.println("statusMatch: "+statusMatch);
2124 }
2125 if (acceptedTaxon ==null || (acceptedTaxon != null && !statusMatch)){
2126
2127 nameToBeFilled=currentMyName.getTaxonName();
2128 if (nameToBeFilled != null){
2129 if (!originalTreatmentName.isEmpty()) {
2130 TaxonNameDescription td = TaxonNameDescription.NewInstance();
2131 td.setTitleCache(originalTreatmentName, true);
2132 nameToBeFilled.addDescription(td);
2133 }
2134
2135 if(statusType != null) {
2136 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
2137 }
2138 if(newNameStatus != null){
2139 nameToBeFilled.setAppendedPhrase(newNameStatus);
2140 }
2141 sourceHandler.addSource(refMods, TaxonName.castAndDeproxy(nameToBeFilled));
2142
2143 if (nameToBeFilled.getNomenclaturalReference() == null) {
2144 acceptedTaxon= Taxon.NewInstance(nameToBeFilled,refMods);
2145 //System.out.println("NEW ACCEPTED HERE "+nameToBeFilled);
2146 }
2147 else {
2148 acceptedTaxon= Taxon.NewInstance(nameToBeFilled,nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
2149 //System.out.println("NEW ACCEPTED HERE2 "+nameToBeFilled);
2150 }
2151
2152 sourceHandler.addSource(refMods, acceptedTaxon);
2153
2154 if(!state2.getConfig().doKeepOriginalSecundum()) {
2155 acceptedTaxon.setSec(state2.getConfig().getSecundum());
2156 //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2157 //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2158 }
2159
2160 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2161 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2162 }
2163
2164
2165 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2166 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2167 }
2168
2169 }else{
2170 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2171 Set<IdentifiableSource> sources = acceptedTaxon.getSources();
2172 boolean sourcelinked=false;
2173 for (IdentifiableSource source:sources){
2174 if (source.getCitation().getTitleCache().equalsIgnoreCase(refMods.getTitleCache())) {
2175 sourcelinked=true;
2176 }
2177 }
2178 if (!state2.getConfig().doKeepOriginalSecundum()) {
2179 acceptedTaxon.setSec(state2.getConfig().getSecundum());
2180 //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2181 //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2182 }
2183 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2184
2185 if (!sourcelinked){
2186 sourceHandler.addSource(refMods, acceptedTaxon);
2187 }
2188 if (!sourcelinked || !state2.getConfig().doKeepOriginalSecundum()){
2189
2190 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2191 //FIXME are these identifiers really related to taxa, not names? Exiting LSIDs come from Zoobank, urn:lsid:biosci.ohio-state.edu:osuc_concepts:134826 (Ants)
2192 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2193 }
2194 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2195 }
2196 }
2197 }else{
2198 maxRankRespected=false;
2199 }
2200 containsSynonyms=true; //all folowing names are handled as synonyms
2201 }else{
2202 try{
2203 extractSynonyms(childNode, acceptedTaxon, refMods, followingText);
2204 wasSynonym = true;
2205
2206 }catch(NullPointerException e){
2207 logger.warn("null pointer exception, the accepted taxon might be null");
2208 }
2209 }
2210 containsSynonyms=true;
2211 }else if (childName.equalsIgnoreCase("tax:ref_group") && maxRankRespected){
2212 reloadClassification();
2213 //extract the References within the document
2214 extractReferences(childNode,nametosave,acceptedTaxon,refMods);
2215 }else if (childName.equalsIgnoreCase("tax:bibref")){
2216 logger.warn(childName + " still preliminary");
2217
2218 TaxonName currentName = currentMyName == null ? null : currentMyName.getTaxonName();
2219 boolean handled = addFollowingTextToName (currentName, childNode.getTextContent() );
2220 if (! handled){
2221 setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject());
2222 }
2223 }else{
2224 logger.warn(childName + " not yet handled");
2225 }
2226 if(!stringIsEmpty(freetext.trim())) {;
2227 if (! freetext.matches("\\d\\.?")){
2228 TaxonName currentName = currentMyName == null ? null : currentMyName.getTaxonName();
2229 boolean handled = false;
2230 if (currentName != null && !wasSynonym){
2231 handled = addFollowingTextToName (currentName, childNode.getTextContent() );
2232 }
2233 if (! handled){
2234 setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject());
2235 }
2236 }
2237
2238 freetext = "";
2239 }
2240
2241 }
2242 //importer.getClassificationService().saveOrUpdate(classification);
2243 return acceptedTaxon;
2244 }
2245
2246
2247
2248
2249 /**
2250 * @return
2251 */
2252
2253 private boolean compareStatus(TaxonBase<?> t, NomenclaturalStatusType statusType) {
2254 //logger.info("compareStatus");
2255 boolean statusMatch=false;
2256 //found one taxon
2257 Set<NomenclaturalStatus> status = t.getName().getStatus();
2258 if (statusType!=null && status.size()>0){ //the statusType is known for both taxon
2259 for (NomenclaturalStatus st:status){
2260 NomenclaturalStatusType stype = st.getType();
2261 if (stype.toString().equalsIgnoreCase(statusType.toString())) {
2262 statusMatch=true;
2263 }
2264 }
2265 }
2266 else{
2267 if(statusType == null && status.size()==0) {//there is no statusType, we can assume it's the same
2268 statusMatch=true;
2269 }
2270 }
2271 return statusMatch;
2272 }
2273
2274 /**
2275 * @param acceptedTaxon: the current acceptedTaxon
2276 * @param ref: the current reference extracted from the MODS
2277 * @return the parent for the current accepted taxon
2278 */
2279 /* private Taxon createParent(Taxon acceptedTaxon, Reference ref) {
2280 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2281
2282 List<Rank> rankList = new ArrayList<Rank>();
2283 rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
2284
2285 List<String> rankListStr = new ArrayList<String>();
2286 for (Rank r:rankList) {
2287 rankListStr.add(r.toString());
2288 }
2289 String r="";
2290 String s = acceptedTaxon.getTitleCache();
2291 Taxon tax = null;
2292 if(!skippQuestion){
2293 int addTaxon = askAddParent(s);
2294 logger.info("ADD TAXON: "+addTaxon);
2295 if (addTaxon == 0 ){
2296 Taxon tmp = askParent(acceptedTaxon, classification);
2297 if (tmp == null){
2298 s = askSetParent(s);
2299 r = askRank(s,rankListStr);
2300
2301 TaxonName nameToBeFilled = null;
2302 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2303 nameToBeFilled = TaxonNameFactory.NewBotanicalInstance(null);
2304 }
2305 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2306 nameToBeFilled = TaxonNameFactory.NewZoologicalInstance(null);
2307 }
2308 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2309 nameToBeFilled = TaxonNameFactory.NewBacterialInstance(null);
2310 }
2311 nameToBeFilled.setTitleCache(s, true);
2312 nameToBeFilled.setRank(getRank(r), true);
2313
2314 tax = Taxon.NewInstance(nameToBeFilled, ref);
2315 }
2316 else{
2317 tax=tmp;
2318 }
2319
2320 createParent(tax, ref);
2321 // logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
2322 classification.addParentChild(tax, acceptedTaxon, ref, null);
2323 }
2324 else{
2325 classification.addChildTaxon(acceptedTaxon, ref, null);
2326 tax=acceptedTaxon;
2327 }
2328 } else{
2329 classification.addChildTaxon(acceptedTaxon, ref, null);
2330 tax=acceptedTaxon;
2331 }
2332 // logger.info("RETURN: "+tax );
2333 return tax;
2334
2335 }
2336
2337 */
2338
2339
2340 private MyName extractScientificNameSynonym(Node name, Reference refMods, String followingText) throws TransformerFactoryConfigurationError, TransformerException {
2341 //System.out.println("extractScientificNameSynonym");
2342 logger.info("extractScientificNameSynonym");
2343 String[] rankListToPrint_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2344 List<String> rankListToPrint = new ArrayList<String>();
2345 for (String r : rankListToPrint_tmp) {
2346 rankListToPrint.add(r.toLowerCase());
2347 }
2348
2349 Rank rank = Rank.UNKNOWN_RANK();
2350 NodeList children = name.getChildNodes();
2351 String originalName="";
2352 String fullName = "";
2353 String newName="";
2354 String identifier="";
2355 HashMap<String, String> atomisedMap = new HashMap<String, String>();
2356 List<String> atomisedName= new ArrayList<String>();
2357
2358 String rankStr = "";
2359 Rank tmpRank ;
2360
2361 String status= extractStatus(children);
2362
2363 for (int i=0;i<children.getLength();i++){
2364 if(children.item(i).getNodeName().equalsIgnoreCase("tax:xmldata")){
2365 NodeList atom = children.item(i).getChildNodes();
2366 for (int k=0;k<atom.getLength();k++){
2367 identifier = extractIdentifier(identifier, atom.item(k));
2368 tmpRank = null;
2369 rankStr = atom.item(k).getNodeName().toLowerCase();
2370 // logger.info("RANKSTR:*"+rankStr+"*");
2371 if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2372 rankStr=atom.item(k).getTextContent().trim();
2373 tmpRank = getRank(rankStr);
2374 }
2375 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2376 if (tmpRank != null){
2377 rank=tmpRank;
2378 }
2379 atomisedMap.put(rankStr.toLowerCase(),atom.item(k).getTextContent().trim());
2380 }
2381 addAtomisedNamesToMap(rankListToPrint, rank, atomisedName, atom);
2382 }
2383 if(children.item(i).getNodeName().equalsIgnoreCase("#text") && !StringUtils.isBlank(children.item(i).getTextContent())){
2384 // logger.info("name non atomised: "+children.item(i).getTextContent());
2385 fullName = children.item(i).getTextContent().trim();
2386 // logger.info("fullname: "+fullName);
2387 }
2388 }
2389 originalName=fullName;
2390 fullName = cleanName(fullName, atomisedName);
2391 namesMap.put(fullName,atomisedMap);
2392
2393 String atomisedNameStr = getAtomisedNameStr(atomisedName);
2394
2395 if (fullName != null){
2396 // System.out.println("fullname: "+fullName);
2397 // System.out.println("atomised: "+atomisedNameStr);
2398 if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2399 if (skippQuestion){
2400 // String defaultN = "";
2401 if (atomisedNameStr.length()>fullName.length()) {
2402 newName=atomisedNameStr;
2403 } else {
2404 if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2405 newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2406 } else {
2407 newName=fullName;
2408 }
2409 }
2410 } else {
2411 newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2412 }
2413 } else {
2414 newName=fullName;
2415 }
2416 }
2417 //not really needed
2418 // rank = askForRank(newName, rank, nomenclaturalCode);
2419 // System.out.println("atomised: "+atomisedMap.toString());
2420
2421 // String[] names = new String[5];
2422 MyName myname = new MyName(true);
2423
2424 //System.out.println("Handle "+newName+ "(rank: "+rank+")");
2425 // System.out.println(atomisedMap.keySet());
2426 fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2427 myname.setOriginalName(fullName);
2428 myname.setNewName(newName);
2429 myname.setRank(rank);
2430 myname.setIdentifier(identifier);
2431 myname.setStatus(status);
2432 myname.setSource(refMods);
2433
2434 // boolean higherAdded=false;
2435
2436
2437 boolean parseNameManually=false;
2438 INonViralNameParser<?> parser = NonViralNameParserImpl.NewInstance();
2439 TaxonName nameToBeFilledTest ;
2440
2441 //if selected the atomised version
2442 if(newName==atomisedNameStr){
2443 nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2444 if (nameToBeFilledTest.hasProblem()){
2445 addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2446 nameToBeFilledTest = (TaxonName)parser.parseFullName(fullName, nomenclaturalCode, rank);
2447 if (nameToBeFilledTest.hasProblem()){
2448 addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2449 parseNameManually=true;
2450 }
2451 }
2452 }else{
2453 nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2454 if (nameToBeFilledTest.hasProblem()){
2455 addProblemNameToFile("fullversion",fullName, nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2456 nameToBeFilledTest = (TaxonName)parser.parseFullName(fullName, nomenclaturalCode,rank);
2457 parseNameManually=true;
2458 if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2459 addNameDifferenceToFile(originalName,atomisedNameStr);
2460 }
2461 }
2462 }
2463
2464 if(parseNameManually){
2465 //System.out.println("DO IT MANUALLY");
2466 if (this.state2.getConfig().isUseOldUnparsedSynonymExtraction()){
2467 createUnparsedSynonym(rank, newName, atomisedMap, myname);
2468 }else{
2469 createUnparsedSynonymNew(rank, newName, atomisedMap, myname, refMods);;
2470 }
2471 } else{
2472 //System.out.println("AUTOMATIC!");
2473 // createAtomisedTaxonString(newName, atomisedMap, myname);
2474 myname.setParsedName(nameToBeFilledTest);
2475 myname.buildTaxon();
2476 }
2477 //System.out.println("RETURN SYNONYM "+myname.getSyno().toString());
2478 return myname;
2479 }
2480
2481
2482 /**
2483 * @param name
2484 * @throws TransformerFactoryConfigurationError
2485 * @throws TransformerException
2486 * @return a list of possible names
2487 */
2488 @SuppressWarnings({"rawtypes" })
2489 private MyName extractScientificName(Node name, Reference refMods, String followingText) throws TransformerFactoryConfigurationError, TransformerException {
2490 logger.info("extractScientificName");
2491
2492 String[] rankListToPrintLowerCase_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2493 List<String> rankListToPrint = Arrays.asList(rankListToPrintLowerCase_tmp);
2494
2495 Rank rank = Rank.UNKNOWN_RANK();
2496 NodeList children = name.getChildNodes();
2497 String originalName = "";
2498 String fullName = "";
2499 String newName = "";
2500 String identifier = "";
2501 HashMap<String, String> atomisedMap = new HashMap<String, String>();
2502 List<String> atomisedNameList= new ArrayList<String>();
2503
2504 String status= extractStatus(children);
2505
2506 for (int i=0;i<children.getLength();i++){
2507 Node nameChild = children.item(i);
2508 if(nameChild.getNodeName().equalsIgnoreCase("tax:xmldata")){
2509 NodeList xmlDataChildren = nameChild.getChildNodes();
2510 for (int k=0;k<xmlDataChildren.getLength();k++){
2511 Node xmlDataChild = xmlDataChildren.item(k);
2512 identifier = extractIdentifier(identifier, xmlDataChild);
2513 String rankStr = xmlDataChild.getNodeName().toLowerCase();
2514 if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2515 rankStr=xmlDataChild.getTextContent().trim();
2516 Rank tmpRank = getRank(rankStr);
2517 if (tmpRank != null){
2518 rank=tmpRank;
2519 }
2520 }
2521 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2522
2523 atomisedMap.put(rankStr.toLowerCase(),xmlDataChild.getTextContent().trim());
2524 }
2525 addAtomisedNamesToMap(rankListToPrint, rank, atomisedNameList, xmlDataChildren);
2526 }
2527 else if(nameChild.getNodeName().equalsIgnoreCase("#text") && ! nameChild.getTextContent().matches("\\s*")){
2528 // logger.info("name non atomised: "+children.item(i).getTextContent());
2529 fullName = nameChild.getTextContent().trim();
2530 // logger.info("fullname: "+fullName);
2531 }
2532 }
2533 originalName=fullName;
2534 fullName = cleanName(fullName, atomisedNameList);
2535 namesMap.put(fullName,atomisedMap);
2536
2537 String atomisedNameStr = getAtomisedNameStr(atomisedNameList);
2538
2539 if (fullName != null){
2540 if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2541 if (skippQuestion){
2542 if (atomisedNameStr.length()>fullName.length()) {
2543 newName = atomisedNameStr;
2544 } else {
2545 if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2546 newName = askWhichScientificName(fullName, atomisedNameStr, classification.getTitleCache(), name);
2547 } else {
2548 newName = fullName;
2549 }
2550 }
2551 } else {
2552 newName=askWhichScientificName(fullName, atomisedNameStr, classification.getTitleCache(), name);
2553 }
2554 } else {
2555 newName=fullName;
2556 }
2557 }
2558 //not really needed
2559 // rank = askForRank(newName, rank, nomenclaturalCode);
2560 // System.out.println("atomised: "+atomisedMap.toString());
2561
2562 // String[] names = new String[5];
2563 MyName myname = new MyName(false);
2564
2565 //System.out.println("\n\nBUILD "+newName+ "(rank: "+rank+")");
2566 // System.out.println(atomisedMap.keySet());
2567 fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2568 myname.setOriginalName(fullName);
2569 myname.setNewName(newName);
2570
2571 myname.setRank(rank);
2572 myname.setIdentifier(identifier);
2573 myname.setStatus(status);
2574 myname.setSource(refMods);
2575
2576 // boolean higherAdded=false;
2577
2578
2579 boolean parseNameManually=false;
2580 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2581 TaxonName nameToBeFilledTest = null;
2582
2583 //if selected the atomised version
2584 if(newName==atomisedNameStr){
2585 nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2586 if (nameToBeFilledTest.hasProblem()){
2587 addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2588 nameToBeFilledTest = (TaxonName)parser.parseFullName(fullName, nomenclaturalCode,rank);
2589 if (nameToBeFilledTest.hasProblem()){
2590 addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2591 parseNameManually=true;
2592 }
2593 }
2594 }else{
2595 nameToBeFilledTest = parseWithExtension(parser, fullName , rank, followingText, atomisedMap);
2596 if (nameToBeFilledTest.hasProblem()){
2597 addProblemNameToFile("fullversion",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2598 nameToBeFilledTest = (TaxonName)parser.parseFullName(fullName, nomenclaturalCode,rank);
2599 parseNameManually=true;
2600 if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2601 addNameDifferenceToFile(originalName,atomisedNameStr);
2602 }
2603 }
2604 }
2605
2606 //System.out.println("parseNameManually: "+parseNameManually);
2607 if(parseNameManually){
2608 createAtomisedTaxon(rank, newName, atomisedMap, myname);
2609 }
2610 else{
2611 createAtomisedTaxonString(newName, atomisedMap, myname);
2612 myname.setParsedName(nameToBeFilledTest);
2613 //TODO correct handling of createIfNotExists
2614 myname.buildTaxon();
2615 }
2616 return myname;
2617
2618 }
2619
2620 private TaxonName parseWithExtension(INonViralNameParser parser, String atomisedNameStr, Rank rank, String followingText, HashMap<String, String> atomisedMap) {
2621 Object[] nameExtensionResult = getPossibleExtension(followingText, atomisedMap, nomenclaturalCode);
2622
2623 TaxonName name = (TaxonName)parser.parseFullName(atomisedNameStr, nomenclaturalCode, rank);
2624 if (nameExtensionResult != null && nameExtensionResult[0] != null){
2625 String ext = (String)nameExtensionResult[0];
2626 TaxonName extName = (TaxonName)parser.parseFullName(atomisedNameStr + " " + ext, nomenclaturalCode, rank);
2627 if (! extName.hasProblem()){
2628 name = extName;
2629 this.usedFollowingTextPrefix = ext;
2630 //TODO do we need to fill the atomisedMap at all?
2631 if ((Boolean)(nameExtensionResult[1])){
2632 //TODO
2633 }
2634 if ((Boolean)(nameExtensionResult[2])){
2635 //TODO BasionymYear etc.
2636 Integer origYear = name.getPublicationYear();
2637 if (origYear != null){
2638 atomisedMap.put(PUBLICATION_YEAR, origYear.toString());
2639 }
2640 }
2641 }
2642 }
2643 return name;
2644 }
2645
2646 private Object[] getPossibleExtension(String followingText, HashMap<String, String> atomisedMap, NomenclaturalCode nomenclaturalCode) {
2647 if (StringUtils.isBlank(followingText)){
2648 return null;
2649 }
2650
2651 boolean includeAuthor = true;
2652 boolean includeYear = false;
2653 if (atomisedMap.containsKey("dwc:scientificnameauthorship")){
2654 includeAuthor = false;
2655 }
2656 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2657 includeYear = true;
2658 }
2659 String patternStr = "";
2660 if (includeAuthor){
2661 patternStr += NonViralNameParserImplRegExBase.capitalWord;
2662 }
2663 if (includeYear){
2664 patternStr += "\\s*(,|\\s+)\\s*" + "(17|18|19|20)" + "\\d{2}" ;
2665 }
2666 String match = null;
2667 if (! patternStr.isEmpty()){
2668 Pattern pattern = Pattern.compile("^" + patternStr);
2669 Matcher matcher = pattern.matcher(followingText.trim());
2670 if (matcher.find()){
2671 match = matcher.group();
2672 }
2673 }
2674
2675 return new Object[]{match, includeAuthor, includeYear};
2676 }
2677
2678 /**
2679 * @param atomisedName
2680 * @return
2681 */
2682 private String getAtomisedNameStr(List<String> atomisedName) {
2683 //logger.info("getAtomisedNameStr");
2684 String atomisedNameStr = StringUtils.join(atomisedName," ");
2685 while(atomisedNameStr.contains(" ")) {
2686 atomisedNameStr=atomisedNameStr.replace(" ", " ");
2687 }
2688 atomisedNameStr=atomisedNameStr.trim();
2689 return atomisedNameStr;
2690 }
2691
2692 /**
2693 * @param children
2694 * @param status
2695 * @return
2696 */
2697 private String extractStatus(NodeList children) {
2698 logger.info("extractStatus");
2699 String status="";
2700 for (int i=0;i<children.getLength();i++){
2701 if(children.item(i).getNodeName().equalsIgnoreCase("tax:status") ||
2702 (children.item(i).getNodeName().equalsIgnoreCase("tax:namePart") &&
2703 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("status"))){
2704 status = children.item(i).getTextContent().trim();
2705 }
2706 }
2707 return status;
2708 }
2709
2710 /**
2711 * @param identifier
2712 * @param atom
2713 * @param k
2714 * @return
2715 */
2716 private String extractIdentifier(String identifier, Node atom) {
2717 //logger.info("extractIdentifier");
2718 if (atom.getNodeName().equalsIgnoreCase("tax:xid")){
2719 try{
2720 identifier = atom.getAttributes().getNamedItem("identifier").getNodeValue();
2721 }catch(Exception e){
2722 System.out.println("pb with identifier, maybe empty");
2723 }
2724 try{
2725 identifier+="__"+atom.getAttributes().getNamedItem("source").getNodeValue();
2726 }catch(Exception e){
2727 System.out.println("pb with identifier, maybe empty");
2728 }
2729 }
2730 return identifier;
2731 }
2732
2733 /**
2734 * @param rankListToPrint
2735 * @param rank
2736 * @param atomisedName
2737 * @param atom
2738 */
2739 private void addAtomisedNamesToMap(List<String> rankListToPrint, Rank rank, List<String> atomisedName, NodeList atom) {
2740 logger.info("addAtomisedNamesToMap");
2741 for (int k=0;k<atom.getLength();k++){
2742 Node node = atom.item(k);
2743 String nodeName = node.getNodeName();
2744 if (! nodeName.equalsIgnoreCase("dwc:taxonRank") ) { //rank has been handled in higher method
2745 if (nodeName.equalsIgnoreCase("dwc:subgenus") || nodeName.equalsIgnoreCase("dwcranks:subgenus")) {
2746 atomisedName.add("("+ node.getTextContent().trim()+")");
2747 } else if(nodeName.equalsIgnoreCase("dwcranks:varietyepithet") || nodeName.equalsIgnoreCase("dwc:Subspecies") || nodeName.equalsIgnoreCase("dwc:infraspecificepithet")) {
2748 if(nodeName.equalsIgnoreCase("dwcranks:varietyepithet")){
2749 atomisedName.add("var. "+node.getTextContent().trim());
2750 }else if(nodeName.equalsIgnoreCase("dwc:Subspecies") || nodeName.equalsIgnoreCase("dwc:infraspecificepithet")) {
2751 atomisedName.add("subsp. "+atom.item(k).getTextContent().trim());
2752 }
2753 } else if(rankListToPrint.contains(nodeName.toLowerCase())) {
2754 atomisedName.add(node.getTextContent().trim());
2755 } else{
2756 if (rank.isHigher(Rank.GENUS()) && (nodeName.indexOf("dwcranks:")>-1 || nodeName.indexOf("dwc:Family")>-1)) {
2757 atomisedName.add(node.getTextContent().trim());
2758 }else if (nodeName.equals("#text")){
2759 String text = node.getTextContent();
2760 if (StringUtils.isNotBlank(text)){
2761 //TODO handle text
2762 logger.warn("name xmldata contains text. This is unhandled");
2763 }
2764 }else if (nodeName.matches("(?i)(dwc:Kingdom|dwc:Class|dwc:Order|dwc:Family)")){
2765 //we currently do not use higher ranks information
2766 }else{
2767 //TODO handle unhandled node
2768 logger.warn("Unhandled node: " + nodeName);
2769 }
2770 }
2771 }
2772 }
2773 }
2774
2775 /**
2776 * @param fullName
2777 * @param atomisedName
2778 * @return
2779 */
2780 private String cleanName(String name, List<String> atomisedName) {
2781 //logger.info("cleanName");
2782 String fullName =name;
2783 if (fullName != null){
2784 fullName = fullName.replace("( ", "(");
2785 fullName = fullName.replace(" )",")");
2786
2787 if (fullName.trim().isEmpty()){
2788 fullName=StringUtils.join(atomisedName," ");
2789 }
2790
2791 while(fullName.contains(" ")) {
2792 fullName=fullName.replace(" ", " ");
2793 // logger.info("while");
2794 }
2795 fullName=fullName.trim();
2796 }
2797 return fullName;
2798 }
2799
2800 /**
2801 * @param rank
2802 * @param fullName
2803 * @param atomisedMap
2804 * @param myname
2805 * @return
2806 */
2807 private String extractAuthorFromNames(Rank rank, String name, HashMap<String, String> atomisedMap, MyName myname) {
2808 logger.info("extractAuthorFromNames");
2809 String fullName=name;
2810 if (atomisedMap.get("dwc:scientificnameauthorship") == null && fullName!=null){
2811 // System.out.println("rank : "+rank.toString());
2812 if(rank.isHigher(Rank.SPECIES())){
2813 try{
2814 String author=null;
2815 if(atomisedMap.get("dwcranks:subgenus") != null) {
2816 author = fullName.split(atomisedMap.get("dwcranks:subgenus"))[1].trim();
2817 }
2818 if(atomisedMap.get("dwc:subgenus") != null) {
2819 author = fullName.split(atomisedMap.get("dwc:subgenus"))[1].trim();
2820 }
2821 if(author == null) {
2822 if(atomisedMap.get("dwc:genus") != null) {
2823 author = fullName.split(atomisedMap.get("dwc:genus"))[1].trim();
2824 }
2825 }
2826 if(author != null){
2827 fullName = fullName.substring(0, fullName.indexOf(author));
2828 author=author.replaceAll(",","").trim();
2829 myname.setAuthor(author);
2830 }
2831 }catch(Exception e){
2832 //could not extract the author
2833 }
2834 }
2835 if(rank.equals(Rank.SPECIES())){
2836 try{
2837 String author=null;
2838 if(author == null) {
2839 if(atomisedMap.get("dwc:species") != null) {
2840 String[] t = fullName.split(atomisedMap.get("dwc:species"));
2841 // System.out.println("NB ELEMENTS "+t.length +"fullName "+fullName+", "+atomisedMap.get("dwc:species"));
2842 author = fullName.split(atomisedMap.get("dwc:species"))[1].trim();
2843 // System.out.println("AUTEUR "+author);
2844 }
2845 }
2846 if(author != null){
2847 fullName = fullName.substring(0, fullName.indexOf(author));
2848 author=author.replaceAll(",","").trim();
2849 myname.setAuthor(author);
2850 }
2851 }catch(Exception e){
2852 //could not extract the author
2853 }
2854 }
2855 }else{
2856 myname.setAuthor(atomisedMap.get("dwc:scientificnameauthorship"));
2857 }
2858 return fullName;
2859 }
2860
2861 /**
2862 * @param newName
2863 * @param atomisedMap
2864 * @param myname
2865 */
2866 private void createAtomisedTaxonString(String newName, HashMap<String, String> atomisedMap, MyName myname) {
2867 logger.info("createAtomisedTaxonString "+atomisedMap);
2868 if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
2869 myname.setFamilyStr(atomisedMap.get("dwc:family"));
2870 }
2871 if(atomisedMap.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank.SUBFAMILY())){
2872 myname.setSubfamilyStr(atomisedMap.get("dwcranks:subfamily"));
2873 }
2874 if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
2875 myname.setTribeStr(atomisedMap.get("dwcranks:tribe"));
2876 }
2877 if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
2878 myname.setSubtribeStr(atomisedMap.get("dwcranks:subtribe"));
2879 }
2880 if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
2881 myname.setGenusStr(atomisedMap.get("dwc:genus"));
2882 }
2883 if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2884 myname.setSubgenusStr(atomisedMap.get("dwcranks:subgenus"));
2885 }
2886 if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2887 myname.setSubgenusStr(atomisedMap.get("dwc:subgenus"));
2888 }
2889 if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
2890 String n=newName;
2891 if(atomisedMap.get("dwc:infraspecificepithet") != null) {
2892 n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
2893 n=n.replace("subsp.","");
2894 }
2895 if(atomisedMap.get("dwc:subspecies") != null) {
2896 n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
2897 n=n.replace("subsp.","");
2898 }
2899 if(atomisedMap.get("dwcranks:varietyepithet") != null) {
2900 n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
2901 n=n.replace("var.","");
2902 n=n.replace("v.","");
2903 }
2904 if(atomisedMap.get("dwcranks:formepithet") != null) {
2905 //TODO
2906 System.out.println("TODO FORMA");
2907 n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
2908 n=n.replace("forma","");
2909 }
2910 n=n.trim();
2911 String author = myname.getAuthor();
2912 if(n.split(" ").length>2){
2913
2914 String n2=n.split(" ")[0]+" "+n.split(" ")[1];
2915 String a= "";
2916 try{
2917 a=n.split(n2)[1].trim();
2918 }catch(Exception e){
2919 logger.info("no author in "+n+"?");}
2920
2921 myname.setAuthor(a);
2922 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
2923 n=n2;
2924
2925 }
2926
2927 myname.setSpeciesStr(atomisedMap.get("dwc:species"));
2928 myname.setAuthor(author);
2929 }
2930 if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
2931 myname.setSubspeciesStr(atomisedMap.get("dwc:subspecies"));
2932 }
2933 if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
2934 myname.setSubspeciesStr(atomisedMap.get("dwc:infraspecificepithet"));
2935 }
2936 if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
2937 myname.setVarietyStr(atomisedMap.get("dwcranks:varietyepithet"));
2938 }
2939 if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
2940 myname.setFormStr(atomisedMap.get("dwcranks:formepithet"));
2941 }
2942 if (atomisedMap.get(PUBLICATION_YEAR) != null){
2943 myname.setPublicationYear(Integer.valueOf(atomisedMap.get(PUBLICATION_YEAR)));
2944 }
2945 }
2946
2947 /**
2948 * @see #createUnparsedSynonymNew(Rank, String, HashMap, MyName)
2949 * @param rank
2950 * @param newName
2951 * @param atomisedMap
2952 * @param myname
2953 */
2954 private void createUnparsedSynonym(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
2955 logger.info("createSynonym");
2956 //System.out.println("createsynonym");
2957 if(rank.equals(Rank.UNKNOWN_RANK())){
2958 myname.setNotParsableTaxon(newName);
2959 }else{
2960 if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY()) && rank.equals(Rank.FAMILY())){
2961 myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
2962 }
2963 if(atomisedMap.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank.SUBFAMILY()) && rank.equals(Rank.SUBFAMILY())){
2964 myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
2965 }
2966 if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE()) && rank.equals(Rank.TRIBE())){
2967 myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
2968 }
2969 if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE()) && rank.equals(Rank.SUBTRIBE())){
2970 myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
2971 }
2972 if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS()) && rank.equals(Rank.GENUS())){
2973 myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
2974 }
2975 if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
2976 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
2977 }
2978 if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
2979 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
2980 }
2981 if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES()) && rank.equals(Rank.SPECIES())){
2982 String n=newName;
2983 if(atomisedMap.get("dwc:infraspecificepithet") != null) {
2984 n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
2985 n=n.replace("subsp.","");
2986 }
2987 if(atomisedMap.get("dwc:subspecies") != null) {
2988 n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
2989 n=n.replace("subsp.","");
2990 }
2991 if(atomisedMap.get("dwcranks:varietyepithet") != null) {
2992 n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
2993 n=n.replace("var.","");
2994 n=n.replace("v.","");
2995 }
2996 if(atomisedMap.get("dwcranks:formepithet") != null) {
2997 //TODO
2998 //System.out.println("TODO FORMA");
2999 n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3000 n=n.replace("forma","");
3001 }
3002 n=n.trim();
3003 String author = myname.getAuthor();
3004 if(n.split(" ").length>2){
3005
3006 String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3007 String a="";
3008 try{
3009 a= n.split(n2)[1].trim();
3010 }catch(Exception e){logger.info("no author in "+n);}
3011 myname.setAuthor(a);
3012 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3013 n=n2;
3014
3015 }
3016 Taxon species = myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank);
3017 myname.setSpecies(species);
3018 myname.setAuthor(author);
3019 }
3020 if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3021 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3022 }
3023 if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3024 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3025 }
3026 if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY()) && rank.equals(Rank.VARIETY())){
3027 myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3028 }
3029 if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM()) && rank.equals(Rank.FORM())){
3030 myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3031 }
3032 }
3033
3034 }
3035
3036
3037 /**
3038 * @param refMods
3039 * @see #createUnparsedSynonym(Rank, String, HashMap, MyName)
3040 * the original TaxonXImport extracted Synonyms by creating acc Taxa with partial names
3041 * I (AM) do not understand this but don't want to destroy code which maybe works in some cases) there
3042 * I created this switch for old
3043 * for Spiders the new version is preferred
3044 */
3045 private void createUnparsedSynonymNew(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname, Reference refMods) {
3046 logger.info("createSynonym");
3047
3048 INonViralName nameToBeFilled = this.getNonViralNameAccNomenclature();
3049 //System.out.println("createsynonym");
3050 if(rank.equals(Rank.UNKNOWN_RANK())){
3051 //TODO
3052 myname.setNotParsableTaxon(newName);
3053
3054 nameToBeFilled.setTitleCache(newName, true);
3055 }else{
3056 if(atomisedMap.get("dwc:genus") != null ){
3057 nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwc:genus"));
3058 }
3059 if (rank.isSupraGeneric()){
3060 if (atomisedMap.get("dwcranks:subtribe") != null ){
3061 nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subtribe"));
3062 }else if (atomisedMap.get("dwcranks:subtribe") != null ){
3063 nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subtribe"));
3064 }else if (atomisedMap.get("dwcranks:tribe") != null ){
3065 nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:tribe"));
3066 }else if (atomisedMap.get("dwcranks:subfamily") != null ){
3067 nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subfamily"));
3068 }else if (atomisedMap.get("dwc:family") != null ){
3069 nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwc:family"));
3070 }else{
3071 logger.warn("Supra generic rank not yet handled or atomisation not available");
3072 }
3073 }
3074 if (atomisedMap.get("dwcranks:subgenus") != null){
3075 nameToBeFilled.setInfraGenericEpithet(atomisedMap.get("dwcranks:subgenus"));
3076 }
3077 if (atomisedMap.get("dwc:subgenus") != null){
3078 nameToBeFilled.setInfraGenericEpithet(atomisedMap.get("dwc:subgenus"));
3079 }
3080 if (atomisedMap.get("dwc:species") != null){
3081 nameToBeFilled.setSpecificEpithet(atomisedMap.get("dwc:species"));
3082 }
3083 if (atomisedMap.get("dwcranks:formepithet") != null){
3084 nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwcranks:formepithet"));
3085 }else if (atomisedMap.get("dwcranks:varietyepithet") != null){
3086 nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwcranks:varietyepithet"));
3087 }else if (atomisedMap.get("dwc:infraspecificepithet") != null){
3088 nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwc:infraspecificepithet"));
3089 }else if (atomisedMap.get("dwc:subspecies") != null){
3090 nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwc:subspecies"));
3091 }
3092 Reference sec = sourceUrlRef;
3093 if(!state2.getConfig().doKeepOriginalSecundum()){
3094 sec = state2.getConfig().getSecundum();
3095 }
3096 Synonym syn = Synonym.NewInstance(nameToBeFilled, sec);
3097 // sourceHandler.addSource(refMods, syn);
3098 myname.setSyno(syn);
3099 myname.setSynonym(true);
3100 }
3101 }
3102
3103 /**
3104 * @param rank
3105 * @param newName
3106 * @param atomisedMap
3107 * @param myname
3108 */
3109 private void createAtomisedTaxon(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
3110 logger.info("createAtomisedTaxon "+atomisedMap);
3111 if(rank.equals(Rank.UNKNOWN_RANK())){
3112 myname.setNotParsableTaxon(newName);
3113 }
3114 else{
3115 if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
3116 myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
3117 }
3118 if(atomisedMap.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank.SUBFAMILY())){
3119 myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
3120 }
3121 if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
3122 myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
3123 }
3124 if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
3125 myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
3126 }
3127 if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
3128 myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
3129 }
3130 if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3131 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
3132 }
3133 if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3134 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
3135 }
3136 if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
3137 String n=newName;
3138 if(atomisedMap.get("dwc:infraspecificepithet") != null) {
3139 n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
3140 n=n.replace("subsp.","");
3141 }
3142 if(atomisedMap.get("dwc:subspecies") != null) {
3143 n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
3144 n=n.replace("subsp.","");
3145 }
3146 if(atomisedMap.get("dwcranks:varietyepithet") != null) {
3147 n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
3148 n=n.replace("var.","");
3149 n=n.replace("v.","");
3150 }
3151 if(atomisedMap.get("dwcranks:formepithet") != null) {
3152 //TODO
3153 //System.out.println("TODO FORMA");
3154 n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3155 n=n.replace("forma","");
3156 }
3157 n=n.trim();
3158 String author = myname.getAuthor();
3159 if(n.split(" ").length>2){
3160 String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3161 String a="";
3162 try{
3163 a= n.split(n2)[1].trim();
3164 }catch(Exception e){logger.info("no author in "+n);}
3165 myname.setAuthor(a);
3166 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3167 n=n2;
3168
3169 }
3170
3171 myname.setSpecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank));
3172 myname.setAuthor(author);
3173 }
3174 if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3175 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3176 }
3177 if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3178 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3179 }
3180 if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
3181 myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3182 }
3183 if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
3184 myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3185 }
3186 }
3187 }
3188
3189 /**
3190 * @return
3191 */
3192 private boolean checkRankValidForImport(Rank currentRank) {
3193 //logger.info("checkRankValidForImport");
3194 return currentRank.isLower(state2.getConfig().getMaxRank()) || currentRank.equals(state2.getConfig().getMaxRank());
3195 }
3196
3197
3198
3199 /**
3200 * @param classification2
3201 */
3202 public void updateClassification(Classification classification2) {
3203 //logger.info("updateClassification");
3204 classification = classification2;
3205 }
3206
3207
3208
3209 public class MyName {
3210 /**
3211 * @param isSynonym
3212 */
3213 public MyName(boolean isSynonym) {
3214 super();
3215 this.isSynonym = isSynonym;
3216 }
3217
3218 String originalName="";
3219 String newName="";
3220 Rank rank=Rank.UNKNOWN_RANK();
3221 String identifier="";
3222 String status="";
3223 String author=null;
3224
3225 TaxonName taxonName;
3226
3227 Reference refMods ;
3228
3229 Taxon family,subfamily,tribe,subtribe,genus,subgenus,species,subspecies, variety,form;
3230 INonViralName familyName, subfamilyName, tribeName,subtribeName,genusName,subgenusName,speciesName,subspeciesName;
3231 String familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr;
3232 Integer publicationYear;
3233
3234
3235 Taxon higherTaxa;
3236 Rank higherRank;
3237 private Taxon taxon;
3238 private Synonym syno;
3239
3240 /**
3241 * @return the syno
3242 */
3243 public Synonym getSyno() {
3244 return syno;
3245 }
3246
3247 @Override
3248 public String toString(){
3249 List<String> tot=new ArrayList<String>();
3250 String[] n= {familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr};
3251 for (String elt:n){
3252 if (!StringUtils.isEmpty(elt)) {
3253 tot.add(elt);
3254 } else {
3255 tot.add("*");
3256 }
3257 }
3258 return StringUtils.join(tot," ");
3259 }
3260 /**
3261 * @param syno the syno to set
3262 */
3263 public void setSyno(Synonym syno) {
3264 this.syno = syno;
3265 }
3266
3267 boolean isSynonym=false;
3268
3269 /**
3270 * @return the isSynonym
3271 */
3272 public boolean isSynonym() {
3273 return isSynonym;
3274 }
3275
3276 /**
3277 * @param isSynonym the isSynonym to set
3278 */
3279 public void setSynonym(boolean isSynonym) {
3280 this.isSynonym = isSynonym;
3281 }
3282
3283 public void setSource(Reference re){
3284 refMods=re;
3285 }
3286
3287 /**
3288 * @param string
3289 */
3290 public void setFormStr(String string) {
3291 this.formStr=string;
3292
3293 }
3294 /**
3295 * @param string
3296 */
3297 public void setVarietyStr(String string) {
3298 this.varietyStr=string;
3299
3300 }
3301 /**
3302 * @param string
3303 */
3304 public void setSubspeciesStr(String string) {
3305 this.subspeciesStr=string;
3306
3307 }
3308 /**
3309 * @param string
3310 */
3311 public void setSpeciesStr(String string) {
3312 this.speciesStr=string;
3313
3314 }
3315 /**
3316 * @param string
3317 */
3318 public void setSubgenusStr(String string) {
3319 this.subgenusStr=string;
3320
3321 }
3322 /**
3323 * @param string
3324 */
3325 public void setGenusStr(String string) {
3326 this.genusStr=string;
3327
3328 }
3329 /**
3330 * @param string
3331 */
3332 public void setSubtribeStr(String string) {
3333 this.subtribeStr=string;
3334
3335 }
3336 /**
3337 * @param string
3338 */
3339 public void setTribeStr(String string) {
3340 this.tribeStr=string;
3341
3342 }
3343 /**
3344 * @param string
3345 */
3346 public void setSubfamilyStr(String string) {
3347 this.subfamilyStr=string;
3348
3349 }
3350 /**
3351 * @param string
3352 */
3353 public void setFamilyStr(String string) {
3354 this.familyStr=string;
3355
3356 }
3357 /**
3358 * @return the familyStr
3359 */
3360 public String getFamilyStr() {
3361 return familyStr;
3362 }
3363 /**
3364 * @return the subfamilyStr
3365 */
3366 public String getSubfamilyStr() {
3367 return subfamilyStr;
3368 }
3369 /**
3370 * @return the tribeStr
3371 */
3372 public String getTribeStr() {
3373 return tribeStr;
3374 }
3375 /**
3376 * @return the subtribeStr
3377 */
3378 public String getSubtribeStr() {
3379 return subtribeStr;
3380 }
3381 /**
3382 * @return the genusStr
3383 */
3384 public String getGenusStr() {
3385 return genusStr;
3386 }
3387 /**
3388 * @return the subgenusStr
3389 */
3390 public String getSubgenusStr() {
3391 return subgenusStr;
3392 }
3393 /**
3394 * @return the speciesStr
3395 */
3396 public String getSpeciesStr() {
3397 return speciesStr;
3398 }
3399 /**
3400 * @return the subspeciesStr
3401 */
3402 public String getSubspeciesStr() {
3403 return subspeciesStr;
3404 }
3405 /**
3406 * @return the formStr
3407 */
3408 public String getFormStr() {
3409 return formStr;
3410 }
3411 /**
3412 * @return the varietyStr
3413 */
3414 public String getVarietyStr() {
3415 return varietyStr;
3416 }
3417
3418 public Integer getPublicationYear() {
3419 return publicationYear;
3420 }
3421
3422 public void setPublicationYear(Integer publicationYear) {
3423 this.publicationYear = publicationYear;
3424 }
3425
3426 /**
3427 * @param newName2
3428 */
3429 public void setNotParsableTaxon(String newName2) {
3430 //takes too much time
3431 // List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3432
3433 NomenclaturalStatusType statusType = null;
3434 if (!getStatus().isEmpty()){
3435 try {
3436 statusType = nomStatusString2NomStatus(getStatus());
3437 } catch (UnknownCdmTypeException e) {
3438 addProblematicStatusToFile(getStatus());
3439 logger.warn("Problem with status");
3440 }
3441 }
3442 List<TaxonBase> tmpList = new ArrayList<>();
3443
3444 Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitleWithRestrictions(TaxonBase.class, newName2, MatchMode.BEGINNING, null, null, null, null, null);
3445 tmpList.addAll(taxontest.getRecords());
3446
3447 //logger.info("tmpList returned: "+tmpList.size());
3448
3449
3450 INonViralName identicName = null;
3451 boolean foundIdentic=false;
3452 TaxonBase<?> tmpTaxonBase=null;
3453 // Taxon tmpPartial=null;
3454 for (TaxonBase<?> tmpb:tmpList){
3455 if(tmpb !=null){
3456 TaxonName tnb = tmpb.getName();
3457 Rank crank=null;
3458 if (tnb != null){
3459 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(newName2) ){
3460 crank =tnb.getRank();
3461 if (crank !=null && rank !=null){
3462 if (crank.equals(rank)){
3463 identicName = tnb;
3464 if (isSynonym && tmpb.isInstanceOf(Synonym.class) || !isSynonym && tmpb.isInstanceOf(Taxon.class)){
3465 foundIdentic=true;
3466 tmpTaxonBase=tmpb;
3467 break;
3468 }
3469 }
3470 }
3471 }
3472 }
3473 }
3474 }
3475 boolean statusMatch=false;
3476 boolean appendedMatch=false;
3477 if(tmpTaxonBase !=null && foundIdentic){
3478 statusMatch=compareStatus(tmpTaxonBase, statusType);
3479 if (!getStatus().isEmpty() && ! (tmpTaxonBase.getAppendedPhrase() == null)) {
3480 appendedMatch=tmpTaxonBase.getAppendedPhrase().equals(getStatus());
3481 }
3482 if (getStatus().isEmpty() && tmpTaxonBase.getAppendedPhrase() == null) {
3483 appendedMatch=true;
3484 }
3485
3486 }
3487 if ((tmpTaxonBase == null || !foundIdentic) || (tmpTaxonBase != null && !statusMatch) || (tmpTaxonBase != null && !appendedMatch && !statusMatch)){
3488
3489 INonViralName tnb;
3490 if (identicName == null){
3491 tnb = getNonViralNameAccNomenclature();
3492 tnb.setRank(rank);
3493
3494 if(statusType != null) {
3495 tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3496 }
3497 if(StringUtils.isNotBlank(getStatus())) {
3498 tnb.setAppendedPhrase(getStatus());
3499 }
3500 tnb.setTitleCache(newName2,true);
3501 tmpTaxonBase = findMatchingTaxon(tnb,refMods);
3502 }else{
3503 tnb = identicName;
3504 }
3505
3506 if(tmpTaxonBase==null){
3507 tmpTaxonBase = isSynonym ? Synonym.NewInstance(tnb, refMods) : Taxon.NewInstance(tnb, refMods);
3508 if(!state2.getConfig().doKeepOriginalSecundum()) {
3509 tmpTaxonBase.setSec(state2.getConfig().getSecundum());
3510 }
3511 //tmptaxonbase.setSec(refMods);
3512 if(!isSynonym) {
3513 classification.addChildTaxon((Taxon)tmpTaxonBase, null, null);
3514 sourceHandler.addSource(refMods, (Taxon)tmpTaxonBase);
3515 }
3516 }
3517 }
3518
3519 tmpTaxonBase = CdmBase.deproxy(tmpTaxonBase, TaxonBase.class);
3520 if (author != null) {
3521 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
3522 setLSID(getIdentifier(), tmpTaxonBase);
3523 importer.getTaxonService().saveOrUpdate(tmpTaxonBase);
3524 tmpTaxonBase = CdmBase.deproxy(tmpTaxonBase, TaxonBase.class);
3525 }
3526 }
3527 TaxonName tnb = CdmBase.deproxy(tmpTaxonBase.getName(), TaxonName.class);
3528
3529 if(!isSynonym) {
3530 this.taxon=(Taxon)tmpTaxonBase;
3531 } else {
3532 if (tmpTaxonBase instanceof Taxon){
3533 logger.warn("Incorrect status");
3534 }
3535 this.syno=(Synonym)tmpTaxonBase;
3536 }
3537
3538 taxonName = tnb;
3539
3540 }
3541
3542 /**
3543 *
3544 */
3545 public void buildTaxon() {
3546 //System.out.println("BUILD TAXON");
3547 logger.info("buildTaxon");
3548 NomenclaturalStatusType statusType = null;
3549 if (!getStatus().isEmpty()){
3550 status = getStatus();
3551 String newNameStatus = newNameStatus(status);
3552 if (newNameStatus != null){
3553 taxonName.setAppendedPhrase(newNameStatus);
3554 }else{
3555 try {
3556 statusType = nomStatusString2NomStatus(getStatus());
3557 taxonName.addStatus(NomenclaturalStatus.NewInstance(statusType));
3558 } catch (UnknownCdmTypeException e) {
3559 addProblematicStatusToFile(getStatus());
3560 logger.warn("Problem with status");
3561 }
3562 }
3563 }
3564 importer.getNameService().save(taxonName);
3565
3566 TaxonBase<?> tmpTaxonBase;
3567 if (!isSynonym) {
3568 tmpTaxonBase =Taxon.NewInstance(taxonName, refMods); //sec set null
3569 }
3570 else {
3571 tmpTaxonBase =Synonym.NewInstance(taxonName, refMods); //sec set null
3572 }
3573 boolean exist = false;
3574 if (!isSynonym){
3575 for (TaxonNode node : classification.getAllNodes()){
3576 try{
3577 Taxon nodeTaxon = node.getTaxon();
3578 boolean titleMatches = nodeTaxon.getTitleCache().equalsIgnoreCase(tmpTaxonBase.getTitleCache());
3579 boolean nomStatusMatches = compareStatus(node.getTaxon(), statusType);
3580 boolean nodeNameReplaceable = checkNodeNameReplaceable(nodeTaxon, tmpTaxonBase);
3581 if(titleMatches && nomStatusMatches) {
3582 if (!isSynonym) {
3583 tmpTaxonBase=CdmBase.deproxy(nodeTaxon, TaxonBase.class);
3584 exist =true;
3585 } else {
3586 logger.info("Found the same name but from another type (taxon/synonym)");
3587 TaxonName existingTnb = getTaxon().getName();
3588 tmpTaxonBase = Synonym.NewInstance(existingTnb, refMods);
3589 importer.getTaxonService().saveOrUpdate(tmpTaxonBase);
3590 exist =true;
3591 }
3592 }else if (nodeNameReplaceable){
3593 nodeTaxon.setName(tmpTaxonBase.getName());
3594 tmpTaxonBase = nodeTaxon;
3595 exist = true;
3596 }
3597 }catch(NullPointerException n){logger.warn(" A taxon is either null or its titlecache is null - ignore it?");}
3598 }
3599 }
3600 if (!exist){
3601
3602 boolean insertAsExisting =false;
3603 List<Taxon> existingTaxons=new ArrayList<Taxon>();
3604 try {
3605 existingTaxons = getMatchingTaxa(taxonName);
3606 } catch (Exception e1) {
3607 e1.printStackTrace();
3608 }
3609 double similarityScore=0.0;
3610 double similarityAuthor=-1;
3611 String author1="";
3612 String author2="";
3613 String t1="";
3614 String t2="";
3615 for (Taxon bestMatchingTaxon : existingTaxons){
3616 //System.out.println("tnbase "+taxonname.getTitleCache());
3617 //System.out.println("bestex "+bestMatchingTaxon.getTitleCache());
3618 if(taxonName.getAuthorshipCache()!=null) {
3619 author1=taxonName.getAuthorshipCache();
3620 }
3621 try {
3622 if(bestMatchingTaxon.getName().getAuthorshipCache()!=null) {
3623 author2=bestMatchingTaxon.getName().getAuthorshipCache();
3624 }
3625 } catch (Exception e) {
3626 // TODO Auto-generated catch block
3627 e.printStackTrace();
3628 }
3629 try {
3630 t1=taxonName.getTitleCache();
3631 if (author1!=null && !StringUtils.isEmpty(author1)) {
3632 t1=t1.split(Pattern.quote(author1))[0];
3633 }
3634 } catch (Exception e) {
3635 // TODO Auto-generated catch block
3636 e.printStackTrace();
3637 }
3638 try {
3639 t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
3640 if (author2!=null && !StringUtils.isEmpty(author2)) {
3641 t2=t2.split(Pattern.quote(author2))[0];
3642 }
3643 } catch (Exception e) {
3644 // TODO Auto-generated catch block
3645 e.printStackTrace();
3646 }
3647
3648 similarityScore=similarity(t1.trim(), t2.trim());
3649 //System.out.println("taxonscore "+similarityScore);
3650 similarityAuthor=similarity(author1.trim(), author2.trim());
3651 //System.out.println("authorscore "+similarityAuthor);
3652 insertAsExisting = compareAndCheckTaxon(taxonName, refMods, similarityScore, bestMatchingTaxon, similarityAuthor);
3653 if(insertAsExisting) {
3654 tmpTaxonBase=bestMatchingTaxon;
3655 break;
3656 }
3657 }
3658 if ( !insertAsExisting ){
3659 if(!state2.getConfig().doKeepOriginalSecundum()) {
3660 tmpTaxonBase.setSec(state2.getConfig().getSecundum());
3661 }
3662
3663 // tmptaxonbase.setSec(refMods);
3664 if (taxonName.getRank().equals(state2.getConfig().getMaxRank())) {
3665 //System.out.println("****************************"+tmptaxonbase);
3666 if (!isSynonym) {
3667 classification.addChildTaxon((Taxon)tmpTaxonBase, refMods, null);
3668 }
3669 } else{
3670 hierarchy = new HashMap<Rank, Taxon>();
3671 //System.out.println("LOOK FOR PARENT "+taxonname.toString()+", "+tmptaxonbase.toString());
3672 if (!isSynonym){
3673 lookForParentNode(taxonName,(Taxon)tmpTaxonBase, refMods,this);
3674 //System.out.println("HIERARCHY "+hierarchy);
3675 Taxon parent = buildHierarchy();
3676 if(!taxonExistsInClassification(parent,(Taxon)tmpTaxonBase)){
3677 if(parent !=null) {
3678 classification.addParentChild(parent, (Taxon)tmpTaxonBase, refMods, null);
3679 } else {
3680 classification.addChildTaxon((Taxon)tmpTaxonBase, refMods, null);
3681 }
3682 importer.getClassificationService().saveOrUpdate(classification);
3683 }
3684 }
3685 // Set<TaxonNode> nodeList = classification.getAllNodes();
3686 // for(TaxonNode tn:nodeList) {
3687 // System.out.println(tn.getTaxon());
3688 // }
3689 }
3690 }
3691 importer.getClassificationService().saveOrUpdate(classification);
3692 if(isSynonym) {
3693 try{
3694 Synonym castTest=CdmBase.deproxy(tmpTaxonBase, Synonym.class);
3695 }catch(Exception e){
3696 TaxonName existingTnb = tmpTaxonBase.getName();
3697 Synonym castTest = Synonym.NewInstance(existingTnb, refMods);
3698 importer.getTaxonService().saveOrUpdate(castTest);
3699 tmpTaxonBase=CdmBase.deproxy(castTest, Synonym.class);
3700 }
3701 }
3702 }
3703 if(!isSynonym) {
3704 taxon=CdmBase.deproxy(tmpTaxonBase, Taxon.class);
3705 } else {
3706 syno=CdmBase.deproxy(tmpTaxonBase, Synonym.class);
3707 }
3708
3709 }
3710
3711 private boolean checkNodeNameReplaceable(Taxon nodeTaxon, TaxonBase<?> newTaxon) {
3712 //TODO preliminary check
3713 if (newTaxon.isInstanceOf(Synonym.class)){
3714 return false;
3715 }
3716 INonViralName nodeName = nodeTaxon.getName();
3717 INonViralName newName = newTaxon.getName();
3718 if (nodeTaxon.getName() == null || newName == null){
3719 return false;
3720 }
3721 if (nodeTaxon.getDescriptions().size() > 0 || nodeName.getDescriptions().size() > 0 || nodeName.getTypeDesignations().size() > 0 ){
3722 return false;
3723 }
3724 boolean compare = true;
3725 for (NomenclaturalStatus status : newName.getStatus() ){
3726 compare &= compareStatus(nodeTaxon, status.getType());
3727 }
3728 if (! compare){
3729 return false;
3730 }
3731
3732 if (nodeName.getNameCache() != null && nodeName.getNameCache().equals(newName.getNameCache())){
3733 if (nodeName.getNameCache().equals(nodeName.getTitleCache())){
3734 if (newName.getNameCache().length() < newName.getTitleCache().length()){
3735 logger.warn("We still need to check, if node was automatically created via hierarchy creation: " + nodeName.getNameCache());
3736 return true;
3737 }
3738 }
3739 }
3740
3741 return false;
3742 }
3743
3744 /**
3745 *
3746 */
3747 private Taxon buildHierarchy() {
3748 logger.info("buildHierarchy");
3749 Taxon higherTaxon = null;
3750 //add the maxRank as a root
3751 if(hierarchy.containsKey(state2.getConfig().getMaxRank())){
3752 Taxon ct=hierarchy.get(state2.getConfig().getMaxRank());
3753 if(!taxonExistsInClassification(higherTaxon, ct)) {
3754 classification.addChildTaxon(ct, refMods, null);
3755 }
3756 higherTaxon = hierarchy.get(state2.getConfig().getMaxRank());
3757 // return higherTaxon;
3758 }
3759 //add the relation to the highertaxon, except if the current rank to add IS the maxRank
3760
3761 //TODO higher Ranks
3762
3763 if(hierarchy.containsKey(Rank.FAMILY()) && !state2.getConfig().getMaxRank().equals(Rank.FAMILY())){
3764 higherTaxon=saveAndGetHigherTaxon(Rank.FAMILY(),higherTaxon);
3765 }
3766 if(hierarchy.containsKey(Rank.SUBFAMILY()) && !state2.getConfig().getMaxRank().equals(Rank.SUBFAMILY())){
3767 higherTaxon=saveAndGetHigherTaxon(Rank.SUBFAMILY(),higherTaxon);
3768 }
3769 if(hierarchy.containsKey(Rank.TRIBE())&& !state2.getConfig().getMaxRank().equals(Rank.TRIBE())){
3770 higherTaxon=saveAndGetHigherTaxon(Rank.TRIBE(),higherTaxon);
3771 }
3772 if(hierarchy.containsKey(Rank.SUBTRIBE())&& !state2.getConfig().getMaxRank().equals(Rank.SUBTRIBE())){
3773 higherTaxon=saveAndGetHigherTaxon(Rank.SUBTRIBE(),higherTaxon);
3774 }
3775 if(hierarchy.containsKey(Rank.GENUS())&& !state2.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3776 higherTaxon=saveAndGetHigherTaxon(Rank.GENUS(),higherTaxon);
3777 }
3778 if(hierarchy.containsKey(Rank.SUBGENUS())&& !state2.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3779 higherTaxon=saveAndGetHigherTaxon(Rank.SUBGENUS(),higherTaxon);
3780 }
3781 importer.getClassificationService().saveOrUpdate(classification);
3782 return higherTaxon;
3783 }
3784
3785 private Taxon saveAndGetHigherTaxon(Rank r, Taxon higherTaxon){
3786 Taxon ct=hierarchy.get(r);
3787 if(!taxonExistsInClassification(higherTaxon,ct )) {
3788 if(higherTaxon != null && ct!=null) {
3789 classification.addParentChild(higherTaxon, ct, refMods, null);
3790 } else
3791 if(higherTaxon == null && ct !=null) {
3792 classification.addChildTaxon(ct, refMods, null);
3793 }
3794 }
3795 return ct;
3796 }
3797
3798 private boolean taxonExistsInClassification(Taxon parent, Taxon child){
3799 logger.info("taxonExistsInClassification");
3800 // System.out.println("LOOK IF TAXA EXIST "+parent+", "+child);
3801 boolean found=false;
3802 if(parent !=null){
3803 for (TaxonNode p : classification.getAllNodes()){
3804 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
3805 for (TaxonNode c : p.getChildNodes()) {
3806 if (c.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3807 found=true;
3808 break;
3809 }
3810 }
3811 }
3812 }
3813 }
3814 else{
3815 for (TaxonNode p : classification.getAllNodes()){
3816 if(p.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3817 found=true;
3818 break;
3819 }
3820 }
3821 }
3822 // System.out.println("LOOK IF TAXA EXIST? "+found);
3823 return found;
3824 }
3825 /**
3826 * @param nameToBeFilledTest
3827 */
3828 public void setParsedName(TaxonName nameToBeFilledTest) {
3829 this.taxonName = TaxonName.castAndDeproxy(nameToBeFilledTest);
3830
3831 }
3832 //variety dwcranks:varietyEpithet
3833 /**
3834 * @return the author
3835 */
3836 public String getAuthor() {
3837 return author;
3838 }
3839 /**
3840 * @return
3841 */
3842 public Taxon getTaxon() {
3843 return taxon;
3844 }
3845 /**
3846 * @return
3847 */
3848 public TaxonName getTaxonName() {
3849 return taxonName;
3850 }
3851
3852 /**
3853 * @param findOrCreateTaxon
3854 */
3855 public void setForm(Taxon form) {
3856 this.form=form;
3857
3858 }
3859 /**
3860 * @param findOrCreateTaxon
3861 */
3862 public void setVariety(Taxon variety) {
3863 this.variety=variety;
3864
3865 }
3866 /**
3867 * @param string
3868 * @return
3869 */
3870 @SuppressWarnings("rawtypes")
3871 public Taxon findOrCreateTaxon(String partialname,String fullname, Rank rank, Rank globalrank) {
3872 logger.info("findOrCreateTaxon");
3873 sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
3874 //takes too much time
3875 // List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3876 // logger.info("tmpList returned: "+tmpList.size());
3877
3878 NomenclaturalStatusType statusType = null;
3879 if (!getStatus().isEmpty()){
3880 try {
3881 statusType = nomStatusString2NomStatus(getStatus());
3882 } catch (UnknownCdmTypeException e) {
3883 addProblematicStatusToFile(getStatus());
3884 logger.warn("Problem with status");
3885 }
3886 }
3887
3888 List<TaxonBase> tmpListFiltered = new ArrayList<TaxonBase>();
3889
3890 Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitleWithRestrictions(TaxonBase.class, fullname, MatchMode.BEGINNING, null, null, null, null, null);
3891
3892 tmpListFiltered.addAll(taxontest.getRecords());
3893 taxontest = importer.getTaxonService().findByTitleWithRestrictions(TaxonBase.class, partialname, MatchMode.BEGINNING, null, null, null, null, null);
3894 tmpListFiltered.addAll(taxontest.getRecords());
3895
3896 //logger.info("tmpListFiltered returned: "+tmpListFiltered.size());
3897
3898 boolean nameCorrected=false;
3899 if (fullname.indexOf(partialname)<0) {
3900 nameCorrected=true;
3901 }
3902
3903 boolean foundIdentic=false;
3904 Taxon tmp=null;
3905 for (TaxonBase tmpb:tmpListFiltered){
3906 if(tmpb !=null){
3907 TaxonName tnb = tmpb.getName();
3908 Rank crank=null;
3909 if (tnb != null){
3910 if(globalrank.equals(rank) || (globalrank.isLower(Rank.SPECIES()) && rank.equals(Rank.SPECIES()))){
3911 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(fullname) ){
3912 crank =tnb.getRank();
3913 if (crank !=null && rank !=null){
3914 if (crank.equals(rank)){
3915 foundIdentic=true;
3916 try{
3917 tmp=(Taxon)tmpb;
3918 break;
3919 }catch(Exception e){
3920 e.printStackTrace();
3921 }
3922 }
3923 }
3924 }
3925 if(nameCorrected){ //for corrected names such as Anochetus -- A. blf-pat
3926 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
3927 crank =tnb.getRank();
3928 if (crank !=null && rank !=null){
3929 if (crank.equals(rank)){
3930 foundIdentic=true;
3931 try{
3932 tmp=(Taxon)tmpb;
3933 break;
3934 }catch(Exception e){
3935 e.printStackTrace();
3936 }
3937 }
3938 }
3939 }
3940 }
3941 }
3942 else{
3943 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
3944 crank =tnb.getRank();
3945 if (crank !=null && rank !=null){
3946 if (crank.equals(rank)){
3947 foundIdentic=true;
3948 try{
3949 tmp=(Taxon)tmpb;
3950 break;
3951 }catch(Exception e){
3952 e.printStackTrace();
3953 }
3954 }
3955 }
3956 }
3957 }
3958 }
3959 }
3960 }
3961 boolean statusMatch=false;
3962 boolean appendedMatch=false;
3963 if(tmp !=null && foundIdentic){
3964 statusMatch=compareStatus(tmp, statusType);
3965 if (!getStatus().isEmpty() && ! (tmp.getAppendedPhrase() == null)) {
3966 appendedMatch=tmp.getAppendedPhrase().equals(getStatus());
3967 }
3968 if (getStatus().isEmpty() && tmp.getAppendedPhrase() == null) {
3969 appendedMatch=true;
3970 }
3971
3972 }
3973 if ((tmp == null || !foundIdentic) || (tmp != null && !statusMatch) || (tmp != null && !appendedMatch && !statusMatch)){
3974
3975 INonViralName tnb = getNonViralNameAccNomenclature();
3976 tnb.setRank(rank);
3977
3978 if(statusType != null) {
3979 tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3980 }
3981 if(StringUtils.isNotBlank(getStatus())) {
3982 tnb.setAppendedPhrase(getStatus());
3983 }
3984
3985 if(rank.equals(Rank.UNKNOWN_RANK())){
3986 tnb.setTitleCache(fullname, true);
3987 // tnb.setGenusOrUninomial(fullname);
3988 }
3989 if(rank.isHigher(Rank.GENUS())) {
3990 tnb.setGenusOrUninomial(partialname);
3991 }
3992
3993 if(rank.isHigher(Rank.SPECIES())) {
3994 tnb.setTitleCache(partialname, true);
3995 }
3996
3997 if (rank.equals(globalrank) && author != null) {
3998
3999 tnb.setCombinationAuthorship(findOrCreateAuthor(author));
4000 if (getIdentifier() !=null && !getIdentifier().isEmpty()){
4001 Taxon taxonLSID = getTaxonByLSID(getIdentifier());
4002 if (taxonLSID !=null) {
4003 tmp=taxonLSID;
4004 }
4005 }
4006 }
4007
4008 if(tmp == null){
4009 if (rank.equals(Rank.FAMILY())) {
4010 tmp = buildFamily(tnb);
4011 }
4012 if (rank.equals(Rank.SUBFAMILY())) {
4013 tmp = buildSubfamily(tnb);
4014 }
4015 if (rank.equals(Rank.TRIBE())) {
4016 tmp = buildTribe(tnb);
4017 }
4018 if (rank.equals(Rank.SUBTRIBE())) {
4019 tmp = buildSubtribe(tnb);
4020 }
4021 if (rank.equals(Rank.GENUS())) {
4022 tmp = buildGenus(partialname, tnb);
4023 }
4024
4025 if (rank.equals(Rank.SUBGENUS())) {
4026 tmp = buildSubgenus(partialname, tnb);
4027 }
4028 if (rank.equals(Rank.SPECIES())) {
4029 tmp = buildSpecies(partialname, tnb);
4030 }
4031
4032 if (rank.equals(Rank.SUBSPECIES())) {
4033 tmp = buildSubspecies(partialname, tnb);
4034 }
4035
4036 if (rank.equals(Rank.VARIETY())) {
4037 tmp = buildVariety(fullname, partialname, tnb);
4038 }
4039
4040 if (rank.equals(Rank.FORM())) {
4041 tmp = buildForm(fullname, partialname, tnb);
4042 }
4043 if (tmp != null){
4044 TaxonXTreatmentExtractor.this.sourceHandler.addSource(refMods, tmp);
4045 }
4046
4047 importer.getClassificationService().saveOrUpdate(classification);
4048 }
4049
4050 }
4051
4052 tmp = CdmBase.deproxy(tmp, Taxon.class);
4053 if (rank.equals(globalrank) && author != null) {
4054 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
4055 setLSID(getIdentifier(), tmp);
4056 importer.getTaxonService().saveOrUpdate(tmp);
4057 tmp = CdmBase.deproxy(tmp, Taxon.class);
4058 }
4059 }
4060
4061 this.taxon=tmp;
4062
4063 return tmp;
4064 }
4065
4066 /**
4067 * @param tnb
4068 * @return
4069 */
4070 private Taxon buildSubfamily(INonViralName tnb) {
4071 Taxon tmp;
4072 // tnb.generateTitle();
4073 tmp = findMatchingTaxon(tnb,refMods);
4074 if(tmp ==null){
4075 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4076 if(!state2.getConfig().doKeepOriginalSecundum()) {
4077 tmp.setSec(state2.getConfig().getSecundum());
4078 }
4079 // tmp.setSec(refMods);
4080 // sourceHandler.addSource(refMods, tmp);
4081 if(family != null) {
4082 classification.addParentChild(family, tmp, null, null);
4083 higherRank=Rank.FAMILY();
4084 higherTaxa=family;
4085 } else {
4086 //System.out.println("ADDCHILDTAXON SUBFAMILY "+tmp);
4087 classification.addChildTaxon(tmp, null, null);
4088 }
4089 }
4090 return tmp;
4091 }
4092 /**
4093 * @param tnb
4094 * @return
4095 */
4096 private Taxon buildFamily(INonViralName tnb) {
4097 Taxon tmp;
4098 // tnb.generateTitle();
4099 tmp = findMatchingTaxon(tnb,refMods);
4100 if(tmp ==null){
4101 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4102 if(!state2.getConfig().doKeepOriginalSecundum()) {
4103 tmp.setSec(state2.getConfig().getSecundum());
4104 }
4105 // tmp.setSec(refMods);
4106 //sourceHandler.addSource(refMods, tmp);
4107 //System.out.println("ADDCHILDTAXON FAMILY "+tmp);
4108 classification.addChildTaxon(tmp, null, null);
4109 }
4110 return tmp;
4111 }
4112 /**
4113 * @param fullname
4114 * @param tnb
4115 * @return
4116 */
4117 private Taxon buildForm(String fullname, String partialname, INonViralName tnb) {
4118 if (genusName !=null) {
4119 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4120 }
4121 if (subgenusName !=null) {
4122 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4123 }
4124 if(speciesName !=null) {
4125 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4126 }
4127 if(subspeciesName != null) {
4128 tnb.setInfraSpecificEpithet(subspeciesName.getInfraSpecificEpithet());
4129 }
4130 if(partialname!= null) {
4131 tnb.setInfraSpecificEpithet(partialname);
4132 }
4133 //TODO how to save form??
4134 tnb.setTitleCache(fullname, true);
4135 Taxon tmp = findMatchingTaxon(tnb,refMods);
4136 if(tmp ==null){
4137 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4138 if(!state2.getConfig().doKeepOriginalSecundum()) {
4139 tmp.setSec(state2.getConfig().getSecundum());
4140 }
4141 // tmp.setSec(refMods);
4142 //sourceHandler.addSource(refMods, tmp);
4143 if (subspecies !=null) {
4144 classification.addParentChild(subspecies, tmp, null, null);
4145 higherRank=Rank.SUBSPECIES();
4146 higherTaxa=subspecies;
4147 } else {
4148 if (species !=null) {
4149 classification.addParentChild(species, tmp, null, null);
4150 higherRank=Rank.SPECIES();
4151 higherTaxa=species;
4152 }
4153 else{
4154 // System.out.println("ADDCHILDTAXON FORM "+tmp);
4155 classification.addChildTaxon(tmp, null, null);
4156 }
4157 }
4158 }
4159 return tmp;
4160 }
4161 /**
4162 * @param fullname
4163 * @param tnb
4164 * @return
4165 */
4166 private Taxon buildVariety(String fullname, String partialname, INonViralName tnb) {
4167 Taxon tmp;
4168 if (genusName !=null) {
4169 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4170 }
4171 if (subgenusName !=null) {
4172 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4173 }
4174 if(speciesName !=null) {
4175 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4176 }
4177 if(subspeciesName != null) {
4178 tnb.setInfraSpecificEpithet(subspeciesName.getSpecificEpithet());
4179 }
4180 if(partialname != null) {
4181 tnb.setInfraSpecificEpithet(partialname);
4182 }
4183 //TODO how to save variety?
4184 tnb.setTitleCache(fullname, true);
4185 tmp = findMatchingTaxon(tnb,refMods);
4186 if(tmp ==null){
4187 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4188 if(!state2.getConfig().doKeepOriginalSecundum()) {
4189 tmp.setSec(state2.getConfig().getSecundum());
4190 }
4191 // tmp.setSec(refMods);
4192 //sourceHandler.addSource(refMods, tmp);
4193 if (subspecies !=null) {
4194 classification.addParentChild(subspecies, tmp, null, null);
4195 higherRank=Rank.SUBSPECIES();
4196 higherTaxa=subspecies;
4197 } else {
4198 if(species !=null) {
4199 classification.addParentChild(species, tmp, null, null);
4200 higherRank=Rank.SPECIES();
4201 higherTaxa=species;
4202 }
4203 else{
4204 //System.out.println("ADDCHILDTAXON VARIETY "+tmp);
4205 classification.addChildTaxon(tmp, null, null);
4206 }
4207 }
4208 }
4209 return tmp;
4210 }
4211 /**
4212 * @param partialname
4213 * @param tnb
4214 * @return
4215 */
4216 private Taxon buildSubspecies(String partialname, INonViralName tnb) {
4217 if (genusName !=null) {
4218 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4219 }
4220 if (subgenusName !=null) {
4221 // System.out.println("SUB:"+subgenusName.getInfraGenericEpithet());
4222 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4223 }
4224 if(speciesName !=null) {
4225 // System.out.println("SPE:"+speciesName.getSpecificEpithet());
4226 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4227 }
4228 tnb.setInfraSpecificEpithet(partialname);
4229 Taxon tmp = findMatchingTaxon(tnb,refMods);
4230 if(tmp ==null){
4231 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4232 if(!state2.getConfig().doKeepOriginalSecundum())
4233 {
4234 tmp.setSec(state2.getConfig().getSecundum());
4235 // tmp.setSec(refMods);
4236 //sourceHandler.addSource(refMods, tmp);
4237 }
4238
4239 if(species != null) {
4240 classification.addParentChild(species, tmp, null, null);
4241 higherRank=Rank.SPECIES();
4242 higherTaxa=species;
4243 }
4244 else{
4245 //System.out.println("ADDCHILDTAXON SUBSPECIES "+tmp);
4246 classification.addChildTaxon(tmp, null, null);
4247 }
4248 }
4249 return tmp;
4250 }
4251 /**
4252 * @param partialname
4253 * @param tnb
4254 * @return
4255 */
4256 private Taxon buildSpecies(String partialname, INonViralName tnb) {
4257 if (genusName !=null) {
4258 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4259 }
4260 if (subgenusName !=null) {
4261 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4262 }
4263 tnb.setSpecificEpithet(partialname.toLowerCase());
4264 Taxon tmp = findMatchingTaxon(tnb,refMods);
4265 if(tmp ==null){
4266 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4267 if(!state2.getConfig().doKeepOriginalSecundum()) {
4268 tmp.setSec(state2.getConfig().getSecundum());
4269 }
4270 // tmp.setSec(refMods);
4271 //sourceHandler.addSource(refMods, tmp);
4272 if (subgenus !=null) {
4273 classification.addParentChild(subgenus, tmp, null, null);
4274 higherRank=Rank.SUBGENUS();
4275 higherTaxa=subgenus;
4276 } else {
4277 if (genus !=null) {
4278 classification.addParentChild(genus, tmp, null, null);
4279 higherRank=Rank.GENUS();
4280 higherTaxa=genus;
4281 }
4282 else{
4283 //System.out.println("ADDCHILDTAXON SPECIES "+tmp);
4284 classification.addChildTaxon(tmp, null, null);
4285 }
4286 }
4287 }
4288 return tmp;
4289 }
4290 /**
4291 * @param partialname
4292 * @param tnb
4293 * @return
4294 */
4295 private Taxon buildSubgenus(String partialname, INonViralName tnb) {
4296 tnb.setInfraGenericEpithet(partialname);
4297 if (genusName !=null) {
4298 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4299 }
4300 Taxon tmp = findMatchingTaxon(tnb,refMods);
4301 if(tmp ==null){
4302 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4303 if(!state2.getConfig().doKeepOriginalSecundum()) {
4304 tmp.setSec(state2.getConfig().getSecundum());
4305 }
4306 // tmp.setSec(refMods);
4307 //sourceHandler.addSource(refMods, tmp);
4308 if(genus != null) {
4309 classification.addParentChild(genus, tmp, null, null);
4310 higherRank=Rank.GENUS();
4311 higherTaxa=genus;
4312 } else{
4313 //System.out.println("ADDCHILDTAXON SUBGENUS "+tmp);
4314 classification.addChildTaxon(tmp, null, null);
4315 }
4316 }
4317 return tmp;
4318 }
4319 /**
4320 * @param partialname
4321 * @param tnb
4322 * @return
4323 */
4324 private Taxon buildGenus(String partialname, INonViralName tnb) {
4325 Taxon tmp;
4326 tnb.setGenusOrUninomial(partialname);
4327
4328
4329 tmp = findMatchingTaxon(tnb,refMods);
4330 if(tmp ==null){
4331 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4332 if(!state2.getConfig().doKeepOriginalSecundum())
4333 {
4334 tmp.setSec(state2.getConfig().getSecundum());
4335 // tmp.setSec(refMods);
4336 //sourceHandler.addSource(refMods, tmp);
4337 }
4338
4339 if(subtribe != null) {
4340 classification.addParentChild(subtribe, tmp, null, null);
4341 higherRank=Rank.SUBTRIBE();
4342 higherTaxa=subtribe;
4343 } else{
4344 if(tribe !=null) {
4345 classification.addParentChild(tribe, tmp, null, null);
4346 higherRank=Rank.TRIBE();
4347 higherTaxa=tribe;
4348 } else{
4349 if(subfamily !=null) {
4350 classification.addParentChild(subfamily, tmp, null, null);
4351 higherRank=Rank.SUBFAMILY();
4352 higherTaxa=subfamily;
4353 } else
4354 if(family !=null) {
4355 classification.addParentChild(family, tmp, null, null);
4356 higherRank=Rank.FAMILY();
4357 higherTaxa=family;
4358 }
4359 else{
4360 //System.out.println("ADDCHILDTAXON GENUS "+tmp);
4361 classification.addChildTaxon(tmp, null, null);
4362 }
4363 }
4364 }
4365 }
4366 return tmp;
4367 }
4368
4369 /**
4370 * @param tnb
4371 * @return
4372 */
4373 private Taxon buildSubtribe(INonViralName tnb) {
4374 Taxon tmp = findMatchingTaxon(tnb,refMods);
4375 if(tmp==null){
4376 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4377 if(!state2.getConfig().doKeepOriginalSecundum()) {
4378 tmp.setSec(state2.getConfig().getSecundum());
4379 }
4380 // tmp.setSec(refMods);
4381 //sourceHandler.addSource(refMods, tmp);
4382 if(tribe != null) {
4383 classification.addParentChild(tribe, tmp, null, null);
4384 higherRank=Rank.TRIBE();
4385 higherTaxa=tribe;
4386 } else{
4387 //System.out.println("ADDCHILDTAXON SUBTRIBE "+tmp);
4388 classification.addChildTaxon(tmp, null, null);
4389 }
4390 }
4391 return tmp;
4392 }
4393 /**
4394 * @param tnb
4395 * @return
4396 */
4397 private Taxon buildTribe(INonViralName tnb) {
4398 Taxon tmp = findMatchingTaxon(tnb,refMods);
4399 if(tmp==null){
4400 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4401 if(!state2.getConfig().doKeepOriginalSecundum()) {
4402 tmp.setSec(state2.getConfig().getSecundum());
4403 }
4404 // tmp.setSec(refMods);
4405 //sourceHandler.addSource(refMods, tmp);
4406 if (subfamily !=null) {
4407 classification.addParentChild(subfamily, tmp, null, null);
4408 higherRank=Rank.SUBFAMILY();
4409 higherTaxa=subfamily;
4410 } else {
4411 if(family != null) {
4412 classification.addParentChild(family, tmp, null, null);
4413 higherRank=Rank.FAMILY();
4414 higherTaxa=family;
4415 }
4416 else{
4417 //System.out.println("ADDCHILDTAXON TRIBE "+tmp);
4418 classification.addChildTaxon(tmp, null, null);
4419 }
4420 }
4421 }
4422 return tmp;
4423 }
4424
4425 /**
4426 * @param identifier2
4427 * @return
4428 */
4429 @SuppressWarnings("rawtypes")
4430 private Taxon getTaxonByLSID(String identifier) {
4431 //logger.info("getTaxonByLSID");
4432 // boolean lsidok=false;
4433 String id = identifier.split("__")[0];
4434 // String source = identifier.split("__")[1];
4435 LSID lsid = null;
4436 if (id.indexOf("lsid")>-1){
4437 try {
4438 lsid = new LSID(id);
4439 // lsidok=true;
4440 } catch (MalformedLSIDException e) {
4441 logger.warn("Malformed LSID");
4442 }
4443 }
4444 if (lsid !=null){
4445 List<Taxon> taxa = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
4446 LSID currentlsid=null;
4447 for (Taxon t:taxa){
4448 currentlsid = t.getLsid();
4449 if (currentlsid !=null){
4450 if (currentlsid.getLsid().equals(lsid.getLsid())){
4451 try{
4452 return t;
4453 }
4454 catch(Exception e){logger.warn("Exception occurred while comparing LSIDs "+e );}
4455 }
4456 }
4457 }
4458 }
4459 return null;
4460 }
4461 /**
4462 * @param author2
4463 * @return
4464 */
4465 @SuppressWarnings("rawtypes")
4466 private Person findOrCreateAuthor(String author2) {
4467 //logger.info("findOrCreateAuthor");
4468 List<UuidAndTitleCache<Person>> hiberPersons = importer.getAgentService().getPersonUuidAndTitleCache();
4469 for (UuidAndTitleCache<Person> hibernateP:hiberPersons){
4470 if(hibernateP.getTitleCache().equals(author2)) {
4471 AgentBase existing = importer.getAgentService().find(hibernateP.getUuid());
4472 return CdmBase.deproxy(existing, Person.class);
4473 }
4474 }
4475 Person p = Person.NewInstance();
4476 p.setTitleCache(author2,true);
4477 importer.getAgentService().saveOrUpdate(p);
4478 return CdmBase.deproxy(p, Person.class);
4479 }
4480 /**
4481 * @param author the author to set
4482 */
4483 public void setAuthor(String author) {
4484 this.author = author;
4485 }
4486
4487 /**
4488 * @return the higherTaxa
4489 */
4490 public Taxon getHigherTaxa() {
4491 return higherTaxa;
4492 }
4493 /**
4494 * @param higherTaxa the higherTaxa to set
4495 */
4496 public void setHigherTaxa(Taxon higherTaxa) {
4497 this.higherTaxa = higherTaxa;
4498 }
4499 /**
4500 * @return the higherRank
4501 */
4502 public Rank getHigherRank() {
4503 return higherRank;
4504 }
4505 /**
4506 * @param higherRank the higherRank to set
4507 */
4508 public void setHigherRank(Rank higherRank) {
4509 this.higherRank = higherRank;
4510 }
4511 public String getName(){
4512 if (newName.isEmpty()) {
4513 return originalName;
4514 } else {
4515 return newName;
4516 }
4517
4518 }
4519 /**
4520 * @return the fullName
4521 */
4522 public String getOriginalName() {
4523 return originalName;
4524 }
4525 /**
4526 * @param fullName the fullName to set
4527 */
4528 public void setOriginalName(String fullName) {
4529 this.originalName = fullName;
4530 }
4531 /**
4532 * @return the newName
4533 */
4534 public String getNewName() {
4535 return newName;
4536 }
4537 /**
4538 * @param newName the newName to set
4539 */
4540 public void setNewName(String newName) {
4541 this.newName = newName;
4542 }
4543 /**
4544 * @return the rank
4545 */
4546 public Rank getRank() {
4547 return rank;
4548 }
4549 /**
4550 * @param rank the rank to set
4551 */
4552 public void setRank(Rank rank) {
4553 this.rank = rank;
4554 }
4555 /**
4556 * @return the idenfitiger
4557 */
4558 public String getIdentifier() {
4559 return identifier;
4560 }
4561 /**
4562 * @param idenfitiger the idenfitiger to set
4563 */
4564 public void setIdentifier(String identifier) {
4565 this.identifier = identifier;
4566 }
4567 /**
4568 * @return the status
4569 */
4570 public String getStatus() {
4571 if (status == null) {
4572 return "";
4573 }
4574 return status;
4575 }
4576 /**
4577 * @param status the status to set
4578 */
4579 public void setStatus(String status) {
4580 this.status = status;
4581 }
4582 /**
4583 * @return the family
4584 */
4585 public Taxon getFamily() {
4586 return family;
4587 }
4588 /**
4589 * @param family the family to set
4590 */
4591 @SuppressWarnings("rawtypes")
4592 public void setFamily(Taxon family) {
4593 this.family = family;
4594 familyName = CdmBase.deproxy(family.getName());
4595 }
4596 /**
4597 * @return the subfamily
4598 */
4599 public Taxon getSubfamily() {
4600 return subfamily;
4601 }
4602 /**
4603 * @param subfamily the subfamily to set
4604 */
4605 @SuppressWarnings("rawtypes")
4606 public void setSubfamily(Taxon subfamily) {
4607 this.subfamily = subfamily;
4608 subfamilyName = CdmBase.deproxy(subfamily.getName());
4609 }
4610 /**
4611 * @return the tribe
4612 */
4613 public Taxon getTribe() {
4614 return tribe;
4615 }
4616 /**
4617 * @param tribe the tribe to set
4618 */
4619 @SuppressWarnings("rawtypes")
4620 public void setTribe(Taxon tribe) {
4621 this.tribe = tribe;
4622 tribeName = CdmBase.deproxy(tribe.getName());
4623 }
4624 /**
4625 * @return the subtribe
4626 */
4627 public Taxon getSubtribe() {
4628 return subtribe;
4629 }
4630 /**
4631 * @param subtribe the subtribe to set
4632 */
4633 @SuppressWarnings("rawtypes")
4634 public void setSubtribe(Taxon subtribe) {
4635 this.subtribe = subtribe;
4636 subtribeName =CdmBase.deproxy(subtribe.getName());
4637 }
4638 /**
4639 * @return the genus
4640 */
4641 public Taxon getGenus() {
4642 return genus;
4643 }
4644 /**
4645 * @param genus the genus to set
4646 */
4647 @SuppressWarnings("rawtypes")
4648 public void setGenus(Taxon genus) {
4649 if (genus != null){
4650 this.genus = genus;
4651 genusName = CdmBase.deproxy(genus.getName());
4652 }
4653 }
4654 /**
4655 * @return the subgenus
4656 */
4657 public Taxon getSubgenus() {
4658 return subgenus;
4659 }
4660 /**
4661 * @param subgenus the subgenus to set
4662 */
4663 @SuppressWarnings("rawtypes")
4664 public void setSubgenus(Taxon subgenus) {
4665 this.subgenus = subgenus;
4666 subgenusName = CdmBase.deproxy(subgenus.getName());
4667 }
4668 /**
4669 * @return the species
4670 */
4671 public Taxon getSpecies() {
4672 return species;
4673 }
4674 /**
4675 * @param species the species to set
4676 */
4677 public void setSpecies(Taxon species) {
4678 if (species != null){
4679 this.species = species;
4680 speciesName = CdmBase.deproxy(species.getName());
4681 }
4682 }
4683 /**
4684 * @return the subspecies
4685 */
4686 public Taxon getSubspecies() {
4687 return subspecies;
4688 }
4689 /**
4690 * @param subspecies the subspecies to set
4691 */
4692 @SuppressWarnings("rawtypes")
4693 public void setSubspecies(Taxon subspecies) {
4694 this.subspecies = subspecies;
4695 subspeciesName = CdmBase.deproxy(subspecies.getName());
4696
4697 }
4698
4699
4700
4701 }
4702
4703
4704 /**
4705 * @param status
4706 */
4707 private void addProblematicStatusToFile(String status) {
4708 try{
4709 FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "StatusUnknown_"+classification.getTitleCache()+".txt",true);
4710 BufferedWriter out = new BufferedWriter(fstream);
4711 out.write(status+"\n");
4712 //Close the output stream
4713 out.close();
4714 }catch (Exception e){//Catch exception if any
4715 System.err.println("Error: " + e.getMessage());
4716 }
4717
4718 }
4719
4720
4721
4722 /**
4723 * @param tnb
4724 * @return
4725 */
4726 private Taxon findMatchingTaxon(INonViralName tnb, Reference refMods) {
4727 logger.info("findMatchingTaxon");
4728 Taxon tmp=null;
4729
4730 refMods=CdmBase.deproxy(refMods, Reference.class);
4731 boolean insertAsExisting =false;
4732 List<Taxon> existingTaxa = new ArrayList<Taxon>();
4733 try {
4734 existingTaxa = getMatchingTaxa(TaxonName.castAndDeproxy(tnb));
4735 } catch (Exception e1) {
4736 // TODO Auto-generated catch block
4737 e1.printStackTrace();
4738 }
4739 double similarityScore=0.0;
4740 double similarityAuthor=-1;
4741 String author1="";
4742 String author2="";
4743 String t1="";
4744 String t2="";
4745 for (Taxon bestMatchingTaxon : existingTaxa){
4746 if (!existingTaxa.isEmpty() && state2.getConfig().isInteractWithUser() && !insertAsExisting) {
4747 // System.out.println("tnb "+tnb.getTitleCache());
4748 // System.out.println("ext "+bestMatchingTaxon.getTitleCache());
4749 try {
4750 if(tnb.getAuthorshipCache()!=null) {
4751 author1=tnb.getAuthorshipCache();
4752 }
4753 } catch (Exception e) {
4754 // TODO Auto-generated catch block
4755 e.printStackTrace();
4756 }
4757 try {
4758 if(bestMatchingTaxon.getName().getAuthorshipCache()!=null) {
4759 author2=bestMatchingTaxon.getName().getAuthorshipCache();
4760 }
4761 } catch (Exception e) {
4762 // TODO Auto-generated catch block
4763 e.printStackTrace();
4764 }
4765 try {
4766 t1=tnb.getTitleCache().split("sec.")[0].trim();
4767 if (author1!=null && !StringUtils.isEmpty(author1)) {
4768 t1=t1.split(Pattern.quote(author1))[0];
4769 }
4770 } catch (Exception e) {
4771 // TODO Auto-generated catch block
4772 e.printStackTrace();
4773 }
4774 try {
4775 t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
4776 if (author2!=null && !StringUtils.isEmpty(author2)) {
4777 t2=t2.split(Pattern.quote(author2))[0];
4778 }
4779 } catch (Exception e) {
4780 // TODO Auto-generated catch block
4781 e.printStackTrace();
4782 }
4783 similarityScore=similarity(t1.trim(), t2.trim());
4784 // System.out.println("taxascore: "+similarityScore);
4785 similarityAuthor=similarity(author1.trim(), author2.trim());
4786 // System.out.println("authorscore: "+similarityAuthor);
4787 insertAsExisting = compareAndCheckTaxon(tnb, refMods, similarityScore, bestMatchingTaxon,similarityAuthor);
4788 }
4789 if(insertAsExisting) {
4790 //System.out.println("KEEP "+bestMatchingTaxon.toString());
4791 tmp=bestMatchingTaxon;
4792 sourceHandler.addSource(refMods, tmp);
4793 return tmp;
4794 }
4795 }
4796 return tmp;
4797 }
4798
4799
4800 /**
4801 * @param tnb
4802 * @param refMods
4803 * @param similarityScore
4804 * @param bestMatchingTaxon
4805 * @param similarityAuthor
4806 * @return
4807 */
4808 private boolean compareAndCheckTaxon(INonViralName tnb, Reference refMods, double similarityScore,
4809 Taxon bestMatchingTaxon, double similarityAuthor) {
4810 //logger.info("compareAndCheckTaxon");
4811 boolean insertAsExisting;
4812 // if (tnb.getTitleCache().split("sec.")[0].equalsIgnoreCase("Chenopodium") && bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium album")>-1) {
4813 // insertAsExisting=false;
4814 // } else{
4815 //a small hack/automatisation for Chenopodium only
4816 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase("Chenopodium") &&
4817 bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium L.")>-1) {
4818 insertAsExisting=true;
4819 } else {
4820 insertAsExisting=askIfReuseBestMatchingTaxon(tnb, bestMatchingTaxon, refMods, similarityScore,similarityAuthor);
4821 }
4822 // }
4823
4824 logDecision(tnb, bestMatchingTaxon, insertAsExisting, refMods);
4825 return insertAsExisting;
4826 }
4827
4828 /**
4829 * @return
4830 */
4831 @SuppressWarnings("rawtypes")
4832 private List<Taxon> getMatchingTaxa(TaxonName tnb) {
4833 //logger.info("getMatchingTaxon");
4834 if (tnb.getTitleCache() == null){
4835 tnb.setTitleCache(tnb.toString(), tnb.isProtectedTitleCache());
4836 }
4837
4838 Pager<TaxonBase> pager=importer.getTaxonService().findByTitleWithRestrictions(TaxonBase.class, tnb.getTitleCache().split("sec.")[0].trim(), MatchMode.BEGINNING, null, null, null, null, null);
4839 List<TaxonBase>records = pager.getRecords();
4840
4841 List<Taxon> existingTaxons = new ArrayList<Taxon>();
4842 for (TaxonBase r:records){
4843 try{
4844 Taxon bestMatchingTaxon = (Taxon)r;
4845 // System.out.println("best: "+bestMatchingTaxon.getTitleCache());
4846 if(compareTaxonNameLength(bestMatchingTaxon.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4847 existingTaxons.add(bestMatchingTaxon);
4848 }
4849 }catch(ClassCastException e){logger.warn("classcast exception, might be a synonym, ignore it");}
4850 }
4851 Taxon bmt = importer.getTaxonService().findBestMatchingTaxon(tnb.getTitleCache());
4852 if (!existingTaxons.contains(bmt) && bmt!=null) {
4853 if(compareTaxonNameLength(bmt.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4854 existingTaxons.add(bmt);
4855 }
4856 }
4857 return existingTaxons;
4858 }
4859
4860 /**
4861 * Check if the found Taxon can reasonnably be the same
4862 * example: with and without author should match, but the subspecies should not be suggested for a genus
4863 * */
4864 private boolean compareTaxonNameLength(String f, String o){
4865 //logger.info("compareTaxonNameLength");
4866 boolean lengthOk=false;
4867 int sizeF = f.length();
4868 int sizeO = o.length();
4869 if (sizeO>=sizeF) {
4870 lengthOk=true;
4871 }
4872 if(sizeF>sizeO) {
4873 if (sizeF-sizeO>10) {
4874 lengthOk=false;
4875 } else {
4876 lengthOk=true;
4877 }
4878 }
4879
4880 // System.out.println(lengthOk+": compare "+f+" ("+f.length()+") and "+o+" ("+o.length()+")");
4881 return lengthOk;
4882 }
4883
4884 private double similarity(String s1, String s2) {
4885 //logger.info("similarity");
4886 //System.out.println("similarity *"+s1+"* vs. *"+s2+"*");
4887 if(!StringUtils.isEmpty(s1) && !StringUtils.isEmpty(s2)){
4888 String l1=s1.toLowerCase().trim();
4889 String l2=s2.toLowerCase().trim();
4890 if (l1.length() < l2.length()) { // s1 should always be bigger
4891 String swap = l1; l1 = l2; l2 = swap;
4892 }
4893 int bigLen = l1.length();
4894 if (bigLen == 0) { return 1.0; /* both strings are zero length */ }
4895 return (bigLen - computeEditDistance(l1, l2)) / (double) bigLen;
4896 }
4897 else{
4898 if(s1!=null && s2!=null){
4899 if (s1.equalsIgnoreCase(s2)) {
4900 return 1;
4901 }
4902 }
4903 return -1;
4904 }
4905 }
4906
4907 private int computeEditDistance(String s1, String s2) {
4908 //logger.info("computeEditDistance");
4909 int[] costs = new int[s2.length() + 1];
4910 for (int i = 0; i <= s1.length(); i++) {
4911 int lastValue = i;
4912 for (int j = 0; j <= s2.length(); j++) {
4913 if (i == 0) {
4914 costs[j] = j;
4915 } else {
4916 if (j > 0) {
4917 int newValue = costs[j - 1];
4918 if (s1.charAt(i - 1) != s2.charAt(j - 1)) {
4919 newValue = Math.min(Math.min(newValue, lastValue),
4920 costs[j]) + 1;
4921 }
4922 costs[j - 1] = lastValue;
4923 lastValue = newValue;
4924 }
4925 }
4926 }
4927 if (i > 0) {
4928 costs[s2.length()] = lastValue;
4929 }
4930 }
4931 return costs[s2.length()];
4932 }
4933
4934 Map<Rank, Taxon> hierarchy = new HashMap<Rank, Taxon>();
4935 /**
4936 * @param taxonName
4937 */
4938 @SuppressWarnings("rawtypes")
4939 public void lookForParentNode(INonViralName taxonName, Taxon tax, Reference ref, MyName myName) {
4940 logger.info("lookForParentNode "+taxonName.getTitleCache()+" for "+myName.toString());
4941 //System.out.println("LOOK FOR PARENT NODE "+taxonname.toString()+"; "+tax.toString()+"; "+taxonname.getRank());
4942 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
4943 if (taxonName.getRank().equals(Rank.FORM())){
4944 handleFormHierarchy(ref, myName, parser);
4945 }
4946 else if (taxonName.getRank().equals(Rank.VARIETY())){
4947 handleVarietyHierarchy(ref, myName, parser);
4948 }
4949 else if (taxonName.getRank().equals(Rank.SUBSPECIES())){
4950 handleSubSpeciesHierarchy(ref, myName, parser);
4951 }
4952 else if (taxonName.getRank().equals(Rank.SPECIES())){
4953 handleSpeciesHierarchy(ref, myName, parser);
4954 }
4955 else if (taxonName.getRank().equals(Rank.SUBGENUS())){
4956 handleSubgenusHierarchy(ref, myName, parser);
4957 }
4958
4959 if (taxonName.getRank().equals(Rank.GENUS())){
4960 handleGenusHierarchy(ref, myName, parser);
4961 }
4962 if (taxonName.getRank().equals(Rank.SUBTRIBE())){
4963 handleSubtribeHierarchy(ref, myName, parser);
4964 }
4965 if (taxonName.getRank().equals(Rank.TRIBE())){
4966 handleTribeHierarchy(ref, myName, parser);
4967 }
4968
4969 if (taxonName.getRank().equals(Rank.SUBFAMILY())){
4970 handleSubfamilyHierarchy(ref, myName, parser);
4971 }
4972 }
4973
4974 /**
4975 * @param ref
4976 * @param myName
4977 * @param parser
4978 */
4979 private void handleSubfamilyHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
4980 System.out.println("handleSubfamilyHierarchy");
4981 String parentStr = myName.getFamilyStr();
4982 Rank r = Rank.FAMILY();
4983 if(parentStr!=null){
4984
4985 Taxon parent = null;
4986 Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitleWithRestrictions(TaxonBase.class, parentStr, MatchMode.BEGINNING, null, null, null, null, null);
4987 for(TaxonBase tb:taxontest.getRecords()){
4988 try {
4989 if (tb.getName().getRank().equals(r)) {
4990 parent=CdmBase.deproxy(tb, Taxon.class);
4991 }
4992 break;
4993 } catch (Exception e) {
4994 // TODO Auto-generated catch block
4995 e.printStackTrace();
4996 }
4997 }
4998 if(parent == null) {
4999 INonViralName parentNameName = parser.parseFullName(parentStr, nomenclaturalCode, r);
5000 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5001 if(tmp ==null)
5002 {
5003 parent=Taxon.NewInstance(parentNameName, ref);
5004 importer.getTaxonService().save(parent);
5005 parent = CdmBase.deproxy(parent, Taxon.class);
5006 } else {
5007 parent=tmp;
5008 }
5009 lookForParentNode(parentNameName, parent, ref,myName);
5010
5011 }
5012 hierarchy.put(r,parent);
5013 }
5014 }
5015
5016 /**
5017 * @param ref
5018 * @param myName
5019 * @param parser
5020 */
5021 private void handleTribeHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5022 String parentStr = myName.getSubfamilyStr();
5023 Rank r = Rank.SUBFAMILY();
5024 if (parentStr == null){
5025 parentStr = myName.getFamilyStr();
5026 r = Rank.FAMILY();
5027 }
5028 if(parentStr!=null){
5029 INonViralName parentNameName = parser.parseFullName(parentStr, nomenclaturalCode, r);
5030 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5031 // importer.getTaxonService().save(parent);
5032 // parent = CdmBase.deproxy(parent, Taxon.class);
5033
5034 boolean parentDoesNotExists = true;
5035 for (TaxonNode p : classification.getAllNodes()){
5036 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5037 parentDoesNotExists = false;
5038 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5039 break;
5040 }
5041 }
5042 // if(parentDoesNotExists) {
5043 // importer.getTaxonService().save(parent);
5044 // parent = CdmBase.deproxy(parent, Taxon.class);
5045 // lookForParentNode(parentNameName, parent, ref,myName);
5046 // }
5047 if(parentDoesNotExists) {
5048 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5049 if(tmp ==null)
5050 {
5051 parent=Taxon.NewInstance(parentNameName, ref);
5052 importer.getTaxonService().save(parent);
5053 parent = CdmBase.deproxy(parent, Taxon.class);
5054 } else {
5055 parent=tmp;
5056 }
5057 lookForParentNode(parentNameName, parent, ref,myName);
5058
5059 }
5060 hierarchy.put(r,parent);
5061 }
5062 }
5063
5064 /**
5065 * @param ref
5066 * @param myName
5067 * @param parser
5068 */
5069 private void handleSubtribeHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5070 String parentStr = myName.getTribeStr();
5071 Rank r = Rank.TRIBE();
5072 if (parentStr == null){
5073 parentStr = myName.getSubfamilyStr();
5074 r = Rank.SUBFAMILY();
5075 }
5076 if (parentStr == null){
5077 parentStr = myName.getFamilyStr();
5078 r = Rank.FAMILY();
5079 }
5080 if(parentStr!=null){
5081 INonViralName parentNameName = parser.parseFullName(parentStr, nomenclaturalCode, r);
5082 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5083 // importer.getTaxonService().save(parent);
5084 // parent = CdmBase.deproxy(parent, Taxon.class);
5085
5086 boolean parentDoesNotExists = true;
5087 for (TaxonNode p : classification.getAllNodes()){
5088 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5089 parentDoesNotExists = false;
5090 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5091
5092 break;
5093 }
5094 }
5095 // if(parentDoesNotExists) {
5096 // importer.getTaxonService().save(parent);
5097 // parent = CdmBase.deproxy(parent, Taxon.class);
5098 // lookForParentNode(parentNameName, parent, ref,myName);
5099 // }
5100 if(parentDoesNotExists) {
5101 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5102 if(tmp ==null)
5103 {
5104 parent=Taxon.NewInstance(parentNameName, ref);
5105 importer.getTaxonService().save(parent);
5106 parent = CdmBase.deproxy(parent, Taxon.class);
5107 } else {
5108 parent=tmp;
5109 }
5110 lookForParentNode(parentNameName, parent, ref,myName);
5111
5112 }
5113 hierarchy.put(r,parent);
5114 }
5115 }
5116
5117 /**
5118 * @param ref
5119 * @param myName
5120 * @param parser
5121 */
5122 private void handleGenusHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5123 String parentStr = myName.getSubtribeStr();
5124 Rank r = Rank.SUBTRIBE();
5125 if (parentStr == null){
5126 parentStr = myName.getTribeStr();
5127 r = Rank.TRIBE();
5128 }
5129 if (parentStr == null){
5130 parentStr = myName.getSubfamilyStr();
5131 r = Rank.SUBFAMILY();
5132 }
5133 if (parentStr == null){
5134 parentStr = myName.getFamilyStr();
5135 r = Rank.FAMILY();
5136 }
5137 if(parentStr!=null){
5138 INonViralName parentNameName = parser.parseFullName(parentStr, nomenclaturalCode, r);
5139 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5140 // importer.getTaxonService().save(parent);
5141 // parent = CdmBase.deproxy(parent, Taxon.class);
5142
5143 boolean parentDoesNotExist = true;
5144 for (TaxonNode p : classification.getAllNodes()){
5145 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5146 // System.out.println(p.getTaxon().getUuid());
5147 // System.out.println(parent.getUuid());
5148 parentDoesNotExist = false;
5149 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5150 break;
5151 }
5152 }
5153 // if(parentDoesNotExists) {
5154 // importer.getTaxonService().save(parent);
5155 // parent = CdmBase.deproxy(parent, Taxon.class);
5156 // lookForParentNode(parentNameName, parent, ref,myName);
5157 // }
5158 if(parentDoesNotExist) {
5159 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5160 if(tmp ==null){
5161
5162 parent=Taxon.NewInstance(parentNameName, ref);
5163 importer.getTaxonService().save(parent);
5164 parent = CdmBase.deproxy(parent, Taxon.class);
5165 } else {
5166 parent=tmp;
5167 }
5168 lookForParentNode(parentNameName, parent, ref,myName);
5169
5170 }
5171 hierarchy.put(r,parent);
5172 }
5173 }
5174
5175 /**
5176 * @param ref
5177 * @param myName
5178 * @param parser
5179 */
5180 private void handleSubgenusHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5181 String parentStr = myName.getGenusStr();
5182 Rank r = Rank.GENUS();
5183
5184 if(parentStr==null){
5185 parentStr = myName.getSubtribeStr();
5186 r = Rank.SUBTRIBE();
5187 }
5188 if (parentStr == null){
5189 parentStr = myName.getTribeStr();
5190 r = Rank.TRIBE();
5191 }
5192 if (parentStr == null){
5193 parentStr = myName.getSubfamilyStr();
5194 r = Rank.SUBFAMILY();
5195 }
5196 if (parentStr == null){
5197 parentStr = myName.getFamilyStr();
5198 r = Rank.FAMILY();
5199 }
5200 if(parentStr!=null){
5201 INonViralName parentNameName = parser.parseFullName(parentStr, nomenclaturalCode, r);
5202 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5203 // importer.getTaxonService().save(parent);
5204 // parent = CdmBase.deproxy(parent, Taxon.class);
5205
5206 boolean parentDoesNotExists = true;
5207 for (TaxonNode p : classification.getAllNodes()){
5208 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5209 // System.out.println(p.getTaxon().getUuid());
5210 // System.out.println(parent.getUuid());
5211 parentDoesNotExists = false;
5212 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5213 break;
5214 }
5215 }
5216 // if(parentDoesNotExists) {
5217 // importer.getTaxonService().save(parent);
5218 // parent = CdmBase.deproxy(parent, Taxon.class);
5219 // lookForParentNode(parentNameName, parent, ref,myName);
5220 // }
5221 if(parentDoesNotExists) {
5222 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5223 if(tmp ==null)
5224 {
5225 parent=Taxon.NewInstance(parentNameName, ref);
5226 importer.getTaxonService().save(parent);
5227 parent = CdmBase.deproxy(parent, Taxon.class);
5228 } else {
5229 parent=tmp;
5230 }
5231 lookForParentNode(parentNameName, parent, ref,myName);
5232
5233 }
5234 hierarchy.put(r,parent);
5235 }
5236 }
5237
5238 /**
5239 * @param ref
5240 * @param myName
5241 * @param parser
5242 */
5243 private void handleSpeciesHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5244 String parentStr = myName.getSubgenusStr();
5245 Rank r = Rank.SUBGENUS();
5246
5247 if(parentStr==null){
5248 parentStr = myName.getGenusStr();
5249 r = Rank.GENUS();
5250 }
5251
5252 if(parentStr==null){
5253 parentStr = myName.getSubtribeStr();
5254 r = Rank.SUBTRIBE();
5255 }
5256 if (parentStr == null){
5257 parentStr = myName.getTribeStr();
5258 r = Rank.TRIBE();
5259 }
5260 if (parentStr == null){
5261 parentStr = myName.getSubfamilyStr();
5262 r = Rank.SUBFAMILY();
5263 }
5264 if (parentStr == null){
5265 parentStr = myName.getFamilyStr();
5266 r = Rank.FAMILY();
5267 }
5268 if(parentStr!=null){
5269 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5270 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5271 hierarchy.put(r,parent);
5272 }
5273 }
5274
5275 /**
5276 * @param ref
5277 * @param myName
5278 * @param parser
5279 */
5280 private void handleSubSpeciesHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5281 String parentStr = myName.getSpeciesStr();
5282 Rank r = Rank.SPECIES();
5283
5284
5285 if(parentStr==null){
5286 parentStr = myName.getSubgenusStr();
5287 r = Rank.SUBGENUS();
5288 }
5289
5290 if(parentStr==null){
5291 parentStr = myName.getGenusStr();
5292 r = Rank.GENUS();
5293 }
5294
5295 if(parentStr==null){
5296 parentStr = myName.getSubtribeStr();
5297 r = Rank.SUBTRIBE();
5298 }
5299 if (parentStr == null){
5300 parentStr = myName.getTribeStr();
5301 r = Rank.TRIBE();
5302 }
5303 if (parentStr == null){
5304 parentStr = myName.getSubfamilyStr();
5305 r = Rank.SUBFAMILY();
5306 }
5307 if (parentStr == null){
5308 parentStr = myName.getFamilyStr();
5309 r = Rank.FAMILY();
5310 }
5311 if(parentStr!=null){
5312 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5313 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5314 hierarchy.put(r,parent);
5315 }
5316 }
5317
5318
5319 /**
5320 * @param ref
5321 * @param myName
5322 * @param parser
5323 */
5324 private void handleFormHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5325 String parentStr = myName.getSubspeciesStr();
5326 Rank r = Rank.SUBSPECIES();
5327
5328
5329 if(parentStr==null){
5330 parentStr = myName.getSpeciesStr();
5331 r = Rank.SPECIES();
5332 }
5333
5334 if(parentStr==null){
5335 parentStr = myName.getSubgenusStr();
5336 r = Rank.SUBGENUS();
5337 }
5338
5339 if(parentStr==null){
5340 parentStr = myName.getGenusStr();
5341 r = Rank.GENUS();
5342 }
5343
5344 if(parentStr==null){
5345 parentStr = myName.getSubtribeStr();
5346 r = Rank.SUBTRIBE();
5347 }
5348 if (parentStr == null){
5349 parentStr = myName.getTribeStr();
5350 r = Rank.TRIBE();
5351 }
5352 if (parentStr == null){
5353 parentStr = myName.getSubfamilyStr();
5354 r = Rank.SUBFAMILY();
5355 }
5356 if (parentStr == null){
5357 parentStr = myName.getFamilyStr();
5358 r = Rank.FAMILY();
5359 }
5360 if(parentStr!=null){
5361 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5362 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5363 hierarchy.put(r,parent);
5364 }
5365 }
5366
5367 /**
5368 * @param ref
5369 * @param myName
5370 * @param parser
5371 */
5372 private void handleVarietyHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5373 String parentStr = myName.getSubspeciesStr();
5374 Rank r = Rank.SUBSPECIES();
5375
5376 if(parentStr==null){
5377 parentStr = myName.getSpeciesStr();
5378 r = Rank.SPECIES();
5379 }
5380
5381 if(parentStr==null){
5382 parentStr = myName.getSubgenusStr();
5383 r = Rank.SUBGENUS();
5384 }
5385
5386 if(parentStr==null){
5387 parentStr = myName.getGenusStr();
5388 r = Rank.GENUS();
5389 }
5390
5391 if(parentStr==null){
5392 parentStr = myName.getSubtribeStr();
5393 r = Rank.SUBTRIBE();
5394 }
5395 if (parentStr == null){
5396 parentStr = myName.getTribeStr();
5397 r = Rank.TRIBE();
5398 }
5399 if (parentStr == null){
5400 parentStr = myName.getSubfamilyStr();
5401 r = Rank.SUBFAMILY();
5402 }
5403 if (parentStr == null){
5404 parentStr = myName.getFamilyStr();
5405 r = Rank.FAMILY();
5406 }
5407 if(parentStr!=null){
5408 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5409 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5410 hierarchy.put(r,parent);
5411 }
5412 }
5413
5414 /**
5415 * @param ref
5416 * @param myName
5417 * @param parser
5418 * @param parentStr
5419 * @param r
5420 * @return
5421 */
5422 private Taxon handleParentName(Reference ref, MyName myName, INonViralNameParser<?> parser, String parentStr, Rank r) {
5423 INonViralName parentNameName = parser.parseFullName(parentStr, nomenclaturalCode, r);
5424 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5425 // importer.getTaxonService().save(parent);
5426 // parent = CdmBase.deproxy(parent, Taxon.class);
5427
5428 boolean parentDoesNotExists = true;
5429 for (TaxonNode p : classification.getAllNodes()){
5430 if(p.getTaxon().getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(parent.getTitleCache().split("sec.")[0].trim())) {
5431 // System.out.println(p.getTaxon().getUuid());
5432 // System.out.println(parent.getUuid());
5433 parentDoesNotExists = false;
5434 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5435 break;
5436 }
5437 }
5438 if(parentDoesNotExists) {
5439 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5440 // System.out.println("FOUND PARENT "+tmp.toString()+" for "+parentNameName.toString());
5441 if(tmp ==null){
5442
5443 parent=Taxon.NewInstance(parentNameName, ref);
5444 importer.getTaxonService().save(parent);
5445
5446 } else {
5447 parent=tmp;
5448 }
5449 lookForParentNode(parentNameName, parent, ref,myName);
5450
5451 }
5452 return parent;
5453 }
5454
5455 private void addNameDifferenceToFile(String originalname, String atomisedname){
5456 try{
5457 FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NamesDifferent_"+classification.getTitleCache()+".txt",true);
5458 BufferedWriter out = new BufferedWriter(fstream);
5459 out.write(originalname+" (original) versus "+replaceNull(atomisedname)+" (atomised) \n");
5460 //Close the output stream
5461 out.close();
5462 }catch (Exception e){//Catch exception if any
5463 System.err.println("Error: " + e.getMessage());
5464 }
5465 }
5466 /**
5467 * @param name
5468 * @param author
5469 * @param nomenclaturalCode2
5470 * @param rank
5471 */
5472 private void addProblemNameToFile(String name, String author, NomenclaturalCode nomenclaturalCode2, Rank rank) {
5473 try{
5474 FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NameNotParsed.txt",true);
5475 BufferedWriter out = new BufferedWriter(fstream);
5476 out.write(name+"\t"+replaceNull(author)+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\n");
5477 //Close the output stream
5478 out.close();
5479 }catch (Exception e){//Catch exception if any
5480 System.err.println("Error: " + e.getMessage());
5481 }
5482 }
5483
5484
5485 /**
5486 * @param tnb
5487 * @param bestMatchingTaxon
5488 * @param insertAsExisting
5489 * @param refMods
5490 */
5491 private void logDecision(INonViralName tnb, Taxon bestMatchingTaxon, boolean insertAsExisting, Reference refMods) {
5492 try{
5493 FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "Decisions_"+classification.toString()+".txt", true);
5494 BufferedWriter out = new BufferedWriter(fstream);
5495 out.write(tnb.getTitleCache() + " sec. " + refMods + "\t" + bestMatchingTaxon.getTitleCache() + "\t" + insertAsExisting + "\n");
5496 //Close the output stream
5497 out.close();
5498 }catch (Exception e){//Catch exception if any
5499 System.err.println("Error: " + e.getMessage());
5500 }
5501 }
5502
5503
5504 @SuppressWarnings("unused")
5505 private String replaceNull(Object in){
5506 if (in == null) {
5507 return "";
5508 }
5509 if (in.getClass().equals(NomenclaturalCode.class)) {
5510 return ((NomenclaturalCode)in).getTitleCache();
5511 }
5512 return in.toString();
5513 }
5514
5515 /**
5516 * @param fullName
5517 * @param nomenclaturalCode2
5518 * @param rank
5519 */
5520 private void addProblemNameToFile(String type, String name, NomenclaturalCode nomenclaturalCode2, Rank rank, String problems) {
5521 try{
5522 FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NameNotParsed_"+classification.getTitleCache()+".txt",true);
5523 BufferedWriter out = new BufferedWriter(fstream);
5524 out.write(type+"\t"+name+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\t"+problems+"\n");
5525 //Close the output stream
5526 out.close();
5527 }catch (Exception e){//Catch exception if any
5528 System.err.println("Error: " + e.getMessage());
5529 }
5530
5531 }
5532
5533 }
5534
5535
5536