latest changes to TaxonX import
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / taxonx2013 / TaxonXTreatmentExtractor.java
1 // $Id$
2 /**
3 * Copyright (C) 2013 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10 package eu.etaxonomy.cdm.io.taxonx2013;
11
12 import java.io.BufferedWriter;
13 import java.io.File;
14 import java.io.FileWriter;
15 import java.io.IOException;
16 import java.net.URI;
17 import java.util.ArrayList;
18 import java.util.Arrays;
19 import java.util.HashMap;
20 import java.util.List;
21 import java.util.Map;
22 import java.util.Set;
23 import java.util.UUID;
24 import java.util.regex.Pattern;
25
26 import javax.xml.transform.TransformerException;
27 import javax.xml.transform.TransformerFactoryConfigurationError;
28
29 import org.apache.commons.lang.StringUtils;
30 import org.apache.log4j.Logger;
31 import org.w3c.dom.Node;
32 import org.w3c.dom.NodeList;
33
34 import com.ibm.lsid.MalformedLSIDException;
35
36 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
37 import eu.etaxonomy.cdm.api.service.pager.Pager;
38 import eu.etaxonomy.cdm.model.agent.AgentBase;
39 import eu.etaxonomy.cdm.model.agent.Person;
40 import eu.etaxonomy.cdm.model.common.CdmBase;
41 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
42 import eu.etaxonomy.cdm.model.common.LSID;
43 import eu.etaxonomy.cdm.model.common.Language;
44 import eu.etaxonomy.cdm.model.common.OriginalSourceType;
45 import eu.etaxonomy.cdm.model.common.UuidAndTitleCache;
46 import eu.etaxonomy.cdm.model.description.Feature;
47 import eu.etaxonomy.cdm.model.description.FeatureNode;
48 import eu.etaxonomy.cdm.model.description.FeatureTree;
49 import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
50 import eu.etaxonomy.cdm.model.description.TaxonDescription;
51 import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
52 import eu.etaxonomy.cdm.model.description.TextData;
53 import eu.etaxonomy.cdm.model.name.BacterialName;
54 import eu.etaxonomy.cdm.model.name.BotanicalName;
55 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
56 import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
57 import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
58 import eu.etaxonomy.cdm.model.name.NonViralName;
59 import eu.etaxonomy.cdm.model.name.Rank;
60 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
61 import eu.etaxonomy.cdm.model.name.ZoologicalName;
62 import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
63 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
64 import eu.etaxonomy.cdm.model.reference.Reference;
65 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
66 import eu.etaxonomy.cdm.model.taxon.Classification;
67 import eu.etaxonomy.cdm.model.taxon.Synonym;
68 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
69 import eu.etaxonomy.cdm.model.taxon.Taxon;
70 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
71 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
72 import eu.etaxonomy.cdm.persistence.query.MatchMode;
73 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
74 import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
75 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
76
77 /**
78 * @author pkelbert
79 * @date 2 avr. 2013
80 *
81 */
82 public class TaxonXTreatmentExtractor extends TaxonXExtractor{
83
84 private static final Logger logger = Logger.getLogger(TaxonXTreatmentExtractor.class);
85
86 private static final String notMarkedUp = "Not marked-up";
87 private static final UUID proIbioTreeUUID = UUID.fromString("2c49f506-c7f7-44de-a8b9-2e695de3769c");
88 private static final UUID OtherUUID = UUID.fromString("6465f8aa-2175-446f-807e-7163994b120f");
89 private static final UUID NotMarkedUpUUID = UUID.fromString("796fe3a5-2c9c-4a89-b298-7598ca944063");
90 private static final boolean skippQuestion = true;
91
92 private final NomenclaturalCode nomenclaturalCode;
93 private Classification classification;
94
95 private String treatmentMainName,originalTreatmentName;
96
97 private final HashMap<String,Map<String,String>> namesMap = new HashMap<String, Map<String,String>>();
98
99
100 private final Pattern keypattern = Pattern.compile("^(\\d+.*|-\\d+.*)");
101 private final Pattern keypatternend = Pattern.compile("^.+?\\d$");
102
103 private boolean maxRankRespected =false;
104 private Map<String, Feature> featuresMap;
105
106 private MyName currentMyName;
107
108 private Reference<?> sourceUrlRef;
109
110 private final TaxonXAddSources sourceHandler = new TaxonXAddSources();
111
/**
 * Creates an extractor, prepares the collectors and configures the source handler.
 *
 * @param nomenclaturalCode the nomenclatural code kept by this extractor
 * @param classification the classification kept (and later reloaded/saved) by this extractor
 * @param importer the importer whose services are used
 * @param configState the import state; also handed to the source handler
 * @param featuresMap the features known so far, may be completed by later calls
 * @param urlSource the reference kept as source URL reference and handed to the source handler
 */
public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode, Classification classification, TaxonXImport importer,
        TaxonXImportState configState, Map<String, Feature> featuresMap, Reference<?> urlSource) {
    // state owned by this class
    this.nomenclaturalCode = nomenclaturalCode;
    this.classification = classification;
    this.featuresMap = featuresMap;
    this.sourceUrlRef = urlSource;

    // state that must be in place before the collectors are prepared
    this.importer = importer;
    this.configState = configState;
    prepareCollectors(configState, importer.getAgentService());

    // the source handler works with the same reference, importer and state
    sourceHandler.setSourceUrlRef(urlSource);
    sourceHandler.setImporter(importer);
    sourceHandler.setConfigState(configState);
}
131
132 /**
133 * extracts all the treament information and save them
134 * @param treatmentnode: the XML Node
135 * @param tosave: the list of object to save into the CDM
136 * @param refMods: the reference extracted from the MODS
137 * @param sourceName: the URI of the document
138 */
139 @SuppressWarnings({ "rawtypes", "unused" })
140 protected void extractTreatment(Node treatmentnode, Reference<?> refMods, URI sourceName) {
141 logger.info("extractTreatment");
142 List<TaxonNameBase> namesToSave = new ArrayList<TaxonNameBase>();
143 NodeList children = treatmentnode.getChildNodes();
144 Taxon acceptedTaxon =null;
145 boolean hasRefgroup=false;
146
147 //needed?
148 for (int i=0;i<children.getLength();i++){
149 if(children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group")) {
150 hasRefgroup=true;
151 }
152 }
153
154 for (int i=0;i<children.getLength();i++){
155 Node child = children.item(i);
156 acceptedTaxon = handleSingleNode(refMods, sourceName, namesToSave, child, acceptedTaxon);
157 }
158 // logger.info("saveUpdateNames");
159 if (maxRankRespected){
160 importer.getNameService().saveOrUpdate(namesToSave);
161 importer.getClassificationService().saveOrUpdate(classification);
162 //logger.info("saveUpdateNames-ok");
163 }
164
165 buildFeatureTree();
166 }
167
168 private Taxon handleSingleNode(Reference<?> refMods, URI sourceName,
169 List<TaxonNameBase> namesToSave, Node child, Taxon acceptedTaxon) {
170 Taxon defaultTaxon =null;
171
172 String nodeName = child.getNodeName();
173 if (nodeName.equalsIgnoreCase("tax:nomenclature")){
174 NodeList nomenclatureChildren = child.getChildNodes();
175 boolean containsName = false;
176 for(int k=0; k<nomenclatureChildren.getLength(); k++){
177 if(nomenclatureChildren.item(k).getNodeName().equalsIgnoreCase("tax:name")){
178 containsName=true;
179 break;
180 }
181 }
182 if (containsName){
183 reloadClassification();
184 //extract "main" the scientific name
185 try{
186 acceptedTaxon = extractNomenclature(child, namesToSave, refMods);
187 }catch(ClassCastException e){
188 //FIXME exception handling
189 e.printStackTrace();
190 System.exit(0);
191 }
192 // System.out.println("acceptedTaxon : "+acceptedTaxon);
193 }
194 }else if (nodeName.equalsIgnoreCase("tax:ref_group") && maxRankRespected){
195 reloadClassification();
196 //extract the References within the document
197 extractReferences(child, namesToSave ,acceptedTaxon,refMods);
198 }else if (nodeName.equalsIgnoreCase("tax:div") &&
199 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected){
200 File file = new File("/home/pkelbert/Bureau/multipleTaxonX.txt");
201 FileWriter writer;
202 try {
203 writer = new FileWriter(file ,true);
204 writer.write(sourceName+"\n");
205 writer.flush();
206 writer.close();
207 } catch (IOException e1) {
208 // TODO Auto-generated catch block
209 logger.error(e1.getMessage());
210 }
211 // String multiple = askMultiple(children.item(i));
212 String multiple = "Other";
213 if (multiple.equalsIgnoreCase("other")) {
214 extractSpecificFeatureNotStructured(child,acceptedTaxon, defaultTaxon,namesToSave, refMods,multiple);
215 }else if (multiple.equalsIgnoreCase("synonyms")) {
216 try{
217 extractSynonyms(child,acceptedTaxon, refMods);
218 }catch(NullPointerException e){
219 logger.warn("the accepted taxon is maybe null");
220 }
221 }else if(multiple.equalsIgnoreCase("material examined")){
222 extractMaterials(child, acceptedTaxon, refMods, namesToSave);
223 }else if (multiple.equalsIgnoreCase("distribution")){
224 extractDistribution(child, acceptedTaxon, defaultTaxon, namesToSave, refMods);
225 }else if (multiple.equalsIgnoreCase("type status")){
226 extractDescriptionWithReference(child, acceptedTaxon, defaultTaxon,refMods, "TypeStatus");
227 }else if (multiple.equalsIgnoreCase("vernacular name")){
228 extractDescriptionWithReference(child, acceptedTaxon, defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
229 }else{
230 extractSpecificFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,multiple);
231 }
232 }
233 else if(nodeName.equalsIgnoreCase("tax:div") &&
234 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected){
235 extractFeature(child,acceptedTaxon,defaultTaxon, namesToSave, refMods, Feature.BIOLOGY_ECOLOGY());
236 }
237 else if(nodeName.equalsIgnoreCase("tax:div") &&
238 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("vernacularName") && maxRankRespected){
239 extractDescriptionWithReference(child, acceptedTaxon,defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
240 }
241 else if(nodeName.equalsIgnoreCase("tax:div") &&
242 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected){
243 extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DESCRIPTION());
244 }
245 else if(nodeName.equalsIgnoreCase("tax:div") &&
246 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected){
247 extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,Feature.DIAGNOSIS());
248 }
249 else if(nodeName.equalsIgnoreCase("tax:div") &&
250 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected){
251 extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DISCUSSION());
252 }
253 else if(nodeName.equalsIgnoreCase("tax:div") &&
254 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("note") && maxRankRespected){
255 extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DESCRIPTION());
256 }
257 else if(nodeName.equalsIgnoreCase("tax:div") &&
258 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected){
259 extractDistribution(child,acceptedTaxon,defaultTaxon,namesToSave, refMods);
260 }
261 else if(nodeName.equalsIgnoreCase("tax:div") &&
262 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected){
263 extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave,refMods,Feature.ETYMOLOGY());
264 }
265 else if(nodeName.equalsIgnoreCase("tax:div") &&
266 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected){
267 extractMaterials(child,acceptedTaxon, refMods, namesToSave);
268 }
269 else if(nodeName.equalsIgnoreCase("tax:figure") && maxRankRespected){
270 extractSpecificFeature(child,acceptedTaxon,defaultTaxon, namesToSave, refMods, "Figure");
271 }
272 else if(nodeName.equalsIgnoreCase("tax:div") &&
273 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") && maxRankRespected){
274 extractSpecificFeature(child, acceptedTaxon,defaultTaxon, namesToSave, refMods, "table");
275 }else if(nodeName.equalsIgnoreCase("tax:div") &&
276 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected){
277 //TODO IGNORE keys for the moment
278 //extractKey(children.item(i),acceptedTaxon, nameToSave,source, refMods);
279 extractSpecificFeatureNotStructured(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,"Keys - unparsed");
280 }
281 else{
282 if (! nodeName.equalsIgnoreCase("tax:pb")){
283 //logger.info("ANOTHER KIND OF NODES: "+children.item(i).getNodeName()+", "+children.item(i).getAttributes());
284 if (child.getAttributes() !=null) {
285 logger.info("First Attribute: " + child.getAttributes().item(0));
286 }
287 extractSpecificFeatureNotStructured(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, notMarkedUp);
288 }else{
289 //FIXME
290 logger.warn("Unhandled");
291 }
292 }
293 return acceptedTaxon;
294 }
295
296
297 protected Map<String,Feature> getFeaturesUsed(){
298 return featuresMap;
299 }
300 /**
301 *
302 */
303 private void buildFeatureTree() {
304 logger.info("buildFeatureTree");
305 FeatureTree proibiospheretree = importer.getFeatureTreeService().find(proIbioTreeUUID);
306 if (proibiospheretree == null){
307 List<FeatureTree> trees = importer.getFeatureTreeService().list(FeatureTree.class, null, null, null, null);
308 if (trees.size()==1) {
309 FeatureTree ft = trees.get(0);
310 if (featuresMap==null) {
311 featuresMap=new HashMap<String, Feature>();
312 }
313 for (Feature feature: ft.getDistinctFeatures()){
314 if(feature!=null) {
315 featuresMap.put(feature.getTitleCache(), feature);
316 }
317 }
318 }
319 proibiospheretree = FeatureTree.NewInstance();
320 proibiospheretree.setUuid(proIbioTreeUUID);
321 }
322 // FeatureNode root = proibiospheretree.getRoot();
323 FeatureNode root2 = proibiospheretree.getRoot();
324 if (root2 != null){
325 int nbChildren = root2.getChildCount()-1;
326 while (nbChildren>-1){
327 try{
328 root2.removeChild(nbChildren);
329 }catch(Exception e){logger.warn("Can't remove child from FeatureTree "+e);}
330 nbChildren --;
331 }
332
333 }
334
335 for (Feature feature:featuresMap.values()) {
336 root2.addChild(FeatureNode.NewInstance(feature));
337 }
338 importer.getFeatureTreeService().saveOrUpdate(proibiospheretree);
339
340 }
341
342
343 /**
344 * @param keys
345 * @param acceptedTaxon: the current acceptedTaxon
346 * @param nametosave: the list of objects to save into the CDM
347 * @param refMods: the current reference extracted from the MODS
348 */
349 /* @SuppressWarnings("rawtypes")
350 private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonNameBase> nametosave, Reference<?> refMods) {
351 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
352
353 NodeList children = keys.getChildNodes();
354 String key="";
355 PolytomousKey poly = PolytomousKey.NewInstance();
356 poly.addSource(OriginalSourceType.Import, null,null,refMods,null);
357 poly.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
358 poly.addTaxonomicScope(acceptedTaxon);
359 poly.setTitleCache("bloup", true);
360 // poly.addCoveredTaxon(acceptedTaxon);
361 PolytomousKeyNode root = poly.getRoot();
362 PolytomousKeyNode previous = null,tmpKey=null;
363 Taxon taxonKey=null;
364 List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
365
366 // String fullContent = keys.getTextContent();
367 for (int i=0;i<children.getLength();i++){
368 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
369 NodeList paragraph = children.item(i).getChildNodes();
370 key="";
371 taxonKey=null;
372 for (int j=0;j<paragraph.getLength();j++){
373 if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
374 if (! paragraph.item(j).getTextContent().trim().isEmpty()){
375 key+=paragraph.item(j).getTextContent().trim();
376 // logger.info("KEY: "+j+"--"+key);
377 }
378 }
379 if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
380 taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
381 }
382 }
383 // logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
384 if (keypattern.matcher(key).matches()){
385 tmpKey = PolytomousKeyNode.NewInstance(key);
386 if (taxonKey!=null) {
387 tmpKey.setTaxon(taxonKey);
388 }
389 polyNodes.add(tmpKey);
390 if (previous == null) {
391 root.addChild(tmpKey);
392 } else {
393 previous.addChild(tmpKey);
394 }
395 }else{
396 if (!key.isEmpty()){
397 tmpKey=PolytomousKeyNode.NewInstance(key);
398 if (taxonKey!=null) {
399 tmpKey.setTaxon(taxonKey);
400 }
401 polyNodes.add(tmpKey);
402 if (keypatternend.matcher(key).matches()) {
403 root.addChild(tmpKey);
404 previous=tmpKey;
405 } else{
406 previous.addChild(tmpKey);
407 }
408
409 }
410 }
411 }
412 }
413 importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
414 importer.getPolytomousKeyService().saveOrUpdate(poly);
415 }
416 */
417 // /**
418 // * @param taxons: the XML Nodegroup
419 // * @param nametosave: the list of objects to save into the CDM
420 // * @param acceptedTaxon: the current accepted Taxon
421 // * @param refMods: the current reference extracted from the MODS
422 // *
423 // * @return Taxon object built
424 // */
425 // @SuppressWarnings({ "rawtypes", "unchecked" })
426 // private Taxon getTaxonFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference<?> refMods) {
427 // // logger.info("getTaxonFromXML");
428 // // logger.info("acceptedTaxon: "+acceptedTaxon);
429 //
430 // // TaxonNameBase nameToBeFilled = null;
431 //
432 // currentMyName = new MyName();
433 // NomenclaturalStatusType statusType = null;
434 //
435 // try {
436 // currentMyName = extractScientificName(taxons);
437 // if (!currentMyName.getStatus().isEmpty()){
438 // try {
439 // statusType = nomStatusString2NomStatus(currentMyName.getStatus());
440 // } catch (UnknownCdmTypeException e) {
441 // addProblematicStatusToFile(currentMyName.getStatus());
442 // logger.warn("Problem with status");
443 // }
444 // }
445 //
446 // } catch (TransformerFactoryConfigurationError e1) {
447 // logger.warn(e1);
448 // } catch (TransformerException e1) {
449 // logger.warn(e1);
450 // }
451 // /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
452 //
453 // nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
454 // if (nameToBeFilled.hasProblem() &&
455 // !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
456 // // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
457 // addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
458 // nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser, currentMyName.getAuthor(), currentMyName.getRank());
459 // }
460 //
461 // nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
462 // */
463 // TaxonNameBase nameToBeFilled = currentMyName.getTaxonNameBase();
464 // Taxon t = currentMyName.getTaxon();
465 // // importer.getNameService().saveOrUpdate(nametosave);
466 // /* Taxon t = importer.getTaxonService().findBestMatchingTaxon(nameToBeFilled.getTitleCache());
467 // */
468 // boolean statusMatch=false;
469 // if(t !=null ){
470 // statusMatch=compareStatus(t, statusType);
471 // }
472 // if (t ==null || (t != null && !statusMatch)){
473 // if(statusType != null) {
474 // nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
475 // }
476 // t= new Taxon(nameToBeFilled,(Reference<?>) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
477 // if (t.getSec() == null) {
478 // t.setSec(refMods);
479 // }
480 // if(!configState.getConfig().doKeepOriginalSecundum()) {
481 // t.setSec(configState.getConfig().getSecundum());
482 // logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
483 // }
484 // t.addSource(OriginalSourceType.Import,null,null,refMods,null);
485 // t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
486 //
487 //
488 // if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
489 // setLSID(currentMyName.getIdentifier(), t);
490 // }
491 //
492 // // Taxon parentTaxon = currentMyName.getHigherTaxa();
493 // // if (parentTaxon == null && !skippQuestion) {
494 // // parentTaxon = askParent(t, classification);
495 // // }
496 // // if (parentTaxon ==null){
497 // // while (parentTaxon == null) {
498 // // System.out.println("parent is null");
499 // // parentTaxon = createParent(t, refMods);
500 // // classification.addParentChild(parentTaxon, t, refMods, null);
501 // // }
502 // // }else{
503 // // classification.addParentChild(parentTaxon, t, refMods, null);
504 // // }
505 // }
506 // else{
507 // t = CdmBase.deproxy(t, Taxon.class);
508 // }
509 // if (!configState.getConfig().doKeepOriginalSecundum()) {
510 // t.setSec(configState.getConfig().getSecundum());
511 // logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
512 // }
513 // return t;
514 // }
515
516
517
518
519 // private Taxon getTaxonFromTaxonNameBase(TaxonNameBase tnb,Reference<?> ref){
520 // Taxon taxon = null;
521 //// System.out.println(tnb.getTitleCache());
522 // Taxon cc= importer.getTaxonService().findBestMatchingTaxon(tnb.getTitleCache());
523 // if (cc != null){
524 // if ((cc.getSec() == null || cc.getSec().toString().isEmpty()) || (cc.getSec() != null &&
525 // cc.getSec().getTitleCache().equalsIgnoreCase(ref.getTitleCache()))) {
526 // if(cc.getSec() == null || cc.getSec().toString().isEmpty()){
527 // cc.setSec(ref);
528 // importer.getTaxonService().saveOrUpdate(cc);
529 // }
530 // taxon=cc;
531 // }
532 // }
533 // else{
534 // // List<TaxonBase> c = importer.getTaxonService().searchTaxaByName(tnb.getTitleCache(), ref);
535 // List<TaxonBase> c = importer.getTaxonService().list(TaxonBase.class, 0, 0, null, null);
536 // for (TaxonBase b : c) {
537 // try{
538 // taxon = (Taxon) b;
539 // }catch(ClassCastException e){logger.warn("error while casting existing taxonnamebase");}
540 // }
541 // }
542 // if (taxon == null){
543 //// System.out.println("NEW TAXON HERE "+tnb.toString()+", "+ref.toString());
544 // taxon = Taxon.NewInstance(tnb, ref); //sec set null
545 // importer.getTaxonService().save(taxon);
546 //
547 // }
548 // taxon = (Taxon) importer.getTaxonService().find(taxon.getUuid());
549 //
550 // boolean exist = false;
551 // for (TaxonNode p : classification.getAllNodes()){
552 // if(p.getTaxon().equals(taxon)) {
553 // exist =true;
554 // }
555 // }
556 // if (!exist){
557 // taxon = (Taxon) importer.getTaxonService().find(taxon.getUuid());
558 // Taxon parentTaxon = currentMyName.getHigherTaxa();
559 // if (parentTaxon != null) {
560 // classification.addParentChild(parentTaxon, taxon, ref, null);
561 // } else {
562 // System.out.println("HERE???");
563 // classification.addChildTaxon(taxon, ref, null);
564 // }
565 // importer.getClassificationService().saveOrUpdate(classification);
566 // // refreshTransaction();
567 // }
568 // taxon = CdmBase.deproxy(taxon, Taxon.class);
569 // // System.out.println("TAXON RETOURNE : "+taxon.getTitleCache());
570 // return taxon;
571 // }
572 /**
573 * @param taxons: the XML Nodegroup
574 * @param nametosave: the list of objects to save into the CDM
575 * @param acceptedTaxon: the current accepted Taxon
576 * @param refMods: the current reference extracted from the MODS
577 *
578 * @return Taxon object built
579 */
580 @SuppressWarnings({ "rawtypes", "unused" })
581 private TaxonNameBase getTaxonNameBaseFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference<?> refMods, boolean isSynonym) {
582 // logger.info("getTaxonFromXML");
583 // logger.info("acceptedTaxon: "+acceptedTaxon);
584 logger.info("getTaxonNameBaseFromXML");
585 TaxonNameBase nameToBeFilled = null;
586
587 currentMyName=new MyName(isSynonym);
588
589 NomenclaturalStatusType statusType = null;
590 try {
591 currentMyName = extractScientificName(taxons,refMods);
592 } catch (TransformerFactoryConfigurationError e1) {
593 logger.warn(e1);
594 } catch (TransformerException e1) {
595 logger.warn(e1);
596 }
597 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
598
599 nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
600 if (nameToBeFilled.hasProblem() &&
601 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
602 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
603 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
604 nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
605 }
606
607 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
608 */
609 nameToBeFilled = currentMyName.getTaxonNameBase();
610 return nameToBeFilled;
611
612 }
613
614
615 /**
616 *
617 */
618 private void reloadClassification() {
619 logger.info("reloadClassification");
620 Classification cl = importer.getClassificationService().find(classification.getUuid());
621 if (cl != null){
622 classification = cl;
623 }else{
624 importer.getClassificationService().saveOrUpdate(classification);
625 classification = importer.getClassificationService().find(classification.getUuid());
626 }
627 }
628
629 // /**
630 // * Create a Taxon for the current NameBase, based on the current reference
631 // * @param taxonNameBase
632 // * @param refMods: the current reference extracted from the MODS
633 // * @return Taxon
634 // */
635 // @SuppressWarnings({ "unused", "rawtypes" })
636 // private Taxon getTaxon(TaxonNameBase taxonNameBase, Reference<?> refMods) {
637 // Taxon t = new Taxon(taxonNameBase,null );
638 // if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
639 // t.setSec(configState.getConfig().getSecundum());
640 // logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
641 // }
642 // /*<<<<<<< .courant
643 // boolean sourceExists=false;
644 // Set<IdentifiableSource> sources = t.getSources();
645 // for (IdentifiableSource src : sources){
646 // String micro = src.getCitationMicroReference();
647 // Reference r = src.getCitation();
648 // if (r.equals(refMods) && micro == null) {
649 // sourceExists=true;
650 // }
651 // }
652 // if(!sourceExists) {
653 // t.addSource(null,null,refMods,null);
654 // }
655 //=======*/
656 // t.addSource(OriginalSourceType.Import,null,null,refMods,null);
657 // t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
658 // return t;
659 // }
660
661 private void extractDescriptionWithReference(Node typestatus, Taxon acceptedTaxon, Taxon defaultTaxon, Reference<?> refMods,
662 String featureName) {
663 // System.out.println("extractDescriptionWithReference !");
664 logger.info("extractDescriptionWithReference");
665 NodeList children = typestatus.getChildNodes();
666
667 Feature currentFeature=getFeatureObjectFromString(featureName);
668
669 String r="";String s="";
670 for (int i=0;i<children.getLength();i++){
671 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
672 s+=children.item(i).getTextContent().trim();
673 }
674 if (children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
675 r+= children.item(i).getTextContent().trim();
676 }
677 if (s.indexOf(r)>-1) {
678 s=s.split(r)[0];
679 }
680 }
681
682 Reference<?> currentref = ReferenceFactory.newGeneric();
683 if(!r.isEmpty()) {
684 currentref.setTitleCache(r, true);
685 } else {
686 currentref=refMods;
687 }
688 setParticularDescription(s,acceptedTaxon,defaultTaxon, currentref, refMods,currentFeature);
689 }
690
691 /**
692 * @param nametosave
693 * @param distribution: the XML node group
694 * @param acceptedTaxon: the current accepted Taxon
695 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
696 * @param refMods: the current reference extracted from the MODS
697 */
698 @SuppressWarnings("rawtypes")
699 private void extractDistribution(Node distribution, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave, Reference<?> refMods) {
700 logger.info("extractDistribution");
701 // logger.info("acceptedTaxon: "+acceptedTaxon);
702 NodeList children = distribution.getChildNodes();
703 Map<Integer,List<MySpecimenOrObservation>> specimenOrObservations = new HashMap<Integer, List<MySpecimenOrObservation>>();
704 Map<Integer,String> descriptionsFulltext = new HashMap<Integer,String>();
705
706 for (int i=0;i<children.getLength();i++){
707 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
708 NodeList paragraph = children.item(i).getChildNodes();
709 for (int j=0;j<paragraph.getLength();j++){
710 if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
711 extractText(descriptionsFulltext, i, paragraph.item(j));
712 }
713 else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
714 extractInLine(nametosave, refMods, descriptionsFulltext, i,paragraph.item(j));
715 }
716 else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")){
717 MySpecimenOrObservation specimenOrObservation = new MySpecimenOrObservation();
718 DerivedUnit derivedUnitBase = null;
719 specimenOrObservation = extractSpecimenOrObservation(paragraph.item(j), derivedUnitBase, SpecimenOrObservationType.DerivedUnit);
720 extractTextFromSpecimenOrObservation(specimenOrObservations, descriptionsFulltext, i, specimenOrObservation);
721 }
722 }
723 }
724 }
725
726 int m=0;
727 for (int k:descriptionsFulltext.keySet()) {
728 if (k>m) {
729 m=k;
730 }
731 }
732 for (int k:specimenOrObservations.keySet()) {
733 if (k>m) {
734 m=k;
735 }
736 }
737
738
739 if(acceptedTaxon!=null){
740 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
741 Feature currentFeature = Feature.DISTRIBUTION();
742 // DerivedUnit derivedUnitBase=null;
743 // String descr="";
744 for (int k=0;k<=m;k++){
745 if(specimenOrObservations.keySet().contains(k)){
746 for (MySpecimenOrObservation soo:specimenOrObservations.get(k) ) {
747 handleAssociation(acceptedTaxon, refMods, td, soo);
748 }
749 }
750
751 if (descriptionsFulltext.keySet().contains(k)){
752 if (!stringIsEmpty(descriptionsFulltext.get(k).trim()) && (descriptionsFulltext.get(k).startsWith("Hab.") || descriptionsFulltext.get(k).startsWith("Habitat"))){
753 setParticularDescription(descriptionsFulltext.get(k),acceptedTaxon,defaultTaxon, refMods, Feature.HABITAT());
754 break;
755 }
756 else{
757 handleTextData(refMods, descriptionsFulltext, td, currentFeature, k);
758 }
759 }
760
761 if (descriptionsFulltext.keySet().contains(k) || specimenOrObservations.keySet().contains(k)){
762 acceptedTaxon.addDescription(td);
763 sourceHandler.addAndSaveSource(refMods, td, null);
764 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
765 }
766 }
767 }
768 }
769
770 /**
771 * @param refMods
772 * @param descriptionsFulltext
773 * @param td
774 * @param currentFeature
775 * @param k
776 */
777 private void handleTextData(Reference<?> refMods, Map<Integer, String> descriptionsFulltext, TaxonDescription td,
778 Feature currentFeature, int k) {
779 //logger.info("handleTextData");
780 TextData textData = TextData.NewInstance();
781 textData.setFeature(currentFeature);
782 textData.putText(Language.UNKNOWN_LANGUAGE(), descriptionsFulltext.get(k));
783 sourceHandler.addSource(refMods, textData);
784 td.addElement(textData);
785 }
786
787 /**
788 * @param acceptedTaxon
789 * @param refMods
790 * @param td
791 * @param soo
792 */
793 private void handleAssociation(Taxon acceptedTaxon, Reference<?> refMods, TaxonDescription td, MySpecimenOrObservation soo) {
794 logger.info("handleAssociation");
795 String descr=soo.getDescr();
796 DerivedUnit derivedUnitBase = soo.getDerivedUnitBase();
797
798 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
799
800 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
801
802 Feature feature=null;
803 feature = makeFeature(derivedUnitBase);
804 if(!StringUtils.isEmpty(descr)) {
805 derivedUnitBase.setTitleCache(descr, true);
806 }
807
808 IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
809
810 taxonDescription.addElement(indAssociation);
811 sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
812 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
813 td.setDescribedSpecimenOrObservation(soo.getDerivedUnitBase());
814 }
815
816 /**
817 * create an individualAssociation
818 * @param refMods
819 * @param derivedUnitBase
820 * @param feature
821 * @return
822 */
823 private IndividualsAssociation createIndividualAssociation(Reference<?> refMods, DerivedUnit derivedUnitBase,
824 Feature feature) {
825 logger.info("createIndividualAssociation");
826 IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
827 indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
828 indAssociation.setFeature(feature);
829 indAssociation = sourceHandler.addSource(refMods, indAssociation);
830 return indAssociation;
831 }
832
833 /**
834 * @param specimenOrObservations
835 * @param descriptionsFulltext
836 * @param i
837 * @param specimenOrObservation
838 */
839 private void extractTextFromSpecimenOrObservation(Map<Integer, List<MySpecimenOrObservation>> specimenOrObservations,
840 Map<Integer, String> descriptionsFulltext, int i, MySpecimenOrObservation specimenOrObservation) {
841 logger.info("extractTextFromSpecimenOrObservation");
842 List<MySpecimenOrObservation> speObsList = specimenOrObservations.get(i);
843 if (speObsList == null) {
844 speObsList=new ArrayList<MySpecimenOrObservation>();
845 }
846 speObsList.add(specimenOrObservation);
847 specimenOrObservations.put(i,speObsList);
848
849 String s = specimenOrObservation.getDerivedUnitBase().toString();
850 if (descriptionsFulltext.get(i) !=null){
851 s = descriptionsFulltext.get(i)+" "+s;
852 }
853 descriptionsFulltext.put(i, s);
854 }
855
856 /**
857 * Extract the text with the inline link to a taxon
858 * @param nametosave
859 * @param refMods
860 * @param descriptionsFulltext
861 * @param i
862 * @param paragraph
863 */
864 @SuppressWarnings("rawtypes")
865 private void extractInLine(List<TaxonNameBase> nametosave, Reference<?> refMods, Map<Integer, String> descriptionsFulltext,
866 int i, Node paragraph) {
867 //logger.info("extractInLine");
868 String inLine=getInlineTextForName(nametosave, refMods, paragraph);
869 if (descriptionsFulltext.get(i) !=null){
870 inLine = descriptionsFulltext.get(i)+inLine;
871 }
872 descriptionsFulltext.put(i, inLine);
873 }
874
875 /**
876 * Extract the raw text from a Node
877 * @param descriptionsFulltext
878 * @param node
879 * @param j
880 */
881 private void extractText(Map<Integer, String> descriptionsFulltext, int i, Node node) {
882 //logger.info("extractText");
883 if(!node.getTextContent().trim().isEmpty()) {
884 String s =node.getTextContent().trim();
885 if (descriptionsFulltext.get(i) !=null){
886 s = descriptionsFulltext.get(i)+" "+s;
887 }
888 descriptionsFulltext.put(i, s);
889 }
890 }
891
892
893 /**
894 * @param materials: the XML node group
895 * @param acceptedTaxon: the current accepted Taxon
896 * @param refMods: the current reference extracted from the MODS
897 */
898 @SuppressWarnings("rawtypes")
899 private void extractMaterials(Node materials, Taxon acceptedTaxon, Reference<?> refMods,List<TaxonNameBase> nametosave) {
900 logger.info("EXTRACTMATERIALS");
901 // logger.info("acceptedTaxon: "+acceptedTaxon);
902 NodeList children = materials.getChildNodes();
903 NodeList events = null;
904 // String descr="";
905
906
907 for (int i=0;i<children.getLength();i++){
908 String rawAssociation="";
909 boolean added=false;
910 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
911 events = children.item(i).getChildNodes();
912 for(int k=0;k<events.getLength();k++){
913 if (events.item(k).getNodeName().equalsIgnoreCase("tax:name")){
914 String inLine= getInlineTextForName(nametosave, refMods, events.item(k));
915 if(!inLine.isEmpty()) {
916 rawAssociation+=inLine;
917 }
918 }
919 if (! events.item(k).getNodeName().equalsIgnoreCase("tax:name")
920 && !events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
921 rawAssociation+= events.item(k).getTextContent().trim();
922 }
923 if(events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
924 if (!containsDistinctLetters(rawAssociation.replaceAll(";",""))) {
925 rawAssociation="no description text";
926 }
927 added=true;
928 handleDerivedUnitFacadeAndBase(acceptedTaxon, refMods, events.item(k), rawAssociation);
929 }
930 if (!rawAssociation.isEmpty() && !added){
931
932 Feature feature = Feature.MATERIALS_EXAMINED();
933 featuresMap.put(feature.getTitleCache(),feature);
934
935 TextData textData = createTextData(rawAssociation, refMods, feature);
936
937 if(! rawAssociation.isEmpty() && (acceptedTaxon!=null)){
938 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
939 td.addElement(textData);
940 acceptedTaxon.addDescription(td);
941 sourceHandler.addAndSaveSource(refMods, td, null);
942 }
943 // DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
944 // derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
945 //
946 // TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
947 // acceptedTaxon.addDescription(taxonDescription);
948 //
949 // IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
950 //
951 // Feature feature = Feature.MATERIALS_EXAMINED();
952 // featuresMap.put(feature.getTitleCache(),feature);
953 // if(!StringUtils.isEmpty(rawAssociation)) {
954 // derivedUnitBase.setTitleCache(rawAssociation, true);
955 // }
956 // indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
957 // indAssociation.setFeature(feature);
958 // indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
959 //
960 // /*boolean sourceExists=false;
961 // Set<DescriptionElementSource> dsources = indAssociation.getSources();
962 // for (DescriptionElementSource src : dsources){
963 // String micro = src.getCitationMicroReference();
964 // Reference r = src.getCitation();
965 // if (r.equals(refMods) && micro == null) {
966 // sourceExists=true;
967 // }
968 // }
969 // if(!sourceExists) {
970 // indAssociation.addSource(null, null, refMods, null);
971 // }*/
972 // taxonDescription.addElement(indAssociation);
973 // taxonDescription.setTaxon(acceptedTaxon);
974 // taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
975 //
976 // /*sourceExists=false;
977 // Set<IdentifiableSource> sources = taxonDescription.getSources();
978 // for (IdentifiableSource src : sources){
979 // String micro = src.getCitationMicroReference();
980 // Reference r = src.getCitation();
981 // if (r.equals(refMods) && micro == null) {
982 // sourceExists=true;
983 // }
984 // }
985 // if(!sourceExists) {
986 // taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
987 // }*/
988 //
989 // importer.getDescriptionService().saveOrUpdate(taxonDescription);
990 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
991
992 rawAssociation="";
993 }
994 }
995 }
996 }
997 }
998
999 /**
1000 * @param acceptedTaxon
1001 * @param refMods
1002 * @param events
1003 * @param rawAssociation
1004 * @param k
1005 */
1006 private void handleDerivedUnitFacadeAndBase(Taxon acceptedTaxon, Reference<?> refMods, Node event,
1007 String rawAssociation) {
1008 logger.info("handleDerivedUnitFacadeAndBase");
1009 String descr;
1010 DerivedUnit derivedUnitBase;
1011 MySpecimenOrObservation myspecimenOrObservation;
1012 DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
1013 derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
1014
1015 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
1016
1017 myspecimenOrObservation = extractSpecimenOrObservation(event,derivedUnitBase,SpecimenOrObservationType.DerivedUnit);
1018 derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
1019 descr=myspecimenOrObservation.getDescr();
1020
1021 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
1022
1023 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
1024
1025 Feature feature = makeFeature(derivedUnitBase);
1026 featuresMap.put(feature.getTitleCache(),feature);
1027 if(!StringUtils.isEmpty(descr)) {
1028 derivedUnitBase.setTitleCache(descr, true);
1029 }
1030
1031 IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
1032
1033 taxonDescription.addElement(indAssociation);
1034 sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
1035 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1036 }
1037
1038
1039
1040 /**
1041 * @param materials: the XML node group
1042 * @param acceptedTaxon: the current accepted Taxon
1043 * @param refMods: the current reference extracted from the MODS
1044 */
1045 private String extractMaterialsDirect(Node materials, Taxon acceptedTaxon, Reference<?> refMods, String event) {
1046 logger.info("extractMaterialsDirect");
1047 // logger.info("acceptedTaxon: "+acceptedTaxon);
1048 String descr="";
1049
1050 DerivedUnit derivedUnitBase=null;
1051 MySpecimenOrObservation myspecimenOrObservation = extractSpecimenOrObservation(materials,derivedUnitBase, SpecimenOrObservationType.DerivedUnit);
1052 derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
1053
1054 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
1055
1056 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
1057
1058 Feature feature=null;
1059 if (event.equalsIgnoreCase("collection")){
1060 feature = makeFeature(derivedUnitBase);
1061 }
1062 else{
1063 feature = Feature.MATERIALS_EXAMINED();
1064 }
1065 featuresMap.put(feature.getTitleCache(), feature);
1066
1067 descr=myspecimenOrObservation.getDescr();
1068 if(!StringUtils.isEmpty(descr)) {
1069 derivedUnitBase.setTitleCache(descr, true);
1070 }
1071
1072 IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
1073
1074 taxonDescription.addElement(indAssociation);
1075 sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
1076 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1077
1078 return derivedUnitBase.getTitleCache();
1079
1080 }
1081
1082
1083 /**
1084 * @param description: the XML node group
1085 * @param acceptedTaxon: the current acceptedTaxon
1086 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1087 * @param nametosave: the list of objects to save into the CDM
1088 * @param refMods: the current reference extracted from the MODS
1089 * @param featureName: the feature name
1090 */
1091 @SuppressWarnings({ "rawtypes"})
1092 private String extractSpecificFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
1093 List<TaxonNameBase> nametosave, Reference<?> refMods, String featureName ) {
1094 logger.info("extractSpecificFeature "+featureName);
1095 // System.out.println("GRUUUUuu");
1096 NodeList children = description.getChildNodes();
1097 NodeList insideNodes ;
1098 NodeList trNodes;
1099 // String descr ="";
1100 String localdescr="";
1101 List<String> blabla=null;
1102 List<String> text = new ArrayList<String>();
1103
1104 String table="<table>";
1105 String head="";
1106 String line="";
1107
1108 Feature currentFeature=getFeatureObjectFromString(featureName);
1109
1110 // String fullContent = description.getTextContent();
1111 for (int i=0;i<children.getLength();i++){
1112 // localdescr="";
1113 if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
1114 text.add(children.item(i).getTextContent().trim());
1115 }
1116 if (featureName.equalsIgnoreCase("table")){
1117 if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1118 children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("thead")){
1119 head = extractTableHead(children.item(i));
1120 table+=head;
1121 line = extractTableLine(children.item(i));
1122 if (!line.equalsIgnoreCase("<tr></tr>")) {
1123 table+=line;
1124 }
1125 }
1126 if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1127 children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1128 line = extractTableLineWithColumn(children.item(i).getChildNodes());
1129 if(!line.equalsIgnoreCase("<tr></tr>")) {
1130 table+=line;
1131 }
1132 }
1133 }
1134 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1135 insideNodes=children.item(i).getChildNodes();
1136 blabla= new ArrayList<String>();
1137 for (int j=0;j<insideNodes.getLength();j++){
1138 Node insideNode = insideNodes.item(j);
1139 if (insideNode.getNodeName().equalsIgnoreCase("tax:name")){
1140 String inlinetext = getInlineTextForName(nametosave, refMods, insideNode);
1141 if (!inlinetext.isEmpty()) {
1142 blabla.add(inlinetext);
1143 }
1144 }
1145 else if (insideNode.getNodeName().equalsIgnoreCase("#text")) {
1146 if(!insideNode.getTextContent().trim().isEmpty()){
1147 blabla.add(insideNode.getTextContent().trim());
1148 // localdescr += insideNodes.item(j).getTextContent().trim();
1149 }
1150 }
1151 }
1152 if (!blabla.isEmpty()) {
1153 String blaStr = StringUtils.join(blabla," ").trim();
1154 if(!stringIsEmpty(blaStr)) {
1155 setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1156 text.add(blaStr);
1157 }
1158 }
1159
1160 }
1161 if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1162 if(!children.item(i).getTextContent().trim().isEmpty()){
1163 localdescr = children.item(i).getTextContent().trim();
1164 if(!stringIsEmpty(localdescr)) {
1165 setParticularDescription(localdescr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1166 }
1167 }
1168 }
1169 }
1170
1171 table+="</table>";
1172 if (!table.equalsIgnoreCase("<table></table>")){
1173 // System.out.println("TABLE : "+table);
1174 text.add(table);
1175 }
1176
1177 if (text !=null && !text.isEmpty()) {
1178 return StringUtils.join(text," ");
1179 } else {
1180 return "";
1181 }
1182
1183 }
1184
1185 /**
1186 * @param children
1187 * @param i
1188 * @return
1189 */
1190 private String extractTableLine(Node child) {
1191 //logger.info("extractTableLine");
1192 String line;
1193 line="<tr>";
1194 if (child.getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1195 line = extractTableLineWithColumn(child.getChildNodes());
1196 }
1197 line+="</tr>";
1198 return line;
1199 }
1200
1201 /**
1202 * @param children
1203 * @param i
1204 * @return
1205 */
1206 private String extractTableHead(Node child) {
1207 //logger.info("extractTableHead");
1208 String head;
1209 String line;
1210 head="<th>";
1211 NodeList trNodes = child.getChildNodes();
1212 for (int k=0;k<trNodes.getLength();k++){
1213 if (trNodes.item(k).getNodeName().equalsIgnoreCase("tax:div")
1214 && trNodes.item(k).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1215 line = extractTableLineWithColumn(trNodes.item(k).getChildNodes());
1216 head+=line;
1217 }
1218 }
1219 head+="</th>";
1220 return head;
1221 }
1222
1223 /**
1224 * build a html table line, with td columns
1225 * @param tdNodes
1226 * @return an html coded line
1227 */
1228 private String extractTableLineWithColumn(NodeList tdNodes) {
1229 //logger.info("extractTableLineWithColumn");
1230 String line;
1231 line="<tr>";
1232 for (int l=0;l<tdNodes.getLength();l++){
1233 if (tdNodes.item(l).getNodeName().equalsIgnoreCase("tax:p")){
1234 line+="<td>"+tdNodes.item(l).getTextContent()+"</td>";
1235 }
1236 }
1237 line+="</tr>";
1238 return line;
1239 }
1240
1241 /**
1242 * @param description: the XML node group
1243 * @param acceptedTaxon: the current acceptedTaxon
1244 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1245 * @param nametosave: the list of objects to save into the CDM
1246 * @param refMods: the current reference extracted from the MODS
1247 * @param featureName: the feature name
1248 */
1249 @SuppressWarnings({ "unused", "rawtypes" })
1250 private String extractSpecificFeatureNotStructured(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
1251 List<TaxonNameBase> nameToSave, Reference<?> refMods, String featureName ) {
1252 logger.info("extractSpecificFeatureNotStructured " + featureName);
1253 NodeList children = description.getChildNodes();
1254 NodeList insideNodes ;
1255 List<String> blabla= new ArrayList<String>();
1256
1257
1258 Feature currentFeature = getFeatureObjectFromString(featureName);
1259
1260 String fullContent = description.getTextContent();
1261 for (int i=0;i<children.getLength();i++){
1262 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1263 insideNodes=children.item(i).getChildNodes();
1264 for (int j=0;j<insideNodes.getLength();j++){
1265 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1266 String inlineText =getInlineTextForName(nameToSave, refMods, insideNodes.item(j));
1267 if(!inlineText.isEmpty()) {
1268 blabla.add(inlineText);
1269 }
1270 }
1271 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1272 if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1273 blabla.add(insideNodes.item(j).getTextContent().trim());
1274 }
1275 }
1276 }
1277 }
1278 if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1279 if(!children.item(i).getTextContent().trim().isEmpty()){
1280 String localdescr = children.item(i).getTextContent().trim();
1281 if(!localdescr.isEmpty())
1282 {
1283 blabla.add(localdescr);
1284 }
1285 }
1286 }
1287 }
1288
1289 if (blabla !=null && !blabla.isEmpty()) {
1290 String blaStr = StringUtils.join(blabla," ").trim();
1291 if (! stringIsEmpty(blaStr)) {
1292 setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1293 return blaStr;
1294 } else {
1295 return "";
1296 }
1297 } else {
1298 return "";
1299 }
1300
1301 }
1302
1303 /**
1304 * @param blaStr
1305 * @return
1306 */
1307 private boolean stringIsEmpty(String blaStr) {
1308 if (!StringUtils.isEmpty(blaStr)) {
1309 if (!blaStr.equalsIgnoreCase(".")) {
1310 if (!blaStr.equalsIgnoreCase(",")) {
1311 if (!blaStr.equalsIgnoreCase(";")) {
1312 return false;
1313 }
1314 }
1315 }
1316 }
1317 return true;
1318 }
1319
1320 /**
1321 * @param nametosave
1322 * @param refMods
1323 * @param insideNodes
1324 * @param blabla
1325 * @param j
1326 */
1327 @SuppressWarnings({ "rawtypes" })
1328 private String getInlineTextForName(List<TaxonNameBase> nametosave, Reference<?> refMods, Node insideNode) {
1329 if (true){
1330 NodeList children = insideNode.getChildNodes();
1331 String result = "";
1332 for (int i=0;i<children.getLength();i++){
1333 Node nameChild = children.item(i);
1334 if(nameChild.getNodeName().equalsIgnoreCase("#text")){
1335 result += nameChild.getTextContent();
1336 }else{
1337 //do nothing
1338 }
1339 }
1340 return result.replace("\n", "").trim();
1341 }else{
1342 TaxonNameBase tnb = getTaxonNameBaseFromXML(insideNode, nametosave,refMods,false);
1343 // Taxon tax = getTaxonFromTxonNameBase(tnb, refMods);
1344 Taxon tax = currentMyName.getTaxon();
1345 if(tnb !=null && tax != null){
1346 String linkedTaxon = tnb.getTitleCache().split("sec")[0];//TODO NOT IMPLEMENTED IN THE CDM YET
1347 return "<cdm:taxon uuid='"+tax.getUuid()+"'>"+linkedTaxon+"</cdm:taxon>";
1348 }else if (tnb != null && tax == null){
1349 //TODO
1350 return "<cdm:taxonName uuid='" + tnb.getUuid() +"'>" + tnb.getTitleCache().split("sec")[0] +"</cdm:taxonName>";
1351 }else{
1352 logger.warn("Inline text has no content yet");
1353 }
1354 return "";
1355 }
1356 }
1357
1358 /**
1359 * @param featureName
1360 * @return
1361 */
1362 @SuppressWarnings("rawtypes")
1363 private Feature getFeatureObjectFromString(String featureName) {
1364 logger.info("getFeatureObjectFromString");
1365 List<Feature> features = importer.getTermService().list(Feature.class, null,null,null,null);
1366 Feature currentFeature=null;
1367 for (Feature feature: features){
1368 String tmpF = feature.getTitleCache();
1369 if (tmpF.equalsIgnoreCase(featureName)) {
1370 currentFeature=feature;
1371 // System.out.println("currentFeatureFromList "+currentFeature.getUuid());
1372 }
1373 }
1374 if (currentFeature == null) {
1375 currentFeature=Feature.NewInstance(featureName, featureName, featureName);
1376 if(featureName.equalsIgnoreCase("Other")){
1377 currentFeature.setUuid(OtherUUID);
1378 }
1379 if(featureName.equalsIgnoreCase(notMarkedUp)){
1380 currentFeature.setUuid(NotMarkedUpUUID);
1381 }
1382 importer.getTermService().saveOrUpdate(currentFeature);
1383 }
1384 return currentFeature;
1385 }
1386
1387
1388
1389
1390 /**
1391 * @param children: the XML node group
1392 * @param nametosave: the list of objects to save into the CDM
1393 * @param acceptedTaxon: the current acceptedTaxon
1394 * @param refMods: the current reference extracted from the MODS
1395 * @param fullContent :the parsed XML content
1396 * @return a list of description (text)
1397 */
1398 @SuppressWarnings({ "unused", "rawtypes" })
1399 private List<String> parseParagraph(List<TaxonNameBase> namesToSave, Taxon acceptedTaxon, Reference<?> refMods, Node paragraph, Feature feature){
1400 logger.info("parseParagraph "+feature.toString());
1401 List<String> fullDescription= new ArrayList<String>();
1402 // String localdescr;
1403 String descr="";
1404 NodeList insideNodes ;
1405 boolean collectionEvent = false;
1406 List<Node>collectionEvents = new ArrayList<Node>();
1407
1408 NodeList children = paragraph.getChildNodes();
1409
1410 for (int i=0;i<children.getLength();i++){
1411 // localdescr="";
1412 if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
1413 descr += children.item(i).getTextContent().trim();
1414 }
1415 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1416 insideNodes=children.item(i).getChildNodes();
1417 List<String> blabla= new ArrayList<String>();
1418 for (int j=0;j<insideNodes.getLength();j++){
1419 boolean nodeKnown = false;
1420 // System.out.println("insideNodes.item(j).getNodeName() : "+insideNodes.item(j).getNodeName());
1421 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1422 String inlineText = getInlineTextForName(namesToSave, refMods, insideNodes.item(j));
1423 if (!inlineText.isEmpty()) {
1424 blabla.add(inlineText);
1425 }
1426 nodeKnown=true;
1427 }
1428 else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1429 if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1430 blabla.add(insideNodes.item(j).getTextContent().trim());
1431 // localdescr += insideNodes.item(j).getTextContent().trim();
1432 }
1433 nodeKnown=true;
1434 }
1435 else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:bibref")) {
1436 String ref = insideNodes.item(j).getTextContent().trim();
1437 if (ref.endsWith(";") && ((ref.length())>1)) {
1438 ref=ref.substring(0, ref.length()-1)+".";
1439 }
1440 Reference<?> reference = ReferenceFactory.newGeneric();
1441 reference.setTitleCache(ref, true);
1442 blabla.add(reference.getTitleCache());
1443 nodeKnown=true;
1444 }
1445 else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:figure")){
1446 String figure = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "figure");
1447 blabla.add(figure);
1448 }
1449 else if(insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:div") &&
1450 insideNodes.item(j).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1451 insideNodes.item(j).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1452 String table = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "table");
1453 blabla.add(table);
1454 }
1455 else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1456 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1457 String titlecache = extractMaterialsDirect(insideNodes.item(j), acceptedTaxon, refMods, "collection");
1458 blabla.add(titlecache);
1459 collectionEvent=true;
1460 collectionEvents.add(insideNodes.item(j));
1461 nodeKnown=true;
1462 }else{
1463 logger.warn("node not handled yet: " + insideNodes.item(j).getNodeName());
1464 }
1465
1466 }
1467 if (!StringUtils.isBlank(StringUtils.join(blabla," "))) {
1468 fullDescription.add(StringUtils.join(blabla," "));
1469 }
1470 }
1471 if (children.item(i).getNodeName().equalsIgnoreCase("tax:figure")){
1472 String figure = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "Figure");
1473 fullDescription.add(figure);
1474 }
1475 if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1476 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1477 children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1478 String table = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "table");
1479 fullDescription.add(table);
1480 }
1481 }
1482
1483 if( !stringIsEmpty(descr.trim())){
1484 Feature currentFeature= getNotMarkedUpFeatureObject();
1485 setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1486 }
1487 // if (collectionEvent) {
1488 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1489 // for (Node coll:collectionEvents){
1490 // = extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
1491 // }
1492 // }
1493 return fullDescription;
1494 }
1495
1496
1497 /**
1498 * @param description: the XML node group
1499 * @param acceptedTaxon: the current acceptedTaxon
1500 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1501 * @param nametosave: the list of objects to save into the CDM
1502 * @param refMods: the current reference extracted from the MODS
1503 * @param feature: the feature to link the data with
1504 */
1505 @SuppressWarnings("rawtypes")
1506 private void extractFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> namesToSave, Reference<?> refMods, Feature feature){
1507 logger.info("EXTRACT FEATURE "+feature.toString());
1508 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1509 List<String> fullDescription= parseParagraph( namesToSave, acceptedTaxon, refMods, description,feature);
1510
1511 // System.out.println("Feature : "+feature.toString()+", "+fullDescription.toString());
1512 if (!fullDescription.isEmpty() &&!stringIsEmpty(StringUtils.join(fullDescription,"\n").trim())) {
1513 setParticularDescription(StringUtils.join(fullDescription,"\n").trim(),acceptedTaxon,defaultTaxon, refMods,feature);
1514 }
1515
1516 }
1517
1518
1519 /**
1520 * @param descr: the XML Nodegroup to parse
1521 * @param acceptedTaxon: the current acceptedTaxon
1522 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1523 * @param refMods: the current reference extracted from the MODS
1524 * @param currentFeature: the feature name
1525 * @return
1526 */
1527 private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon, Reference<?> refMods, Feature currentFeature) {
1528 logger.info("setParticularDescription "+currentFeature.getTitleCache()+", \n blabla : "+descr);
1529 // System.out.println("setParticularDescription "+currentFeature.getTitleCache()+", \n blabla : "+descr);
1530 // logger.info("acceptedTaxon: "+acceptedTaxon);
1531 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1532
1533 featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1534
1535 TextData textData = createTextData(descr, refMods, currentFeature);
1536
1537 if(acceptedTaxon!=null){
1538 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1539 td.addElement(textData);
1540 acceptedTaxon.addDescription(td);
1541
1542 sourceHandler.addAndSaveSource(refMods, td, null);
1543 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1544 }
1545
1546 if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1547 try{
1548 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1549 if (tmp!=null) {
1550 defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1551 }else{
1552 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1553 }
1554 }catch(Exception e){
1555 logger.debug("TAXON EXISTS"+defaultTaxon);
1556 }
1557
1558 TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1559 defaultTaxon.addDescription(td);
1560 td.addElement(textData);
1561 sourceHandler.addAndSaveSource(refMods, td, null);
1562 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1563 }
1564 }
1565
1566 /**
1567 * @param descr
1568 * @param refMods
1569 * @param currentFeature
1570 * @return
1571 */
1572 private TextData createTextData(String descr, Reference<?> refMods, Feature currentFeature) {
1573 //logger.info("createTextData");
1574 TextData textData = TextData.NewInstance();
1575 textData.setFeature(currentFeature);
1576 sourceHandler.addSource(refMods, textData);
1577
1578 textData.putText(Language.UNKNOWN_LANGUAGE(), descr);
1579 return textData;
1580 }
1581
1582
1583
1584 /**
1585 * @param descr: the XML Nodegroup to parse
1586 * @param acceptedTaxon: the current acceptedTaxon
1587 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1588 * @param refMods: the current reference extracted from the MODS
1589 * @param currentFeature: the feature name
1590 * @return
1591 */
1592 private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon,Reference<?> currentRef, Reference<?> refMods, Feature currentFeature) {
1593 // System.out.println("setParticularDescriptionSPecial "+currentFeature);
1594 // logger.info("acceptedTaxon: "+acceptedTaxon);
1595 logger.info("setParticularDescription");
1596 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1597
1598 featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1599 TextData textData = createTextData(descr, refMods, currentFeature);
1600
1601 if(! descr.isEmpty() && (acceptedTaxon!=null)){
1602 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1603 td.addElement(textData);
1604 acceptedTaxon.addDescription(td);
1605
1606 sourceHandler.addAndSaveSource(refMods, td, currentRef);
1607 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1608 }
1609
1610 if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1611 try{
1612 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1613 if (tmp!=null) {
1614 defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1615 }else{
1616 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1617 }
1618 }catch(Exception e){
1619 logger.debug("TAXON EXISTS"+defaultTaxon);
1620 }
1621
1622 TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1623 defaultTaxon.addDescription(td);
1624 td.addElement(textData);
1625 sourceHandler.addAndSaveSource(currentRef, td,currentRef);
1626 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1627 }
1628 }
1629
1630
1631
1632 /**
1633 * @param synonyms: the XML Nodegroup to parse
1634 * @param nametosave: the list of objects to save into the CDM
1635 * @param acceptedTaxon: the current acceptedTaxon
1636 * @param refMods: the current reference extracted from the MODS
1637 */
1638 @SuppressWarnings({ "rawtypes" })
1639 private void extractSynonyms(Node synonyms, Taxon acceptedTaxon,Reference<?> refMods) {
1640 logger.info("extractSynonyms");
1641 //System.out.println("extractSynonyms for: "+acceptedTaxon);
1642 Taxon ttmp = (Taxon) importer.getTaxonService().find(acceptedTaxon.getUuid());
1643 if (ttmp != null) {
1644 acceptedTaxon = CdmBase.deproxy(ttmp,Taxon.class);
1645 }
1646 else{
1647 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1648 }
1649 NodeList children = synonyms.getChildNodes();
1650 TaxonNameBase nameToBeFilled = null;
1651 List<MyName> names = new ArrayList<MyName>();
1652
1653 if(synonyms.getNodeName().equalsIgnoreCase("tax:name")){
1654 MyName myName;
1655 try {
1656 myName = extractScientificNameSynonym(synonyms,refMods);
1657 names.add(myName);
1658 } catch (TransformerFactoryConfigurationError e) {
1659 logger.warn(e);
1660 } catch (TransformerException e) {
1661 logger.warn(e);
1662 }
1663 }
1664
1665
1666 for (int i=0;i<children.getLength();i++){
1667 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1668 NodeList tmp = children.item(i).getChildNodes();
1669 // String fullContent = children.item(i).getTextContent();
1670 for (int j=0; j< tmp.getLength();j++){
1671 if(tmp.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1672 MyName myName;
1673 try {
1674 myName = extractScientificNameSynonym(tmp.item(j),refMods);
1675 names.add(myName);
1676 } catch (TransformerFactoryConfigurationError e) {
1677 logger.warn(e);
1678 } catch (TransformerException e) {
1679 logger.warn(e);
1680 }
1681
1682 }
1683 }
1684 }
1685 if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
1686 MyName myName;
1687 try {
1688 myName = extractScientificNameSynonym(children.item(i),refMods);
1689 names.add(myName);
1690 } catch (TransformerFactoryConfigurationError e) {
1691 logger.warn(e);
1692 } catch (TransformerException e) {
1693 logger.warn(e);
1694 }
1695
1696 }
1697 }
1698 NomenclaturalStatusType statusType = null;
1699 //System.out.println("names: "+names);
1700 for(MyName name:names){
1701 //System.out.println("HANDLE NAME "+name);
1702
1703 statusType = null;
1704
1705 nameToBeFilled = name.getTaxonNameBase();
1706
1707 Synonym synonym = name.getSyno();
1708 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1709 nameToBeFilled = parser.parseFullName(name.getName(), nomenclaturalCode, name.getRank());
1710 if (nameToBeFilled.hasProblem() &&
1711 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1712 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1713 addProblemNameToFile(name.getName(),"",nomenclaturalCode,name.getRank());
1714 nameToBeFilled = solveNameProblem(name.getOriginalName(), name.getName(), parser,name.getAuthor(), name.getRank());
1715 }
1716 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
1717 */
1718 if (!name.getIdentifier().isEmpty() && (name.getIdentifier().length()>2)){
1719 setLSID(name.getIdentifier(), synonym);
1720 }
1721
1722 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1723 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1724 boolean synoExist = false;
1725 for (Synonym syn: synonymsSet){
1726 //System.out.println(syn.getName()+" -- "+syn.getSec());
1727 boolean a =syn.getName().equals(synonym.getName());
1728 boolean b = syn.getSec().equals(synonym.getSec());
1729 if (a && b) {
1730 synoExist=true;
1731 }
1732 }
1733 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1734 //System.out.println("SYNONYM");
1735 sourceHandler.addSource(refMods, synonym);
1736
1737 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),refMods, null);
1738
1739 }
1740 }
1741 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1742 }
1743
1744
1745 /**
1746 * @param refgroup: the XML nodes
1747 * @param nametosave: the list of objects to save into the CDM
1748 * @param acceptedTaxon: the current acceptedTaxon
1749 * @param nametosave: the list of objects to save into the CDM
1750 * @param refMods: the current reference extracted from the MODS
1751 * @return the acceptedTaxon (why?)
1752 * handle cases where the bibref are inside <p> and outside
1753 */
1754 @SuppressWarnings({ "rawtypes" })
1755 private Taxon extractReferences(Node refgroup, List<TaxonNameBase> nametosave, Taxon acceptedTaxon, Reference<?> refMods) {
1756 logger.info("extractReferences");
1757 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1758
1759 NodeList children = refgroup.getChildNodes();
1760 NonViralName<?> nameToBeFilled = getNonViralNameAccNomenclature();
1761
1762 ReferenceBuilder refBuild = new ReferenceBuilder(sourceHandler);
1763 for (int i=0;i<children.getLength();i++){
1764 if(children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
1765 String ref = children.item(i).getTextContent().trim();
1766 refBuild.builReference(ref, treatmentMainName, nomenclaturalCode, acceptedTaxon, refMods);
1767 if (!refBuild.isFoundBibref()){
1768 extractReferenceRawText(children.item(i).getChildNodes(), nameToBeFilled, refMods, acceptedTaxon);
1769 }
1770 }
1771
1772 if(children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1773 NodeList references = children.item(i).getChildNodes();
1774 String descr="";
1775 for (int j=0;j<references.getLength();j++){
1776 if(references.item(j).getNodeName().equalsIgnoreCase("tax:bibref")){
1777 String ref = references.item(j).getTextContent().trim();
1778 refBuild.builReference(ref, treatmentMainName, nomenclaturalCode, acceptedTaxon, refMods);
1779 }
1780 else
1781 if (references.item(j).getNodeName().equalsIgnoreCase("#text")
1782 && !references.item(j).getTextContent().trim().isEmpty()){
1783 descr += references.item(j).getTextContent().trim();
1784 }
1785
1786 }
1787 if (!refBuild.isFoundBibref()){
1788 //if it's not tagged, put it as row information.
1789 // extractReferenceRawText(references, nameToBeFilled, nametosave, refMods, acceptedTaxon);
1790 //then put it as a not markup feature if not empty
1791 if (!stringIsEmpty(descr.trim())){
1792 Feature currentFeature= getNotMarkedUpFeatureObject();
1793 setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1794 }
1795 }
1796 }
1797 }
1798 // importer.getClassificationService().saveOrUpdate(classification);
1799 return acceptedTaxon;
1800
1801 }
1802
1803 /**
1804 * get the non viral name according to the current nomenclature
1805 * @return
1806 */
1807
1808 private NonViralName<?> getNonViralNameAccNomenclature() {
1809 return (NonViralName<?>)nomenclaturalCode.getNewTaxonNameInstance(null);
1810 }
1811
1812 /**
1813 * @return the feature object for the category "not marked up"
1814 */
1815 private Feature getNotMarkedUpFeatureObject() {
1816 // FIXME use getFeature(uuid ....)
1817 logger.info("getNotMarkedUpFeatureObject");
1818 Feature currentFeature = (Feature)importer.getTermService().find(NotMarkedUpUUID);
1819 if (currentFeature == null) {
1820 currentFeature=Feature.NewInstance(notMarkedUp, notMarkedUp, notMarkedUp);
1821 currentFeature.setUuid(NotMarkedUpUUID);
1822 //TODO use userDefined Feature Vocabulary
1823 Feature.DISTRIBUTION().getVocabulary().addTerm(currentFeature);
1824 // importer.getTermService().saveOrUpdate(currentFeature);
1825 importer.getVocabularyService().saveOrUpdate(currentFeature.getVocabulary());
1826 }
1827 return currentFeature;
1828 }
1829
1830 /**
1831 * @param references
1832 * handle cases where the bibref are inside <p> and outside
1833 */
1834 @SuppressWarnings("rawtypes")
1835 private void extractReferenceRawText(NodeList references, NonViralName<?> nameToBeFilled, Reference<?> refMods,
1836 Taxon acceptedTaxon) {
1837 logger.info("extractReferenceRawText");
1838 String refString="";
1839 NomenclaturalStatusType statusType = null;
1840 currentMyName= new MyName(true);
1841 for (int j=0;j<references.getLength();j++){
1842 acceptedTaxon=CdmBase.deproxy(acceptedTaxon, Taxon.class);
1843 //no bibref tag inside
1844 // System.out.println("references.item(j).getNodeName()"+references.item(j).getNodeName());
1845 if (references.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1846
1847 try {
1848
1849 //TODO create or not create?
1850 currentMyName = extractScientificName(references.item(j), refMods);
1851 } catch (TransformerFactoryConfigurationError e) {
1852 logger.warn(e);
1853 } catch (TransformerException e) {
1854 logger.warn(e);
1855 }
1856
1857 // name=name.trim();
1858 }
1859 if (references.item(j).getNodeName().equalsIgnoreCase("#text")){
1860 refString = references.item(j).getTextContent().trim();
1861 }
1862 if(references.item(j).getNodeName().equalsIgnoreCase("#text") && !references.item(j).getTextContent().trim().isEmpty()){
1863 //
1864 statusType = null;
1865 if (!currentMyName.getStatus().isEmpty()){
1866 try {
1867 statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1868 } catch (UnknownCdmTypeException e) {
1869 addProblematicStatusToFile(currentMyName.getStatus());
1870 logger.warn("Problem with status");
1871 }
1872 }
1873
1874
1875 /*INonViralNameParser parser = NonViralNameParserImpl.NewInstance();*/
1876 String fullLineRefName = references.item(j).getTextContent().trim();
1877 int nameOrRefOrOther=2;
1878 nameOrRefOrOther=askIfNameContained(fullLineRefName);
1879 // System.out.println("NAMEORREFOR?? "+nameOrRefOrOther);
1880 if (nameOrRefOrOther==0){
1881 /*TaxonNameBase nameTBF = parser.parseFullName(fullLineRefName, nomenclaturalCode, Rank.UNKNOWN_RANK());
1882 if (nameTBF.hasProblem() &&
1883 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1884 addProblemNameToFile(fullLineRefName,"",nomenclaturalCode,Rank.UNKNOWN_RANK());
1885 nameTBF=solveNameProblem(fullLineRefName, fullLineRefName,parser,currentMyName.getAuthor(), currentMyName.getRank());
1886 }
1887 nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1888 */
1889 TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1890 Synonym synonym = null;
1891 if (!currentMyName.getStatus().isEmpty()){
1892 try {
1893 statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1894 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1895 synonym = Synonym.NewInstance(nameTBF, refMods);
1896 } catch (UnknownCdmTypeException e) {
1897 addProblematicStatusToFile(currentMyName.getStatus());
1898 logger.warn("Problem with status");
1899 synonym = Synonym.NewInstance(nameTBF, refMods);
1900 synonym.setAppendedPhrase(currentMyName.getStatus());
1901 }
1902 }
1903 else{
1904 synonym = Synonym.NewInstance(nameTBF, refMods);
1905 }
1906
1907 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1908 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1909 boolean synoExist = false;
1910 for (Synonym syn: synonymsSet){
1911 // System.out.println(syn.getName()+" -- "+syn.getSec());
1912 boolean a =syn.getName().equals(synonym.getName());
1913 boolean b = syn.getSec().equals(synonym.getSec());
1914 if (a && b) {
1915 synoExist=true;
1916 }
1917 }
1918 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1919 sourceHandler.addSource(refMods, synonym);
1920
1921 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),refMods, null);
1922 }
1923 }
1924
1925 if (nameOrRefOrOther==1){
1926 Reference<?> re = ReferenceFactory.newGeneric();
1927 re.setTitleCache(fullLineRefName, true);
1928
1929 /* TaxonNameBase nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
1930 if (nameTBF.hasProblem() &&
1931 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1932 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
1933 nameTBF=solveNameProblem(currentMyName.getName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
1934 }
1935 nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1936 */
1937 TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1938 Synonym synonym = null;
1939 if (!currentMyName.getStatus().isEmpty()){
1940 try {
1941 statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1942 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1943 synonym = Synonym.NewInstance(nameTBF, refMods);
1944 } catch (UnknownCdmTypeException e) {
1945 addProblematicStatusToFile(currentMyName.getStatus());
1946 logger.warn("Problem with status");
1947 synonym = Synonym.NewInstance(nameTBF, refMods);
1948 synonym.setAppendedPhrase(currentMyName.getStatus());
1949 }
1950 }
1951 else{
1952 synonym = Synonym.NewInstance(nameTBF, refMods);
1953 }
1954
1955 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1956 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1957 boolean synoExist = false;
1958 for (Synonym syn: synonymsSet){
1959 // System.out.println(syn.getName()+" -- "+syn.getSec());
1960 boolean a =syn.getName().equals(synonym.getName());
1961 boolean b = syn.getSec().equals(synonym.getSec());
1962 if (a && b) {
1963 synoExist=true;
1964 }
1965 }
1966 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1967 sourceHandler.addSource(refMods, synonym);
1968
1969 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),re, null);
1970 }
1971
1972 }
1973
1974
1975 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1976 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
1977 }
1978 }
1979
1980 if(!currentMyName.getName().isEmpty()){
1981 //logger.info("acceptedTaxon and name: *"+acceptedTaxon.getTitleCache()+"*, *"+currentMyName.getName()+"*");
1982 if (acceptedTaxon.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(currentMyName.getName().trim())){
1983 Reference<?> refS = ReferenceFactory.newGeneric();
1984 refS.setTitleCache(refString, true);
1985 // TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1986 // acceptedTaxon.addDescription(td);
1987 // acceptedTaxon.addSource(refSource);
1988 //
1989 // TextData textData = TextData.NewInstance(Feature.CITATION());
1990 //
1991 // textData.addSource(null, null, refS, null);
1992 // td.addElement(textData);
1993 // td.addSource(refSource);
1994 // importer.getDescriptionService().saveOrUpdate(td);
1995
1996
1997 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1998 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
1999
2000 }
2001
2002 acceptedTaxon.getName().setNomenclaturalReference(refS);
2003 }
2004 else{
2005 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2006 TaxonNameBase nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
2007 if (nameTBF.hasProblem() &&
2008 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
2009 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
2010 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
2011 nameTBF=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
2012 }
2013 nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
2014 */
2015 TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
2016 Synonym synonym = null;
2017 if (!currentMyName.getStatus().isEmpty()){
2018 try {
2019 statusType = nomStatusString2NomStatus(currentMyName.getStatus());
2020 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
2021 synonym = Synonym.NewInstance(nameTBF, refMods);
2022 } catch (UnknownCdmTypeException e) {
2023 addProblematicStatusToFile(currentMyName.getStatus());
2024 logger.warn("Problem with status");
2025 synonym = Synonym.NewInstance(nameTBF, refMods);
2026 synonym.setAppendedPhrase(currentMyName.getStatus());
2027 }
2028 }
2029 else{
2030 synonym = Synonym.NewInstance(nameTBF, refMods);
2031 }
2032
2033
2034 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2035 setLSID(currentMyName.getIdentifier(), synonym);
2036 }
2037
2038 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
2039 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
2040 boolean synoExist = false;
2041 for (Synonym syn: synonymsSet){
2042 // System.out.println(syn.getName()+" -- "+syn.getSec());
2043 boolean a =syn.getName().equals(synonym.getName());
2044 boolean b = syn.getSec().equals(synonym.getSec());
2045 if (a && b) {
2046 synoExist=true;
2047 }
2048 }
2049 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
2050 sourceHandler.addSource(refMods, synonym);
2051
2052 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),refMods, null);
2053 }
2054 }
2055 }
2056 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2057 }
2058 }
2059
2060
2061
2062 /**
2063 * @param identifier
2064 * @param acceptedTaxon
2065 */
2066 @SuppressWarnings("rawtypes")
2067 private void setLSID(String identifier, TaxonBase<?> taxon) {
2068 //logger.info("setLSID");
2069 // boolean lsidok=false;
2070 String id = identifier.split("__")[0];
2071 String source = identifier.split("__")[1];
2072 if (id.indexOf("lsid")>-1){
2073 try {
2074 LSID lsid = new LSID(id);
2075 taxon.setLsid(lsid);
2076 // lsidok=true;
2077 } catch (MalformedLSIDException e) {
2078 logger.warn("Malformed LSID");
2079 }
2080
2081 }
2082
2083 //logger.info("search reference for LSID");
2084 // if ((id.indexOf("lsid")<0) || !lsidok){
2085 //ADD ORIGINAL SOURCE ID EVEN IF LSID
2086 Reference<?> re = null;
2087 Pager<Reference> references = importer.getReferenceService().findByTitle(Reference.class, source, MatchMode.EXACT, null, 1, null, null, null);
2088 if( references !=null && references.getCount()>0){
2089 re=references.getRecords().get(0);
2090 }
2091 //logger.info("search reference for LSID-end");
2092 if(re == null){
2093 re = ReferenceFactory.newGeneric();
2094 re.setTitleCache(source, true);
2095 importer.getReferenceService().saveOrUpdate(re);
2096 }
2097 re=CdmBase.deproxy(re, Reference.class);
2098
2099 //logger.info("search source for LSID");
2100 Set<IdentifiableSource> sources = taxon.getSources();
2101 boolean lsidinsource=false;
2102 boolean urlinsource=false;
2103 for (IdentifiableSource src:sources){
2104 if (id.equalsIgnoreCase(src.getIdInSource()) && re.getTitleCache().equals(src.getCitation().getTitleCache())) {
2105 lsidinsource=true;
2106 }
2107 if (src.getIdInSource() == null && re.getTitleCache().equals(sourceUrlRef.getTitleCache())) {
2108 urlinsource=true;
2109 }
2110 }
2111 if(!lsidinsource) {
2112 taxon.addSource(OriginalSourceType.Import, id,null,re,null);
2113 }
2114 if(!urlinsource)
2115 {
2116 sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
2117 taxon.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
2118 // }
2119 }
2120
2121 }
2122
2123 /**
2124 * try to solve a parsing problem for a scientific name
2125 * @param original : the name from the OCR document
2126 * @param name : the tagged version
2127 * @param parser
2128 * @return the corrected TaxonNameBase
2129 */
2130 /* @SuppressWarnings({ "unchecked", "rawtypes" })
2131 private TaxonNameBase<?,?> solveNameProblem(String original, String name, INonViralNameParser parser, String author, Rank rank) {
2132 Map<String,String> ato = namesMap.get(original);
2133 if (ato == null) {
2134 ato = namesMap.get(original+" "+author);
2135 }
2136
2137
2138 if (ato == null && rank.equals(Rank.UNKNOWN_RANK())){
2139 rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
2140 }
2141 if (ato != null && rank.equals(Rank.UNKNOWN_RANK())){
2142 rank = getRank(ato);
2143 }
2144 // TaxonNameBase<?,?> nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
2145 TaxonNameBase<?,?> nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
2146 // logger.info("RANK: "+rank);
2147 int retry=0;
2148 List<ParserProblem> problems = nameTBF.getParsingProblems();
2149 for (ParserProblem pb:problems) {
2150 System.out.println(pb.toString());
2151 }
2152 while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
2153 addProblemNameToFile(name,author,nomenclaturalCode,rank);
2154 String fullname=name;
2155 if(! skippQuestion) {
2156 fullname = getFullReference(name,nameTBF.getParsingProblems());
2157 }
2158 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2159 nameTBF = BotanicalName.NewInstance(null);
2160 }
2161 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2162 nameTBF = ZoologicalName.NewInstance(null);
2163 }
2164 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2165 nameTBF= BacterialName.NewInstance(null);
2166 }
2167 parser.parseReferencedName(nameTBF, fullname, rank, false);
2168 retry++;
2169 }
2170 if (retry == 1){
2171 if(author != null){
2172 if (name.indexOf(author)>-1) {
2173 nameTBF = parser.parseSimpleName(name.substring(0, name.indexOf(author)), nomenclaturalCode, rank);
2174 } else {
2175 nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
2176 }
2177 if (nameTBF.hasProblem()){
2178 if (name.indexOf(author)>-1) {
2179 addProblemNameToFile(name.substring(0, name.indexOf(author)),author,nomenclaturalCode,rank);
2180 } else {
2181 addProblemNameToFile(name,author,nomenclaturalCode,rank);
2182 }
2183 // System.out.println("TBF still has problems "+nameTBF.hasProblem());
2184 problems = nameTBF.getParsingProblems();
2185 for (ParserProblem pb:problems) {
2186 System.out.println(pb.toString());
2187 }
2188 nameTBF.setFullTitleCache(name, true);
2189 }else{
2190 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2191 ((BotanicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2192 }
2193 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2194 ((ZoologicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2195 }
2196 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2197 ((BacterialName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2198 }
2199 }
2200 // logger.info("FULL TITLE CACHE "+name);
2201 }else{
2202 nameTBF.setFullTitleCache(name, true);
2203 }
2204 }
2205 return nameTBF;
2206 }
2207
2208 */
2209
2210 /**
2211 * @param nomenclatureNode: the XML nodes
2212 * @param nametosave: the list of objects to save into the CDM
2213 * @param refMods: the current reference extracted from the MODS
2214 * @return
2215 */
2216 @SuppressWarnings({ "rawtypes" })
2217 private Taxon extractNomenclature(Node nomenclatureNode, List<TaxonNameBase> nametosave, Reference<?> refMods) throws ClassCastException{
2218 refMods=CdmBase.deproxy(refMods, Reference.class);
2219
2220 logger.info("extractNomenclature");
2221 NodeList children = nomenclatureNode.getChildNodes();
2222 String freetext="";
2223 NonViralName<?> nameToBeFilled = null;
2224 Taxon acceptedTaxon = null;
2225 // INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2226
2227 // String fullContent = nomenclatureNode.getTextContent();
2228
2229 NomenclaturalStatusType statusType = null;
2230 for (int i=0;i<children.getLength();i++){
2231 if(children.item(i).getNodeName().equalsIgnoreCase("tax:status")){
2232 String status = children.item(i).getTextContent().trim();
2233 if (!status.isEmpty()){
2234 try {
2235 statusType = nomStatusString2NomStatus(status);
2236 } catch (UnknownCdmTypeException e) {
2237 addProblematicStatusToFile(status);
2238 logger.warn("Problem with status");
2239 }
2240 }
2241 }
2242 }
2243
2244 boolean containsSynonyms=false;
2245 for (int i=0;i<children.getLength();i++){
2246
2247 if (children.item(i).getNodeName().equalsIgnoreCase("#text")) {
2248 freetext=children.item(i).getTextContent();
2249 }
2250 if (children.item(i).getNodeName().equalsIgnoreCase("tax:collection_event")) {
2251 // System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
2252 extractMaterialsDirect(children.item(i), acceptedTaxon, refMods, "collection");
2253 }
2254 if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
2255 //System.out.println("HANDLE FIRST NAME OF THE LIST");
2256 if(!containsSynonyms){
2257 //System.out.println("I : "+i);
2258 currentMyName = new MyName(false);
2259 try {
2260 currentMyName = extractScientificName(children.item(i),refMods);
2261 treatmentMainName = currentMyName.getNewName();
2262 originalTreatmentName = currentMyName.getOriginalName();
2263
2264 } catch (TransformerFactoryConfigurationError e1) {
2265 throw new RuntimeException(e1);
2266 } catch (TransformerException e1) {
2267 throw new RuntimeException(e1);
2268 }
2269
2270 if (currentMyName.getRank().equals(Rank.UNKNOWN_RANK()) || currentMyName.getRank().isLower(configState.getConfig().getMaxRank()) || currentMyName.getRank().equals(configState.getConfig().getMaxRank())){
2271 maxRankRespected=true;
2272
2273 nameToBeFilled=currentMyName.getTaxonNameBase();
2274
2275 // acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
2276 acceptedTaxon=currentMyName.getTaxon();
2277 //System.out.println("TreatmentName "+treatmentMainName+" - "+acceptedTaxon);
2278
2279
2280 boolean statusMatch=false;
2281 if(acceptedTaxon !=null ){
2282 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2283 statusMatch=compareStatus(acceptedTaxon, statusType);
2284 //System.out.println("statusMatch: "+statusMatch);
2285 }
2286 if (acceptedTaxon ==null || (acceptedTaxon != null && !statusMatch)){
2287
2288 nameToBeFilled=currentMyName.getTaxonNameBase();
2289 if (nameToBeFilled!=null){
2290 if (!originalTreatmentName.isEmpty()) {
2291 TaxonNameDescription td = TaxonNameDescription.NewInstance();
2292 td.setTitleCache(originalTreatmentName, true);
2293 nameToBeFilled.addDescription(td);
2294 }
2295
2296 if(statusType != null) {
2297 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
2298 }
2299 sourceHandler.addSource(refMods, nameToBeFilled);
2300
2301 if (nameToBeFilled.getNomenclaturalReference() == null) {
2302 acceptedTaxon= new Taxon(nameToBeFilled,refMods);
2303 //System.out.println("NEW ACCEPTED HERE "+nameToBeFilled);
2304 }
2305 else {
2306 acceptedTaxon= new Taxon(nameToBeFilled,(Reference<?>) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
2307 //System.out.println("NEW ACCEPTED HERE2 "+nameToBeFilled);
2308 }
2309
2310 sourceHandler.addSource(refMods, acceptedTaxon);
2311
2312 if(!configState.getConfig().doKeepOriginalSecundum()) {
2313 acceptedTaxon.setSec(configState.getConfig().getSecundum());
2314 //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2315 //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2316 }
2317
2318 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2319 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2320 }
2321
2322
2323 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2324 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2325 }
2326
2327 }else{
2328 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2329 Set<IdentifiableSource> sources = acceptedTaxon.getSources();
2330 boolean sourcelinked=false;
2331 for (IdentifiableSource source:sources){
2332 if (source.getCitation().getTitleCache().equalsIgnoreCase(refMods.getTitleCache())) {
2333 sourcelinked=true;
2334 }
2335 }
2336 if (!configState.getConfig().doKeepOriginalSecundum()) {
2337 acceptedTaxon.setSec(configState.getConfig().getSecundum());
2338 //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2339 //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2340 }
2341 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2342
2343 if (!sourcelinked){
2344 sourceHandler.addSource(refMods, acceptedTaxon);
2345 }
2346 if (!sourcelinked || !configState.getConfig().doKeepOriginalSecundum()){
2347
2348 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2349 //FIXME are these identifiers really related to taxa, not names? Exiting LSIDs come from Zoobank, urn:lsid:biosci.ohio-state.edu:osuc_concepts:134826 (Ants)
2350 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2351 }
2352 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2353 }
2354 }
2355 }else{
2356 maxRankRespected=false;
2357 }
2358 containsSynonyms=true;
2359 }else{
2360 //System.out.println("YOUHOUUU "+i);
2361 try{
2362 extractSynonyms(children.item(i), acceptedTaxon, refMods);
2363 }catch(NullPointerException e){
2364 logger.warn("nullpointerexception, the accepted taxon might be null");
2365 }
2366 }
2367 containsSynonyms=true;
2368 }
2369 if (children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group") && maxRankRespected){
2370 reloadClassification();
2371 //extract the References within the document
2372 extractReferences(children.item(i),nametosave,acceptedTaxon,refMods);
2373 }
2374 if(!stringIsEmpty(freetext.trim())) {
2375 setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject());
2376 }
2377
2378 }
2379 // importer.getClassificationService().saveOrUpdate(classification);
2380 return acceptedTaxon;
2381 }
2382
2383
2384 /**
2385 * @return
2386 */
2387
2388 private boolean compareStatus(TaxonBase<?> t, NomenclaturalStatusType statusType) {
2389 //logger.info("compareStatus");
2390 boolean statusMatch=false;
2391 //found one taxon
2392 Set<NomenclaturalStatus> status = t.getName().getStatus();
2393 if (statusType!=null && status.size()>0){ //the statusType is known for both taxon
2394 for (NomenclaturalStatus st:status){
2395 NomenclaturalStatusType stype = st.getType();
2396 if (stype.toString().equalsIgnoreCase(statusType.toString())) {
2397 statusMatch=true;
2398 }
2399 }
2400 }
2401 else{
2402 if(statusType == null && status.size()==0) {//there is no statusType, we can assume it's the same
2403 statusMatch=true;
2404 }
2405 }
2406 return statusMatch;
2407 }
2408
2409 /**
2410 * @param acceptedTaxon: the current acceptedTaxon
2411 * @param ref: the current reference extracted from the MODS
2412 * @return the parent for the current accepted taxon
2413 */
2414 /* private Taxon createParent(Taxon acceptedTaxon, Reference<?> ref) {
2415 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2416
2417 List<Rank> rankList = new ArrayList<Rank>();
2418 rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
2419
2420 List<String> rankListStr = new ArrayList<String>();
2421 for (Rank r:rankList) {
2422 rankListStr.add(r.toString());
2423 }
2424 String r="";
2425 String s = acceptedTaxon.getTitleCache();
2426 Taxon tax = null;
2427 if(!skippQuestion){
2428 int addTaxon = askAddParent(s);
2429 logger.info("ADD TAXON: "+addTaxon);
2430 if (addTaxon == 0 ){
2431 Taxon tmp = askParent(acceptedTaxon, classification);
2432 if (tmp == null){
2433 s = askSetParent(s);
2434 r = askRank(s,rankListStr);
2435
2436 NonViralName<?> nameToBeFilled = null;
2437 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2438 nameToBeFilled = BotanicalName.NewInstance(null);
2439 }
2440 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2441 nameToBeFilled = ZoologicalName.NewInstance(null);
2442 }
2443 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2444 nameToBeFilled = BacterialName.NewInstance(null);
2445 }
2446 nameToBeFilled.setTitleCache(s, true);
2447 nameToBeFilled.setRank(getRank(r), true);
2448
2449 tax = Taxon.NewInstance(nameToBeFilled, ref);
2450 }
2451 else{
2452 tax=tmp;
2453 }
2454
2455 createParent(tax, ref);
2456 // logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
2457 classification.addParentChild(tax, acceptedTaxon, ref, null);
2458 }
2459 else{
2460 classification.addChildTaxon(acceptedTaxon, ref, null);
2461 tax=acceptedTaxon;
2462 }
2463 } else{
2464 classification.addChildTaxon(acceptedTaxon, ref, null);
2465 tax=acceptedTaxon;
2466 }
2467 // logger.info("RETURN: "+tax );
2468 return tax;
2469
2470 }
2471
2472 */
2473
2474
2475 private MyName extractScientificNameSynonym(Node name, Reference<?> refMods) throws TransformerFactoryConfigurationError, TransformerException {
2476 //System.out.println("extractScientificNameSynonym");
2477 logger.info("extractScientificNameSynonym");
2478 String[] rankListToPrint_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2479 List<String> rankListToPrint = new ArrayList<String>();
2480 for (String r : rankListToPrint_tmp) {
2481 rankListToPrint.add(r.toLowerCase());
2482 }
2483
2484 Rank rank = Rank.UNKNOWN_RANK();
2485 NodeList children = name.getChildNodes();
2486 String originalName="";
2487 String fullName = "";
2488 String newName="";
2489 String identifier="";
2490 HashMap<String, String> atomisedMap = new HashMap<String, String>();
2491 List<String> atomisedName= new ArrayList<String>();
2492
2493 String rankStr = "";
2494 Rank tmpRank ;
2495
2496 String status= extractStatus(children);
2497
2498 for (int i=0;i<children.getLength();i++){
2499 if(children.item(i).getNodeName().equalsIgnoreCase("tax:xmldata")){
2500 NodeList atom = children.item(i).getChildNodes();
2501 for (int k=0;k<atom.getLength();k++){
2502 identifier = extractIdentifier(identifier, atom.item(k));
2503 tmpRank = null;
2504 rankStr = atom.item(k).getNodeName().toLowerCase();
2505 // logger.info("RANKSTR:*"+rankStr+"*");
2506 if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2507 rankStr=atom.item(k).getTextContent().trim();
2508 tmpRank = getRank(rankStr);
2509 }
2510 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2511 if (tmpRank != null){
2512 rank=tmpRank;
2513 }
2514 atomisedMap.put(rankStr.toLowerCase(),atom.item(k).getTextContent().trim());
2515 }
2516 addAtomisedNamesToMap(rankListToPrint, rank, atomisedName, atom);
2517 }
2518 if(children.item(i).getNodeName().equalsIgnoreCase("#text") && !StringUtils.isBlank(children.item(i).getTextContent())){
2519 // logger.info("name non atomised: "+children.item(i).getTextContent());
2520 fullName = children.item(i).getTextContent().trim();
2521 // logger.info("fullname: "+fullName);
2522 }
2523 }
2524 originalName=fullName;
2525 fullName = cleanName(fullName, atomisedName);
2526 namesMap.put(fullName,atomisedMap);
2527
2528 String atomisedNameStr = getAtomisedNameStr(atomisedName);
2529
2530 if (fullName != null){
2531 // System.out.println("fullname: "+fullName);
2532 // System.out.println("atomised: "+atomisedNameStr);
2533 if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2534 if (skippQuestion){
2535 // String defaultN = "";
2536 if (atomisedNameStr.length()>fullName.length()) {
2537 newName=atomisedNameStr;
2538 } else {
2539 if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2540 newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2541 } else {
2542 newName=fullName;
2543 }
2544 }
2545 } else {
2546 newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2547 }
2548 } else {
2549 newName=fullName;
2550 }
2551 }
2552 //not really needed
2553 // rank = askForRank(newName, rank, nomenclaturalCode);
2554 // System.out.println("atomised: "+atomisedMap.toString());
2555
2556 // String[] names = new String[5];
2557 MyName myname = new MyName(true);
2558
2559 //System.out.println("Handle "+newName+ "(rank: "+rank+")");
2560 // System.out.println(atomisedMap.keySet());
2561 fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2562 myname.setOriginalName(fullName);
2563 myname.setNewName(newName);
2564 myname.setRank(rank);
2565 myname.setIdentifier(identifier);
2566 myname.setStatus(status);
2567 myname.setSource(refMods);
2568
2569 // boolean higherAdded=false;
2570
2571
2572 boolean parseNameManually=false;
2573 INonViralNameParser<?> parser = NonViralNameParserImpl.NewInstance();
2574 TaxonNameBase<?,?> nameToBeFilledTest ;
2575
2576 //if selected the atomised version
2577 if(newName==atomisedNameStr){
2578 nameToBeFilledTest = parser.parseFullName(atomisedNameStr, nomenclaturalCode, rank);
2579 if (nameToBeFilledTest.hasProblem()){
2580 addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2581 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2582 if (nameToBeFilledTest.hasProblem()){
2583 addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2584 parseNameManually=true;
2585 }
2586 }
2587 }else{
2588 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode, rank);
2589 if (nameToBeFilledTest.hasProblem()){
2590 addProblemNameToFile("fullversion",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2591 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2592 parseNameManually=true;
2593 if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2594 addNameDifferenceToFile(originalName,atomisedNameStr);
2595 }
2596 }
2597 }
2598
2599 if(parseNameManually){
2600 //System.out.println("DO IT MANUALLY");
2601 createSynonym(rank, newName, atomisedMap, myname);
2602 }
2603 else{
2604 //System.out.println("AUTOMATIC!");
2605 // createAtomisedTaxonString(newName, atomisedMap, myname);
2606 myname.setParsedName(nameToBeFilledTest);
2607 myname.buildTaxon();
2608 }
2609 //System.out.println("RETURN SYNONYM "+myname.getSyno().toString());
2610 return myname;
2611 }
2612 /**
2613 * @param name
2614 * @throws TransformerFactoryConfigurationError
2615 * @throws TransformerException
2616 * @return a list of possible names
2617 */
2618 @SuppressWarnings({"rawtypes" })
2619 private MyName extractScientificName(Node name, Reference<?> refMods) throws TransformerFactoryConfigurationError, TransformerException {
2620 logger.info("extractScientificName");
2621
2622 String[] rankListToPrintLowerCase_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2623 List<String> rankListToPrint = Arrays.asList(rankListToPrintLowerCase_tmp);
2624
2625 Rank rank = Rank.UNKNOWN_RANK();
2626 NodeList children = name.getChildNodes();
2627 String originalName = "";
2628 String fullName = "";
2629 String newName = "";
2630 String identifier = "";
2631 HashMap<String, String> atomisedMap = new HashMap<String, String>();
2632 List<String> atomisedNameList= new ArrayList<String>();
2633
2634 String status= extractStatus(children);
2635
2636 for (int i=0;i<children.getLength();i++){
2637 Node nameChild = children.item(i);
2638 if(nameChild.getNodeName().equalsIgnoreCase("tax:xmldata")){
2639 NodeList xmlDataChildren = nameChild.getChildNodes();
2640 for (int k=0;k<xmlDataChildren.getLength();k++){
2641 Node xmlDataChild = xmlDataChildren.item(k);
2642 identifier = extractIdentifier(identifier, xmlDataChild);
2643 String rankStr = xmlDataChild.getNodeName().toLowerCase();
2644 if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2645 rankStr=xmlDataChild.getTextContent().trim();
2646 Rank tmpRank = getRank(rankStr);
2647 if (tmpRank != null){
2648 rank=tmpRank;
2649 }
2650 }
2651 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2652
2653 atomisedMap.put(rankStr.toLowerCase(),xmlDataChild.getTextContent().trim());
2654 }
2655 addAtomisedNamesToMap(rankListToPrint, rank, atomisedNameList, xmlDataChildren);
2656 }
2657 else if(nameChild.getNodeName().equalsIgnoreCase("#text") && !StringUtils.isBlank(nameChild.getTextContent())){
2658 // logger.info("name non atomised: "+children.item(i).getTextContent());
2659 fullName = nameChild.getTextContent().trim();
2660 // logger.info("fullname: "+fullName);
2661 }
2662 }
2663 originalName=fullName;
2664 fullName = cleanName(fullName, atomisedNameList);
2665 namesMap.put(fullName,atomisedMap);
2666
2667 String atomisedNameStr = getAtomisedNameStr(atomisedNameList);
2668
2669 if (fullName != null){
2670 if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2671 if (skippQuestion){
2672 if (atomisedNameStr.length()>fullName.length()) {
2673 newName = atomisedNameStr;
2674 } else {
2675 if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2676 newName = askWhichScientificName(fullName, atomisedNameStr, classification.getTitleCache(), name);
2677 } else {
2678 newName = fullName;
2679 }
2680 }
2681 } else {
2682 newName=askWhichScientificName(fullName, atomisedNameStr, classification.getTitleCache(), name);
2683 }
2684 } else {
2685 newName=fullName;
2686 }
2687 }
2688 //not really needed
2689 // rank = askForRank(newName, rank, nomenclaturalCode);
2690 // System.out.println("atomised: "+atomisedMap.toString());
2691
2692 // String[] names = new String[5];
2693 MyName myname = new MyName(false);
2694
2695 //System.out.println("\n\nBUILD "+newName+ "(rank: "+rank+")");
2696 // System.out.println(atomisedMap.keySet());
2697 fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2698 myname.setOriginalName(fullName);
2699 myname.setNewName(newName);
2700
2701 myname.setRank(rank);
2702 myname.setIdentifier(identifier);
2703 myname.setStatus(status);
2704 myname.setSource(refMods);
2705
2706 // boolean higherAdded=false;
2707
2708
2709 boolean parseNameManually=false;
2710 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2711 TaxonNameBase nameToBeFilledTest = null;
2712
2713 //if selected the atomised version
2714 if(newName==atomisedNameStr){
2715 nameToBeFilledTest = parser.parseFullName(atomisedNameStr, nomenclaturalCode, rank);
2716 if (nameToBeFilledTest.hasProblem()){
2717 addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2718 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2719 if (nameToBeFilledTest.hasProblem()){
2720 addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2721 parseNameManually=true;
2722 }
2723 }
2724 }else{
2725 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode, rank);
2726 if (nameToBeFilledTest.hasProblem()){
2727 addProblemNameToFile("fullversion",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2728 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2729 parseNameManually=true;
2730 if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2731 addNameDifferenceToFile(originalName,atomisedNameStr);
2732 }
2733 }
2734 }
2735
2736 //System.out.println("parseNameManually: "+parseNameManually);
2737 if(parseNameManually){
2738 createAtomisedTaxon(rank, newName, atomisedMap, myname);
2739 }
2740 else{
2741 createAtomisedTaxonString(newName, atomisedMap, myname);
2742 myname.setParsedName(nameToBeFilledTest);
2743 //TODO correct handling of createIfNotExists
2744 myname.buildTaxon();
2745 }
2746 return myname;
2747
2748 }
2749
2750 /**
2751 * @param atomisedName
2752 * @return
2753 */
2754 private String getAtomisedNameStr(List<String> atomisedName) {
2755 //logger.info("getAtomisedNameStr");
2756 String atomisedNameStr = StringUtils.join(atomisedName," ");
2757 while(atomisedNameStr.contains(" ")) {
2758 atomisedNameStr=atomisedNameStr.replace(" ", " ");
2759 }
2760 atomisedNameStr=atomisedNameStr.trim();
2761 return atomisedNameStr;
2762 }
2763
2764 /**
2765 * @param children
2766 * @param status
2767 * @return
2768 */
2769 private String extractStatus(NodeList children) {
2770 logger.info("extractStatus");
2771 String status="";
2772 for (int i=0;i<children.getLength();i++){
2773 if(children.item(i).getNodeName().equalsIgnoreCase("tax:status") ||
2774 (children.item(i).getNodeName().equalsIgnoreCase("tax:namePart") &&
2775 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("status"))){
2776 status = children.item(i).getTextContent().trim();
2777 }
2778 }
2779 return status;
2780 }
2781
2782 /**
2783 * @param identifier
2784 * @param atom
2785 * @param k
2786 * @return
2787 */
2788 private String extractIdentifier(String identifier, Node atom) {
2789 //logger.info("extractIdentifier");
2790 if (atom.getNodeName().equalsIgnoreCase("tax:xid")){
2791 try{
2792 identifier = atom.getAttributes().getNamedItem("identifier").getNodeValue();
2793 }catch(Exception e){
2794 System.out.println("pb with identifier, maybe empty");
2795 }
2796 try{
2797 identifier+="__"+atom.getAttributes().getNamedItem("source").getNodeValue();
2798 }catch(Exception e){
2799 System.out.println("pb with identifier, maybe empty");
2800 }
2801 }
2802 return identifier;
2803 }
2804
2805 /**
2806 * @param rankListToPrint
2807 * @param rank
2808 * @param atomisedName
2809 * @param atom
2810 */
2811 private void addAtomisedNamesToMap(List<String> rankListToPrint, Rank rank, List<String> atomisedName, NodeList atom) {
2812 logger.info("addAtomisedNamesToMap");
2813 for (int k=0;k<atom.getLength();k++){
2814 Node node = atom.item(k);
2815 String nodeName = node.getNodeName();
2816 if (! nodeName.equalsIgnoreCase("dwc:taxonRank") ) { //rank has been handled in higher method
2817 if (nodeName.equalsIgnoreCase("dwc:subgenus") || nodeName.equalsIgnoreCase("dwcranks:subgenus")) {
2818 atomisedName.add("("+ node.getTextContent().trim()+")");
2819 } else if(nodeName.equalsIgnoreCase("dwcranks:varietyepithet") || nodeName.equalsIgnoreCase("dwc:Subspecies") || nodeName.equalsIgnoreCase("dwc:infraspecificepithet")) {
2820 if(nodeName.equalsIgnoreCase("dwcranks:varietyepithet")){
2821 atomisedName.add("var. "+node.getTextContent().trim());
2822 }else if(nodeName.equalsIgnoreCase("dwc:Subspecies") || nodeName.equalsIgnoreCase("dwc:infraspecificepithet")) {
2823 atomisedName.add("subsp. "+atom.item(k).getTextContent().trim());
2824 }
2825 } else if(rankListToPrint.contains(nodeName.toLowerCase())) {
2826 atomisedName.add(node.getTextContent().trim());
2827 } else{
2828 if (rank.isHigher(Rank.GENUS()) && (nodeName.indexOf("dwcranks:")>-1 || nodeName.indexOf("dwc:Family")>-1)) {
2829 atomisedName.add(node.getTextContent().trim());
2830 }else if (nodeName.equals("#text")){
2831 String text = node.getTextContent();
2832 if (StringUtils.isNotBlank(text)){
2833 //TODO handle text
2834 logger.warn("name xmldata contains text. This is unhandled");
2835 }
2836 }else if (nodeName.matches("(?i)(dwc:Kingdom|dwc:Class|dwc:Order|dwc:Family)")){
2837 //we currently do not use higher ranks information
2838 }else{
2839 //TODO handle unhandled node
2840 logger.warn("Unhandled node: " + nodeName);
2841 }
2842 }
2843 }
2844 }
2845 }
2846
2847 /**
2848 * @param fullName
2849 * @param atomisedName
2850 * @return
2851 */
2852 private String cleanName(String name, List<String> atomisedName) {
2853 //logger.info("cleanName");
2854 String fullName =name;
2855 if (fullName != null){
2856 fullName = fullName.replace("( ", "(");
2857 fullName = fullName.replace(" )",")");
2858
2859 if (fullName.trim().isEmpty()){
2860 fullName=StringUtils.join(atomisedName," ");
2861 }
2862
2863 while(fullName.contains(" ")) {
2864 fullName=fullName.replace(" ", " ");
2865 // logger.info("while");
2866 }
2867 fullName=fullName.trim();
2868 }
2869 return fullName;
2870 }
2871
2872 /**
2873 * @param rank
2874 * @param fullName
2875 * @param atomisedMap
2876 * @param myname
2877 * @return
2878 */
2879 private String extractAuthorFromNames(Rank rank, String name, HashMap<String, String> atomisedMap, MyName myname) {
2880 logger.info("extractAuthorFromNames");
2881 String fullName=name;
2882 if (atomisedMap.get("dwc:scientificnameauthorship") == null && fullName!=null){
2883 // System.out.println("rank : "+rank.toString());
2884 if(rank.isHigher(Rank.SPECIES())){
2885 try{
2886 String author=null;
2887 if(atomisedMap.get("dwcranks:subgenus") != null) {
2888 author = fullName.split(atomisedMap.get("dwcranks:subgenus"))[1].trim();
2889 }
2890 if(atomisedMap.get("dwc:subgenus") != null) {
2891 author = fullName.split(atomisedMap.get("dwc:subgenus"))[1].trim();
2892 }
2893 if(author == null) {
2894 if(atomisedMap.get("dwc:genus") != null) {
2895 author = fullName.split(atomisedMap.get("dwc:genus"))[1].trim();
2896 }
2897 }
2898 if(author != null){
2899 fullName = fullName.substring(0, fullName.indexOf(author));
2900 author=author.replaceAll(",","").trim();
2901 myname.setAuthor(author);
2902 }
2903 }catch(Exception e){
2904 //could not extract the author
2905 }
2906 }
2907 if(rank.equals(Rank.SPECIES())){
2908 try{
2909 String author=null;
2910 if(author == null) {
2911 if(atomisedMap.get("dwc:species") != null) {
2912 String[] t = fullName.split(atomisedMap.get("dwc:species"));
2913 // System.out.println("NB ELEMENTS "+t.length +"fullName "+fullName+", "+atomisedMap.get("dwc:species"));
2914 author = fullName.split(atomisedMap.get("dwc:species"))[1].trim();
2915 // System.out.println("AUTEUR "+author);
2916 }
2917 }
2918 if(author != null){
2919 fullName = fullName.substring(0, fullName.indexOf(author));
2920 author=author.replaceAll(",","").trim();
2921 myname.setAuthor(author);
2922 }
2923 }catch(Exception e){
2924 //could not extract the author
2925 }
2926 }
2927 }else{
2928 myname.setAuthor(atomisedMap.get("dwc:scientificnameauthorship"));
2929 }
2930 return fullName;
2931 }
2932
2933 /**
2934 * @param newName
2935 * @param atomisedMap
2936 * @param myname
2937 */
2938 private void createAtomisedTaxonString(String newName, HashMap<String, String> atomisedMap, MyName myname) {
2939 logger.info("createAtomisedTaxonString "+atomisedMap);
2940 if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
2941 myname.setFamilyStr(atomisedMap.get("dwc:family"));
2942 }
2943 if(atomisedMap.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank.SUBFAMILY())){
2944 myname.setSubfamilyStr(atomisedMap.get("dwcranks:subfamily"));
2945 }
2946 if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
2947 myname.setTribeStr(atomisedMap.get("dwcranks:tribe"));
2948 }
2949 if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
2950 myname.setSubtribeStr(atomisedMap.get("dwcranks:subtribe"));
2951 }
2952 if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
2953 myname.setGenusStr(atomisedMap.get("dwc:genus"));
2954 }
2955 if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2956 myname.setSubgenusStr(atomisedMap.get("dwcranks:subgenus"));
2957 }
2958 if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2959 myname.setSubgenusStr(atomisedMap.get("dwc:subgenus"));
2960 }
2961 if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
2962 String n=newName;
2963 if(atomisedMap.get("dwc:infraspecificepithet") != null) {
2964 n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
2965 n=n.replace("subsp.","");
2966 }
2967 if(atomisedMap.get("dwc:subspecies") != null) {
2968 n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
2969 n=n.replace("subsp.","");
2970 }
2971 if(atomisedMap.get("dwcranks:varietyepithet") != null) {
2972 n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
2973 n=n.replace("var.","");
2974 n=n.replace("v.","");
2975 }
2976 if(atomisedMap.get("dwcranks:formepithet") != null) {
2977 //TODO
2978 System.out.println("TODO FORMA");
2979 n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
2980 n=n.replace("forma","");
2981 }
2982 n=n.trim();
2983 String author = myname.getAuthor();
2984 if(n.split(" ").length>2)
2985 {
2986 String n2=n.split(" ")[0]+" "+n.split(" ")[1];
2987 String a= "";
2988 try{
2989 a=n.split(n2)[1].trim();
2990 }catch(Exception e){
2991 logger.info("no author in "+n+"?");}
2992
2993 myname.setAuthor(a);
2994 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
2995 n=n2;
2996
2997 }
2998
2999 myname.setSpeciesStr(atomisedMap.get("dwc:species"));
3000 myname.setAuthor(author);
3001 }
3002 if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3003 myname.setSubspeciesStr(atomisedMap.get("dwc:subspecies"));
3004 }
3005 if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3006 myname.setSubspeciesStr(atomisedMap.get("dwc:infraspecificepithet"));
3007 }
3008 if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
3009 myname.setVarietyStr(atomisedMap.get("dwcranks:varietyepithet"));
3010 }
3011 if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
3012 myname.setFormStr(atomisedMap.get("dwcranks:formepithet"));
3013 }
3014 }
3015
3016 private void createSynonym(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
3017 logger.info("createSynonym");
3018 //System.out.println("createsynonym");
3019 if(rank.equals(Rank.UNKNOWN_RANK())){
3020 myname.setNotParsableTaxon(newName);
3021 }else
3022 {if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY()) && rank.equals(Rank.FAMILY())){
3023 myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
3024 }
3025 if(atomisedMap.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank.SUBFAMILY()) && rank.equals(Rank.SUBFAMILY())){
3026 myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
3027 }
3028 if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE()) && rank.equals(Rank.TRIBE())){
3029 myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
3030 }
3031 if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE()) && rank.equals(Rank.SUBTRIBE())){
3032 myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
3033 }
3034 if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS()) && rank.equals(Rank.GENUS())){
3035 myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
3036 }
3037 if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
3038 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
3039 }
3040 if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
3041 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
3042 }
3043 if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES()) && rank.equals(Rank.SPECIES())){
3044 String n=newName;
3045 if(atomisedMap.get("dwc:infraspecificepithet") != null) {
3046 n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
3047 n=n.replace("subsp.","");
3048 }
3049 if(atomisedMap.get("dwc:subspecies") != null) {
3050 n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
3051 n=n.replace("subsp.","");
3052 }
3053 if(atomisedMap.get("dwcranks:varietyepithet") != null) {
3054 n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
3055 n=n.replace("var.","");
3056 n=n.replace("v.","");
3057 }
3058 if(atomisedMap.get("dwcranks:formepithet") != null) {
3059 //TODO
3060 //System.out.println("TODO FORMA");
3061 n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3062 n=n.replace("forma","");
3063 }
3064 n=n.trim();
3065 String author = myname.getAuthor();
3066 if(n.split(" ").length>2)
3067 {
3068 String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3069 String a="";
3070 try{
3071 a= n.split(n2)[1].trim();
3072 }catch(Exception e){logger.info("no author in "+n);}
3073 myname.setAuthor(a);
3074 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3075 n=n2;
3076
3077 }
3078
3079 myname.setSpecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank));
3080 myname.setAuthor(author);
3081 }
3082 if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3083 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3084 }
3085 if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3086 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3087 }
3088 if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY()) && rank.equals(Rank.VARIETY())){
3089 myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3090 }
3091 if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM()) && rank.equals(Rank.FORM())){
3092 myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3093 }
3094 }
3095
3096 }
3097 /**
3098 * @param rank
3099 * @param newName
3100 * @param atomisedMap
3101 * @param myname
3102 */
3103 private void createAtomisedTaxon(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
3104 logger.info("createAtomisedTaxon "+atomisedMap);
3105 if(rank.equals(Rank.UNKNOWN_RANK())){
3106 myname.setNotParsableTaxon(newName);
3107 }
3108 else{
3109 if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
3110 myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
3111 }
3112 if(atomisedMap.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank.SUBFAMILY())){
3113 myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
3114 }
3115 if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
3116 myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
3117 }
3118 if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
3119 myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
3120 }
3121 if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
3122 myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
3123 }
3124 if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3125 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
3126 }
3127 if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3128 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
3129 }
3130 if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
3131 String n=newName;
3132 if(atomisedMap.get("dwc:infraspecificepithet") != null) {
3133 n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
3134 n=n.replace("subsp.","");
3135 }
3136 if(atomisedMap.get("dwc:subspecies") != null) {
3137 n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
3138 n=n.replace("subsp.","");
3139 }
3140 if(atomisedMap.get("dwcranks:varietyepithet") != null) {
3141 n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
3142 n=n.replace("var.","");
3143 n=n.replace("v.","");
3144 }
3145 if(atomisedMap.get("dwcranks:formepithet") != null) {
3146 //TODO
3147 //System.out.println("TODO FORMA");
3148 n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3149 n=n.replace("forma","");
3150 }
3151 n=n.trim();
3152 String author = myname.getAuthor();
3153 if(n.split(" ").length>2){
3154 String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3155 String a="";
3156 try{
3157 a= n.split(n2)[1].trim();
3158 }catch(Exception e){logger.info("no author in "+n);}
3159 myname.setAuthor(a);
3160 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3161 n=n2;
3162
3163 }
3164
3165 myname.setSpecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank));
3166 myname.setAuthor(author);
3167 }
3168 if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3169 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3170 }
3171 if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3172 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3173 }
3174 if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
3175 myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3176 }
3177 if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
3178 myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3179 }
3180 }
3181 }
3182
3183 /**
3184 * @return
3185 */
3186 private boolean checkRankValidForImport(Rank currentRank) {
3187 //logger.info("checkRankValidForImport");
3188 return currentRank.isLower(configState.getConfig().getMaxRank()) || currentRank.equals(configState.getConfig().getMaxRank());
3189 }
3190
3191
3192
3193 /**
3194 * @param classification2
3195 */
3196 public void updateClassification(Classification classification2) {
3197 //logger.info("updateClassification");
3198 classification = classification2;
3199 }
3200
3201 /**
3202 * @param tnb
3203 * cast the current taxonnamebase into a botanical name or zoological or bacterial name
3204 * if errors, cast into a classis nonviralname
3205 * @param taxonnamebase2
3206 */
3207 @SuppressWarnings("rawtypes")
3208 public NonViralName<?> castTaxonNameBase(TaxonNameBase tnb, NonViralName<?> nvn) {
3209 //logger.info("castTaxonNameBase");
3210 NonViralName<?> taxonnamebase2 = nvn;
3211 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
3212 try{
3213 taxonnamebase2=(BotanicalName) tnb;
3214 }catch(Exception e){
3215 taxonnamebase2= (NonViralName<?>) tnb;
3216 }
3217 }
3218 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
3219 try{
3220 taxonnamebase2=(ZoologicalName) tnb;
3221 }catch(Exception e){
3222 taxonnamebase2= (NonViralName<?>) tnb;
3223 }
3224 }
3225 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
3226 try{
3227 taxonnamebase2=(BacterialName) tnb;
3228 }catch(Exception e){
3229 taxonnamebase2= (NonViralName<?>) tnb;
3230 }
3231 }
3232 return taxonnamebase2;
3233 }
3234
3235 /**
3236 * @param tnb
3237 * cast the current taxonnamebase into a botanical name or zoological or bacterial name
3238 * if errors, cast into a classis nonviralname
3239 * @param taxonnamebase2
3240 */
3241 @SuppressWarnings("rawtypes")
3242 public NonViralName<?> castTaxonNameBase(TaxonNameBase tnb) {
3243 //logger.info("castTaxonNameBase2");
3244 NonViralName<?> taxonnamebase2 = null;
3245 tnb=CdmBase.deproxy(tnb, TaxonNameBase.class);
3246 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
3247 try{
3248 taxonnamebase2=(BotanicalName) tnb;
3249 }catch(Exception e){
3250 taxonnamebase2= (NonViralName<?>) tnb;
3251 }
3252 }
3253 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
3254 try{
3255 taxonnamebase2=(ZoologicalName) tnb;
3256 }catch(Exception e){
3257 taxonnamebase2= (NonViralName<?>) tnb;
3258 }
3259 }
3260 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
3261 try{
3262 taxonnamebase2=(BacterialName) tnb;
3263 }catch(Exception e){
3264 taxonnamebase2= (NonViralName<?>) tnb;
3265 }
3266 }
3267 return taxonnamebase2;
3268 }
3269
3270 public class MyName {
/**
 * Creates an empty name holder.
 *
 * @param isSynonym {@code true} if the name is to be handled as a synonym,
 *                  {@code false} if it is to be handled as a taxon
 */
public MyName(boolean isSynonym) {
    this.isSynonym = isSynonym;
}
3278
3279 String originalName="";
3280 String newName="";
3281 Rank rank=Rank.UNKNOWN_RANK();
3282 String identifier="";
3283 String status="";
3284 String author=null;
3285
3286 NonViralName<?> taxonnamebase;
3287
3288 Reference<?> refMods ;
3289
3290 Taxon family,subfamily,tribe,subtribe,genus,subgenus,species,subspecies, variety,form;
3291 NonViralName<?> familyName, subfamilyName, tribeName,subtribeName,genusName,subgenusName,speciesName,subspeciesName;
3292 String familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr;
3293 Taxon higherTaxa;
3294 Rank higherRank;
3295 private Taxon taxon;
3296 private Synonym syno;
3297
3298 /**
3299 * @return the syno
3300 */
3301 public Synonym getSyno() {
3302 return syno;
3303 }
3304
3305 @Override
3306 public String toString(){
3307 List<String> tot=new ArrayList<String>();
3308 String[] n= {familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr};
3309 for (String elt:n){
3310 if (!StringUtils.isEmpty(elt)) {
3311 tot.add(elt);
3312 } else {
3313 tot.add("*");
3314 }
3315 }
3316 return StringUtils.join(tot," ");
3317 }
3318 /**
3319 * @param syno the syno to set
3320 */
3321 public void setSyno(Synonym syno) {
3322 this.syno = syno;
3323 }
3324
3325 boolean isSynonym=false;
3326
3327 /**
3328 * @return the isSynonym
3329 */
3330 public boolean isSynonym() {
3331 return isSynonym;
3332 }
3333
3334 /**
3335 * @param isSynonym the isSynonym to set
3336 */
3337 public void setSynonym(boolean isSynonym) {
3338 this.isSynonym = isSynonym;
3339 }
3340
3341 public void setSource(Reference<?> re){
3342 refMods=re;
3343 }
3344
3345 /**
3346 * @param string
3347 */
3348 public void setFormStr(String string) {
3349 this.formStr=string;
3350
3351 }
3352 /**
3353 * @param string
3354 */
3355 public void setVarietyStr(String string) {
3356 this.varietyStr=string;
3357
3358 }
3359 /**
3360 * @param string
3361 */
3362 public void setSubspeciesStr(String string) {
3363 this.subspeciesStr=string;
3364
3365 }
3366 /**
3367 * @param string
3368 */
3369 public void setSpeciesStr(String string) {
3370 this.speciesStr=string;
3371
3372 }
3373 /**
3374 * @param string
3375 */
3376 public void setSubgenusStr(String string) {
3377 this.subgenusStr=string;
3378
3379 }
3380 /**
3381 * @param string
3382 */
3383 public void setGenusStr(String string) {
3384 this.genusStr=string;
3385
3386 }
3387 /**
3388 * @param string
3389 */
3390 public void setSubtribeStr(String string) {
3391 this.subtribeStr=string;
3392
3393 }
3394 /**
3395 * @param string
3396 */
3397 public void setTribeStr(String string) {
3398 this.tribeStr=string;
3399
3400 }
3401 /**
3402 * @param string
3403 */
3404 public void setSubfamilyStr(String string) {
3405 this.subfamilyStr=string;
3406
3407 }
3408 /**
3409 * @param string
3410 */
3411 public void setFamilyStr(String string) {
3412 this.familyStr=string;
3413
3414 }
3415 /**
3416 * @return the familyStr
3417 */
3418 public String getFamilyStr() {
3419 return familyStr;
3420 }
3421 /**
3422 * @return the subfamilyStr
3423 */
3424 public String getSubfamilyStr() {
3425 return subfamilyStr;
3426 }
3427 /**
3428 * @return the tribeStr
3429 */
3430 public String getTribeStr() {
3431 return tribeStr;
3432 }
3433 /**
3434 * @return the subtribeStr
3435 */
3436 public String getSubtribeStr() {
3437 return subtribeStr;
3438 }
3439 /**
3440 * @return the genusStr
3441 */
3442 public String getGenusStr() {
3443 return genusStr;
3444 }
3445 /**
3446 * @return the subgenusStr
3447 */
3448 public String getSubgenusStr() {
3449 return subgenusStr;
3450 }
3451 /**
3452 * @return the speciesStr
3453 */
3454 public String getSpeciesStr() {
3455 return speciesStr;
3456 }
3457 /**
3458 * @return the subspeciesStr
3459 */
3460 public String getSubspeciesStr() {
3461 return subspeciesStr;
3462 }
3463 /**
3464 * @return the formStr
3465 */
3466 public String getFormStr() {
3467 return formStr;
3468 }
3469 /**
3470 * @return the varietyStr
3471 */
3472 public String getVarietyStr() {
3473 return varietyStr;
3474 }
3475
3476 /**
3477 * @param newName2
3478 */
3479 public void setNotParsableTaxon(String newName2) {
3480 //takes too much time
3481 // List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3482
3483 NomenclaturalStatusType statusType = null;
3484 if (!getStatus().isEmpty()){
3485 try {
3486 statusType = nomStatusString2NomStatus(getStatus());
3487 } catch (UnknownCdmTypeException e) {
3488 addProblematicStatusToFile(getStatus());
3489 logger.warn("Problem with status");
3490 }
3491 }
3492 List<TaxonBase> tmpList = new ArrayList<TaxonBase>();
3493
3494 Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, newName2, MatchMode.BEGINNING, null, null, null, null, null);
3495 tmpList.addAll(taxontest.getRecords());
3496
3497 //logger.info("tmpList returned: "+tmpList.size());
3498
3499
3500 NonViralName<?> identicName = null;
3501 boolean foundIdentic=false;
3502 TaxonBase<?> tmpTaxonBase=null;
3503 // Taxon tmpPartial=null;
3504 for (TaxonBase<?> tmpb:tmpList){
3505 if(tmpb !=null){
3506 TaxonNameBase<?,?> tnb = tmpb.getName();
3507 Rank crank=null;
3508 if (tnb != null){
3509 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(newName2) ){
3510 crank =tnb.getRank();
3511 if (crank !=null && rank !=null){
3512 if (crank.equals(rank)){
3513 identicName = CdmBase.deproxy(tnb, NonViralName.class);
3514 if (isSynonym && tmpb.isInstanceOf(Synonym.class) || !isSynonym && tmpb.isInstanceOf(Taxon.class)){
3515 foundIdentic=true;
3516 tmpTaxonBase=tmpb;
3517 break;
3518 }
3519 }
3520 }
3521 }
3522 }
3523 }
3524 }
3525 boolean statusMatch=false;
3526 boolean appendedMatch=false;
3527 if(tmpTaxonBase !=null && foundIdentic){
3528 statusMatch=compareStatus(tmpTaxonBase, statusType);
3529 if (!getStatus().isEmpty() && ! (tmpTaxonBase.getAppendedPhrase() == null)) {
3530 appendedMatch=tmpTaxonBase.getAppendedPhrase().equals(getStatus());
3531 }
3532 if (getStatus().isEmpty() && tmpTaxonBase.getAppendedPhrase() == null) {
3533 appendedMatch=true;
3534 }
3535
3536 }
3537 if ((tmpTaxonBase == null || !foundIdentic) || (tmpTaxonBase != null && !statusMatch) || (tmpTaxonBase != null && !appendedMatch && !statusMatch)){
3538
3539 NonViralName<?> tnb;
3540 if (identicName == null){
3541 tnb = getNonViralNameAccNomenclature();
3542 tnb.setRank(rank);
3543
3544 if(statusType != null) {
3545 tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3546 }
3547 if(getStatus()!=null) {
3548 tnb.setAppendedPhrase(getStatus());
3549 }
3550 }else{
3551 tnb = identicName;
3552 }
3553
3554 tnb.setTitleCache(newName2,true);
3555 tmpTaxonBase = findMatchingTaxon(tnb,refMods);
3556 if(tmpTaxonBase==null){
3557 tmpTaxonBase = isSynonym ? Synonym.NewInstance(tnb, refMods) : Taxon.NewInstance(tnb, refMods);
3558 if(!configState.getConfig().doKeepOriginalSecundum()) {
3559 tmpTaxonBase.setSec(configState.getConfig().getSecundum());
3560 }
3561 //tmptaxonbase.setSec(refMods);
3562 if(!isSynonym) {
3563 classification.addChildTaxon((Taxon)tmpTaxonBase, null, null);
3564 sourceHandler.addSource(refMods, (Taxon)tmpTaxonBase);
3565 }
3566 }
3567 }
3568
3569 tmpTaxonBase = CdmBase.deproxy(tmpTaxonBase, TaxonBase.class);
3570 if (author != null) {
3571 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
3572 setLSID(getIdentifier(), tmpTaxonBase);
3573 importer.getTaxonService().saveOrUpdate(tmpTaxonBase);
3574 tmpTaxonBase = CdmBase.deproxy(tmpTaxonBase, TaxonBase.class);
3575 }
3576 }
3577 TaxonNameBase<?,?> tnb = CdmBase.deproxy(tmpTaxonBase.getName(), TaxonNameBase.class);
3578
3579 if(!isSynonym) {
3580 this.taxon=(Taxon)tmpTaxonBase;
3581 } else {
3582 if (tmpTaxonBase instanceof Taxon){
3583 logger.warn("Incorrect status");
3584 }
3585 this.syno=(Synonym)tmpTaxonBase;
3586 }
3587
3588 taxonnamebase = castTaxonNameBase(tnb, taxonnamebase);
3589
3590 }
3591
3592 /**
3593 *
3594 */
3595 public void buildTaxon() {
3596 //System.out.println("BUILD TAXON");
3597 logger.info("buildTaxon");
3598 NomenclaturalStatusType statusType = null;
3599 if (!getStatus().isEmpty()){
3600 try {
3601 statusType = nomStatusString2NomStatus(getStatus());
3602 taxonnamebase.addStatus(NomenclaturalStatus.NewInstance(statusType));
3603 } catch (UnknownCdmTypeException e) {
3604 addProblematicStatusToFile(getStatus());
3605 logger.warn("Problem with status");
3606 }
3607 }
3608 importer.getNameService().save(taxonnamebase);
3609
3610 TaxonBase<?> tmptaxonbase;
3611 if (!isSynonym) {
3612 tmptaxonbase =Taxon.NewInstance(taxonnamebase, refMods); //sec set null
3613 }
3614 else {
3615 tmptaxonbase =Synonym.NewInstance(taxonnamebase, refMods); //sec set null
3616 }
3617 boolean exist = false;
3618 for (TaxonNode p : classification.getAllNodes()){
3619 try{
3620 if(p.getTaxon().getTitleCache().equalsIgnoreCase(tmptaxonbase.getTitleCache())) {
3621 if(compareStatus(p.getTaxon(), statusType)){
3622 try{
3623 if (!isSynonym) {
3624 tmptaxonbase=CdmBase.deproxy(p.getTaxon(), TaxonBase.class);
3625 } else {
3626 tmptaxonbase=CdmBase.deproxy(p.getTaxon(), Synonym.class);
3627 }
3628 exist =true;
3629 }catch(Exception e){
3630 logger.warn("Found the same name but from another type (taxon/synonym)");
3631 TaxonNameBase<?,?> existingTnb = getTaxon().getName();
3632 if (isSynonym){
3633 tmptaxonbase = new Synonym(existingTnb, refMods);
3634 importer.getTaxonService().saveOrUpdate(tmptaxonbase);
3635 tmptaxonbase=CdmBase.deproxy(tmptaxonbase, Synonym.class);
3636 exist =true;
3637 }
3638 else{
3639 tmptaxonbase = new Taxon(existingTnb, refMods);
3640 }
3641 }
3642 }
3643 }
3644 }catch(NullPointerException n){logger.warn(" A taxon is either null or its titlecache is null - ignore it?");}
3645 }
3646 if (!exist){
3647
3648 boolean insertAsExisting =false;
3649 List<Taxon> existingTaxons=new ArrayList<Taxon>();
3650 try {
3651 existingTaxons = getMatchingTaxa(taxonnamebase);
3652 } catch (Exception e1) {
3653 // TODO Auto-generated catch block
3654 e1.printStackTrace();
3655 }
3656 double similarityScore=0.0;
3657 double similarityAuthor=-1;
3658 String author1="";
3659 String author2="";
3660 String t1="";
3661 String t2="";
3662 for (Taxon bestMatchingTaxon:existingTaxons){
3663 //System.out.println("tnbase "+taxonnamebase.getTitleCache());
3664 //System.out.println("bestex "+bestMatchingTaxon.getTitleCache());
3665 if(taxonnamebase.getAuthorshipCache()!=null) {
3666 author1=taxonnamebase.getAuthorshipCache();
3667 }
3668 try {
3669 if(castTaxonNameBase(bestMatchingTaxon.getName()).getAuthorshipCache()!=null) {
3670 author2=castTaxonNameBase(bestMatchingTaxon.getName()).getAuthorshipCache();
3671 }
3672 } catch (Exception e) {
3673 // TODO Auto-generated catch block
3674 e.printStackTrace();
3675 }
3676 try {
3677 t1=taxonnamebase.getTitleCache().split("sec.")[0].trim();
3678 if (author1!=null && !StringUtils.isEmpty(author1)) {
3679 t1=t1.split(Pattern.quote(author1))[0];
3680 }
3681 } catch (Exception e) {
3682 // TODO Auto-generated catch block
3683 e.printStackTrace();
3684 }
3685 try {
3686 t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
3687 if (author2!=null && !StringUtils.isEmpty(author2)) {
3688 t2=t2.split(Pattern.quote(author2))[0];
3689 }
3690 } catch (Exception e) {
3691 // TODO Auto-generated catch block
3692 e.printStackTrace();
3693 }
3694
3695 similarityScore=similarity(t1.trim(), t2.trim());
3696 //System.out.println("taxonscore "+similarityScore);
3697 similarityAuthor=similarity(author1.trim(), author2.trim());
3698 //System.out.println("authorscore "+similarityAuthor);
3699 insertAsExisting = compareAndCheckTaxon(taxonnamebase, refMods, similarityScore, bestMatchingTaxon, similarityAuthor);
3700 if(insertAsExisting) {
3701 tmptaxonbase=bestMatchingTaxon;
3702 break;
3703 }
3704 }
3705 if (!insertAsExisting ){
3706 if(!configState.getConfig().doKeepOriginalSecundum()) {
3707 tmptaxonbase.setSec(configState.getConfig().getSecundum());
3708 }
3709
3710 // tmptaxonbase.setSec(refMods);
3711 if (taxonnamebase.getRank().equals(configState.getConfig().getMaxRank())) {
3712 //System.out.println("****************************"+tmptaxonbase);
3713 if (!isSynonym) {
3714 classification.addChildTaxon((Taxon)tmptaxonbase, refMods, null);
3715 }
3716 } else{
3717 hierarchy = new HashMap<Rank, Taxon>();
3718 //System.out.println("LOOK FOR PARENT "+taxonnamebase.toString()+", "+tmptaxonbase.toString());
3719 if (!isSynonym){
3720 lookForParentNode(taxonnamebase,(Taxon)tmptaxonbase, refMods,this);
3721 //System.out.println("HIERARCHY "+hierarchy);
3722 Taxon parent = buildHierarchy();
3723 if(!taxonExistsInClassification(parent,(Taxon)tmptaxonbase)){
3724 if(parent !=null) {
3725 classification.addParentChild(parent, (Taxon)tmptaxonbase, refMods, null);
3726 } else {
3727 classification.addChildTaxon((Taxon)tmptaxonbase, refMods, null);
3728 }
3729 importer.getClassificationService().saveOrUpdate(classification);
3730 }
3731 }
3732 // Set<TaxonNode> nodeList = classification.getAllNodes();
3733 // for(TaxonNode tn:nodeList) {
3734 // System.out.println(tn.getTaxon());
3735 // }
3736 }
3737 }
3738 importer.getClassificationService().saveOrUpdate(classification);
3739 // refreshTransaction();
3740 if(isSynonym) {
3741 try{
3742 Synonym castTest=CdmBase.deproxy(tmptaxonbase, Synonym.class);
3743 }catch(Exception e){
3744 TaxonNameBase<?,?> existingTnb = tmptaxonbase.getName();
3745 Synonym castTest = new Synonym(existingTnb, refMods);
3746 importer.getTaxonService().saveOrUpdate(castTest);
3747 tmptaxonbase=CdmBase.deproxy(castTest, Synonym.class);
3748 }
3749 }
3750 }
3751 if(!isSynonym) {
3752 taxon=CdmBase.deproxy(tmptaxonbase, Taxon.class);
3753 } else {
3754 syno=CdmBase.deproxy(tmptaxonbase, Synonym.class);
3755 }
3756
3757 }
3758
3759
3760 /**
3761 *
3762 */
3763 private Taxon buildHierarchy() {
3764 logger.info("buildHierarchy");
3765 Taxon higherTaxon = null;
3766 //add the maxRank as a root
3767 if(hierarchy.containsKey(configState.getConfig().getMaxRank())){
3768 Taxon ct=hierarchy.get(configState.getConfig().getMaxRank());
3769 if(!taxonExistsInClassification(higherTaxon, ct)) {
3770 //System.out.println("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"+hierarchy.get(configState.getConfig().getMaxRank()));
3771 classification.addChildTaxon(ct, refMods, null);
3772 }
3773 higherTaxon = hierarchy.get(configState.getConfig().getMaxRank());
3774 // return higherTaxon;
3775 }
3776 //add the relation to the highertaxon, except if the current rank to add IS the maxRank
3777 if(hierarchy.containsKey(Rank.SUBFAMILY()) && !configState.getConfig().getMaxRank().equals(Rank.SUBFAMILY())){
3778 higherTaxon=saveAndGetHigherTaxon(Rank.SUBFAMILY(),higherTaxon);
3779 }
3780 if(hierarchy.containsKey(Rank.TRIBE())&& !configState.getConfig().getMaxRank().equals(Rank.TRIBE())){
3781 higherTaxon=saveAndGetHigherTaxon(Rank.TRIBE(),higherTaxon);
3782 }
3783 if(hierarchy.containsKey(Rank.SUBTRIBE())&& !configState.getConfig().getMaxRank().equals(Rank.SUBTRIBE())){
3784 higherTaxon=saveAndGetHigherTaxon(Rank.SUBTRIBE(),higherTaxon);
3785 }
3786 if(hierarchy.containsKey(Rank.GENUS())&& !configState.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3787 higherTaxon=saveAndGetHigherTaxon(Rank.GENUS(),higherTaxon);
3788 }
3789 if(hierarchy.containsKey(Rank.SUBGENUS())&& !configState.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3790 higherTaxon=saveAndGetHigherTaxon(Rank.SUBGENUS(),higherTaxon);
3791 }
3792 importer.getClassificationService().saveOrUpdate(classification);
3793 return higherTaxon;
3794 }
3795
3796 private Taxon saveAndGetHigherTaxon(Rank r, Taxon higherTaxon){
3797 Taxon ct=hierarchy.get(r);
3798 if(!taxonExistsInClassification(higherTaxon,ct )) {
3799 if(higherTaxon != null && ct!=null) {
3800 classification.addParentChild(higherTaxon, ct, refMods, null);
3801 } else
3802 if(higherTaxon == null && ct !=null) {
3803 classification.addChildTaxon(ct, refMods, null);
3804 }
3805 }
3806 return ct;
3807 }
3808
3809 private boolean taxonExistsInClassification(Taxon parent, Taxon child){
3810 logger.info("taxonExistsInClassification");
3811 // System.out.println("LOOK IF TAXA EXIST "+parent+", "+child);
3812 boolean found=false;
3813 if(parent !=null){
3814 for (TaxonNode p : classification.getAllNodes()){
3815 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
3816 for (TaxonNode c : p.getChildNodes()) {
3817 if (c.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3818 found=true;
3819 break;
3820 }
3821 }
3822 }
3823 }
3824 }
3825 else{
3826 for (TaxonNode p : classification.getAllNodes()){
3827 if(p.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3828 found=true;
3829 break;
3830 }
3831 }
3832 }
3833 // System.out.println("LOOK IF TAXA EXIST? "+found);
3834 return found;
3835 }
3836 /**
3837 * @param nameToBeFilledTest
3838 */
3839 @SuppressWarnings("rawtypes")
3840 public void setParsedName(TaxonNameBase nameToBeFilledTest) {
3841 this.taxonnamebase = (NonViralName<?>) nameToBeFilledTest;
3842
3843 }
3844 //variety dwcranks:varietyEpithet
3845 /**
3846 * @return the author
3847 */
3848 public String getAuthor() {
3849 return author;
3850 }
3851 /**
3852 * @return
3853 */
3854 public Taxon getTaxon() {
3855 return taxon;
3856 }
3857 /**
3858 * @return
3859 */
3860 public NonViralName<?> getTaxonNameBase() {
3861 return taxonnamebase;
3862 }
3863
3864 /**
3865 * @param findOrCreateTaxon
3866 */
3867 public void setForm(Taxon form) {
3868 this.form=form;
3869
3870 }
3871 /**
3872 * @param findOrCreateTaxon
3873 */
3874 public void setVariety(Taxon variety) {
3875 this.variety=variety;
3876
3877 }
3878 /**
3879 * @param string
3880 * @return
3881 */
3882 @SuppressWarnings("rawtypes")
3883 public Taxon findOrCreateTaxon(String partialname,String fullname, Rank rank, Rank globalrank) {
3884 logger.info("findOrCreateTaxon");
3885 sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
3886 //takes too much time
3887 // List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3888 // logger.info("tmpList returned: "+tmpList.size());
3889
3890 NomenclaturalStatusType statusType = null;
3891 if (!getStatus().isEmpty()){
3892 try {
3893 statusType = nomStatusString2NomStatus(getStatus());
3894 } catch (UnknownCdmTypeException e) {
3895 addProblematicStatusToFile(getStatus());
3896 logger.warn("Problem with status");
3897 }
3898 }
3899
3900 List<TaxonBase> tmpListFiltered = new ArrayList<TaxonBase>();
3901
3902 Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, fullname, MatchMode.BEGINNING, null, null, null, null, null);
3903
3904 tmpListFiltered.addAll(taxontest.getRecords());
3905 taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, partialname, MatchMode.BEGINNING, null, null, null, null, null);
3906 tmpListFiltered.addAll(taxontest.getRecords());
3907
3908 //logger.info("tmpListFiltered returned: "+tmpListFiltered.size());
3909
3910 boolean nameCorrected=false;
3911 if (fullname.indexOf(partialname)<0) {
3912 nameCorrected=true;
3913 }
3914
3915 boolean foundIdentic=false;
3916 Taxon tmp=null;
3917 // Taxon tmpPartial=null;
3918 for (TaxonBase tmpb:tmpListFiltered){
3919 if(tmpb !=null){
3920 TaxonNameBase tnb = tmpb.getName();
3921 Rank crank=null;
3922 if (tnb != null){
3923 // //System.out.println(tnb.getTitleCache());
3924 // if (tnb.getTitleCache().split("sec.")[0].equals(partialname) ||tnb.getTitleCache().split("sec.")[0].equals(fullname) ){
3925 if(globalrank.equals(rank) || (globalrank.isLower(Rank.SPECIES()) && rank.equals(Rank.SPECIES()))){
3926 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(fullname) ){
3927 crank =tnb.getRank();
3928 if (crank !=null && rank !=null){
3929 if (crank.equals(rank)){
3930 foundIdentic=true;
3931 try{
3932 tmp=(Taxon)tmpb;
3933 break;
3934 }catch(Exception e){
3935 e.printStackTrace();
3936 }
3937 }
3938 }
3939 }
3940 if(nameCorrected){ //for corrected names such as Anochetus -- A. blf-pat
3941 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
3942 crank =tnb.getRank();
3943 if (crank !=null && rank !=null){
3944 if (crank.equals(rank)){
3945 foundIdentic=true;
3946 try{
3947 tmp=(Taxon)tmpb;
3948 break;
3949 }catch(Exception e){
3950 e.printStackTrace();
3951 }
3952 }
3953 }
3954 }
3955 }
3956 }
3957 else{
3958 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
3959 crank =tnb.getRank();
3960 if (crank !=null && rank !=null){
3961 if (crank.equals(rank)){
3962 foundIdentic=true;
3963 try{
3964 tmp=(Taxon)tmpb;
3965 break;
3966 }catch(Exception e){
3967 e.printStackTrace();
3968 }
3969 }
3970 }
3971 }
3972 }
3973 }
3974 }
3975 }
3976 boolean statusMatch=false;
3977 boolean appendedMatch=false;
3978 if(tmp !=null && foundIdentic){
3979 statusMatch=compareStatus(tmp, statusType);
3980 if (!getStatus().isEmpty() && ! (tmp.getAppendedPhrase() == null)) {
3981 appendedMatch=tmp.getAppendedPhrase().equals(getStatus());
3982 }
3983 if (getStatus().isEmpty() && tmp.getAppendedPhrase() == null) {
3984 appendedMatch=true;
3985 }
3986
3987 }
3988 if ((tmp == null || !foundIdentic) || (tmp != null && !statusMatch) || (tmp != null && !appendedMatch && !statusMatch)){
3989
3990 NonViralName<?> tnb = getNonViralNameAccNomenclature();
3991 tnb.setRank(rank);
3992
3993 if(statusType != null) {
3994 tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3995 }
3996 if(getStatus()!=null) {
3997 tnb.setAppendedPhrase(getStatus());
3998 }
3999
4000 if(rank.equals(Rank.UNKNOWN_RANK())){
4001 tnb.setTitleCache(fullname, true);
4002 // tnb.setGenusOrUninomial(fullname);
4003 }
4004 if(rank.isHigher(Rank.GENUS())) {
4005 tnb.setGenusOrUninomial(partialname);
4006 }
4007
4008 if(rank.isHigher(Rank.SPECIES())) {
4009 tnb.setTitleCache(partialname, true);
4010 }
4011
4012 if (rank.equals(globalrank) && author != null) {
4013 if(fullname.indexOf("opulifolium")>-1) {
4014 //System.out.println("AUTOR: "+author);
4015 }
4016 tnb.setCombinationAuthorTeam(findOrCreateAuthor(author));
4017 if (getIdentifier() !=null && !getIdentifier().isEmpty()){
4018 Taxon taxonLSID = getTaxonByLSID(getIdentifier());
4019 if (taxonLSID !=null) {
4020 tmp=taxonLSID;
4021 }
4022 }
4023 }
4024
4025 if(tmp == null){
4026 if (rank.equals(Rank.FAMILY())) {
4027 tmp = buildFamily(tnb);
4028 }
4029 if (rank.equals(Rank.SUBFAMILY())) {
4030 tmp = buildSubfamily(tnb);
4031 }
4032 if (rank.equals(Rank.TRIBE())) {
4033 tmp = buildTribe(tnb);
4034 }
4035 if (rank.equals(Rank.SUBTRIBE())) {
4036 tmp = buildSubtribe(tnb);
4037 }
4038 if (rank.equals(Rank.GENUS())) {
4039 tmp = buildGenus(partialname, tnb);
4040 }
4041
4042 if (rank.equals(Rank.SUBGENUS())) {
4043 tmp = buildSubgenus(partialname, tnb);
4044 }
4045 if (rank.equals(Rank.SPECIES())) {
4046 tmp = buildSpecies(partialname, tnb);
4047 }
4048
4049 if (rank.equals(Rank.SUBSPECIES())) {
4050 tmp = buildSubspecies(partialname, tnb);
4051 }
4052
4053 if (rank.equals(Rank.VARIETY())) {
4054 tmp = buildVariety(fullname, partialname, tnb);
4055 }
4056
4057 if (rank.equals(Rank.FORM())) {
4058 tmp = buildForm(fullname, partialname, tnb);
4059 }
4060
4061 importer.getClassificationService().saveOrUpdate(classification);
4062 }
4063 }
4064
4065 tmp = CdmBase.deproxy(tmp, Taxon.class);
4066 if (rank.equals(globalrank) && author != null) {
4067 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
4068 setLSID(getIdentifier(), tmp);
4069 importer.getTaxonService().saveOrUpdate(tmp);
4070 tmp = CdmBase.deproxy(tmp, Taxon.class);
4071 }
4072 }
4073 // TaxonNameBase tnb = CdmBase.deproxy(tmp.getName(), TaxonNameBase.class);
4074
4075 this.taxon=tmp;
4076 // castTaxonNameBase(tnb, taxonnamebase);
4077 return tmp;
4078 }
4079
4080 /**
4081 * @param tnb
4082 * @return
4083 */
4084 private Taxon buildSubfamily(NonViralName<?> tnb) {
4085 Taxon tmp;
4086 // tnb.generateTitle();
4087 tmp = findMatchingTaxon(tnb,refMods);
4088 if(tmp ==null){
4089 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4090 if(!configState.getConfig().doKeepOriginalSecundum()) {
4091 tmp.setSec(configState.getConfig().getSecundum());
4092 }
4093 // tmp.setSec(refMods);
4094 // sourceHandler.addSource(refMods, tmp);
4095 if(family != null) {
4096 classification.addParentChild(family, tmp, null, null);
4097 higherRank=Rank.FAMILY();
4098 higherTaxa=family;
4099 } else {
4100 //System.out.println("ADDCHILDTAXON SUBFAMILY "+tmp);
4101 classification.addChildTaxon(tmp, null, null);
4102 }
4103 }
4104 return tmp;
4105 }
4106 /**
4107 * @param tnb
4108 * @return
4109 */
4110 private Taxon buildFamily(NonViralName<?> tnb) {
4111 Taxon tmp;
4112 // tnb.generateTitle();
4113 tmp = findMatchingTaxon(tnb,refMods);
4114 if(tmp ==null){
4115 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4116 if(!configState.getConfig().doKeepOriginalSecundum()) {
4117 tmp.setSec(configState.getConfig().getSecundum());
4118 }
4119 // tmp.setSec(refMods);
4120 //sourceHandler.addSource(refMods, tmp);
4121 //System.out.println("ADDCHILDTAXON FAMILY "+tmp);
4122 classification.addChildTaxon(tmp, null, null);
4123 }
4124 return tmp;
4125 }
4126 /**
4127 * @param fullname
4128 * @param tnb
4129 * @return
4130 */
4131 private Taxon buildForm(String fullname, String partialname, NonViralName<?> tnb) {
4132 if (genusName !=null) {
4133 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4134 }
4135 if (subgenusName !=null) {
4136 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4137 }
4138 if(speciesName !=null) {
4139 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4140 }
4141 if(subspeciesName != null) {
4142 tnb.setInfraSpecificEpithet(subspeciesName.getInfraSpecificEpithet());
4143 }
4144 if(partialname!= null) {
4145 tnb.setInfraSpecificEpithet(partialname);
4146 }
4147 //TODO how to save form??
4148 tnb.setTitleCache(fullname, true);
4149 Taxon tmp = findMatchingTaxon(tnb,refMods);
4150 if(tmp ==null){
4151 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4152 if(!configState.getConfig().doKeepOriginalSecundum()) {
4153 tmp.setSec(configState.getConfig().getSecundum());
4154 }
4155 // tmp.setSec(refMods);
4156 //sourceHandler.addSource(refMods, tmp);
4157 if (subspecies !=null) {
4158 classification.addParentChild(subspecies, tmp, null, null);
4159 higherRank=Rank.SUBSPECIES();
4160 higherTaxa=subspecies;
4161 } else {
4162 if (species !=null) {
4163 classification.addParentChild(species, tmp, null, null);
4164 higherRank=Rank.SPECIES();
4165 higherTaxa=species;
4166 }
4167 else{
4168 // System.out.println("ADDCHILDTAXON FORM "+tmp);
4169 classification.addChildTaxon(tmp, null, null);
4170 }
4171 }
4172 }
4173 return tmp;
4174 }
4175 /**
4176 * @param fullname
4177 * @param tnb
4178 * @return
4179 */
4180 private Taxon buildVariety(String fullname, String partialname, NonViralName<?> tnb) {
4181 Taxon tmp;
4182 if (genusName !=null) {
4183 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4184 }
4185 if (subgenusName !=null) {
4186 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4187 }
4188 if(speciesName !=null) {
4189 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4190 }
4191 if(subspeciesName != null) {
4192 tnb.setInfraSpecificEpithet(subspeciesName.getSpecificEpithet());
4193 }
4194 if(partialname != null) {
4195 tnb.setInfraSpecificEpithet(partialname);
4196 }
4197 //TODO how to save variety?
4198 tnb.setTitleCache(fullname, true);
4199 tmp = findMatchingTaxon(tnb,refMods);
4200 if(tmp ==null){
4201 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4202 if(!configState.getConfig().doKeepOriginalSecundum()) {
4203 tmp.setSec(configState.getConfig().getSecundum());
4204 }
4205 // tmp.setSec(refMods);
4206 //sourceHandler.addSource(refMods, tmp);
4207 if (subspecies !=null) {
4208 classification.addParentChild(subspecies, tmp, null, null);
4209 higherRank=Rank.SUBSPECIES();
4210 higherTaxa=subspecies;
4211 } else {
4212 if(species !=null) {
4213 classification.addParentChild(species, tmp, null, null);
4214 higherRank=Rank.SPECIES();
4215 higherTaxa=species;
4216 }
4217 else{
4218 //System.out.println("ADDCHILDTAXON VARIETY "+tmp);
4219 classification.addChildTaxon(tmp, null, null);
4220 }
4221 }
4222 }
4223 return tmp;
4224 }
4225 /**
4226 * @param partialname
4227 * @param tnb
4228 * @return
4229 */
4230 private Taxon buildSubspecies(String partialname, NonViralName<?> tnb) {
4231 if (genusName !=null) {
4232 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4233 }
4234 if (subgenusName !=null) {
4235 // System.out.println("SUB:"+subgenusName.getInfraGenericEpithet());
4236 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4237 }
4238 if(speciesName !=null) {
4239 // System.out.println("SPE:"+speciesName.getSpecificEpithet());
4240 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4241 }
4242 tnb.setInfraSpecificEpithet(partialname);
4243 Taxon tmp = findMatchingTaxon(tnb,refMods);
4244 if(tmp ==null){
4245 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4246 if(!configState.getConfig().doKeepOriginalSecundum())
4247 {
4248 tmp.setSec(configState.getConfig().getSecundum());
4249 // tmp.setSec(refMods);
4250 //sourceHandler.addSource(refMods, tmp);
4251 }
4252
4253 if(species != null) {
4254 classification.addParentChild(species, tmp, null, null);
4255 higherRank=Rank.SPECIES();
4256 higherTaxa=species;
4257 }
4258 else{
4259 //System.out.println("ADDCHILDTAXON SUBSPECIES "+tmp);
4260 classification.addChildTaxon(tmp, null, null);
4261 }
4262 }
4263 return tmp;
4264 }
4265 /**
4266 * @param partialname
4267 * @param tnb
4268 * @return
4269 */
4270 private Taxon buildSpecies(String partialname, NonViralName<?> tnb) {
4271 if (genusName !=null) {
4272 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4273 }
4274 if (subgenusName !=null) {
4275 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4276 }
4277 tnb.setSpecificEpithet(partialname.toLowerCase());
4278 Taxon tmp = findMatchingTaxon(tnb,refMods);
4279 if(tmp ==null){
4280 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4281 if(!configState.getConfig().doKeepOriginalSecundum()) {
4282 tmp.setSec(configState.getConfig().getSecundum());
4283 }
4284 // tmp.setSec(refMods);
4285 //sourceHandler.addSource(refMods, tmp);
4286 if (subgenus !=null) {
4287 classification.addParentChild(subgenus, tmp, null, null);
4288 higherRank=Rank.SUBGENUS();
4289 higherTaxa=subgenus;
4290 } else {
4291 if (genus !=null) {
4292 classification.addParentChild(genus, tmp, null, null);
4293 higherRank=Rank.GENUS();
4294 higherTaxa=genus;
4295 }
4296 else{
4297 //System.out.println("ADDCHILDTAXON SPECIES "+tmp);
4298 classification.addChildTaxon(tmp, null, null);
4299 }
4300 }
4301 }
4302 return tmp;
4303 }
4304 /**
4305 * @param partialname
4306 * @param tnb
4307 * @return
4308 */
4309 private Taxon buildSubgenus(String partialname, NonViralName<?> tnb) {
4310 tnb.setInfraGenericEpithet(partialname);
4311 if (genusName !=null) {
4312 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4313 }
4314 Taxon tmp = findMatchingTaxon(tnb,refMods);
4315 if(tmp ==null){
4316 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4317 if(!configState.getConfig().doKeepOriginalSecundum()) {
4318 tmp.setSec(configState.getConfig().getSecundum());
4319 }
4320 // tmp.setSec(refMods);
4321 //sourceHandler.addSource(refMods, tmp);
4322 if(genus != null) {
4323 classification.addParentChild(genus, tmp, null, null);
4324 higherRank=Rank.GENUS();
4325 higherTaxa=genus;
4326 } else{
4327 //System.out.println("ADDCHILDTAXON SUBGENUS "+tmp);
4328 classification.addChildTaxon(tmp, null, null);
4329 }
4330 }
4331 return tmp;
4332 }
4333 /**
4334 * @param partialname
4335 * @param tnb
4336 * @return
4337 */
4338 private Taxon buildGenus(String partialname, NonViralName<?> tnb) {
4339 Taxon tmp;
4340 tnb.setGenusOrUninomial(partialname);
4341
4342
4343 tmp = findMatchingTaxon(tnb,refMods);
4344 if(tmp ==null){
4345 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4346 if(!configState.getConfig().doKeepOriginalSecundum())
4347 {
4348 tmp.setSec(configState.getConfig().getSecundum());
4349 // tmp.setSec(refMods);
4350 //sourceHandler.addSource(refMods, tmp);
4351 }
4352
4353 if(subtribe != null) {
4354 classification.addParentChild(subtribe, tmp, null, null);
4355 higherRank=Rank.SUBTRIBE();
4356 higherTaxa=subtribe;
4357 } else{
4358 if(tribe !=null) {
4359 classification.addParentChild(tribe, tmp, null, null);
4360 higherRank=Rank.TRIBE();
4361 higherTaxa=tribe;
4362 } else{
4363 if(subfamily !=null) {
4364 classification.addParentChild(subfamily, tmp, null, null);
4365 higherRank=Rank.SUBFAMILY();
4366 higherTaxa=subfamily;
4367 } else
4368 if(family !=null) {
4369 classification.addParentChild(family, tmp, null, null);
4370 higherRank=Rank.FAMILY();
4371 higherTaxa=family;
4372 }
4373 else{
4374 //System.out.println("ADDCHILDTAXON GENUS "+tmp);
4375 classification.addChildTaxon(tmp, null, null);
4376 }
4377 }
4378 }
4379 }
4380 return tmp;
4381 }
4382
4383 /**
4384 * @param tnb
4385 * @return
4386 */
4387 private Taxon buildSubtribe(NonViralName<?> tnb) {
4388 Taxon tmp = findMatchingTaxon(tnb,refMods);
4389 if(tmp==null){
4390 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4391 if(!configState.getConfig().doKeepOriginalSecundum()) {
4392 tmp.setSec(configState.getConfig().getSecundum());
4393 }
4394 // tmp.setSec(refMods);
4395 //sourceHandler.addSource(refMods, tmp);
4396 if(tribe != null) {
4397 classification.addParentChild(tribe, tmp, null, null);
4398 higherRank=Rank.TRIBE();
4399 higherTaxa=tribe;
4400 } else{
4401 //System.out.println("ADDCHILDTAXON SUBTRIBE "+tmp);
4402 classification.addChildTaxon(tmp, null, null);
4403 }
4404 }
4405 return tmp;
4406 }
4407 /**
4408 * @param tnb
4409 * @return
4410 */
4411 private Taxon buildTribe(NonViralName<?> tnb) {
4412 Taxon tmp = findMatchingTaxon(tnb,refMods);
4413 if(tmp==null){
4414 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4415 if(!configState.getConfig().doKeepOriginalSecundum()) {
4416 tmp.setSec(configState.getConfig().getSecundum());
4417 }
4418 // tmp.setSec(refMods);
4419 //sourceHandler.addSource(refMods, tmp);
4420 if (subfamily !=null) {
4421 classification.addParentChild(subfamily, tmp, null, null);
4422 higherRank=Rank.SUBFAMILY();
4423 higherTaxa=subfamily;
4424 } else {
4425 if(family != null) {
4426 classification.addParentChild(family, tmp, null, null);
4427 higherRank=Rank.FAMILY();
4428 higherTaxa=family;
4429 }
4430 else{
4431 //System.out.println("ADDCHILDTAXON TRIBE "+tmp);
4432 classification.addChildTaxon(tmp, null, null);
4433 }
4434 }
4435 }
4436 return tmp;
4437 }
4438
4439 /**
4440 * @param identifier2
4441 * @return
4442 */
4443 @SuppressWarnings("rawtypes")
4444 private Taxon getTaxonByLSID(String identifier) {
4445 //logger.info("getTaxonByLSID");
4446 // boolean lsidok=false;
4447 String id = identifier.split("__")[0];
4448 // String source = identifier.split("__")[1];
4449 LSID lsid = null;
4450 if (id.indexOf("lsid")>-1){
4451 try {
4452 lsid = new LSID(id);
4453 // lsidok=true;
4454 } catch (MalformedLSIDException e) {
4455 logger.warn("Malformed LSID");
4456 }
4457 }
4458 if (lsid !=null){
4459 List<Taxon> taxa = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
4460 LSID currentlsid=null;
4461 for (Taxon t:taxa){
4462 currentlsid = t.getLsid();
4463 if (currentlsid !=null){
4464 if (currentlsid.getLsid().equals(lsid.getLsid())){
4465 try{
4466 return (Taxon) t;
4467 }
4468 catch(Exception e){logger.warn("Exception occurred while comparing LSIDs "+e );}
4469 }
4470 }
4471 }
4472 }
4473 return null;
4474 }
4475 /**
4476 * @param author2
4477 * @return
4478 */
4479 @SuppressWarnings("rawtypes")
4480 private Person findOrCreateAuthor(String author2) {
4481 //logger.info("findOrCreateAuthor");
4482 List<UuidAndTitleCache<Person>> hiberPersons = importer.getAgentService().getPersonUuidAndTitleCache();
4483 for (UuidAndTitleCache<Person> hibernateP:hiberPersons){
4484 if(hibernateP.getTitleCache().equals(author2)) {
4485 AgentBase existing = importer.getAgentService().find(hibernateP.getUuid());
4486 return CdmBase.deproxy(existing, Person.class);
4487 }
4488 }
4489 Person p = Person.NewInstance();
4490 p.setTitleCache(author2,true);
4491 importer.getAgentService().saveOrUpdate(p);
4492 return CdmBase.deproxy(p, Person.class);
4493 }
4494 /**
4495 * @param author the author to set
4496 */
4497 public void setAuthor(String author) {
4498 this.author = author;
4499 }
4500
4501 /**
4502 * @return the higherTaxa
4503 */
4504 public Taxon getHigherTaxa() {
4505 return higherTaxa;
4506 }
4507 /**
4508 * @param higherTaxa the higherTaxa to set
4509 */
4510 public void setHigherTaxa(Taxon higherTaxa) {
4511 this.higherTaxa = higherTaxa;
4512 }
4513 /**
4514 * @return the higherRank
4515 */
4516 public Rank getHigherRank() {
4517 return higherRank;
4518 }
4519 /**
4520 * @param higherRank the higherRank to set
4521 */
4522 public void setHigherRank(Rank higherRank) {
4523 this.higherRank = higherRank;
4524 }
4525 public String getName(){
4526 if (newName.isEmpty()) {
4527 return originalName;
4528 } else {
4529 return newName;
4530 }
4531
4532 }
4533 /**
4534 * @return the fullName
4535 */
4536 public String getOriginalName() {
4537 return originalName;
4538 }
4539 /**
4540 * @param fullName the fullName to set
4541 */
4542 public void setOriginalName(String fullName) {
4543 this.originalName = fullName;
4544 }
4545 /**
4546 * @return the newName
4547 */
4548 public String getNewName() {
4549 return newName;
4550 }
4551 /**
4552 * @param newName the newName to set
4553 */
4554 public void setNewName(String newName) {
4555 this.newName = newName;
4556 }
4557 /**
4558 * @return the rank
4559 */
4560 public Rank getRank() {
4561 return rank;
4562 }
4563 /**
4564 * @param rank the rank to set
4565 */
4566 public void setRank(Rank rank) {
4567 this.rank = rank;
4568 }
4569 /**
4570 * @return the idenfitiger
4571 */
4572 public String getIdentifier() {
4573 return identifier;
4574 }
4575 /**
4576 * @param idenfitiger the idenfitiger to set
4577 */
4578 public void setIdentifier(String identifier) {
4579 this.identifier = identifier;
4580 }
4581 /**
4582 * @return the status
4583 */
4584 public String getStatus() {
4585 if (status == null) {
4586 return "";
4587 }
4588 return status;
4589 }
4590 /**
4591 * @param status the status to set
4592 */
4593 public void setStatus(String status) {
4594 this.status = status;
4595 }
4596 /**
4597 * @return the family
4598 */
4599 public Taxon getFamily() {
4600 return family;
4601 }
4602 /**
4603 * @param family the family to set
4604 */
4605 @SuppressWarnings("rawtypes")
4606 public void setFamily(Taxon family) {
4607 this.family = family;
4608 TaxonNameBase taxonNameBase = CdmBase.deproxy(family.getName(), TaxonNameBase.class);
4609 familyName = castTaxonNameBase(taxonNameBase,familyName);
4610 }
4611 /**
4612 * @return the subfamily
4613 */
4614 public Taxon getSubfamily() {
4615 return subfamily;
4616 }
4617 /**
4618 * @param subfamily the subfamily to set
4619 */
4620 @SuppressWarnings("rawtypes")
4621 public void setSubfamily(Taxon subfamily) {
4622 this.subfamily = subfamily;
4623 TaxonNameBase taxonNameBase = CdmBase.deproxy(subfamily.getName(), TaxonNameBase.class);
4624 subfamilyName = castTaxonNameBase(taxonNameBase,subfamilyName);
4625 }
4626 /**
4627 * @return the tribe
4628 */
4629 public Taxon getTribe() {
4630 return tribe;
4631 }
4632 /**
4633 * @param tribe the tribe to set
4634 */
4635 @SuppressWarnings("rawtypes")
4636 public void setTribe(Taxon tribe) {
4637 this.tribe = tribe;
4638 TaxonNameBase taxonNameBase = CdmBase.deproxy(tribe.getName(), TaxonNameBase.class);
4639 tribeName = castTaxonNameBase(taxonNameBase,tribeName);
4640 }
4641 /**
4642 * @return the subtribe
4643 */
4644 public Taxon getSubtribe() {
4645 return subtribe;
4646 }
4647 /**
4648 * @param subtribe the subtribe to set
4649 */
4650 @SuppressWarnings("rawtypes")
4651 public void setSubtribe(Taxon subtribe) {
4652 this.subtribe = subtribe;
4653 TaxonNameBase taxonNameBase = CdmBase.deproxy(subtribe.getName(), TaxonNameBase.class);
4654 subtribeName =castTaxonNameBase(taxonNameBase,subtribeName);
4655 }
4656 /**
4657 * @return the genus
4658 */
4659 public Taxon getGenus() {
4660 return genus;
4661 }
4662 /**
4663 * @param genus the genus to set
4664 */
4665 @SuppressWarnings("rawtypes")
4666 public void setGenus(Taxon genus) {
4667 if (genus != null){
4668 this.genus = genus;
4669 TaxonNameBase taxonNameBase = CdmBase.deproxy(genus.getName(), TaxonNameBase.class);
4670 genusName = castTaxonNameBase(taxonNameBase,genusName);
4671 }
4672 }
4673 /**
4674 * @return the subgenus
4675 */
4676 public Taxon getSubgenus() {
4677 return subgenus;
4678 }
4679 /**
4680 * @param subgenus the subgenus to set
4681 */
4682 @SuppressWarnings("rawtypes")
4683 public void setSubgenus(Taxon subgenus) {
4684 this.subgenus = subgenus;
4685 TaxonNameBase taxonNameBase = CdmBase.deproxy(subgenus.getName(), TaxonNameBase.class);
4686 subgenusName = castTaxonNameBase(taxonNameBase,subgenusName);
4687 }
4688 /**
4689 * @return the species
4690 */
4691 public Taxon getSpecies() {
4692 return species;
4693 }
4694 /**
4695 * @param species the species to set
4696 */
4697 public void setSpecies(Taxon species) {
4698 if (species != null){
4699 this.species = species;
4700 @SuppressWarnings("rawtypes")
4701 TaxonNameBase taxonNameBase = CdmBase.deproxy(species.getName(), TaxonNameBase.class);
4702 speciesName = castTaxonNameBase(taxonNameBase,speciesName);
4703 }
4704 }
4705 /**
4706 * @return the subspecies
4707 */
4708 public Taxon getSubspecies() {
4709 return subspecies;
4710 }
4711 /**
4712 * @param subspecies the subspecies to set
4713 */
4714 @SuppressWarnings("rawtypes")
4715 public void setSubspecies(Taxon subspecies) {
4716 this.subspecies = subspecies;
4717 TaxonNameBase taxonNameBase = CdmBase.deproxy(subspecies.getName(), TaxonNameBase.class);
4718 subspeciesName = castTaxonNameBase(taxonNameBase,subspeciesName);
4719
4720 }
4721
4722
4723
4724 }
4725
4726
4727 /**
4728 * @param status
4729 */
4730 private void addProblematicStatusToFile(String status) {
4731 try{
4732 FileWriter fstream = new FileWriter("/home/pkelbert/Bureau/StatusUnknown_"+classification.getTitleCache()+".txt",true);
4733 BufferedWriter out = new BufferedWriter(fstream);
4734 out.write(status+"\n");
4735 //Close the output stream
4736 out.close();
4737 }catch (Exception e){//Catch exception if any
4738 System.err.println("Error: " + e.getMessage());
4739 }
4740
4741 }
4742
4743
4744
4745 /**
4746 * @param tnb
4747 * @return
4748 */
4749 private Taxon findMatchingTaxon(NonViralName<?> tnb, Reference refMods) {
4750 logger.info("findMatchingTaxon");
4751 Taxon tmp=null;
4752
4753 refMods=CdmBase.deproxy(refMods, Reference.class);
4754 boolean insertAsExisting =false;
4755 List<Taxon> existingTaxa = new ArrayList<Taxon>();
4756 try {
4757 existingTaxa = getMatchingTaxa(tnb);
4758 } catch (Exception e1) {
4759 // TODO Auto-generated catch block
4760 e1.printStackTrace();
4761 }
4762 double similarityScore=0.0;
4763 double similarityAuthor=-1;
4764 String author1="";
4765 String author2="";
4766 String t1="";
4767 String t2="";
4768 for (Taxon bestMatchingTaxon : existingTaxa){
4769 if (!existingTaxa.isEmpty() && configState.getConfig().isInteractWithUser() && !insertAsExisting) {
4770 // System.out.println("tnb "+tnb.getTitleCache());
4771 // System.out.println("ext "+bestMatchingTaxon.getTitleCache());
4772 try {
4773 if(tnb.getAuthorshipCache()!=null) {
4774 author1=tnb.getAuthorshipCache();
4775 }
4776 } catch (Exception e) {
4777 // TODO Auto-generated catch block
4778 e.printStackTrace();
4779 }
4780 try {
4781 if(castTaxonNameBase(bestMatchingTaxon.getName()).getAuthorshipCache()!=null) {
4782 author2=castTaxonNameBase(bestMatchingTaxon.getName()).getAuthorshipCache();
4783 }
4784 } catch (Exception e) {
4785 // TODO Auto-generated catch block
4786 e.printStackTrace();
4787 }
4788 try {
4789 t1=tnb.getTitleCache().split("sec.")[0].trim();
4790 if (author1!=null && !StringUtils.isEmpty(author1)) {
4791 t1=t1.split(Pattern.quote(author1))[0];
4792 }
4793 } catch (Exception e) {
4794 // TODO Auto-generated catch block
4795 e.printStackTrace();
4796 }
4797 try {
4798 t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
4799 if (author2!=null && !StringUtils.isEmpty(author2)) {
4800 t2=t2.split(Pattern.quote(author2))[0];
4801 }
4802 } catch (Exception e) {
4803 // TODO Auto-generated catch block
4804 e.printStackTrace();
4805 }
4806 similarityScore=similarity(t1.trim(), t2.trim());
4807 // System.out.println("taxascore: "+similarityScore);
4808 similarityAuthor=similarity(author1.trim(), author2.trim());
4809 // System.out.println("authorscore: "+similarityAuthor);
4810 insertAsExisting = compareAndCheckTaxon(tnb, refMods, similarityScore, bestMatchingTaxon,similarityAuthor);
4811 }
4812 if(insertAsExisting) {
4813 //System.out.println("KEEP "+bestMatchingTaxon.toString());
4814 tmp=bestMatchingTaxon;
4815 sourceHandler.addSource(refMods, tmp);
4816 return tmp;
4817 }
4818 }
4819 return tmp;
4820 }
4821
4822
4823 /**
4824 * @param tnb
4825 * @param refMods
4826 * @param similarityScore
4827 * @param bestMatchingTaxon
4828 * @param similarityAuthor
4829 * @return
4830 */
4831 private boolean compareAndCheckTaxon(NonViralName<?> tnb, Reference<?> refMods, double similarityScore,
4832 Taxon bestMatchingTaxon, double similarityAuthor) {
4833 //logger.info("compareAndCheckTaxon");
4834 boolean insertAsExisting;
4835 // if (tnb.getTitleCache().split("sec.")[0].equalsIgnoreCase("Chenopodium") && bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium album")>-1) {
4836 // insertAsExisting=false;
4837 // } else{
4838 //a small hack/automatisation for Chenopodium only
4839 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase("Chenopodium") &&
4840 bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium L.")>-1) {
4841 insertAsExisting=true;
4842 } else {
4843 insertAsExisting=askIfReuseBestMatchingTaxon(tnb, bestMatchingTaxon, refMods, similarityScore,similarityAuthor);
4844 }
4845 // }
4846
4847 logDecision(tnb,bestMatchingTaxon,insertAsExisting, refMods);
4848 return insertAsExisting;
4849 }
4850
4851 /**
4852 * @return
4853 */
4854 @SuppressWarnings("rawtypes")
4855 private List<Taxon> getMatchingTaxa(TaxonNameBase tnb) {
4856 //logger.info("getMatchingTaxon");
4857 Pager<TaxonBase> pager=importer.getTaxonService().findByTitle(TaxonBase.class, tnb.getTitleCache().split("sec.")[0].trim(), MatchMode.BEGINNING, null, null, null, null, null);
4858 List<TaxonBase>records = pager.getRecords();
4859
4860 List<Taxon> existingTaxons = new ArrayList<Taxon>();
4861 for (TaxonBase r:records){
4862 try{
4863 Taxon bestMatchingTaxon = (Taxon)r;
4864 // System.out.println("best: "+bestMatchingTaxon.getTitleCache());
4865 if(compareTaxonNameLength(bestMatchingTaxon.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4866 existingTaxons.add(bestMatchingTaxon);
4867 }
4868 }catch(ClassCastException e){logger.warn("classcast exception, might be a synonym, ignore it");}
4869 }
4870 Taxon bmt = importer.getTaxonService().findBestMatchingTaxon(tnb.getTitleCache());
4871 if (!existingTaxons.contains(bmt) && bmt!=null) {
4872 if(compareTaxonNameLength(bmt.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4873 existingTaxons.add(bmt);
4874 }
4875 }
4876 return existingTaxons;
4877 }
4878
4879 /**
4880 * Check if the found Taxon can reasonnably be the same
4881 * example: with and without author should match, but the subspecies should not be suggested for a genus
4882 * */
4883 private boolean compareTaxonNameLength(String f, String o){
4884 //logger.info("compareTaxonNameLength");
4885 boolean lengthOk=false;
4886 int sizeF = f.length();
4887 int sizeO = o.length();
4888 if (sizeO>=sizeF) {
4889 lengthOk=true;
4890 }
4891 if(sizeF>sizeO) {
4892 if (sizeF-sizeO>10) {
4893 lengthOk=false;
4894 } else {
4895 lengthOk=true;
4896 }
4897 }
4898
4899 // System.out.println(lengthOk+": compare "+f+" ("+f.length()+") and "+o+" ("+o.length()+")");
4900 return lengthOk;
4901 }
4902
4903 private double similarity(String s1, String s2) {
4904 //logger.info("similarity");
4905 //System.out.println("similarity *"+s1+"* vs. *"+s2+"*");
4906 if(!StringUtils.isEmpty(s1) && !StringUtils.isEmpty(s2)){
4907 String l1=s1.toLowerCase().trim();
4908 String l2=s2.toLowerCase().trim();
4909 if (l1.length() < l2.length()) { // s1 should always be bigger
4910 String swap = l1; l1 = l2; l2 = swap;
4911 }
4912 int bigLen = l1.length();
4913 if (bigLen == 0) { return 1.0; /* both strings are zero length */ }
4914 return (bigLen - computeEditDistance(l1, l2)) / (double) bigLen;
4915 }
4916 else{
4917 if(s1!=null && s2!=null){
4918 if (s1.equalsIgnoreCase(s2)) {
4919 return 1;
4920 }
4921 }
4922 return -1;
4923 }
4924 }
4925
4926 private int computeEditDistance(String s1, String s2) {
4927 //logger.info("computeEditDistance");
4928 int[] costs = new int[s2.length() + 1];
4929 for (int i = 0; i <= s1.length(); i++) {
4930 int lastValue = i;
4931 for (int j = 0; j <= s2.length(); j++) {
4932 if (i == 0) {
4933 costs[j] = j;
4934 } else {
4935 if (j > 0) {
4936 int newValue = costs[j - 1];
4937 if (s1.charAt(i - 1) != s2.charAt(j - 1)) {
4938 newValue = Math.min(Math.min(newValue, lastValue),
4939 costs[j]) + 1;
4940 }
4941 costs[j - 1] = lastValue;
4942 lastValue = newValue;
4943 }
4944 }
4945 }
4946 if (i > 0) {
4947 costs[s2.length()] = lastValue;
4948 }
4949 }
4950 return costs[s2.length()];
4951 }
4952
4953 Map<Rank, Taxon> hierarchy = new HashMap<Rank, Taxon>();
4954 /**
4955 * @param taxonnamebase
4956 */
4957 @SuppressWarnings("rawtypes")
4958 public void lookForParentNode(NonViralName<?> taxonnamebase, Taxon tax, Reference<?> ref, MyName myName) {
4959 logger.info("lookForParentNode "+taxonnamebase.getTitleCache()+" for "+myName.toString());
4960 //System.out.println("LOOK FOR PARENT NODE "+taxonnamebase.toString()+"; "+tax.toString()+"; "+taxonnamebase.getRank());
4961 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
4962 if (taxonnamebase.getRank().equals(Rank.FORM())){
4963 handleFormHierarchy(ref, myName, parser);
4964 }
4965 if (taxonnamebase.getRank().equals(Rank.VARIETY())){
4966 handleVarietyHierarchy(ref, myName, parser);
4967 }
4968 if (taxonnamebase.getRank().equals(Rank.SUBSPECIES())){
4969 handleSubSpeciesHierarchy(ref, myName, parser);
4970 }
4971 if (taxonnamebase.getRank().equals(Rank.SPECIES())){
4972 handleSpeciesHierarchy(ref, myName, parser);
4973 }
4974 if (taxonnamebase.getRank().equals(Rank.SUBGENUS())){
4975 handleSubgenusHierarchy(ref, myName, parser);
4976 }
4977
4978 if (taxonnamebase.getRank().equals(Rank.GENUS())){
4979 handleGenusHierarchy(ref, myName, parser);
4980 }
4981 if (taxonnamebase.getRank().equals(Rank.SUBTRIBE())){
4982 handleSubtribeHierarchy(ref, myName, parser);
4983 }
4984 if (taxonnamebase.getRank().equals(Rank.TRIBE())){
4985 handleTribeHierarchy(ref, myName, parser);
4986 }
4987
4988 if (taxonnamebase.getRank().equals(Rank.SUBFAMILY())){
4989 handleSubfamilyHierarchy(ref, myName, parser);
4990 }
4991 }
4992
4993 /**
4994 * @param ref
4995 * @param myName
4996 * @param parser
4997 */
4998 private void handleSubfamilyHierarchy(Reference<?> ref, MyName myName, INonViralNameParser<?> parser) {
4999 System.out.println("handleSubfamilyHierarchy");
5000 String parentStr = myName.getFamilyStr();
5001 Rank r = Rank.FAMILY();
5002 if(parentStr!=null){
5003
5004 Taxon parent = null;
5005 Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, parentStr, MatchMode.BEGINNING, null, null, null, null, null);
5006 for(TaxonBase tb:taxontest.getRecords()){
5007 try {
5008 if (tb.getName().getRank().equals(r)) {
5009 parent=CdmBase.deproxy(tb, Taxon.class);
5010 }
5011 break;
5012 } catch (Exception e) {
5013 // TODO Auto-generated catch block
5014 e.printStackTrace();
5015 }
5016 }
5017 if(parent == null) {
5018 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5019 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5020 if(tmp ==null)
5021 {
5022 parent=Taxon.NewInstance(parentNameName, ref);
5023 importer.getTaxonService().save(parent);
5024 parent = CdmBase.deproxy(parent, Taxon.class);
5025 } else {
5026 parent=tmp;
5027 }
5028 lookForParentNode(parentNameName, parent, ref,myName);
5029
5030 }
5031 hierarchy.put(r,parent);
5032 }
5033 }
5034
5035 /**
5036 * @param ref
5037 * @param myName
5038 * @param parser
5039 */
5040 private void handleTribeHierarchy(Reference<?> ref, MyName myName, INonViralNameParser<?> parser) {
5041 String parentStr = myName.getSubfamilyStr();
5042 Rank r = Rank.SUBFAMILY();
5043 if (parentStr == null){
5044 parentStr = myName.getFamilyStr();
5045 r = Rank.FAMILY();
5046 }
5047 if(parentStr!=null){
5048 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5049 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5050 // importer.getTaxonService().save(parent);
5051 // parent = CdmBase.deproxy(parent, Taxon.class);
5052
5053 boolean parentDoesNotExists = true;
5054 for (TaxonNode p : classification.getAllNodes()){
5055 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5056 parentDoesNotExists = false;
5057 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5058 break;
5059 }
5060 }
5061 // if(parentDoesNotExists) {
5062 // importer.getTaxonService().save(parent);
5063 // parent = CdmBase.deproxy(parent, Taxon.class);
5064 // lookForParentNode(parentNameName, parent, ref,myName);
5065 // }
5066 if(parentDoesNotExists) {
5067 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5068 if(tmp ==null)
5069 {
5070 parent=Taxon.NewInstance(parentNameName, ref);
5071 importer.getTaxonService().save(parent);
5072 parent = CdmBase.deproxy(parent, Taxon.class);
5073 } else {
5074 parent=tmp;
5075 }
5076 lookForParentNode(parentNameName, parent, ref,myName);
5077
5078 }
5079 hierarchy.put(r,parent);
5080 }
5081 }
5082
5083 /**
5084 * @param ref
5085 * @param myName
5086 * @param parser
5087 */
5088 private void handleSubtribeHierarchy(Reference<?> ref, MyName myName, INonViralNameParser<?> parser) {
5089 String parentStr = myName.getTribeStr();
5090 Rank r = Rank.TRIBE();
5091 if (parentStr == null){
5092 parentStr = myName.getSubfamilyStr();
5093 r = Rank.SUBFAMILY();
5094 }
5095 if (parentStr == null){
5096 parentStr = myName.getFamilyStr();
5097 r = Rank.FAMILY();
5098 }
5099 if(parentStr!=null){
5100 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5101 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5102 // importer.getTaxonService().save(parent);
5103 // parent = CdmBase.deproxy(parent, Taxon.class);
5104
5105 boolean parentDoesNotExists = true;
5106 for (TaxonNode p : classification.getAllNodes()){
5107 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5108 parentDoesNotExists = false;
5109 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5110
5111 break;
5112 }
5113 }
5114 // if(parentDoesNotExists) {
5115 // importer.getTaxonService().save(parent);
5116 // parent = CdmBase.deproxy(parent, Taxon.class);
5117 // lookForParentNode(parentNameName, parent, ref,myName);
5118 // }
5119 if(parentDoesNotExists) {
5120 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5121 if(tmp ==null)
5122 {
5123 parent=Taxon.NewInstance(parentNameName, ref);
5124 importer.getTaxonService().save(parent);
5125 parent = CdmBase.deproxy(parent, Taxon.class);
5126 } else {
5127 parent=tmp;
5128 }
5129 lookForParentNode(parentNameName, parent, ref,myName);
5130
5131 }
5132 hierarchy.put(r,parent);
5133 }
5134 }
5135
5136 /**
5137 * @param ref
5138 * @param myName
5139 * @param parser
5140 */
5141 private void handleGenusHierarchy(Reference<?> ref, MyName myName, INonViralNameParser<?> parser) {
5142 String parentStr = myName.getSubtribeStr();
5143 Rank r = Rank.SUBTRIBE();
5144 if (parentStr == null){
5145 parentStr = myName.getTribeStr();
5146 r = Rank.TRIBE();
5147 }
5148 if (parentStr == null){
5149 parentStr = myName.getSubfamilyStr();
5150 r = Rank.SUBFAMILY();
5151 }
5152 if (parentStr == null){
5153 parentStr = myName.getFamilyStr();
5154 r = Rank.FAMILY();
5155 }
5156 if(parentStr!=null){
5157 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5158 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5159 // importer.getTaxonService().save(parent);
5160 // parent = CdmBase.deproxy(parent, Taxon.class);
5161
5162 boolean parentDoesNotExists = true;
5163 for (TaxonNode p : classification.getAllNodes()){
5164 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5165 // System.out.println(p.getTaxon().getUuid());
5166 // System.out.println(parent.getUuid());
5167 parentDoesNotExists = false;
5168 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5169 break;
5170 }
5171 }
5172 // if(parentDoesNotExists) {
5173 // importer.getTaxonService().save(parent);
5174 // parent = CdmBase.deproxy(parent, Taxon.class);
5175 // lookForParentNode(parentNameName, parent, ref,myName);
5176 // }
5177 if(parentDoesNotExists) {
5178 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5179 if(tmp ==null)
5180 {
5181 parent=Taxon.NewInstance(parentNameName, ref);
5182 importer.getTaxonService().save(parent);
5183 parent = CdmBase.deproxy(parent, Taxon.class);
5184 } else {
5185 parent=tmp;
5186 }
5187 lookForParentNode(parentNameName, parent, ref,myName);
5188
5189 }
5190 hierarchy.put(r,parent);
5191 }
5192 }
5193
5194 /**
5195 * @param ref
5196 * @param myName
5197 * @param parser
5198 */
5199 private void handleSubgenusHierarchy(Reference<?> ref, MyName myName, INonViralNameParser<?> parser) {
5200 String parentStr = myName.getGenusStr();
5201 Rank r = Rank.GENUS();
5202
5203 if(parentStr==null){
5204 parentStr = myName.getSubtribeStr();
5205 r = Rank.SUBTRIBE();
5206 }
5207 if (parentStr == null){
5208 parentStr = myName.getTribeStr();
5209 r = Rank.TRIBE();
5210 }
5211 if (parentStr == null){
5212 parentStr = myName.getSubfamilyStr();
5213 r = Rank.SUBFAMILY();
5214 }
5215 if (parentStr == null){
5216 parentStr = myName.getFamilyStr();
5217 r = Rank.FAMILY();
5218 }
5219 if(parentStr!=null){
5220 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5221 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5222 // importer.getTaxonService().save(parent);
5223 // parent = CdmBase.deproxy(parent, Taxon.class);
5224
5225 boolean parentDoesNotExists = true;
5226 for (TaxonNode p : classification.getAllNodes()){
5227 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5228 // System.out.println(p.getTaxon().getUuid());
5229 // System.out.println(parent.getUuid());
5230 parentDoesNotExists = false;
5231 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5232 break;
5233 }
5234 }
5235 // if(parentDoesNotExists) {
5236 // importer.getTaxonService().save(parent);
5237 // parent = CdmBase.deproxy(parent, Taxon.class);
5238 // lookForParentNode(parentNameName, parent, ref,myName);
5239 // }
5240 if(parentDoesNotExists) {
5241 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5242 if(tmp ==null)
5243 {
5244 parent=Taxon.NewInstance(parentNameName, ref);
5245 importer.getTaxonService().save(parent);
5246 parent = CdmBase.deproxy(parent, Taxon.class);
5247 } else {
5248 parent=tmp;
5249 }
5250 lookForParentNode(parentNameName, parent, ref,myName);
5251
5252 }
5253 hierarchy.put(r,parent);
5254 }
5255 }
5256
5257 /**
5258 * @param ref
5259 * @param myName
5260 * @param parser
5261 */
5262 private void handleSpeciesHierarchy(Reference<?> ref, MyName myName, INonViralNameParser<?> parser) {
5263 String parentStr = myName.getSubgenusStr();
5264 Rank r = Rank.SUBGENUS();
5265
5266 if(parentStr==null){
5267 parentStr = myName.getGenusStr();
5268 r = Rank.GENUS();
5269 }
5270
5271 if(parentStr==null){
5272 parentStr = myName.getSubtribeStr();
5273 r = Rank.SUBTRIBE();
5274 }
5275 if (parentStr == null){
5276 parentStr = myName.getTribeStr();
5277 r = Rank.TRIBE();
5278 }
5279 if (parentStr == null){
5280 parentStr = myName.getSubfamilyStr();
5281 r = Rank.SUBFAMILY();
5282 }
5283 if (parentStr == null){
5284 parentStr = myName.getFamilyStr();
5285 r = Rank.FAMILY();
5286 }
5287 if(parentStr!=null){
5288 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5289 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5290 hierarchy.put(r,parent);
5291 }
5292 }
5293
5294 /**
5295 * @param ref
5296 * @param myName
5297 * @param parser
5298 */
5299 private void handleSubSpeciesHierarchy(Reference<?> ref, MyName myName, INonViralNameParser<?> parser) {
5300 String parentStr = myName.getSpeciesStr();
5301 Rank r = Rank.SPECIES();
5302
5303
5304 if(parentStr==null){
5305 parentStr = myName.getSubgenusStr();
5306 r = Rank.SUBGENUS();
5307 }
5308
5309 if(parentStr==null){
5310 parentStr = myName.getGenusStr();
5311 r = Rank.GENUS();
5312 }
5313
5314 if(parentStr==null){
5315 parentStr = myName.getSubtribeStr();
5316 r = Rank.SUBTRIBE();
5317 }
5318 if (parentStr == null){
5319 parentStr = myName.getTribeStr();
5320 r = Rank.TRIBE();
5321 }
5322 if (parentStr == null){
5323 parentStr = myName.getSubfamilyStr();
5324 r = Rank.SUBFAMILY();
5325 }
5326 if (parentStr == null){
5327 parentStr = myName.getFamilyStr();
5328 r = Rank.FAMILY();
5329 }
5330 if(parentStr!=null){
5331 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5332 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5333 hierarchy.put(r,parent);
5334 }
5335 }
5336
5337
5338 /**
5339 * @param ref
5340 * @param myName
5341 * @param parser
5342 */
5343 private void handleFormHierarchy(Reference<?> ref, MyName myName, INonViralNameParser<?> parser) {
5344 String parentStr = myName.getSubspeciesStr();
5345 Rank r = Rank.SUBSPECIES();
5346
5347
5348 if(parentStr==null){
5349 parentStr = myName.getSpeciesStr();
5350 r = Rank.SPECIES();
5351 }
5352
5353 if(parentStr==null){
5354 parentStr = myName.getSubgenusStr();
5355 r = Rank.SUBGENUS();
5356 }
5357
5358 if(parentStr==null){
5359 parentStr = myName.getGenusStr();
5360 r = Rank.GENUS();
5361 }
5362
5363 if(parentStr==null){
5364 parentStr = myName.getSubtribeStr();
5365 r = Rank.SUBTRIBE();
5366 }
5367 if (parentStr == null){
5368 parentStr = myName.getTribeStr();
5369 r = Rank.TRIBE();
5370 }
5371 if (parentStr == null){
5372 parentStr = myName.getSubfamilyStr();
5373 r = Rank.SUBFAMILY();
5374 }
5375 if (parentStr == null){
5376 parentStr = myName.getFamilyStr();
5377 r = Rank.FAMILY();
5378 }
5379 if(parentStr!=null){
5380 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5381 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5382 hierarchy.put(r,parent);
5383 }
5384 }
5385
5386 /**
5387 * @param ref
5388 * @param myName
5389 * @param parser
5390 */
5391 private void handleVarietyHierarchy(Reference<?> ref, MyName myName, INonViralNameParser<?> parser) {
5392 String parentStr = myName.getSubspeciesStr();
5393 Rank r = Rank.SUBSPECIES();
5394
5395 if(parentStr==null){
5396 parentStr = myName.getSpeciesStr();
5397 r = Rank.SPECIES();
5398 }
5399
5400 if(parentStr==null){
5401 parentStr = myName.getSubgenusStr();
5402 r = Rank.SUBGENUS();
5403 }
5404
5405 if(parentStr==null){
5406 parentStr = myName.getGenusStr();
5407 r = Rank.GENUS();
5408 }
5409
5410 if(parentStr==null){
5411 parentStr = myName.getSubtribeStr();
5412 r = Rank.SUBTRIBE();
5413 }
5414 if (parentStr == null){
5415 parentStr = myName.getTribeStr();
5416 r = Rank.TRIBE();
5417 }
5418 if (parentStr == null){
5419 parentStr = myName.getSubfamilyStr();
5420 r = Rank.SUBFAMILY();
5421 }
5422 if (parentStr == null){
5423 parentStr = myName.getFamilyStr();
5424 r = Rank.FAMILY();
5425 }
5426 if(parentStr!=null){
5427 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5428 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5429 hierarchy.put(r,parent);
5430 }
5431 }
5432
5433 /**
5434 * @param ref
5435 * @param myName
5436 * @param parser
5437 * @param parentStr
5438 * @param r
5439 * @return
5440 */
5441 private Taxon handleParentName(Reference<?> ref, MyName myName, INonViralNameParser<?> parser, String parentStr, Rank r) {
5442 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5443 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5444 // importer.getTaxonService().save(parent);
5445 // parent = CdmBase.deproxy(parent, Taxon.class);
5446
5447 boolean parentDoesNotExists = true;
5448 for (TaxonNode p : classification.getAllNodes()){
5449 if(p.getTaxon().getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(parent.getTitleCache().split("sec.")[0].trim())) {
5450 // System.out.println(p.getTaxon().getUuid());
5451 // System.out.println(parent.getUuid());
5452 parentDoesNotExists = false;
5453 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5454 break;
5455 }
5456 }
5457 if(parentDoesNotExists) {
5458 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5459 // System.out.println("FOUND PARENT "+tmp.toString()+" for "+parentNameName.toString());
5460 if(tmp ==null)
5461 {
5462 parent=Taxon.NewInstance(parentNameName, ref);
5463 importer.getTaxonService().save(parent);
5464 parent = CdmBase.deproxy(parent, Taxon.class);
5465 } else {
5466 parent=tmp;
5467 }
5468 lookForParentNode(parentNameName, parent, ref,myName);
5469
5470 }
5471 return parent;
5472 }
5473
5474 private void addNameDifferenceToFile(String originalname, String atomisedname){
5475 try{
5476 FileWriter fstream = new FileWriter("/home/pkelbert/Bureau/NamesDifferent_"+classification.getTitleCache()+".txt",true);
5477 BufferedWriter out = new BufferedWriter(fstream);
5478 out.write(originalname+" (original) versus "+replaceNull(atomisedname)+" (atomised) \n");
5479 //Close the output stream
5480 out.close();
5481 }catch (Exception e){//Catch exception if any
5482 System.err.println("Error: " + e.getMessage());
5483 }
5484 }
5485 /**
5486 * @param name
5487 * @param author
5488 * @param nomenclaturalCode2
5489 * @param rank
5490 */
5491 private void addProblemNameToFile(String name, String author, NomenclaturalCode nomenclaturalCode2, Rank rank) {
5492 try{
5493 FileWriter fstream = new FileWriter("/home/pkelbert/Bureau/NameNotParsed.txt",true);
5494 BufferedWriter out = new BufferedWriter(fstream);
5495 out.write(name+"\t"+replaceNull(author)+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\n");
5496 //Close the output stream
5497 out.close();
5498 }catch (Exception e){//Catch exception if any
5499 System.err.println("Error: " + e.getMessage());
5500 }
5501 }
5502
5503
5504 /**
5505 * @param tnb
5506 * @param bestMatchingTaxon
5507 * @param insertAsExisting
5508 * @param refMods
5509 */
5510 private void logDecision(NonViralName<?> tnb, Taxon bestMatchingTaxon, boolean insertAsExisting, Reference refMods) {
5511 try{
5512 FileWriter fstream = new FileWriter("/home/pkelbert/Bureau/Decisions_"+classification.toString()+".txt",true);
5513 BufferedWriter out = new BufferedWriter(fstream);
5514 out.write(tnb.getTitleCache()+" sec. "+refMods+"\t"+bestMatchingTaxon.getTitleCache()+"\t"+insertAsExisting+"\n");
5515 //Close the output stream
5516 out.close();
5517 }catch (Exception e){//Catch exception if any
5518 System.err.println("Error: " + e.getMessage());
5519 }
5520 }
5521
5522
5523 @SuppressWarnings("unused")
5524 private String replaceNull(Object in){
5525 if (in == null) {
5526 return "";
5527 }
5528 if (in.getClass().equals(NomenclaturalCode.class)) {
5529 return ((NomenclaturalCode)in).getTitleCache();
5530 }
5531 return in.toString();
5532 }
5533
5534 /**
5535 * @param fullName
5536 * @param nomenclaturalCode2
5537 * @param rank
5538 */
5539 private void addProblemNameToFile(String type, String name, NomenclaturalCode nomenclaturalCode2, Rank rank, String problems) {
5540 try{
5541 FileWriter fstream = new FileWriter("/home/pkelbert/Bureau/NameNotParsed_"+classification.getTitleCache()+".txt",true);
5542 BufferedWriter out = new BufferedWriter(fstream);
5543 out.write(type+"\t"+name+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\t"+problems+"\n");
5544 //Close the output stream
5545 out.close();
5546 }catch (Exception e){//Catch exception if any
5547 System.err.println("Error: " + e.getMessage());
5548 }
5549
5550 }
5551
5552 }
5553
5554
5555