commit changes TaxonX import
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / taxonx2013 / TaxonXTreatmentExtractor.java
1 // $Id$
2 /**
3 * Copyright (C) 2013 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10 package eu.etaxonomy.cdm.io.taxonx2013;
11
12 import java.io.BufferedWriter;
13 import java.io.File;
14 import java.io.FileWriter;
15 import java.io.IOException;
16 import java.net.URI;
17 import java.util.ArrayList;
18 import java.util.HashMap;
19 import java.util.List;
20 import java.util.Map;
21 import java.util.Set;
22 import java.util.UUID;
23 import java.util.regex.Pattern;
24
25 import javax.xml.transform.TransformerException;
26 import javax.xml.transform.TransformerFactoryConfigurationError;
27
28 import org.apache.commons.lang.StringUtils;
29 import org.apache.log4j.Logger;
30 import org.w3c.dom.Node;
31 import org.w3c.dom.NodeList;
32
33 import com.ibm.lsid.MalformedLSIDException;
34
35 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
36 import eu.etaxonomy.cdm.api.service.pager.Pager;
37 import eu.etaxonomy.cdm.model.agent.AgentBase;
38 import eu.etaxonomy.cdm.model.agent.Person;
39 import eu.etaxonomy.cdm.model.common.CdmBase;
40 import eu.etaxonomy.cdm.model.common.DefinedTermBase;
41 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
42 import eu.etaxonomy.cdm.model.common.LSID;
43 import eu.etaxonomy.cdm.model.common.Language;
44 import eu.etaxonomy.cdm.model.common.OriginalSourceType;
45 import eu.etaxonomy.cdm.model.common.UuidAndTitleCache;
46 import eu.etaxonomy.cdm.model.description.Feature;
47 import eu.etaxonomy.cdm.model.description.FeatureNode;
48 import eu.etaxonomy.cdm.model.description.FeatureTree;
49 import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
50 import eu.etaxonomy.cdm.model.description.TaxonDescription;
51 import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
52 import eu.etaxonomy.cdm.model.description.TextData;
53 import eu.etaxonomy.cdm.model.name.BacterialName;
54 import eu.etaxonomy.cdm.model.name.BotanicalName;
55 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
56 import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
57 import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
58 import eu.etaxonomy.cdm.model.name.NonViralName;
59 import eu.etaxonomy.cdm.model.name.Rank;
60 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
61 import eu.etaxonomy.cdm.model.name.ZoologicalName;
62 import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
63 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
64 import eu.etaxonomy.cdm.model.reference.Reference;
65 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
66 import eu.etaxonomy.cdm.model.taxon.Classification;
67 import eu.etaxonomy.cdm.model.taxon.Synonym;
68 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
69 import eu.etaxonomy.cdm.model.taxon.Taxon;
70 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
71 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
72 import eu.etaxonomy.cdm.persistence.query.MatchMode;
73 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
74 import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
75 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
76
77 /**
78 * @author pkelbert
79 * @date 2 avr. 2013
80 *
81 */
82 public class TaxonXTreatmentExtractor extends TaxonXExtractor{
83
// Class-wide log4j logger.
84 private static final Logger logger = Logger.getLogger(TaxonXTreatmentExtractor.class);
85
// Feature name handed to extractSpecificFeatureNotStructured for child nodes
// that no marked-up branch of extractTreatment handles.
86 private static final String notMarkedUp = "Not marked-up";
// UUID under which buildFeatureTree looks up (or creates) the feature tree.
87 private static final UUID proIbioTreeUUID = UUID.fromString("2c49f506-c7f7-44de-a8b9-2e695de3769c");
// NOTE(review): the next two UUIDs are not referenced in the visible part of
// this file; presumably they identify the "Other" and "Not marked-up" features - confirm.
88 private static final UUID OtherUUID = UUID.fromString("6465f8aa-2175-446f-807e-7163994b120f");
89 private static final UUID NotMarkedUpUUID = UUID.fromString("796fe3a5-2c9c-4a89-b298-7598ca944063");
// Only referenced from commented-out code in the visible part of this file.
90 private static final boolean skippQuestion = true;
91
// Set once in the constructor.
92 private final NomenclaturalCode nomenclaturalCode;
// Replaced by the instance returned from the classification service in reloadClassification.
93 private Classification classification;
94
95 private String treatmentMainName,originalTreatmentName;
96
97 private final HashMap<String,Map<String,String>> namesMap = new HashMap<String, Map<String,String>>();
98
99
// Matches text starting with a digit, or with '-' directly followed by a digit.
// In the visible part of this file it is only used by the commented-out extractKey.
100 private final Pattern keypattern = Pattern.compile("^(\\d+.*|-\\d+.*)");
// Matches text of at least two characters whose last character is a digit.
101 private final Pattern keypatternend = Pattern.compile("^.+?\\d$");
102
// Gates every marked-up branch of extractTreatment and the final save.
// It is never assigned in the visible part of this file.
103 private boolean maxRankRespected =false;
// Features written into the feature tree by buildFeatureTree; that method keys
// the entries it adds by the feature's title cache.
104 private Map<String, Feature> featuresMap;
105
// Name currently being processed; reassigned in getTaxonNameBaseFromXML.
106 private MyName currentMyName;
107
// Set from the constructor's urlSource argument and passed on to sourceHandler.
108 private Reference<?> sourceUrlRef;
109
// Configured in the constructor; used by extractDistribution to add and save sources.
110 private final TaxonXAddSources sourceHandler = new TaxonXAddSources();
111
/**
 * Creates an extractor and wires it to the running import.
 *
 * @param nomenclaturalCode the nomenclatural code kept by this extractor
 * @param classification the classification this extractor works on
 * @param importer the running import; also provides the agent service used to prepare the collectors
 * @param configState the import state, shared with the source handler
 * @param featuresMap the features known so far, keyed by a String
 * @param urlSource the reference kept as source URL reference and passed to the source handler
 */
public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode, Classification classification, TaxonXImport importer,
        TaxonXImportState configState, Map<String, Feature> featuresMap, Reference<?> urlSource) {
    // plain state first ...
    this.importer = importer;
    this.configState = configState;
    this.nomenclaturalCode = nomenclaturalCode;
    this.classification = classification;
    this.featuresMap = featuresMap;
    this.sourceUrlRef = urlSource;

    // ... then everything that relies on that state being in place
    prepareCollectors(configState, importer.getAgentService());

    this.sourceHandler.setImporter(importer);
    this.sourceHandler.setConfigState(configState);
    this.sourceHandler.setSourceUrlRef(urlSource);
}
131
132 /**
133 * extracts all the treament information and save them
134 * @param treatmentnode: the XML Node
135 * @param tosave: the list of object to save into the CDM
136 * @param refMods: the reference extracted from the MODS
137 * @param sourceName: the URI of the document
138 */
139 @SuppressWarnings({ "rawtypes", "unused" })
140 protected void extractTreatment(Node treatmentnode, List<Object> tosave, Reference<?> refMods, URI sourceName) {
141 logger.info("extractTreatment");
142 List<TaxonNameBase> nametosave = new ArrayList<TaxonNameBase>();
143 NodeList children = treatmentnode.getChildNodes();
144 Taxon acceptedTaxon =null;
145 Taxon defaultTaxon =null;
146 boolean refgroup=false;
147
148 for (int i=0;i<children.getLength();i++){
149 if(children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group")) {
150 refgroup=true;
151 }
152 }
153
154 for (int i=0;i<children.getLength();i++){
155
156 if (children.item(i).getNodeName().equalsIgnoreCase("tax:nomenclature")){
157 NodeList nomenclature = children.item(i).getChildNodes();
158 boolean containsName=false;
159 for(int k=0;k<nomenclature.getLength();k++){
160 if(nomenclature.item(k).getNodeName().equalsIgnoreCase("tax:name")){
161 containsName=true;
162 break;
163 }
164 }
165 if (containsName){
166 reloadClassification();
167 //extract "main" the scientific name
168 try{
169 acceptedTaxon = extractNomenclature(children.item(i),nametosave,refMods);
170 }catch(ClassCastException e){e.printStackTrace();System.exit(0);}
171 // System.out.println("acceptedTaxon : "+acceptedTaxon);
172 }
173 }
174 else if (children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group") && maxRankRespected){
175 reloadClassification();
176 //extract the References within the document
177 extractReferences(children.item(i),nametosave,acceptedTaxon,refMods);
178 }
179 else if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
180 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected){
181 File file = new File("/home/pkelbert/Bureau/multipleTaxonX.txt");
182 FileWriter writer;
183 try {
184 writer = new FileWriter(file ,true);
185 writer.write(sourceName+"\n");
186 writer.flush();
187 writer.close();
188 } catch (IOException e1) {
189 // TODO Auto-generated catch block
190 e1.printStackTrace();
191 }
192 // String multiple = askMultiple(children.item(i));
193 String multiple = "Other";
194 if (multiple.equalsIgnoreCase("other")) {
195 extractSpecificFeatureNotStructured(children.item(i),acceptedTaxon, defaultTaxon,nametosave, refMods,multiple);
196 }
197 else
198 if (multiple.equalsIgnoreCase("synonyms")) {
199 try{
200 extractSynonyms(children.item(i),acceptedTaxon, refMods);
201 }catch(NullPointerException e){
202 logger.warn("the accepted taxon is maybe null");
203 }
204 }
205 else
206 if(multiple.equalsIgnoreCase("material examined")){
207 extractMaterials(children.item(i),acceptedTaxon, refMods, nametosave);
208 }
209 else
210 if (multiple.equalsIgnoreCase("distribution")){
211 extractDistribution(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods);
212 }
213 else
214 if (multiple.equalsIgnoreCase("type status")){
215 extractDescriptionWithReference(children.item(i),acceptedTaxon,defaultTaxon,refMods, "TypeStatus");
216 }
217 else
218 if (multiple.equalsIgnoreCase("vernacular name")){
219 extractDescriptionWithReference(children.item(i),acceptedTaxon,defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
220
221 }
222 else{
223 extractSpecificFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,multiple);
224 }
225
226 }
227 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
228 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected){
229 extractFeature(children.item(i),acceptedTaxon,defaultTaxon, nametosave, refMods, Feature.BIOLOGY_ECOLOGY());
230 }
231 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
232 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("vernacularName") && maxRankRespected){
233 extractDescriptionWithReference(children.item(i),acceptedTaxon,defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
234 }
235 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
236 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected){
237 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods, Feature.DESCRIPTION());
238 }
239 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
240 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected){
241 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,Feature.DIAGNOSIS());
242 }
243 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
244 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected){
245 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods, Feature.DISCUSSION());
246 }
247 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
248 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("note") && maxRankRespected){
249 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods, Feature.DESCRIPTION());
250 }
251
252 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
253 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected){
254 extractDistribution(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods);
255 }
256 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
257 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected){
258 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave,refMods,Feature.ETYMOLOGY());
259 }
260
261 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
262 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected){
263 extractMaterials(children.item(i),acceptedTaxon, refMods, nametosave);
264 }
265 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:figure") && maxRankRespected){
266 extractSpecificFeature(children.item(i),acceptedTaxon,defaultTaxon, nametosave, refMods, "Figure");
267 }
268 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
269 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") && maxRankRespected){
270 extractSpecificFeature(children.item(i),acceptedTaxon,defaultTaxon, nametosave, refMods, "table");
271 }
272
273 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
274 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected){
275 //TODO IGNORE keys for the moment
276 //extractKey(children.item(i),acceptedTaxon, nametosave,source, refMods);
277 extractSpecificFeatureNotStructured(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,"Keys - unparsed");
278 }
279 else{
280 if (!children.item(i).getNodeName().equalsIgnoreCase("tax:pb")){
281 //logger.info("ANOTHER KIND OF NODES: "+children.item(i).getNodeName()+", "+children.item(i).getAttributes());
282 if (children.item(i).getAttributes() !=null) {
283 //logger.info(children.item(i).getAttributes().item(0));
284 }
285 extractSpecificFeatureNotStructured(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,notMarkedUp);
286 }
287 }
288 }
289 // logger.info("saveUpdateNames");
290 if (maxRankRespected){
291 importer.getNameService().saveOrUpdate(nametosave);
292 importer.getClassificationService().saveOrUpdate(classification);
293 //logger.info("saveUpdateNames-ok");
294 }
295
296 buildFeatureTree();
297 }
298
299
300 protected Map<String,Feature> getFeaturesUsed(){
301 return featuresMap;
302 }
303 /**
304 *
305 */
306 private void buildFeatureTree() {
307 logger.info("buildFeatureTree");
308 FeatureTree proibiospheretree = importer.getFeatureTreeService().find(proIbioTreeUUID);
309 if (proibiospheretree == null){
310 List<FeatureTree> trees = importer.getFeatureTreeService().list(FeatureTree.class, null, null, null, null);
311 if (trees.size()==1) {
312 FeatureTree ft = trees.get(0);
313 if (featuresMap==null) {
314 featuresMap=new HashMap<String, Feature>();
315 }
316 for (Feature feature: ft.getDistinctFeatures()){
317 if(feature!=null) {
318 featuresMap.put(feature.getTitleCache(), feature);
319 }
320 }
321 }
322 proibiospheretree = FeatureTree.NewInstance();
323 proibiospheretree.setUuid(proIbioTreeUUID);
324 }
325 // FeatureNode root = proibiospheretree.getRoot();
326 FeatureNode root2 = proibiospheretree.getRoot();
327 if (root2 != null){
328 int nbChildren = root2.getChildCount()-1;
329 while (nbChildren>-1){
330 try{
331 root2.removeChild(nbChildren);
332 }catch(Exception e){logger.warn("Can't remove child from FeatureTree "+e);}
333 nbChildren --;
334 }
335
336 }
337
338 for (Feature feature:featuresMap.values()) {
339 root2.addChild(FeatureNode.NewInstance(feature));
340 }
341 importer.getFeatureTreeService().saveOrUpdate(proibiospheretree);
342
343 }
344
345
346 /**
347 * @param keys
348 * @param acceptedTaxon: the current acceptedTaxon
349 * @param nametosave: the list of objects to save into the CDM
350 * @param refMods: the current reference extracted from the MODS
351 */
352 /* @SuppressWarnings("rawtypes")
353 private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonNameBase> nametosave, Reference<?> refMods) {
354 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
355
356 NodeList children = keys.getChildNodes();
357 String key="";
358 PolytomousKey poly = PolytomousKey.NewInstance();
359 poly.addSource(OriginalSourceType.Import, null,null,refMods,null);
360 poly.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
361 poly.addTaxonomicScope(acceptedTaxon);
362 poly.setTitleCache("bloup");
363 // poly.addCoveredTaxon(acceptedTaxon);
364 PolytomousKeyNode root = poly.getRoot();
365 PolytomousKeyNode previous = null,tmpKey=null;
366 Taxon taxonKey=null;
367 List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
368
369 // String fullContent = keys.getTextContent();
370 for (int i=0;i<children.getLength();i++){
371 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
372 NodeList paragraph = children.item(i).getChildNodes();
373 key="";
374 taxonKey=null;
375 for (int j=0;j<paragraph.getLength();j++){
376 if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
377 if (! paragraph.item(j).getTextContent().trim().isEmpty()){
378 key+=paragraph.item(j).getTextContent().trim();
379 // logger.info("KEY: "+j+"--"+key);
380 }
381 }
382 if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
383 taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
384 }
385 }
386 // logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
387 if (keypattern.matcher(key).matches()){
388 tmpKey = PolytomousKeyNode.NewInstance(key);
389 if (taxonKey!=null) {
390 tmpKey.setTaxon(taxonKey);
391 }
392 polyNodes.add(tmpKey);
393 if (previous == null) {
394 root.addChild(tmpKey);
395 } else {
396 previous.addChild(tmpKey);
397 }
398 }else{
399 if (!key.isEmpty()){
400 tmpKey=PolytomousKeyNode.NewInstance(key);
401 if (taxonKey!=null) {
402 tmpKey.setTaxon(taxonKey);
403 }
404 polyNodes.add(tmpKey);
405 if (keypatternend.matcher(key).matches()) {
406 root.addChild(tmpKey);
407 previous=tmpKey;
408 } else{
409 previous.addChild(tmpKey);
410 }
411
412 }
413 }
414 }
415 }
416 importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
417 importer.getPolytomousKeyService().saveOrUpdate(poly);
418 }
419 */
420 // /**
421 // * @param taxons: the XML Nodegroup
422 // * @param nametosave: the list of objects to save into the CDM
423 // * @param acceptedTaxon: the current accepted Taxon
424 // * @param refMods: the current reference extracted from the MODS
425 // *
426 // * @return Taxon object built
427 // */
428 // @SuppressWarnings({ "rawtypes", "unchecked" })
429 // private Taxon getTaxonFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference<?> refMods) {
430 // // logger.info("getTaxonFromXML");
431 // // logger.info("acceptedTaxon: "+acceptedTaxon);
432 //
433 // // TaxonNameBase nameToBeFilled = null;
434 //
435 // currentMyName = new MyName();
436 // NomenclaturalStatusType statusType = null;
437 //
438 // try {
439 // currentMyName = extractScientificName(taxons);
440 // if (!currentMyName.getStatus().isEmpty()){
441 // try {
442 // statusType = nomStatusString2NomStatus(currentMyName.getStatus());
443 // } catch (UnknownCdmTypeException e) {
444 // addProblematicStatusToFile(currentMyName.getStatus());
445 // logger.warn("Problem with status");
446 // }
447 // }
448 //
449 // } catch (TransformerFactoryConfigurationError e1) {
450 // logger.warn(e1);
451 // } catch (TransformerException e1) {
452 // logger.warn(e1);
453 // }
454 // /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
455 //
456 // nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
457 // if (nameToBeFilled.hasProblem() &&
458 // !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
459 // // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
460 // addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
461 // nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser, currentMyName.getAuthor(), currentMyName.getRank());
462 // }
463 //
464 // nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
465 // */
466 // TaxonNameBase nameToBeFilled = currentMyName.getTaxonNameBase();
467 // Taxon t = currentMyName.getTaxon();
468 // // importer.getNameService().saveOrUpdate(nametosave);
469 // /* Taxon t = importer.getTaxonService().findBestMatchingTaxon(nameToBeFilled.getTitleCache());
470 // */
471 // boolean statusMatch=false;
472 // if(t !=null ){
473 // statusMatch=compareStatus(t, statusType);
474 // }
475 // if (t ==null || (t != null && !statusMatch)){
476 // if(statusType != null) {
477 // nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
478 // }
479 // t= new Taxon(nameToBeFilled,(Reference<?>) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
480 // if (t.getSec() == null) {
481 // t.setSec(refMods);
482 // }
483 // if(!configState.getConfig().doKeepOriginalSecundum()) {
484 // t.setSec(configState.getConfig().getSecundum());
485 // logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
486 // }
487 // t.addSource(OriginalSourceType.Import,null,null,refMods,null);
488 // t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
489 //
490 //
491 // if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
492 // setLSID(currentMyName.getIdentifier(), t);
493 // }
494 //
495 // // Taxon parentTaxon = currentMyName.getHigherTaxa();
496 // // if (parentTaxon == null && !skippQuestion) {
497 // // parentTaxon = askParent(t, classification);
498 // // }
499 // // if (parentTaxon ==null){
500 // // while (parentTaxon == null) {
501 // // System.out.println("parent is null");
502 // // parentTaxon = createParent(t, refMods);
503 // // classification.addParentChild(parentTaxon, t, refMods, null);
504 // // }
505 // // }else{
506 // // classification.addParentChild(parentTaxon, t, refMods, null);
507 // // }
508 // }
509 // else{
510 // t = CdmBase.deproxy(t, Taxon.class);
511 // }
512 // if (!configState.getConfig().doKeepOriginalSecundum()) {
513 // t.setSec(configState.getConfig().getSecundum());
514 // logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
515 // }
516 // return t;
517 // }
518
519
520
521
522 // private Taxon getTaxonFromTaxonNameBase(TaxonNameBase tnb,Reference<?> ref){
523 // Taxon taxon = null;
524 //// System.out.println(tnb.getTitleCache());
525 // Taxon cc= importer.getTaxonService().findBestMatchingTaxon(tnb.getTitleCache());
526 // if (cc != null){
527 // if ((cc.getSec() == null || cc.getSec().toString().isEmpty()) || (cc.getSec() != null &&
528 // cc.getSec().getTitleCache().equalsIgnoreCase(ref.getTitleCache()))) {
529 // if(cc.getSec() == null || cc.getSec().toString().isEmpty()){
530 // cc.setSec(ref);
531 // importer.getTaxonService().saveOrUpdate(cc);
532 // }
533 // taxon=cc;
534 // }
535 // }
536 // else{
537 // // List<TaxonBase> c = importer.getTaxonService().searchTaxaByName(tnb.getTitleCache(), ref);
538 // List<TaxonBase> c = importer.getTaxonService().list(TaxonBase.class, 0, 0, null, null);
539 // for (TaxonBase b : c) {
540 // try{
541 // taxon = (Taxon) b;
542 // }catch(ClassCastException e){logger.warn("error while casting existing taxonnamebase");}
543 // }
544 // }
545 // if (taxon == null){
546 //// System.out.println("NEW TAXON HERE "+tnb.toString()+", "+ref.toString());
547 // taxon = Taxon.NewInstance(tnb, ref); //sec set null
548 // importer.getTaxonService().save(taxon);
549 //
550 // }
551 // taxon = (Taxon) importer.getTaxonService().find(taxon.getUuid());
552 //
553 // boolean exist = false;
554 // for (TaxonNode p : classification.getAllNodes()){
555 // if(p.getTaxon().equals(taxon)) {
556 // exist =true;
557 // }
558 // }
559 // if (!exist){
560 // taxon = (Taxon) importer.getTaxonService().find(taxon.getUuid());
561 // Taxon parentTaxon = currentMyName.getHigherTaxa();
562 // if (parentTaxon != null) {
563 // classification.addParentChild(parentTaxon, taxon, ref, null);
564 // } else {
565 // System.out.println("HERE???");
566 // classification.addChildTaxon(taxon, ref, null);
567 // }
568 // importer.getClassificationService().saveOrUpdate(classification);
569 // // refreshTransaction();
570 // }
571 // taxon = CdmBase.deproxy(taxon, Taxon.class);
572 // // System.out.println("TAXON RETOURNE : "+taxon.getTitleCache());
573 // return taxon;
574 // }
575 /**
576 * @param taxons: the XML Nodegroup
577 * @param nametosave: the list of objects to save into the CDM
578 * @param acceptedTaxon: the current accepted Taxon
579 * @param refMods: the current reference extracted from the MODS
580 *
581 * @return Taxon object built
582 */
583 @SuppressWarnings({ "rawtypes", "unused" })
584 private TaxonNameBase getTaxonNameBaseFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference<?> refMods, boolean isSynonym) {
585 // logger.info("getTaxonFromXML");
586 // logger.info("acceptedTaxon: "+acceptedTaxon);
587 logger.info("getTaxonNameBaseFromXML");
588 TaxonNameBase nameToBeFilled = null;
589
590 currentMyName=new MyName(isSynonym);
591
592 NomenclaturalStatusType statusType = null;
593 try {
594 currentMyName = extractScientificName(taxons,refMods);
595 } catch (TransformerFactoryConfigurationError e1) {
596 logger.warn(e1);
597 } catch (TransformerException e1) {
598 logger.warn(e1);
599 }
600 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
601
602 nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
603 if (nameToBeFilled.hasProblem() &&
604 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
605 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
606 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
607 nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
608 }
609
610 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
611 */
612 nameToBeFilled = currentMyName.getTaxonNameBase();
613 return nameToBeFilled;
614
615 }
616
617 // @SuppressWarnings("rawtypes")
618 // private TaxonNameBase getTaxonNameBase (TaxonNameBase name, List<TaxonNameBase> nametosave, NomenclaturalStatusType statusType){
619 // List<TaxonNameBase> names = importer.getNameService().list(TaxonNameBase.class, null, null, null, null);
620 // for (TaxonNameBase tb : names){
621 // if (tb.getTitleCache().equalsIgnoreCase(name.getTitleCache())) {
622 // boolean statusMatch=false;
623 // if(tb !=null ){
624 // statusMatch=compareStatus(tb, statusType);
625 // }
626 // if (!statusMatch){
627 // if(statusType != null) {
628 // name.addStatus(NomenclaturalStatus.NewInstance(statusType));
629 // }
630 // }else
631 // {
632 // logger.info("TaxonNameBase FOUND"+name.getTitleCache());
633 // return CdmBase.deproxy(tb, TaxonNameBase.class);
634 // }
635 // }
636 // }
637 // // logger.info("TaxonNameBase NOT FOUND "+name.getTitleCache());
638 // // System.out.println("add name "+name);
639 // nametosave.add(name);
640 // name = CdmBase.deproxy(name, TaxonNameBase.class);
641 // return name;
642 //
643 // }
644
645
646
647 // /**
648 // * @param tb
649 // * @param statusType
650 // * @return
651 // */
652 // private boolean compareStatus(TaxonNameBase tb, NomenclaturalStatusType statusType) {
653 // boolean statusMatch=false;
654 // //found one taxon
655 // Set<NomenclaturalStatus> status = tb.getStatus();
656 // if (statusType!=null && status.size()>0){ //the statusType is known for both taxon
657 // for (NomenclaturalStatus st:status){
658 // NomenclaturalStatusType stype = st.getType();
659 // if (stype.toString().equalsIgnoreCase(statusType.toString())) {
660 // statusMatch=true;
661 // }
662 // }
663 // }
664 // else{
665 // if(statusType == null && status.size()==0) {//there is no statusType, we can assume it's the same
666 // statusMatch=true;
667 // }
668 // }
669 // return statusMatch;
670 // }
671
672 /**
673 *
674 */
675 private void reloadClassification() {
676 logger.info("reloadClassification");
677 Classification cl = importer.getClassificationService().find(classification.getUuid());
678 if (cl != null){
679 classification=cl;
680 }else{
681 importer.getClassificationService().saveOrUpdate(classification);
682 classification = importer.getClassificationService().find(classification.getUuid());
683 }
684 }
685
686 // /**
687 // * Create a Taxon for the current NameBase, based on the current reference
688 // * @param taxonNameBase
689 // * @param refMods: the current reference extracted from the MODS
690 // * @return Taxon
691 // */
692 // @SuppressWarnings({ "unused", "rawtypes" })
693 // private Taxon getTaxon(TaxonNameBase taxonNameBase, Reference<?> refMods) {
694 // Taxon t = new Taxon(taxonNameBase,null );
695 // if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
696 // t.setSec(configState.getConfig().getSecundum());
697 // logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
698 // }
699 // /*<<<<<<< .courant
700 // boolean sourceExists=false;
701 // Set<IdentifiableSource> sources = t.getSources();
702 // for (IdentifiableSource src : sources){
703 // String micro = src.getCitationMicroReference();
704 // Reference r = src.getCitation();
705 // if (r.equals(refMods) && micro == null) {
706 // sourceExists=true;
707 // }
708 // }
709 // if(!sourceExists) {
710 // t.addSource(null,null,refMods,null);
711 // }
712 //=======*/
713 // t.addSource(OriginalSourceType.Import,null,null,refMods,null);
714 // t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
715 // return t;
716 // }
717
718 private void extractDescriptionWithReference(Node typestatus, Taxon acceptedTaxon, Taxon defaultTaxon, Reference<?> refMods,
719 String featureName) {
720 // System.out.println("extractDescriptionWithReference !");
721 logger.info("extractDescriptionWithReference");
722 NodeList children = typestatus.getChildNodes();
723
724 Feature currentFeature=getFeatureObjectFromString(featureName);
725
726 String r="";String s="";
727 for (int i=0;i<children.getLength();i++){
728 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
729 s+=children.item(i).getTextContent().trim();
730 }
731 if (children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
732 r+= children.item(i).getTextContent().trim();
733 }
734 if (s.indexOf(r)>-1) {
735 s=s.split(r)[0];
736 }
737 }
738
739 Reference<?> currentref = ReferenceFactory.newGeneric();
740 if(!r.isEmpty()) {
741 currentref.setTitleCache(r, true);
742 } else {
743 currentref=refMods;
744 }
745 setParticularDescription(s,acceptedTaxon,defaultTaxon, currentref, refMods,currentFeature);
746 }
747
748 /**
749 * @param nametosave
750 * @param distribution: the XML node group
751 * @param acceptedTaxon: the current accepted Taxon
752 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
753 * @param refMods: the current reference extracted from the MODS
754 */
755 @SuppressWarnings("rawtypes")
756 private void extractDistribution(Node distribution, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave, Reference<?> refMods) {
757 logger.info("extractDistribution");
758 // logger.info("acceptedTaxon: "+acceptedTaxon);
759 NodeList children = distribution.getChildNodes();
760 Map<Integer,List<MySpecimenOrObservation>> specimenOrObservations = new HashMap<Integer, List<MySpecimenOrObservation>>();
761 Map<Integer,String> descriptionsFulltext = new HashMap<Integer,String>();
762
763 for (int i=0;i<children.getLength();i++){
764 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
765 NodeList paragraph = children.item(i).getChildNodes();
766 for (int j=0;j<paragraph.getLength();j++){
767 if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
768 extractText(descriptionsFulltext, i, paragraph.item(j));
769 }
770 else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
771 extractInLine(nametosave, refMods, descriptionsFulltext, i,paragraph.item(j));
772 }
773 else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")){
774 MySpecimenOrObservation specimenOrObservation = new MySpecimenOrObservation();
775 DerivedUnit derivedUnitBase = null;
776 specimenOrObservation = extractSpecimenOrObservation(paragraph.item(j), derivedUnitBase, SpecimenOrObservationType.DerivedUnit);
777 extractTextFromSpecimenOrObservation(specimenOrObservations, descriptionsFulltext, i, specimenOrObservation);
778 }
779 }
780 }
781 }
782
783 int m=0;
784 for (int k:descriptionsFulltext.keySet()) {
785 if (k>m) {
786 m=k;
787 }
788 }
789 for (int k:specimenOrObservations.keySet()) {
790 if (k>m) {
791 m=k;
792 }
793 }
794
795
796 if(acceptedTaxon!=null){
797 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
798 Feature currentFeature = Feature.DISTRIBUTION();
799 // DerivedUnit derivedUnitBase=null;
800 // String descr="";
801 for (int k=0;k<=m;k++){
802 if(specimenOrObservations.keySet().contains(k)){
803 for (MySpecimenOrObservation soo:specimenOrObservations.get(k) ) {
804 handleAssociation(acceptedTaxon, refMods, td, soo);
805 }
806 }
807
808 if (descriptionsFulltext.keySet().contains(k)){
809 if (!stringIsEmpty(descriptionsFulltext.get(k).trim()) && (descriptionsFulltext.get(k).startsWith("Hab.") || descriptionsFulltext.get(k).startsWith("Habitat"))){
810 setParticularDescription(descriptionsFulltext.get(k),acceptedTaxon,defaultTaxon, refMods, Feature.HABITAT());
811 break;
812 }
813 else{
814 handleTextData(refMods, descriptionsFulltext, td, currentFeature, k);
815 }
816 }
817
818 if (descriptionsFulltext.keySet().contains(k) || specimenOrObservations.keySet().contains(k)){
819 acceptedTaxon.addDescription(td);
820 sourceHandler.addAndSaveSource(refMods, td, null);
821 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
822 }
823 }
824 }
825 }
826
827 /**
828 * @param refMods
829 * @param descriptionsFulltext
830 * @param td
831 * @param currentFeature
832 * @param k
833 */
834 private void handleTextData(Reference<?> refMods, Map<Integer, String> descriptionsFulltext, TaxonDescription td,
835 Feature currentFeature, int k) {
836 //logger.info("handleTextData");
837 TextData textData = TextData.NewInstance();
838 textData.setFeature(currentFeature);
839 textData.putText(Language.UNKNOWN_LANGUAGE(), descriptionsFulltext.get(k));
840 sourceHandler.addSource(refMods, textData);
841 td.addElement(textData);
842 }
843
844 /**
845 * @param acceptedTaxon
846 * @param refMods
847 * @param td
848 * @param soo
849 */
850 private void handleAssociation(Taxon acceptedTaxon, Reference<?> refMods, TaxonDescription td, MySpecimenOrObservation soo) {
851 logger.info("handleAssociation");
852 String descr=soo.getDescr();
853 DerivedUnit derivedUnitBase = soo.getDerivedUnitBase();
854
855 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
856
857 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
858
859 Feature feature=null;
860 feature = makeFeature(derivedUnitBase);
861 if(!StringUtils.isEmpty(descr)) {
862 derivedUnitBase.setTitleCache(descr, true);
863 }
864
865 IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
866
867 taxonDescription.addElement(indAssociation);
868 sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
869 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
870 td.setDescribedSpecimenOrObservation(soo.getDerivedUnitBase());
871 }
872
873 /**
874 * create an individualAssociation
875 * @param refMods
876 * @param derivedUnitBase
877 * @param feature
878 * @return
879 */
880 private IndividualsAssociation createIndividualAssociation(Reference<?> refMods, DerivedUnit derivedUnitBase,
881 Feature feature) {
882 logger.info("createIndividualAssociation");
883 IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
884 indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
885 indAssociation.setFeature(feature);
886 indAssociation = sourceHandler.addSource(refMods, indAssociation);
887 return indAssociation;
888 }
889
890 /**
891 * @param specimenOrObservations
892 * @param descriptionsFulltext
893 * @param i
894 * @param specimenOrObservation
895 */
896 private void extractTextFromSpecimenOrObservation(Map<Integer, List<MySpecimenOrObservation>> specimenOrObservations,
897 Map<Integer, String> descriptionsFulltext, int i, MySpecimenOrObservation specimenOrObservation) {
898 logger.info("extractTextFromSpecimenOrObservation");
899 List<MySpecimenOrObservation> speObsList = specimenOrObservations.get(i);
900 if (speObsList == null) {
901 speObsList=new ArrayList<MySpecimenOrObservation>();
902 }
903 speObsList.add(specimenOrObservation);
904 specimenOrObservations.put(i,speObsList);
905
906 String s = specimenOrObservation.getDerivedUnitBase().toString();
907 if (descriptionsFulltext.get(i) !=null){
908 s = descriptionsFulltext.get(i)+" "+s;
909 }
910 descriptionsFulltext.put(i, s);
911 }
912
913 /**
914 * Extract the text with the inline link to a taxon
915 * @param nametosave
916 * @param refMods
917 * @param descriptionsFulltext
918 * @param i
919 * @param paragraph
920 */
921 @SuppressWarnings("rawtypes")
922 private void extractInLine(List<TaxonNameBase> nametosave, Reference<?> refMods, Map<Integer, String> descriptionsFulltext,
923 int i, Node paragraph) {
924 //logger.info("extractInLine");
925 String inLine=getInlineText(nametosave, refMods, paragraph);
926 if (descriptionsFulltext.get(i) !=null){
927 inLine = descriptionsFulltext.get(i)+inLine;
928 }
929 descriptionsFulltext.put(i, inLine);
930 }
931
932 /**
933 * Extract the raw text from a Node
934 * @param descriptionsFulltext
935 * @param node
936 * @param j
937 */
938 private void extractText(Map<Integer, String> descriptionsFulltext, int i, Node node) {
939 //logger.info("extractText");
940 if(!node.getTextContent().trim().isEmpty()) {
941 String s =node.getTextContent().trim();
942 if (descriptionsFulltext.get(i) !=null){
943 s = descriptionsFulltext.get(i)+" "+s;
944 }
945 descriptionsFulltext.put(i, s);
946 }
947 }
948
949
950 /**
951 * @param materials: the XML node group
952 * @param acceptedTaxon: the current accepted Taxon
953 * @param refMods: the current reference extracted from the MODS
954 */
955 @SuppressWarnings("rawtypes")
956 private void extractMaterials(Node materials, Taxon acceptedTaxon, Reference<?> refMods,List<TaxonNameBase> nametosave) {
957 logger.info("EXTRACTMATERIALS");
958 // logger.info("acceptedTaxon: "+acceptedTaxon);
959 NodeList children = materials.getChildNodes();
960 NodeList events = null;
961 // String descr="";
962
963
964 for (int i=0;i<children.getLength();i++){
965 String rawAssociation="";
966 boolean added=false;
967 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
968 events = children.item(i).getChildNodes();
969 for(int k=0;k<events.getLength();k++){
970 if (events.item(k).getNodeName().equalsIgnoreCase("tax:name")){
971 String inLine= getInlineText(nametosave, refMods, events.item(k));
972 if(!inLine.isEmpty()) {
973 rawAssociation+=inLine;
974 }
975 }
976 if (! events.item(k).getNodeName().equalsIgnoreCase("tax:name")
977 && !events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
978 rawAssociation+= events.item(k).getTextContent().trim();
979 }
980 if(events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
981 if (!containsDistinctLetters(rawAssociation.replaceAll(";",""))) {
982 rawAssociation="no description text";
983 }
984 added=true;
985 handleDerivedUnitFacadeAndBase(acceptedTaxon, refMods, events.item(k), rawAssociation);
986 }
987 if (!rawAssociation.isEmpty() && !added){
988
989 Feature feature = Feature.MATERIALS_EXAMINED();
990 featuresMap.put(feature.getTitleCache(),feature);
991
992 TextData textData = createTextData(rawAssociation, refMods, feature);
993
994 if(! rawAssociation.isEmpty() && (acceptedTaxon!=null)){
995 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
996 td.addElement(textData);
997 acceptedTaxon.addDescription(td);
998 sourceHandler.addAndSaveSource(refMods, td, null);
999 }
1000 // DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
1001 // derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
1002 //
1003 // TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
1004 // acceptedTaxon.addDescription(taxonDescription);
1005 //
1006 // IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
1007 //
1008 // Feature feature = Feature.MATERIALS_EXAMINED();
1009 // featuresMap.put(feature.getTitleCache(),feature);
1010 // if(!StringUtils.isEmpty(rawAssociation)) {
1011 // derivedUnitBase.setTitleCache(rawAssociation, true);
1012 // }
1013 // indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
1014 // indAssociation.setFeature(feature);
1015 // indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
1016 //
1017 // /*boolean sourceExists=false;
1018 // Set<DescriptionElementSource> dsources = indAssociation.getSources();
1019 // for (DescriptionElementSource src : dsources){
1020 // String micro = src.getCitationMicroReference();
1021 // Reference r = src.getCitation();
1022 // if (r.equals(refMods) && micro == null) {
1023 // sourceExists=true;
1024 // }
1025 // }
1026 // if(!sourceExists) {
1027 // indAssociation.addSource(null, null, refMods, null);
1028 // }*/
1029 // taxonDescription.addElement(indAssociation);
1030 // taxonDescription.setTaxon(acceptedTaxon);
1031 // taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
1032 //
1033 // /*sourceExists=false;
1034 // Set<IdentifiableSource> sources = taxonDescription.getSources();
1035 // for (IdentifiableSource src : sources){
1036 // String micro = src.getCitationMicroReference();
1037 // Reference r = src.getCitation();
1038 // if (r.equals(refMods) && micro == null) {
1039 // sourceExists=true;
1040 // }
1041 // }
1042 // if(!sourceExists) {
1043 // taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
1044 // }*/
1045 //
1046 // importer.getDescriptionService().saveOrUpdate(taxonDescription);
1047 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1048
1049 rawAssociation="";
1050 }
1051 }
1052 }
1053 }
1054 }
1055
1056 /**
1057 * @param acceptedTaxon
1058 * @param refMods
1059 * @param events
1060 * @param rawAssociation
1061 * @param k
1062 */
1063 private void handleDerivedUnitFacadeAndBase(Taxon acceptedTaxon, Reference<?> refMods, Node event,
1064 String rawAssociation) {
1065 logger.info("handleDerivedUnitFacadeAndBase");
1066 String descr;
1067 DerivedUnit derivedUnitBase;
1068 MySpecimenOrObservation myspecimenOrObservation;
1069 DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
1070 derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
1071
1072 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
1073
1074 myspecimenOrObservation = extractSpecimenOrObservation(event,derivedUnitBase,SpecimenOrObservationType.DerivedUnit);
1075 derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
1076 descr=myspecimenOrObservation.getDescr();
1077
1078 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
1079
1080 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
1081
1082 Feature feature = makeFeature(derivedUnitBase);
1083 featuresMap.put(feature.getTitleCache(),feature);
1084 if(!StringUtils.isEmpty(descr)) {
1085 derivedUnitBase.setTitleCache(descr, true);
1086 }
1087
1088 IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
1089
1090 taxonDescription.addElement(indAssociation);
1091 sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
1092 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1093 }
1094
1095
1096
1097 /**
1098 * @param materials: the XML node group
1099 * @param acceptedTaxon: the current accepted Taxon
1100 * @param refMods: the current reference extracted from the MODS
1101 */
1102 private String extractMaterialsDirect(Node materials, Taxon acceptedTaxon, Reference<?> refMods, String event) {
1103 logger.info("extractMaterialsDirect");
1104 // logger.info("acceptedTaxon: "+acceptedTaxon);
1105 String descr="";
1106
1107 DerivedUnit derivedUnitBase=null;
1108 MySpecimenOrObservation myspecimenOrObservation = extractSpecimenOrObservation(materials,derivedUnitBase, SpecimenOrObservationType.DerivedUnit);
1109 derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
1110
1111 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
1112
1113 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
1114
1115 Feature feature=null;
1116 if (event.equalsIgnoreCase("collection")){
1117 feature = makeFeature(derivedUnitBase);
1118 }
1119 else{
1120 feature = Feature.MATERIALS_EXAMINED();
1121 }
1122 featuresMap.put(feature.getTitleCache(), feature);
1123
1124 descr=myspecimenOrObservation.getDescr();
1125 if(!StringUtils.isEmpty(descr)) {
1126 derivedUnitBase.setTitleCache(descr, true);
1127 }
1128
1129 IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
1130
1131 taxonDescription.addElement(indAssociation);
1132 sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
1133 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1134
1135 return derivedUnitBase.getTitleCache();
1136
1137 }
1138
1139
1140 /**
1141 * @param description: the XML node group
1142 * @param acceptedTaxon: the current acceptedTaxon
1143 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1144 * @param nametosave: the list of objects to save into the CDM
1145 * @param refMods: the current reference extracted from the MODS
1146 * @param featureName: the feature name
1147 */
1148 @SuppressWarnings({ "rawtypes"})
1149 private String extractSpecificFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
1150 List<TaxonNameBase> nametosave, Reference<?> refMods, String featureName ) {
1151 logger.info("extractSpecificFeature "+featureName);
1152 // System.out.println("GRUUUUuu");
1153 NodeList children = description.getChildNodes();
1154 NodeList insideNodes ;
1155 NodeList trNodes;
1156 // String descr ="";
1157 String localdescr="";
1158 List<String> blabla=null;
1159 List<String> text = new ArrayList<String>();
1160
1161 String table="<table>";
1162 String head="";
1163 String line="";
1164
1165 Feature currentFeature=getFeatureObjectFromString(featureName);
1166
1167 // String fullContent = description.getTextContent();
1168 for (int i=0;i<children.getLength();i++){
1169 // localdescr="";
1170 if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
1171 text.add(children.item(i).getTextContent().trim());
1172 }
1173 if (featureName.equalsIgnoreCase("table")){
1174 if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1175 children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("thead")){
1176 head = extractTableHead(children.item(i));
1177 table+=head;
1178 line = extractTableLine(children.item(i));
1179 if (!line.equalsIgnoreCase("<tr></tr>")) {
1180 table+=line;
1181 }
1182 }
1183 if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1184 children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1185 line = extractTableLineWithColumn(children.item(i).getChildNodes());
1186 if(!line.equalsIgnoreCase("<tr></tr>")) {
1187 table+=line;
1188 }
1189 }
1190 }
1191 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1192 insideNodes=children.item(i).getChildNodes();
1193 blabla= new ArrayList<String>();
1194 for (int j=0;j<insideNodes.getLength();j++){
1195 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1196 String inlinetext = getInlineText(nametosave, refMods, insideNodes.item(j));
1197 if (!inlinetext.isEmpty()) {
1198 blabla.add(inlinetext);
1199 }
1200 }
1201 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1202 if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1203 blabla.add(insideNodes.item(j).getTextContent().trim());
1204 // localdescr += insideNodes.item(j).getTextContent().trim();
1205 }
1206 }
1207 }
1208 if (!blabla.isEmpty()) {
1209 String blaStr = StringUtils.join(blabla," ").trim();
1210 if(!stringIsEmpty(blaStr)) {
1211 setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1212 text.add(blaStr);
1213 }
1214 }
1215
1216 }
1217 if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1218 if(!children.item(i).getTextContent().trim().isEmpty()){
1219 localdescr = children.item(i).getTextContent().trim();
1220 if(!stringIsEmpty(localdescr)) {
1221 setParticularDescription(localdescr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1222 }
1223 }
1224 }
1225 }
1226
1227 table+="</table>";
1228 if (!table.equalsIgnoreCase("<table></table>")){
1229 // System.out.println("TABLE : "+table);
1230 text.add(table);
1231 }
1232
1233 if (text !=null && !text.isEmpty()) {
1234 return StringUtils.join(text," ");
1235 } else {
1236 return "";
1237 }
1238
1239 }
1240
1241 /**
1242 * @param children
1243 * @param i
1244 * @return
1245 */
1246 private String extractTableLine(Node child) {
1247 //logger.info("extractTableLine");
1248 String line;
1249 line="<tr>";
1250 if (child.getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1251 line = extractTableLineWithColumn(child.getChildNodes());
1252 }
1253 line+="</tr>";
1254 return line;
1255 }
1256
1257 /**
1258 * @param children
1259 * @param i
1260 * @return
1261 */
1262 private String extractTableHead(Node child) {
1263 //logger.info("extractTableHead");
1264 String head;
1265 String line;
1266 head="<th>";
1267 NodeList trNodes = child.getChildNodes();
1268 for (int k=0;k<trNodes.getLength();k++){
1269 if (trNodes.item(k).getNodeName().equalsIgnoreCase("tax:div")
1270 && trNodes.item(k).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1271 line = extractTableLineWithColumn(trNodes.item(k).getChildNodes());
1272 head+=line;
1273 }
1274 }
1275 head+="</th>";
1276 return head;
1277 }
1278
1279 /**
1280 * build a html table line, with td columns
1281 * @param tdNodes
1282 * @return an html coded line
1283 */
1284 private String extractTableLineWithColumn(NodeList tdNodes) {
1285 //logger.info("extractTableLineWithColumn");
1286 String line;
1287 line="<tr>";
1288 for (int l=0;l<tdNodes.getLength();l++){
1289 if (tdNodes.item(l).getNodeName().equalsIgnoreCase("tax:p")){
1290 line+="<td>"+tdNodes.item(l).getTextContent()+"</td>";
1291 }
1292 }
1293 line+="</tr>";
1294 return line;
1295 }
1296
1297 /**
1298 * @param description: the XML node group
1299 * @param acceptedTaxon: the current acceptedTaxon
1300 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1301 * @param nametosave: the list of objects to save into the CDM
1302 * @param refMods: the current reference extracted from the MODS
1303 * @param featureName: the feature name
1304 */
1305 @SuppressWarnings({ "unused", "rawtypes" })
1306 private String extractSpecificFeatureNotStructured(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
1307 List<TaxonNameBase> nametosave, Reference<?> refMods, String featureName ) {
1308 logger.info("extractSpecificFeatureNotStructured "+featureName);
1309 NodeList children = description.getChildNodes();
1310 NodeList insideNodes ;
1311 List<String> blabla= new ArrayList<String>();
1312
1313
1314 Feature currentFeature = getFeatureObjectFromString(featureName);
1315
1316 String fullContent = description.getTextContent();
1317 for (int i=0;i<children.getLength();i++){
1318 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1319 insideNodes=children.item(i).getChildNodes();
1320 for (int j=0;j<insideNodes.getLength();j++){
1321 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1322 String inlineText =getInlineText(nametosave, refMods, insideNodes.item(j));
1323 if(!inlineText.isEmpty()) {
1324 blabla.add(inlineText);
1325 }
1326 }
1327 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1328 if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1329 blabla.add(insideNodes.item(j).getTextContent().trim());
1330 }
1331 }
1332 }
1333 }
1334 if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1335 if(!children.item(i).getTextContent().trim().isEmpty()){
1336 String localdescr = children.item(i).getTextContent().trim();
1337 if(!localdescr.isEmpty())
1338 {
1339 blabla.add(localdescr);
1340 }
1341 }
1342 }
1343 }
1344
1345 if (blabla !=null && !blabla.isEmpty()) {
1346 String blaStr = StringUtils.join(blabla," ").trim();
1347 if (! stringIsEmpty(blaStr)) {
1348 setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1349 return blaStr;
1350 } else {
1351 return "";
1352 }
1353 } else {
1354 return "";
1355 }
1356
1357 }
1358
1359 /**
1360 * @param blaStr
1361 * @return
1362 */
1363 private boolean stringIsEmpty(String blaStr) {
1364 if (!StringUtils.isEmpty(blaStr)) {
1365 if (!blaStr.equalsIgnoreCase(".")) {
1366 if (!blaStr.equalsIgnoreCase(",")) {
1367 if (!blaStr.equalsIgnoreCase(";")) {
1368 return false;
1369 }
1370 }
1371 }
1372 }
1373 return true;
1374 }
1375
1376 /**
1377 * @param nametosave
1378 * @param refMods
1379 * @param insideNodes
1380 * @param blabla
1381 * @param j
1382 */
1383 @SuppressWarnings({ "rawtypes" })
1384 private String getInlineText(List<TaxonNameBase> nametosave, Reference<?> refMods, Node insideNode) {
1385 //logger.info("getInlineText");
1386 TaxonNameBase tnb = getTaxonNameBaseFromXML(insideNode, nametosave,refMods,false);
1387 // Taxon tax = getTaxonFromTxonNameBase(tnb, refMods);
1388 Taxon tax = currentMyName.getTaxon();
1389 if(tnb !=null){
1390 String linkedTaxon = tnb.toString().split("sec")[0];//TODO NOT IMPLEMENTED IN THE CDM YET
1391 return "<cdm:taxon uuid='"+tax.getUuid()+"'>"+linkedTaxon+"</cdm:taxon>";
1392 }
1393 return "";
1394 }
1395
1396 /**
1397 * @param featureName
1398 * @return
1399 */
1400 @SuppressWarnings("rawtypes")
1401 private Feature getFeatureObjectFromString(String featureName) {
1402 logger.info("getFeatureObjectFromString");
1403 List<DefinedTermBase> features = importer.getTermService().list(Feature.class, null,null,null,null);
1404 Feature currentFeature=null;
1405 for (DefinedTermBase feature: features){
1406 String tmpF = ((Feature)feature).getTitleCache();
1407 if (tmpF.equalsIgnoreCase(featureName)) {
1408 currentFeature=(Feature)feature;
1409 // System.out.println("currentFeatureFromList "+currentFeature.getUuid());
1410 }
1411 }
1412 if (currentFeature == null) {
1413 currentFeature=Feature.NewInstance(featureName, featureName, featureName);
1414 if(featureName.equalsIgnoreCase("Other")){
1415 currentFeature.setUuid(OtherUUID);
1416 }
1417 if(featureName.equalsIgnoreCase(notMarkedUp)){
1418 currentFeature.setUuid(NotMarkedUpUUID);
1419 }
1420 importer.getTermService().saveOrUpdate(currentFeature);
1421 }
1422 return currentFeature;
1423 }
1424
1425
1426
1427
1428 /**
1429 * @param children: the XML node group
1430 * @param nametosave: the list of objects to save into the CDM
1431 * @param acceptedTaxon: the current acceptedTaxon
1432 * @param refMods: the current reference extracted from the MODS
1433 * @param fullContent :the parsed XML content
1434 * @return a list of description (text)
1435 */
1436 @SuppressWarnings({ "unused", "rawtypes" })
1437 private List<String> parseParagraph(List<TaxonNameBase> nametosave, Taxon acceptedTaxon, Reference<?> refMods, Node paragraph, Feature feature){
1438 logger.info("parseParagraph "+feature.toString());
1439 List<String> fullDescription= new ArrayList<String>();
1440 // String localdescr;
1441 String descr="";
1442 NodeList insideNodes ;
1443 boolean collectionEvent = false;
1444 List<Node>collectionEvents = new ArrayList<Node>();
1445
1446 NodeList children = paragraph.getChildNodes();
1447
1448 for (int i=0;i<children.getLength();i++){
1449 // localdescr="";
1450 if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
1451 descr += children.item(i).getTextContent().trim();
1452 }
1453 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1454 insideNodes=children.item(i).getChildNodes();
1455 List<String> blabla= new ArrayList<String>();
1456 for (int j=0;j<insideNodes.getLength();j++){
1457 boolean nodeKnown = false;
1458 // System.out.println("insideNodes.item(j).getNodeName() : "+insideNodes.item(j).getNodeName());
1459 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1460 String inlineText = getInlineText(nametosave, refMods, insideNodes.item(j));
1461 if (!inlineText.isEmpty()) {
1462 blabla.add(inlineText);
1463 }
1464 nodeKnown=true;
1465 }
1466 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1467 if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1468 blabla.add(insideNodes.item(j).getTextContent().trim());
1469 // localdescr += insideNodes.item(j).getTextContent().trim();
1470 }
1471 nodeKnown=true;
1472 }
1473 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:bibref")) {
1474 String ref = insideNodes.item(j).getTextContent().trim();
1475 if (ref.endsWith(";") && ((ref.length())>1)) {
1476 ref=ref.substring(0, ref.length()-1)+".";
1477 }
1478 Reference<?> reference = ReferenceFactory.newGeneric();
1479 reference.setTitleCache(ref, true);
1480 blabla.add(reference.getTitleCache());
1481 nodeKnown=true;
1482 }
1483 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:figure")){
1484 String figure = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, nametosave, refMods, "figure");
1485 blabla.add(figure);
1486 }
1487 if(insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:div") &&
1488 insideNodes.item(j).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1489 insideNodes.item(j).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1490 String table = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, nametosave, refMods, "table");
1491 blabla.add(table);
1492 }
1493 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1494 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1495 String titlecache = extractMaterialsDirect(insideNodes.item(j), acceptedTaxon, refMods, "collection");
1496 blabla.add(titlecache);
1497 collectionEvent=true;
1498 collectionEvents.add(insideNodes.item(j));
1499 nodeKnown=true;
1500 }
1501 // if (!nodeKnown && !insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:pb")) {
1502 // logger.info("Node not handled yet : "+insideNodes.item(j).getNodeName());
1503 // }
1504
1505 }
1506 if (!StringUtils.isEmpty(StringUtils.join(blabla," "))) {
1507 fullDescription.add(StringUtils.join(blabla," "));
1508 }
1509 }
1510 if (children.item(i).getNodeName().equalsIgnoreCase("tax:figure")){
1511 String figure = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, nametosave, refMods, "Figure");
1512 fullDescription.add(figure);
1513 }
1514 if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1515 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1516 children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1517 String table = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, nametosave, refMods, "table");
1518 fullDescription.add(table);
1519 }
1520 }
1521
1522 if( !stringIsEmpty(descr.trim())){
1523 Feature currentFeature= getNotMarkedUpFeatureObject();
1524 setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1525 }
1526 // if (collectionEvent) {
1527 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1528 // for (Node coll:collectionEvents){
1529 // = extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
1530 // }
1531 // }
1532 return fullDescription;
1533 }
1534
1535
1536 /**
1537 * @param description: the XML node group
1538 * @param acceptedTaxon: the current acceptedTaxon
1539 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1540 * @param nametosave: the list of objects to save into the CDM
1541 * @param refMods: the current reference extracted from the MODS
1542 * @param feature: the feature to link the data with
1543 */
1544 @SuppressWarnings("rawtypes")
1545 private void extractFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave, Reference<?> refMods, Feature feature){
1546 logger.info("EXTRACT FEATURE "+feature.toString());
1547 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1548 List<String> fullDescription= parseParagraph( nametosave, acceptedTaxon, refMods, description,feature);
1549
1550 // System.out.println("Feature : "+feature.toString()+", "+fullDescription.toString());
1551 if (!fullDescription.isEmpty() &&!stringIsEmpty(StringUtils.join(fullDescription," ").trim())) {
1552 setParticularDescription(StringUtils.join(fullDescription," ").trim(),acceptedTaxon,defaultTaxon, refMods,feature);
1553 }
1554
1555 }
1556
1557
1558 /**
1559 * @param descr: the XML Nodegroup to parse
1560 * @param acceptedTaxon: the current acceptedTaxon
1561 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1562 * @param refMods: the current reference extracted from the MODS
1563 * @param currentFeature: the feature name
1564 * @return
1565 */
1566 private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon, Reference<?> refMods, Feature currentFeature) {
1567 logger.info("setParticularDescription "+currentFeature.getTitleCache()+", \n blabla : "+descr);
1568 // System.out.println("setParticularDescription "+currentFeature.getTitleCache()+", \n blabla : "+descr);
1569 // logger.info("acceptedTaxon: "+acceptedTaxon);
1570 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1571
1572 featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1573
1574 TextData textData = createTextData(descr, refMods, currentFeature);
1575
1576 if(acceptedTaxon!=null){
1577 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1578 td.addElement(textData);
1579 acceptedTaxon.addDescription(td);
1580
1581 sourceHandler.addAndSaveSource(refMods, td, null);
1582 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1583 }
1584
1585 if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1586 try{
1587 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1588 if (tmp!=null) {
1589 defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1590 }else{
1591 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1592 }
1593 }catch(Exception e){
1594 logger.debug("TAXON EXISTS"+defaultTaxon);
1595 }
1596
1597 TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1598 defaultTaxon.addDescription(td);
1599 td.addElement(textData);
1600 sourceHandler.addAndSaveSource(refMods, td, null);
1601 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1602 }
1603 }
1604
1605 /**
1606 * @param descr
1607 * @param refMods
1608 * @param currentFeature
1609 * @return
1610 */
1611 private TextData createTextData(String descr, Reference<?> refMods, Feature currentFeature) {
1612 //logger.info("createTextData");
1613 TextData textData = TextData.NewInstance();
1614 textData.setFeature(currentFeature);
1615 sourceHandler.addSource(refMods, textData);
1616
1617 textData.putText(Language.UNKNOWN_LANGUAGE(), descr);
1618 return textData;
1619 }
1620
1621
1622
1623 /**
1624 * @param descr: the XML Nodegroup to parse
1625 * @param acceptedTaxon: the current acceptedTaxon
1626 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1627 * @param refMods: the current reference extracted from the MODS
1628 * @param currentFeature: the feature name
1629 * @return
1630 */
1631 private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon,Reference<?> currentRef, Reference<?> refMods, Feature currentFeature) {
1632 // System.out.println("setParticularDescriptionSPecial "+currentFeature);
1633 // logger.info("acceptedTaxon: "+acceptedTaxon);
1634 logger.info("setParticularDescription");
1635 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1636
1637 featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1638 TextData textData = createTextData(descr, refMods, currentFeature);
1639
1640 if(! descr.isEmpty() && (acceptedTaxon!=null)){
1641 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1642 td.addElement(textData);
1643 acceptedTaxon.addDescription(td);
1644
1645 sourceHandler.addAndSaveSource(refMods, td, currentRef);
1646 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1647 }
1648
1649 if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1650 try{
1651 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1652 if (tmp!=null) {
1653 defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1654 }else{
1655 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1656 }
1657 }catch(Exception e){
1658 logger.debug("TAXON EXISTS"+defaultTaxon);
1659 }
1660
1661 TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1662 defaultTaxon.addDescription(td);
1663 td.addElement(textData);
1664 sourceHandler.addAndSaveSource(currentRef, td,currentRef);
1665 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1666 }
1667 }
1668
1669
1670
1671 /**
1672 * @param synonyms: the XML Nodegroup to parse
1673 * @param nametosave: the list of objects to save into the CDM
1674 * @param acceptedTaxon: the current acceptedTaxon
1675 * @param refMods: the current reference extracted from the MODS
1676 */
1677 @SuppressWarnings({ "rawtypes" })
1678 private void extractSynonyms(Node synonyms, Taxon acceptedTaxon,Reference<?> refMods) {
1679 logger.info("extractSynonyms");
1680 //System.out.println("extractSynonyms for: "+acceptedTaxon);
1681 Taxon ttmp = (Taxon) importer.getTaxonService().find(acceptedTaxon.getUuid());
1682 if (ttmp != null) {
1683 acceptedTaxon = CdmBase.deproxy(ttmp,Taxon.class);
1684 }
1685 else{
1686 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1687 }
1688 NodeList children = synonyms.getChildNodes();
1689 TaxonNameBase nameToBeFilled = null;
1690 List<MyName> names = new ArrayList<MyName>();
1691
1692 if(synonyms.getNodeName().equalsIgnoreCase("tax:name")){
1693 MyName myName;
1694 try {
1695 myName = extractScientificNameSynonym(synonyms,refMods);
1696 names.add(myName);
1697 } catch (TransformerFactoryConfigurationError e) {
1698 logger.warn(e);
1699 } catch (TransformerException e) {
1700 logger.warn(e);
1701 }
1702 }
1703
1704
1705 for (int i=0;i<children.getLength();i++){
1706 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1707 NodeList tmp = children.item(i).getChildNodes();
1708 // String fullContent = children.item(i).getTextContent();
1709 for (int j=0; j< tmp.getLength();j++){
1710 if(tmp.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1711 MyName myName;
1712 try {
1713 myName = extractScientificNameSynonym(tmp.item(j),refMods);
1714 names.add(myName);
1715 } catch (TransformerFactoryConfigurationError e) {
1716 logger.warn(e);
1717 } catch (TransformerException e) {
1718 logger.warn(e);
1719 }
1720
1721 }
1722 }
1723 }
1724 if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
1725 MyName myName;
1726 try {
1727 myName = extractScientificNameSynonym(children.item(i),refMods);
1728 names.add(myName);
1729 } catch (TransformerFactoryConfigurationError e) {
1730 logger.warn(e);
1731 } catch (TransformerException e) {
1732 logger.warn(e);
1733 }
1734
1735 }
1736 }
1737 NomenclaturalStatusType statusType = null;
1738 //System.out.println("names: "+names);
1739 for(MyName name:names){
1740 //System.out.println("HANDLE NAME "+name);
1741
1742 statusType = null;
1743
1744 nameToBeFilled = name.getTaxonNameBase();
1745
1746 Synonym synonym = name.getSyno();
1747 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1748 nameToBeFilled = parser.parseFullName(name.getName(), nomenclaturalCode, name.getRank());
1749 if (nameToBeFilled.hasProblem() &&
1750 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1751 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1752 addProblemNameToFile(name.getName(),"",nomenclaturalCode,name.getRank());
1753 nameToBeFilled = solveNameProblem(name.getOriginalName(), name.getName(), parser,name.getAuthor(), name.getRank());
1754 }
1755 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
1756 */
1757 if (!name.getIdentifier().isEmpty() && (name.getIdentifier().length()>2)){
1758 setLSID(name.getIdentifier(), synonym);
1759 }
1760
1761 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1762 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1763 boolean synoExist = false;
1764 for (Synonym syn: synonymsSet){
1765 //System.out.println(syn.getName()+" -- "+syn.getSec());
1766 boolean a =syn.getName().equals(synonym.getName());
1767 boolean b = syn.getSec().equals(synonym.getSec());
1768 if (a && b) {
1769 synoExist=true;
1770 }
1771 }
1772 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1773 //System.out.println("SYNONYM");
1774 sourceHandler.addSource(refMods, synonym);
1775
1776 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),refMods, null);
1777
1778 }
1779 }
1780 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1781 }
1782
1783
1784 /**
1785 * @param refgroup: the XML nodes
1786 * @param nametosave: the list of objects to save into the CDM
1787 * @param acceptedTaxon: the current acceptedTaxon
1788 * @param nametosave: the list of objects to save into the CDM
1789 * @param refMods: the current reference extracted from the MODS
1790 * @return the acceptedTaxon (why?)
1791 * handle cases where the bibref are inside <p> and outside
1792 */
1793 @SuppressWarnings({ "rawtypes" })
1794 private Taxon extractReferences(Node refgroup, List<TaxonNameBase> nametosave, Taxon acceptedTaxon, Reference<?> refMods) {
1795 logger.info("extractReferences");
1796 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1797
1798 NodeList children = refgroup.getChildNodes();
1799 NonViralName<?> nameToBeFilled = getNonViralNameAccNomenclature();
1800
1801 ReferenceBuilder refBuild = new ReferenceBuilder(sourceHandler);
1802 for (int i=0;i<children.getLength();i++){
1803 if(children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
1804 String ref = children.item(i).getTextContent().trim();
1805 refBuild.builReference(ref, treatmentMainName, nomenclaturalCode, acceptedTaxon, refMods);
1806 if (!refBuild.isFoundBibref()){
1807 extractReferenceRawText(children.item(i).getChildNodes(), nameToBeFilled, refMods, acceptedTaxon);
1808 }
1809 }
1810
1811 if(children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1812 NodeList references = children.item(i).getChildNodes();
1813 String descr="";
1814 for (int j=0;j<references.getLength();j++){
1815 if(references.item(j).getNodeName().equalsIgnoreCase("tax:bibref")){
1816 String ref = references.item(j).getTextContent().trim();
1817 refBuild.builReference(ref, treatmentMainName, nomenclaturalCode, acceptedTaxon, refMods);
1818 }
1819 else
1820 if (references.item(j).getNodeName().equalsIgnoreCase("#text")
1821 && !references.item(j).getTextContent().trim().isEmpty()){
1822 descr += references.item(j).getTextContent().trim();
1823 }
1824
1825 }
1826 if (!refBuild.isFoundBibref()){
1827 //if it's not tagged, put it as row information.
1828 // extractReferenceRawText(references, nameToBeFilled, nametosave, refMods, acceptedTaxon);
1829 //then put it as a not markup feature if not empty
1830 if (!stringIsEmpty(descr.trim())){
1831 Feature currentFeature= getNotMarkedUpFeatureObject();
1832 setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1833 }
1834 }
1835 }
1836 }
1837 // importer.getClassificationService().saveOrUpdate(classification);
1838 return acceptedTaxon;
1839
1840 }
1841
1842 /**
1843 * get the non viral name according to the current nomenclature
1844 * @return
1845 */
1846 private NonViralName<?> getNonViralNameAccNomenclature() {
1847 //logger.info("getNonViralNameAccNomenclature");
1848 NonViralName<?> nameToBeFilled = null;
1849 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1850 nameToBeFilled = BotanicalName.NewInstance(null);
1851 }
1852 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1853 nameToBeFilled = ZoologicalName.NewInstance(null);
1854 }
1855 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1856 nameToBeFilled = BacterialName.NewInstance(null);
1857 }
1858 return nameToBeFilled;
1859 }
1860
1861 /**
1862 * @return the feature object for the category "not marked up"
1863 */
1864 @SuppressWarnings("rawtypes")
1865 private Feature getNotMarkedUpFeatureObject() {
1866 logger.info("getNotMarkedUpFeatureObject");
1867 List<DefinedTermBase> features = importer.getTermService().list(Feature.class, null,null,null,null);
1868 Feature currentFeature =null;
1869 for (DefinedTermBase feat: features){
1870 String tmpF = ((Feature)feat).getTitleCache();
1871 if (tmpF.equalsIgnoreCase(notMarkedUp)) {
1872 currentFeature=(Feature)feat;
1873 }
1874 }
1875 if (currentFeature == null) {
1876 currentFeature=Feature.NewInstance(notMarkedUp, notMarkedUp, notMarkedUp);
1877 currentFeature.setUuid(NotMarkedUpUUID);
1878 importer.getTermService().saveOrUpdate(currentFeature);
1879 }
1880 return currentFeature;
1881 }
1882
1883 /**
1884 * @param references
1885 * handle cases where the bibref are inside <p> and outside
1886 */
1887 @SuppressWarnings("rawtypes")
1888 private void extractReferenceRawText(NodeList references, NonViralName<?> nameToBeFilled, Reference<?> refMods,
1889 Taxon acceptedTaxon) {
1890 logger.info("extractReferenceRawText");
1891 String refString="";
1892 NomenclaturalStatusType statusType = null;
1893 currentMyName= new MyName(true);
1894 for (int j=0;j<references.getLength();j++){
1895 acceptedTaxon=CdmBase.deproxy(acceptedTaxon, Taxon.class);
1896 //no bibref tag inside
1897 // System.out.println("references.item(j).getNodeName()"+references.item(j).getNodeName());
1898 if (references.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1899
1900 try {
1901 currentMyName = extractScientificName(references.item(j),refMods);
1902 // if (myName.getNewName().isEmpty()) {
1903 // name=myName.getOriginalName()+"---"+myName.getRank()+"---"+myName.getIdentifier()+"---"+myName.getStatus();
1904 // } else {
1905 // name=myName.getNewName()+"---"+myName.getRank()+"---"+myName.getIdentifier()+"---"+myName.getStatus();
1906 // }
1907 } catch (TransformerFactoryConfigurationError e) {
1908 logger.warn(e);
1909 } catch (TransformerException e) {
1910 logger.warn(e);
1911 }
1912
1913 // name=name.trim();
1914 }
1915 if (references.item(j).getNodeName().equalsIgnoreCase("#text")){
1916 refString = references.item(j).getTextContent().trim();
1917 }
1918 if(references.item(j).getNodeName().equalsIgnoreCase("#text") && !references.item(j).getTextContent().trim().isEmpty()){
1919 //
1920 statusType = null;
1921 if (!currentMyName.getStatus().isEmpty()){
1922 try {
1923 statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1924 } catch (UnknownCdmTypeException e) {
1925 addProblematicStatusToFile(currentMyName.getStatus());
1926 logger.warn("Problem with status");
1927 }
1928 }
1929
1930
1931 /*INonViralNameParser parser = NonViralNameParserImpl.NewInstance();*/
1932 String fullLineRefName = references.item(j).getTextContent().trim();
1933 int nameOrRefOrOther=2;
1934 nameOrRefOrOther=askIfNameContained(fullLineRefName);
1935 // System.out.println("NAMEORREFOR?? "+nameOrRefOrOther);
1936 if (nameOrRefOrOther==0){
1937 /*TaxonNameBase nameTBF = parser.parseFullName(fullLineRefName, nomenclaturalCode, Rank.UNKNOWN_RANK());
1938 if (nameTBF.hasProblem() &&
1939 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1940 addProblemNameToFile(fullLineRefName,"",nomenclaturalCode,Rank.UNKNOWN_RANK());
1941 nameTBF=solveNameProblem(fullLineRefName, fullLineRefName,parser,currentMyName.getAuthor(), currentMyName.getRank());
1942 }
1943 nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1944 */
1945 TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1946 Synonym synonym = null;
1947 if (!currentMyName.getStatus().isEmpty()){
1948 try {
1949 statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1950 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1951 synonym = Synonym.NewInstance(nameTBF, refMods);
1952 } catch (UnknownCdmTypeException e) {
1953 addProblematicStatusToFile(currentMyName.getStatus());
1954 logger.warn("Problem with status");
1955 synonym = Synonym.NewInstance(nameTBF, refMods);
1956 synonym.setAppendedPhrase(currentMyName.getStatus());
1957 }
1958 }
1959 else{
1960 synonym = Synonym.NewInstance(nameTBF, refMods);
1961 }
1962
1963 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1964 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1965 boolean synoExist = false;
1966 for (Synonym syn: synonymsSet){
1967 // System.out.println(syn.getName()+" -- "+syn.getSec());
1968 boolean a =syn.getName().equals(synonym.getName());
1969 boolean b = syn.getSec().equals(synonym.getSec());
1970 if (a && b) {
1971 synoExist=true;
1972 }
1973 }
1974 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1975 sourceHandler.addSource(refMods, synonym);
1976
1977 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),refMods, null);
1978 }
1979 }
1980
1981 if (nameOrRefOrOther==1){
1982 Reference<?> re = ReferenceFactory.newGeneric();
1983 re.setTitleCache(fullLineRefName, true);
1984
1985 /* TaxonNameBase nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
1986 if (nameTBF.hasProblem() &&
1987 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1988 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
1989 nameTBF=solveNameProblem(currentMyName.getName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
1990 }
1991 nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1992 */
1993 TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1994 Synonym synonym = null;
1995 if (!currentMyName.getStatus().isEmpty()){
1996 try {
1997 statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1998 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1999 synonym = Synonym.NewInstance(nameTBF, refMods);
2000 } catch (UnknownCdmTypeException e) {
2001 addProblematicStatusToFile(currentMyName.getStatus());
2002 logger.warn("Problem with status");
2003 synonym = Synonym.NewInstance(nameTBF, refMods);
2004 synonym.setAppendedPhrase(currentMyName.getStatus());
2005 }
2006 }
2007 else{
2008 synonym = Synonym.NewInstance(nameTBF, refMods);
2009 }
2010
2011 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
2012 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
2013 boolean synoExist = false;
2014 for (Synonym syn: synonymsSet){
2015 // System.out.println(syn.getName()+" -- "+syn.getSec());
2016 boolean a =syn.getName().equals(synonym.getName());
2017 boolean b = syn.getSec().equals(synonym.getSec());
2018 if (a && b) {
2019 synoExist=true;
2020 }
2021 }
2022 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
2023 sourceHandler.addSource(refMods, synonym);
2024
2025 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),re, null);
2026 }
2027
2028 }
2029
2030
2031 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2032 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2033 }
2034 }
2035
2036 if(!currentMyName.getName().isEmpty()){
2037 //logger.info("acceptedTaxon and name: *"+acceptedTaxon.getTitleCache()+"*, *"+currentMyName.getName()+"*");
2038 if (acceptedTaxon.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(currentMyName.getName().trim())){
2039 Reference<?> refS = ReferenceFactory.newGeneric();
2040 refS.setTitleCache(refString, true);
2041 // TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
2042 // acceptedTaxon.addDescription(td);
2043 // acceptedTaxon.addSource(refSource);
2044 //
2045 // TextData textData = TextData.NewInstance(Feature.CITATION());
2046 //
2047 // textData.addSource(null, null, refS, null);
2048 // td.addElement(textData);
2049 // td.addSource(refSource);
2050 // importer.getDescriptionService().saveOrUpdate(td);
2051
2052
2053 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2054 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2055
2056 }
2057
2058 acceptedTaxon.getName().setNomenclaturalReference(refS);
2059 }
2060 else{
2061 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2062 TaxonNameBase nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
2063 if (nameTBF.hasProblem() &&
2064 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
2065 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
2066 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
2067 nameTBF=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
2068 }
2069 nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
2070 */
2071 TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
2072 Synonym synonym = null;
2073 if (!currentMyName.getStatus().isEmpty()){
2074 try {
2075 statusType = nomStatusString2NomStatus(currentMyName.getStatus());
2076 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
2077 synonym = Synonym.NewInstance(nameTBF, refMods);
2078 } catch (UnknownCdmTypeException e) {
2079 addProblematicStatusToFile(currentMyName.getStatus());
2080 logger.warn("Problem with status");
2081 synonym = Synonym.NewInstance(nameTBF, refMods);
2082 synonym.setAppendedPhrase(currentMyName.getStatus());
2083 }
2084 }
2085 else{
2086 synonym = Synonym.NewInstance(nameTBF, refMods);
2087 }
2088
2089
2090 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2091 setLSID(currentMyName.getIdentifier(), synonym);
2092 }
2093
2094 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
2095 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
2096 boolean synoExist = false;
2097 for (Synonym syn: synonymsSet){
2098 // System.out.println(syn.getName()+" -- "+syn.getSec());
2099 boolean a =syn.getName().equals(synonym.getName());
2100 boolean b = syn.getSec().equals(synonym.getSec());
2101 if (a && b) {
2102 synoExist=true;
2103 }
2104 }
2105 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
2106 sourceHandler.addSource(refMods, synonym);
2107
2108 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),refMods, null);
2109 }
2110 }
2111 }
2112 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2113 }
2114 }
2115
2116
2117
2118 /**
2119 * @param identifier
2120 * @param acceptedTaxon
2121 */
2122 @SuppressWarnings("rawtypes")
2123 private void setLSID(String identifier, TaxonBase<?> taxon) {
2124 //logger.info("setLSID");
2125 // boolean lsidok=false;
2126 String id = identifier.split("__")[0];
2127 String source = identifier.split("__")[1];
2128 if (id.indexOf("lsid")>-1){
2129 try {
2130 LSID lsid = new LSID(id);
2131 taxon.setLsid(lsid);
2132 // lsidok=true;
2133 } catch (MalformedLSIDException e) {
2134 logger.warn("Malformed LSID");
2135 }
2136
2137 }
2138
2139 //logger.info("search reference for LSID");
2140 // if ((id.indexOf("lsid")<0) || !lsidok){
2141 //ADD ORIGINAL SOURCE ID EVEN IF LSID
2142 Reference<?> re = null;
2143 Pager<Reference> references = importer.getReferenceService().findByTitle(Reference.class, source, MatchMode.EXACT, null, 1, null, null, null);
2144 if( references !=null && references.getCount()>0){
2145 re=references.getRecords().get(0);
2146 }
2147 //logger.info("search reference for LSID-end");
2148 if(re == null){
2149 re = ReferenceFactory.newGeneric();
2150 re.setTitleCache(source, true);
2151 importer.getReferenceService().saveOrUpdate(re);
2152 }
2153 re=CdmBase.deproxy(re, Reference.class);
2154
2155 //logger.info("search source for LSID");
2156 Set<IdentifiableSource> sources = taxon.getSources();
2157 boolean lsidinsource=false;
2158 boolean urlinsource=false;
2159 for (IdentifiableSource src:sources){
2160 if (id.equalsIgnoreCase(src.getIdInSource()) && re.getTitleCache().equals(src.getCitation().getTitleCache())) {
2161 lsidinsource=true;
2162 }
2163 if (src.getIdInSource() == null && re.getTitleCache().equals(sourceUrlRef.getTitleCache())) {
2164 urlinsource=true;
2165 }
2166 }
2167 if(!lsidinsource) {
2168 taxon.addSource(OriginalSourceType.Import, id,null,re,null);
2169 }
2170 if(!urlinsource)
2171 {
2172 sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
2173 taxon.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
2174 // }
2175 }
2176
2177 }
2178
2179 /**
2180 * try to solve a parsing problem for a scientific name
2181 * @param original : the name from the OCR document
2182 * @param name : the tagged version
2183 * @param parser
2184 * @return the corrected TaxonNameBase
2185 */
2186 /* @SuppressWarnings({ "unchecked", "rawtypes" })
2187 private TaxonNameBase<?,?> solveNameProblem(String original, String name, INonViralNameParser parser, String author, Rank rank) {
2188 Map<String,String> ato = namesMap.get(original);
2189 if (ato == null) {
2190 ato = namesMap.get(original+" "+author);
2191 }
2192
2193
2194 if (ato == null && rank.equals(Rank.UNKNOWN_RANK())){
2195 rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
2196 }
2197 if (ato != null && rank.equals(Rank.UNKNOWN_RANK())){
2198 rank = getRank(ato);
2199 }
2200 // TaxonNameBase<?,?> nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
2201 TaxonNameBase<?,?> nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
2202 // logger.info("RANK: "+rank);
2203 int retry=0;
2204 List<ParserProblem> problems = nameTBF.getParsingProblems();
2205 for (ParserProblem pb:problems) {
2206 System.out.println(pb.toString());
2207 }
2208 while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
2209 addProblemNameToFile(name,author,nomenclaturalCode,rank);
2210 String fullname=name;
2211 if(! skippQuestion) {
2212 fullname = getFullReference(name,nameTBF.getParsingProblems());
2213 }
2214 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2215 nameTBF = BotanicalName.NewInstance(null);
2216 }
2217 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2218 nameTBF = ZoologicalName.NewInstance(null);
2219 }
2220 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2221 nameTBF= BacterialName.NewInstance(null);
2222 }
2223 parser.parseReferencedName(nameTBF, fullname, rank, false);
2224 retry++;
2225 }
2226 if (retry == 1){
2227 if(author != null){
2228 if (name.indexOf(author)>-1) {
2229 nameTBF = parser.parseSimpleName(name.substring(0, name.indexOf(author)), nomenclaturalCode, rank);
2230 } else {
2231 nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
2232 }
2233 if (nameTBF.hasProblem()){
2234 if (name.indexOf(author)>-1) {
2235 addProblemNameToFile(name.substring(0, name.indexOf(author)),author,nomenclaturalCode,rank);
2236 } else {
2237 addProblemNameToFile(name,author,nomenclaturalCode,rank);
2238 }
2239 // System.out.println("TBF still has problems "+nameTBF.hasProblem());
2240 problems = nameTBF.getParsingProblems();
2241 for (ParserProblem pb:problems) {
2242 System.out.println(pb.toString());
2243 }
2244 nameTBF.setFullTitleCache(name, true);
2245 }else{
2246 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2247 ((BotanicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2248 }
2249 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2250 ((ZoologicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2251 }
2252 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2253 ((BacterialName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2254 }
2255 }
2256 // logger.info("FULL TITLE CACHE "+name);
2257 }else{
2258 nameTBF.setFullTitleCache(name, true);
2259 }
2260 }
2261 return nameTBF;
2262 }
2263
2264 */
2265
2266 /**
2267 * @param nomenclatureNode: the XML nodes
2268 * @param nametosave: the list of objects to save into the CDM
2269 * @param refMods: the current reference extracted from the MODS
2270 * @return
2271 */
2272 @SuppressWarnings({ "rawtypes" })
2273 private Taxon extractNomenclature(Node nomenclatureNode, List<TaxonNameBase> nametosave, Reference<?> refMods) throws ClassCastException{
2274 refMods=CdmBase.deproxy(refMods, Reference.class);
2275
2276 logger.info("extractNomenclature");
2277 NodeList children = nomenclatureNode.getChildNodes();
2278 String freetext="";
2279 NonViralName<?> nameToBeFilled = null;
2280 Taxon acceptedTaxon = null;
2281 // INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2282
2283 // String fullContent = nomenclatureNode.getTextContent();
2284
2285 NomenclaturalStatusType statusType = null;
2286 for (int i=0;i<children.getLength();i++){
2287 if(children.item(i).getNodeName().equalsIgnoreCase("tax:status")){
2288 String status = children.item(i).getTextContent().trim();
2289 if (!status.isEmpty()){
2290 try {
2291 statusType = nomStatusString2NomStatus(status);
2292 } catch (UnknownCdmTypeException e) {
2293 addProblematicStatusToFile(status);
2294 logger.warn("Problem with status");
2295 }
2296 }
2297 }
2298 }
2299
2300 boolean containsSynonyms=false;
2301 for (int i=0;i<children.getLength();i++){
2302
2303 if (children.item(i).getNodeName().equalsIgnoreCase("#text")) {
2304 freetext=children.item(i).getTextContent();
2305 }
2306 if (children.item(i).getNodeName().equalsIgnoreCase("tax:collection_event")) {
2307 // System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
2308 extractMaterialsDirect(children.item(i), acceptedTaxon, refMods, "collection");
2309 }
2310 if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
2311 //System.out.println("HANDLE FIRST NAME OF THE LIST");
2312 if(!containsSynonyms){
2313 //System.out.println("I : "+i);
2314 currentMyName = new MyName(false);
2315 try {
2316 currentMyName = extractScientificName(children.item(i),refMods);
2317 treatmentMainName = currentMyName.getNewName();
2318 originalTreatmentName = currentMyName.getOriginalName();
2319
2320 } catch (TransformerFactoryConfigurationError e1) {
2321 logger.warn(e1);
2322 } catch (TransformerException e1) {
2323 logger.warn(e1);
2324 }
2325
2326 if (currentMyName.getRank().equals(Rank.UNKNOWN_RANK()) || currentMyName.getRank().isLower(configState.getConfig().getMaxRank()) || currentMyName.getRank().equals(configState.getConfig().getMaxRank())){
2327 maxRankRespected=true;
2328
2329 nameToBeFilled=currentMyName.getTaxonNameBase();
2330
2331 // acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
2332 acceptedTaxon=currentMyName.getTaxon();
2333 //System.out.println("TreatmentName "+treatmentMainName+" - "+acceptedTaxon);
2334
2335
2336 boolean statusMatch=false;
2337 if(acceptedTaxon !=null ){
2338 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2339 statusMatch=compareStatus(acceptedTaxon, statusType);
2340 //System.out.println("statusMatch: "+statusMatch);
2341 }
2342 if (acceptedTaxon ==null || (acceptedTaxon != null && !statusMatch)){
2343
2344 nameToBeFilled=currentMyName.getTaxonNameBase();
2345 if (nameToBeFilled!=null){
2346 if (!originalTreatmentName.isEmpty()) {
2347 TaxonNameDescription td = TaxonNameDescription.NewInstance();
2348 td.setTitleCache(originalTreatmentName, true);
2349 nameToBeFilled.addDescription(td);
2350 }
2351
2352 if(statusType != null) {
2353 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
2354 }
2355 sourceHandler.addSource(refMods, nameToBeFilled);
2356
2357 if (nameToBeFilled.getNomenclaturalReference() == null) {
2358 acceptedTaxon= new Taxon(nameToBeFilled,refMods);
2359 //System.out.println("NEW ACCEPTED HERE "+nameToBeFilled);
2360 }
2361 else {
2362 acceptedTaxon= new Taxon(nameToBeFilled,(Reference<?>) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
2363 //System.out.println("NEW ACCEPTED HERE2 "+nameToBeFilled);
2364 }
2365
2366 sourceHandler.addSource(refMods, acceptedTaxon);
2367
2368 if(!configState.getConfig().doKeepOriginalSecundum()) {
2369 acceptedTaxon.setSec(configState.getConfig().getSecundum());
2370 //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2371 //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2372 }
2373
2374 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2375 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2376 }
2377
2378
2379 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2380 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2381 }
2382
2383 }else{
2384 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2385 Set<IdentifiableSource> sources = acceptedTaxon.getSources();
2386 boolean sourcelinked=false;
2387 for (IdentifiableSource source:sources){
2388 if (source.getCitation().getTitleCache().equalsIgnoreCase(refMods.getTitleCache())) {
2389 sourcelinked=true;
2390 }
2391 }
2392 if (!configState.getConfig().doKeepOriginalSecundum()) {
2393 acceptedTaxon.setSec(configState.getConfig().getSecundum());
2394 //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2395 //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2396 }
2397 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2398 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2399 if (!sourcelinked){
2400 sourceHandler.addSource(refMods, acceptedTaxon);
2401 }
2402 if (!sourcelinked || !configState.getConfig().doKeepOriginalSecundum()){
2403
2404 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2405 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2406 }
2407 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2408 }
2409 }
2410 }else{
2411 maxRankRespected=false;
2412 }
2413 containsSynonyms=true;
2414 }else{
2415 //System.out.println("YOUHOUUU "+i);
2416 try{
2417 extractSynonyms(children.item(i), acceptedTaxon, refMods);
2418 }catch(NullPointerException e){
2419 logger.warn("nullpointerexception, the accepted taxon might be null");
2420 }
2421 }
2422 containsSynonyms=true;
2423 }
2424 if (children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group") && maxRankRespected){
2425 reloadClassification();
2426 //extract the References within the document
2427 extractReferences(children.item(i),nametosave,acceptedTaxon,refMods);
2428 }
2429 if(!stringIsEmpty(freetext.trim())) {
2430 setParticularDescription(freetext.trim(),acceptedTaxon,acceptedTaxon, refMods,getNotMarkedUpFeatureObject());
2431 }
2432
2433 }
2434 // importer.getClassificationService().saveOrUpdate(classification);
2435 return acceptedTaxon;
2436 }
2437
2438
2439 /**
2440 * @return
2441 */
2442
2443 private boolean compareStatus(TaxonBase<?> t, NomenclaturalStatusType statusType) {
2444 //logger.info("compareStatus");
2445 boolean statusMatch=false;
2446 //found one taxon
2447 Set<NomenclaturalStatus> status = t.getName().getStatus();
2448 if (statusType!=null && status.size()>0){ //the statusType is known for both taxon
2449 for (NomenclaturalStatus st:status){
2450 NomenclaturalStatusType stype = st.getType();
2451 if (stype.toString().equalsIgnoreCase(statusType.toString())) {
2452 statusMatch=true;
2453 }
2454 }
2455 }
2456 else{
2457 if(statusType == null && status.size()==0) {//there is no statusType, we can assume it's the same
2458 statusMatch=true;
2459 }
2460 }
2461 return statusMatch;
2462 }
2463
2464 /**
2465 * @param acceptedTaxon: the current acceptedTaxon
2466 * @param ref: the current reference extracted from the MODS
2467 * @return the parent for the current accepted taxon
2468 */
2469 /* private Taxon createParent(Taxon acceptedTaxon, Reference<?> ref) {
2470 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2471
2472 List<Rank> rankList = new ArrayList<Rank>();
2473 rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
2474
2475 List<String> rankListStr = new ArrayList<String>();
2476 for (Rank r:rankList) {
2477 rankListStr.add(r.toString());
2478 }
2479 String r="";
2480 String s = acceptedTaxon.getTitleCache();
2481 Taxon tax = null;
2482 if(!skippQuestion){
2483 int addTaxon = askAddParent(s);
2484 logger.info("ADD TAXON: "+addTaxon);
2485 if (addTaxon == 0 ){
2486 Taxon tmp = askParent(acceptedTaxon, classification);
2487 if (tmp == null){
2488 s = askSetParent(s);
2489 r = askRank(s,rankListStr);
2490
2491 NonViralName<?> nameToBeFilled = null;
2492 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2493 nameToBeFilled = BotanicalName.NewInstance(null);
2494 }
2495 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2496 nameToBeFilled = ZoologicalName.NewInstance(null);
2497 }
2498 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2499 nameToBeFilled = BacterialName.NewInstance(null);
2500 }
2501 nameToBeFilled.setTitleCache(s);
2502 nameToBeFilled.setRank(getRank(r));
2503
2504 tax = Taxon.NewInstance(nameToBeFilled, ref);
2505 }
2506 else{
2507 tax=tmp;
2508 }
2509
2510 createParent(tax, ref);
2511 // logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
2512 classification.addParentChild(tax, acceptedTaxon, ref, null);
2513 }
2514 else{
2515 classification.addChildTaxon(acceptedTaxon, ref, null);
2516 tax=acceptedTaxon;
2517 }
2518 } else{
2519 classification.addChildTaxon(acceptedTaxon, ref, null);
2520 tax=acceptedTaxon;
2521 }
2522 // logger.info("RETURN: "+tax );
2523 return tax;
2524
2525 }
2526
2527 */
2528
2529
2530 private MyName extractScientificNameSynonym(Node name, Reference<?> refMods) throws TransformerFactoryConfigurationError, TransformerException {
2531 //System.out.println("extractScientificNameSynonym");
2532 logger.info("extractScientificNameSynonym");
2533 String[] rankListToPrint_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2534 List<String> rankListToPrint = new ArrayList<String>();
2535 for (String r : rankListToPrint_tmp) {
2536 rankListToPrint.add(r.toLowerCase());
2537 }
2538
2539 Rank rank = Rank.UNKNOWN_RANK();
2540 NodeList children = name.getChildNodes();
2541 String originalName="";
2542 String fullName = "";
2543 String newName="";
2544 String identifier="";
2545 HashMap<String, String> atomisedMap = new HashMap<String, String>();
2546 List<String> atomisedName= new ArrayList<String>();
2547
2548 String rankStr = "";
2549 Rank tmpRank ;
2550
2551 String status= extractStatus(children);
2552
2553 for (int i=0;i<children.getLength();i++){
2554 if(children.item(i).getNodeName().equalsIgnoreCase("tax:xmldata")){
2555 NodeList atom = children.item(i).getChildNodes();
2556 for (int k=0;k<atom.getLength();k++){
2557 identifier = extractIdentifier(identifier, atom.item(k));
2558 tmpRank = null;
2559 rankStr = atom.item(k).getNodeName().toLowerCase();
2560 // logger.info("RANKSTR:*"+rankStr+"*");
2561 if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2562 rankStr=atom.item(k).getTextContent().trim();
2563 tmpRank = getRank(rankStr);
2564 }
2565 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2566 if (tmpRank != null){
2567 rank=tmpRank;
2568 }
2569 atomisedMap.put(rankStr.toLowerCase(),atom.item(k).getTextContent().trim());
2570 }
2571 addAtomisedNamesToMap(rankListToPrint, rank, atomisedName, atom);
2572 }
2573 if(children.item(i).getNodeName().equalsIgnoreCase("#text") && !StringUtils.isBlank(children.item(i).getTextContent())){
2574 // logger.info("name non atomised: "+children.item(i).getTextContent());
2575 fullName = children.item(i).getTextContent().trim();
2576 // logger.info("fullname: "+fullName);
2577 }
2578 }
2579 originalName=fullName;
2580 fullName = cleanName(fullName, atomisedName);
2581 namesMap.put(fullName,atomisedMap);
2582
2583 String atomisedNameStr = getAtomisedNameStr(atomisedName);
2584
2585 if (fullName != null){
2586 // System.out.println("fullname: "+fullName);
2587 // System.out.println("atomised: "+atomisedNameStr);
2588 if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2589 if (skippQuestion){
2590 // String defaultN = "";
2591 if (atomisedNameStr.length()>fullName.length()) {
2592 newName=atomisedNameStr;
2593 } else {
2594 if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2595 newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2596 } else {
2597 newName=fullName;
2598 }
2599 }
2600 } else {
2601 newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2602 }
2603 } else {
2604 newName=fullName;
2605 }
2606 }
2607 //not really needed
2608 // rank = askForRank(newName, rank, nomenclaturalCode);
2609 // System.out.println("atomised: "+atomisedMap.toString());
2610
2611 // String[] names = new String[5];
2612 MyName myname = new MyName(true);
2613
2614 //System.out.println("Handle "+newName+ "(rank: "+rank+")");
2615 // System.out.println(atomisedMap.keySet());
2616 fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2617 myname.setOriginalName(fullName);
2618 myname.setNewName(newName);
2619 myname.setRank(rank);
2620 myname.setIdentifier(identifier);
2621 myname.setStatus(status);
2622 myname.setSource(refMods);
2623
2624 // boolean higherAdded=false;
2625
2626
2627 boolean parseNameManually=false;
2628 INonViralNameParser<?> parser = NonViralNameParserImpl.NewInstance();
2629 TaxonNameBase<?,?> nameToBeFilledTest ;
2630
2631 //if selected the atomised version
2632 if(newName==atomisedNameStr){
2633 nameToBeFilledTest = parser.parseFullName(atomisedNameStr, nomenclaturalCode, rank);
2634 if (nameToBeFilledTest.hasProblem()){
2635 addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2636 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2637 if (nameToBeFilledTest.hasProblem()){
2638 addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2639 parseNameManually=true;
2640 }
2641 }
2642 }else{
2643 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode, rank);
2644 if (nameToBeFilledTest.hasProblem()){
2645 addProblemNameToFile("fullversion",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2646 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2647 parseNameManually=true;
2648 if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2649 addNameDifferenceToFile(originalName,atomisedNameStr);
2650 }
2651 }
2652 }
2653
2654 if(parseNameManually){
2655 //System.out.println("DO IT MANUALLY");
2656 createSynonym(rank, newName, atomisedMap, myname);
2657 }
2658 else{
2659 //System.out.println("AUTOMATIC!");
2660 // createAtomisedTaxonString(newName, atomisedMap, myname);
2661 myname.setParsedName(nameToBeFilledTest);
2662 myname.buildTaxon();
2663 }
2664 //System.out.println("RETURN SYNONYM "+myname.getSyno().toString());
2665 return myname;
2666 }
2667 /**
2668 * @param name
2669 * @throws TransformerFactoryConfigurationError
2670 * @throws TransformerException
2671 * @return a list of possible names
2672 */
2673 @SuppressWarnings({ "null", "rawtypes" })
2674 private MyName extractScientificName(Node name, Reference<?> refMods) throws TransformerFactoryConfigurationError, TransformerException {
2675 logger.info("extractScientificName");
2676
2677 String[] rankListToPrint_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2678 List<String> rankListToPrint = new ArrayList<String>();
2679 for (String r : rankListToPrint_tmp) {
2680 rankListToPrint.add(r.toLowerCase());
2681 }
2682
2683 Rank rank = Rank.UNKNOWN_RANK();
2684 NodeList children = name.getChildNodes();
2685 String originalName="";
2686 String fullName = "";
2687 String newName="";
2688 String identifier="";
2689 HashMap<String, String> atomisedMap = new HashMap<String, String>();
2690 List<String> atomisedName= new ArrayList<String>();
2691
2692 String rankStr = "";
2693 Rank tmpRank ;
2694
2695 String status= extractStatus(children);
2696
2697 for (int i=0;i<children.getLength();i++){
2698 if(children.item(i).getNodeName().equalsIgnoreCase("tax:xmldata")){
2699 NodeList atom = children.item(i).getChildNodes();
2700 for (int k=0;k<atom.getLength();k++){
2701 identifier = extractIdentifier(identifier, atom.item(k));
2702 tmpRank = null;
2703 rankStr = atom.item(k).getNodeName().toLowerCase();
2704 // logger.info("RANKSTR:*"+rankStr+"*");
2705 if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2706 rankStr=atom.item(k).getTextContent().trim();
2707 tmpRank = getRank(rankStr);
2708 }
2709 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2710 if (tmpRank != null){
2711 rank=tmpRank;
2712 }
2713 atomisedMap.put(rankStr.toLowerCase(),atom.item(k).getTextContent().trim());
2714 }
2715 addAtomisedNamesToMap(rankListToPrint, rank, atomisedName, atom);
2716 }
2717 if(children.item(i).getNodeName().equalsIgnoreCase("#text") && !StringUtils.isBlank(children.item(i).getTextContent())){
2718 // logger.info("name non atomised: "+children.item(i).getTextContent());
2719 fullName = children.item(i).getTextContent().trim();
2720 // logger.info("fullname: "+fullName);
2721 }
2722 }
2723 originalName=fullName;
2724 fullName = cleanName(fullName, atomisedName);
2725 namesMap.put(fullName,atomisedMap);
2726
2727 String atomisedNameStr = getAtomisedNameStr(atomisedName);
2728
2729 if (fullName != null){
2730 // System.out.println("fullname: "+fullName);
2731 // System.out.println("atomised: "+atomisedNameStr);
2732 if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2733 //System.out.println("atomisedNameStr vs. fullName:"+atomisedNameStr+"--"+fullName);
2734 if (skippQuestion){
2735 // String defaultN = "";
2736 if (atomisedNameStr.length()>fullName.length()) {
2737 newName=atomisedNameStr;
2738 } else {
2739 if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2740 //System.out.println("là");
2741 newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2742 } else {
2743 //System.out.println("ici");
2744 newName=fullName;
2745 }
2746 }
2747 } else {
2748 newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2749 }
2750 } else {
2751 newName=fullName;
2752 }
2753 }
2754 //not really needed
2755 // rank = askForRank(newName, rank, nomenclaturalCode);
2756 // System.out.println("atomised: "+atomisedMap.toString());
2757
2758 // String[] names = new String[5];
2759 MyName myname = new MyName(false);
2760
2761 //System.out.println("\n\nBUILD "+newName+ "(rank: "+rank+")");
2762 // System.out.println(atomisedMap.keySet());
2763 fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2764 myname.setOriginalName(fullName);
2765 myname.setNewName(newName);
2766
2767 myname.setRank(rank);
2768 myname.setIdentifier(identifier);
2769 myname.setStatus(status);
2770 myname.setSource(refMods);
2771
2772 // boolean higherAdded=false;
2773
2774
2775 boolean parseNameManually=false;
2776 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2777 TaxonNameBase nameToBeFilledTest = null;
2778
2779 //if selected the atomised version
2780 if(newName==atomisedNameStr){
2781 nameToBeFilledTest = parser.parseFullName(atomisedNameStr, nomenclaturalCode, rank);
2782 if (nameToBeFilledTest.hasProblem()){
2783 addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2784 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2785 if (nameToBeFilledTest.hasProblem()){
2786 addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2787 parseNameManually=true;
2788 }
2789 }
2790 }else{
2791 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode, rank);
2792 if (nameToBeFilledTest.hasProblem()){
2793 addProblemNameToFile("fullversion",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2794 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2795 parseNameManually=true;
2796 if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2797 addNameDifferenceToFile(originalName,atomisedNameStr);
2798 }
2799 }
2800 }
2801
2802 //System.out.println("parseNameManually: "+parseNameManually);
2803 if(parseNameManually){
2804 createAtomisedTaxon(rank, newName, atomisedMap, myname);
2805 }
2806 else{
2807 createAtomisedTaxonString(newName, atomisedMap, myname);
2808 myname.setParsedName(nameToBeFilledTest);
2809 myname.buildTaxon();
2810 }
2811 return myname;
2812
2813 }
2814
2815 /**
2816 * @param atomisedName
2817 * @return
2818 */
2819 private String getAtomisedNameStr(List<String> atomisedName) {
2820 //logger.info("getAtomisedNameStr");
2821 String atomisedNameStr = StringUtils.join(atomisedName," ");
2822 while(atomisedNameStr.contains(" ")) {
2823 atomisedNameStr=atomisedNameStr.replace(" ", " ");
2824 }
2825 atomisedNameStr=atomisedNameStr.trim();
2826 return atomisedNameStr;
2827 }
2828
2829 /**
2830 * @param children
2831 * @param status
2832 * @return
2833 */
2834 private String extractStatus(NodeList children) {
2835 logger.info("extractStatus");
2836 String status="";
2837 for (int i=0;i<children.getLength();i++){
2838 if(children.item(i).getNodeName().equalsIgnoreCase("tax:status") ||
2839 (children.item(i).getNodeName().equalsIgnoreCase("tax:namePart") &&
2840 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("status"))){
2841 status = children.item(i).getTextContent().trim();
2842 }
2843 }
2844 return status;
2845 }
2846
2847 /**
2848 * @param identifier
2849 * @param atom
2850 * @param k
2851 * @return
2852 */
2853 private String extractIdentifier(String identifier, Node atom) {
2854 //logger.info("extractIdentifier");
2855 if (atom.getNodeName().equalsIgnoreCase("tax:xid")){
2856 try{
2857 identifier = atom.getAttributes().getNamedItem("identifier").getNodeValue();
2858 }catch(Exception e){
2859 System.out.println("pb with identifier, maybe empty");
2860 }
2861 try{
2862 identifier+="__"+atom.getAttributes().getNamedItem("source").getNodeValue();
2863 }catch(Exception e){
2864 System.out.println("pb with identifier, maybe empty");
2865 }
2866 }
2867 return identifier;
2868 }
2869
2870 /**
2871 * @param rankListToPrint
2872 * @param rank
2873 * @param atomisedName
2874 * @param atom
2875 */
2876 private void addAtomisedNamesToMap(List<String> rankListToPrint, Rank rank, List<String> atomisedName, NodeList atom) {
2877 logger.info("addAtomisedNamesToMap");
2878 for (int k=0;k<atom.getLength();k++){
2879 if (!atom.item(k).getNodeName().equalsIgnoreCase("dwc:taxonRank") ) {
2880 if (atom.item(k).getNodeName().equalsIgnoreCase("dwc:subgenus") || atom.item(k).getNodeName().equalsIgnoreCase("dwcranks:subgenus")) {
2881 atomisedName.add("("+atom.item(k).getTextContent().trim()+")");
2882 } else{
2883 if(atom.item(k).getNodeName().equalsIgnoreCase("dwcranks:varietyepithet") || atom.item(k).getNodeName().equalsIgnoreCase("dwc:Subspecies")) {
2884 if(atom.item(k).getNodeName().equalsIgnoreCase("dwcranks:varietyepithet")){
2885 atomisedName.add("var. "+atom.item(k).getTextContent().trim());
2886 }
2887 if(atom.item(k).getNodeName().equalsIgnoreCase("dwc:Subspecies") || atom.item(k).getNodeName().equalsIgnoreCase("dwc:infraspecificepithet")) {
2888 atomisedName.add("subsp. "+atom.item(k).getTextContent().trim());
2889 }
2890 }
2891 else{
2892 if(rankListToPrint.contains(atom.item(k).getNodeName().toLowerCase())) {
2893 atomisedName.add(atom.item(k).getTextContent().trim());
2894 }
2895 else{
2896 // System.out.println("rank : "+rank.toString());
2897 if (rank.isHigher(Rank.GENUS()) && (atom.item(k).getNodeName().indexOf("dwcranks:")>-1 || atom.item(k).getNodeName().indexOf("dwc:Family")>-1)) {
2898 atomisedName.add(atom.item(k).getTextContent().trim());
2899 }
2900 // else{
2901 // System.out.println("on a oublie qqn "+atom.item(k).getNodeName());
2902 // }
2903 }
2904 // else{
2905 // System.out.println("on a oublie qqn "+atom.item(k).getNodeName());
2906 // }
2907 }
2908 }
2909 }
2910 }
2911 }
2912
2913 /**
2914 * @param fullName
2915 * @param atomisedName
2916 * @return
2917 */
2918 private String cleanName(String name, List<String> atomisedName) {
2919 //logger.info("cleanName");
2920 String fullName =name;
2921 if (fullName != null){
2922 fullName = fullName.replace("( ", "(");
2923 fullName = fullName.replace(" )",")");
2924
2925 if (fullName.trim().isEmpty()){
2926 fullName=StringUtils.join(atomisedName," ");
2927 }
2928
2929 while(fullName.contains(" ")) {
2930 fullName=fullName.replace(" ", " ");
2931 // logger.info("while");
2932 }
2933 fullName=fullName.trim();
2934 }
2935 return fullName;
2936 }
2937
2938 /**
2939 * @param rank
2940 * @param fullName
2941 * @param atomisedMap
2942 * @param myname
2943 * @return
2944 */
2945 private String extractAuthorFromNames(Rank rank, String name, HashMap<String, String> atomisedMap,
2946 MyName myname) {
2947 logger.info("extractAuthorFromNames");
2948 String fullName=name;
2949 if (atomisedMap.get("dwc:scientificnameauthorship") == null && fullName!=null){
2950 // System.out.println("rank : "+rank.toString());
2951 if(rank.isHigher(Rank.SPECIES())){
2952 try{
2953 String author=null;
2954 if(atomisedMap.get("dwcranks:subgenus") != null) {
2955 author = fullName.split(atomisedMap.get("dwcranks:subgenus"))[1].trim();
2956 }
2957 if(atomisedMap.get("dwc:subgenus") != null) {
2958 author = fullName.split(atomisedMap.get("dwc:subgenus"))[1].trim();
2959 }
2960 if(author == null) {
2961 if(atomisedMap.get("dwc:genus") != null) {
2962 author = fullName.split(atomisedMap.get("dwc:genus"))[1].trim();
2963 }
2964 }
2965 if(author != null){
2966 fullName = fullName.substring(0, fullName.indexOf(author));
2967 author=author.replaceAll(",","").trim();
2968 myname.setAuthor(author);
2969 }
2970 }catch(Exception e){
2971 //could not extract the author
2972 }
2973 }
2974 if(rank.equals(Rank.SPECIES())){
2975 try{
2976 String author=null;
2977 if(author == null) {
2978 if(atomisedMap.get("dwc:species") != null) {
2979 String[] t = fullName.split(atomisedMap.get("dwc:species"));
2980 // System.out.println("NB ELEMENTS "+t.length +"fullName "+fullName+", "+atomisedMap.get("dwc:species"));
2981 author = fullName.split(atomisedMap.get("dwc:species"))[1].trim();
2982 // System.out.println("AUTEUR "+author);
2983 }
2984 }
2985 if(author != null){
2986 fullName = fullName.substring(0, fullName.indexOf(author));
2987 author=author.replaceAll(",","").trim();
2988 myname.setAuthor(author);
2989 }
2990 }catch(Exception e){
2991 //could not extract the author
2992 }
2993 }
2994 }else{
2995 myname.setAuthor(atomisedMap.get("dwc:scientificnameauthorship"));
2996 }
2997 return fullName;
2998 }
2999
3000 /**
3001 * @param newName
3002 * @param atomisedMap
3003 * @param myname
3004 */
3005 private void createAtomisedTaxonString(String newName, HashMap<String, String> atomisedMap, MyName myname) {
3006 logger.info("createAtomisedTaxonString "+atomisedMap);
3007 if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
3008 myname.setFamilyStr(atomisedMap.get("dwc:family"));
3009 }
3010 if(atomisedMap.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank.SUBFAMILY())){
3011 myname.setSubfamilyStr(atomisedMap.get("dwcranks:subfamily"));
3012 }
3013 if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
3014 myname.setTribeStr(atomisedMap.get("dwcranks:tribe"));
3015 }
3016 if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
3017 myname.setSubtribeStr(atomisedMap.get("dwcranks:subtribe"));
3018 }
3019 if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
3020 myname.setGenusStr(atomisedMap.get("dwc:genus"));
3021 }
3022 if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3023 myname.setSubgenusStr(atomisedMap.get("dwcranks:subgenus"));
3024 }
3025 if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3026 myname.setSubgenusStr(atomisedMap.get("dwc:subgenus"));
3027 }
3028 if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
3029 String n=newName;
3030 if(atomisedMap.get("dwc:infraspecificepithet") != null) {
3031 n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
3032 n=n.replace("subsp.","");
3033 }
3034 if(atomisedMap.get("dwc:subspecies") != null) {
3035 n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
3036 n=n.replace("subsp.","");
3037 }
3038 if(atomisedMap.get("dwcranks:varietyepithet") != null) {
3039 n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
3040 n=n.replace("var.","");
3041 n=n.replace("v.","");
3042 }
3043 if(atomisedMap.get("dwcranks:formepithet") != null) {
3044 //TODO
3045 System.out.println("TODO FORMA");
3046 n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3047 n=n.replace("forma","");
3048 }
3049 n=n.trim();
3050 String author = myname.getAuthor();
3051 if(n.split(" ").length>2)
3052 {
3053 String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3054 String a= "";
3055 try{
3056 a=n.split(n2)[1].trim();
3057 }catch(Exception e){
3058 logger.info("no author in "+n+"?");}
3059
3060 myname.setAuthor(a);
3061 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3062 n=n2;
3063
3064 }
3065
3066 myname.setSpeciesStr(atomisedMap.get("dwc:species"));
3067 myname.setAuthor(author);
3068 }
3069 if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3070 myname.setSubspeciesStr(atomisedMap.get("dwc:subspecies"));
3071 }
3072 if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3073 myname.setSubspeciesStr(atomisedMap.get("dwc:infraspecificepithet"));
3074 }
3075 if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
3076 myname.setVarietyStr(atomisedMap.get("dwcranks:varietyepithet"));
3077 }
3078 if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
3079 myname.setFormStr(atomisedMap.get("dwcranks:formepithet"));
3080 }
3081 }
3082
3083 private void createSynonym(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
3084 logger.info("createSynonym");
3085 //System.out.println("createsynonym");
3086 if(rank.equals(Rank.UNKNOWN_RANK())){
3087 myname.setNotParsableTaxon(newName);
3088 }else
3089 {if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY()) && rank.equals(Rank.FAMILY())){
3090 myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
3091 }
3092 if(atomisedMap.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank.SUBFAMILY()) && rank.equals(Rank.SUBFAMILY())){
3093 myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
3094 }
3095 if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE()) && rank.equals(Rank.TRIBE())){
3096 myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
3097 }
3098 if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE()) && rank.equals(Rank.SUBTRIBE())){
3099 myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
3100 }
3101 if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS()) && rank.equals(Rank.GENUS())){
3102 myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
3103 }
3104 if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
3105 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
3106 }
3107 if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
3108 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
3109 }
3110 if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES()) && rank.equals(Rank.SPECIES())){
3111 String n=newName;
3112 if(atomisedMap.get("dwc:infraspecificepithet") != null) {
3113 n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
3114 n=n.replace("subsp.","");
3115 }
3116 if(atomisedMap.get("dwc:subspecies") != null) {
3117 n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
3118 n=n.replace("subsp.","");
3119 }
3120 if(atomisedMap.get("dwcranks:varietyepithet") != null) {
3121 n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
3122 n=n.replace("var.","");
3123 n=n.replace("v.","");
3124 }
3125 if(atomisedMap.get("dwcranks:formepithet") != null) {
3126 //TODO
3127 //System.out.println("TODO FORMA");
3128 n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3129 n=n.replace("forma","");
3130 }
3131 n=n.trim();
3132 String author = myname.getAuthor();
3133 if(n.split(" ").length>2)
3134 {
3135 String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3136 String a="";
3137 try{
3138 a= n.split(n2)[1].trim();
3139 }catch(Exception e){logger.info("no author in "+n);}
3140 myname.setAuthor(a);
3141 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3142 n=n2;
3143
3144 }
3145
3146 myname.setSpecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank));
3147 myname.setAuthor(author);
3148 }
3149 if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3150 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3151 }
3152 if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3153 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3154 }
3155 if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY()) && rank.equals(Rank.VARIETY())){
3156 myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3157 }
3158 if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM()) && rank.equals(Rank.FORM())){
3159 myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3160 }
3161 }
3162
3163 }
3164 /**
3165 * @param rank
3166 * @param newName
3167 * @param atomisedMap
3168 * @param myname
3169 */
3170 private void createAtomisedTaxon(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
3171 logger.info("createAtomisedTaxon "+atomisedMap);
3172 if(rank.equals(Rank.UNKNOWN_RANK())){
3173 myname.setNotParsableTaxon(newName);
3174 }
3175 else{
3176 if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
3177 myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
3178 }
3179 if(atomisedMap.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank.SUBFAMILY())){
3180 myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
3181 }
3182 if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
3183 myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
3184 }
3185 if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
3186 myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
3187 }
3188 if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
3189 myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
3190 }
3191 if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3192 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
3193 }
3194 if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3195 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
3196 }
3197 if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
3198 String n=newName;
3199 if(atomisedMap.get("dwc:infraspecificepithet") != null) {
3200 n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
3201 n=n.replace("subsp.","");
3202 }
3203 if(atomisedMap.get("dwc:subspecies") != null) {
3204 n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
3205 n=n.replace("subsp.","");
3206 }
3207 if(atomisedMap.get("dwcranks:varietyepithet") != null) {
3208 n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
3209 n=n.replace("var.","");
3210 n=n.replace("v.","");
3211 }
3212 if(atomisedMap.get("dwcranks:formepithet") != null) {
3213 //TODO
3214 //System.out.println("TODO FORMA");
3215 n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3216 n=n.replace("forma","");
3217 }
3218 n=n.trim();
3219 String author = myname.getAuthor();
3220 if(n.split(" ").length>2)
3221 {
3222 String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3223 String a="";
3224 try{
3225 a= n.split(n2)[1].trim();
3226 }catch(Exception e){logger.info("no author in "+n);}
3227 myname.setAuthor(a);
3228 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3229 n=n2;
3230
3231 }
3232
3233 myname.setSpecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank));
3234 myname.setAuthor(author);
3235 }
3236 if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3237 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3238 }
3239 if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3240 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3241 }
3242 if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
3243 myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3244 }
3245 if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
3246 myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3247 }
3248 }
3249 }
3250
3251 /**
3252 * @return
3253 */
3254 private boolean checkRankValidForImport(Rank currentRank) {
3255 //logger.info("checkRankValidForImport");
3256 return currentRank.isLower(configState.getConfig().getMaxRank()) || currentRank.equals(configState.getConfig().getMaxRank());
3257 }
3258
3259
3260
3261 /**
3262 * @param classification2
3263 */
3264 public void updateClassification(Classification classification2) {
3265 //logger.info("updateClassification");
3266 classification = classification2;
3267 }
3268
3269 /**
3270 * @param tnb
3271 * cast the current taxonnamebase into a botanical name or zoological or bacterial name
3272 * if errors, cast into a classis nonviralname
3273 * @param taxonnamebase2
3274 */
3275 @SuppressWarnings("rawtypes")
3276 public NonViralName<?> castTaxonNameBase(TaxonNameBase tnb, NonViralName<?> nvn) {
3277 //logger.info("castTaxonNameBase");
3278 NonViralName<?> taxonnamebase2 = nvn;
3279 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
3280 try{
3281 taxonnamebase2=(BotanicalName) tnb;
3282 }catch(Exception e){
3283 taxonnamebase2= (NonViralName<?>) tnb;
3284 }
3285 }
3286 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
3287 try{
3288 taxonnamebase2=(ZoologicalName) tnb;
3289 }catch(Exception e){
3290 taxonnamebase2= (NonViralName<?>) tnb;
3291 }
3292 }
3293 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
3294 try{
3295 taxonnamebase2=(BacterialName) tnb;
3296 }catch(Exception e){
3297 taxonnamebase2= (NonViralName<?>) tnb;
3298 }
3299 }
3300 return taxonnamebase2;
3301 }
3302
3303 /**
3304 * @param tnb
3305 * cast the current taxonnamebase into a botanical name or zoological or bacterial name
3306 * if errors, cast into a classis nonviralname
3307 * @param taxonnamebase2
3308 */
3309 @SuppressWarnings("rawtypes")
3310 public NonViralName<?> castTaxonNameBase(TaxonNameBase tnb) {
3311 //logger.info("castTaxonNameBase2");
3312 NonViralName<?> taxonnamebase2 = null;
3313 tnb=CdmBase.deproxy(tnb, TaxonNameBase.class);
3314 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
3315 try{
3316 taxonnamebase2=(BotanicalName) tnb;
3317 }catch(Exception e){
3318 taxonnamebase2= (NonViralName<?>) tnb;
3319 }
3320 }
3321 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
3322 try{
3323 taxonnamebase2=(ZoologicalName) tnb;
3324 }catch(Exception e){
3325 taxonnamebase2= (NonViralName<?>) tnb;
3326 }
3327 }
3328 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
3329 try{
3330 taxonnamebase2=(BacterialName) tnb;
3331 }catch(Exception e){
3332 taxonnamebase2= (NonViralName<?>) tnb;
3333 }
3334 }
3335 return taxonnamebase2;
3336 }
3337
3338 public class MyName {
/**
 * Creates an empty name holder.
 *
 * @param isSynonym {@code true} if the name is to be handled as a synonym,
 *        {@code false} if it is to be handled as a taxon
 */
public MyName(boolean isSynonym) {
    this.isSynonym = isSynonym;
}
3346
// Name strings; both default to the empty string.
String originalName="";
String newName="";
// Rank of this name; unknown until set.
Rank rank=Rank.UNKNOWN_RANK();
// Identifier of the name; handed to setLSID / getTaxonByLSID when it is long enough.
String identifier="";
// Nomenclatural status string; converted with nomStatusString2NomStatus and also used as appended phrase.
String status="";
// Author string; null means no author is known.
String author=null;

// The parsed name, set through setParsedName.
NonViralName<?> taxonnamebase;

// Reference set through setSource; passed on when taxa and synonyms are created.
Reference<?> refMods ;

// Taxa of the individual ranks of this name (set through the corresponding setters).
Taxon family,subfamily,tribe,subtribe,genus,subgenus,species,subspecies, variety,form;
// Names of the individual ranks; read by the build* methods to fill epithets of lower ranks.
NonViralName<?> familyName, subfamilyName, tribeName,subtribeName,genusName,subgenusName,speciesName,subspeciesName;
// String values of the individual ranks; concatenated by toString().
String familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr;
// Parent taxon and its rank, recorded by the build* methods when a new taxon is attached to a parent.
Taxon higherTaxa;
Rank higherRank;
// Result of the find/build methods: the taxon, or the synonym when isSynonym is true.
private Taxon taxon;
private Synonym syno;
3365
3366 /**
3367 * @return the syno
3368 */
3369 public Synonym getSyno() {
3370 return syno;
3371 }
3372
3373 @Override
3374 public String toString(){
3375 List<String> tot=new ArrayList<String>();
3376 String[] n= {familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr};
3377 for (String elt:n){
3378 if (!StringUtils.isEmpty(elt)) {
3379 tot.add(elt);
3380 } else {
3381 tot.add("*");
3382 }
3383 }
3384 return StringUtils.join(tot," ");
3385 }
3386 /**
3387 * @param syno the syno to set
3388 */
3389 public void setSyno(Synonym syno) {
3390 this.syno = syno;
3391 }
3392
// true if this name is handled as a synonym, false if it is handled as a taxon
boolean isSynonym=false;
3394
3395 /**
3396 * @return the isSynonym
3397 */
3398 public boolean isSynonym() {
3399 return isSynonym;
3400 }
3401
3402 /**
3403 * @param isSynonym the isSynonym to set
3404 */
3405 public void setSynonym(boolean isSynonym) {
3406 this.isSynonym = isSynonym;
3407 }
3408
3409 public void setSource(Reference<?> re){
3410 refMods=re;
3411 }
3412
3413 /**
3414 * @param string
3415 */
3416 public void setFormStr(String string) {
3417 this.formStr=string;
3418
3419 }
3420 /**
3421 * @param string
3422 */
3423 public void setVarietyStr(String string) {
3424 this.varietyStr=string;
3425
3426 }
3427 /**
3428 * @param string
3429 */
3430 public void setSubspeciesStr(String string) {
3431 this.subspeciesStr=string;
3432
3433 }
3434 /**
3435 * @param string
3436 */
3437 public void setSpeciesStr(String string) {
3438 this.speciesStr=string;
3439
3440 }
3441 /**
3442 * @param string
3443 */
3444 public void setSubgenusStr(String string) {
3445 this.subgenusStr=string;
3446
3447 }
3448 /**
3449 * @param string
3450 */
3451 public void setGenusStr(String string) {
3452 this.genusStr=string;
3453
3454 }
3455 /**
3456 * @param string
3457 */
3458 public void setSubtribeStr(String string) {
3459 this.subtribeStr=string;
3460
3461 }
3462 /**
3463 * @param string
3464 */
3465 public void setTribeStr(String string) {
3466 this.tribeStr=string;
3467
3468 }
3469 /**
3470 * @param string
3471 */
3472 public void setSubfamilyStr(String string) {
3473 this.subfamilyStr=string;
3474
3475 }
3476 /**
3477 * @param string
3478 */
3479 public void setFamilyStr(String string) {
3480 this.familyStr=string;
3481
3482 }
3483 /**
3484 * @return the familyStr
3485 */
3486 public String getFamilyStr() {
3487 return familyStr;
3488 }
3489 /**
3490 * @return the subfamilyStr
3491 */
3492 public String getSubfamilyStr() {
3493 return subfamilyStr;
3494 }
3495 /**
3496 * @return the tribeStr
3497 */
3498 public String getTribeStr() {
3499 return tribeStr;
3500 }
3501 /**
3502 * @return the subtribeStr
3503 */
3504 public String getSubtribeStr() {
3505 return subtribeStr;
3506 }
3507 /**
3508 * @return the genusStr
3509 */
3510 public String getGenusStr() {
3511 return genusStr;
3512 }
3513 /**
3514 * @return the subgenusStr
3515 */
3516 public String getSubgenusStr() {
3517 return subgenusStr;
3518 }
3519 /**
3520 * @return the speciesStr
3521 */
3522 public String getSpeciesStr() {
3523 return speciesStr;
3524 }
3525 /**
3526 * @return the subspeciesStr
3527 */
3528 public String getSubspeciesStr() {
3529 return subspeciesStr;
3530 }
3531 /**
3532 * @return the formStr
3533 */
3534 public String getFormStr() {
3535 return formStr;
3536 }
3537 /**
3538 * @return the varietyStr
3539 */
3540 public String getVarietyStr() {
3541 return varietyStr;
3542 }
3543
3544 /**
3545 * @param newName2
3546 */
3547 public void setNotParsableTaxon(String newName2) {
3548 //takes too much time
3549 // List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3550
3551 NomenclaturalStatusType statusType = null;
3552 if (!getStatus().isEmpty()){
3553 try {
3554 statusType = nomStatusString2NomStatus(getStatus());
3555 } catch (UnknownCdmTypeException e) {
3556 addProblematicStatusToFile(getStatus());
3557 logger.warn("Problem with status");
3558 }
3559 }
3560 List<TaxonBase> tmpList = new ArrayList<TaxonBase>();
3561
3562 Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, newName2, MatchMode.BEGINNING, null, null, null, null, null);
3563 tmpList.addAll(taxontest.getRecords());
3564
3565 //logger.info("tmpList returned: "+tmpList.size());
3566
3567
3568 boolean foundIdentic=false;
3569 TaxonBase<?> tmptaxonbase=null;
3570 // Taxon tmpPartial=null;
3571 for (TaxonBase<?> tmpb:tmpList){
3572 if(tmpb !=null){
3573 TaxonNameBase<?,?> tnb = tmpb.getName();
3574 Rank crank=null;
3575 if (tnb != null){
3576 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(newName2) ){
3577 crank =tnb.getRank();
3578 if (crank !=null && rank !=null){
3579 if (crank.equals(rank)){
3580 foundIdentic=true;
3581 try{
3582 if(!isSynonym) {
3583 tmptaxonbase=tmpb;
3584 } else {
3585 tmptaxonbase=tmpb;
3586 }
3587 break;
3588 }catch(Exception e){
3589 e.printStackTrace();
3590 }
3591 }
3592 }
3593 }
3594 }
3595 }
3596 }
3597 boolean statusMatch=false;
3598 boolean appendedMatch=false;
3599 if(tmptaxonbase !=null && foundIdentic){
3600 statusMatch=compareStatus(tmptaxonbase, statusType);
3601 if (!getStatus().isEmpty() && ! (tmptaxonbase.getAppendedPhrase() == null)) {
3602 appendedMatch=tmptaxonbase.getAppendedPhrase().equals(getStatus());
3603 }
3604 if (getStatus().isEmpty() && tmptaxonbase.getAppendedPhrase() == null) {
3605 appendedMatch=true;
3606 }
3607
3608 }
3609 if ((tmptaxonbase == null || !foundIdentic) || (tmptaxonbase != null && !statusMatch) || (tmptaxonbase != null && !appendedMatch && !statusMatch)){
3610
3611 NonViralName<?> tnb = getNonViralNameAccNomenclature();
3612 tnb.setRank(rank);
3613
3614 if(statusType != null) {
3615 tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3616 }
3617 if(getStatus()!=null) {
3618 tnb.setAppendedPhrase(getStatus());
3619 }
3620
3621 tnb.setTitleCache(newName2,true);
3622 tmptaxonbase = findMatchingTaxon(tnb,refMods);
3623 if(tmptaxonbase==null){
3624 tmptaxonbase=Taxon.NewInstance(tnb, refMods);
3625 if(!configState.getConfig().doKeepOriginalSecundum()) {
3626 tmptaxonbase.setSec(configState.getConfig().getSecundum());
3627 }
3628 // tmptaxonbase.setSec(refMods);
3629 if(!isSynonym) {
3630 classification.addChildTaxon((Taxon)tmptaxonbase, null, null);
3631 sourceHandler.addSource(refMods, (Taxon)tmptaxonbase);
3632 }
3633 }
3634 }
3635 if(!isSynonym) {
3636 tmptaxonbase = CdmBase.deproxy(tmptaxonbase, Taxon.class);
3637 } else {
3638 tmptaxonbase = CdmBase.deproxy(tmptaxonbase, Synonym.class);
3639 }
3640 if (author != null) {
3641 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
3642 setLSID(getIdentifier(), tmptaxonbase);
3643 importer.getTaxonService().saveOrUpdate(tmptaxonbase);
3644 if(!isSynonym) {
3645 tmptaxonbase = CdmBase.deproxy(tmptaxonbase, Taxon.class);
3646 } else {
3647 tmptaxonbase = CdmBase.deproxy(tmptaxonbase, Synonym.class);
3648 }
3649 }
3650 }
3651 TaxonNameBase<?,?> tnb = CdmBase.deproxy(tmptaxonbase.getName(), TaxonNameBase.class);
3652
3653 if(!isSynonym) {
3654 this.taxon=(Taxon)tmptaxonbase;
3655 } else {
3656 this.syno=(Synonym)tmptaxonbase;
3657 }
3658 castTaxonNameBase(tnb, taxonnamebase);
3659
3660 }
3661
3662 /**
3663 *
3664 */
3665 public void buildTaxon() {
3666 //System.out.println("BUILD TAXON");
3667 logger.info("buildTaxon");
3668 NomenclaturalStatusType statusType = null;
3669 if (!getStatus().isEmpty()){
3670 try {
3671 statusType = nomStatusString2NomStatus(getStatus());
3672 taxonnamebase.addStatus(NomenclaturalStatus.NewInstance(statusType));
3673 } catch (UnknownCdmTypeException e) {
3674 addProblematicStatusToFile(getStatus());
3675 logger.warn("Problem with status");
3676 }
3677 }
3678 importer.getNameService().save(taxonnamebase);
3679
3680 TaxonBase<?> tmptaxonbase;
3681 if (!isSynonym) {
3682 tmptaxonbase =Taxon.NewInstance(taxonnamebase, refMods); //sec set null
3683 }
3684 else {
3685 tmptaxonbase =Synonym.NewInstance(taxonnamebase, refMods); //sec set null
3686 }
3687 boolean exist = false;
3688 for (TaxonNode p : classification.getAllNodes()){
3689 try{
3690 if(p.getTaxon().getTitleCache().equalsIgnoreCase(tmptaxonbase.getTitleCache())) {
3691 if(compareStatus(p.getTaxon(), statusType)){
3692 try{
3693 if (!isSynonym) {
3694 tmptaxonbase=CdmBase.deproxy(p.getTaxon(), Taxon.class);
3695 } else {
3696 tmptaxonbase=CdmBase.deproxy(p.getTaxon(), Synonym.class);
3697 }
3698 exist =true;
3699 }catch(Exception e){
3700 logger.warn("Found the same name but from another type (taxon/synonym)");
3701 TaxonNameBase<?,?> existingTnb = getTaxon().getName();
3702 if (isSynonym){
3703 tmptaxonbase = new Synonym(existingTnb, refMods);
3704 importer.getTaxonService().saveOrUpdate(tmptaxonbase);
3705 tmptaxonbase=CdmBase.deproxy(tmptaxonbase, Synonym.class);
3706 exist =true;
3707 }
3708 else{
3709 tmptaxonbase = new Taxon(existingTnb, refMods);
3710 }
3711 }
3712 }
3713 }
3714 }catch(NullPointerException n){logger.warn(" A taxon is either null or its titlecache is null - ignore it?");}
3715 }
3716 if (!exist){
3717
3718 boolean insertAsExisting =false;
3719 List<Taxon> existingTaxons=new ArrayList<Taxon>();
3720 try {
3721 existingTaxons = getMatchingTaxon(taxonnamebase);
3722 } catch (Exception e1) {
3723 // TODO Auto-generated catch block
3724 e1.printStackTrace();
3725 }
3726 double similarityScore=0.0;
3727 double similarityAuthor=-1;
3728 String author1="";
3729 String author2="";
3730 String t1="";
3731 String t2="";
3732 for (Taxon bestMatchingTaxon:existingTaxons){
3733 //System.out.println("tnbase "+taxonnamebase.getTitleCache());
3734 //System.out.println("bestex "+bestMatchingTaxon.getTitleCache());
3735 try {
3736 if(taxonnamebase.getAuthorshipCache()!=null) {
3737 author1=taxonnamebase.getAuthorshipCache();
3738 }
3739 } catch (Exception e) {
3740 // TODO Auto-generated catch block
3741 e.printStackTrace();
3742 }
3743 try {
3744 if(castTaxonNameBase(bestMatchingTaxon.getName()).getAuthorshipCache()!=null) {
3745 author2=castTaxonNameBase(bestMatchingTaxon.getName()).getAuthorshipCache();
3746 }
3747 } catch (Exception e) {
3748 // TODO Auto-generated catch block
3749 e.printStackTrace();
3750 }
3751 try {
3752 t1=taxonnamebase.getTitleCache().split("sec.")[0].trim();
3753 if (author1!=null && !StringUtils.isEmpty(author1)) {
3754 t1=t1.split(Pattern.quote(author1))[0];
3755 }
3756 } catch (Exception e) {
3757 // TODO Auto-generated catch block
3758 e.printStackTrace();
3759 }
3760 try {
3761 t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
3762 if (author2!=null && !StringUtils.isEmpty(author2)) {
3763 t2=t2.split(Pattern.quote(author2))[0];
3764 }
3765 } catch (Exception e) {
3766 // TODO Auto-generated catch block
3767 e.printStackTrace();
3768 }
3769
3770 similarityScore=similarity(t1.trim(), t2.trim());
3771 //System.out.println("taxonscore "+similarityScore);
3772 similarityAuthor=similarity(author1.trim(), author2.trim());
3773 //System.out.println("authorscore "+similarityAuthor);
3774 insertAsExisting = compareAndCheckTaxon(taxonnamebase, refMods, similarityScore, bestMatchingTaxon,similarityAuthor);
3775 if(insertAsExisting) {
3776 tmptaxonbase=bestMatchingTaxon;
3777 break;
3778 }
3779 }
3780 if (!insertAsExisting){
3781 if(!configState.getConfig().doKeepOriginalSecundum()) {
3782 tmptaxonbase.setSec(configState.getConfig().getSecundum());
3783 }
3784
3785 // tmptaxonbase.setSec(refMods);
3786 if (taxonnamebase.getRank().equals(configState.getConfig().getMaxRank())) {
3787 //System.out.println("****************************"+tmptaxonbase);
3788 if (!isSynonym) {
3789 classification.addChildTaxon((Taxon)tmptaxonbase, refMods, null);
3790 }
3791 } else{
3792 hierarchy = new HashMap<Rank, Taxon>();
3793 //System.out.println("LOOK FOR PARENT "+taxonnamebase.toString()+", "+tmptaxonbase.toString());
3794 if (!isSynonym){
3795 lookForParentNode(taxonnamebase,(Taxon)tmptaxonbase, refMods,this);
3796 //System.out.println("HIERARCHY "+hierarchy);
3797 Taxon parent = buildHierarchy();
3798 if(!taxonExistsInClassification(parent,(Taxon)tmptaxonbase)){
3799 if(parent !=null) {
3800 classification.addParentChild(parent, (Taxon)tmptaxonbase, refMods, null);
3801 } else {
3802 classification.addChildTaxon((Taxon)tmptaxonbase, refMods, null);
3803 }
3804 importer.getClassificationService().saveOrUpdate(classification);
3805 }
3806 }
3807 // Set<TaxonNode> nodeList = classification.getAllNodes();
3808 // for(TaxonNode tn:nodeList) {
3809 // System.out.println(tn.getTaxon());
3810 // }
3811 }
3812 }
3813 importer.getClassificationService().saveOrUpdate(classification);
3814 // refreshTransaction();
3815 if(isSynonym) {
3816 try{
3817 Synonym castTest=CdmBase.deproxy(tmptaxonbase, Synonym.class);
3818 }catch(Exception e){
3819 TaxonNameBase<?,?> existingTnb = tmptaxonbase.getName();
3820 Synonym castTest = new Synonym(existingTnb, refMods);
3821 importer.getTaxonService().saveOrUpdate(castTest);
3822 tmptaxonbase=CdmBase.deproxy(castTest, Synonym.class);
3823 }
3824 }
3825 }
3826 if(!isSynonym) {
3827 taxon=CdmBase.deproxy(tmptaxonbase, Taxon.class);
3828 } else {
3829 syno=CdmBase.deproxy(tmptaxonbase, Synonym.class);
3830 }
3831
3832
3833
3834 }
3835
3836
3837 /**
3838 *
3839 */
3840 private Taxon buildHierarchy() {
3841 logger.info("buildHierarchy");
3842 Taxon higherTaxon = null;
3843 //add the maxRank as a root
3844 if(hierarchy.containsKey(configState.getConfig().getMaxRank())){
3845 Taxon ct=hierarchy.get(configState.getConfig().getMaxRank());
3846 if(!taxonExistsInClassification(higherTaxon, ct)) {
3847 //System.out.println("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"+hierarchy.get(configState.getConfig().getMaxRank()));
3848 classification.addChildTaxon(ct, refMods, null);
3849 }
3850 higherTaxon = hierarchy.get(configState.getConfig().getMaxRank());
3851 // return higherTaxon;
3852 }
3853 //add the relation to the highertaxon, except if the current rank to add IS the maxRank
3854 if(hierarchy.containsKey(Rank.SUBFAMILY()) && !configState.getConfig().getMaxRank().equals(Rank.SUBFAMILY())){
3855 higherTaxon=saveAndGetHigherTaxon(Rank.SUBFAMILY(),higherTaxon);
3856 }
3857 if(hierarchy.containsKey(Rank.TRIBE())&& !configState.getConfig().getMaxRank().equals(Rank.TRIBE())){
3858 higherTaxon=saveAndGetHigherTaxon(Rank.TRIBE(),higherTaxon);
3859 }
3860 if(hierarchy.containsKey(Rank.SUBTRIBE())&& !configState.getConfig().getMaxRank().equals(Rank.SUBTRIBE())){
3861 higherTaxon=saveAndGetHigherTaxon(Rank.SUBTRIBE(),higherTaxon);
3862 }
3863 if(hierarchy.containsKey(Rank.GENUS())&& !configState.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3864 higherTaxon=saveAndGetHigherTaxon(Rank.GENUS(),higherTaxon);
3865 }
3866 if(hierarchy.containsKey(Rank.SUBGENUS())&& !configState.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3867 higherTaxon=saveAndGetHigherTaxon(Rank.SUBGENUS(),higherTaxon);
3868 }
3869 importer.getClassificationService().saveOrUpdate(classification);
3870 return higherTaxon;
3871 }
3872
3873 private Taxon saveAndGetHigherTaxon(Rank r, Taxon higherTaxon){
3874 Taxon ct=hierarchy.get(r);
3875 if(!taxonExistsInClassification(higherTaxon,ct )) {
3876 if(higherTaxon != null && ct!=null) {
3877 classification.addParentChild(higherTaxon, ct, refMods, null);
3878 } else
3879 if(higherTaxon == null && ct !=null) {
3880 classification.addChildTaxon(ct, refMods, null);
3881 }
3882 }
3883 return ct;
3884 }
3885
3886 private boolean taxonExistsInClassification(Taxon parent, Taxon child){
3887 logger.info("taxonExistsInClassification");
3888 // System.out.println("LOOK IF TAXA EXIST "+parent+", "+child);
3889 boolean found=false;
3890 if(parent !=null){
3891 for (TaxonNode p : classification.getAllNodes()){
3892 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
3893 for (TaxonNode c : p.getChildNodes()) {
3894 if (c.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3895 found=true;
3896 break;
3897 }
3898 }
3899 }
3900 }
3901 }
3902 else{
3903 for (TaxonNode p : classification.getAllNodes()){
3904 if(p.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3905 found=true;
3906 break;
3907 }
3908 }
3909 }
3910 // System.out.println("LOOK IF TAXA EXIST? "+found);
3911 return found;
3912 }
3913 /**
3914 * @param nameToBeFilledTest
3915 */
3916 @SuppressWarnings("rawtypes")
3917 public void setParsedName(TaxonNameBase nameToBeFilledTest) {
3918 this.taxonnamebase = (NonViralName<?>) nameToBeFilledTest;
3919
3920 }
3921 //variety dwcranks:varietyEpithet
3922 /**
3923 * @return the author
3924 */
3925 public String getAuthor() {
3926 return author;
3927 }
3928 /**
3929 * @return
3930 */
3931 public Taxon getTaxon() {
3932 return taxon;
3933 }
3934 /**
3935 * @return
3936 */
3937 public NonViralName<?> getTaxonNameBase() {
3938 return taxonnamebase;
3939 }
3940
3941 /**
3942 * @param findOrCreateTaxon
3943 */
3944 public void setForm(Taxon form) {
3945 this.form=form;
3946
3947 }
3948 /**
3949 * @param findOrCreateTaxon
3950 */
3951 public void setVariety(Taxon variety) {
3952 this.variety=variety;
3953
3954 }
3955 /**
3956 * @param string
3957 * @return
3958 */
3959 @SuppressWarnings("rawtypes")
3960 public Taxon findOrCreateTaxon(String partialname,String fullname, Rank rank, Rank globalrank) {
3961 logger.info("findOrCreateTaxon");
3962 sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
3963 //takes too much time
3964 // List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3965 // logger.info("tmpList returned: "+tmpList.size());
3966
3967 NomenclaturalStatusType statusType = null;
3968 if (!getStatus().isEmpty()){
3969 try {
3970 statusType = nomStatusString2NomStatus(getStatus());
3971 } catch (UnknownCdmTypeException e) {
3972 addProblematicStatusToFile(getStatus());
3973 logger.warn("Problem with status");
3974 }
3975 }
3976
3977 List<TaxonBase> tmpListFiltered = new ArrayList<TaxonBase>();
3978
3979 Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, fullname, MatchMode.BEGINNING, null, null, null, null, null);
3980
3981 tmpListFiltered.addAll(taxontest.getRecords());
3982 taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, partialname, MatchMode.BEGINNING, null, null, null, null, null);
3983 tmpListFiltered.addAll(taxontest.getRecords());
3984
3985 //logger.info("tmpListFiltered returned: "+tmpListFiltered.size());
3986
3987 boolean nameCorrected=false;
3988 if (fullname.indexOf(partialname)<0) {
3989 nameCorrected=true;
3990 }
3991
3992 boolean foundIdentic=false;
3993 Taxon tmp=null;
3994 // Taxon tmpPartial=null;
3995 for (TaxonBase tmpb:tmpListFiltered){
3996 if(tmpb !=null){
3997 TaxonNameBase tnb = tmpb.getName();
3998 Rank crank=null;
3999 if (tnb != null){
4000 // //System.out.println(tnb.getTitleCache());
4001 // if (tnb.getTitleCache().split("sec.")[0].equals(partialname) ||tnb.getTitleCache().split("sec.")[0].equals(fullname) ){
4002 if(globalrank.equals(rank) || (globalrank.isLower(Rank.SPECIES()) && rank.equals(Rank.SPECIES()))){
4003 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(fullname) ){
4004 crank =tnb.getRank();
4005 if (crank !=null && rank !=null){
4006 if (crank.equals(rank)){
4007 foundIdentic=true;
4008 try{
4009 tmp=(Taxon)tmpb;
4010 break;
4011 }catch(Exception e){
4012 e.printStackTrace();
4013 }
4014 }
4015 }
4016 }
4017 if(nameCorrected){ //for corrected names such as Anochetus -- A. blf-pat
4018 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
4019 crank =tnb.getRank();
4020 if (crank !=null && rank !=null){
4021 if (crank.equals(rank)){
4022 foundIdentic=true;
4023 try{
4024 tmp=(Taxon)tmpb;
4025 break;
4026 }catch(Exception e){
4027 e.printStackTrace();
4028 }
4029 }
4030 }
4031 }
4032 }
4033 }
4034 else{
4035 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
4036 crank =tnb.getRank();
4037 if (crank !=null && rank !=null){
4038 if (crank.equals(rank)){
4039 foundIdentic=true;
4040 try{
4041 tmp=(Taxon)tmpb;
4042 break;
4043 }catch(Exception e){
4044 e.printStackTrace();
4045 }
4046 }
4047 }
4048 }
4049 }
4050 }
4051 }
4052 }
4053 boolean statusMatch=false;
4054 boolean appendedMatch=false;
4055 if(tmp !=null && foundIdentic){
4056 statusMatch=compareStatus(tmp, statusType);
4057 if (!getStatus().isEmpty() && ! (tmp.getAppendedPhrase() == null)) {
4058 appendedMatch=tmp.getAppendedPhrase().equals(getStatus());
4059 }
4060 if (getStatus().isEmpty() && tmp.getAppendedPhrase() == null) {
4061 appendedMatch=true;
4062 }
4063
4064 }
4065 if ((tmp == null || !foundIdentic) || (tmp != null && !statusMatch) || (tmp != null && !appendedMatch && !statusMatch)){
4066
4067 NonViralName<?> tnb = getNonViralNameAccNomenclature();
4068 tnb.setRank(rank);
4069
4070 if(statusType != null) {
4071 tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
4072 }
4073 if(getStatus()!=null) {
4074 tnb.setAppendedPhrase(getStatus());
4075 }
4076
4077 if(rank.equals(Rank.UNKNOWN_RANK())){
4078 tnb.setTitleCache(fullname);
4079 // tnb.setGenusOrUninomial(fullname);
4080 tnb.setProtectedTitleCache(true);
4081 }
4082 if(rank.isHigher(Rank.GENUS())) {
4083 tnb.setGenusOrUninomial(partialname);
4084 }
4085
4086 if(rank.isHigher(Rank.SPECIES())) {
4087 tnb.setTitleCache(partialname);
4088 }
4089
4090 if (rank.equals(globalrank) && author != null) {
4091 if(fullname.indexOf("opulifolium")>-1) {
4092 //System.out.println("AUTOR: "+author);
4093 }
4094 tnb.setCombinationAuthorTeam(findOrCreateAuthor(author));
4095 if (getIdentifier() !=null && !getIdentifier().isEmpty()){
4096 Taxon taxonLSID = getTaxonByLSID(getIdentifier());
4097 if (taxonLSID !=null) {
4098 tmp=taxonLSID;
4099 }
4100 }
4101 }
4102
4103 if(tmp == null){
4104 if (rank.equals(Rank.FAMILY())) {
4105 tmp = buildFamily(tnb);
4106 }
4107 if (rank.equals(Rank.SUBFAMILY())) {
4108 tmp = buildSubfamily(tnb);
4109 }
4110 if (rank.equals(Rank.TRIBE())) {
4111 tmp = buildTribe(tnb);
4112 }
4113 if (rank.equals(Rank.SUBTRIBE())) {
4114 tmp = buildSubtribe(tnb);
4115 }
4116 if (rank.equals(Rank.GENUS())) {
4117 tmp = buildGenus(partialname, tnb);
4118 }
4119
4120 if (rank.equals(Rank.SUBGENUS())) {
4121 tmp = buildSubgenus(partialname, tnb);
4122 }
4123 if (rank.equals(Rank.SPECIES())) {
4124 tmp = buildSpecies(partialname, tnb);
4125 }
4126
4127 if (rank.equals(Rank.SUBSPECIES())) {
4128 tmp = buildSubspecies(partialname, tnb);
4129 }
4130
4131 if (rank.equals(Rank.VARIETY())) {
4132 tmp = buildVariety(fullname, partialname, tnb);
4133 }
4134
4135 if (rank.equals(Rank.FORM())) {
4136 tmp = buildForm(fullname, partialname, tnb);
4137 }
4138
4139 importer.getClassificationService().saveOrUpdate(classification);
4140 }
4141 }
4142
4143 tmp = CdmBase.deproxy(tmp, Taxon.class);
4144 if (rank.equals(globalrank) && author != null) {
4145 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
4146 setLSID(getIdentifier(), tmp);
4147 importer.getTaxonService().saveOrUpdate(tmp);
4148 tmp = CdmBase.deproxy(tmp, Taxon.class);
4149 }
4150 }
4151 TaxonNameBase tnb = CdmBase.deproxy(tmp.getName(), TaxonNameBase.class);
4152
4153 this.taxon=tmp;
4154 castTaxonNameBase(tnb, taxonnamebase);
4155 return tmp;
4156 }
4157 /**
4158 * @param tnb
4159 * @return
4160 */
4161 private Taxon buildSubfamily(NonViralName<?> tnb) {
4162 Taxon tmp;
4163 // tnb.generateTitle();
4164 tmp = findMatchingTaxon(tnb,refMods);
4165 if(tmp ==null){
4166 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4167 if(!configState.getConfig().doKeepOriginalSecundum()) {
4168 tmp.setSec(configState.getConfig().getSecundum());
4169 }
4170 // tmp.setSec(refMods);
4171 // sourceHandler.addSource(refMods, tmp);
4172 if(family != null) {
4173 classification.addParentChild(family, tmp, null, null);
4174 higherRank=Rank.FAMILY();
4175 higherTaxa=family;
4176 } else {
4177 //System.out.println("ADDCHILDTAXON SUBFAMILY "+tmp);
4178 classification.addChildTaxon(tmp, null, null);
4179 }
4180 }
4181 return tmp;
4182 }
4183 /**
4184 * @param tnb
4185 * @return
4186 */
4187 private Taxon buildFamily(NonViralName<?> tnb) {
4188 Taxon tmp;
4189 // tnb.generateTitle();
4190 tmp = findMatchingTaxon(tnb,refMods);
4191 if(tmp ==null){
4192 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4193 if(!configState.getConfig().doKeepOriginalSecundum()) {
4194 tmp.setSec(configState.getConfig().getSecundum());
4195 }
4196 // tmp.setSec(refMods);
4197 //sourceHandler.addSource(refMods, tmp);
4198 //System.out.println("ADDCHILDTAXON FAMILY "+tmp);
4199 classification.addChildTaxon(tmp, null, null);
4200 }
4201 return tmp;
4202 }
4203 /**
4204 * @param fullname
4205 * @param tnb
4206 * @return
4207 */
4208 private Taxon buildForm(String fullname, String partialname, NonViralName<?> tnb) {
4209 Taxon tmp;
4210 if (genusName !=null) {
4211 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4212 }
4213 if (subgenusName !=null) {
4214 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4215 }
4216 if(speciesName !=null) {
4217 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4218 }
4219 if(subspeciesName != null) {
4220 tnb.setInfraSpecificEpithet(subspeciesName.getInfraSpecificEpithet());
4221 }
4222 if(partialname!= null) {
4223 tnb.setInfraSpecificEpithet(partialname);
4224 }
4225 tnb.generateTitle();
4226 //TODO how to save form??
4227 tnb.setTitleCache(fullname, true);
4228 tmp = findMatchingTaxon(tnb,refMods);
4229 if(tmp ==null){
4230 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4231 if(!configState.getConfig().doKeepOriginalSecundum()) {
4232 tmp.setSec(configState.getConfig().getSecundum());
4233 }
4234 // tmp.setSec(refMods);
4235 //sourceHandler.addSource(refMods, tmp);
4236 if (subspecies !=null) {
4237 classification.addParentChild(subspecies, tmp, null, null);
4238 higherRank=Rank.SUBSPECIES();
4239 higherTaxa=subspecies;
4240 } else {
4241 if (species !=null) {
4242 classification.addParentChild(species, tmp, null, null);
4243 higherRank=Rank.SPECIES();
4244 higherTaxa=species;
4245 }
4246 else{
4247 // System.out.println("ADDCHILDTAXON FORM "+tmp);
4248 classification.addChildTaxon(tmp, null, null);
4249 }
4250 }
4251 }
4252 return tmp;
4253 }
4254 /**
4255 * @param fullname
4256 * @param tnb
4257 * @return
4258 */
4259 private Taxon buildVariety(String fullname, String partialname, NonViralName<?> tnb) {
4260 Taxon tmp;
4261 if (genusName !=null) {
4262 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4263 }
4264 if (subgenusName !=null) {
4265 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4266 }
4267 if(speciesName !=null) {
4268 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4269 }
4270 if(subspeciesName != null) {
4271 tnb.setInfraSpecificEpithet(subspeciesName.getSpecificEpithet());
4272 }
4273 if(partialname != null) {
4274 tnb.setInfraSpecificEpithet(partialname);
4275 }
4276 //TODO how to save variety?
4277 tnb.setTitleCache(fullname, true);
4278 tmp = findMatchingTaxon(tnb,refMods);
4279 if(tmp ==null){
4280 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4281 if(!configState.getConfig().doKeepOriginalSecundum()) {
4282 tmp.setSec(configState.getConfig().getSecundum());
4283 }
4284 // tmp.setSec(refMods);
4285 //sourceHandler.addSource(refMods, tmp);
4286 if (subspecies !=null) {
4287 classification.addParentChild(subspecies, tmp, null, null);
4288 higherRank=Rank.SUBSPECIES();
4289 higherTaxa=subspecies;
4290 } else {
4291 if(species !=null) {
4292 classification.addParentChild(species, tmp, null, null);
4293 higherRank=Rank.SPECIES();
4294 higherTaxa=species;
4295 }
4296 else{
4297 //System.out.println("ADDCHILDTAXON VARIETY "+tmp);
4298 classification.addChildTaxon(tmp, null, null);
4299 }
4300 }
4301 }
4302 return tmp;
4303 }
4304 /**
4305 * @param partialname
4306 * @param tnb
4307 * @return
4308 */
4309 private Taxon buildSubspecies(String partialname, NonViralName<?> tnb) {
4310 Taxon tmp;
4311 if (genusName !=null) {
4312 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4313 }
4314 if (subgenusName !=null) {
4315 // System.out.println("SUB:"+subgenusName.getInfraGenericEpithet());
4316 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4317 }
4318 if(speciesName !=null) {
4319 // System.out.println("SPE:"+speciesName.getSpecificEpithet());
4320 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4321 }
4322 tnb.setInfraSpecificEpithet(partialname);
4323 tnb.generateTitle();
4324 tmp = findMatchingTaxon(tnb,refMods);
4325 if(tmp ==null){
4326 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4327 if(!configState.getConfig().doKeepOriginalSecundum())
4328 {
4329 tmp.setSec(configState.getConfig().getSecundum());
4330 // tmp.setSec(refMods);
4331 //sourceHandler.addSource(refMods, tmp);
4332 }
4333
4334 if(species != null) {
4335 classification.addParentChild(species, tmp, null, null);
4336 higherRank=Rank.SPECIES();
4337 higherTaxa=species;
4338 }
4339 else{
4340 //System.out.println("ADDCHILDTAXON SUBSPECIES "+tmp);
4341 classification.addChildTaxon(tmp, null, null);
4342 }
4343 }
4344 return tmp;
4345 }
4346 /**
4347 * @param partialname
4348 * @param tnb
4349 * @return
4350 */
4351 private Taxon buildSpecies(String partialname, NonViralName<?> tnb) {
4352 Taxon tmp;
4353 if (genusName !=null) {
4354 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4355 }
4356 if (subgenusName !=null) {
4357 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4358 }
4359 tnb.setSpecificEpithet(partialname.toLowerCase());
4360 tnb.generateTitle();
4361 tmp = findMatchingTaxon(tnb,refMods);
4362 if(tmp ==null){
4363 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4364 if(!configState.getConfig().doKeepOriginalSecundum()) {
4365 tmp.setSec(configState.getConfig().getSecundum());
4366 }
4367 // tmp.setSec(refMods);
4368 //sourceHandler.addSource(refMods, tmp);
4369 if (subgenus !=null) {
4370 classification.addParentChild(subgenus, tmp, null, null);
4371 higherRank=Rank.SUBGENUS();
4372 higherTaxa=subgenus;
4373 } else {
4374 if (genus !=null) {
4375 classification.addParentChild(genus, tmp, null, null);
4376 higherRank=Rank.GENUS();
4377 higherTaxa=genus;
4378 }
4379 else{
4380 //System.out.println("ADDCHILDTAXON SPECIES "+tmp);
4381 classification.addChildTaxon(tmp, null, null);
4382 }
4383 }
4384 }
4385 return tmp;
4386 }
4387 /**
4388 * @param partialname
4389 * @param tnb
4390 * @return
4391 */
4392 private Taxon buildSubgenus(String partialname, NonViralName<?> tnb) {
4393 Taxon tmp;
4394 tnb.setInfraGenericEpithet(partialname);
4395 if (genusName !=null) {
4396 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4397 }
4398 tnb.generateTitle();
4399 tmp = findMatchingTaxon(tnb,refMods);
4400 if(tmp ==null){
4401 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4402 if(!configState.getConfig().doKeepOriginalSecundum()) {
4403 tmp.setSec(configState.getConfig().getSecundum());
4404 }
4405 // tmp.setSec(refMods);
4406 //sourceHandler.addSource(refMods, tmp);
4407 if(genus != null) {
4408 classification.addParentChild(genus, tmp, null, null);
4409 higherRank=Rank.GENUS();
4410 higherTaxa=genus;
4411 } else{
4412 //System.out.println("ADDCHILDTAXON SUBGENUS "+tmp);
4413 classification.addChildTaxon(tmp, null, null);
4414 }
4415 }
4416 return tmp;
4417 }
4418 /**
4419 * @param partialname
4420 * @param tnb
4421 * @return
4422 */
4423 private Taxon buildGenus(String partialname, NonViralName<?> tnb) {
4424 Taxon tmp;
4425 tnb.setGenusOrUninomial(partialname);
4426 tnb.generateTitle();
4427
4428 tmp = findMatchingTaxon(tnb,refMods);
4429 if(tmp ==null){
4430 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4431 if(!configState.getConfig().doKeepOriginalSecundum())
4432 {
4433 tmp.setSec(configState.getConfig().getSecundum());
4434 // tmp.setSec(refMods);
4435 //sourceHandler.addSource(refMods, tmp);
4436 }
4437
4438 if(subtribe != null) {
4439 classification.addParentChild(subtribe, tmp, null, null);
4440 higherRank=Rank.SUBTRIBE();
4441 higherTaxa=subtribe;
4442 } else{
4443 if(tribe !=null) {
4444 classification.addParentChild(tribe, tmp, null, null);
4445 higherRank=Rank.TRIBE();
4446 higherTaxa=tribe;
4447 } else{
4448 if(subfamily !=null) {
4449 classification.addParentChild(subfamily, tmp, null, null);
4450 higherRank=Rank.SUBFAMILY();
4451 higherTaxa=subfamily;
4452 } else
4453 if(family !=null) {
4454 classification.addParentChild(family, tmp, null, null);
4455 higherRank=Rank.FAMILY();
4456 higherTaxa=family;
4457 }
4458 else{
4459 //System.out.println("ADDCHILDTAXON GENUS "+tmp);
4460 classification.addChildTaxon(tmp, null, null);
4461 }
4462 }
4463 }
4464 }
4465 return tmp;
4466 }
4467
4468 /**
4469 * @param tnb
4470 * @return
4471 */
4472 private Taxon buildSubtribe(NonViralName<?> tnb) {
4473 Taxon tmp;
4474 tnb.generateTitle();
4475 tmp = findMatchingTaxon(tnb,refMods);
4476 if(tmp==null){
4477 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4478 if(!configState.getConfig().doKeepOriginalSecundum()) {
4479 tmp.setSec(configState.getConfig().getSecundum());
4480 }
4481 // tmp.setSec(refMods);
4482 //sourceHandler.addSource(refMods, tmp);
4483 if(tribe != null) {
4484 classification.addParentChild(tribe, tmp, null, null);
4485 higherRank=Rank.TRIBE();
4486 higherTaxa=tribe;
4487 } else{
4488 //System.out.println("ADDCHILDTAXON SUBTRIBE "+tmp);
4489 classification.addChildTaxon(tmp, null, null);
4490 }
4491 }
4492 return tmp;
4493 }
4494 /**
4495 * @param tnb
4496 * @return
4497 */
4498 private Taxon buildTribe(NonViralName<?> tnb) {
4499 Taxon tmp;
4500 tnb.generateTitle();
4501 tmp = findMatchingTaxon(tnb,refMods);
4502 if(tmp==null){
4503 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4504 if(!configState.getConfig().doKeepOriginalSecundum()) {
4505 tmp.setSec(configState.getConfig().getSecundum());
4506 }
4507 // tmp.setSec(refMods);
4508 //sourceHandler.addSource(refMods, tmp);
4509 if (subfamily !=null) {
4510 classification.addParentChild(subfamily, tmp, null, null);
4511 higherRank=Rank.SUBFAMILY();
4512 higherTaxa=subfamily;
4513 } else {
4514 if(family != null) {
4515 classification.addParentChild(family, tmp, null, null);
4516 higherRank=Rank.FAMILY();
4517 higherTaxa=family;
4518 }
4519 else{
4520 //System.out.println("ADDCHILDTAXON TRIBE "+tmp);
4521 classification.addChildTaxon(tmp, null, null);
4522 }
4523 }
4524 }
4525 return tmp;
4526 }
4527
4528 /**
4529 * @param identifier2
4530 * @return
4531 */
4532 @SuppressWarnings("rawtypes")
4533 private Taxon getTaxonByLSID(String identifier) {
4534 //logger.info("getTaxonByLSID");
4535 // boolean lsidok=false;
4536 String id = identifier.split("__")[0];
4537 // String source = identifier.split("__")[1];
4538 LSID lsid = null;
4539 if (id.indexOf("lsid")>-1){
4540 try {
4541 lsid = new LSID(id);
4542 // lsidok=true;
4543 } catch (MalformedLSIDException e) {
4544 logger.warn("Malformed LSID");
4545 }
4546 }
4547 if (lsid !=null){
4548 List<TaxonBase> taxons = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
4549 LSID currentlsid=null;
4550 for (TaxonBase t:taxons){
4551 currentlsid = t.getLsid();
4552 if (currentlsid !=null){
4553 if (currentlsid.getLsid().equals(lsid.getLsid())){
4554 try{
4555 return (Taxon) t;
4556 }
4557 catch(Exception e){logger.warn("Exception occurred while comparing LSIDs "+e );}
4558 }
4559 }
4560 }
4561 }
4562 return null;
4563 }
4564 /**
4565 * @param author2
4566 * @return
4567 */
4568 @SuppressWarnings("rawtypes")
4569 private Person findOrCreateAuthor(String author2) {
4570 //logger.info("findOrCreateAuthor");
4571 List<UuidAndTitleCache<Person>> hiberPersons = importer.getAgentService().getPersonUuidAndTitleCache();
4572 for (UuidAndTitleCache<Person> hibernateP:hiberPersons){
4573 if(hibernateP.getTitleCache().equals(author2)) {
4574 AgentBase existing = importer.getAgentService().find(hibernateP.getUuid());
4575 return CdmBase.deproxy(existing, Person.class);
4576 }
4577 }
4578 Person p = Person.NewInstance();
4579 p.setTitleCache(author2,true);
4580 importer.getAgentService().saveOrUpdate(p);
4581 return CdmBase.deproxy(p, Person.class);
4582 }
4583 /**
4584 * @param author the author to set
4585 */
4586 public void setAuthor(String author) {
4587 this.author = author;
4588 }
4589
4590 /**
4591 * @return the higherTaxa
4592 */
4593 public Taxon getHigherTaxa() {
4594 return higherTaxa;
4595 }
4596 /**
4597 * @param higherTaxa the higherTaxa to set
4598 */
4599 public void setHigherTaxa(Taxon higherTaxa) {
4600 this.higherTaxa = higherTaxa;
4601 }
4602 /**
4603 * @return the higherRank
4604 */
4605 public Rank getHigherRank() {
4606 return higherRank;
4607 }
4608 /**
4609 * @param higherRank the higherRank to set
4610 */
4611 public void setHigherRank(Rank higherRank) {
4612 this.higherRank = higherRank;
4613 }
4614 public String getName(){
4615 if (newName.isEmpty()) {
4616 return originalName;
4617 } else {
4618 return newName;
4619 }
4620
4621 }
4622 /**
4623 * @return the fullName
4624 */
4625 public String getOriginalName() {
4626 return originalName;
4627 }
4628 /**
4629 * @param fullName the fullName to set
4630 */
4631 public void setOriginalName(String fullName) {
4632 this.originalName = fullName;
4633 }
4634 /**
4635 * @return the newName
4636 */
4637 public String getNewName() {
4638 return newName;
4639 }
4640 /**
4641 * @param newName the newName to set
4642 */
4643 public void setNewName(String newName) {
4644 this.newName = newName;
4645 }
4646 /**
4647 * @return the rank
4648 */
4649 public Rank getRank() {
4650 return rank;
4651 }
4652 /**
4653 * @param rank the rank to set
4654 */
4655 public void setRank(Rank rank) {
4656 this.rank = rank;
4657 }
4658 /**
4659 * @return the idenfitiger
4660 */
4661 public String getIdentifier() {
4662 return identifier;
4663 }
4664 /**
4665 * @param idenfitiger the idenfitiger to set
4666 */
4667 public void setIdentifier(String identifier) {
4668 this.identifier = identifier;
4669 }
4670 /**
4671 * @return the status
4672 */
4673 public String getStatus() {
4674 if (status == null) {
4675 return "";
4676 }
4677 return status;
4678 }
4679 /**
4680 * @param status the status to set
4681 */
4682 public void setStatus(String status) {
4683 this.status = status;
4684 }
4685 /**
4686 * @return the family
4687 */
4688 public Taxon getFamily() {
4689 return family;
4690 }
4691 /**
4692 * @param family the family to set
4693 */
4694 @SuppressWarnings("rawtypes")
4695 public void setFamily(Taxon family) {
4696 this.family = family;
4697 TaxonNameBase taxonNameBase = CdmBase.deproxy(family.getName(), TaxonNameBase.class);
4698 familyName = castTaxonNameBase(taxonNameBase,familyName);
4699 }
4700 /**
4701 * @return the subfamily
4702 */
4703 public Taxon getSubfamily() {
4704 return subfamily;
4705 }
4706 /**
4707 * @param subfamily the subfamily to set
4708 */
4709 @SuppressWarnings("rawtypes")
4710 public void setSubfamily(Taxon subfamily) {
4711 this.subfamily = subfamily;
4712 TaxonNameBase taxonNameBase = CdmBase.deproxy(subfamily.getName(), TaxonNameBase.class);
4713 subfamilyName = castTaxonNameBase(taxonNameBase,subfamilyName);
4714 }
4715 /**
4716 * @return the tribe
4717 */
4718 public Taxon getTribe() {
4719 return tribe;
4720 }
4721 /**
4722 * @param tribe the tribe to set
4723 */
4724 @SuppressWarnings("rawtypes")
4725 public void setTribe(Taxon tribe) {
4726 this.tribe = tribe;
4727 TaxonNameBase taxonNameBase = CdmBase.deproxy(tribe.getName(), TaxonNameBase.class);
4728 tribeName = castTaxonNameBase(taxonNameBase,tribeName);
4729 }
4730 /**
4731 * @return the subtribe
4732 */
4733 public Taxon getSubtribe() {
4734 return subtribe;
4735 }
4736 /**
4737 * @param subtribe the subtribe to set
4738 */
4739 @SuppressWarnings("rawtypes")
4740 public void setSubtribe(Taxon subtribe) {
4741 this.subtribe = subtribe;
4742 TaxonNameBase taxonNameBase = CdmBase.deproxy(subtribe.getName(), TaxonNameBase.class);
4743 subtribeName =castTaxonNameBase(taxonNameBase,subtribeName);
4744 }
4745 /**
4746 * @return the genus
4747 */
4748 public Taxon getGenus() {
4749 return genus;
4750 }
4751 /**
4752 * @param genus the genus to set
4753 */
4754 @SuppressWarnings("rawtypes")
4755 public void setGenus(Taxon genus) {
4756 this.genus = genus;
4757 TaxonNameBase taxonNameBase = CdmBase.deproxy(genus.getName(), TaxonNameBase.class);
4758 genusName = castTaxonNameBase(taxonNameBase,genusName);
4759 //System.out.println("GENUSNAME: "+genusName.toString());
4760 }
4761 /**
4762 * @return the subgenus
4763 */
4764 public Taxon getSubgenus() {
4765 return subgenus;
4766 }
4767 /**
4768 * @param subgenus the subgenus to set
4769 */
4770 @SuppressWarnings("rawtypes")
4771 public void setSubgenus(Taxon subgenus) {
4772 this.subgenus = subgenus;
4773 TaxonNameBase taxonNameBase = CdmBase.deproxy(subgenus.getName(), TaxonNameBase.class);
4774 subgenusName = castTaxonNameBase(taxonNameBase,subgenusName);
4775 }
4776 /**
4777 * @return the species
4778 */
4779 public Taxon getSpecies() {
4780 return species;
4781 }
4782 /**
4783 * @param species the species to set
4784 */
4785 public void setSpecies(Taxon species) {
4786 this.species = species;
4787 @SuppressWarnings("rawtypes")
4788 TaxonNameBase taxonNameBase = CdmBase.deproxy(species.getName(), TaxonNameBase.class);
4789 speciesName = castTaxonNameBase(taxonNameBase,speciesName);
4790
4791 }
4792 /**
4793 * @return the subspecies
4794 */
4795 public Taxon getSubspecies() {
4796 return subspecies;
4797 }
4798 /**
4799 * @param subspecies the subspecies to set
4800 */
4801 @SuppressWarnings("rawtypes")
4802 public void setSubspecies(Taxon subspecies) {
4803 this.subspecies = subspecies;
4804 TaxonNameBase taxonNameBase = CdmBase.deproxy(subspecies.getName(), TaxonNameBase.class);
4805 subspeciesName = castTaxonNameBase(taxonNameBase,subspeciesName);
4806
4807 }
4808
4809
4810
4811 }
4812
4813
4814 /**
4815 * @param status
4816 */
4817 private void addProblematicStatusToFile(String status) {
4818 try{
4819 FileWriter fstream = new FileWriter("/home/pkelbert/Bureau/StatusUnknown_"+classification.getTitleCache()+".txt",true);
4820 BufferedWriter out = new BufferedWriter(fstream);
4821 out.write(status+"\n");
4822 //Close the output stream
4823 out.close();
4824 }catch (Exception e){//Catch exception if any
4825 System.err.println("Error: " + e.getMessage());
4826 }
4827
4828 }
4829
4830
4831
4832 /**
4833 * @param tnb
4834 * @return
4835 */
4836 private Taxon findMatchingTaxon(NonViralName<?> tnb, Reference refMods) {
4837 logger.info("findMatchingTaxon");
4838 Taxon tmp=null;
4839
4840 refMods=CdmBase.deproxy(refMods, Reference.class);
4841 boolean insertAsExisting =false;
4842 List<Taxon> existingTaxons = new ArrayList<Taxon>();
4843 try {
4844 existingTaxons = getMatchingTaxon(tnb);
4845 } catch (Exception e1) {
4846 // TODO Auto-generated catch block
4847 e1.printStackTrace();
4848 }
4849 double similarityScore=0.0;
4850 double similarityAuthor=-1;
4851 String author1="";
4852 String author2="";
4853 String t1="";
4854 String t2="";
4855 for (Taxon bestMatchingTaxon:existingTaxons){
4856 if (!existingTaxons.isEmpty() && configState.getConfig().isInteractWithUser() && !insertAsExisting) {
4857 // System.out.println("tnb "+tnb.getTitleCache());
4858 // System.out.println("ext "+bestMatchingTaxon.getTitleCache());
4859 try {
4860 if(tnb.getAuthorshipCache()!=null) {
4861 author1=tnb.getAuthorshipCache();
4862 }
4863 } catch (Exception e) {
4864 // TODO Auto-generated catch block
4865 e.printStackTrace();
4866 }
4867 try {
4868 if(castTaxonNameBase(bestMatchingTaxon.getName()).getAuthorshipCache()!=null) {
4869 author2=castTaxonNameBase(bestMatchingTaxon.getName()).getAuthorshipCache();
4870 }
4871 } catch (Exception e) {
4872 // TODO Auto-generated catch block
4873 e.printStackTrace();
4874 }
4875 try {
4876 t1=tnb.getTitleCache().split("sec.")[0].trim();
4877 if (author1!=null && !StringUtils.isEmpty(author1)) {
4878 t1=t1.split(Pattern.quote(author1))[0];
4879 }
4880 } catch (Exception e) {
4881 // TODO Auto-generated catch block
4882 e.printStackTrace();
4883 }
4884 try {
4885 t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
4886 if (author2!=null && !StringUtils.isEmpty(author2)) {
4887 t2=t2.split(Pattern.quote(author2))[0];
4888 }
4889 } catch (Exception e) {
4890 // TODO Auto-generated catch block
4891 e.printStackTrace();
4892 }
4893 similarityScore=similarity(t1.trim(), t2.trim());
4894 // System.out.println("taxascore: "+similarityScore);
4895 similarityAuthor=similarity(author1.trim(), author2.trim());
4896 // System.out.println("authorscore: "+similarityAuthor);
4897 insertAsExisting = compareAndCheckTaxon(tnb, refMods, similarityScore, bestMatchingTaxon,similarityAuthor);
4898 }
4899 if(insertAsExisting) {
4900 //System.out.println("KEEP "+bestMatchingTaxon.toString());
4901 tmp=bestMatchingTaxon;
4902 sourceHandler.addSource(refMods, tmp);
4903 return tmp;
4904 }
4905 }
4906 return tmp;
4907 }
4908
4909
4910 /**
4911 * @param tnb
4912 * @param refMods
4913 * @param similarityScore
4914 * @param bestMatchingTaxon
4915 * @param similarityAuthor
4916 * @return
4917 */
4918 private boolean compareAndCheckTaxon(NonViralName<?> tnb, Reference<?> refMods, double similarityScore,
4919 Taxon bestMatchingTaxon, double similarityAuthor) {
4920 //logger.info("compareAndCheckTaxon");
4921 boolean insertAsExisting;
4922 // if (tnb.getTitleCache().split("sec.")[0].equalsIgnoreCase("Chenopodium") && bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium album")>-1) {
4923 // insertAsExisting=false;
4924 // } else{
4925 //a small hack/automatisation for Chenopodium only
4926 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase("Chenopodium") &&
4927 bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium L.")>-1) {
4928 insertAsExisting=true;
4929 } else {
4930 insertAsExisting=askIfReuseBestMatchingTaxon(tnb, bestMatchingTaxon, refMods, similarityScore,similarityAuthor);
4931 }
4932 // }
4933
4934 logDecision(tnb,bestMatchingTaxon,insertAsExisting, refMods);
4935 return insertAsExisting;
4936 }
4937
4938 /**
4939 * @return
4940 */
4941 @SuppressWarnings("rawtypes")
4942 private List<Taxon> getMatchingTaxon(TaxonNameBase tnb) {
4943 //logger.info("getMatchingTaxon");
4944 Pager<TaxonBase> pager=importer.getTaxonService().findByTitle(TaxonBase.class, tnb.getTitleCache().split("sec.")[0].trim(), MatchMode.BEGINNING, null, null, null, null, null);
4945 List<TaxonBase>records = pager.getRecords();
4946
4947 List<Taxon> existingTaxons = new ArrayList<Taxon>();
4948 for (TaxonBase r:records){
4949 try{
4950 Taxon bestMatchingTaxon = (Taxon)r;
4951 // System.out.println("best: "+bestMatchingTaxon.getTitleCache());
4952 if(compareTaxonNameLength(bestMatchingTaxon.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4953 existingTaxons.add(bestMatchingTaxon);
4954 }
4955 }catch(ClassCastException e){logger.warn("classcast exception, might be a synonym, ignore it");}
4956 }
4957 Taxon bmt = importer.getTaxonService().findBestMatchingTaxon(tnb.getTitleCache());
4958 if (!existingTaxons.contains(bmt) && bmt!=null) {
4959 if(compareTaxonNameLength(bmt.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4960 existingTaxons.add(bmt);
4961 }
4962 }
4963 return existingTaxons;
4964 }
4965
4966 /**
4967 * Check if the found Taxon can reasonnably be the same
4968 * example: with and without author should match, but the subspecies should not be suggested for a genus
4969 * */
4970 private boolean compareTaxonNameLength(String f, String o){
4971 //logger.info("compareTaxonNameLength");
4972 boolean lengthOk=false;
4973 int sizeF = f.length();
4974 int sizeO = o.length();
4975 if (sizeO>=sizeF) {
4976 lengthOk=true;
4977 }
4978 if(sizeF>sizeO) {
4979 if (sizeF-sizeO>10) {
4980 lengthOk=false;
4981 } else {
4982 lengthOk=true;
4983 }
4984 }
4985
4986 // System.out.println(lengthOk+": compare "+f+" ("+f.length()+") and "+o+" ("+o.length()+")");
4987 return lengthOk;
4988 }
4989
4990 private double similarity(String s1, String s2) {
4991 //logger.info("similarity");
4992 //System.out.println("similarity *"+s1+"* vs. *"+s2+"*");
4993 if(!StringUtils.isEmpty(s1) && !StringUtils.isEmpty(s2)){
4994 String l1=s1.toLowerCase().trim();
4995 String l2=s2.toLowerCase().trim();
4996 if (l1.length() < l2.length()) { // s1 should always be bigger
4997 String swap = l1; l1 = l2; l2 = swap;
4998 }
4999 int bigLen = l1.length();
5000 if (bigLen == 0) { return 1.0; /* both strings are zero length */ }
5001 return (bigLen - computeEditDistance(l1, l2)) / (double) bigLen;
5002 }
5003 else{
5004 if(s1!=null && s2!=null){
5005 if (s1.equalsIgnoreCase(s2)) {
5006 return 1;
5007 }
5008 }
5009 return -1;
5010 }
5011 }
5012
5013 private int computeEditDistance(String s1, String s2) {
5014 //logger.info("computeEditDistance");
5015 int[] costs = new int[s2.length() + 1];
5016 for (int i = 0; i <= s1.length(); i++) {
5017 int lastValue = i;
5018 for (int j = 0; j <= s2.length(); j++) {
5019 if (i == 0) {
5020 costs[j] = j;
5021 } else {
5022 if (j > 0) {
5023 int newValue = costs[j - 1];
5024 if (s1.charAt(i - 1) != s2.charAt(j - 1)) {
5025 newValue = Math.min(Math.min(newValue, lastValue),
5026 costs[j]) + 1;
5027 }
5028 costs[j - 1] = lastValue;
5029 lastValue = newValue;
5030 }
5031 }
5032 }
5033 if (i > 0) {
5034 costs[s2.length()] = lastValue;
5035 }
5036 }
5037 return costs[s2.length()];
5038 }
5039
5040 Map<Rank, Taxon> hierarchy = new HashMap<Rank, Taxon>();
5041 /**
5042 * @param taxonnamebase
5043 */
5044 @SuppressWarnings("rawtypes")
5045 public void lookForParentNode(NonViralName<?> taxonnamebase, Taxon tax, Reference<?> ref, MyName myName) {
5046 logger.info("lookForParentNode "+taxonnamebase.getTitleCache()+" for "+myName.toString());
5047 //System.out.println("LOOK FOR PARENT NODE "+taxonnamebase.toString()+"; "+tax.toString()+"; "+taxonnamebase.getRank());
5048 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
5049 if (taxonnamebase.getRank().equals(Rank.FORM())){
5050 handleFormHierarchy(ref, myName, parser);
5051 }
5052 if (taxonnamebase.getRank().equals(Rank.VARIETY())){
5053 handleVarietyHierarchy(ref, myName, parser);
5054 }
5055 if (taxonnamebase.getRank().equals(Rank.SUBSPECIES())){
5056 handleSubSpeciesHierarchy(ref, myName, parser);
5057 }
5058 if (taxonnamebase.getRank().equals(Rank.SPECIES())){
5059 handleSpeciesHierarchy(ref, myName, parser);
5060 }
5061 if (taxonnamebase.getRank().equals(Rank.SUBGENUS())){
5062 handleSubgenusHierarchy(ref, myName, parser);
5063 }
5064
5065 if (taxonnamebase.getRank().equals(Rank.GENUS())){
5066 handleGenusHierarchy(ref, myName, parser);
5067 }
5068 if (taxonnamebase.getRank().equals(Rank.SUBTRIBE())){
5069 handleSubtribeHierarchy(ref, myName, parser);
5070 }
5071 if (taxonnamebase.getRank().equals(Rank.TRIBE())){
5072 handleTribeHierarchy(ref, myName, parser);
5073 }
5074
5075 if (taxonnamebase.getRank().equals(Rank.SUBFAMILY())){
5076 handleSubfamilyHierarchy(ref, myName, parser);
5077 }
5078 }
5079
5080 /**
5081 * @param ref
5082 * @param myName
5083 * @param parser
5084 */
5085 private void handleSubfamilyHierarchy(Reference<?> ref, MyName myName, INonViralNameParser<?> parser) {
5086 System.out.println("handleSubfamilyHierarchy");
5087 String parentStr = myName.getFamilyStr();
5088 Rank r = Rank.FAMILY();
5089 if(parentStr!=null){
5090
5091 Taxon parent = null;
5092 Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, parentStr, MatchMode.BEGINNING, null, null, null, null, null);
5093 for(TaxonBase tb:taxontest.getRecords()){
5094 try {
5095 if (tb.getName().getRank().equals(r)) {
5096 parent=CdmBase.deproxy(tb, Taxon.class);
5097 }
5098 break;
5099 } catch (Exception e) {
5100 // TODO Auto-generated catch block
5101 e.printStackTrace();
5102 }
5103 }
5104 if(parent == null) {
5105 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5106 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5107 if(tmp ==null)
5108 {
5109 parent=Taxon.NewInstance(parentNameName, ref);
5110 importer.getTaxonService().save(parent);
5111 parent = CdmBase.deproxy(parent, Taxon.class);
5112 } else {
5113 parent=tmp;
5114 }
5115 lookForParentNode(parentNameName, parent, ref,myName);
5116
5117 }
5118 hierarchy.put(r,parent);
5119 }
5120 }
5121
5122 /**
5123 * @param ref
5124 * @param myName
5125 * @param parser
5126 */
5127 private void handleTribeHierarchy(Reference<?> ref, MyName myName, INonViralNameParser<?> parser) {
5128 String parentStr = myName.getSubfamilyStr();
5129 Rank r = Rank.SUBFAMILY();
5130 if (parentStr == null){
5131 parentStr = myName.getFamilyStr();
5132 r = Rank.FAMILY();
5133 }
5134 if(parentStr!=null){
5135 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5136 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5137 // importer.getTaxonService().save(parent);
5138 // parent = CdmBase.deproxy(parent, Taxon.class);
5139
5140 boolean parentDoesNotExists = true;
5141 for (TaxonNode p : classification.getAllNodes()){
5142 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5143 parentDoesNotExists = false;
5144 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5145 break;
5146 }
5147 }
5148 // if(parentDoesNotExists) {
5149 // importer.getTaxonService().save(parent);
5150 // parent = CdmBase.deproxy(parent, Taxon.class);
5151 // lookForParentNode(parentNameName, parent, ref,myName);
5152 // }
5153 if(parentDoesNotExists) {
5154 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5155 if(tmp ==null)
5156 {
5157 parent=Taxon.NewInstance(parentNameName, ref);
5158 importer.getTaxonService().save(parent);
5159 parent = CdmBase.deproxy(parent, Taxon.class);
5160 } else {
5161 parent=tmp;
5162 }
5163 lookForParentNode(parentNameName, parent, ref,myName);
5164
5165 }
5166 hierarchy.put(r,parent);
5167 }
5168 }
5169
5170 /**
5171 * @param ref
5172 * @param myName
5173 * @param parser
5174 */
5175 private void handleSubtribeHierarchy(Reference<?> ref, MyName myName, INonViralNameParser<?> parser) {
5176 String parentStr = myName.getTribeStr();
5177 Rank r = Rank.TRIBE();
5178 if (parentStr == null){
5179 parentStr = myName.getSubfamilyStr();
5180 r = Rank.SUBFAMILY();
5181 }
5182 if (parentStr == null){
5183 parentStr = myName.getFamilyStr();
5184 r = Rank.FAMILY();
5185 }
5186 if(parentStr!=null){
5187 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5188 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5189 // importer.getTaxonService().save(parent);
5190 // parent = CdmBase.deproxy(parent, Taxon.class);
5191
5192 boolean parentDoesNotExists = true;
5193 for (TaxonNode p : classification.getAllNodes()){
5194 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5195 parentDoesNotExists = false;
5196 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5197
5198 break;
5199 }
5200 }
5201 // if(parentDoesNotExists) {
5202 // importer.getTaxonService().save(parent);
5203 // parent = CdmBase.deproxy(parent, Taxon.class);
5204 // lookForParentNode(parentNameName, parent, ref,myName);
5205 // }
5206 if(parentDoesNotExists) {
5207 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5208 if(tmp ==null)
5209 {
5210 parent=Taxon.NewInstance(parentNameName, ref);
5211 importer.getTaxonService().save(parent);
5212 parent = CdmBase.deproxy(parent, Taxon.class);
5213 } else {
5214 parent=tmp;
5215 }
5216 lookForParentNode(parentNameName, parent, ref,myName);
5217
5218 }
5219 hierarchy.put(r,parent);
5220 }
5221 }
5222
5223 /**
5224 * @param ref
5225 * @param myName
5226 * @param parser
5227 */
5228 private void handleGenusHierarchy(Reference<?> ref, MyName myName, INonViralNameParser<?> parser) {
5229 String parentStr = myName.getSubtribeStr();
5230 Rank r = Rank.SUBTRIBE();
5231 if (parentStr == null){
5232 parentStr = myName.getTribeStr();
5233 r = Rank.TRIBE();
5234 }
5235 if (parentStr == null){
5236 parentStr = myName.getSubfamilyStr();
5237 r = Rank.SUBFAMILY();
5238 }
5239 if (parentStr == null){
5240 parentStr = myName.getFamilyStr();
5241 r = Rank.FAMILY();
5242 }
5243 if(parentStr!=null){
5244 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5245 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5246 // importer.getTaxonService().save(parent);
5247 // parent = CdmBase.deproxy(parent, Taxon.class);
5248
5249 boolean parentDoesNotExists = true;
5250 for (TaxonNode p : classification.getAllNodes()){
5251 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5252 // System.out.println(p.getTaxon().getUuid());
5253 // System.out.println(parent.getUuid());
5254 parentDoesNotExists = false;
5255 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5256 break;
5257 }
5258 }
5259 // if(parentDoesNotExists) {
5260 // importer.getTaxonService().save(parent);
5261 // parent = CdmBase.deproxy(parent, Taxon.class);
5262 // lookForParentNode(parentNameName, parent, ref,myName);
5263 // }
5264 if(parentDoesNotExists) {
5265 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5266 if(tmp ==null)
5267 {
5268 parent=Taxon.NewInstance(parentNameName, ref);
5269 importer.getTaxonService().save(parent);
5270 parent = CdmBase.deproxy(parent, Taxon.class);
5271 } else {
5272 parent=tmp;
5273 }
5274 lookForParentNode(parentNameName, parent, ref,myName);
5275
5276 }
5277 hierarchy.put(r,parent);
5278 }
5279 }
5280
5281 /**
5282 * @param ref
5283 * @param myName
5284 * @param parser
5285 */
5286 private void handleSubgenusHierarchy(Reference<?> ref, MyName myName, INonViralNameParser<?> parser) {
5287 String parentStr = myName.getGenusStr();
5288 Rank r = Rank.GENUS();
5289
5290 if(parentStr==null){
5291 parentStr = myName.getSubtribeStr();
5292 r = Rank.SUBTRIBE();
5293 }
5294 if (parentStr == null){
5295 parentStr = myName.getTribeStr();
5296 r = Rank.TRIBE();
5297 }
5298 if (parentStr == null){
5299 parentStr = myName.getSubfamilyStr();
5300 r = Rank.SUBFAMILY();
5301 }
5302 if (parentStr == null){
5303 parentStr = myName.getFamilyStr();
5304 r = Rank.FAMILY();
5305 }
5306 if(parentStr!=null){
5307 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5308 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5309 // importer.getTaxonService().save(parent);
5310 // parent = CdmBase.deproxy(parent, Taxon.class);
5311
5312 boolean parentDoesNotExists = true;
5313 for (TaxonNode p : classification.getAllNodes()){
5314 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5315 // System.out.println(p.getTaxon().getUuid());
5316 // System.out.println(parent.getUuid());
5317 parentDoesNotExists = false;
5318 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5319 break;
5320 }
5321 }
5322 // if(parentDoesNotExists) {
5323 // importer.getTaxonService().save(parent);
5324 // parent = CdmBase.deproxy(parent, Taxon.class);
5325 // lookForParentNode(parentNameName, parent, ref,myName);
5326 // }
5327 if(parentDoesNotExists) {
5328 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5329 if(tmp ==null)
5330 {
5331 parent=Taxon.NewInstance(parentNameName, ref);
5332 importer.getTaxonService().save(parent);
5333 parent = CdmBase.deproxy(parent, Taxon.class);
5334 } else {
5335 parent=tmp;
5336 }
5337 lookForParentNode(parentNameName, parent, ref,myName);
5338
5339 }
5340 hierarchy.put(r,parent);
5341 }
5342 }
5343
5344 /**
5345 * @param ref
5346 * @param myName
5347 * @param parser
5348 */
5349 private void handleSpeciesHierarchy(Reference<?> ref, MyName myName, INonViralNameParser<?> parser) {
5350 String parentStr = myName.getSubgenusStr();
5351 Rank r = Rank.SUBGENUS();
5352
5353 if(parentStr==null){
5354 parentStr = myName.getGenusStr();
5355 r = Rank.GENUS();
5356 }
5357
5358 if(parentStr==null){
5359 parentStr = myName.getSubtribeStr();
5360 r = Rank.SUBTRIBE();
5361 }
5362 if (parentStr == null){
5363 parentStr = myName.getTribeStr();
5364 r = Rank.TRIBE();
5365 }
5366 if (parentStr == null){
5367 parentStr = myName.getSubfamilyStr();
5368 r = Rank.SUBFAMILY();
5369 }
5370 if (parentStr == null){
5371 parentStr = myName.getFamilyStr();
5372 r = Rank.FAMILY();
5373 }
5374 if(parentStr!=null){
5375 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5376 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5377 hierarchy.put(r,parent);
5378 }
5379 }
5380
5381 /**
5382 * @param ref
5383 * @param myName
5384 * @param parser
5385 */
5386 private void handleSubSpeciesHierarchy(Reference<?> ref, MyName myName, INonViralNameParser<?> parser) {
5387 String parentStr = myName.getSpeciesStr();
5388 Rank r = Rank.SPECIES();
5389
5390
5391 if(parentStr==null){
5392 parentStr = myName.getSubgenusStr();
5393 r = Rank.SUBGENUS();
5394 }
5395
5396 if(parentStr==null){
5397 parentStr = myName.getGenusStr();
5398 r = Rank.GENUS();
5399 }
5400
5401 if(parentStr==null){
5402 parentStr = myName.getSubtribeStr();
5403 r = Rank.SUBTRIBE();
5404 }
5405 if (parentStr == null){
5406 parentStr = myName.getTribeStr();
5407 r = Rank.TRIBE();
5408 }
5409 if (parentStr == null){
5410 parentStr = myName.getSubfamilyStr();
5411 r = Rank.SUBFAMILY();
5412 }
5413 if (parentStr == null){
5414 parentStr = myName.getFamilyStr();
5415 r = Rank.FAMILY();
5416 }
5417 if(parentStr!=null){
5418 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5419 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5420 hierarchy.put(r,parent);
5421 }
5422 }
5423
5424
5425 /**
5426 * @param ref
5427 * @param myName
5428 * @param parser
5429 */
5430 private void handleFormHierarchy(Reference<?> ref, MyName myName, INonViralNameParser<?> parser) {
5431 String parentStr = myName.getSubspeciesStr();
5432 Rank r = Rank.SUBSPECIES();
5433
5434
5435 if(parentStr==null){
5436 parentStr = myName.getSpeciesStr();
5437 r = Rank.SPECIES();
5438 }
5439
5440 if(parentStr==null){
5441 parentStr = myName.getSubgenusStr();
5442 r = Rank.SUBGENUS();
5443 }
5444
5445 if(parentStr==null){
5446 parentStr = myName.getGenusStr();
5447 r = Rank.GENUS();
5448 }
5449
5450 if(parentStr==null){
5451 parentStr = myName.getSubtribeStr();
5452 r = Rank.SUBTRIBE();
5453 }
5454 if (parentStr == null){
5455 parentStr = myName.getTribeStr();
5456 r = Rank.TRIBE();
5457 }
5458 if (parentStr == null){
5459 parentStr = myName.getSubfamilyStr();
5460 r = Rank.SUBFAMILY();
5461 }
5462 if (parentStr == null){
5463 parentStr = myName.getFamilyStr();
5464 r = Rank.FAMILY();
5465 }
5466 if(parentStr!=null){
5467 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5468 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5469 hierarchy.put(r,parent);
5470 }
5471 }
5472
5473 /**
5474 * @param ref
5475 * @param myName
5476 * @param parser
5477 */
5478 private void handleVarietyHierarchy(Reference<?> ref, MyName myName, INonViralNameParser<?> parser) {
5479 String parentStr = myName.getSubspeciesStr();
5480 Rank r = Rank.SUBSPECIES();
5481
5482 if(parentStr==null){
5483 parentStr = myName.getSpeciesStr();
5484 r = Rank.SPECIES();
5485 }
5486
5487 if(parentStr==null){
5488 parentStr = myName.getSubgenusStr();
5489 r = Rank.SUBGENUS();
5490 }
5491
5492 if(parentStr==null){
5493 parentStr = myName.getGenusStr();
5494 r = Rank.GENUS();
5495 }
5496
5497 if(parentStr==null){
5498 parentStr = myName.getSubtribeStr();
5499 r = Rank.SUBTRIBE();
5500 }
5501 if (parentStr == null){
5502 parentStr = myName.getTribeStr();
5503 r = Rank.TRIBE();
5504 }
5505 if (parentStr == null){
5506 parentStr = myName.getSubfamilyStr();
5507 r = Rank.SUBFAMILY();
5508 }
5509 if (parentStr == null){
5510 parentStr = myName.getFamilyStr();
5511 r = Rank.FAMILY();
5512 }
5513 if(parentStr!=null){
5514 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5515 //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5516 hierarchy.put(r,parent);
5517 }
5518 }
5519
5520 /**
5521 * @param ref
5522 * @param myName
5523 * @param parser
5524 * @param parentStr
5525 * @param r
5526 * @return
5527 */
5528 private Taxon handleParentName(Reference<?> ref, MyName myName, INonViralNameParser<?> parser, String parentStr, Rank r) {
5529 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5530 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5531 // importer.getTaxonService().save(parent);
5532 // parent = CdmBase.deproxy(parent, Taxon.class);
5533
5534 boolean parentDoesNotExists = true;
5535 for (TaxonNode p : classification.getAllNodes()){
5536 if(p.getTaxon().getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(parent.getTitleCache().split("sec.")[0].trim())) {
5537 // System.out.println(p.getTaxon().getUuid());
5538 // System.out.println(parent.getUuid());
5539 parentDoesNotExists = false;
5540 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
5541 break;
5542 }
5543 }
5544 if(parentDoesNotExists) {
5545 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5546 // System.out.println("FOUND PARENT "+tmp.toString()+" for "+parentNameName.toString());
5547 if(tmp ==null)
5548 {
5549 parent=Taxon.NewInstance(parentNameName, ref);
5550 importer.getTaxonService().save(parent);
5551 parent = CdmBase.deproxy(parent, Taxon.class);
5552 } else {
5553 parent=tmp;
5554 }
5555 lookForParentNode(parentNameName, parent, ref,myName);
5556
5557 }
5558 return parent;
5559 }
5560
5561 private void addNameDifferenceToFile(String originalname, String atomisedname){
5562 try{
5563 FileWriter fstream = new FileWriter("/home/pkelbert/Bureau/NamesDifferent_"+classification.getTitleCache()+".txt",true);
5564 BufferedWriter out = new BufferedWriter(fstream);
5565 out.write(originalname+" (original) versus "+replaceNull(atomisedname)+" (atomised) \n");
5566 //Close the output stream
5567 out.close();
5568 }catch (Exception e){//Catch exception if any
5569 System.err.println("Error: " + e.getMessage());
5570 }
5571 }
5572 /**
5573 * @param name
5574 * @param author
5575 * @param nomenclaturalCode2
5576 * @param rank
5577 */
5578 private void addProblemNameToFile(String name, String author, NomenclaturalCode nomenclaturalCode2, Rank rank) {
5579 try{
5580 FileWriter fstream = new FileWriter("/home/pkelbert/Bureau/NameNotParsed.txt",true);
5581 BufferedWriter out = new BufferedWriter(fstream);
5582 out.write(name+"\t"+replaceNull(author)+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\n");
5583 //Close the output stream
5584 out.close();
5585 }catch (Exception e){//Catch exception if any
5586 System.err.println("Error: " + e.getMessage());
5587 }
5588 }
5589
5590
5591 /**
5592 * @param tnb
5593 * @param bestMatchingTaxon
5594 * @param insertAsExisting
5595 * @param refMods
5596 */
5597 private void logDecision(NonViralName<?> tnb, Taxon bestMatchingTaxon, boolean insertAsExisting, Reference refMods) {
5598 try{
5599 FileWriter fstream = new FileWriter("/home/pkelbert/Bureau/Decisions_"+classification.toString()+".txt",true);
5600 BufferedWriter out = new BufferedWriter(fstream);
5601 out.write(tnb.getTitleCache()+" sec. "+refMods+"\t"+bestMatchingTaxon.getTitleCache()+"\t"+insertAsExisting+"\n");
5602 //Close the output stream
5603 out.close();
5604 }catch (Exception e){//Catch exception if any
5605 System.err.println("Error: " + e.getMessage());
5606 }
5607 }
5608
5609
5610 @SuppressWarnings("unused")
5611 private String replaceNull(Object in){
5612 if (in == null) {
5613 return "";
5614 }
5615 if (in.getClass().equals(NomenclaturalCode.class)) {
5616 return ((NomenclaturalCode)in).getTitleCache();
5617 }
5618 return in.toString();
5619 }
5620
5621 /**
5622 * @param fullName
5623 * @param nomenclaturalCode2
5624 * @param rank
5625 */
5626 private void addProblemNameToFile(String type, String name, NomenclaturalCode nomenclaturalCode2, Rank rank, String problems) {
5627 try{
5628 FileWriter fstream = new FileWriter("/home/pkelbert/Bureau/NameNotParsed_"+classification.getTitleCache()+".txt",true);
5629 BufferedWriter out = new BufferedWriter(fstream);
5630 out.write(type+"\t"+name+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\t"+problems+"\n");
5631 //Close the output stream
5632 out.close();
5633 }catch (Exception e){//Catch exception if any
5634 System.err.println("Error: " + e.getMessage());
5635 }
5636
5637 }
5638
5639 }
5640
5641
5642