merged trunk into branch
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / taxonx2013 / TaxonXTreatmentExtractor.java
1 // $Id$
2 /**
3 * Copyright (C) 2013 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10 package eu.etaxonomy.cdm.io.taxonx2013;
11
12 import java.io.BufferedWriter;
13 import java.io.File;
14 import java.io.FileWriter;
15 import java.io.IOException;
16 import java.net.URI;
17 import java.util.ArrayList;
18 import java.util.HashMap;
19 import java.util.List;
20 import java.util.Map;
21 import java.util.Set;
22 import java.util.UUID;
23 import java.util.regex.Pattern;
24
25 import javax.xml.transform.TransformerException;
26 import javax.xml.transform.TransformerFactoryConfigurationError;
27
28 import org.apache.commons.lang.StringUtils;
29 import org.w3c.dom.Node;
30 import org.w3c.dom.NodeList;
31
32 import com.ibm.lsid.MalformedLSIDException;
33
34 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
35 import eu.etaxonomy.cdm.api.service.pager.Pager;
36 import eu.etaxonomy.cdm.model.agent.AgentBase;
37 import eu.etaxonomy.cdm.model.agent.Person;
38 import eu.etaxonomy.cdm.model.common.CdmBase;
39 import eu.etaxonomy.cdm.model.common.DefinedTermBase;
40 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
41 import eu.etaxonomy.cdm.model.common.LSID;
42 import eu.etaxonomy.cdm.model.common.Language;
43 import eu.etaxonomy.cdm.model.common.OriginalSourceType;
44 import eu.etaxonomy.cdm.model.common.UuidAndTitleCache;
45 import eu.etaxonomy.cdm.model.description.Feature;
46 import eu.etaxonomy.cdm.model.description.FeatureNode;
47 import eu.etaxonomy.cdm.model.description.FeatureTree;
48 import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
49 import eu.etaxonomy.cdm.model.description.TaxonDescription;
50 import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
51 import eu.etaxonomy.cdm.model.description.TextData;
52 import eu.etaxonomy.cdm.model.name.BacterialName;
53 import eu.etaxonomy.cdm.model.name.BotanicalName;
54 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
55 import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
56 import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
57 import eu.etaxonomy.cdm.model.name.NonViralName;
58 import eu.etaxonomy.cdm.model.name.Rank;
59 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
60 import eu.etaxonomy.cdm.model.name.ZoologicalName;
61 import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
62 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
63 import eu.etaxonomy.cdm.model.reference.Reference;
64 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
65 import eu.etaxonomy.cdm.model.taxon.Classification;
66 import eu.etaxonomy.cdm.model.taxon.Synonym;
67 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
68 import eu.etaxonomy.cdm.model.taxon.Taxon;
69 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
70 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
71 import eu.etaxonomy.cdm.persistence.query.MatchMode;
72 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
73 import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
74 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
75
76 /**
77 * @author pkelbert
78 * @date 2 avr. 2013
79 *
80 */
81 public class TaxonXTreatmentExtractor extends TaxonXExtractor{
82
/** Feature label handed to extractSpecificFeatureNotStructured for treatment children that match no known markup. */
private static final String notMarkedUp = "Not marked-up";
/** UUID of the feature tree that buildFeatureTree() looks up, and assigns to the tree it creates when none is found. */
private static final UUID proIbioTreeUUID = UUID.fromString("2c49f506-c7f7-44de-a8b9-2e695de3769c");
// NOTE(review): the next two UUIDs are not referenced in the part of the class reviewed here;
// presumably they identify the "Other" and "Not marked-up" features - confirm against their usages.
private static final UUID OtherUUID = UUID.fromString("6465f8aa-2175-446f-807e-7163994b120f");
private static final UUID NotMarkedUpUUID = UUID.fromString("796fe3a5-2c9c-4a89-b298-7598ca944063");
// NOTE(review): only referenced from commented-out code in the part of the class reviewed here.
private static final boolean skippQuestion = true;

/** Nomenclatural code supplied to the constructor. */
private final NomenclaturalCode nomenclaturalCode;
/** Classification the import works on; replaced by the persisted instance in reloadClassification(). */
private Classification classification;

private String treatmentMainName,originalTreatmentName;

private final HashMap<String,Map<String,String>> namesMap = new HashMap<String, Map<String,String>>();


// The two patterns are constants, so they are compiled once for the class rather than once per extractor.
// In the part of the class reviewed here they are only used by the commented-out extractKey method.
private static final Pattern keypattern = Pattern.compile("^(\\d+.*|-\\d+.*)");
private static final Pattern keypatternend = Pattern.compile("^.+?\\d$");

/**
 * Gate checked by extractTreatment() before most sub-extractions and before saving.
 * NOTE(review): the assignments to this flag are outside the code visible in this review.
 */
private boolean maxRankRespected =false;
/** Features keyed by title cache; supplied to the constructor and used by buildFeatureTree(). */
private Map<String, Feature> featuresMap;

/** Holder for the scientific name currently being processed; reset by getTaxonNameBaseFromXML(). */
private MyName currentMyName=new MyName();

/** Reference for the source URL, supplied to the constructor and forwarded to the source handler. */
private final Reference<?> sourceUrlRef;

/** Helper that attaches sources to created objects; configured in the constructor. */
private final TaxonXAddSources sourceHandler = new TaxonXAddSources();
108
/**
 * Creates an extractor bound to one import run and wires up its source handler.
 *
 * @param nomenclaturalCode the nomenclatural code to keep for this extractor
 * @param classification the classification the treatments are imported into
 * @param importer the running import, used here for its agent service and handed to the source handler
 * @param configState the import state, used for the collectors and handed to the source handler
 * @param featuresMap the features known so far, keyed by title cache
 * @param urlSource the reference describing the source URL
 */
public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode, Classification classification, TaxonXImport importer,
        TaxonXImportState configState,Map<String, Feature> featuresMap, Reference<?> urlSource ) {
    // fields owned by this class
    this.nomenclaturalCode = nomenclaturalCode;
    this.classification = classification;
    this.featuresMap = featuresMap;
    this.sourceUrlRef = urlSource;

    // importer and configState are not declared in this class; presumably inherited from TaxonXExtractor
    this.importer = importer;
    this.configState = configState;

    prepareCollectors(configState, importer.getAgentService());

    // the source handler gets the same context as the extractor itself
    sourceHandler.setSourceUrlRef(this.sourceUrlRef);
    sourceHandler.setImporter(importer);
    sourceHandler.setConfigState(configState);
}
128
129 /**
130 * extracts all the treament information and save them
131 * @param treatmentnode: the XML Node
132 * @param tosave: the list of object to save into the CDM
133 * @param refMods: the reference extracted from the MODS
134 * @param sourceName: the URI of the document
135 */
136 @SuppressWarnings({ "rawtypes", "unused" })
137 protected void extractTreatment(Node treatmentnode, List<Object> tosave, Reference<?> refMods, URI sourceName) {
138
139
140 logger.info("extractTreatment");
141 List<TaxonNameBase> nametosave = new ArrayList<TaxonNameBase>();
142 NodeList children = treatmentnode.getChildNodes();
143 Taxon acceptedTaxon =null;
144 Taxon defaultTaxon =null;
145 boolean refgroup=false;
146
147 for (int i=0;i<children.getLength();i++){
148 if(children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group")) {
149 refgroup=true;
150 }
151 }
152
153 for (int i=0;i<children.getLength();i++){
154
155 if (children.item(i).getNodeName().equalsIgnoreCase("tax:nomenclature")){
156 NodeList nomenclature = children.item(i).getChildNodes();
157 boolean containsName=false;
158 for(int k=0;k<nomenclature.getLength();k++){
159 if(nomenclature.item(k).getNodeName().equalsIgnoreCase("tax:name")){
160 containsName=true;
161 break;
162 }
163 }
164 if (containsName){
165 reloadClassification();
166 //extract "main" the scientific name
167 try{
168 acceptedTaxon = extractNomenclature(children.item(i),nametosave,refMods);
169 }catch(ClassCastException e){e.printStackTrace();System.exit(0);}
170 // System.out.println("acceptedTaxon : "+acceptedTaxon);
171 }
172 }
173 else if (children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group") && maxRankRespected){
174 reloadClassification();
175 //extract the References within the document
176 extractReferences(children.item(i),nametosave,acceptedTaxon,refMods);
177 }
178 else if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
179 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected){
180 File file = new File("/home/pkelbert/Bureau/multipleTaxonX.txt");
181 FileWriter writer;
182 try {
183 writer = new FileWriter(file ,true);
184 writer.write(sourceName+"\n");
185 writer.flush();
186 writer.close();
187 } catch (IOException e1) {
188 // TODO Auto-generated catch block
189 e1.printStackTrace();
190 }
191 // String multiple = askMultiple(children.item(i));
192 String multiple = "Other";
193 if (multiple.equalsIgnoreCase("other")) {
194 extractSpecificFeatureNotStructured(children.item(i),acceptedTaxon, defaultTaxon,nametosave, refMods,multiple);
195 }
196 else
197 if (multiple.equalsIgnoreCase("synonyms")) {
198 try{
199 extractSynonyms(children.item(i),acceptedTaxon, refMods);
200 }catch(NullPointerException e){
201 logger.warn("the accepted taxon is maybe null");
202 }
203 }
204 else
205 if(multiple.equalsIgnoreCase("material examined")){
206 extractMaterials(children.item(i),acceptedTaxon, refMods, nametosave);
207 }
208 else
209 if (multiple.equalsIgnoreCase("distribution")){
210 extractDistribution(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods);
211 }
212 else
213 if (multiple.equalsIgnoreCase("type status")){
214 extractDescriptionWithReference(children.item(i),acceptedTaxon,defaultTaxon,refMods, "TypeStatus");
215 }
216 else
217 if (multiple.equalsIgnoreCase("vernacular name")){
218 extractDescriptionWithReference(children.item(i),acceptedTaxon,defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
219
220 }
221 else{
222 extractSpecificFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,multiple);
223 }
224
225 }
226 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
227 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected){
228 extractFeature(children.item(i),acceptedTaxon,defaultTaxon, nametosave, refMods, Feature.BIOLOGY_ECOLOGY());
229 }
230 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
231 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("vernacularName") && maxRankRespected){
232 extractDescriptionWithReference(children.item(i),acceptedTaxon,defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
233 }
234 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
235 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected){
236 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods, Feature.DESCRIPTION());
237 }
238 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
239 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected){
240 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,Feature.DIAGNOSIS());
241 }
242 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
243 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected){
244 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods, Feature.DISCUSSION());
245 }
246 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
247 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("note") && maxRankRespected){
248 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods, Feature.DESCRIPTION());
249 }
250
251 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
252 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected){
253 extractDistribution(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods);
254 }
255 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
256 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected){
257 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave,refMods,Feature.ETYMOLOGY());
258 }
259
260 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
261 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected){
262 extractMaterials(children.item(i),acceptedTaxon, refMods, nametosave);
263 }
264 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:figure") && maxRankRespected){
265 extractSpecificFeature(children.item(i),acceptedTaxon,defaultTaxon, nametosave, refMods, "Figure");
266 }
267 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
268 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") && maxRankRespected){
269 extractSpecificFeature(children.item(i),acceptedTaxon,defaultTaxon, nametosave, refMods, "table");
270 }
271
272 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
273 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected){
274 //TODO IGNORE keys for the moment
275 //extractKey(children.item(i),acceptedTaxon, nametosave,source, refMods);
276 extractSpecificFeatureNotStructured(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,"Keys - unparsed");
277 }
278 else{
279 if (!children.item(i).getNodeName().equalsIgnoreCase("tax:pb")){
280 logger.info("ANOTHER KIND OF NODES: "+children.item(i).getNodeName()+", "+children.item(i).getAttributes());
281 if (children.item(i).getAttributes() !=null) {
282 logger.info(children.item(i).getAttributes().item(0));
283 }
284 extractSpecificFeatureNotStructured(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,notMarkedUp);
285 }
286 }
287 }
288 // logger.info("saveUpdateNames");
289 if (maxRankRespected){
290 importer.getNameService().saveOrUpdate(nametosave);
291 importer.getClassificationService().saveOrUpdate(classification);
292 logger.info("saveUpdateNames-ok");
293 }
294
295 buildFeatureTree();
296 }
297
298
299 protected Map<String,Feature> getFeaturesUsed(){
300 return featuresMap;
301 }
302 /**
303 *
304 */
305 private void buildFeatureTree() {
306 FeatureTree proibiospheretree = importer.getFeatureTreeService().find(proIbioTreeUUID);
307 if (proibiospheretree == null){
308 List<FeatureTree> trees = importer.getFeatureTreeService().list(FeatureTree.class, null, null, null, null);
309 if (trees.size()==1) {
310 FeatureTree ft = trees.get(0);
311 if (featuresMap==null) {
312 featuresMap=new HashMap<String, Feature>();
313 }
314 for (Feature feature: ft.getDistinctFeatures()){
315 if(feature!=null) {
316 featuresMap.put(feature.getTitleCache(), feature);
317 }
318 }
319 }
320 proibiospheretree = FeatureTree.NewInstance();
321 proibiospheretree.setUuid(proIbioTreeUUID);
322 }
323 // FeatureNode root = proibiospheretree.getRoot();
324 FeatureNode root2 = proibiospheretree.getRoot();
325 if (root2 != null){
326 int nbChildren = root2.getChildCount()-1;
327 while (nbChildren>-1){
328 try{
329 root2.removeChild(nbChildren);
330 }catch(Exception e){logger.warn("Can't remove child from FeatureTree "+e);}
331 nbChildren --;
332 }
333
334 }
335
336 for (Feature feature:featuresMap.values()) {
337 root2.addChild(FeatureNode.NewInstance(feature));
338 }
339 importer.getFeatureTreeService().saveOrUpdate(proibiospheretree);
340
341 }
342
343
344 /**
345 * @param keys
346 * @param acceptedTaxon: the current acceptedTaxon
347 * @param nametosave: the list of objects to save into the CDM
348 * @param refMods: the current reference extracted from the MODS
349 */
350 /* @SuppressWarnings("rawtypes")
351 private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonNameBase> nametosave, Reference<?> refMods) {
352 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
353
354 NodeList children = keys.getChildNodes();
355 String key="";
356 PolytomousKey poly = PolytomousKey.NewInstance();
357 poly.addSource(OriginalSourceType.Import, null,null,refMods,null);
358 poly.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
359 poly.addTaxonomicScope(acceptedTaxon);
360 poly.setTitleCache("bloup");
361 // poly.addCoveredTaxon(acceptedTaxon);
362 PolytomousKeyNode root = poly.getRoot();
363 PolytomousKeyNode previous = null,tmpKey=null;
364 Taxon taxonKey=null;
365 List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
366
367 // String fullContent = keys.getTextContent();
368 for (int i=0;i<children.getLength();i++){
369 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
370 NodeList paragraph = children.item(i).getChildNodes();
371 key="";
372 taxonKey=null;
373 for (int j=0;j<paragraph.getLength();j++){
374 if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
375 if (! paragraph.item(j).getTextContent().trim().isEmpty()){
376 key+=paragraph.item(j).getTextContent().trim();
377 // logger.info("KEY: "+j+"--"+key);
378 }
379 }
380 if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
381 taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
382 }
383 }
384 // logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
385 if (keypattern.matcher(key).matches()){
386 tmpKey = PolytomousKeyNode.NewInstance(key);
387 if (taxonKey!=null) {
388 tmpKey.setTaxon(taxonKey);
389 }
390 polyNodes.add(tmpKey);
391 if (previous == null) {
392 root.addChild(tmpKey);
393 } else {
394 previous.addChild(tmpKey);
395 }
396 }else{
397 if (!key.isEmpty()){
398 tmpKey=PolytomousKeyNode.NewInstance(key);
399 if (taxonKey!=null) {
400 tmpKey.setTaxon(taxonKey);
401 }
402 polyNodes.add(tmpKey);
403 if (keypatternend.matcher(key).matches()) {
404 root.addChild(tmpKey);
405 previous=tmpKey;
406 } else{
407 previous.addChild(tmpKey);
408 }
409
410 }
411 }
412 }
413 }
414 importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
415 importer.getPolytomousKeyService().saveOrUpdate(poly);
416 }
417 */
418 // /**
419 // * @param taxons: the XML Nodegroup
420 // * @param nametosave: the list of objects to save into the CDM
421 // * @param acceptedTaxon: the current accepted Taxon
422 // * @param refMods: the current reference extracted from the MODS
423 // *
424 // * @return Taxon object built
425 // */
426 // @SuppressWarnings({ "rawtypes", "unchecked" })
427 // private Taxon getTaxonFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference<?> refMods) {
428 // // logger.info("getTaxonFromXML");
429 // // logger.info("acceptedTaxon: "+acceptedTaxon);
430 //
431 // // TaxonNameBase nameToBeFilled = null;
432 //
433 // currentMyName = new MyName();
434 // NomenclaturalStatusType statusType = null;
435 //
436 // try {
437 // currentMyName = extractScientificName(taxons);
438 // if (!currentMyName.getStatus().isEmpty()){
439 // try {
440 // statusType = nomStatusString2NomStatus(currentMyName.getStatus());
441 // } catch (UnknownCdmTypeException e) {
442 // addProblematicStatusToFile(currentMyName.getStatus());
443 // logger.warn("Problem with status");
444 // }
445 // }
446 //
447 // } catch (TransformerFactoryConfigurationError e1) {
448 // logger.warn(e1);
449 // } catch (TransformerException e1) {
450 // logger.warn(e1);
451 // }
452 // /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
453 //
454 // nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
455 // if (nameToBeFilled.hasProblem() &&
456 // !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
457 // // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
458 // addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
459 // nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser, currentMyName.getAuthor(), currentMyName.getRank());
460 // }
461 //
462 // nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
463 // */
464 // TaxonNameBase nameToBeFilled = currentMyName.getTaxonNameBase();
465 // Taxon t = currentMyName.getTaxon();
466 // // importer.getNameService().saveOrUpdate(nametosave);
467 // /* Taxon t = importer.getTaxonService().findBestMatchingTaxon(nameToBeFilled.getTitleCache());
468 // */
469 // boolean statusMatch=false;
470 // if(t !=null ){
471 // statusMatch=compareStatus(t, statusType);
472 // }
473 // if (t ==null || (t != null && !statusMatch)){
474 // if(statusType != null) {
475 // nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
476 // }
477 // t= new Taxon(nameToBeFilled,(Reference<?>) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
478 // if (t.getSec() == null) {
479 // t.setSec(refMods);
480 // }
481 // if(!configState.getConfig().doKeepOriginalSecundum()) {
482 // t.setSec(configState.getConfig().getSecundum());
483 // logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
484 // }
485 // t.addSource(OriginalSourceType.Import,null,null,refMods,null);
486 // t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
487 //
488 //
489 // if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
490 // setLSID(currentMyName.getIdentifier(), t);
491 // }
492 //
493 // // Taxon parentTaxon = currentMyName.getHigherTaxa();
494 // // if (parentTaxon == null && !skippQuestion) {
495 // // parentTaxon = askParent(t, classification);
496 // // }
497 // // if (parentTaxon ==null){
498 // // while (parentTaxon == null) {
499 // // System.out.println("parent is null");
500 // // parentTaxon = createParent(t, refMods);
501 // // classification.addParentChild(parentTaxon, t, refMods, null);
502 // // }
503 // // }else{
504 // // classification.addParentChild(parentTaxon, t, refMods, null);
505 // // }
506 // }
507 // else{
508 // t = CdmBase.deproxy(t, Taxon.class);
509 // }
510 // if (!configState.getConfig().doKeepOriginalSecundum()) {
511 // t.setSec(configState.getConfig().getSecundum());
512 // logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
513 // }
514 // return t;
515 // }
516
517
518
519
520 // private Taxon getTaxonFromTaxonNameBase(TaxonNameBase tnb,Reference<?> ref){
521 // Taxon taxon = null;
522 //// System.out.println(tnb.getTitleCache());
523 // Taxon cc= importer.getTaxonService().findBestMatchingTaxon(tnb.getTitleCache());
524 // if (cc != null){
525 // if ((cc.getSec() == null || cc.getSec().toString().isEmpty()) || (cc.getSec() != null &&
526 // cc.getSec().getTitleCache().equalsIgnoreCase(ref.getTitleCache()))) {
527 // if(cc.getSec() == null || cc.getSec().toString().isEmpty()){
528 // cc.setSec(ref);
529 // importer.getTaxonService().saveOrUpdate(cc);
530 // }
531 // taxon=cc;
532 // }
533 // }
534 // else{
535 // // List<TaxonBase> c = importer.getTaxonService().searchTaxaByName(tnb.getTitleCache(), ref);
536 // List<TaxonBase> c = importer.getTaxonService().list(TaxonBase.class, 0, 0, null, null);
537 // for (TaxonBase b : c) {
538 // try{
539 // taxon = (Taxon) b;
540 // }catch(ClassCastException e){logger.warn("error while casting existing taxonnamebase");}
541 // }
542 // }
543 // if (taxon == null){
544 //// System.out.println("NEW TAXON HERE "+tnb.toString()+", "+ref.toString());
545 // taxon = Taxon.NewInstance(tnb, ref); //sec set null
546 // importer.getTaxonService().save(taxon);
547 //
548 // }
549 // taxon = (Taxon) importer.getTaxonService().find(taxon.getUuid());
550 //
551 // boolean exist = false;
552 // for (TaxonNode p : classification.getAllNodes()){
553 // if(p.getTaxon().equals(taxon)) {
554 // exist =true;
555 // }
556 // }
557 // if (!exist){
558 // taxon = (Taxon) importer.getTaxonService().find(taxon.getUuid());
559 // Taxon parentTaxon = currentMyName.getHigherTaxa();
560 // if (parentTaxon != null) {
561 // classification.addParentChild(parentTaxon, taxon, ref, null);
562 // } else {
563 // System.out.println("HERE???");
564 // classification.addChildTaxon(taxon, ref, null);
565 // }
566 // importer.getClassificationService().saveOrUpdate(classification);
567 // // refreshTransaction();
568 // }
569 // taxon = CdmBase.deproxy(taxon, Taxon.class);
570 // // System.out.println("TAXON RETOURNE : "+taxon.getTitleCache());
571 // return taxon;
572 // }
573 /**
574 * @param taxons: the XML Nodegroup
575 * @param nametosave: the list of objects to save into the CDM
576 * @param acceptedTaxon: the current accepted Taxon
577 * @param refMods: the current reference extracted from the MODS
578 *
579 * @return Taxon object built
580 */
581 @SuppressWarnings({ "rawtypes", "unused" })
582 private TaxonNameBase getTaxonNameBaseFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference<?> refMods) {
583 // logger.info("getTaxonFromXML");
584 // logger.info("acceptedTaxon: "+acceptedTaxon);
585
586 TaxonNameBase nameToBeFilled = null;
587
588 currentMyName=new MyName();
589
590 NomenclaturalStatusType statusType = null;
591 try {
592 currentMyName = extractScientificName(taxons,refMods);
593 } catch (TransformerFactoryConfigurationError e1) {
594 logger.warn(e1);
595 } catch (TransformerException e1) {
596 logger.warn(e1);
597 }
598 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
599
600 nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
601 if (nameToBeFilled.hasProblem() &&
602 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
603 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
604 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
605 nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
606 }
607
608 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
609 */
610 nameToBeFilled = currentMyName.getTaxonNameBase();
611 return nameToBeFilled;
612
613 }
614
615 // @SuppressWarnings("rawtypes")
616 // private TaxonNameBase getTaxonNameBase (TaxonNameBase name, List<TaxonNameBase> nametosave, NomenclaturalStatusType statusType){
617 // List<TaxonNameBase> names = importer.getNameService().list(TaxonNameBase.class, null, null, null, null);
618 // for (TaxonNameBase tb : names){
619 // if (tb.getTitleCache().equalsIgnoreCase(name.getTitleCache())) {
620 // boolean statusMatch=false;
621 // if(tb !=null ){
622 // statusMatch=compareStatus(tb, statusType);
623 // }
624 // if (!statusMatch){
625 // if(statusType != null) {
626 // name.addStatus(NomenclaturalStatus.NewInstance(statusType));
627 // }
628 // }else
629 // {
630 // logger.info("TaxonNameBase FOUND"+name.getTitleCache());
631 // return CdmBase.deproxy(tb, TaxonNameBase.class);
632 // }
633 // }
634 // }
635 // // logger.info("TaxonNameBase NOT FOUND "+name.getTitleCache());
636 // // System.out.println("add name "+name);
637 // nametosave.add(name);
638 // name = CdmBase.deproxy(name, TaxonNameBase.class);
639 // return name;
640 //
641 // }
642
643
644
645 // /**
646 // * @param tb
647 // * @param statusType
648 // * @return
649 // */
650 // private boolean compareStatus(TaxonNameBase tb, NomenclaturalStatusType statusType) {
651 // boolean statusMatch=false;
652 // //found one taxon
653 // Set<NomenclaturalStatus> status = tb.getStatus();
654 // if (statusType!=null && status.size()>0){ //the statusType is known for both taxon
655 // for (NomenclaturalStatus st:status){
656 // NomenclaturalStatusType stype = st.getType();
657 // if (stype.toString().equalsIgnoreCase(statusType.toString())) {
658 // statusMatch=true;
659 // }
660 // }
661 // }
662 // else{
663 // if(statusType == null && status.size()==0) {//there is no statusType, we can assume it's the same
664 // statusMatch=true;
665 // }
666 // }
667 // return statusMatch;
668 // }
669
670 /**
671 *
672 */
673 private void reloadClassification() {
674 Classification cl = importer.getClassificationService().find(classification.getUuid());
675 if (cl != null){
676 classification=cl;
677 }else{
678 importer.getClassificationService().saveOrUpdate(classification);
679 classification = importer.getClassificationService().find(classification.getUuid());
680 }
681 }
682
683 // /**
684 // * Create a Taxon for the current NameBase, based on the current reference
685 // * @param taxonNameBase
686 // * @param refMods: the current reference extracted from the MODS
687 // * @return Taxon
688 // */
689 // @SuppressWarnings({ "unused", "rawtypes" })
690 // private Taxon getTaxon(TaxonNameBase taxonNameBase, Reference<?> refMods) {
691 // Taxon t = new Taxon(taxonNameBase,null );
692 // if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
693 // t.setSec(configState.getConfig().getSecundum());
694 // logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
695 // }
696 // /*<<<<<<< .courant
697 // boolean sourceExists=false;
698 // Set<IdentifiableSource> sources = t.getSources();
699 // for (IdentifiableSource src : sources){
700 // String micro = src.getCitationMicroReference();
701 // Reference r = src.getCitation();
702 // if (r.equals(refMods) && micro == null) {
703 // sourceExists=true;
704 // }
705 // }
706 // if(!sourceExists) {
707 // t.addSource(null,null,refMods,null);
708 // }
709 //=======*/
710 // t.addSource(OriginalSourceType.Import,null,null,refMods,null);
711 // t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
712 // return t;
713 // }
714
715 private void extractDescriptionWithReference(Node typestatus, Taxon acceptedTaxon, Taxon defaultTaxon, Reference<?> refMods,
716 String featureName) {
717 // System.out.println("extractDescriptionWithReference !");
718 NodeList children = typestatus.getChildNodes();
719
720 Feature currentFeature=getFeatureObjectFromString(featureName);
721
722 String r="";String s="";
723 for (int i=0;i<children.getLength();i++){
724 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
725 s+=children.item(i).getTextContent().trim();
726 }
727 if (children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
728 r+= children.item(i).getTextContent().trim();
729 }
730 if (s.indexOf(r)>-1) {
731 s=s.split(r)[0];
732 }
733 }
734
735 Reference<?> currentref = ReferenceFactory.newGeneric();
736 if(!r.isEmpty()) {
737 currentref.setTitleCache(r);
738 } else {
739 currentref=refMods;
740 }
741 setParticularDescription(s,acceptedTaxon,defaultTaxon, currentref, refMods,currentFeature);
742 }
743
744 /**
745 * @param nametosave
746 * @param distribution: the XML node group
747 * @param acceptedTaxon: the current accepted Taxon
748 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
749 * @param refMods: the current reference extracted from the MODS
750 */
751 @SuppressWarnings("rawtypes")
752 private void extractDistribution(Node distribution, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave, Reference<?> refMods) {
753 // logger.info("DISTRIBUTION");
754 // logger.info("acceptedTaxon: "+acceptedTaxon);
755 NodeList children = distribution.getChildNodes();
756 Map<Integer,List<MySpecimenOrObservation>> specimenOrObservations = new HashMap<Integer, List<MySpecimenOrObservation>>();
757 Map<Integer,String> descriptionsFulltext = new HashMap<Integer,String>();
758
759 for (int i=0;i<children.getLength();i++){
760 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
761 NodeList paragraph = children.item(i).getChildNodes();
762 for (int j=0;j<paragraph.getLength();j++){
763 if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
764 extractText(descriptionsFulltext, i, paragraph.item(j));
765 }
766 else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
767 extractInLine(nametosave, refMods, descriptionsFulltext, i,paragraph.item(j));
768 }
769 else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")){
770 MySpecimenOrObservation specimenOrObservation = new MySpecimenOrObservation();
771 DerivedUnit derivedUnitBase = null;
772 specimenOrObservation = extractSpecimenOrObservation(paragraph.item(j), derivedUnitBase, SpecimenOrObservationType.DerivedUnit);
773 extractTextFromSpecimenOrObservation(specimenOrObservations, descriptionsFulltext, i, specimenOrObservation);
774 }
775 }
776 }
777 }
778
779 int m=0;
780 for (int k:descriptionsFulltext.keySet()) {
781 if (k>m) {
782 m=k;
783 }
784 }
785 for (int k:specimenOrObservations.keySet()) {
786 if (k>m) {
787 m=k;
788 }
789 }
790
791
792 if(acceptedTaxon!=null){
793 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
794 Feature currentFeature = Feature.DISTRIBUTION();
795 // DerivedUnit derivedUnitBase=null;
796 // String descr="";
797 for (int k=0;k<=m;k++){
798 if(specimenOrObservations.keySet().contains(k)){
799 for (MySpecimenOrObservation soo:specimenOrObservations.get(k) ) {
800 handleAssociation(acceptedTaxon, refMods, td, soo);
801 }
802 }
803
804 if (descriptionsFulltext.keySet().contains(k)){
805 if (!descriptionsFulltext.get(k).isEmpty() && (descriptionsFulltext.get(k).startsWith("Hab.") || descriptionsFulltext.get(k).startsWith("Habitat"))){
806 setParticularDescription(descriptionsFulltext.get(k),acceptedTaxon,defaultTaxon, refMods, Feature.HABITAT());
807 break;
808 }
809 else{
810 handleTextData(refMods, descriptionsFulltext, td, currentFeature, k);
811 }
812 }
813
814 if (descriptionsFulltext.keySet().contains(k) || specimenOrObservations.keySet().contains(k)){
815 acceptedTaxon.addDescription(td);
816 sourceHandler.addAndSaveSource(refMods, td, null);
817 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
818 }
819 }
820 }
821 }
822
823 /**
824 * @param refMods
825 * @param descriptionsFulltext
826 * @param td
827 * @param currentFeature
828 * @param k
829 */
830 private void handleTextData(Reference<?> refMods, Map<Integer, String> descriptionsFulltext, TaxonDescription td,
831 Feature currentFeature, int k) {
832 TextData textData = TextData.NewInstance();
833 textData.setFeature(currentFeature);
834 textData.putText(Language.UNKNOWN_LANGUAGE(), descriptionsFulltext.get(k));
835 sourceHandler.addSource(refMods, textData);
836 td.addElement(textData);
837 }
838
839 /**
840 * @param acceptedTaxon
841 * @param refMods
842 * @param td
843 * @param soo
844 */
845 private void handleAssociation(Taxon acceptedTaxon, Reference<?> refMods, TaxonDescription td, MySpecimenOrObservation soo) {
846 String descr=soo.getDescr();
847 DerivedUnit derivedUnitBase = soo.getDerivedUnitBase();
848
849 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
850
851 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
852 acceptedTaxon.addDescription(taxonDescription);
853
854 Feature feature=null;
855 feature = makeFeature(derivedUnitBase);
856 if(!StringUtils.isEmpty(descr)) {
857 derivedUnitBase.setTitleCache(descr, true);
858 }
859
860 IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
861
862 taxonDescription.addElement(indAssociation);
863 taxonDescription.setTaxon(acceptedTaxon);
864 sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
865 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
866 td.setDescribedSpecimenOrObservation(soo.getDerivedUnitBase());
867 }
868
869 /**
870 * create an individualAssociation
871 * @param refMods
872 * @param derivedUnitBase
873 * @param feature
874 * @return
875 */
876 private IndividualsAssociation createIndividualAssociation(Reference<?> refMods, DerivedUnit derivedUnitBase,
877 Feature feature) {
878 IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
879 indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
880 indAssociation.setFeature(feature);
881 indAssociation = sourceHandler.addSource(refMods, indAssociation);
882 return indAssociation;
883 }
884
885 /**
886 * @param specimenOrObservations
887 * @param descriptionsFulltext
888 * @param i
889 * @param specimenOrObservation
890 */
891 private void extractTextFromSpecimenOrObservation(Map<Integer, List<MySpecimenOrObservation>> specimenOrObservations,
892 Map<Integer, String> descriptionsFulltext, int i, MySpecimenOrObservation specimenOrObservation) {
893 List<MySpecimenOrObservation> speObsList = specimenOrObservations.get(i);
894 if (speObsList == null) {
895 speObsList=new ArrayList<MySpecimenOrObservation>();
896 }
897 speObsList.add(specimenOrObservation);
898 specimenOrObservations.put(i,speObsList);
899
900 String s = specimenOrObservation.getDerivedUnitBase().toString();
901 if (descriptionsFulltext.get(i) !=null){
902 s = descriptionsFulltext.get(i)+" "+s;
903 }
904 descriptionsFulltext.put(i, s);
905 }
906
907 /**
908 * Extract the text with the inline link to a taxon
909 * @param nametosave
910 * @param refMods
911 * @param descriptionsFulltext
912 * @param i
913 * @param paragraph
914 */
915 @SuppressWarnings("rawtypes")
916 private void extractInLine(List<TaxonNameBase> nametosave, Reference<?> refMods, Map<Integer, String> descriptionsFulltext,
917 int i, Node paragraph) {
918 String inLine=getInlineText(nametosave, refMods, paragraph);
919 if (descriptionsFulltext.get(i) !=null){
920 inLine = descriptionsFulltext.get(i)+inLine;
921 }
922 descriptionsFulltext.put(i, inLine);
923 }
924
925 /**
926 * Extract the raw text from a Node
927 * @param descriptionsFulltext
928 * @param node
929 * @param j
930 */
931 private void extractText(Map<Integer, String> descriptionsFulltext, int i, Node node) {
932 if(!node.getTextContent().trim().isEmpty()) {
933 String s =node.getTextContent().trim();
934 if (descriptionsFulltext.get(i) !=null){
935 s = descriptionsFulltext.get(i)+" "+s;
936 }
937 descriptionsFulltext.put(i, s);
938 }
939 }
940
941
942 /**
943 * @param materials: the XML node group
944 * @param acceptedTaxon: the current accepted Taxon
945 * @param refMods: the current reference extracted from the MODS
946 */
947 @SuppressWarnings("rawtypes")
948 private void extractMaterials(Node materials, Taxon acceptedTaxon, Reference<?> refMods,List<TaxonNameBase> nametosave) {
949 // logger.info("EXTRACTMATERIALS");
950 // logger.info("acceptedTaxon: "+acceptedTaxon);
951 NodeList children = materials.getChildNodes();
952 NodeList events = null;
953 // String descr="";
954
955
956 for (int i=0;i<children.getLength();i++){
957 String rawAssociation="";
958 boolean added=false;
959 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
960 events = children.item(i).getChildNodes();
961 for(int k=0;k<events.getLength();k++){
962 if (events.item(k).getNodeName().equalsIgnoreCase("tax:name")){
963 String inLine= getInlineText(nametosave, refMods, events.item(k));
964 if(!inLine.isEmpty()) {
965 rawAssociation+=inLine;
966 }
967 }
968 if (! events.item(k).getNodeName().equalsIgnoreCase("tax:name")
969 && !events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
970 rawAssociation+= events.item(k).getTextContent().trim();
971 }
972 if(events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
973 if (!containsDistinctLetters(rawAssociation.replaceAll(";",""))) {
974 rawAssociation="no description text";
975 }
976 added=true;
977 handleDerivedUnitFacadeAndBase(acceptedTaxon, refMods, events.item(k), rawAssociation);
978 }
979 if (!rawAssociation.isEmpty() && !added){
980
981 Feature feature = Feature.MATERIALS_EXAMINED();
982 featuresMap.put(feature.getTitleCache(),feature);
983
984 TextData textData = createTextData(rawAssociation, refMods, feature);
985
986 if(! rawAssociation.isEmpty() && (acceptedTaxon!=null)){
987 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
988 td.addElement(textData);
989 acceptedTaxon.addDescription(td);
990 sourceHandler.addAndSaveSource(refMods, td, null);
991 }
992 // DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
993 // derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
994 //
995 // TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
996 // acceptedTaxon.addDescription(taxonDescription);
997 //
998 // IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
999 //
1000 // Feature feature = Feature.MATERIALS_EXAMINED();
1001 // featuresMap.put(feature.getTitleCache(),feature);
1002 // if(!StringUtils.isEmpty(rawAssociation)) {
1003 // derivedUnitBase.setTitleCache(rawAssociation, true);
1004 // }
1005 // indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
1006 // indAssociation.setFeature(feature);
1007 // indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
1008 //
1009 // /*boolean sourceExists=false;
1010 // Set<DescriptionElementSource> dsources = indAssociation.getSources();
1011 // for (DescriptionElementSource src : dsources){
1012 // String micro = src.getCitationMicroReference();
1013 // Reference r = src.getCitation();
1014 // if (r.equals(refMods) && micro == null) {
1015 // sourceExists=true;
1016 // }
1017 // }
1018 // if(!sourceExists) {
1019 // indAssociation.addSource(null, null, refMods, null);
1020 // }*/
1021 // taxonDescription.addElement(indAssociation);
1022 // taxonDescription.setTaxon(acceptedTaxon);
1023 // taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
1024 //
1025 // /*sourceExists=false;
1026 // Set<IdentifiableSource> sources = taxonDescription.getSources();
1027 // for (IdentifiableSource src : sources){
1028 // String micro = src.getCitationMicroReference();
1029 // Reference r = src.getCitation();
1030 // if (r.equals(refMods) && micro == null) {
1031 // sourceExists=true;
1032 // }
1033 // }
1034 // if(!sourceExists) {
1035 // taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
1036 // }*/
1037 //
1038 // importer.getDescriptionService().saveOrUpdate(taxonDescription);
1039 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1040
1041 rawAssociation="";
1042 }
1043 }
1044 }
1045 }
1046 }
1047
1048 /**
1049 * @param acceptedTaxon
1050 * @param refMods
1051 * @param events
1052 * @param rawAssociation
1053 * @param k
1054 */
1055 private void handleDerivedUnitFacadeAndBase(Taxon acceptedTaxon, Reference<?> refMods, Node event,
1056 String rawAssociation) {
1057 String descr;
1058 DerivedUnit derivedUnitBase;
1059 MySpecimenOrObservation myspecimenOrObservation;
1060 DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
1061 derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
1062
1063 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
1064
1065 myspecimenOrObservation = extractSpecimenOrObservation(event,derivedUnitBase,SpecimenOrObservationType.DerivedUnit);
1066 derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
1067 descr=myspecimenOrObservation.getDescr();
1068
1069 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
1070
1071 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
1072 acceptedTaxon.addDescription(taxonDescription);
1073
1074 Feature feature = makeFeature(derivedUnitBase);
1075 featuresMap.put(feature.getTitleCache(),feature);
1076 if(!StringUtils.isEmpty(descr)) {
1077 derivedUnitBase.setTitleCache(descr, true);
1078 }
1079
1080 IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
1081
1082 taxonDescription.addElement(indAssociation);
1083 taxonDescription.setTaxon(acceptedTaxon);
1084 sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
1085 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1086 }
1087
1088
1089
1090 /**
1091 * @param materials: the XML node group
1092 * @param acceptedTaxon: the current accepted Taxon
1093 * @param refMods: the current reference extracted from the MODS
1094 */
1095 private String extractMaterialsDirect(Node materials, Taxon acceptedTaxon, Reference<?> refMods, String event) {
1096 // logger.info("EXTRACTMATERIALS");
1097 // logger.info("acceptedTaxon: "+acceptedTaxon);
1098 String descr="";
1099
1100 DerivedUnit derivedUnitBase=null;
1101 MySpecimenOrObservation myspecimenOrObservation = extractSpecimenOrObservation(materials,derivedUnitBase, SpecimenOrObservationType.DerivedUnit);
1102 derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
1103
1104 sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
1105
1106 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
1107 acceptedTaxon.addDescription(taxonDescription);
1108
1109 Feature feature=null;
1110 if (event.equalsIgnoreCase("collection")){
1111 feature = makeFeature(derivedUnitBase);
1112 }
1113 else{
1114 feature = Feature.MATERIALS_EXAMINED();
1115 }
1116 featuresMap.put(feature.getTitleCache(), feature);
1117
1118 descr=myspecimenOrObservation.getDescr();
1119 if(!StringUtils.isEmpty(descr)) {
1120 derivedUnitBase.setTitleCache(descr);
1121 }
1122
1123 IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
1124
1125 taxonDescription.addElement(indAssociation);
1126 taxonDescription.setTaxon(acceptedTaxon);
1127 sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
1128 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1129
1130 return derivedUnitBase.getTitleCache();
1131
1132 }
1133
1134
1135 /**
1136 * @param description: the XML node group
1137 * @param acceptedTaxon: the current acceptedTaxon
1138 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1139 * @param nametosave: the list of objects to save into the CDM
1140 * @param refMods: the current reference extracted from the MODS
1141 * @param featureName: the feature name
1142 */
1143 @SuppressWarnings({ "rawtypes", "null" })
1144 private String extractSpecificFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
1145 List<TaxonNameBase> nametosave, Reference<?> refMods, String featureName ) {
1146 // System.out.println("GRUUUUuu");
1147 NodeList children = description.getChildNodes();
1148 NodeList insideNodes ;
1149 NodeList trNodes;
1150 // String descr ="";
1151 String localdescr="";
1152 List<String> blabla=null;
1153 List<String> text = new ArrayList<String>();
1154
1155 String table="<table>";
1156 String head="";
1157 String line="";
1158
1159 Feature currentFeature=getFeatureObjectFromString(featureName);
1160
1161 // String fullContent = description.getTextContent();
1162 for (int i=0;i<children.getLength();i++){
1163 // localdescr="";
1164 if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
1165 text.add(children.item(i).getTextContent().trim());
1166 }
1167 if (featureName.equalsIgnoreCase("table")){
1168 if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1169 children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("thead")){
1170 head = extractTableHead(children.item(i));
1171 table+=head;
1172 line = extractTableLine(children.item(i));
1173 if (!line.equalsIgnoreCase("<tr></tr>")) {
1174 table+=line;
1175 }
1176 }
1177 if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1178 children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1179 line = extractTableLineWithColumn(children.item(i).getChildNodes());
1180 if(!line.equalsIgnoreCase("<tr></tr>")) {
1181 table+=line;
1182 }
1183 }
1184 }
1185 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1186 insideNodes=children.item(i).getChildNodes();
1187 blabla= new ArrayList<String>();
1188 for (int j=0;j<insideNodes.getLength();j++){
1189 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1190 String inlinetext = getInlineText(nametosave, refMods, insideNodes.item(j));
1191 if (!inlinetext.isEmpty()) {
1192 blabla.add(inlinetext);
1193 }
1194 }
1195 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1196 if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1197 blabla.add(insideNodes.item(j).getTextContent().trim());
1198 // localdescr += insideNodes.item(j).getTextContent().trim();
1199 }
1200 }
1201 }
1202 if (!blabla.isEmpty()) {
1203 String blaStr = StringUtils.join(blabla," ").trim();
1204 if(!blaStr.isEmpty()) {
1205 setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1206 }
1207 }
1208 text.add(StringUtils.join(blabla," "));
1209 }
1210 if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1211 if(!children.item(i).getTextContent().trim().isEmpty()){
1212 localdescr = children.item(i).getTextContent().trim();
1213 setParticularDescription(localdescr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1214 }
1215 }
1216 }
1217
1218 table+="</table>";
1219 if (!table.equalsIgnoreCase("<table></table>")){
1220 // System.out.println("TABLE : "+table);
1221 text.add(table);
1222 }
1223
1224 if (text !=null && !text.isEmpty()) {
1225 return StringUtils.join(text," ");
1226 } else {
1227 return "";
1228 }
1229
1230 }
1231
1232 /**
1233 * @param children
1234 * @param i
1235 * @return
1236 */
1237 private String extractTableLine(Node child) {
1238 String line;
1239 line="<tr>";
1240 if (child.getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1241 line = extractTableLineWithColumn(child.getChildNodes());
1242 }
1243 line+="</tr>";
1244 return line;
1245 }
1246
1247 /**
1248 * @param children
1249 * @param i
1250 * @return
1251 */
1252 private String extractTableHead(Node child) {
1253 String head;
1254 String line;
1255 head="<th>";
1256 NodeList trNodes = child.getChildNodes();
1257 for (int k=0;k<trNodes.getLength();k++){
1258 if (trNodes.item(k).getNodeName().equalsIgnoreCase("tax:div")
1259 && trNodes.item(k).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1260 line = extractTableLineWithColumn(trNodes.item(k).getChildNodes());
1261 head+=line;
1262 }
1263 }
1264 head+="</th>";
1265 return head;
1266 }
1267
1268 /**
1269 * build a html table line, with td columns
1270 * @param tdNodes
1271 * @return an html coded line
1272 */
1273 private String extractTableLineWithColumn(NodeList tdNodes) {
1274 String line;
1275 line="<tr>";
1276 for (int l=0;l<tdNodes.getLength();l++){
1277 if (tdNodes.item(l).getNodeName().equalsIgnoreCase("tax:p")){
1278 line+="<td>"+tdNodes.item(l).getTextContent()+"</td>";
1279 }
1280 }
1281 line+="</tr>";
1282 return line;
1283 }
1284
1285 /**
1286 * @param description: the XML node group
1287 * @param acceptedTaxon: the current acceptedTaxon
1288 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1289 * @param nametosave: the list of objects to save into the CDM
1290 * @param refMods: the current reference extracted from the MODS
1291 * @param featureName: the feature name
1292 */
1293 @SuppressWarnings({ "unused", "rawtypes" })
1294 private String extractSpecificFeatureNotStructured(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
1295 List<TaxonNameBase> nametosave, Reference<?> refMods, String featureName ) {
1296 NodeList children = description.getChildNodes();
1297 NodeList insideNodes ;
1298 List<String> blabla= new ArrayList<String>();
1299
1300
1301 Feature currentFeature = getFeatureObjectFromString(featureName);
1302
1303 String fullContent = description.getTextContent();
1304 for (int i=0;i<children.getLength();i++){
1305 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1306 insideNodes=children.item(i).getChildNodes();
1307 for (int j=0;j<insideNodes.getLength();j++){
1308 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1309 String inlineText =getInlineText(nametosave, refMods, insideNodes.item(j));
1310 if(!inlineText.isEmpty()) {
1311 blabla.add(inlineText);
1312 }
1313 }
1314 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1315 if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1316 blabla.add(insideNodes.item(j).getTextContent().trim());
1317 }
1318 }
1319 }
1320 }
1321 if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1322 if(!children.item(i).getTextContent().trim().isEmpty()){
1323 String localdescr = children.item(i).getTextContent().trim();
1324 if(!localdescr.isEmpty())
1325 {
1326 blabla.add(localdescr);
1327 }
1328 }
1329 }
1330 }
1331
1332 if (blabla !=null && !blabla.isEmpty()) {
1333 String blaStr = StringUtils.join(blabla," ").trim();
1334 if (!blaStr.isEmpty() && !blaStr.equalsIgnoreCase(".") && !blaStr.equalsIgnoreCase(",") && !blaStr.equalsIgnoreCase(";")) {
1335 setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1336 }
1337 return blaStr;
1338 } else {
1339 return "";
1340 }
1341
1342 }
1343
1344 /**
1345 * @param nametosave
1346 * @param refMods
1347 * @param insideNodes
1348 * @param blabla
1349 * @param j
1350 */
1351 @SuppressWarnings({ "rawtypes" })
1352 private String getInlineText(List<TaxonNameBase> nametosave, Reference<?> refMods, Node insideNode) {
1353 TaxonNameBase tnb = getTaxonNameBaseFromXML(insideNode, nametosave,refMods);
1354 // Taxon tax = getTaxonFromTxonNameBase(tnb, refMods);
1355 Taxon tax = currentMyName.getTaxon();
1356 if(tnb !=null){
1357 String linkedTaxon = tnb.toString().split("sec")[0];//TODO NOT IMPLEMENTED IN THE CDM YET
1358 return "<cdm:taxon uuid='"+tax.getUuid()+"'>"+linkedTaxon+"</cdm:taxon>";
1359 }
1360 return "";
1361 }
1362
1363 /**
1364 * @param featureName
1365 * @return
1366 */
1367 @SuppressWarnings("rawtypes")
1368 private Feature getFeatureObjectFromString(String featureName) {
1369 List<DefinedTermBase> features = importer.getTermService().list(Feature.class, null,null,null,null);
1370 Feature currentFeature=null;
1371 for (DefinedTermBase feature: features){
1372 String tmpF = ((Feature)feature).getTitleCache();
1373 if (tmpF.equalsIgnoreCase(featureName)) {
1374 currentFeature=(Feature)feature;
1375 // System.out.println("currentFeatureFromList "+currentFeature.getUuid());
1376 }
1377 }
1378 if (currentFeature == null) {
1379 currentFeature=Feature.NewInstance(featureName, featureName, featureName);
1380 if(featureName.equalsIgnoreCase("Other")){
1381 currentFeature.setUuid(OtherUUID);
1382 }
1383 if(featureName.equalsIgnoreCase(notMarkedUp)){
1384 currentFeature.setUuid(NotMarkedUpUUID);
1385 }
1386 importer.getTermService().saveOrUpdate(currentFeature);
1387 }
1388 return currentFeature;
1389 }
1390
1391
1392
1393
1394 /**
1395 * @param children: the XML node group
1396 * @param nametosave: the list of objects to save into the CDM
1397 * @param acceptedTaxon: the current acceptedTaxon
1398 * @param refMods: the current reference extracted from the MODS
1399 * @param fullContent :the parsed XML content
1400 * @return a list of description (text)
1401 */
1402 @SuppressWarnings({ "unused", "rawtypes" })
1403 private List<String> parseParagraph(List<TaxonNameBase> nametosave, Taxon acceptedTaxon, Reference<?> refMods, Node paragraph, Feature feature){
1404 List<String> fullDescription= new ArrayList<String>();
1405 // String localdescr;
1406 String descr="";
1407 NodeList insideNodes ;
1408 boolean collectionEvent = false;
1409 List<Node>collectionEvents = new ArrayList<Node>();
1410
1411 NodeList children = paragraph.getChildNodes();
1412
1413 for (int i=0;i<children.getLength();i++){
1414 // localdescr="";
1415 if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
1416 descr += children.item(i).getTextContent().trim();
1417 }
1418 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1419 insideNodes=children.item(i).getChildNodes();
1420 List<String> blabla= new ArrayList<String>();
1421 for (int j=0;j<insideNodes.getLength();j++){
1422 boolean nodeKnown = false;
1423 // System.out.println("insideNodes.item(j).getNodeName() : "+insideNodes.item(j).getNodeName());
1424 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1425 String inlineText = getInlineText(nametosave, refMods, insideNodes.item(j));
1426 if (!inlineText.isEmpty()) {
1427 blabla.add(inlineText);
1428 }
1429 nodeKnown=true;
1430 }
1431 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1432 if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1433 blabla.add(insideNodes.item(j).getTextContent().trim());
1434 // localdescr += insideNodes.item(j).getTextContent().trim();
1435 }
1436 nodeKnown=true;
1437 }
1438 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:bibref")) {
1439 String ref = insideNodes.item(j).getTextContent().trim();
1440 if (ref.endsWith(";") && ((ref.length())>1)) {
1441 ref=ref.substring(0, ref.length()-1)+".";
1442 }
1443 Reference<?> reference = ReferenceFactory.newGeneric();
1444 reference.setTitleCache(ref, true);
1445 blabla.add(reference.getTitleCache());
1446 nodeKnown=true;
1447 }
1448 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:figure")){
1449 String figure = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, nametosave, refMods, "figure");
1450 blabla.add(figure);
1451 }
1452 if(insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:div") &&
1453 insideNodes.item(j).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1454 insideNodes.item(j).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1455 String table = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, nametosave, refMods, "table");
1456 blabla.add(table);
1457 }
1458 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1459 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1460 String titlecache = extractMaterialsDirect(insideNodes.item(j), acceptedTaxon, refMods, "collection");
1461 blabla.add(titlecache);
1462 collectionEvent=true;
1463 collectionEvents.add(insideNodes.item(j));
1464 nodeKnown=true;
1465 }
1466 // if (!nodeKnown && !insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:pb")) {
1467 // logger.info("Node not handled yet : "+insideNodes.item(j).getNodeName());
1468 // }
1469
1470 }
1471 if (!blabla.isEmpty()) {
1472 fullDescription.add(StringUtils.join(blabla," "));
1473 }
1474 }
1475 if (children.item(i).getNodeName().equalsIgnoreCase("tax:figure")){
1476 String figure = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, nametosave, refMods, "Figure");
1477 fullDescription.add(figure);
1478 }
1479 if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1480 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1481 children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1482 String table = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, nametosave, refMods, "table");
1483 fullDescription.add(table);
1484 }
1485 }
1486
1487 if(descr.length()>0){
1488
1489
1490 Feature currentFeature= getNotMarkedUpFeatureObject();
1491 setParticularDescription(descr,acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1492 }
1493 // if (collectionEvent) {
1494 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1495 // for (Node coll:collectionEvents){
1496 // = extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
1497 // }
1498 // }
1499 return fullDescription;
1500 }
1501
1502
1503 /**
1504 * @param description: the XML node group
1505 * @param acceptedTaxon: the current acceptedTaxon
1506 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1507 * @param nametosave: the list of objects to save into the CDM
1508 * @param refMods: the current reference extracted from the MODS
1509 * @param feature: the feature to link the data with
1510 */
1511 @SuppressWarnings("rawtypes")
1512 private void extractFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave, Reference<?> refMods, Feature feature){
1513 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1514 List<String> fullDescription= parseParagraph( nametosave, acceptedTaxon, refMods, description,feature);
1515
1516 // System.out.println("Feature : "+feature.toString()+", "+fullDescription.toString());
1517 if (!fullDescription.isEmpty()) {
1518 setParticularDescription(StringUtils.join(fullDescription," "),acceptedTaxon,defaultTaxon, refMods,feature);
1519 }
1520
1521 }
1522
1523
1524 /**
1525 * @param descr: the XML Nodegroup to parse
1526 * @param acceptedTaxon: the current acceptedTaxon
1527 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1528 * @param refMods: the current reference extracted from the MODS
1529 * @param currentFeature: the feature name
1530 * @return
1531 */
1532 private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon, Reference<?> refMods, Feature currentFeature) {
1533 logger.info("setParticularDescription "+currentFeature.getTitleCache()+", \n blabla : "+descr);
1534 // System.out.println("setParticularDescription "+currentFeature.getTitleCache()+", \n blabla : "+descr);
1535 // logger.info("acceptedTaxon: "+acceptedTaxon);
1536 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1537
1538 featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1539
1540 TextData textData = createTextData(descr, refMods, currentFeature);
1541
1542 if(! descr.isEmpty() && (acceptedTaxon!=null)){
1543 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1544 td.addElement(textData);
1545 acceptedTaxon.addDescription(td);
1546
1547 sourceHandler.addAndSaveSource(refMods, td, null);
1548 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1549 }
1550
1551 if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1552 try{
1553 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1554 if (tmp!=null) {
1555 defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1556 }else{
1557 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1558 }
1559 }catch(Exception e){
1560 logger.debug("TAXON EXISTS"+defaultTaxon);
1561 }
1562
1563 TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1564 defaultTaxon.addDescription(td);
1565 td.addElement(textData);
1566 sourceHandler.addAndSaveSource(refMods, td, null);
1567 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1568 }
1569 }
1570
1571 /**
1572 * @param descr
1573 * @param refMods
1574 * @param currentFeature
1575 * @return
1576 */
1577 private TextData createTextData(String descr, Reference<?> refMods, Feature currentFeature) {
1578 TextData textData = TextData.NewInstance();
1579 textData.setFeature(currentFeature);
1580 sourceHandler.addSource(refMods, textData);
1581
1582 textData.putText(Language.UNKNOWN_LANGUAGE(), descr);
1583 return textData;
1584 }
1585
1586
1587
1588 /**
1589 * @param descr: the XML Nodegroup to parse
1590 * @param acceptedTaxon: the current acceptedTaxon
1591 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1592 * @param refMods: the current reference extracted from the MODS
1593 * @param currentFeature: the feature name
1594 * @return
1595 */
1596 private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon,Reference<?> currentRef, Reference<?> refMods, Feature currentFeature) {
1597 // System.out.println("setParticularDescriptionSPecial "+currentFeature);
1598 // logger.info("acceptedTaxon: "+acceptedTaxon);
1599 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1600
1601 featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1602 TextData textData = createTextData(descr, refMods, currentFeature);
1603
1604 if(! descr.isEmpty() && (acceptedTaxon!=null)){
1605 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1606 td.addElement(textData);
1607 acceptedTaxon.addDescription(td);
1608
1609 sourceHandler.addAndSaveSource(refMods, td, currentRef);
1610 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1611 }
1612
1613 if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1614 try{
1615 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1616 if (tmp!=null) {
1617 defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1618 }else{
1619 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1620 }
1621 }catch(Exception e){
1622 logger.debug("TAXON EXISTS"+defaultTaxon);
1623 }
1624
1625 TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1626 defaultTaxon.addDescription(td);
1627 td.addElement(textData);
1628 sourceHandler.addAndSaveSource(currentRef, td,currentRef);
1629 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1630 }
1631 }
1632
1633
1634
1635 /**
1636 * @param synonyms: the XML Nodegroup to parse
1637 * @param nametosave: the list of objects to save into the CDM
1638 * @param acceptedTaxon: the current acceptedTaxon
1639 * @param refMods: the current reference extracted from the MODS
1640 */
1641 @SuppressWarnings({ "rawtypes" })
1642 private void extractSynonyms(Node synonyms, Taxon acceptedTaxon,Reference<?> refMods) {
1643 // logger.info("extractSynonyms: "+acceptedTaxon);
1644 Taxon ttmp = (Taxon) importer.getTaxonService().find(acceptedTaxon.getUuid());
1645 if (ttmp != null) {
1646 acceptedTaxon = CdmBase.deproxy(ttmp,Taxon.class);
1647 }
1648 else{
1649 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1650 }
1651 NodeList children = synonyms.getChildNodes();
1652 TaxonNameBase nameToBeFilled = null;
1653 List<MyName> names = new ArrayList<MyName>();
1654
1655 if(synonyms.getNodeName().equalsIgnoreCase("tax:name")){
1656 MyName myName;
1657 try {
1658 myName = extractScientificName(synonyms,refMods);
1659 names.add(myName);
1660 } catch (TransformerFactoryConfigurationError e) {
1661 logger.warn(e);
1662 } catch (TransformerException e) {
1663 logger.warn(e);
1664 }
1665 }
1666
1667
1668 for (int i=0;i<children.getLength();i++){
1669 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1670 NodeList tmp = children.item(i).getChildNodes();
1671 // String fullContent = children.item(i).getTextContent();
1672 for (int j=0; j< tmp.getLength();j++){
1673 if(tmp.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1674 MyName myName;
1675 try {
1676 myName = extractScientificName(tmp.item(j),refMods);
1677 names.add(myName);
1678 } catch (TransformerFactoryConfigurationError e) {
1679 logger.warn(e);
1680 } catch (TransformerException e) {
1681 logger.warn(e);
1682 }
1683
1684 }
1685 }
1686 }
1687 if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
1688 MyName myName;
1689 try {
1690 myName = extractScientificName(children.item(i),refMods);
1691 names.add(myName);
1692 } catch (TransformerFactoryConfigurationError e) {
1693 logger.warn(e);
1694 } catch (TransformerException e) {
1695 logger.warn(e);
1696 }
1697
1698 }
1699 }
1700 NomenclaturalStatusType statusType = null;
1701
1702 for(MyName name:names){
1703 // System.out.println("HANDLE NAME "+name);
1704
1705 statusType = null;
1706
1707 nameToBeFilled = currentMyName.getTaxonNameBase();
1708
1709 Synonym synonym = null;
1710
1711 if (!name.getStatus().isEmpty()){
1712 try {
1713 statusType = nomStatusString2NomStatus(name.getStatus());
1714 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1715 synonym = Synonym.NewInstance(nameToBeFilled, refMods);
1716 } catch (UnknownCdmTypeException e) {
1717 addProblematicStatusToFile(name.getStatus());
1718 logger.warn("Problem with status");
1719 synonym = Synonym.NewInstance(nameToBeFilled, refMods);
1720 synonym.setAppendedPhrase(name.getStatus());
1721 }
1722 }
1723 else{
1724 synonym = Synonym.NewInstance(nameToBeFilled, refMods);
1725 }
1726 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1727 nameToBeFilled = parser.parseFullName(name.getName(), nomenclaturalCode, name.getRank());
1728 if (nameToBeFilled.hasProblem() &&
1729 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1730 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1731 addProblemNameToFile(name.getName(),"",nomenclaturalCode,name.getRank());
1732 nameToBeFilled = solveNameProblem(name.getOriginalName(), name.getName(), parser,name.getAuthor(), name.getRank());
1733 }
1734 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
1735 */
1736 if (!name.getIdentifier().isEmpty() && (name.getIdentifier().length()>2)){
1737 setLSID(name.getIdentifier(), synonym);
1738 }
1739
1740 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1741 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1742 boolean synoExist = false;
1743 for (Synonym syn: synonymsSet){
1744 System.out.println(syn.getName()+" -- "+syn.getSec());
1745 boolean a =syn.getName().equals(synonym.getName());
1746 boolean b = syn.getSec().equals(synonym.getSec());
1747 if (a && b) {
1748 synoExist=true;
1749 }
1750 }
1751 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1752 // System.out.println("SYNONYM");
1753 sourceHandler.addSource(refMods, synonym);
1754
1755 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),refMods, null);
1756
1757 }
1758 }
1759 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1760 }
1761
1762
1763 /**
1764 * @param refgroup: the XML nodes
1765 * @param nametosave: the list of objects to save into the CDM
1766 * @param acceptedTaxon: the current acceptedTaxon
1767 * @param nametosave: the list of objects to save into the CDM
1768 * @param refMods: the current reference extracted from the MODS
1769 * @return the acceptedTaxon (why?)
1770 * handle cases where the bibref are inside <p> and outside
1771 */
1772 @SuppressWarnings({ "rawtypes" })
1773 private Taxon extractReferences(Node refgroup, List<TaxonNameBase> nametosave, Taxon acceptedTaxon, Reference<?> refMods) {
1774 // logger.info("extractReferences");
1775 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1776
1777 NodeList children = refgroup.getChildNodes();
1778 NonViralName<?> nameToBeFilled = getNonViralNameAccNomenclature();
1779
1780 ReferenceBuilder refBuild = new ReferenceBuilder();
1781 for (int i=0;i<children.getLength();i++){
1782 if(children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
1783 String ref = children.item(i).getTextContent().trim();
1784 refBuild.builReference(ref, treatmentMainName, nomenclaturalCode, acceptedTaxon, refMods);
1785 if (!refBuild.isFoundBibref()){
1786 extractReferenceRawText(children.item(i).getChildNodes(), nameToBeFilled, refMods, acceptedTaxon);
1787 }
1788 }
1789
1790 if(children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1791 NodeList references = children.item(i).getChildNodes();
1792 String descr="";
1793 for (int j=0;j<references.getLength();j++){
1794 if(references.item(j).getNodeName().equalsIgnoreCase("tax:bibref")){
1795 String ref = references.item(j).getTextContent().trim();
1796 refBuild.builReference(ref, treatmentMainName, nomenclaturalCode, acceptedTaxon, refMods);
1797 }
1798 else
1799 if (references.item(j).getNodeName().equalsIgnoreCase("#text")
1800 && !references.item(j).getTextContent().trim().isEmpty()){
1801 descr += references.item(j).getTextContent().trim();
1802 }
1803
1804 }
1805 if (!refBuild.isFoundBibref()){
1806 //if it's not tagged, put it as row information.
1807 // extractReferenceRawText(references, nameToBeFilled, nametosave, refMods, acceptedTaxon);
1808 //then put it as a not markup feature if not empty
1809 if (descr.length()>0){
1810 Feature currentFeature= getNotMarkedUpFeatureObject();
1811 setParticularDescription(descr,acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1812 }
1813 }
1814 }
1815 }
1816 // importer.getClassificationService().saveOrUpdate(classification);
1817 return acceptedTaxon;
1818
1819 }
1820
1821 /**
1822 * get the non viral name according to the current nomenclature
1823 * @return
1824 */
1825 private NonViralName<?> getNonViralNameAccNomenclature() {
1826 NonViralName<?> nameToBeFilled = null;
1827 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1828 nameToBeFilled = BotanicalName.NewInstance(null);
1829 }
1830 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1831 nameToBeFilled = ZoologicalName.NewInstance(null);
1832 }
1833 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1834 nameToBeFilled = BacterialName.NewInstance(null);
1835 }
1836 return nameToBeFilled;
1837 }
1838
1839 /**
1840 * @return the feature object for the category "not marked up"
1841 */
1842 @SuppressWarnings("rawtypes")
1843 private Feature getNotMarkedUpFeatureObject() {
1844 List<DefinedTermBase> features = importer.getTermService().list(Feature.class, null,null,null,null);
1845 Feature currentFeature =null;
1846 for (DefinedTermBase feat: features){
1847 String tmpF = ((Feature)feat).getTitleCache();
1848 if (tmpF.equalsIgnoreCase(notMarkedUp)) {
1849 currentFeature=(Feature)feat;
1850 }
1851 }
1852 if (currentFeature == null) {
1853 currentFeature=Feature.NewInstance(notMarkedUp, notMarkedUp, notMarkedUp);
1854 currentFeature.setUuid(NotMarkedUpUUID);
1855 importer.getTermService().saveOrUpdate(currentFeature);
1856 }
1857 return currentFeature;
1858 }
1859
1860 /**
1861 * @param references
1862 * handle cases where the bibref are inside <p> and outside
1863 */
1864 @SuppressWarnings("rawtypes")
1865 private void extractReferenceRawText(NodeList references, NonViralName<?> nameToBeFilled, Reference<?> refMods,
1866 Taxon acceptedTaxon) {
1867 String refString="";
1868 NomenclaturalStatusType statusType = null;
1869 currentMyName= new MyName();
1870 for (int j=0;j<references.getLength();j++){
1871 acceptedTaxon=CdmBase.deproxy(acceptedTaxon, Taxon.class);
1872 //no bibref tag inside
1873 // System.out.println("references.item(j).getNodeName()"+references.item(j).getNodeName());
1874 if (references.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1875
1876 try {
1877 currentMyName = extractScientificName(references.item(j),refMods);
1878 // if (myName.getNewName().isEmpty()) {
1879 // name=myName.getOriginalName()+"---"+myName.getRank()+"---"+myName.getIdentifier()+"---"+myName.getStatus();
1880 // } else {
1881 // name=myName.getNewName()+"---"+myName.getRank()+"---"+myName.getIdentifier()+"---"+myName.getStatus();
1882 // }
1883 } catch (TransformerFactoryConfigurationError e) {
1884 logger.warn(e);
1885 } catch (TransformerException e) {
1886 logger.warn(e);
1887 }
1888
1889 // name=name.trim();
1890 }
1891 if (references.item(j).getNodeName().equalsIgnoreCase("#text")){
1892 refString = references.item(j).getTextContent().trim();
1893 }
1894 if(references.item(j).getNodeName().equalsIgnoreCase("#text") && !references.item(j).getTextContent().trim().isEmpty()){
1895 //
1896 statusType = null;
1897 if (!currentMyName.getStatus().isEmpty()){
1898 try {
1899 statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1900 } catch (UnknownCdmTypeException e) {
1901 addProblematicStatusToFile(currentMyName.getStatus());
1902 logger.warn("Problem with status");
1903 }
1904 }
1905
1906
1907 /*INonViralNameParser parser = NonViralNameParserImpl.NewInstance();*/
1908 String fullLineRefName = references.item(j).getTextContent().trim();
1909 int nameOrRefOrOther=2;
1910 nameOrRefOrOther=askIfNameContained(fullLineRefName);
1911 // System.out.println("NAMEORREFOR?? "+nameOrRefOrOther);
1912 if (nameOrRefOrOther==0){
1913 /*TaxonNameBase nameTBF = parser.parseFullName(fullLineRefName, nomenclaturalCode, Rank.UNKNOWN_RANK());
1914 if (nameTBF.hasProblem() &&
1915 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1916 addProblemNameToFile(fullLineRefName,"",nomenclaturalCode,Rank.UNKNOWN_RANK());
1917 nameTBF=solveNameProblem(fullLineRefName, fullLineRefName,parser,currentMyName.getAuthor(), currentMyName.getRank());
1918 }
1919 nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1920 */
1921 TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1922 Synonym synonym = null;
1923 if (!currentMyName.getStatus().isEmpty()){
1924 try {
1925 statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1926 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1927 synonym = Synonym.NewInstance(nameTBF, refMods);
1928 } catch (UnknownCdmTypeException e) {
1929 addProblematicStatusToFile(currentMyName.getStatus());
1930 logger.warn("Problem with status");
1931 synonym = Synonym.NewInstance(nameTBF, refMods);
1932 synonym.setAppendedPhrase(currentMyName.getStatus());
1933 }
1934 }
1935 else{
1936 synonym = Synonym.NewInstance(nameTBF, refMods);
1937 }
1938
1939 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1940 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1941 boolean synoExist = false;
1942 for (Synonym syn: synonymsSet){
1943 // System.out.println(syn.getName()+" -- "+syn.getSec());
1944 boolean a =syn.getName().equals(synonym.getName());
1945 boolean b = syn.getSec().equals(synonym.getSec());
1946 if (a && b) {
1947 synoExist=true;
1948 }
1949 }
1950 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1951 sourceHandler.addSource(refMods, synonym);
1952
1953 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),refMods, null);
1954 }
1955 }
1956
1957 if (nameOrRefOrOther==1){
1958 Reference<?> re = ReferenceFactory.newGeneric();
1959 re.setTitleCache(fullLineRefName);
1960
1961 /* TaxonNameBase nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
1962 if (nameTBF.hasProblem() &&
1963 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1964 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
1965 nameTBF=solveNameProblem(currentMyName.getName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
1966 }
1967 nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1968 */
1969 TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1970 Synonym synonym = null;
1971 if (!currentMyName.getStatus().isEmpty()){
1972 try {
1973 statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1974 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1975 synonym = Synonym.NewInstance(nameTBF, refMods);
1976 } catch (UnknownCdmTypeException e) {
1977 addProblematicStatusToFile(currentMyName.getStatus());
1978 logger.warn("Problem with status");
1979 synonym = Synonym.NewInstance(nameTBF, refMods);
1980 synonym.setAppendedPhrase(currentMyName.getStatus());
1981 }
1982 }
1983 else{
1984 synonym = Synonym.NewInstance(nameTBF, refMods);
1985 }
1986
1987 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1988 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
1989 boolean synoExist = false;
1990 for (Synonym syn: synonymsSet){
1991 // System.out.println(syn.getName()+" -- "+syn.getSec());
1992 boolean a =syn.getName().equals(synonym.getName());
1993 boolean b = syn.getSec().equals(synonym.getSec());
1994 if (a && b) {
1995 synoExist=true;
1996 }
1997 }
1998 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1999 sourceHandler.addSource(refMods, synonym);
2000
2001 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),re, null);
2002 }
2003
2004 }
2005
2006
2007 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2008 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2009 }
2010 }
2011
2012 if(!currentMyName.getName().isEmpty()){
2013 logger.info("acceptedTaxon and name: *"+acceptedTaxon.getTitleCache()+"*, *"+currentMyName.getName()+"*");
2014 if (acceptedTaxon.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(currentMyName.getName().trim())){
2015 Reference<?> refS = ReferenceFactory.newGeneric();
2016 refS.setTitleCache(refString, true);
2017 // TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
2018 // acceptedTaxon.addDescription(td);
2019 // acceptedTaxon.addSource(refSource);
2020 //
2021 // TextData textData = TextData.NewInstance(Feature.CITATION());
2022 //
2023 // textData.addSource(null, null, refS, null);
2024 // td.addElement(textData);
2025 // td.addSource(refSource);
2026 // importer.getDescriptionService().saveOrUpdate(td);
2027
2028
2029 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2030 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2031
2032 }
2033
2034 acceptedTaxon.getName().setNomenclaturalReference(refS);
2035 }
2036 else{
2037 /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2038 TaxonNameBase nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
2039 if (nameTBF.hasProblem() &&
2040 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
2041 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
2042 addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
2043 nameTBF=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
2044 }
2045 nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
2046 */
2047 TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
2048 Synonym synonym = null;
2049 if (!currentMyName.getStatus().isEmpty()){
2050 try {
2051 statusType = nomStatusString2NomStatus(currentMyName.getStatus());
2052 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
2053 synonym = Synonym.NewInstance(nameTBF, refMods);
2054 } catch (UnknownCdmTypeException e) {
2055 addProblematicStatusToFile(currentMyName.getStatus());
2056 logger.warn("Problem with status");
2057 synonym = Synonym.NewInstance(nameTBF, refMods);
2058 synonym.setAppendedPhrase(currentMyName.getStatus());
2059 }
2060 }
2061 else{
2062 synonym = Synonym.NewInstance(nameTBF, refMods);
2063 }
2064
2065
2066 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2067 setLSID(currentMyName.getIdentifier(), synonym);
2068 }
2069
2070 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
2071 // System.out.println(synonym.getName()+" -- "+synonym.getSec());
2072 boolean synoExist = false;
2073 for (Synonym syn: synonymsSet){
2074 // System.out.println(syn.getName()+" -- "+syn.getSec());
2075 boolean a =syn.getName().equals(synonym.getName());
2076 boolean b = syn.getSec().equals(synonym.getSec());
2077 if (a && b) {
2078 synoExist=true;
2079 }
2080 }
2081 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
2082 sourceHandler.addSource(refMods, synonym);
2083
2084 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),refMods, null);
2085 }
2086 }
2087 }
2088 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2089 }
2090 }
2091
2092
2093
2094 /**
2095 * @param identifier
2096 * @param acceptedTaxon
2097 */
2098 @SuppressWarnings("rawtypes")
2099 private void setLSID(String identifier, TaxonBase<?> taxon) {
2100 // boolean lsidok=false;
2101 String id = identifier.split("__")[0];
2102 String source = identifier.split("__")[1];
2103 if (id.indexOf("lsid")>-1){
2104 try {
2105 LSID lsid = new LSID(id);
2106 taxon.setLsid(lsid);
2107 // lsidok=true;
2108 } catch (MalformedLSIDException e) {
2109 logger.warn("Malformed LSID");
2110 }
2111
2112 }
2113
2114 // if ((id.indexOf("lsid")<0) || !lsidok){
2115 //ADD ORIGINAL SOURCE ID EVEN IF LSID
2116 Reference<?> re = null;
2117 List<Reference> references = importer.getReferenceService().list(Reference.class, null, null, null, null);
2118 for (Reference<?> refe: references) {
2119 if (refe.getTitleCache().equalsIgnoreCase(source)) {
2120 re =refe;
2121 }
2122 }
2123
2124 if(re == null){
2125 re = ReferenceFactory.newGeneric();
2126 re.setTitleCache(source);
2127 importer.getReferenceService().saveOrUpdate(re);
2128 }
2129 re=CdmBase.deproxy(re, Reference.class);
2130
2131 Set<IdentifiableSource> sources = taxon.getSources();
2132 boolean lsidinsource=false;
2133 boolean urlinsource=false;
2134 for (IdentifiableSource src:sources){
2135 if (id.equalsIgnoreCase(src.getIdInSource()) && re.getTitleCache().equals(src.getCitation().getTitleCache())) {
2136 lsidinsource=true;
2137 }
2138 if (src.getIdInSource() == null && re.getTitleCache().equals(sourceUrlRef.getTitleCache())) {
2139 urlinsource=true;
2140 }
2141 }
2142 if(!lsidinsource) {
2143 taxon.addSource(OriginalSourceType.Import, id,null,re,null);
2144 }
2145 if(!urlinsource)
2146 {
2147 taxon.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
2148 // }
2149 }
2150
2151 }
2152
2153 /**
2154 * try to solve a parsing problem for a scientific name
2155 * @param original : the name from the OCR document
2156 * @param name : the tagged version
2157 * @param parser
2158 * @return the corrected TaxonNameBase
2159 */
2160 /* @SuppressWarnings({ "unchecked", "rawtypes" })
2161 private TaxonNameBase<?,?> solveNameProblem(String original, String name, INonViralNameParser parser, String author, Rank rank) {
2162 Map<String,String> ato = namesMap.get(original);
2163 if (ato == null) {
2164 ato = namesMap.get(original+" "+author);
2165 }
2166
2167
2168 if (ato == null && rank.equals(Rank.UNKNOWN_RANK())){
2169 rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
2170 }
2171 if (ato != null && rank.equals(Rank.UNKNOWN_RANK())){
2172 rank = getRank(ato);
2173 }
2174 // TaxonNameBase<?,?> nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
2175 TaxonNameBase<?,?> nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
2176 // logger.info("RANK: "+rank);
2177 int retry=0;
2178 List<ParserProblem> problems = nameTBF.getParsingProblems();
2179 for (ParserProblem pb:problems) {
2180 System.out.println(pb.toString());
2181 }
2182 while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
2183 addProblemNameToFile(name,author,nomenclaturalCode,rank);
2184 String fullname=name;
2185 if(! skippQuestion) {
2186 fullname = getFullReference(name,nameTBF.getParsingProblems());
2187 }
2188 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2189 nameTBF = BotanicalName.NewInstance(null);
2190 }
2191 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2192 nameTBF = ZoologicalName.NewInstance(null);
2193 }
2194 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2195 nameTBF= BacterialName.NewInstance(null);
2196 }
2197 parser.parseReferencedName(nameTBF, fullname, rank, false);
2198 retry++;
2199 }
2200 if (retry == 1){
2201 if(author != null){
2202 if (name.indexOf(author)>-1) {
2203 nameTBF = parser.parseSimpleName(name.substring(0, name.indexOf(author)), nomenclaturalCode, rank);
2204 } else {
2205 nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
2206 }
2207 if (nameTBF.hasProblem()){
2208 if (name.indexOf(author)>-1) {
2209 addProblemNameToFile(name.substring(0, name.indexOf(author)),author,nomenclaturalCode,rank);
2210 } else {
2211 addProblemNameToFile(name,author,nomenclaturalCode,rank);
2212 }
2213 // System.out.println("TBF still has problems "+nameTBF.hasProblem());
2214 problems = nameTBF.getParsingProblems();
2215 for (ParserProblem pb:problems) {
2216 System.out.println(pb.toString());
2217 }
2218 nameTBF.setFullTitleCache(name, true);
2219 }else{
2220 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2221 ((BotanicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2222 }
2223 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2224 ((ZoologicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2225 }
2226 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2227 ((BacterialName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2228 }
2229 }
2230 // logger.info("FULL TITLE CACHE "+name);
2231 }else{
2232 nameTBF.setFullTitleCache(name, true);
2233 }
2234 }
2235 return nameTBF;
2236 }
2237
2238 */
2239
2240 /**
2241 * @param nomenclatureNode: the XML nodes
2242 * @param nametosave: the list of objects to save into the CDM
2243 * @param refMods: the current reference extracted from the MODS
2244 * @return
2245 */
2246 @SuppressWarnings({ "rawtypes", "null" })
2247 private Taxon extractNomenclature(Node nomenclatureNode, List<TaxonNameBase> nametosave, Reference<?> refMods) throws ClassCastException{
2248 // logger.info("extractNomenclature");
2249 NodeList children = nomenclatureNode.getChildNodes();
2250 String freetext="";
2251 NonViralName<?> nameToBeFilled = null;
2252 Taxon acceptedTaxon = null;
2253 // INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2254
2255 // String fullContent = nomenclatureNode.getTextContent();
2256
2257 NomenclaturalStatusType statusType = null;
2258 for (int i=0;i<children.getLength();i++){
2259 if(children.item(i).getNodeName().equalsIgnoreCase("tax:status")){
2260 String status = children.item(i).getTextContent().trim();
2261 if (!status.isEmpty()){
2262 try {
2263 statusType = nomStatusString2NomStatus(status);
2264 } catch (UnknownCdmTypeException e) {
2265 addProblematicStatusToFile(currentMyName.getStatus());
2266 logger.warn("Problem with status");
2267 }
2268 }
2269 }
2270 }
2271
2272 boolean containsSynonyms=false;
2273 for (int i=0;i<children.getLength();i++){
2274
2275 if (children.item(i).getNodeName().equalsIgnoreCase("#text")) {
2276 freetext=children.item(i).getTextContent();
2277 }
2278 if (children.item(i).getNodeName().equalsIgnoreCase("tax:collection_event")) {
2279 // System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
2280 extractMaterialsDirect(children.item(i), acceptedTaxon, refMods, "collection");
2281 }
2282 if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
2283 if(!containsSynonyms){
2284 currentMyName = new MyName();
2285 try {
2286 currentMyName = extractScientificName(children.item(i),refMods);
2287 treatmentMainName = currentMyName.getNewName();
2288 originalTreatmentName = currentMyName.getOriginalName();
2289
2290 } catch (TransformerFactoryConfigurationError e1) {
2291 logger.warn(e1);
2292 } catch (TransformerException e1) {
2293 logger.warn(e1);
2294 }
2295
2296 if (currentMyName.getRank().equals(Rank.UNKNOWN_RANK()) || currentMyName.getRank().isLower(configState.getConfig().getMaxRank()) || currentMyName.getRank().equals(configState.getConfig().getMaxRank())){
2297 maxRankRespected=true;
2298 /* if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2299 nameToBeFilled = BotanicalName.NewInstance(null);
2300 }
2301 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2302 nameToBeFilled = ZoologicalName.NewInstance(null);
2303 }
2304 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2305 nameToBeFilled = BacterialName.NewInstance(null);
2306 }
2307 */
2308 nameToBeFilled=currentMyName.getTaxonNameBase();
2309
2310 // acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
2311 acceptedTaxon=currentMyName.getTaxon();
2312 System.out.println("TreatmentName "+treatmentMainName+" - "+acceptedTaxon);
2313
2314
2315 boolean statusMatch=false;
2316 if(acceptedTaxon !=null ){
2317 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2318 statusMatch=compareStatus(acceptedTaxon, statusType);
2319 System.out.println("statusMatch: "+statusMatch);
2320 }
2321 if (acceptedTaxon ==null || (acceptedTaxon != null && !statusMatch)){
2322 // acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2323 /*
2324 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2325 nameToBeFilled = (BotanicalName) parser.parseFullName(treatmentMainName, nomenclaturalCode, null);
2326 }
2327 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2328 nameToBeFilled = (ZoologicalName) parser.parseFullName(treatmentMainName, nomenclaturalCode, null);
2329 }
2330 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2331 nameToBeFilled = (BacterialName) parser.parseFullName(treatmentMainName, nomenclaturalCode, null);
2332 }
2333
2334
2335 if (nameToBeFilled.hasProblem() &&
2336 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
2337 addProblemNameToFile(treatmentMainName,"",nomenclaturalCode,null);
2338 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2339 nameToBeFilled = (BotanicalName) solveNameProblem(originalTreatmentName,treatmentMainName,parser,currentMyName.getAuthor(), currentMyName.getRank());
2340 }
2341 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2342 nameToBeFilled = (ZoologicalName)solveNameProblem(originalTreatmentName,treatmentMainName,parser,currentMyName.getAuthor(), currentMyName.getRank());
2343 }
2344 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2345 nameToBeFilled = (BacterialName) solveNameProblem(originalTreatmentName,treatmentMainName,parser,currentMyName.getAuthor(), currentMyName.getRank());
2346 }
2347
2348 }
2349 */
2350 nameToBeFilled=currentMyName.getTaxonNameBase();
2351 if (nameToBeFilled!=null){
2352 /*
2353 try{
2354 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2355 nameToBeFilled = (BotanicalName) getTaxonNameBase(nameToBeFilled,nametosave,statusType);
2356 }
2357 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2358 nameToBeFilled = (ZoologicalName)getTaxonNameBase(nameToBeFilled,nametosave,statusType);
2359 }
2360 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2361 nameToBeFilled = (BacterialName)getTaxonNameBase(nameToBeFilled,nametosave,statusType);
2362 }
2363 }catch(Exception e){
2364 TaxonNameBase n = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
2365 System.out.println(n.getClass());
2366 nameToBeFilled = (NonViralName<?>) getTaxonNameBase(nameToBeFilled,nametosave,statusType);
2367 }
2368 */
2369
2370 if (!originalTreatmentName.isEmpty()) {
2371 TaxonNameDescription td = TaxonNameDescription.NewInstance();
2372 td.setTitleCache(originalTreatmentName);
2373 nameToBeFilled.addDescription(td);
2374 }
2375
2376 if(statusType != null) {
2377 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
2378 }
2379 sourceHandler.addSource(refMods, nameToBeFilled);
2380
2381 if (nameToBeFilled.getNomenclaturalReference() == null) {
2382 acceptedTaxon= new Taxon(nameToBeFilled,refMods);
2383 System.out.println("NEW ACCEPTED HERE "+nameToBeFilled);
2384 }
2385 else {
2386 acceptedTaxon= new Taxon(nameToBeFilled,(Reference<?>) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
2387 System.out.println("NEW ACCEPTED HERE2 "+nameToBeFilled);
2388 }
2389
2390 sourceHandler.addSource(refMods, acceptedTaxon);
2391
2392 if(!configState.getConfig().doKeepOriginalSecundum()) {
2393 acceptedTaxon.setSec(configState.getConfig().getSecundum());
2394 logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2395 System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2396 }
2397
2398 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2399 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2400 }
2401
2402
2403 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2404 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2405 }
2406 // Taxon parentTaxon = currentMyName.getHigherTaxa();
2407 // if (parentTaxon == null && !skippQuestion) {
2408 // parentTaxon = askParent(acceptedTaxon, classification);
2409 // }
2410 // if (parentTaxon ==null){
2411 // while (parentTaxon == null) {
2412 // parentTaxon = createParent(acceptedTaxon, refMods);
2413 // classification.addParentChild(parentTaxon, acceptedTaxon, refMods, null);
2414 // }
2415 // }else{
2416 // classification.addParentChild(parentTaxon, acceptedTaxon, refMods, null);
2417 // }
2418
2419 }else{
2420 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2421 Set<IdentifiableSource> sources = acceptedTaxon.getSources();
2422 boolean sourcelinked=false;
2423 for (IdentifiableSource source:sources){
2424 if (source.getCitation().getTitleCache().equalsIgnoreCase(refMods.getTitleCache())) {
2425 sourcelinked=true;
2426 }
2427 }
2428 if (!configState.getConfig().doKeepOriginalSecundum()) {
2429 acceptedTaxon.setSec(configState.getConfig().getSecundum());
2430 logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2431 System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2432 }
2433 if (!sourcelinked){
2434 sourceHandler.addSource(refMods, acceptedTaxon);
2435 }
2436 if (!sourcelinked || !configState.getConfig().doKeepOriginalSecundum()){
2437
2438 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2439 setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2440 }
2441 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2442 }
2443 }
2444 }else{
2445 maxRankRespected=false;
2446 }
2447 containsSynonyms=true;
2448 }else{
2449 try{
2450 extractSynonyms(children.item(i), acceptedTaxon, refMods);
2451 }catch(NullPointerException e){
2452 logger.warn("nullpointerexception, the accepted taxon might be null");
2453 }
2454 }
2455 }
2456 if (children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group") && maxRankRespected){
2457 reloadClassification();
2458 //extract the References within the document
2459 extractReferences(children.item(i),nametosave,acceptedTaxon,refMods);
2460 }
2461 if(!freetext.isEmpty()) {
2462 setParticularDescription(freetext,acceptedTaxon,acceptedTaxon, refMods,getNotMarkedUpFeatureObject());
2463 }
2464
2465 }
2466 // importer.getClassificationService().saveOrUpdate(classification);
2467 return acceptedTaxon;
2468 }
2469
2470
2471 /**
2472 * @return
2473 */
2474 @SuppressWarnings("unchecked")
2475 private boolean compareStatus(Taxon t, NomenclaturalStatusType statusType) {
2476 boolean statusMatch=false;
2477 //found one taxon
2478 Set<NomenclaturalStatus> status = t.getName().getStatus();
2479 if (statusType!=null && status.size()>0){ //the statusType is known for both taxon
2480 for (NomenclaturalStatus st:status){
2481 NomenclaturalStatusType stype = st.getType();
2482 if (stype.toString().equalsIgnoreCase(statusType.toString())) {
2483 statusMatch=true;
2484 }
2485 }
2486 }
2487 else{
2488 if(statusType == null && status.size()==0) {//there is no statusType, we can assume it's the same
2489 statusMatch=true;
2490 }
2491 }
2492 return statusMatch;
2493 }
2494
2495 /**
2496 * @param acceptedTaxon: the current acceptedTaxon
2497 * @param ref: the current reference extracted from the MODS
2498 * @return the parent for the current accepted taxon
2499 */
2500 /* private Taxon createParent(Taxon acceptedTaxon, Reference<?> ref) {
2501 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2502
2503 List<Rank> rankList = new ArrayList<Rank>();
2504 rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
2505
2506 List<String> rankListStr = new ArrayList<String>();
2507 for (Rank r:rankList) {
2508 rankListStr.add(r.toString());
2509 }
2510 String r="";
2511 String s = acceptedTaxon.getTitleCache();
2512 Taxon tax = null;
2513 if(!skippQuestion){
2514 int addTaxon = askAddParent(s);
2515 logger.info("ADD TAXON: "+addTaxon);
2516 if (addTaxon == 0 ){
2517 Taxon tmp = askParent(acceptedTaxon, classification);
2518 if (tmp == null){
2519 s = askSetParent(s);
2520 r = askRank(s,rankListStr);
2521
2522 NonViralName<?> nameToBeFilled = null;
2523 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2524 nameToBeFilled = BotanicalName.NewInstance(null);
2525 }
2526 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2527 nameToBeFilled = ZoologicalName.NewInstance(null);
2528 }
2529 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2530 nameToBeFilled = BacterialName.NewInstance(null);
2531 }
2532 nameToBeFilled.setTitleCache(s);
2533 nameToBeFilled.setRank(getRank(r));
2534
2535 tax = Taxon.NewInstance(nameToBeFilled, ref);
2536 }
2537 else{
2538 tax=tmp;
2539 }
2540
2541 createParent(tax, ref);
2542 // logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
2543 classification.addParentChild(tax, acceptedTaxon, ref, null);
2544 }
2545 else{
2546 classification.addChildTaxon(acceptedTaxon, ref, null);
2547 tax=acceptedTaxon;
2548 }
2549 } else{
2550 classification.addChildTaxon(acceptedTaxon, ref, null);
2551 tax=acceptedTaxon;
2552 }
2553 // logger.info("RETURN: "+tax );
2554 return tax;
2555
2556 }
2557
2558 */
2559
2560 /**
2561 * @param name
2562 * @throws TransformerFactoryConfigurationError
2563 * @throws TransformerException
2564 * @return a list of possible names
2565 */
2566 @SuppressWarnings({ "null", "rawtypes" })
2567 private MyName extractScientificName(Node name, Reference<?> refMods) throws TransformerFactoryConfigurationError, TransformerException {
2568 // System.out.println("extractScientificName");
2569
2570 String[] rankListToPrint_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2571 List<String> rankListToPrint = new ArrayList<String>();
2572 for (String r : rankListToPrint_tmp) {
2573 rankListToPrint.add(r.toLowerCase());
2574 }
2575
2576 Rank rank = Rank.UNKNOWN_RANK();
2577 NodeList children = name.getChildNodes();
2578 String fullName = "";
2579 String newName="";
2580 String identifier="";
2581 HashMap<String, String> atomisedMap = new HashMap<String, String>();
2582 List<String> atomisedName= new ArrayList<String>();
2583
2584 String rankStr = "";
2585 Rank tmpRank ;
2586
2587 String status= extractStatus(children);
2588
2589 for (int i=0;i<children.getLength();i++){
2590 if(children.item(i).getNodeName().equalsIgnoreCase("tax:xmldata")){
2591 NodeList atom = children.item(i).getChildNodes();
2592 for (int k=0;k<atom.getLength();k++){
2593 identifier = extractIdentifier(identifier, atom.item(k));
2594 tmpRank = null;
2595 rankStr = atom.item(k).getNodeName().toLowerCase();
2596 // logger.info("RANKSTR:*"+rankStr+"*");
2597 if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2598 rankStr=atom.item(k).getTextContent().trim();
2599 tmpRank = getRank(rankStr);
2600 }
2601 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2602 if (tmpRank != null){
2603 rank=tmpRank;
2604 }
2605 atomisedMap.put(rankStr.toLowerCase(),atom.item(k).getTextContent().trim());
2606 }
2607 addAtomisedNamesToMap(rankListToPrint, rank, atomisedName, atom);
2608 }
2609 if(children.item(i).getNodeName().equalsIgnoreCase("#text") && !StringUtils.isBlank(children.item(i).getTextContent())){
2610 // logger.info("name non atomised: "+children.item(i).getTextContent());
2611 fullName = children.item(i).getTextContent().trim();
2612 // logger.info("fullname: "+fullName);
2613 }
2614 }
2615 fullName = cleanName(fullName, atomisedName);
2616 namesMap.put(fullName,atomisedMap);
2617
2618 String atomisedNameStr = getAtomisedNameStr(atomisedName);
2619
2620 if (fullName != null){
2621 // System.out.println("fullname: "+fullName);
2622 // System.out.println("atomised: "+atomisedNameStr);
2623 if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2624 if (skippQuestion){
2625 // String defaultN = "";
2626 if (atomisedNameStr.length()>fullName.length()) {
2627 newName=atomisedNameStr;
2628 } else {
2629 if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2630 newName=getScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2631 } else {
2632 newName=fullName;
2633 }
2634 }
2635 } else {
2636 newName=getScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2637 }
2638 } else {
2639 newName=fullName;
2640 }
2641 }
2642 //not really needed
2643 // rank = askForRank(newName, rank, nomenclaturalCode);
2644 // System.out.println("atomised: "+atomisedMap.toString());
2645
2646 // String[] names = new String[5];
2647 MyName myname = new MyName();
2648
2649 System.out.println("\n\nBUILD "+newName+ "(rank: "+rank+")");
2650 // System.out.println(atomisedMap.keySet());
2651 fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2652 myname.setOriginalName(fullName);
2653 myname.setNewName(newName);
2654 myname.setRank(rank);
2655 myname.setIdentifier(identifier);
2656 myname.setStatus(status);
2657 myname.setSource(refMods);
2658
2659 // boolean higherAdded=false;
2660
2661
2662 boolean parseNameManually=false;
2663 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2664 TaxonNameBase nameToBeFilledTest = parser.parseFullName(atomisedNameStr, nomenclaturalCode, rank);
2665 if (nameToBeFilledTest.hasProblem()){
2666 addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2667 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2668 if (nameToBeFilledTest.hasProblem()){
2669 addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2670 parseNameManually=true;
2671 }
2672 }
2673
2674 if(parseNameManually){
2675 createAtomisedTaxon(rank, newName, atomisedMap, myname);
2676 }
2677 else{
2678 createAtomisedTaxonString(newName, atomisedMap, myname);
2679 myname.setParsedName(nameToBeFilledTest);
2680 myname.buildTaxon();
2681 }
2682 return myname;
2683
2684 }
2685
2686 /**
2687 * @param atomisedName
2688 * @return
2689 */
2690 private String getAtomisedNameStr(List<String> atomisedName) {
2691 String atomisedNameStr = StringUtils.join(atomisedName," ");
2692 while(atomisedNameStr.contains(" ")) {
2693 atomisedNameStr=atomisedNameStr.replace(" ", " ");
2694 }
2695 atomisedNameStr=atomisedNameStr.trim();
2696 return atomisedNameStr;
2697 }
2698
2699 /**
2700 * @param children
2701 * @param status
2702 * @return
2703 */
2704 private String extractStatus(NodeList children) {
2705 String status="";
2706 for (int i=0;i<children.getLength();i++){
2707 if(children.item(i).getNodeName().equalsIgnoreCase("tax:status") ||
2708 (children.item(i).getNodeName().equalsIgnoreCase("tax:namePart") &&
2709 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("status"))){
2710 status = children.item(i).getTextContent().trim();
2711 }
2712 }
2713 return status;
2714 }
2715
2716 /**
2717 * @param identifier
2718 * @param atom
2719 * @param k
2720 * @return
2721 */
2722 private String extractIdentifier(String identifier, Node atom) {
2723 if (atom.getNodeName().equalsIgnoreCase("tax:xid")){
2724 try{
2725 identifier = atom.getAttributes().getNamedItem("identifier").getNodeValue();
2726 }catch(Exception e){
2727 System.out.println("pb with identifier, maybe empty");
2728 }
2729 try{
2730 identifier+="__"+atom.getAttributes().getNamedItem("source").getNodeValue();
2731 }catch(Exception e){
2732 System.out.println("pb with identifier, maybe empty");
2733 }
2734 }
2735 return identifier;
2736 }
2737
2738 /**
2739 * @param rankListToPrint
2740 * @param rank
2741 * @param atomisedName
2742 * @param atom
2743 */
2744 private void addAtomisedNamesToMap(List<String> rankListToPrint, Rank rank, List<String> atomisedName, NodeList atom) {
2745 for (int k=0;k<atom.getLength();k++){
2746 if (!atom.item(k).getNodeName().equalsIgnoreCase("dwc:taxonRank") ) {
2747 if (atom.item(k).getNodeName().equalsIgnoreCase("dwc:subgenus") || atom.item(k).getNodeName().equalsIgnoreCase("dwcranks:subgenus")) {
2748 atomisedName.add("("+atom.item(k).getTextContent().trim()+")");
2749 } else{
2750 if(atom.item(k).getNodeName().equalsIgnoreCase("dwcranks:varietyepithet") || atom.item(k).getNodeName().equalsIgnoreCase("dwc:Subspecies")) {
2751 if(atom.item(k).getNodeName().equalsIgnoreCase("dwcranks:varietyepithet")){
2752 atomisedName.add("var. "+atom.item(k).getTextContent().trim());
2753 }
2754 if(atom.item(k).getNodeName().equalsIgnoreCase("dwc:Subspecies") || atom.item(k).getNodeName().equalsIgnoreCase("dwc:infraspecificepithet")) {
2755 atomisedName.add("subsp. "+atom.item(k).getTextContent().trim());
2756 }
2757 }
2758 else{
2759 if(rankListToPrint.contains(atom.item(k).getNodeName().toLowerCase())) {
2760 atomisedName.add(atom.item(k).getTextContent().trim());
2761 }
2762 else{
2763 // System.out.println("rank : "+rank.toString());
2764 if (rank.isHigher(Rank.GENUS()) && (atom.item(k).getNodeName().indexOf("dwcranks:")>-1 || atom.item(k).getNodeName().indexOf("dwc:Family")>-1)) {
2765 atomisedName.add(atom.item(k).getTextContent().trim());
2766 }
2767 // else{
2768 // System.out.println("on a oublie qqn "+atom.item(k).getNodeName());
2769 // }
2770 }
2771 // else{
2772 // System.out.println("on a oublie qqn "+atom.item(k).getNodeName());
2773 // }
2774 }
2775 }
2776 }
2777 }
2778 }
2779
2780 /**
2781 * @param fullName
2782 * @param atomisedName
2783 * @return
2784 */
2785 private String cleanName(String name, List<String> atomisedName) {
2786 String fullName =name;
2787 if (fullName != null){
2788 fullName = fullName.replace("( ", "(");
2789 fullName = fullName.replace(" )",")");
2790
2791 if (fullName.trim().isEmpty()){
2792 fullName=StringUtils.join(atomisedName," ");
2793 }
2794
2795 while(fullName.contains(" ")) {
2796 fullName=fullName.replace(" ", " ");
2797 // logger.info("while");
2798 }
2799 fullName=fullName.trim();
2800 }
2801 return fullName;
2802 }
2803
2804 /**
2805 * @param rank
2806 * @param fullName
2807 * @param atomisedMap
2808 * @param myname
2809 * @return
2810 */
2811 private String extractAuthorFromNames(Rank rank, String name, HashMap<String, String> atomisedMap,
2812 MyName myname) {
2813 String fullName=name;
2814 if (atomisedMap.get("dwc:scientificnameauthorship") == null && fullName!=null){
2815 // System.out.println("rank : "+rank.toString());
2816 if(rank.isHigher(Rank.SPECIES())){
2817 try{
2818 String author=null;
2819 if(atomisedMap.get("dwcranks:subgenus") != null) {
2820 author = fullName.split(atomisedMap.get("dwcranks:subgenus"))[1].trim();
2821 }
2822 if(atomisedMap.get("dwc:subgenus") != null) {
2823 author = fullName.split(atomisedMap.get("dwc:subgenus"))[1].trim();
2824 }
2825 if(author == null) {
2826 if(atomisedMap.get("dwc:genus") != null) {
2827 author = fullName.split(atomisedMap.get("dwc:genus"))[1].trim();
2828 }
2829 }
2830 if(author != null){
2831 fullName = fullName.substring(0, fullName.indexOf(author));
2832 author=author.replaceAll(",","").trim();
2833 myname.setAuthor(author);
2834 }
2835 }catch(Exception e){
2836 //could not extract the author
2837 }
2838 }
2839 if(rank.equals(Rank.SPECIES())){
2840 try{
2841 String author=null;
2842 if(author == null) {
2843 if(atomisedMap.get("dwc:species") != null) {
2844 String[] t = fullName.split(atomisedMap.get("dwc:species"));
2845 // System.out.println("NB ELEMENTS "+t.length +"fullName "+fullName+", "+atomisedMap.get("dwc:species"));
2846 author = fullName.split(atomisedMap.get("dwc:species"))[1].trim();
2847 // System.out.println("AUTEUR "+author);
2848 }
2849 }
2850 if(author != null){
2851 fullName = fullName.substring(0, fullName.indexOf(author));
2852 author=author.replaceAll(",","").trim();
2853 myname.setAuthor(author);
2854 }
2855 }catch(Exception e){
2856 //could not extract the author
2857 }
2858 }
2859 }else{
2860 myname.setAuthor(atomisedMap.get("dwc:scientificnameauthorship"));
2861 }
2862 return fullName;
2863 }
2864
2865 /**
2866 * @param newName
2867 * @param atomisedMap
2868 * @param myname
2869 */
2870 private void createAtomisedTaxonString(String newName, HashMap<String, String> atomisedMap, MyName myname) {
2871 if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
2872 myname.setFamilyStr(atomisedMap.get("dwc:family"));
2873 }
2874 if(atomisedMap.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank.SUBFAMILY())){
2875 myname.setSubfamilyStr(atomisedMap.get("dwcranks:subfamily"));
2876 }
2877 if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
2878 myname.setTribeStr(atomisedMap.get("dwcranks:tribe"));
2879 }
2880 if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
2881 myname.setSubtribeStr(atomisedMap.get("dwcranks:subtribe"));
2882 }
2883 if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
2884 myname.setGenusStr(atomisedMap.get("dwc:genus"));
2885 }
2886 if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2887 myname.setSubgenusStr(atomisedMap.get("dwcranks:subgenus"));
2888 }
2889 if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2890 myname.setSubgenusStr(atomisedMap.get("dwc:subgenus"));
2891 }
2892 if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
2893 String n=newName;
2894 if(atomisedMap.get("dwc:infraspecificepithet") != null) {
2895 n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
2896 n=n.replace("subsp.","");
2897 }
2898 if(atomisedMap.get("dwc:subspecies") != null) {
2899 n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
2900 n=n.replace("subsp.","");
2901 }
2902 if(atomisedMap.get("dwcranks:varietyepithet") != null) {
2903 n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
2904 n=n.replace("var.","");
2905 n=n.replace("v.","");
2906 }
2907 if(atomisedMap.get("dwcranks:formepithet") != null) {
2908 //TODO
2909 System.out.println("TODO FORMA");
2910 n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
2911 n=n.replace("forma","");
2912 }
2913 n=n.trim();
2914 String author = myname.getAuthor();
2915 if(n.split(" ").length>2)
2916 {
2917 String n2=n.split(" ")[0]+" "+n.split(" ")[1];
2918 String a= "";
2919 try{
2920 a=n.split(n2)[1].trim();
2921 }catch(Exception e){logger.info("no author in "+n+"?");}
2922
2923 myname.setAuthor(a);
2924 System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
2925 n=n2;
2926
2927 }
2928
2929 myname.setSpeciesStr(atomisedMap.get("dwc:species"));
2930 myname.setAuthor(author);
2931 }
2932 if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
2933 myname.setSubspeciesStr(atomisedMap.get("dwc:subspecies"));
2934 }
2935 if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
2936 myname.setSubspeciesStr(atomisedMap.get("dwc:infraspecificepithet"));
2937 }
2938 if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
2939 myname.setVarietyStr(atomisedMap.get("dwcranks:varietyepithet"));
2940 }
2941 if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
2942 myname.setFormStr(atomisedMap.get("dwcranks:formepithet"));
2943 }
2944 }
2945
2946 /**
2947 * @param rank
2948 * @param newName
2949 * @param atomisedMap
2950 * @param myname
2951 */
2952 private void createAtomisedTaxon(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
2953 if(rank.equals(Rank.UNKNOWN_RANK())){
2954 myname.setNotParsableTaxon(newName);
2955 }
2956 else{
2957 if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
2958 myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
2959 }
2960 if(atomisedMap.get("dwcranks:subfamily") != null && checkRankValidForImport(Rank.SUBFAMILY())){
2961 myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
2962 }
2963 if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
2964 myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
2965 }
2966 if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
2967 myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
2968 }
2969 if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
2970 myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
2971 }
2972 if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2973 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
2974 }
2975 if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2976 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
2977 }
2978 if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
2979 String n=newName;
2980 if(atomisedMap.get("dwc:infraspecificepithet") != null) {
2981 n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
2982 n=n.replace("subsp.","");
2983 }
2984 if(atomisedMap.get("dwc:subspecies") != null) {
2985 n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
2986 n=n.replace("subsp.","");
2987 }
2988 if(atomisedMap.get("dwcranks:varietyepithet") != null) {
2989 n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
2990 n=n.replace("var.","");
2991 n=n.replace("v.","");
2992 }
2993 if(atomisedMap.get("dwcranks:formepithet") != null) {
2994 //TODO
2995 System.out.println("TODO FORMA");
2996 n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
2997 n=n.replace("forma","");
2998 }
2999 n=n.trim();
3000 String author = myname.getAuthor();
3001 if(n.split(" ").length>2)
3002 {
3003 String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3004 String a="";
3005 try{
3006 a= n.split(n2)[1].trim();
3007 }catch(Exception e){logger.info("no author?");}
3008 myname.setAuthor(a);
3009 System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3010 n=n2;
3011
3012 }
3013
3014 myname.setSpecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank));
3015 myname.setAuthor(author);
3016 }
3017 if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3018 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3019 }
3020 if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3021 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3022 }
3023 if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
3024 myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3025 }
3026 if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
3027 myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3028 }
3029 }
3030 }
3031
3032 /**
3033 * @return
3034 */
3035 private boolean checkRankValidForImport(Rank currentRank) {
3036 return currentRank.isLower(configState.getConfig().getMaxRank()) || currentRank.equals(configState.getConfig().getMaxRank());
3037 }
3038
3039
3040
3041 /**
3042 * @param classification2
3043 */
3044 public void updateClassification(Classification classification2) {
3045 classification = classification2;
3046 }
3047
3048 public class MyName {
3049 String originalName="";
3050 String newName="";
3051 Rank rank=Rank.UNKNOWN_RANK();
3052 String identifier="";
3053 String status="";
3054 String author=null;
3055
3056 NonViralName<?> taxonnamebase;
3057
3058 Reference<?> refMods ;
3059
3060 Taxon family,subfamily,tribe,subtribe,genus,subgenus,species,subspecies, variety,form;
3061 NonViralName<?> familyName, subfamilyName, tribeName,subtribeName,genusName,subgenusName,speciesName,subspeciesName;
3062 String familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr;
3063 Taxon higherTaxa;
3064 Rank higherRank;
3065 private Taxon taxon;
3066
3067
3068 public void setSource(Reference<?> re){
3069 refMods=re;
3070 }
3071
3072 /**
3073 * @param string
3074 */
3075 public void setFormStr(String string) {
3076 this.formStr=string;
3077
3078 }
3079 /**
3080 * @param string
3081 */
3082 public void setVarietyStr(String string) {
3083 this.varietyStr=string;
3084
3085 }
3086 /**
3087 * @param string
3088 */
3089 public void setSubspeciesStr(String string) {
3090 this.subspeciesStr=string;
3091
3092 }
3093 /**
3094 * @param string
3095 */
3096 public void setSpeciesStr(String string) {
3097 this.speciesStr=string;
3098
3099 }
3100 /**
3101 * @param string
3102 */
3103 public void setSubgenusStr(String string) {
3104 this.subgenusStr=string;
3105
3106 }
3107 /**
3108 * @param string
3109 */
3110 public void setGenusStr(String string) {
3111 this.genusStr=string;
3112
3113 }
3114 /**
3115 * @param string
3116 */
3117 public void setSubtribeStr(String string) {
3118 this.subtribeStr=string;
3119
3120 }
3121 /**
3122 * @param string
3123 */
3124 public void setTribeStr(String string) {
3125 this.tribeStr=string;
3126
3127 }
3128 /**
3129 * @param string
3130 */
3131 public void setSubfamilyStr(String string) {
3132 this.subfamilyStr=string;
3133
3134 }
3135 /**
3136 * @param string
3137 */
3138 public void setFamilyStr(String string) {
3139 this.familyStr=string;
3140
3141 }
3142 /**
3143 * @return the familyStr
3144 */
3145 public String getFamilyStr() {
3146 return familyStr;
3147 }
3148 /**
3149 * @return the subfamilyStr
3150 */
3151 public String getSubfamilyStr() {
3152 return subfamilyStr;
3153 }
3154 /**
3155 * @return the tribeStr
3156 */
3157 public String getTribeStr() {
3158 return tribeStr;
3159 }
3160 /**
3161 * @return the subtribeStr
3162 */
3163 public String getSubtribeStr() {
3164 return subtribeStr;
3165 }
3166 /**
3167 * @return the genusStr
3168 */
3169 public String getGenusStr() {
3170 return genusStr;
3171 }
3172 /**
3173 * @return the subgenusStr
3174 */
3175 public String getSubgenusStr() {
3176 return subgenusStr;
3177 }
3178 /**
3179 * @return the speciesStr
3180 */
3181 public String getSpeciesStr() {
3182 return speciesStr;
3183 }
3184 /**
3185 * @return the subspeciesStr
3186 */
3187 public String getSubspeciesStr() {
3188 return subspeciesStr;
3189 }
3190 /**
3191 * @return the formStr
3192 */
3193 public String getFormStr() {
3194 return formStr;
3195 }
3196 /**
3197 * @return the varietyStr
3198 */
3199 public String getVarietyStr() {
3200 return varietyStr;
3201 }
3202
3203 /**
3204 * @param newName2
3205 */
3206 public void setNotParsableTaxon(String newName2) {
3207 List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3208
3209 NomenclaturalStatusType statusType = null;
3210 if (!getStatus().isEmpty()){
3211 try {
3212 statusType = nomStatusString2NomStatus(getStatus());
3213 } catch (UnknownCdmTypeException e) {
3214 addProblematicStatusToFile(getStatus());
3215 logger.warn("Problem with status");
3216 }
3217 }
3218
3219 boolean foundIdentic=false;
3220 Taxon tmp=null;
3221 // Taxon tmpPartial=null;
3222 for (TaxonBase tmpb:tmpList){
3223 if(tmpb !=null){
3224 TaxonNameBase tnb = tmpb.getName();
3225 Rank crank=null;
3226 if (tnb != null){
3227 if (tnb.getTitleCache().split("sec.")[0].equals(newName2) ){
3228 crank =tnb.getRank();
3229 if (crank !=null && rank !=null){
3230 if (crank.equals(rank)){
3231 foundIdentic=true;
3232 try{
3233 tmp=(Taxon)tmpb;
3234 }catch(Exception e){
3235 e.printStackTrace();
3236 }
3237 }
3238 }
3239 }
3240 }
3241 }
3242 }
3243 boolean statusMatch=false;
3244 boolean appendedMatch=false;
3245 if(tmp !=null && foundIdentic){
3246 statusMatch=compareStatus(tmp, statusType);
3247 if (!getStatus().isEmpty() && ! (tmp.getAppendedPhrase() == null)) {
3248 appendedMatch=tmp.getAppendedPhrase().equals(getStatus());
3249 }
3250 if (getStatus().isEmpty() && tmp.getAppendedPhrase() == null) {
3251 appendedMatch=true;
3252 }
3253
3254 }
3255 if ((tmp == null || !foundIdentic) || (tmp != null && !statusMatch) || (tmp != null && !appendedMatch && !statusMatch)){
3256
3257 NonViralName<?> tnb = getNonViralNameAccNomenclature();
3258 tnb.setRank(rank);
3259
3260 if(statusType != null) {
3261 tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3262 }
3263 if(getStatus()!=null) {
3264 tnb.setAppendedPhrase(getStatus());
3265 }
3266
3267 tnb.setTitleCache(newName2,true);
3268 tmp = findMatchingTaxon(tnb,refMods);
3269 if(tmp==null){
3270 tmp=Taxon.NewInstance(tnb, refMods);
3271 tmp.setSec(refMods);
3272 sourceHandler.addSource(refMods, tmp);
3273 classification.addChildTaxon(tmp, null, null);
3274 }
3275 }
3276 tmp = CdmBase.deproxy(tmp, Taxon.class);
3277 if (author != null) {
3278 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
3279 setLSID(getIdentifier(), tmp);
3280 importer.getTaxonService().saveOrUpdate(tmp);
3281 tmp = CdmBase.deproxy(tmp, Taxon.class);
3282 }
3283 }
3284 TaxonNameBase tnb = CdmBase.deproxy(tmp.getName(), TaxonNameBase.class);
3285
3286 this.taxon=tmp;
3287 castTaxonNameBase(tnb, taxonnamebase);
3288
3289 }
3290
3291 /**
3292 *
3293 */
3294 public void buildTaxon() {
3295 System.out.println("BUILD TAXON");
3296
3297 NomenclaturalStatusType statusType = null;
3298 if (!getStatus().isEmpty()){
3299 try {
3300 statusType = nomStatusString2NomStatus(getStatus());
3301 taxonnamebase.addStatus(NomenclaturalStatus.NewInstance(statusType));
3302 } catch (UnknownCdmTypeException e) {
3303 addProblematicStatusToFile(getStatus());
3304 logger.warn("Problem with status");
3305 }
3306 }
3307 importer.getNameService().save(taxonnamebase);
3308 Taxon tmptaxon = Taxon.NewInstance(taxonnamebase, refMods); //sec set null
3309
3310 boolean exist = false;
3311 for (TaxonNode p : classification.getAllNodes()){
3312 try{
3313 if(p.getTaxon().getTitleCache().equalsIgnoreCase(tmptaxon.getTitleCache())) {
3314 if(compareStatus(p.getTaxon(), statusType)){
3315 tmptaxon=CdmBase.deproxy(p.getTaxon(), Taxon.class);
3316 exist =true;
3317 }
3318 }
3319 }catch(NullPointerException n){logger.warn(" A taxon is either null or its titlecache is null - ignore it?");}
3320 }
3321 if (!exist){
3322
3323 boolean insertAsExisting =false;
3324 List<Taxon> existingTaxons = getMatchingTaxon(taxonnamebase);
3325 double similarityScore=0.0;
3326 for (Taxon bestMatchingTaxon:existingTaxons){
3327 similarityScore=similarity(taxonnamebase.getTitleCache().split("sec.")[0].toLowerCase().trim(), bestMatchingTaxon.getTitleCache().split("sec.")[0].toLowerCase().trim());
3328 insertAsExisting = compareAndCheckTaxon(taxonnamebase, refMods, similarityScore, bestMatchingTaxon);
3329 if(insertAsExisting) {
3330 tmptaxon=bestMatchingTaxon;
3331 break;
3332 }
3333 }
3334 if (!insertAsExisting){
3335 tmptaxon.setSec(refMods);
3336 if (taxonnamebase.getRank().equals(configState.getConfig().getMaxRank())) {
3337 System.out.println("****************************"+tmptaxon);
3338 classification.addChildTaxon(tmptaxon, refMods, null);
3339 } else{
3340 hierarchy = new HashMap<Rank, Taxon>();
3341 System.out.println("LOOK FOR PARENT "+taxonnamebase.toString()+", "+tmptaxon.toString());
3342 lookForParentNode(taxonnamebase,tmptaxon, refMods,this);
3343 System.out.println("HIERARCHY "+hierarchy);
3344 Taxon parent = buildHierarchy();
3345 if(!taxonExistsInClassification(parent,tmptaxon)){
3346 classification.addParentChild(parent, tmptaxon, refMods, null);
3347 importer.getClassificationService().saveOrUpdate(classification);
3348 }
3349 // Set<TaxonNode> nodeList = classification.getAllNodes();
3350 // for(TaxonNode tn:nodeList) {
3351 // System.out.println(tn.getTaxon());
3352 // }
3353 }
3354 }
3355 importer.getClassificationService().saveOrUpdate(classification);
3356 // refreshTransaction();
3357 }
3358 taxon=CdmBase.deproxy(tmptaxon, Taxon.class);
3359
3360 }
3361
3362
3363 /**
3364 *
3365 */
3366 private Taxon buildHierarchy() {
3367 Taxon higherTaxon = null;
3368 if(hierarchy.containsKey(configState.getConfig().getMaxRank())){
3369 if(!taxonExistsInClassification(higherTaxon, hierarchy.get(configState.getConfig().getMaxRank()))) {
3370 System.out.println("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"+hierarchy.get(configState.getConfig().getMaxRank()));
3371 classification.addChildTaxon(hierarchy.get(configState.getConfig().getMaxRank()), refMods, null);
3372 }
3373 higherTaxon = hierarchy.get(configState.getConfig().getMaxRank());
3374 return higherTaxon;
3375 }
3376 if(hierarchy.containsKey(Rank.SUBFAMILY())){
3377 if(!taxonExistsInClassification(higherTaxon, hierarchy.get(Rank.SUBFAMILY()))) {
3378 classification.addParentChild(higherTaxon, hierarchy.get(Rank.SUBFAMILY()), refMods, null);
3379 }
3380 higherTaxon=hierarchy.get(Rank.SUBFAMILY());
3381 }
3382 if(hierarchy.containsKey(Rank.TRIBE())){
3383 if(!taxonExistsInClassification(higherTaxon, hierarchy.get(Rank.TRIBE()))) {
3384 classification.addParentChild(higherTaxon, hierarchy.get(Rank.TRIBE()), refMods, null);
3385 }
3386 higherTaxon=hierarchy.get(Rank.TRIBE());
3387 }
3388 if(hierarchy.containsKey(Rank.SUBTRIBE())){
3389 if(!taxonExistsInClassification(higherTaxon, hierarchy.get(Rank.SUBTRIBE()))) {
3390 classification.addParentChild(higherTaxon, hierarchy.get(Rank.SUBTRIBE()), refMods, null);
3391 }
3392 higherTaxon=hierarchy.get(Rank.SUBTRIBE());
3393 }
3394 if(hierarchy.containsKey(Rank.GENUS())){
3395 if(!taxonExistsInClassification(higherTaxon, hierarchy.get(Rank.GENUS()))) {
3396 classification.addParentChild(higherTaxon, hierarchy.get(Rank.GENUS()), refMods, null);
3397 }
3398 higherTaxon=hierarchy.get(Rank.GENUS());
3399 }
3400 if(hierarchy.containsKey(Rank.SUBGENUS())){
3401 if(!taxonExistsInClassification(higherTaxon, hierarchy.get(Rank.SUBGENUS()))) {
3402 classification.addParentChild(higherTaxon, hierarchy.get(Rank.SUBGENUS()), refMods, null);
3403 }
3404 higherTaxon=hierarchy.get(Rank.SUBGENUS());
3405 }
3406 importer.getClassificationService().saveOrUpdate(classification);
3407 return higherTaxon;
3408 }
3409
3410 private boolean taxonExistsInClassification(Taxon parent, Taxon child){
3411 // System.out.println("LOOK IF TAXA EXIST "+parent+", "+child);
3412 boolean found=false;
3413 if(parent !=null){
3414 for (TaxonNode p : classification.getAllNodes()){
3415 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
3416 for (TaxonNode c : p.getChildNodes()) {
3417 if (c.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3418 found=true;
3419 break;
3420 }
3421 }
3422 }
3423 }
3424 }
3425 else{
3426 for (TaxonNode p : classification.getAllNodes()){
3427 if(p.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3428 found=true;
3429 break;
3430 }
3431 }
3432 }
3433 // System.out.println("LOOK IF TAXA EXIST? "+found);
3434 return found;
3435 }
3436 /**
3437 * @param nameToBeFilledTest
3438 */
3439 @SuppressWarnings("rawtypes")
3440 public void setParsedName(TaxonNameBase nameToBeFilledTest) {
3441 this.taxonnamebase = (NonViralName<?>) nameToBeFilledTest;
3442
3443 }
3444 //variety dwcranks:varietyEpithet
3445 /**
3446 * @return the author
3447 */
3448 public String getAuthor() {
3449 return author;
3450 }
3451 /**
3452 * @return
3453 */
3454 public Taxon getTaxon() {
3455 return taxon;
3456 }
3457 /**
3458 * @return
3459 */
3460 public NonViralName<?> getTaxonNameBase() {
3461 return taxonnamebase;
3462 }
3463
3464 /**
3465 * @param findOrCreateTaxon
3466 */
3467 public void setForm(Taxon form) {
3468 this.form=form;
3469
3470 }
3471 /**
3472 * @param findOrCreateTaxon
3473 */
3474 public void setVariety(Taxon variety) {
3475 this.variety=variety;
3476
3477 }
3478 /**
3479 * @param string
3480 * @return
3481 */
3482 @SuppressWarnings("rawtypes")
3483 public Taxon findOrCreateTaxon(String partialname,String fullname, Rank rank, Rank globalrank) {
3484 List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3485
3486 NomenclaturalStatusType statusType = null;
3487 if (!getStatus().isEmpty()){
3488 try {
3489 statusType = nomStatusString2NomStatus(getStatus());
3490 } catch (UnknownCdmTypeException e) {
3491 addProblematicStatusToFile(getStatus());
3492 logger.warn("Problem with status");
3493 }
3494 }
3495
3496 boolean foundIdentic=false;
3497 Taxon tmp=null;
3498 // Taxon tmpPartial=null;
3499 for (TaxonBase tmpb:tmpList){
3500 if(tmpb !=null){
3501 TaxonNameBase tnb = tmpb.getName();
3502 Rank crank=null;
3503 if (tnb != null){
3504 // System.out.println(tnb.getTitleCache());
3505 // if (tnb.getTitleCache().split("sec.")[0].equals(partialname) ||tnb.getTitleCache().split("sec.")[0].equals(fullname) ){
3506 if(globalrank.equals(rank) || (globalrank.isLower(Rank.SPECIES()) && rank.equals(Rank.SPECIES()))){
3507 if (tnb.getTitleCache().split("sec.")[0].equals(fullname) ){
3508 crank =tnb.getRank();
3509 if (crank !=null && rank !=null){
3510 if (crank.equals(rank)){
3511 foundIdentic=true;
3512 try{
3513 tmp=(Taxon)tmpb;
3514 }catch(Exception e){
3515 e.printStackTrace();
3516 }
3517 }
3518 }
3519 }
3520 if(fullname.indexOf(partialname)<0){ //for corrected names such as Anochetus -- A. blf-pat
3521 if (tnb.getTitleCache().split("sec.")[0].equals(partialname) ){
3522 crank =tnb.getRank();
3523 if (crank !=null && rank !=null){
3524 if (crank.equals(rank)){
3525 foundIdentic=true;
3526 try{
3527 tmp=(Taxon)tmpb;
3528 }catch(Exception e){
3529 e.printStackTrace();
3530 }
3531 }
3532 }
3533 }
3534 }
3535 }
3536 else{
3537 if (tnb.getTitleCache().split("sec.")[0].equals(partialname) ){
3538 crank =tnb.getRank();
3539 if (crank !=null && rank !=null){
3540 if (crank.equals(rank)){
3541 foundIdentic=true;
3542 try{
3543 tmp=(Taxon)tmpb;
3544 }catch(Exception e){
3545 e.printStackTrace();
3546 }
3547 }
3548 }
3549 }
3550 }
3551 }
3552 }
3553 }
3554 boolean statusMatch=false;
3555 boolean appendedMatch=false;
3556 if(tmp !=null && foundIdentic){
3557 statusMatch=compareStatus(tmp, statusType);
3558 if (!getStatus().isEmpty() && ! (tmp.getAppendedPhrase() == null)) {
3559 appendedMatch=tmp.getAppendedPhrase().equals(getStatus());
3560 }
3561 if (getStatus().isEmpty() && tmp.getAppendedPhrase() == null) {
3562 appendedMatch=true;
3563 }
3564
3565 }
3566 if ((tmp == null || !foundIdentic) || (tmp != null && !statusMatch) || (tmp != null && !appendedMatch && !statusMatch)){
3567
3568 NonViralName<?> tnb = getNonViralNameAccNomenclature();
3569 tnb.setRank(rank);
3570
3571 if(statusType != null) {
3572 tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3573 }
3574 if(getStatus()!=null) {
3575 tnb.setAppendedPhrase(getStatus());
3576 }
3577
3578 if(rank.equals(Rank.UNKNOWN_RANK())) {
3579 tnb.setTitleCache(fullname);
3580 }
3581
3582 if(rank.isHigher(Rank.SPECIES())) {
3583 tnb.setTitleCache(partialname);
3584 }
3585
3586 if (rank.equals(globalrank) && author != null) {
3587 if(fullname.indexOf("opulifolium")>-1) {
3588 System.out.println("AUTOR: "+author);
3589 }
3590 tnb.setCombinationAuthorTeam(findOrCreateAuthor(author));
3591 if (getIdentifier() !=null && !getIdentifier().isEmpty()){
3592 Taxon taxonLSID = getTaxonByLSID(getIdentifier());
3593 if (taxonLSID !=null) {
3594 tmp=taxonLSID;
3595 }
3596 }
3597 }
3598
3599 if(tmp == null){
3600 if (rank.equals(Rank.FAMILY())) {
3601 tmp = buildFamily(tnb);
3602 }
3603 if (rank.equals(Rank.SUBFAMILY())) {
3604 tmp = buildSubfamily(tnb);
3605 }
3606 if (rank.equals(Rank.TRIBE())) {
3607 tmp = buildTribe(tnb);
3608 }
3609 if (rank.equals(Rank.SUBTRIBE())) {
3610 tmp = buildSubtribe(tnb);
3611 }
3612 if (rank.equals(Rank.GENUS())) {
3613 tmp = buildGenus(partialname, tnb);
3614 }
3615
3616 if (rank.equals(Rank.SUBGENUS())) {
3617 tmp = buildSubgenus(partialname, tnb);
3618 }
3619 if (rank.equals(Rank.SPECIES())) {
3620 tmp = buildSpecies(partialname, tnb);
3621 }
3622
3623 if (rank.equals(Rank.SUBSPECIES())) {
3624 tmp = buildSubspecies(partialname, tnb);
3625 }
3626
3627 if (rank.equals(Rank.VARIETY())) {
3628 tmp = buildVariety(fullname, partialname, tnb);
3629 }
3630
3631 if (rank.equals(Rank.FORM())) {
3632 tmp = buildForm(fullname, partialname, tnb);
3633 }
3634
3635 importer.getClassificationService().saveOrUpdate(classification);
3636 }
3637 }
3638
3639 tmp = CdmBase.deproxy(tmp, Taxon.class);
3640 if (rank.equals(globalrank) && author != null) {
3641 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
3642 setLSID(getIdentifier(), tmp);
3643 importer.getTaxonService().saveOrUpdate(tmp);
3644 tmp = CdmBase.deproxy(tmp, Taxon.class);
3645 }
3646 }
3647 TaxonNameBase tnb = CdmBase.deproxy(tmp.getName(), TaxonNameBase.class);
3648
3649 this.taxon=tmp;
3650 castTaxonNameBase(tnb, taxonnamebase);
3651 return tmp;
3652 }
3653 /**
3654 * @param tnb
3655 * @return
3656 */
3657 private Taxon buildSubfamily(NonViralName<?> tnb) {
3658 Taxon tmp;
3659 tnb.generateTitle();
3660 tmp = findMatchingTaxon(tnb,refMods);
3661 if(tmp ==null){
3662 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
3663 tmp.setSec(refMods);
3664 sourceHandler.addSource(refMods, tmp);
3665 if(family != null) {
3666 classification.addParentChild(family, tmp, null, null);
3667 higherRank=Rank.FAMILY();
3668 higherTaxa=family;
3669 } else {
3670 System.out.println("ADDCHILDTAXON SUBFAMILY "+tmp);
3671 classification.addChildTaxon(tmp, null, null);
3672 }
3673 }
3674 return tmp;
3675 }
3676 /**
3677 * @param tnb
3678 * @return
3679 */
3680 private Taxon buildFamily(NonViralName<?> tnb) {
3681 Taxon tmp;
3682 tnb.generateTitle();
3683 tmp = findMatchingTaxon(tnb,refMods);
3684 if(tmp ==null){
3685 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
3686 tmp.setSec(refMods);
3687 sourceHandler.addSource(refMods, tmp);
3688 System.out.println("ADDCHILDTAXON FAMILY "+tmp);
3689 classification.addChildTaxon(tmp, null, null);
3690 }
3691 return tmp;
3692 }
3693 /**
3694 * @param fullname
3695 * @param tnb
3696 * @return
3697 */
3698 private Taxon buildForm(String fullname, String partialname, NonViralName<?> tnb) {
3699 Taxon tmp;
3700 if (genusName !=null) {
3701 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
3702 }
3703 if (subgenusName !=null) {
3704 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
3705 }
3706 if(speciesName !=null) {
3707 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
3708 }
3709 if(subspeciesName != null) {
3710 tnb.setInfraSpecificEpithet(subspeciesName.getInfraSpecificEpithet());
3711 }
3712 if(partialname!= null) {
3713 tnb.setInfraSpecificEpithet(partialname);
3714 }
3715 tnb.generateTitle();
3716 //TODO how to save form??
3717 tnb.setTitleCache(fullname, true);
3718 tmp = findMatchingTaxon(tnb,refMods);
3719 if(tmp ==null){
3720 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
3721 tmp.setSec(refMods);
3722 sourceHandler.addSource(refMods, tmp);
3723 if (subspecies !=null) {
3724 classification.addParentChild(subspecies, tmp, null, null);
3725 higherRank=Rank.SUBSPECIES();
3726 higherTaxa=subspecies;
3727 } else {
3728 if (species !=null) {
3729 classification.addParentChild(species, tmp, null, null);
3730 higherRank=Rank.SPECIES();
3731 higherTaxa=species;
3732 }
3733 else{
3734 System.out.println("ADDCHILDTAXON FORM "+tmp);
3735 classification.addChildTaxon(tmp, null, null);
3736 }
3737 }
3738 }
3739 return tmp;
3740 }
3741 /**
3742 * @param fullname
3743 * @param tnb
3744 * @return
3745 */
3746 private Taxon buildVariety(String fullname, String partialname, NonViralName<?> tnb) {
3747 Taxon tmp;
3748 if (genusName !=null) {
3749 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
3750 }
3751 if (subgenusName !=null) {
3752 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
3753 }
3754 if(speciesName !=null) {
3755 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
3756 }
3757 if(subspeciesName != null) {
3758 tnb.setInfraSpecificEpithet(subspeciesName.getSpecificEpithet());
3759 }
3760 if(partialname != null) {
3761 tnb.setInfraSpecificEpithet(partialname);
3762 }
3763 //TODO how to save variety?
3764 tnb.setTitleCache(fullname, true);
3765 tmp = findMatchingTaxon(tnb,refMods);
3766 if(tmp ==null){
3767 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
3768 tmp.setSec(refMods);
3769 sourceHandler.addSource(refMods, tmp);
3770 if (subspecies !=null) {
3771 classification.addParentChild(subspecies, tmp, null, null);
3772 higherRank=Rank.SUBSPECIES();
3773 higherTaxa=subspecies;
3774 } else {
3775 if(species !=null) {
3776 classification.addParentChild(species, tmp, null, null);
3777 higherRank=Rank.SPECIES();
3778 higherTaxa=species;
3779 }
3780 else{
3781 System.out.println("ADDCHILDTAXON VARIETY "+tmp);
3782 classification.addChildTaxon(tmp, null, null);
3783 }
3784 }
3785 }
3786 return tmp;
3787 }
3788 /**
3789 * @param partialname
3790 * @param tnb
3791 * @return
3792 */
3793 private Taxon buildSubspecies(String partialname, NonViralName<?> tnb) {
3794 Taxon tmp;
3795 if (genusName !=null) {
3796 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
3797 }
3798 if (subgenusName !=null) {
3799 // System.out.println("SUB:"+subgenusName.getInfraGenericEpithet());
3800 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
3801 }
3802 if(speciesName !=null) {
3803 // System.out.println("SPE:"+speciesName.getSpecificEpithet());
3804 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
3805 }
3806 tnb.setInfraSpecificEpithet(partialname);
3807 tnb.generateTitle();
3808 tmp = findMatchingTaxon(tnb,refMods);
3809 if(tmp ==null){
3810 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
3811 tmp.setSec(refMods);
3812 sourceHandler.addSource(refMods, tmp);
3813
3814 if(species != null) {
3815 classification.addParentChild(species, tmp, null, null);
3816 higherRank=Rank.SPECIES();
3817 higherTaxa=species;
3818 }
3819 else{
3820 System.out.println("ADDCHILDTAXON SUBSPECIES "+tmp);
3821 classification.addChildTaxon(tmp, null, null);
3822 }
3823 }
3824 return tmp;
3825 }
3826 /**
3827 * @param partialname
3828 * @param tnb
3829 * @return
3830 */
3831 private Taxon buildSpecies(String partialname, NonViralName<?> tnb) {
3832 Taxon tmp;
3833 if (genusName !=null) {
3834 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
3835 }
3836 if (subgenusName !=null) {
3837 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
3838 }
3839 tnb.setSpecificEpithet(partialname.toLowerCase());
3840 tnb.generateTitle();
3841 tmp = findMatchingTaxon(tnb,refMods);
3842 if(tmp ==null){
3843 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
3844 tmp.setSec(refMods);
3845 sourceHandler.addSource(refMods, tmp);
3846 if (subgenus !=null) {
3847 classification.addParentChild(subgenus, tmp, null, null);
3848 higherRank=Rank.SUBGENUS();
3849 higherTaxa=subgenus;
3850 } else {
3851 if (genus !=null) {
3852 classification.addParentChild(genus, tmp, null, null);
3853 higherRank=Rank.GENUS();
3854 higherTaxa=genus;
3855 }
3856 else{
3857 System.out.println("ADDCHILDTAXON SPECIES "+tmp);
3858 classification.addChildTaxon(tmp, null, null);
3859 }
3860 }
3861 }
3862 return tmp;
3863 }
3864 /**
3865 * @param partialname
3866 * @param tnb
3867 * @return
3868 */
3869 private Taxon buildSubgenus(String partialname, NonViralName<?> tnb) {
3870 Taxon tmp;
3871 tnb.setInfraGenericEpithet(partialname);
3872 if (genusName !=null) {
3873 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
3874 }
3875 tnb.generateTitle();
3876 tmp = findMatchingTaxon(tnb,refMods);
3877 if(tmp ==null){
3878 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
3879 tmp.setSec(refMods);
3880 sourceHandler.addSource(refMods, tmp);
3881 if(genus != null) {
3882 classification.addParentChild(genus, tmp, null, null);
3883 higherRank=Rank.GENUS();
3884 higherTaxa=genus;
3885 } else{
3886 System.out.println("ADDCHILDTAXON SUBGENUS "+tmp);
3887 classification.addChildTaxon(tmp, null, null);
3888 }
3889 }
3890 return tmp;
3891 }
3892 /**
3893 * @param partialname
3894 * @param tnb
3895 * @return
3896 */
3897 private Taxon buildGenus(String partialname, NonViralName<?> tnb) {
3898 Taxon tmp;
3899 tnb.setGenusOrUninomial(partialname);
3900 tnb.generateTitle();
3901
3902 tmp = findMatchingTaxon(tnb,refMods);
3903 if(tmp ==null){
3904 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
3905 tmp.setSec(refMods);
3906 sourceHandler.addSource(refMods, tmp);
3907
3908 if(subtribe != null) {
3909 classification.addParentChild(subtribe, tmp, null, null);
3910 higherRank=Rank.SUBTRIBE();
3911 higherTaxa=subtribe;
3912 } else{
3913 if(tribe !=null) {
3914 classification.addParentChild(tribe, tmp, null, null);
3915 higherRank=Rank.TRIBE();
3916 higherTaxa=tribe;
3917 } else{
3918 if(subfamily !=null) {
3919 classification.addParentChild(subfamily, tmp, null, null);
3920 higherRank=Rank.SUBFAMILY();
3921 higherTaxa=subfamily;
3922 } else
3923 if(family !=null) {
3924 classification.addParentChild(family, tmp, null, null);
3925 higherRank=Rank.FAMILY();
3926 higherTaxa=family;
3927 }
3928 else{
3929 System.out.println("ADDCHILDTAXON GENUS "+tmp);
3930 classification.addChildTaxon(tmp, null, null);
3931 }
3932 }
3933 }
3934 }
3935 return tmp;
3936 }
3937
3938 /**
3939 * @param tnb
3940 * @return
3941 */
3942 private Taxon buildSubtribe(NonViralName<?> tnb) {
3943 Taxon tmp;
3944 tnb.generateTitle();
3945 tmp = findMatchingTaxon(tnb,refMods);
3946 if(tmp==null){
3947 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
3948 tmp.setSec(refMods);
3949 sourceHandler.addSource(refMods, tmp);
3950 if(tribe != null) {
3951 classification.addParentChild(tribe, tmp, null, null);
3952 higherRank=Rank.TRIBE();
3953 higherTaxa=tribe;
3954 } else{
3955 System.out.println("ADDCHILDTAXON SUBTRIBE "+tmp);
3956 classification.addChildTaxon(tmp, null, null);
3957 }
3958 }
3959 return tmp;
3960 }
3961 /**
3962 * @param tnb
3963 * @return
3964 */
3965 private Taxon buildTribe(NonViralName<?> tnb) {
3966 Taxon tmp;
3967 tnb.generateTitle();
3968 tmp = findMatchingTaxon(tnb,refMods);
3969 if(tmp==null){
3970 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
3971 tmp.setSec(refMods);
3972 sourceHandler.addSource(refMods, tmp);
3973 if (subfamily !=null) {
3974 classification.addParentChild(subfamily, tmp, null, null);
3975 higherRank=Rank.SUBFAMILY();
3976 higherTaxa=subfamily;
3977 } else {
3978 if(family != null) {
3979 classification.addParentChild(family, tmp, null, null);
3980 higherRank=Rank.FAMILY();
3981 higherTaxa=family;
3982 }
3983 else{
3984 System.out.println("ADDCHILDTAXON TRIBE "+tmp);
3985 classification.addChildTaxon(tmp, null, null);
3986 }
3987 }
3988 }
3989 return tmp;
3990 }
3991 /**
3992 * @param tnb
3993 * cast the current taxonnamebase into a botanical name or zoological or bacterial name
3994 * if errors, cast into a classis nonviralname
3995 * @param taxonnamebase2
3996 */
3997 @SuppressWarnings("rawtypes")
3998 private NonViralName<?> castTaxonNameBase(TaxonNameBase tnb, NonViralName<?> nvn) {
3999 NonViralName<?> taxonnamebase2 = nvn;
4000 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
4001 try{
4002 taxonnamebase2=(BotanicalName) tnb;
4003 }catch(Exception e){
4004 taxonnamebase2= (NonViralName<?>) tnb;
4005 }
4006 }
4007 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
4008 try{
4009 taxonnamebase2=(ZoologicalName) tnb;
4010 }catch(Exception e){
4011 taxonnamebase2= (NonViralName<?>) tnb;
4012 }
4013 }
4014 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
4015 try{
4016 taxonnamebase2=(BacterialName) tnb;
4017 }catch(Exception e){
4018 taxonnamebase2= (NonViralName<?>) tnb;
4019 }
4020 }
4021 return taxonnamebase2;
4022 }
4023 /**
4024 * @param identifier2
4025 * @return
4026 */
4027 @SuppressWarnings("rawtypes")
4028 private Taxon getTaxonByLSID(String identifier) {
4029 // boolean lsidok=false;
4030 String id = identifier.split("__")[0];
4031 // String source = identifier.split("__")[1];
4032 LSID lsid = null;
4033 if (id.indexOf("lsid")>-1){
4034 try {
4035 lsid = new LSID(id);
4036 // lsidok=true;
4037 } catch (MalformedLSIDException e) {
4038 logger.warn("Malformed LSID");
4039 }
4040 }
4041 if (lsid !=null){
4042 List<TaxonBase> taxons = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
4043 LSID currentlsid=null;
4044 for (TaxonBase t:taxons){
4045 currentlsid = t.getLsid();
4046 if (currentlsid !=null){
4047 if (currentlsid.getLsid().equals(lsid.getLsid())){
4048 try{
4049 return (Taxon) t;
4050 }
4051 catch(Exception e){logger.warn("Exception occurred while comparing LSIDs "+e );}
4052 }
4053 }
4054 }
4055 }
4056 return null;
4057 }
4058 /**
4059 * @param author2
4060 * @return
4061 */
4062 @SuppressWarnings("rawtypes")
4063 private Person findOrCreateAuthor(String author2) {
4064 List<UuidAndTitleCache<Person>> hiberPersons = importer.getAgentService().getPersonUuidAndTitleCache();
4065 for (UuidAndTitleCache<Person> hibernateP:hiberPersons){
4066 if(hibernateP.getTitleCache().equals(author2)) {
4067 AgentBase existing = importer.getAgentService().find(hibernateP.getUuid());
4068 return CdmBase.deproxy(existing, Person.class);
4069 }
4070 }
4071 Person p = Person.NewInstance();
4072 p.setTitleCache(author2,true);
4073 importer.getAgentService().saveOrUpdate(p);
4074 return CdmBase.deproxy(p, Person.class);
4075 }
4076 /**
4077 * @param author the author to set
4078 */
4079 public void setAuthor(String author) {
4080 this.author = author;
4081 }
4082
4083 /**
4084 * @return the higherTaxa
4085 */
4086 public Taxon getHigherTaxa() {
4087 return higherTaxa;
4088 }
4089 /**
4090 * @param higherTaxa the higherTaxa to set
4091 */
4092 public void setHigherTaxa(Taxon higherTaxa) {
4093 this.higherTaxa = higherTaxa;
4094 }
4095 /**
4096 * @return the higherRank
4097 */
4098 public Rank getHigherRank() {
4099 return higherRank;
4100 }
4101 /**
4102 * @param higherRank the higherRank to set
4103 */
4104 public void setHigherRank(Rank higherRank) {
4105 this.higherRank = higherRank;
4106 }
4107 public String getName(){
4108 if (newName.isEmpty()) {
4109 return originalName;
4110 } else {
4111 return newName;
4112 }
4113
4114 }
4115 /**
4116 * @return the fullName
4117 */
4118 public String getOriginalName() {
4119 return originalName;
4120 }
4121 /**
4122 * @param fullName the fullName to set
4123 */
4124 public void setOriginalName(String fullName) {
4125 this.originalName = fullName;
4126 }
4127 /**
4128 * @return the newName
4129 */
4130 public String getNewName() {
4131 return newName;
4132 }
4133 /**
4134 * @param newName the newName to set
4135 */
4136 public void setNewName(String newName) {
4137 this.newName = newName;
4138 }
4139 /**
4140 * @return the rank
4141 */
4142 public Rank getRank() {
4143 return rank;
4144 }
4145 /**
4146 * @param rank the rank to set
4147 */
4148 public void setRank(Rank rank) {
4149 this.rank = rank;
4150 }
4151 /**
4152 * @return the idenfitiger
4153 */
4154 public String getIdentifier() {
4155 return identifier;
4156 }
4157 /**
4158 * @param idenfitiger the idenfitiger to set
4159 */
4160 public void setIdentifier(String identifier) {
4161 this.identifier = identifier;
4162 }
4163 /**
4164 * @return the status
4165 */
4166 public String getStatus() {
4167 if (status == null) {
4168 return "";
4169 }
4170 return status;
4171 }
4172 /**
4173 * @param status the status to set
4174 */
4175 public void setStatus(String status) {
4176 this.status = status;
4177 }
4178 /**
4179 * @return the family
4180 */
4181 public Taxon getFamily() {
4182 return family;
4183 }
4184 /**
4185 * @param family the family to set
4186 */
4187 @SuppressWarnings("rawtypes")
4188 public void setFamily(Taxon family) {
4189 this.family = family;
4190 TaxonNameBase taxonNameBase = CdmBase.deproxy(family.getName(), TaxonNameBase.class);
4191 familyName = castTaxonNameBase(taxonNameBase,familyName);
4192 }
4193 /**
4194 * @return the subfamily
4195 */
4196 public Taxon getSubfamily() {
4197 return subfamily;
4198 }
4199 /**
4200 * @param subfamily the subfamily to set
4201 */
4202 @SuppressWarnings("rawtypes")
4203 public void setSubfamily(Taxon subfamily) {
4204 this.subfamily = subfamily;
4205 TaxonNameBase taxonNameBase = CdmBase.deproxy(subfamily.getName(), TaxonNameBase.class);
4206 subfamilyName = castTaxonNameBase(taxonNameBase,subfamilyName);
4207 }
4208 /**
4209 * @return the tribe
4210 */
4211 public Taxon getTribe() {
4212 return tribe;
4213 }
4214 /**
4215 * @param tribe the tribe to set
4216 */
4217 @SuppressWarnings("rawtypes")
4218 public void setTribe(Taxon tribe) {
4219 this.tribe = tribe;
4220 TaxonNameBase taxonNameBase = CdmBase.deproxy(tribe.getName(), TaxonNameBase.class);
4221 tribeName = castTaxonNameBase(taxonNameBase,tribeName);
4222 }
4223 /**
4224 * @return the subtribe
4225 */
4226 public Taxon getSubtribe() {
4227 return subtribe;
4228 }
4229 /**
4230 * @param subtribe the subtribe to set
4231 */
4232 @SuppressWarnings("rawtypes")
4233 public void setSubtribe(Taxon subtribe) {
4234 this.subtribe = subtribe;
4235 TaxonNameBase taxonNameBase = CdmBase.deproxy(subtribe.getName(), TaxonNameBase.class);
4236 subtribeName =castTaxonNameBase(taxonNameBase,subtribeName);
4237 }
4238 /**
4239 * @return the genus
4240 */
4241 public Taxon getGenus() {
4242 return genus;
4243 }
4244 /**
4245 * @param genus the genus to set
4246 */
4247 @SuppressWarnings("rawtypes")
4248 public void setGenus(Taxon genus) {
4249 this.genus = genus;
4250 TaxonNameBase taxonNameBase = CdmBase.deproxy(genus.getName(), TaxonNameBase.class);
4251 genusName = castTaxonNameBase(taxonNameBase,genusName);
4252 System.out.println("GENUSNAME: "+genusName.toString());
4253 }
4254 /**
4255 * @return the subgenus
4256 */
4257 public Taxon getSubgenus() {
4258 return subgenus;
4259 }
4260 /**
4261 * @param subgenus the subgenus to set
4262 */
4263 @SuppressWarnings("rawtypes")
4264 public void setSubgenus(Taxon subgenus) {
4265 this.subgenus = subgenus;
4266 TaxonNameBase taxonNameBase = CdmBase.deproxy(subgenus.getName(), TaxonNameBase.class);
4267 subgenusName = castTaxonNameBase(taxonNameBase,subgenusName);
4268 }
4269 /**
4270 * @return the species
4271 */
4272 public Taxon getSpecies() {
4273 return species;
4274 }
4275 /**
4276 * @param species the species to set
4277 */
4278 public void setSpecies(Taxon species) {
4279 this.species = species;
4280 @SuppressWarnings("rawtypes")
4281 TaxonNameBase taxonNameBase = CdmBase.deproxy(species.getName(), TaxonNameBase.class);
4282 speciesName = castTaxonNameBase(taxonNameBase,speciesName);
4283
4284 }
4285 /**
4286 * @return the subspecies
4287 */
4288 public Taxon getSubspecies() {
4289 return subspecies;
4290 }
4291 /**
4292 * @param subspecies the subspecies to set
4293 */
4294 @SuppressWarnings("rawtypes")
4295 public void setSubspecies(Taxon subspecies) {
4296 this.subspecies = subspecies;
4297 TaxonNameBase taxonNameBase = CdmBase.deproxy(subspecies.getName(), TaxonNameBase.class);
4298 subspeciesName = castTaxonNameBase(taxonNameBase,subspeciesName);
4299
4300 }
4301
4302
4303
4304 }
4305
4306
4307 /**
4308 * @param status
4309 */
4310 private void addProblematicStatusToFile(String status) {
4311 try{
4312 FileWriter fstream = new FileWriter("/home/pkelbert/Bureau/StatusUnknown_"+classification.getTitleCache()+".txt",true);
4313 BufferedWriter out = new BufferedWriter(fstream);
4314 out.write(status+"\n");
4315 //Close the output stream
4316 out.close();
4317 }catch (Exception e){//Catch exception if any
4318 System.err.println("Error: " + e.getMessage());
4319 }
4320
4321 }
4322
4323
4324
4325 /**
4326 * @param tnb
4327 * @return
4328 */
4329 private Taxon findMatchingTaxon(NonViralName<?> tnb, Reference refMods) {
4330 Taxon tmp=null;
4331
4332 boolean insertAsExisting =false;
4333 List<Taxon> existingTaxons = getMatchingTaxon(tnb);
4334 double similarityScore=0.0;
4335 for (Taxon bestMatchingTaxon:existingTaxons){
4336 if (!existingTaxons.isEmpty() && configState.getConfig().isInteractWithUser() && !insertAsExisting) {
4337 similarityScore=similarity(tnb.getTitleCache().split("sec.")[0].toLowerCase().trim(), bestMatchingTaxon.getTitleCache().split("sec.")[0].toLowerCase().trim());
4338 insertAsExisting = compareAndCheckTaxon(tnb, refMods, similarityScore, bestMatchingTaxon);
4339 }
4340 if(insertAsExisting) {
4341 System.out.println("KEEP "+bestMatchingTaxon.toString());
4342 tmp=bestMatchingTaxon;
4343 sourceHandler.addSource(refMods, tmp);
4344 return tmp;
4345 }
4346 }
4347 return tmp;
4348 }
4349
4350 /**
4351 * @param tnb
4352 * @param refMods
4353 * @param similarityScore
4354 * @param bestMatchingTaxon
4355 * @return
4356 */
4357 private boolean compareAndCheckTaxon(NonViralName<?> tnb, Reference refMods, double similarityScore,
4358 Taxon bestMatchingTaxon) {
4359 boolean insertAsExisting;
4360 if (tnb.getTitleCache().split("sec.")[0].equalsIgnoreCase("Chenopodium") && bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium album")>-1) {
4361 insertAsExisting=false;
4362 } else{
4363 if (tnb.getTitleCache().split("sec.")[0].equalsIgnoreCase("Chenopodium") &&
4364 bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium L.")>-1) {
4365 insertAsExisting=true;
4366 } else {
4367 insertAsExisting=askIfReuseBestMatchingTaxon(tnb, bestMatchingTaxon, refMods, similarityScore);
4368 }
4369 }
4370 return insertAsExisting;
4371 }
4372
4373 /**
4374 * @return
4375 */
4376 @SuppressWarnings("rawtypes")
4377 private List<Taxon> getMatchingTaxon(TaxonNameBase tnb) {
4378 Pager<TaxonBase> pager=importer.getTaxonService().findByTitle(TaxonBase.class, tnb.getTitleCache().split("sec.")[0], MatchMode.BEGINNING, null, null, null, null, null);
4379 List<TaxonBase>records = pager.getRecords();
4380
4381 List<Taxon> existingTaxons = new ArrayList<Taxon>();
4382 for (TaxonBase r:records){
4383 try{
4384 Taxon bestMatchingTaxon = (Taxon)r;
4385 // System.out.println("best: "+bestMatchingTaxon.getTitleCache());
4386 if(compareTaxonNameLength(bestMatchingTaxon.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4387 existingTaxons.add(bestMatchingTaxon);
4388 }
4389 }catch(ClassCastException e){logger.warn("classcast exception, might be a synonym, ignore it");}
4390 }
4391 Taxon bmt = importer.getTaxonService().findBestMatchingTaxon(tnb.getTitleCache());
4392 if (!existingTaxons.contains(bmt) && bmt!=null) {
4393 if(compareTaxonNameLength(bmt.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4394 existingTaxons.add(bmt);
4395 }
4396 }
4397 return existingTaxons;
4398 }
4399
4400 /**
4401 * Check if the found Taxon can reasonnably be the same
4402 * example: with and without author should match, but the subspecies should not be suggested for a genus
4403 * */
4404 private boolean compareTaxonNameLength(String f, String o){
4405 boolean lengthOk=false;
4406 int sizeF = f.length();
4407 int sizeO = o.length();
4408 if (sizeO>=sizeF) {
4409 lengthOk=true;
4410 }
4411 if(sizeF>sizeO) {
4412 if (sizeF-sizeO>10) {
4413 lengthOk=false;
4414 } else {
4415 lengthOk=true;
4416 }
4417 }
4418
4419 // System.out.println(lengthOk+": compare "+f+" ("+f.length()+") and "+o+" ("+o.length()+")");
4420 return lengthOk;
4421 }
4422
4423 private double similarity(String s1, String s2) {
4424 if (s1.length() < s2.length()) { // s1 should always be bigger
4425 String swap = s1; s1 = s2; s2 = swap;
4426 }
4427 int bigLen = s1.length();
4428 if (bigLen == 0) { return 1.0; /* both strings are zero length */ }
4429 return (bigLen - computeEditDistance(s1, s2)) / (double) bigLen;
4430 }
4431
4432 private int computeEditDistance(String s1, String s2) {
4433 int[] costs = new int[s2.length() + 1];
4434 for (int i = 0; i <= s1.length(); i++) {
4435 int lastValue = i;
4436 for (int j = 0; j <= s2.length(); j++) {
4437 if (i == 0) {
4438 costs[j] = j;
4439 } else {
4440 if (j > 0) {
4441 int newValue = costs[j - 1];
4442 if (s1.charAt(i - 1) != s2.charAt(j - 1)) {
4443 newValue = Math.min(Math.min(newValue, lastValue),
4444 costs[j]) + 1;
4445 }
4446 costs[j - 1] = lastValue;
4447 lastValue = newValue;
4448 }
4449 }
4450 }
4451 if (i > 0) {
4452 costs[s2.length()] = lastValue;
4453 }
4454 }
4455 return costs[s2.length()];
4456 }
4457
// Parent taxa resolved by the handle*Hierarchy methods, keyed by the rank that was used to parse the parent name.
4458 Map<Rank, Taxon> hierarchy = new HashMap<Rank, Taxon>();
4459 /**
4460 * @param taxonnamebase
4461 */
4462 @SuppressWarnings("rawtypes")
4463 public void lookForParentNode(NonViralName<?> taxonnamebase, Taxon tax, Reference<?> ref, MyName myName) {
4464 System.out.println("LOOK FOR PARENT NODE "+taxonnamebase.toString()+"; "+tax.toString()+"; "+taxonnamebase.getRank());
4465 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
4466 if (taxonnamebase.getRank().equals(Rank.FORM())){
4467 handleFormHierarchy(ref, myName, parser);
4468 }
4469 if (taxonnamebase.getRank().equals(Rank.VARIETY())){
4470 handleVarietyHierarchy(ref, myName, parser);
4471 }
4472 if (taxonnamebase.getRank().equals(Rank.SUBSPECIES())){
4473 handleSubSpeciesHierarchy(ref, myName, parser);
4474 }
4475 if (taxonnamebase.getRank().equals(Rank.SPECIES())){
4476 handleSpeciesHierarchy(ref, myName, parser);
4477 }
4478 if (taxonnamebase.getRank().equals(Rank.SUBGENUS())){
4479 handleSubgenusHierarchy(ref, myName, parser);
4480 }
4481
4482 if (taxonnamebase.getRank().equals(Rank.GENUS())){
4483 handleGenusHierarchy(ref, myName, parser);
4484 }
4485 if (taxonnamebase.getRank().equals(Rank.SUBTRIBE())){
4486 handleSubtribeHierarchy(ref, myName, parser);
4487 }
4488 if (taxonnamebase.getRank().equals(Rank.TRIBE())){
4489 handleTribeHierarchy(ref, myName, parser);
4490 }
4491
4492 if (taxonnamebase.getRank().equals(Rank.SUBFAMILY())){
4493 handleSubfamilyHierarchy(ref, myName, parser);
4494 }
4495 }
4496
4497 /**
4498 * @param ref
4499 * @param myName
4500 * @param parser
4501 */
4502 private void handleSubfamilyHierarchy(Reference<?> ref, MyName myName, INonViralNameParser parser) {
4503 String parentStr = myName.getFamilyStr();
4504 Rank r = Rank.FAMILY();
4505 if(parentStr!=null){
4506 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
4507 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
4508 // importer.getTaxonService().save(parent);
4509 // parent = CdmBase.deproxy(parent, Taxon.class);
4510
4511 boolean parentDoesNotExists = true;
4512 for (TaxonNode p : classification.getAllNodes()){
4513 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
4514 parentDoesNotExists = false;
4515 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
4516 break;
4517 }
4518 }
4519 // if(parentDoesNotExists) {
4520 // importer.getTaxonService().save(parent);
4521 // parent = CdmBase.deproxy(parent, Taxon.class);
4522 // lookForParentNode(parentNameName, parent, ref,myName);
4523 // }
4524 if(parentDoesNotExists) {
4525 Taxon tmp = findMatchingTaxon(parentNameName,ref);
4526 if(tmp ==null)
4527 {
4528 parent=Taxon.NewInstance(parentNameName, ref);
4529 importer.getTaxonService().save(parent);
4530 parent = CdmBase.deproxy(parent, Taxon.class);
4531 } else {
4532 parent=tmp;
4533 }
4534 lookForParentNode(parentNameName, parent, ref,myName);
4535
4536 }
4537 hierarchy.put(r,parent);
4538 }
4539 }
4540
4541 /**
4542 * @param ref
4543 * @param myName
4544 * @param parser
4545 */
4546 private void handleTribeHierarchy(Reference<?> ref, MyName myName, INonViralNameParser parser) {
4547 String parentStr = myName.getSubfamilyStr();
4548 Rank r = Rank.SUBFAMILY();
4549 if (parentStr == null){
4550 parentStr = myName.getFamilyStr();
4551 r = Rank.FAMILY();
4552 }
4553 if(parentStr!=null){
4554 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
4555 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
4556 // importer.getTaxonService().save(parent);
4557 // parent = CdmBase.deproxy(parent, Taxon.class);
4558
4559 boolean parentDoesNotExists = true;
4560 for (TaxonNode p : classification.getAllNodes()){
4561 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
4562 parentDoesNotExists = false;
4563 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
4564 break;
4565 }
4566 }
4567 // if(parentDoesNotExists) {
4568 // importer.getTaxonService().save(parent);
4569 // parent = CdmBase.deproxy(parent, Taxon.class);
4570 // lookForParentNode(parentNameName, parent, ref,myName);
4571 // }
4572 if(parentDoesNotExists) {
4573 Taxon tmp = findMatchingTaxon(parentNameName,ref);
4574 if(tmp ==null)
4575 {
4576 parent=Taxon.NewInstance(parentNameName, ref);
4577 importer.getTaxonService().save(parent);
4578 parent = CdmBase.deproxy(parent, Taxon.class);
4579 } else {
4580 parent=tmp;
4581 }
4582 lookForParentNode(parentNameName, parent, ref,myName);
4583
4584 }
4585 hierarchy.put(r,parent);
4586 }
4587 }
4588
4589 /**
4590 * @param ref
4591 * @param myName
4592 * @param parser
4593 */
4594 private void handleSubtribeHierarchy(Reference<?> ref, MyName myName, INonViralNameParser parser) {
4595 String parentStr = myName.getTribeStr();
4596 Rank r = Rank.TRIBE();
4597 if (parentStr == null){
4598 parentStr = myName.getSubfamilyStr();
4599 r = Rank.SUBFAMILY();
4600 }
4601 if (parentStr == null){
4602 parentStr = myName.getFamilyStr();
4603 r = Rank.FAMILY();
4604 }
4605 if(parentStr!=null){
4606 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
4607 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
4608 // importer.getTaxonService().save(parent);
4609 // parent = CdmBase.deproxy(parent, Taxon.class);
4610
4611 boolean parentDoesNotExists = true;
4612 for (TaxonNode p : classification.getAllNodes()){
4613 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
4614 parentDoesNotExists = false;
4615 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
4616
4617 break;
4618 }
4619 }
4620 // if(parentDoesNotExists) {
4621 // importer.getTaxonService().save(parent);
4622 // parent = CdmBase.deproxy(parent, Taxon.class);
4623 // lookForParentNode(parentNameName, parent, ref,myName);
4624 // }
4625 if(parentDoesNotExists) {
4626 Taxon tmp = findMatchingTaxon(parentNameName,ref);
4627 if(tmp ==null)
4628 {
4629 parent=Taxon.NewInstance(parentNameName, ref);
4630 importer.getTaxonService().save(parent);
4631 parent = CdmBase.deproxy(parent, Taxon.class);
4632 } else {
4633 parent=tmp;
4634 }
4635 lookForParentNode(parentNameName, parent, ref,myName);
4636
4637 }
4638 hierarchy.put(r,parent);
4639 }
4640 }
4641
4642 /**
4643 * @param ref
4644 * @param myName
4645 * @param parser
4646 */
4647 private void handleGenusHierarchy(Reference<?> ref, MyName myName, INonViralNameParser parser) {
4648 String parentStr = myName.getSubtribeStr();
4649 Rank r = Rank.SUBTRIBE();
4650 if (parentStr == null){
4651 parentStr = myName.getTribeStr();
4652 r = Rank.TRIBE();
4653 }
4654 if (parentStr == null){
4655 parentStr = myName.getSubfamilyStr();
4656 r = Rank.SUBFAMILY();
4657 }
4658 if (parentStr == null){
4659 parentStr = myName.getFamilyStr();
4660 r = Rank.FAMILY();
4661 }
4662 if(parentStr!=null){
4663 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
4664 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
4665 // importer.getTaxonService().save(parent);
4666 // parent = CdmBase.deproxy(parent, Taxon.class);
4667
4668 boolean parentDoesNotExists = true;
4669 for (TaxonNode p : classification.getAllNodes()){
4670 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
4671 // System.out.println(p.getTaxon().getUuid());
4672 // System.out.println(parent.getUuid());
4673 parentDoesNotExists = false;
4674 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
4675 break;
4676 }
4677 }
4678 // if(parentDoesNotExists) {
4679 // importer.getTaxonService().save(parent);
4680 // parent = CdmBase.deproxy(parent, Taxon.class);
4681 // lookForParentNode(parentNameName, parent, ref,myName);
4682 // }
4683 if(parentDoesNotExists) {
4684 Taxon tmp = findMatchingTaxon(parentNameName,ref);
4685 if(tmp ==null)
4686 {
4687 parent=Taxon.NewInstance(parentNameName, ref);
4688 importer.getTaxonService().save(parent);
4689 parent = CdmBase.deproxy(parent, Taxon.class);
4690 } else {
4691 parent=tmp;
4692 }
4693 lookForParentNode(parentNameName, parent, ref,myName);
4694
4695 }
4696 hierarchy.put(r,parent);
4697 }
4698 }
4699
4700 /**
4701 * @param ref
4702 * @param myName
4703 * @param parser
4704 */
4705 private void handleSubgenusHierarchy(Reference<?> ref, MyName myName, INonViralNameParser parser) {
4706 String parentStr = myName.getGenusStr();
4707 Rank r = Rank.GENUS();
4708
4709 if(parentStr==null){
4710 parentStr = myName.getSubtribeStr();
4711 r = Rank.SUBTRIBE();
4712 }
4713 if (parentStr == null){
4714 parentStr = myName.getTribeStr();
4715 r = Rank.TRIBE();
4716 }
4717 if (parentStr == null){
4718 parentStr = myName.getSubfamilyStr();
4719 r = Rank.SUBFAMILY();
4720 }
4721 if (parentStr == null){
4722 parentStr = myName.getFamilyStr();
4723 r = Rank.FAMILY();
4724 }
4725 if(parentStr!=null){
4726 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
4727 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
4728 // importer.getTaxonService().save(parent);
4729 // parent = CdmBase.deproxy(parent, Taxon.class);
4730
4731 boolean parentDoesNotExists = true;
4732 for (TaxonNode p : classification.getAllNodes()){
4733 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
4734 // System.out.println(p.getTaxon().getUuid());
4735 // System.out.println(parent.getUuid());
4736 parentDoesNotExists = false;
4737 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
4738 break;
4739 }
4740 }
4741 // if(parentDoesNotExists) {
4742 // importer.getTaxonService().save(parent);
4743 // parent = CdmBase.deproxy(parent, Taxon.class);
4744 // lookForParentNode(parentNameName, parent, ref,myName);
4745 // }
4746 if(parentDoesNotExists) {
4747 Taxon tmp = findMatchingTaxon(parentNameName,ref);
4748 if(tmp ==null)
4749 {
4750 parent=Taxon.NewInstance(parentNameName, ref);
4751 importer.getTaxonService().save(parent);
4752 parent = CdmBase.deproxy(parent, Taxon.class);
4753 } else {
4754 parent=tmp;
4755 }
4756 lookForParentNode(parentNameName, parent, ref,myName);
4757
4758 }
4759 hierarchy.put(r,parent);
4760 }
4761 }
4762
4763 /**
4764 * @param ref
4765 * @param myName
4766 * @param parser
4767 */
4768 private void handleSpeciesHierarchy(Reference<?> ref, MyName myName, INonViralNameParser parser) {
4769 String parentStr = myName.getSubgenusStr();
4770 Rank r = Rank.SUBGENUS();
4771
4772 if(parentStr==null){
4773 parentStr = myName.getGenusStr();
4774 r = Rank.GENUS();
4775 }
4776
4777 if(parentStr==null){
4778 parentStr = myName.getSubtribeStr();
4779 r = Rank.SUBTRIBE();
4780 }
4781 if (parentStr == null){
4782 parentStr = myName.getTribeStr();
4783 r = Rank.TRIBE();
4784 }
4785 if (parentStr == null){
4786 parentStr = myName.getSubfamilyStr();
4787 r = Rank.SUBFAMILY();
4788 }
4789 if (parentStr == null){
4790 parentStr = myName.getFamilyStr();
4791 r = Rank.FAMILY();
4792 }
4793 if(parentStr!=null){
4794 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
4795 System.out.println("PUT IN HIERARCHY "+r+", "+parent);
4796 hierarchy.put(r,parent);
4797 }
4798 }
4799
4800 /**
4801 * @param ref
4802 * @param myName
4803 * @param parser
4804 */
4805 private void handleSubSpeciesHierarchy(Reference<?> ref, MyName myName, INonViralNameParser parser) {
4806 String parentStr = myName.getSpeciesStr();
4807 Rank r = Rank.SPECIES();
4808
4809
4810 if(parentStr==null){
4811 parentStr = myName.getSubgenusStr();
4812 r = Rank.SUBGENUS();
4813 }
4814
4815 if(parentStr==null){
4816 parentStr = myName.getGenusStr();
4817 r = Rank.GENUS();
4818 }
4819
4820 if(parentStr==null){
4821 parentStr = myName.getSubtribeStr();
4822 r = Rank.SUBTRIBE();
4823 }
4824 if (parentStr == null){
4825 parentStr = myName.getTribeStr();
4826 r = Rank.TRIBE();
4827 }
4828 if (parentStr == null){
4829 parentStr = myName.getSubfamilyStr();
4830 r = Rank.SUBFAMILY();
4831 }
4832 if (parentStr == null){
4833 parentStr = myName.getFamilyStr();
4834 r = Rank.FAMILY();
4835 }
4836 if(parentStr!=null){
4837 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
4838 System.out.println("PUT IN HIERARCHY "+r+", "+parent);
4839 hierarchy.put(r,parent);
4840 }
4841 }
4842
4843
4844 /**
4845 * @param ref
4846 * @param myName
4847 * @param parser
4848 */
4849 private void handleFormHierarchy(Reference<?> ref, MyName myName, INonViralNameParser parser) {
4850 String parentStr = myName.getSubspeciesStr();
4851 Rank r = Rank.SUBSPECIES();
4852
4853
4854 if(parentStr==null){
4855 parentStr = myName.getSpeciesStr();
4856 r = Rank.SPECIES();
4857 }
4858
4859 if(parentStr==null){
4860 parentStr = myName.getSubgenusStr();
4861 r = Rank.SUBGENUS();
4862 }
4863
4864 if(parentStr==null){
4865 parentStr = myName.getGenusStr();
4866 r = Rank.GENUS();
4867 }
4868
4869 if(parentStr==null){
4870 parentStr = myName.getSubtribeStr();
4871 r = Rank.SUBTRIBE();
4872 }
4873 if (parentStr == null){
4874 parentStr = myName.getTribeStr();
4875 r = Rank.TRIBE();
4876 }
4877 if (parentStr == null){
4878 parentStr = myName.getSubfamilyStr();
4879 r = Rank.SUBFAMILY();
4880 }
4881 if (parentStr == null){
4882 parentStr = myName.getFamilyStr();
4883 r = Rank.FAMILY();
4884 }
4885 if(parentStr!=null){
4886 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
4887 System.out.println("PUT IN HIERARCHY "+r+", "+parent);
4888 hierarchy.put(r,parent);
4889 }
4890 }
4891
4892 /**
4893 * @param ref
4894 * @param myName
4895 * @param parser
4896 */
4897 private void handleVarietyHierarchy(Reference<?> ref, MyName myName, INonViralNameParser parser) {
4898 String parentStr = myName.getSubspeciesStr();
4899 Rank r = Rank.SUBSPECIES();
4900
4901 if(parentStr==null){
4902 parentStr = myName.getSpeciesStr();
4903 r = Rank.SPECIES();
4904 }
4905
4906 if(parentStr==null){
4907 parentStr = myName.getSubgenusStr();
4908 r = Rank.SUBGENUS();
4909 }
4910
4911 if(parentStr==null){
4912 parentStr = myName.getGenusStr();
4913 r = Rank.GENUS();
4914 }
4915
4916 if(parentStr==null){
4917 parentStr = myName.getSubtribeStr();
4918 r = Rank.SUBTRIBE();
4919 }
4920 if (parentStr == null){
4921 parentStr = myName.getTribeStr();
4922 r = Rank.TRIBE();
4923 }
4924 if (parentStr == null){
4925 parentStr = myName.getSubfamilyStr();
4926 r = Rank.SUBFAMILY();
4927 }
4928 if (parentStr == null){
4929 parentStr = myName.getFamilyStr();
4930 r = Rank.FAMILY();
4931 }
4932 if(parentStr!=null){
4933 Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
4934 System.out.println("PUT IN HIERARCHY "+r+", "+parent);
4935 hierarchy.put(r,parent);
4936 }
4937 }
4938
4939 /**
4940 * @param ref
4941 * @param myName
4942 * @param parser
4943 * @param parentStr
4944 * @param r
4945 * @return
4946 */
4947 private Taxon handleParentName(Reference<?> ref, MyName myName, INonViralNameParser parser, String parentStr, Rank r) {
4948 NonViralName<?> parentNameName = (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
4949 Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
4950 // importer.getTaxonService().save(parent);
4951 // parent = CdmBase.deproxy(parent, Taxon.class);
4952
4953 boolean parentDoesNotExists = true;
4954 for (TaxonNode p : classification.getAllNodes()){
4955 if(p.getTaxon().getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(parent.getTitleCache().split("sec.")[0].trim())) {
4956 // System.out.println(p.getTaxon().getUuid());
4957 // System.out.println(parent.getUuid());
4958 parentDoesNotExists = false;
4959 parent=CdmBase.deproxy(p.getTaxon(), Taxon.class);
4960 break;
4961 }
4962 }
4963 if(parentDoesNotExists) {
4964 Taxon tmp = findMatchingTaxon(parentNameName,ref);
4965 // System.out.println("FOUND PARENT "+tmp.toString()+" for "+parentNameName.toString());
4966 if(tmp ==null)
4967 {
4968 parent=Taxon.NewInstance(parentNameName, ref);
4969 importer.getTaxonService().save(parent);
4970 parent = CdmBase.deproxy(parent, Taxon.class);
4971 } else {
4972 parent=tmp;
4973 }
4974 lookForParentNode(parentNameName, parent, ref,myName);
4975
4976 }
4977 return parent;
4978 }
4979
4980 /**
4981 * @param name
4982 * @param author
4983 * @param nomenclaturalCode2
4984 * @param rank
4985 */
4986 private void addProblemNameToFile(String name, String author, NomenclaturalCode nomenclaturalCode2, Rank rank) {
4987 try{
4988 FileWriter fstream = new FileWriter("/home/pkelbert/Bureau/NameNotParsed.txt",true);
4989 BufferedWriter out = new BufferedWriter(fstream);
4990 out.write(name+"\t"+replaceNull(author)+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\n");
4991 //Close the output stream
4992 out.close();
4993 }catch (Exception e){//Catch exception if any
4994 System.err.println("Error: " + e.getMessage());
4995 }
4996 }
4997
4998 @SuppressWarnings("unused")
4999 private String replaceNull(Object in){
5000 if (in == null) {
5001 return "";
5002 }
5003 if (in.getClass().equals(NomenclaturalCode.class)) {
5004 return ((NomenclaturalCode)in).getTitleCache();
5005 }
5006 return in.toString();
5007 }
5008
5009 /**
5010 * @param fullName
5011 * @param nomenclaturalCode2
5012 * @param rank
5013 */
5014 private void addProblemNameToFile(String type, String name, NomenclaturalCode nomenclaturalCode2, Rank rank, String problems) {
5015 try{
5016 FileWriter fstream = new FileWriter("/home/pkelbert/Bureau/NameNotParsed_"+classification.getTitleCache()+".txt",true);
5017 BufferedWriter out = new BufferedWriter(fstream);
5018 out.write(type+"\t"+name+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\t"+problems+"\n");
5019 //Close the output stream
5020 out.close();
5021 }catch (Exception e){//Catch exception if any
5022 System.err.println("Error: " + e.getMessage());
5023 }
5024
5025 }
5026
5027 }
5028
5029
5030