updated to trunk
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / taxonx2013 / TaxonXTreatmentExtractor.java
1 // $Id$
2 /**
3 * Copyright (C) 2013 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10 package eu.etaxonomy.cdm.io.taxonx2013;
11
12 import java.io.File;
13 import java.io.FileWriter;
14 import java.io.IOException;
15 import java.net.URI;
16 import java.util.ArrayList;
17 import java.util.HashMap;
18 import java.util.List;
19 import java.util.Map;
20 import java.util.Set;
21 import java.util.regex.Pattern;
22
23 import javax.xml.transform.TransformerException;
24 import javax.xml.transform.TransformerFactoryConfigurationError;
25
26 import org.apache.commons.lang.StringUtils;
27 import org.w3c.dom.Node;
28 import org.w3c.dom.NodeList;
29
30 import com.ibm.lsid.MalformedLSIDException;
31
32 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
33 import eu.etaxonomy.cdm.model.common.CdmBase;
34 import eu.etaxonomy.cdm.model.common.DefinedTermBase;
35 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
36 import eu.etaxonomy.cdm.model.common.LSID;
37 import eu.etaxonomy.cdm.model.common.Language;
38 import eu.etaxonomy.cdm.model.common.OriginalSourceType;
39 import eu.etaxonomy.cdm.model.description.Feature;
40 import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
41 import eu.etaxonomy.cdm.model.description.PolytomousKey;
42 import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
43 import eu.etaxonomy.cdm.model.description.TaxonDescription;
44 import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
45 import eu.etaxonomy.cdm.model.description.TextData;
46 import eu.etaxonomy.cdm.model.name.BacterialName;
47 import eu.etaxonomy.cdm.model.name.BotanicalName;
48 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
49 import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
50 import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
51 import eu.etaxonomy.cdm.model.name.NonViralName;
52 import eu.etaxonomy.cdm.model.name.Rank;
53 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
54 import eu.etaxonomy.cdm.model.name.ZoologicalName;
55 import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
56 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
57 import eu.etaxonomy.cdm.model.reference.Reference;
58 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
59 import eu.etaxonomy.cdm.model.taxon.Classification;
60 import eu.etaxonomy.cdm.model.taxon.Synonym;
61 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
62 import eu.etaxonomy.cdm.model.taxon.Taxon;
63 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
64 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
65 import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
66 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
67 import eu.etaxonomy.cdm.strategy.parser.ParserProblem;
68
69 /**
70 * @author pkelbert
71 * @date 2 avr. 2013
72 *
73 */
74 public class TaxonXTreatmentExtractor extends TaxonXExtractor{
75
76 private final NomenclaturalCode nomenclaturalCode;
77 private Classification classification;
78
79 private String treatmentMainName,originalTreatmentName;
80
81 private final HashMap<String,Map<String,String>> namesMap = new HashMap<String, Map<String,String>>();
82
83
84 private final Pattern keypattern = Pattern.compile("^(\\d+.*|-\\d+.*)");
85 private final Pattern keypatternend = Pattern.compile("^.+?\\d$");
86
87 private boolean maxRankRespected =false;
88
/**
 * Creates an extractor bound to one import run and prepares the collectors.
 *
 * @param nomenclaturalCode the nomenclatural code stored for later name parsing
 * @param classification the classification the extracted taxa are added to
 * @param importer the running import; also supplies the agent service used to prepare the collectors
 * @param configState the import state; stored and passed on to prepareCollectors
 */
public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode, Classification classification, TaxonXImport importer,
        TaxonXImportState configState) {
    // importer and configState are not declared in this class - presumably inherited from TaxonXExtractor
    this.importer = importer;
    this.configState = configState;

    this.classification = classification;
    this.nomenclaturalCode = nomenclaturalCode;

    // must run after the assignments above: it needs the importer's agent service
    prepareCollectors(configState, importer.getAgentService());
}
103
104 /**
105 * extracts all the treament information and save them
106 * @param treatmentnode: the XML Node
107 * @param tosave: the list of object to save into the CDM
108 * @param refMods: the reference extracted from the MODS
109 * @param sourceName: the URI of the document
110 */
111 @SuppressWarnings({ "rawtypes", "unused" })
112 protected void extractTreatment(Node treatmentnode, List<Object> tosave, Reference<?> refMods, URI sourceName) {
113 logger.info("extractTreatment");
114 List<TaxonNameBase> nametosave = new ArrayList<TaxonNameBase>();
115 NodeList children = treatmentnode.getChildNodes();
116 Taxon acceptedTaxon =null;
117 Taxon defaultTaxon =null;
118 boolean refgroup=false;
119
120 for (int i=0;i<children.getLength();i++){
121 if(children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group")) {
122 refgroup=true;
123 }
124 }
125
126 for (int i=0;i<children.getLength();i++){
127
128 if (children.item(i).getNodeName().equalsIgnoreCase("tax:nomenclature")){
129 NodeList nomenclature = children.item(i).getChildNodes();
130 boolean containsName=false;
131 for(int k=0;k<nomenclature.getLength();k++){
132 if(nomenclature.item(k).getNodeName().equalsIgnoreCase("tax:name")){
133 containsName=true;
134 break;
135 }
136 }
137 if (containsName){
138 reloadClassification();
139 //extract "main" the scientific name
140 acceptedTaxon = extractNomenclature(children.item(i),nametosave,refMods);
141 }
142 }
143 else if (children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group") && maxRankRespected){
144 reloadClassification();
145 //extract the References within the document
146 extractReferences(children.item(i),nametosave,acceptedTaxon,refMods);
147 }
148 else if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
149 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected){
150 File file = new File("/home/pkelbert/Bureau/multipleTaxonX.txt");
151 FileWriter writer;
152 try {
153 writer = new FileWriter(file ,true);
154 writer.write(sourceName+"\n");
155 writer.flush();
156 writer.close();
157 } catch (IOException e1) {
158 // TODO Auto-generated catch block
159 e1.printStackTrace();
160 }
161 String multiple = askMultiple(children.item(i));
162 if (multiple.equalsIgnoreCase("synonyms")) {
163 extractSynonyms(children.item(i),nametosave, acceptedTaxon,refMods);
164 }
165 else
166 if(multiple.equalsIgnoreCase("material examined")){
167 extractMaterials(children.item(i),acceptedTaxon, refMods, nametosave);
168 }
169 else
170 if (multiple.equalsIgnoreCase("distribution")){
171 extractDistribution(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods);
172 }
173 else
174 if (multiple.equalsIgnoreCase("type status")){
175 extractDescriptionWithReference(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,"TypeStatus");
176 }
177 else
178 if (multiple.equalsIgnoreCase("vernacular name")){
179 extractDescriptionWithReference(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,Feature.COMMON_NAME().getTitleCache());
180
181 }
182 else{
183 extractSpecificFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,multiple);
184 }
185
186 }
187 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
188 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected){
189 extractFeature(children.item(i),acceptedTaxon,defaultTaxon, nametosave, refMods, Feature.BIOLOGY_ECOLOGY());
190 }
191 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
192 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("vernacularName") && maxRankRespected){
193 extractDescriptionWithReference(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,Feature.COMMON_NAME().getTitleCache());
194 }
195 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
196 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected){
197 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods, Feature.DESCRIPTION());
198 }
199 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
200 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected){
201 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,Feature.DIAGNOSIS());
202 }
203 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
204 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected){
205 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods, Feature.DISCUSSION());
206 }
207 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
208 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("note") && maxRankRespected){
209 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods, Feature.DESCRIPTION());
210 }
211
212 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
213 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected){
214 extractDistribution(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods);
215 }
216 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
217 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected){
218 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave,refMods,Feature.ETYMOLOGY());
219 }
220
221 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
222 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected){
223 extractMaterials(children.item(i),acceptedTaxon, refMods, nametosave);
224 }
225 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:figure") && maxRankRespected){
226 extractSpecificFeature(children.item(i),acceptedTaxon,defaultTaxon, nametosave, refMods, "figure");
227 }
228 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
229 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("other") && maxRankRespected){
230 extractSpecificFeature(children.item(i),acceptedTaxon,defaultTaxon, nametosave, refMods, "table");
231 }
232
233 else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
234 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected){
235 //TODO IGNORE keys for the moment
236 //extractKey(children.item(i),acceptedTaxon, nametosave,source, refMods);
237 extractSpecificFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,"Keys - unparsed");
238 }
239 else{
240 if (!children.item(i).getNodeName().equalsIgnoreCase("tax:pb")){
241 logger.info("ANOTHER KIND OF NODES: "+children.item(i).getNodeName()+", "+children.item(i).getAttributes());
242 if (children.item(i).getAttributes() !=null) {
243 logger.info(children.item(i).getAttributes().item(0));
244 }
245 }
246 }
247 }
248 // logger.info("saveUpdateNames");
249 if (maxRankRespected){
250 importer.getNameService().saveOrUpdate(nametosave);
251 importer.getClassificationService().saveOrUpdate(classification);
252 logger.info("saveUpdateNames-ok");
253 }
254 }
255
256
257 /**
258 * @param keys
259 * @param acceptedTaxon: the current acceptedTaxon
260 * @param nametosave: the list of objects to save into the CDM
261 * @param refMods: the current reference extracted from the MODS
262 */
263 @SuppressWarnings("rawtypes")
264 private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonNameBase> nametosave, Reference<?> refMods) {
265 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
266
267 NodeList children = keys.getChildNodes();
268 String key="";
269 PolytomousKey poly = PolytomousKey.NewInstance();
270 poly.addSource(OriginalSourceType.Import, null,null,refMods,null);
271 poly.addTaxonomicScope(acceptedTaxon);
272 poly.setTitleCache("bloup");
273 // poly.addCoveredTaxon(acceptedTaxon);
274 PolytomousKeyNode root = poly.getRoot();
275 PolytomousKeyNode previous = null,tmpKey=null;
276 Taxon taxonKey=null;
277 List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
278
279 // String fullContent = keys.getTextContent();
280 for (int i=0;i<children.getLength();i++){
281 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
282 NodeList paragraph = children.item(i).getChildNodes();
283 key="";
284 taxonKey=null;
285 for (int j=0;j<paragraph.getLength();j++){
286 if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
287 if (! paragraph.item(j).getTextContent().trim().isEmpty()){
288 key+=paragraph.item(j).getTextContent().trim();
289 // logger.info("KEY: "+j+"--"+key);
290 }
291 }
292 if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
293 taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
294 }
295 }
296 // logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
297 if (keypattern.matcher(key).matches()){
298 tmpKey = PolytomousKeyNode.NewInstance(key);
299 if (taxonKey!=null) {
300 tmpKey.setTaxon(taxonKey);
301 }
302 polyNodes.add(tmpKey);
303 if (previous == null) {
304 root.addChild(tmpKey);
305 } else {
306 previous.addChild(tmpKey);
307 }
308 }else{
309 if (!key.isEmpty()){
310 tmpKey=PolytomousKeyNode.NewInstance(key);
311 if (taxonKey!=null) {
312 tmpKey.setTaxon(taxonKey);
313 }
314 polyNodes.add(tmpKey);
315 if (keypatternend.matcher(key).matches()) {
316 root.addChild(tmpKey);
317 previous=tmpKey;
318 } else{
319 previous.addChild(tmpKey);
320 }
321
322 }
323 }
324 }
325 }
326 importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
327 importer.getPolytomousKeyService().saveOrUpdate(poly);
328 }
329
330 /**
331 * @param taxons: the XML Nodegroup
332 * @param nametosave: the list of objects to save into the CDM
333 * @param acceptedTaxon: the current accepted Taxon
334 * @param refMods: the current reference extracted from the MODS
335 *
336 * @return Taxon object built
337 */
338 @SuppressWarnings({ "rawtypes", "unchecked" })
339 private Taxon getTaxonFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference<?> refMods) {
340 // logger.info("getTaxonFromXML");
341 // logger.info("acceptedTaxon: "+acceptedTaxon);
342
343 TaxonNameBase nameToBeFilled = null;
344
345 MyName myname = new MyName();
346 NomenclaturalStatusType statusType = null;
347
348 try {
349 myname = extractScientificName(taxons);
350 if (!myname.getStatus().isEmpty()){
351 try {
352 statusType = nomStatusString2NomStatus(myname.getStatus());
353 } catch (UnknownCdmTypeException e) {
354 logger.warn("Problem with status");
355 }
356 }
357
358 } catch (TransformerFactoryConfigurationError e1) {
359 logger.warn(e1);
360 } catch (TransformerException e1) {
361 logger.warn(e1);
362 }
363 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
364
365 nameToBeFilled = parser.parseFullName(myname.getName(), nomenclaturalCode, myname.getRank());
366 if (nameToBeFilled.hasProblem() &&
367 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
368 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
369 nameToBeFilled=solveNameProblem(myname.getOriginalName(), myname.getName(),parser);
370 }
371
372 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
373
374 // importer.getNameService().saveOrUpdate(nametosave);
375 Taxon t = importer.getTaxonService().findBestMatchingTaxon(nameToBeFilled.getTitleCache());
376
377 boolean statusMatch=false;
378 if(t !=null ){
379 statusMatch=compareStatus(t, statusType);
380 }
381 if (t ==null || (t != null && !statusMatch)){
382 if(statusType != null) {
383 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
384 }
385 t= new Taxon(nameToBeFilled,(Reference<?>) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
386 if (t.getSec() == null) {
387 t.setSec(refMods);
388 }
389 if(!configState.getConfig().doKeepOriginalSecundum()) {
390 t.setSec(configState.getConfig().getSecundum());
391 logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
392 }
393 t.addSource(OriginalSourceType.Import,null,null,refMods,null);
394
395 /* boolean sourceExists=false;
396 Set<IdentifiableSource> sources = t.getSources();
397 for (IdentifiableSource src : sources){
398 String micro = src.getCitationMicroReference();
399 Reference r = src.getCitation();
400 if (r.equals(refMods) && micro == null) {
401 sourceExists=true;
402 }
403 }
404 if(!sourceExists) {
405 t.addSource(OriginalSourceType.Import,null,null,refMods,null);
406 }
407 */
408
409 if (!myname.getIdentifier().isEmpty() && (myname.getIdentifier().length()>2)){
410 setLSID(myname.getIdentifier(), t);
411 }
412
413 Taxon parentTaxon = askParent(t, classification);
414 if (parentTaxon ==null){
415 while (parentTaxon == null) {
416 parentTaxon = createParent(t, refMods);
417 classification.addParentChild(parentTaxon, t, refMods, null);
418 }
419 }else{
420 classification.addParentChild(parentTaxon, t, refMods, null);
421 }
422 }
423 else{
424 t = CdmBase.deproxy(t, Taxon.class);
425 }
426 if (!configState.getConfig().doKeepOriginalSecundum()) {
427 t.setSec(configState.getConfig().getSecundum());
428 logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
429 }
430 return t;
431 }
432
433
434 /**
435 * @param taxons: the XML Nodegroup
436 * @param nametosave: the list of objects to save into the CDM
437 * @param acceptedTaxon: the current accepted Taxon
438 * @param refMods: the current reference extracted from the MODS
439 *
440 * @return Taxon object built
441 */
442 @SuppressWarnings({ "rawtypes", "unchecked" })
443 private TaxonNameBase getTaxonNameBaseFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference<?> refMods) {
444 // logger.info("getTaxonFromXML");
445 // logger.info("acceptedTaxon: "+acceptedTaxon);
446
447 TaxonNameBase nameToBeFilled = null;
448
449 MyName myName=new MyName();
450
451 NomenclaturalStatusType statusType = null;
452 try {
453 myName = extractScientificName(taxons);
454 if (!myName.getStatus().isEmpty()){
455 try {
456 statusType = nomStatusString2NomStatus(myName.getStatus());
457 } catch (UnknownCdmTypeException e) {
458 logger.warn("Problem with status");
459 }
460 }
461 } catch (TransformerFactoryConfigurationError e1) {
462 logger.warn(e1);
463 } catch (TransformerException e1) {
464 logger.warn(e1);
465 }
466 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
467
468 nameToBeFilled = parser.parseFullName(myName.getName(), nomenclaturalCode, myName.getRank());
469 if (nameToBeFilled.hasProblem() &&
470 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
471 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
472 nameToBeFilled=solveNameProblem(myName.getOriginalName(), myName.getName(),parser);
473 }
474
475 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
476 return nameToBeFilled;
477
478 }
479
480 @SuppressWarnings("rawtypes")
481 private TaxonNameBase getTaxonNameBase (TaxonNameBase name, List<TaxonNameBase> nametosave, NomenclaturalStatusType statusType){
482 List<TaxonNameBase> names = importer.getNameService().list(TaxonNameBase.class, null, null, null, null);
483 for (TaxonNameBase tb : names){
484 if (tb.getTitleCache().equalsIgnoreCase(name.getTitleCache())) {
485 boolean statusMatch=false;
486 if(tb !=null ){
487 statusMatch=compareStatus(tb, statusType);
488 }
489 if (!statusMatch){
490 if(statusType != null) {
491 name.addStatus(NomenclaturalStatus.NewInstance(statusType));
492 }
493 }else
494 {
495 logger.info("TaxonNameBase FOUND"+name.getTitleCache());
496 return tb;
497 }
498 }
499 }
500 logger.info("TaxonNameBase NOT FOUND "+name.getTitleCache());
501 System.out.println("add name "+name);
502 nametosave.add(name);
503 return name;
504
505 }
506
507
508
509 /**
510 * @param tb
511 * @param statusType
512 * @return
513 */
514 private boolean compareStatus(TaxonNameBase tb, NomenclaturalStatusType statusType) {
515 boolean statusMatch=false;
516 //found one taxon
517 Set<NomenclaturalStatus> status = tb.getStatus();
518 if (statusType!=null && status.size()>0){ //the statusType is known for both taxon
519 for (NomenclaturalStatus st:status){
520 NomenclaturalStatusType stype = st.getType();
521 if (stype.toString().equalsIgnoreCase(statusType.toString())) {
522 statusMatch=true;
523 }
524 }
525 }
526 else{
527 if(statusType == null && status.size()==0) {//there is no statusType, we can assume it's the same
528 statusMatch=true;
529 }
530 }
531 return statusMatch;
532 }
533
534 /**
535 *
536 */
537 private void reloadClassification() {
538 Classification cl = importer.getClassificationService().find(classification.getUuid());
539 if (cl != null){
540 classification=cl;
541 }else{
542 importer.getClassificationService().saveOrUpdate(classification);
543 classification = importer.getClassificationService().find(classification.getUuid());
544 }
545
546 }
547
548 /**
549 * Create a Taxon for the current NameBase, based on the current reference
550 * @param taxonNameBase
551 * @param refMods: the current reference extracted from the MODS
552 * @return Taxon
553 */
554 @SuppressWarnings({ "unused", "rawtypes" })
555 private Taxon getTaxon(TaxonNameBase taxonNameBase, Reference<?> refMods) {
556 Taxon t = new Taxon(taxonNameBase,null );
557 if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
558 t.setSec(configState.getConfig().getSecundum());
559 logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
560 }
561 /*<<<<<<< .courant
562 boolean sourceExists=false;
563 Set<IdentifiableSource> sources = t.getSources();
564 for (IdentifiableSource src : sources){
565 String micro = src.getCitationMicroReference();
566 Reference r = src.getCitation();
567 if (r.equals(refMods) && micro == null) {
568 sourceExists=true;
569 }
570 }
571 if(!sourceExists) {
572 t.addSource(null,null,refMods,null);
573 }
574 =======*/
575 t.addSource(OriginalSourceType.Import,null,null,refMods,null);
576 return t;
577 }
578
579 @SuppressWarnings("rawtypes")
580 private void extractDescriptionWithReference(Node typestatus, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave,
581 Reference<?> refMods, String featureName) {
582 System.out.println("extractDescriptionWithReference !");
583 NodeList children = typestatus.getChildNodes();
584
585 List<DefinedTermBase> features = importer.getTermService().list(Feature.class, null,null,null,null);
586 Feature currentFeature=null;
587 for (DefinedTermBase feature: features){
588 String tmpF = ((Feature)feature).getTitleCache();
589 if (tmpF.equalsIgnoreCase(featureName)) {
590 currentFeature=(Feature)feature;
591 }
592 }
593 if (currentFeature == null) {
594 currentFeature=Feature.NewInstance(featureName, featureName, featureName);
595 importer.getTermService().saveOrUpdate(currentFeature);
596 }
597
598 String r="";String s="";
599 for (int i=0;i<children.getLength();i++){
600 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
601 s+=children.item(i).getTextContent().trim();
602 }
603 if (children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
604 r+= children.item(i).getTextContent().trim();
605 }
606 if (s.indexOf(r)>-1) {
607 s=s.split(r)[0];
608 }
609 }
610 Reference<?> currentref = ReferenceFactory.newGeneric();
611 if(!r.isEmpty()) {
612 currentref.setTitle(r);
613 } else {
614 currentref=refMods;
615 }
616 setParticularDescription(s,acceptedTaxon,defaultTaxon, currentref, refMods,currentFeature);
617
618 }
619
620 /**
621 * @param nametosave
622 * @param distribution: the XML node group
623 * @param acceptedTaxon: the current accepted Taxon
624 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
625 * @param refMods: the current reference extracted from the MODS
626 */
627 @SuppressWarnings("rawtypes")
628 private void extractDistribution(Node distribution, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave, Reference<?> refMods) {
629 // logger.info("DISTRIBUTION");
630 // logger.info("acceptedTaxon: "+acceptedTaxon);
631 NodeList children = distribution.getChildNodes();
632 Map<Integer,List<MySpecimenOrObservation>> specimenOrObservations = new HashMap<Integer, List<MySpecimenOrObservation>>();
633 Map<Integer,String> descriptionsFulltext = new HashMap<Integer,String>();
634
635 for (int i=0;i<children.getLength();i++){
636 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
637 NodeList paragraph = children.item(i).getChildNodes();
638 for (int j=0;j<paragraph.getLength();j++){
639 if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
640 if(!paragraph.item(j).getTextContent().trim().isEmpty()) {
641 String s =paragraph.item(j).getTextContent().trim();
642 if (descriptionsFulltext.get(i) !=null){
643 s = descriptionsFulltext.get(i)+" "+s;
644 }
645 descriptionsFulltext.put(i, s);
646 }
647 }
648 else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
649 String s =getTaxonNameBaseFromXML(paragraph.item(j),nametosave,refMods).toString().split("sec.")[0];
650 if (descriptionsFulltext.get(i) !=null){
651 s = descriptionsFulltext.get(i)+" "+s;
652 }
653 descriptionsFulltext.put(i, s);
654 }
655 else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")){
656 MySpecimenOrObservation specimenOrObservation = new MySpecimenOrObservation();
657 DerivedUnit derivedUnitBase = null;
658 specimenOrObservation = extractSpecimenOrObservation(paragraph.item(j), derivedUnitBase, SpecimenOrObservationType.DerivedUnit);
659 List<MySpecimenOrObservation> speObsList = specimenOrObservations.get(i);
660 if (speObsList == null) {
661 speObsList=new ArrayList<MySpecimenOrObservation>();
662 }
663 speObsList.add(specimenOrObservation);
664 specimenOrObservations.put(i,speObsList);
665
666 String s = specimenOrObservation.getDerivedUnitBase().toString();
667 if (descriptionsFulltext.get(i) !=null){
668 s = descriptionsFulltext.get(i)+" "+s;
669 }
670 descriptionsFulltext.put(i, s);
671 }
672
673 }
674 }
675 }
676
677 int m=0;
678 for (int k:descriptionsFulltext.keySet()) {
679 if (k>m) {
680 m=k;
681 }
682 }
683 for (int k:specimenOrObservations.keySet()) {
684 if (k>m) {
685 m=k;
686 }
687 }
688
689
690 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
691 Feature currentFeature = Feature.DISTRIBUTION();
692 DerivedUnit derivedUnitBase=null;
693 String descr="";
694 for (int k=0;k<=m;k++){
695 if(specimenOrObservations.keySet().contains(k)){
696 for (MySpecimenOrObservation soo:specimenOrObservations.get(k) ) {
697 derivedUnitBase = soo.getDerivedUnitBase();
698 descr=soo.getDescr();
699
700 /*<<<<<<< .courant
701 boolean sourceExists=false;
702 Set<IdentifiableSource> sources = derivedUnitBase.getSources();
703 for (IdentifiableSource src : sources){
704 String micro = src.getCitationMicroReference();
705 Reference r = src.getCitation();
706 if (r.equals(refMods) && micro == null) {
707 sourceExists=true;
708 }
709 }
710 if(!sourceExists) {
711 derivedUnitBase.addSource(null,null,refMods,null);
712 }
713 =======*/
714 derivedUnitBase.addSource(OriginalSourceType.Import, null,null,refMods,null);
715
716 importer.getOccurrenceService().saveOrUpdate(derivedUnitBase);
717
718 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
719 acceptedTaxon.addDescription(taxonDescription);
720
721
722 IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
723
724 Feature feature=null;
725 feature = makeFeature(derivedUnitBase);
726 if(!StringUtils.isEmpty(descr)) {
727 derivedUnitBase.setTitleCache(descr, true);
728 }
729 indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
730 indAssociation.setFeature(feature);
731 indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
732
733 /* sourceExists=false;
734 Set<DescriptionElementSource> dsources = indAssociation.getSources();
735 for (DescriptionElementSource src : dsources){
736 String micro = src.getCitationMicroReference();
737 Reference r = src.getCitation();
738 if (r.equals(refMods) && micro == null) {
739 sourceExists=true;
740 }
741 }
742 if(!sourceExists) {
743 indAssociation.addSource(null, null, refMods, null);
744 }
745 */
746 indAssociation.addSource(OriginalSourceType.Import, null,null,refMods,null);
747
748 taxonDescription.addElement(indAssociation);
749 taxonDescription.setTaxon(acceptedTaxon);
750 taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
751
752 /*sourceExists=false;
753 sources = taxonDescription.getSources();
754 for (IdentifiableSource src : sources){
755 String micro = src.getCitationMicroReference();
756 Reference r = src.getCitation();
757 if (r.equals(refMods) && micro == null) {
758 sourceExists=true;
759 }
760 }
761 if(!sourceExists) {
762 taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
763 }*/
764 importer.getDescriptionService().saveOrUpdate(taxonDescription);
765 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
766 td.setDescribedSpecimenOrObservation(soo.getDerivedUnitBase());
767 }
768 }
769
770 if (descriptionsFulltext.keySet().contains(k)){
771 if (!descriptionsFulltext.get(k).isEmpty() && (descriptionsFulltext.get(k).startsWith("Hab.") || descriptionsFulltext.get(k).startsWith("Habitat"))){
772 setParticularDescription(descriptionsFulltext.get(k),acceptedTaxon,defaultTaxon, refMods, Feature.HABITAT());
773 break;
774 }
775 else{
776 TextData textData = TextData.NewInstance();
777
778 textData.setFeature(currentFeature);
779 textData.putText(Language.UNKNOWN_LANGUAGE(), descriptionsFulltext.get(k));
780 textData.addSource(OriginalSourceType.Import, null, null, refMods, null);
781
782 td.addElement(textData);
783 }
784 }
785
786
787 if (descriptionsFulltext.keySet().contains(k) || specimenOrObservations.keySet().contains(k)){
788 /*<<<<<<< .courant
789 boolean sourceExists=false;
790 Set<IdentifiableSource> sources = td.getSources();
791 for (IdentifiableSource src : sources){
792 String micro = src.getCitationMicroReference();
793 Reference r = src.getCitation();
794 if (r.equals(refMods) && micro == null) {
795 sourceExists=true;
796 }
797 }
798 if(!sourceExists) {
799 td.addSource(null,null,refMods,null);
800 }
801 =======*/
802 td.addSource(OriginalSourceType.Import, null,null,refMods,null);
803 acceptedTaxon.addDescription(td);
804 importer.getDescriptionService().saveOrUpdate(td);
805 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
806 }
807 }
808 }
809
810
811 /**
812 * @param materials: the XML node group
813 * @param acceptedTaxon: the current accepted Taxon
814 * @param refMods: the current reference extracted from the MODS
815 */
816 @SuppressWarnings("rawtypes")
817 private void extractMaterials(Node materials, Taxon acceptedTaxon, Reference<?> refMods,List<TaxonNameBase> nametosave) {
818 // logger.info("EXTRACTMATERIALS");
819 // logger.info("acceptedTaxon: "+acceptedTaxon);
820 NodeList children = materials.getChildNodes();
821 NodeList events = null;
822 String descr="";
823
824 DerivedUnit derivedUnitBase=null;
825 MySpecimenOrObservation myspecimenOrObservation = null;
826
827 for (int i=0;i<children.getLength();i++){
828 String rawAssociation="";
829 boolean added=false;
830 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
831 events = children.item(i).getChildNodes();
832 for(int k=0;k<events.getLength();k++){
833 if (events.item(k).getNodeName().equalsIgnoreCase("tax:name")){
834 String linkedTaxon = getTaxonNameBaseFromXML(events.item(k), nametosave,refMods).toString();//TODO NOT IMPLEMENTED IN THE CDM YET
835 rawAssociation+=linkedTaxon.split("sec")[0];
836 }
837 if (! events.item(k).getNodeName().equalsIgnoreCase("tax:name")
838 && !events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
839 rawAssociation+= events.item(k).getTextContent().trim();
840 }
841 if(events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
842 if (!containsDistinctLetters(rawAssociation.replaceAll(";",""))) {
843 rawAssociation="no description text";
844 }
845 added=true;
846 DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
847 derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
848 /*<<<<<<< .courant
849 System.out.println("derivedUnitBase: "+derivedUnitBase);
850
851 boolean sourceExists=false;
852 Set<IdentifiableSource> sources = derivedUnitBase.getSources();
853 for (IdentifiableSource src : sources){
854 String micro = src.getCitationMicroReference();
855 Reference r = src.getCitation();
856 if (r.equals(refMods) && micro == null) {
857 sourceExists=true;
858 }
859 }
860 if(!sourceExists) {
861 derivedUnitBase.addSource(null,null,refMods,null);
862 }
863
864 =======*/
865 derivedUnitBase.addSource(OriginalSourceType.Import, null,null,refMods,null);
866 importer.getOccurrenceService().saveOrUpdate(derivedUnitBase);
867
868 myspecimenOrObservation = extractSpecimenOrObservation(events.item(k),derivedUnitBase,SpecimenOrObservationType.DerivedUnit);
869 derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
870 descr=myspecimenOrObservation.getDescr();
871
872 /*<<<<<<< .courant
873 sourceExists=false;
874 sources = derivedUnitBase.getSources();
875 for (IdentifiableSource src : sources){
876 String micro = src.getCitationMicroReference();
877 Reference r = src.getCitation();
878 if (r.equals(refMods) && micro == null) {
879 sourceExists=true;
880 }
881 }
882 if(!sourceExists) {
883 derivedUnitBase.addSource(null,null,refMods,null);
884 }
885 =======*/
886 derivedUnitBase.addSource(OriginalSourceType.Import, null,null,refMods,null);
887
888 importer.getOccurrenceService().saveOrUpdate(derivedUnitBase);
889
890 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
891 acceptedTaxon.addDescription(taxonDescription);
892
893
894 IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
895
896 Feature feature = makeFeature(derivedUnitBase);
897 if(!StringUtils.isEmpty(descr)) {
898 derivedUnitBase.setTitleCache(descr, true);
899 }
900 indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
901 indAssociation.setFeature(feature);
902 indAssociation.addSource(OriginalSourceType.Import,null, null, refMods, null);
903
904 /*sourceExists=false;
905 Set<DescriptionElementSource> dsources = indAssociation.getSources();
906 for (DescriptionElementSource src : dsources){
907 String micro = src.getCitationMicroReference();
908 Reference r = src.getCitation();
909 if (r.equals(refMods) && micro == null) {
910 sourceExists=true;
911 }
912 }
913 if(!sourceExists) {
914 indAssociation.addSource(null, null, refMods, null);
915 }
916 */
917
918 taxonDescription.addElement(indAssociation);
919 taxonDescription.setTaxon(acceptedTaxon);
920 taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
921
922 /*sourceExists=false;
923 sources = taxonDescription.getSources();
924 for (IdentifiableSource src : sources){
925 String micro = src.getCitationMicroReference();
926 Reference r = src.getCitation();
927 if (r.equals(refMods) && micro == null) {
928 sourceExists=true;
929 }
930 }
931 if(!sourceExists) {
932 taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
933 }*/
934 importer.getDescriptionService().saveOrUpdate(taxonDescription);
935 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
936 }
937 if (!rawAssociation.isEmpty() && !added){
938 DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
939 derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
940
941 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
942 acceptedTaxon.addDescription(taxonDescription);
943
944 IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
945
946 Feature feature = Feature.MATERIALS_EXAMINED();
947 if(!StringUtils.isEmpty(rawAssociation)) {
948 derivedUnitBase.setTitleCache(rawAssociation, true);
949 }
950 indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
951 indAssociation.setFeature(feature);
952 indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
953
954 /*boolean sourceExists=false;
955 Set<DescriptionElementSource> dsources = indAssociation.getSources();
956 for (DescriptionElementSource src : dsources){
957 String micro = src.getCitationMicroReference();
958 Reference r = src.getCitation();
959 if (r.equals(refMods) && micro == null) {
960 sourceExists=true;
961 }
962 }
963 if(!sourceExists) {
964 indAssociation.addSource(null, null, refMods, null);
965 }*/
966 taxonDescription.addElement(indAssociation);
967 taxonDescription.setTaxon(acceptedTaxon);
968 taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
969
970 /*sourceExists=false;
971 Set<IdentifiableSource> sources = taxonDescription.getSources();
972 for (IdentifiableSource src : sources){
973 String micro = src.getCitationMicroReference();
974 Reference r = src.getCitation();
975 if (r.equals(refMods) && micro == null) {
976 sourceExists=true;
977 }
978 }
979 if(!sourceExists) {
980 taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
981 }*/
982
983 importer.getDescriptionService().saveOrUpdate(taxonDescription);
984 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
985
986 rawAssociation="";
987 }
988 }
989 }
990 }
991 }
992
993 /**
994 * @param materials: the XML node group
995 * @param acceptedTaxon: the current accepted Taxon
996 * @param refMods: the current reference extracted from the MODS
997 */
998 @SuppressWarnings("rawtypes")
999 private String extractMaterialsDirect(Node materials, Taxon acceptedTaxon, Reference<?> refMods, String event) {
1000 // logger.info("EXTRACTMATERIALS");
1001 // logger.info("acceptedTaxon: "+acceptedTaxon);
1002 String descr="";
1003
1004 DerivedUnit derivedUnitBase=null;
1005 MySpecimenOrObservation myspecimenOrObservation = null;
1006
1007
1008 myspecimenOrObservation = extractSpecimenOrObservation(materials,derivedUnitBase, SpecimenOrObservationType.DerivedUnit);
1009 derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
1010 descr=myspecimenOrObservation.getDescr();
1011
1012 /*<<<<<<< .courant
1013 boolean sourceExists=false;
1014 Set<IdentifiableSource> sources = derivedUnitBase.getSources();
1015 for (IdentifiableSource src : sources){
1016 String micro = src.getCitationMicroReference();
1017 Reference r = src.getCitation();
1018 if (r.equals(refMods) && micro == null) {
1019 sourceExists=true;
1020 }
1021 }
1022 if(!sourceExists) {
1023 derivedUnitBase.addSource(null,null,refMods,null);
1024 }
1025 =======*/
1026 derivedUnitBase.addSource(OriginalSourceType.Import, null,null,refMods,null);
1027
1028 importer.getOccurrenceService().saveOrUpdate(derivedUnitBase);
1029
1030 TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
1031 acceptedTaxon.addDescription(taxonDescription);
1032
1033
1034 IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
1035
1036 Feature feature=null;
1037 if (event.equalsIgnoreCase("collection")){
1038 feature = makeFeature(derivedUnitBase);
1039 }
1040 else{
1041 feature = Feature.MATERIALS_EXAMINED();
1042 }
1043 if(!StringUtils.isEmpty(descr)) {
1044 derivedUnitBase.setTitleCache(descr);
1045 }
1046 indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
1047 indAssociation.setFeature(feature);
1048 indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
1049
1050 /* sourceExists=false;
1051 Set<DescriptionElementSource> dsources = indAssociation.getSources();
1052 for (DescriptionElementSource src : dsources){
1053 String micro = src.getCitationMicroReference();
1054 Reference r = src.getCitation();
1055 if (r.equals(refMods) && micro == null) {
1056 sourceExists=true;
1057 }
1058 }
1059 if(!sourceExists) {
1060 indAssociation.addSource(null, null, refMods, null);
1061 }
1062 */
1063 taxonDescription.addElement(indAssociation);
1064 taxonDescription.setTaxon(acceptedTaxon);
1065 taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
1066
1067 /* sourceExists=false;
1068 sources = taxonDescription.getSources();
1069 for (IdentifiableSource src : sources){
1070 String micro = src.getCitationMicroReference();
1071 Reference r = src.getCitation();
1072 if (r.equals(refMods) && micro == null) {
1073 sourceExists=true;
1074 }
1075 }
1076 if(!sourceExists) {
1077 taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
1078 }
1079 */
1080 importer.getDescriptionService().saveOrUpdate(taxonDescription);
1081 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1082
1083 return derivedUnitBase.getTitleCache();
1084
1085 }
1086
1087
1088 /**
1089 * @param description: the XML node group
1090 * @param acceptedTaxon: the current acceptedTaxon
1091 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1092 * @param nametosave: the list of objects to save into the CDM
1093 * @param refMods: the current reference extracted from the MODS
1094 * @param featureName: the feature name
1095 */
1096 private String extractSpecificFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
1097 List<TaxonNameBase> nametosave, Reference<?> refMods, String featureName ) {
1098 // System.out.println("GRUUUUuu");
1099 NodeList children = description.getChildNodes();
1100 NodeList insideNodes ;
1101 NodeList trNodes;
1102 NodeList tdNodes;
1103 String descr ="";
1104 String localdescr="";
1105 List<String> blabla=null;
1106 List<String> text = new ArrayList<String>();
1107
1108 String table="<table>";
1109 String head="";
1110 String line="";
1111
1112 // String fullContent = description.getTextContent();
1113 for (int i=0;i<children.getLength();i++){
1114 localdescr="";
1115 if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
1116 descr += children.item(i).getTextContent().trim();
1117 }
1118 // if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1119 // children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("other") &&
1120 // children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1121 if (featureName.equalsIgnoreCase("table")){
1122 System.out.println("children.item(i).name: "+i+"-- "+children.item(i).getNodeName());
1123 if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1124 children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("thead")){
1125 head="<th>";
1126 trNodes = children.item(i).getChildNodes();
1127 for (int k=0;k<trNodes.getLength();k++){
1128 System.out.println("NB ELEMENTS "+k +"("+trNodes.getLength()+")");
1129 if (trNodes.item(k).getNodeName().equalsIgnoreCase("tax:div")
1130 && trNodes.item(k).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1131
1132 System.out.println("hop");
1133 line="<tr>";
1134 tdNodes=trNodes.item(k).getChildNodes();
1135 for (int l=0;l<tdNodes.getLength();l++){
1136 if (tdNodes.item(l).getNodeName().equalsIgnoreCase("tax:p")){
1137 line+="<td>"+tdNodes.item(l).getTextContent()+"</td>";
1138 }
1139 }
1140 line+="</tr>";
1141 head+=line;
1142 }
1143 }
1144 head+="</th>";
1145 table+=head;
1146 // }
1147 line="<tr>";
1148 if (children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1149 line="<tr>";
1150 tdNodes=children.item(i).getChildNodes();
1151 for (int l=0;l<tdNodes.getLength();l++){
1152 if (tdNodes.item(l).getNodeName().equalsIgnoreCase("tax:p")){
1153 line+="<td>"+tdNodes.item(l).getTextContent()+"</td>";
1154 }
1155 }
1156 }
1157 line+="</tr>";
1158 if (!line.equalsIgnoreCase("<tr></tr>")) {
1159 table+=line;
1160 }
1161 }
1162 if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1163 children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1164 line="<tr>";
1165 trNodes = children.item(i).getChildNodes();
1166 for (int k=0;k<trNodes.getLength();k++){
1167 if (trNodes.item(k).getNodeName().equalsIgnoreCase("tax:p")){
1168 line+="<td>"+trNodes.item(k).getTextContent()+"</td>";
1169 }
1170 }
1171 line+="</tr>";
1172 if(!line.equalsIgnoreCase("<tr></tr>")) {
1173 table+=line;
1174 }
1175 }
1176 }
1177 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1178 insideNodes=children.item(i).getChildNodes();
1179 blabla= new ArrayList<String>();
1180 for (int j=0;j<insideNodes.getLength();j++){
1181 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1182 String linkedTaxon = getTaxonNameBaseFromXML(insideNodes.item(j), nametosave,refMods).toString();//TODO NOT IMPLEMENTED IN THE CDM YET
1183 blabla.add(linkedTaxon.split("sec")[0]);
1184 }
1185 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1186 if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1187 blabla.add(insideNodes.item(j).getTextContent().trim());
1188 localdescr += insideNodes.item(j).getTextContent().trim();
1189 }
1190 }
1191 }
1192 if (!blabla.isEmpty()) {
1193 List<DefinedTermBase> features = importer.getTermService().list(Feature.class, null,null,null,null);
1194 Feature currentFeature=null;
1195 for (DefinedTermBase feature: features){
1196 String tmpF = ((Feature)feature).getTitleCache();
1197 if (tmpF.equalsIgnoreCase(featureName)) {
1198 currentFeature=(Feature)feature;
1199 }
1200 }
1201 if (currentFeature == null) {
1202 currentFeature=Feature.NewInstance(featureName, featureName, featureName);
1203 importer.getTermService().saveOrUpdate(currentFeature);
1204 }
1205 setParticularDescription(StringUtils.join(blabla," "),acceptedTaxon,defaultTaxon, refMods,currentFeature);
1206 }
1207 text.add(StringUtils.join(blabla," "));
1208 }
1209 }
1210
1211 table+="</table>";
1212 if (!table.equalsIgnoreCase("<table></table>")){
1213 System.out.println("TABLE : "+table);
1214 text.add(table);
1215 }
1216
1217 if (text !=null && !text.isEmpty()) {
1218 return StringUtils.join(text," ");
1219 } else {
1220 return "";
1221 }
1222
1223 }
1224
1225
1226
1227
1228 /**
1229 * @param children: the XML node group
1230 * @param nametosave: the list of objects to save into the CDM
1231 * @param acceptedTaxon: the current acceptedTaxon
1232 * @param refMods: the current reference extracted from the MODS
1233 * @param fullContent :the parsed XML content
1234 * @return a list of description (text)
1235 */
1236 @SuppressWarnings("unused")
1237 private List<String> parseParagraph(List<TaxonNameBase> nametosave, Taxon acceptedTaxon, Reference<?> refMods, Node paragraph, Feature feature){
1238 List<String> fullDescription= new ArrayList<String>();
1239 // String localdescr;
1240 String descr="";
1241 NodeList insideNodes ;
1242 boolean collectionEvent = false;
1243 List<Node>collectionEvents = new ArrayList<Node>();
1244
1245 NodeList children = paragraph.getChildNodes();
1246
1247 for (int i=0;i<children.getLength();i++){
1248 // localdescr="";
1249 if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
1250 descr += children.item(i).getTextContent().trim();
1251 }
1252 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1253 insideNodes=children.item(i).getChildNodes();
1254 List<String> blabla= new ArrayList<String>();
1255 for (int j=0;j<insideNodes.getLength();j++){
1256 boolean nodeKnown = false;
1257 System.out.println("insideNodes.item(j).getNodeName() : "+insideNodes.item(j).getNodeName());
1258 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1259 String linkedTaxon = getTaxonNameBaseFromXML(insideNodes.item(j), nametosave,refMods).toString();//TODO NOT IMPLEMENTED IN THE CDM YET
1260 blabla.add(linkedTaxon.split("sec")[0]);
1261 nodeKnown=true;
1262 }
1263 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1264 if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1265 blabla.add(insideNodes.item(j).getTextContent().trim());
1266 // localdescr += insideNodes.item(j).getTextContent().trim();
1267 }
1268 nodeKnown=true;
1269 }
1270 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:bibref")) {
1271 String ref = insideNodes.item(j).getTextContent().trim();
1272 if (ref.endsWith(";") && ((ref.length())>1)) {
1273 ref=ref.substring(0, ref.length()-1)+".";
1274 }
1275 Reference<?> reference = ReferenceFactory.newGeneric();
1276 reference.setTitleCache(ref, true);
1277 blabla.add(reference.getTitleCache());
1278 nodeKnown=true;
1279 }
1280 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:figure")){
1281 System.out.println("OUHOU");
1282 String figure = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, nametosave, refMods, "figure");
1283 blabla.add(figure);
1284 }
1285 if(insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:div") &&
1286 insideNodes.item(j).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("other") &&
1287 insideNodes.item(j).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1288 System.out.println("OUI?");
1289 String table = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, nametosave, refMods, "table");
1290 blabla.add(table);
1291 }
1292 if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1293 logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1294 String titlecache = extractMaterialsDirect(insideNodes.item(j), acceptedTaxon, refMods, "collection");
1295 blabla.add(titlecache);
1296 collectionEvent=true;
1297 collectionEvents.add(insideNodes.item(j));
1298 nodeKnown=true;
1299 }
1300 if (!nodeKnown && !insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:pb")) {
1301 logger.info("Node not handled yet : "+insideNodes.item(j).getNodeName());
1302 }
1303
1304 }
1305 if (!blabla.isEmpty()) {
1306 fullDescription.add(StringUtils.join(blabla," "));
1307 }
1308 }
1309 if (children.item(i).getNodeName().equalsIgnoreCase("tax:figure")){
1310 String figure = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, nametosave, refMods, "figure");
1311 fullDescription.add(figure);
1312 }
1313 if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1314 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("other") &&
1315 children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1316 String table = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, nametosave, refMods, "table");
1317 fullDescription.add(table);
1318 }
1319 }
1320 // if (collectionEvent) {
1321 // logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1322 // for (Node coll:collectionEvents){
1323 // = extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
1324 // }
1325 // }
1326 return fullDescription;
1327 }
1328
1329
1330 /**
1331 * @param description: the XML node group
1332 * @param acceptedTaxon: the current acceptedTaxon
1333 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1334 * @param nametosave: the list of objects to save into the CDM
1335 * @param refMods: the current reference extracted from the MODS
1336 * @param feature: the feature to link the data with
1337 */
1338 private void extractFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave, Reference<?> refMods, Feature feature){
1339 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1340 List<String> fullDescription= parseParagraph( nametosave, acceptedTaxon, refMods, description,feature);
1341
1342 if (!fullDescription.isEmpty()) {
1343 setParticularDescription(StringUtils.join(fullDescription,"<br/>"),acceptedTaxon,defaultTaxon, refMods,feature);
1344 }
1345
1346 }
1347
1348
1349 /**
1350 * @param descr: the XML Nodegroup to parse
1351 * @param acceptedTaxon: the current acceptedTaxon
1352 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1353 * @param refMods: the current reference extracted from the MODS
1354 * @param currentFeature: the feature name
1355 * @return
1356 */
1357 private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon, Reference<?> refMods, Feature currentFeature) {
1358 // logger.info("setParticularDescription "+currentFeature);
1359 // logger.info("acceptedTaxon: "+acceptedTaxon);
1360 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1361
1362 TextData textData = TextData.NewInstance();
1363 textData.setFeature(currentFeature);
1364 textData.addSource(OriginalSourceType.Import, null,null,refMods,null);
1365
1366 textData.putText(Language.UNKNOWN_LANGUAGE(), descr+"<br/>");
1367
1368 if(! descr.isEmpty() && (acceptedTaxon!=null)){
1369 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1370 td.addElement(textData);
1371 td.addSource(OriginalSourceType.Import,null,null,refMods,null);
1372 acceptedTaxon.addDescription(td);
1373 importer.getDescriptionService().saveOrUpdate(td);
1374 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1375 }
1376
1377 if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1378 try{
1379 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1380 if (tmp!=null) {
1381 defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1382 }else{
1383 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1384 }
1385 }catch(Exception e){
1386 logger.debug("TAXON EXISTS"+defaultTaxon);
1387 }
1388
1389 TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1390 defaultTaxon.addDescription(td);
1391 td.addElement(textData);
1392 td.addSource(OriginalSourceType.Import,null,null,refMods,null);
1393 importer.getDescriptionService().saveOrUpdate(td);
1394 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1395 }
1396 }
1397
1398 /**
1399 * @param descr: the XML Nodegroup to parse
1400 * @param acceptedTaxon: the current acceptedTaxon
1401 * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1402 * @param refMods: the current reference extracted from the MODS
1403 * @param currentFeature: the feature name
1404 * @return
1405 */
1406 private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon,Reference<?> currentRef, Reference<?> refMods, Feature currentFeature) {
1407 System.out.println("setParticularDescriptionSPecial "+currentFeature);
1408 // logger.info("acceptedTaxon: "+acceptedTaxon);
1409 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1410
1411 TextData textData = TextData.NewInstance();
1412 textData.setFeature(currentFeature);
1413 textData.addSource(OriginalSourceType.Import,null,null,refMods,null);
1414
1415 textData.putText(Language.UNKNOWN_LANGUAGE(), descr+"<br/>");
1416
1417 if(! descr.isEmpty() && (acceptedTaxon!=null)){
1418 TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1419 td.addElement(textData);
1420 td.addSource(OriginalSourceType.Import,null,null,refMods,null);
1421 if(currentRef != refMods) {
1422 td.addSource(OriginalSourceType.Import,null,null,currentRef,null);
1423 }
1424 acceptedTaxon.addDescription(td);
1425 importer.getDescriptionService().saveOrUpdate(td);
1426 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1427 }
1428
1429 if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1430 try{
1431 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1432 if (tmp!=null) {
1433 defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1434 }else{
1435 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1436 }
1437 }catch(Exception e){
1438 logger.debug("TAXON EXISTS"+defaultTaxon);
1439 }
1440
1441 TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1442 defaultTaxon.addDescription(td);
1443 td.addElement(textData);
1444 if(currentRef != refMods) {
1445 td.addSource(OriginalSourceType.Import,null,null,refMods,null);
1446 }
1447 td.addSource(OriginalSourceType.Import,null,null,currentRef,null);
1448 importer.getDescriptionService().saveOrUpdate(td);
1449 importer.getTaxonService().saveOrUpdate(defaultTaxon);
1450 }
1451 }
1452
1453
1454
1455 /**
1456 * @param synonyms: the XML Nodegroup to parse
1457 * @param nametosave: the list of objects to save into the CDM
1458 * @param acceptedTaxon: the current acceptedTaxon
1459 * @param refMods: the current reference extracted from the MODS
1460 */
1461 @SuppressWarnings({ "rawtypes", "unchecked" })
1462 private void extractSynonyms(Node synonyms, List<TaxonNameBase> nametosave,Taxon acceptedTaxon, Reference<?> refMods) {
1463 // logger.info("extractSynonyms: "+acceptedTaxon);
1464 Taxon ttmp = (Taxon) importer.getTaxonService().find(acceptedTaxon.getUuid());
1465 if (ttmp != null) {
1466 acceptedTaxon = CdmBase.deproxy(ttmp,Taxon.class);
1467 }
1468 else{
1469 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1470 }
1471 NodeList children = synonyms.getChildNodes();
1472 TaxonNameBase nameToBeFilled = null;
1473 List<MyName> names = new ArrayList<MyName>();
1474
1475 if(synonyms.getNodeName().equalsIgnoreCase("tax:name")){
1476 MyName myName;
1477 try {
1478 myName = extractScientificName(synonyms);
1479 names.add(myName);
1480 } catch (TransformerFactoryConfigurationError e) {
1481 logger.warn(e);
1482 } catch (TransformerException e) {
1483 logger.warn(e);
1484 }
1485 }
1486
1487
1488 for (int i=0;i<children.getLength();i++){
1489 if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1490 NodeList tmp = children.item(i).getChildNodes();
1491 // String fullContent = children.item(i).getTextContent();
1492 for (int j=0; j< tmp.getLength();j++){
1493 if(tmp.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1494 MyName myName;
1495 try {
1496 myName = extractScientificName(tmp.item(j));
1497 names.add(myName);
1498 } catch (TransformerFactoryConfigurationError e) {
1499 logger.warn(e);
1500 } catch (TransformerException e) {
1501 logger.warn(e);
1502 }
1503
1504 }
1505 }
1506 }
1507 if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
1508 MyName myName;
1509 try {
1510 myName = extractScientificName(children.item(i));
1511 names.add(myName);
1512 } catch (TransformerFactoryConfigurationError e) {
1513 logger.warn(e);
1514 } catch (TransformerException e) {
1515 logger.warn(e);
1516 }
1517
1518 }
1519 }
1520 NomenclaturalStatusType statusType = null;
1521
1522 for(MyName name:names){
1523 System.out.println("HANDLE NAME "+name);
1524
1525 statusType = null;
1526
1527 if (!name.getStatus().isEmpty()){
1528 try {
1529 statusType = nomStatusString2NomStatus(name.getStatus());
1530 } catch (UnknownCdmTypeException e) {
1531 logger.warn("Problem with status");
1532 }
1533 }
1534
1535 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1536 nameToBeFilled = parser.parseFullName(name.getName(), nomenclaturalCode, name.getRank());
1537 if (nameToBeFilled.hasProblem() &&
1538 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1539 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1540 nameToBeFilled = solveNameProblem(name.getOriginalName(), name.getName(), parser);
1541 }
1542 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
1543 Synonym synonym = Synonym.NewInstance(nameToBeFilled, refMods);
1544
1545
1546 if (!name.getIdentifier().isEmpty() && (name.getIdentifier().length()>2)){
1547 setLSID(name.getIdentifier(), synonym);
1548 }
1549
1550 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1551 System.out.println(synonym.getName()+" -- "+synonym.getSec());
1552 boolean synoExist = false;
1553 for (Synonym syn: synonymsSet){
1554 System.out.println(syn.getName()+" -- "+syn.getSec());
1555 boolean a =syn.getName().equals(synonym.getName());
1556 boolean b = syn.getSec().equals(synonym.getSec());
1557 if (a && b) {
1558 synoExist=true;
1559 }
1560 }
1561 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1562 System.out.println("SYNONYM");
1563 synonym.addSource(OriginalSourceType.Import,null,null,refMods,null);
1564 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),refMods, null);
1565 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1566 }
1567 }
1568
1569 }
1570
1571
1572
1573
1574
1575 /**
1576 * @param refgroup: the XML nodes
1577 * @param nametosave: the list of objects to save into the CDM
1578 * @param acceptedTaxon: the current acceptedTaxon
1579 * @param nametosave: the list of objects to save into the CDM
1580 * @param refMods: the current reference extracted from the MODS
1581 * @return the acceptedTaxon (why?)
1582 * handle cases where the bibref are inside <p> and outside
1583 */
1584 @SuppressWarnings({ "null", "unused" ,"rawtypes" })
1585 private Taxon extractReferences(Node refgroup, List<TaxonNameBase> nametosave, Taxon acceptedTaxon, Reference<?> refMods) {
1586 // logger.info("extractReferences");
1587 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1588
1589 NodeList children = refgroup.getChildNodes();
1590 NonViralName<?> nameToBeFilled = null;
1591 if (nomenclaturalCode.equals(NomenclaturalCode.ICNCP)){
1592 nameToBeFilled = BotanicalName.NewInstance(null);
1593 }
1594 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1595 nameToBeFilled = ZoologicalName.NewInstance(null);
1596 }
1597 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1598 nameToBeFilled = BacterialName.NewInstance(null);
1599 }
1600
1601 ReferenceBuilder refBuild = new ReferenceBuilder();
1602 for (int i=0;i<children.getLength();i++){
1603 if(children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
1604 String ref = children.item(i).getTextContent().trim();
1605 refBuild.builReference(ref, treatmentMainName, nomenclaturalCode, acceptedTaxon, refMods);
1606 if (!refBuild.isFoundBibref()){
1607 extractReferenceRawText(children.item(i).getChildNodes(), nameToBeFilled, nametosave, refMods,acceptedTaxon);
1608 }
1609 }
1610
1611 if(children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1612 NodeList references = children.item(i).getChildNodes();
1613 for (int j=0;j<references.getLength();j++){
1614 if(references.item(j).getNodeName().equalsIgnoreCase("tax:bibref")){
1615 String ref = references.item(j).getTextContent().trim();
1616 refBuild.builReference(ref, treatmentMainName, nomenclaturalCode, acceptedTaxon, refMods);
1617 }
1618 }
1619 if (!refBuild.isFoundBibref()){
1620 extractReferenceRawText(references, nameToBeFilled, nametosave, refMods, acceptedTaxon);
1621 }
1622 }
1623 }
1624 // importer.getClassificationService().saveOrUpdate(classification);
1625 return acceptedTaxon;
1626
1627 }
1628
1629 /**
1630 * @param references
1631 * handle cases where the bibref are inside <p> and outside
1632 */
1633 @SuppressWarnings("rawtypes")
1634 private void extractReferenceRawText(NodeList references, NonViralName<?> nameToBeFilled, List<TaxonNameBase> nametosave,
1635 Reference<?> refMods, Taxon acceptedTaxon) {
1636 String refString="";
1637 NomenclaturalStatusType statusType = null;
1638 MyName myName= new MyName();
1639 for (int j=0;j<references.getLength();j++){
1640 //no bibref tag inside
1641 System.out.println("references.item(j).getNodeName()"+references.item(j).getNodeName());
1642 if (references.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1643
1644 try {
1645 myName = extractScientificName(references.item(j));
1646 // if (myName.getNewName().isEmpty()) {
1647 // name=myName.getOriginalName()+"---"+myName.getRank()+"---"+myName.getIdentifier()+"---"+myName.getStatus();
1648 // } else {
1649 // name=myName.getNewName()+"---"+myName.getRank()+"---"+myName.getIdentifier()+"---"+myName.getStatus();
1650 // }
1651 } catch (TransformerFactoryConfigurationError e) {
1652 logger.warn(e);
1653 } catch (TransformerException e) {
1654 logger.warn(e);
1655 }
1656
1657 // name=name.trim();
1658 }
1659 if (references.item(j).getNodeName().equalsIgnoreCase("#text")){
1660 refString = references.item(j).getTextContent().trim();
1661 }
1662 if(references.item(j).getNodeName().equalsIgnoreCase("#text") && !references.item(j).getTextContent().trim().isEmpty()){
1663 //
1664 statusType = null;
1665 if (!myName.getStatus().isEmpty()){
1666 try {
1667 statusType = nomStatusString2NomStatus(myName.getStatus());
1668 } catch (UnknownCdmTypeException e) {
1669 logger.warn("Problem with status");
1670 }
1671 }
1672
1673 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1674 String fullLineRefName = references.item(j).getTextContent().trim();
1675 int nameOrRefOrOther=1;
1676 nameOrRefOrOther=askIfNameContained(fullLineRefName);
1677 System.out.println("NAMEORREFOR?? "+nameOrRefOrOther);
1678 if (nameOrRefOrOther==0){
1679 TaxonNameBase nameTBF = parser.parseFullName(fullLineRefName, nomenclaturalCode, Rank.UNKNOWN_RANK());
1680 if (nameTBF.hasProblem() &&
1681 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1682 nameTBF=solveNameProblem(fullLineRefName, fullLineRefName,parser);
1683 }
1684 nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1685 Synonym synonym = Synonym.NewInstance(nameTBF, refMods);
1686
1687 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1688 System.out.println(synonym.getName()+" -- "+synonym.getSec());
1689 boolean synoExist = false;
1690 for (Synonym syn: synonymsSet){
1691 System.out.println(syn.getName()+" -- "+syn.getSec());
1692 boolean a =syn.getName().equals(synonym.getName());
1693 boolean b = syn.getSec().equals(synonym.getSec());
1694 if (a && b) {
1695 synoExist=true;
1696 }
1697 }
1698 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1699 System.out.println("SYNONYM");
1700 synonym.addSource(OriginalSourceType.Import,null,null,refMods,null);
1701 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),refMods, null);
1702 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1703 }
1704 }
1705
1706 if (nameOrRefOrOther==1){
1707 Reference<?> re = ReferenceFactory.newGeneric();
1708 re.setTitleCache(fullLineRefName);
1709
1710 TaxonNameBase nameTBF = parser.parseFullName(myName.getName(), nomenclaturalCode, Rank.UNKNOWN_RANK());
1711 if (nameTBF.hasProblem() &&
1712 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1713 nameTBF=solveNameProblem(myName.getName(), myName.getName(),parser);
1714 }
1715 nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1716 Synonym synonym = Synonym.NewInstance(nameTBF, re);
1717
1718 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1719 System.out.println(synonym.getName()+" -- "+synonym.getSec());
1720 boolean synoExist = false;
1721 for (Synonym syn: synonymsSet){
1722 System.out.println(syn.getName()+" -- "+syn.getSec());
1723 boolean a =syn.getName().equals(synonym.getName());
1724 boolean b = syn.getSec().equals(synonym.getSec());
1725 if (a && b) {
1726 synoExist=true;
1727 }
1728 }
1729 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1730 System.out.println("SYNONYM");
1731 synonym.addSource(OriginalSourceType.Import,null,null,refMods,null);
1732 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),re, null);
1733 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1734 }
1735 }
1736
1737
1738
1739 if (!myName.getIdentifier().isEmpty() && (myName.getIdentifier().length()>2)){
1740 setLSID(myName.getIdentifier(), acceptedTaxon);
1741 }
1742
1743
1744 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1745 }
1746
1747
1748 if(!myName.getName().isEmpty()){
1749 logger.info("acceptedTaxon and name: *"+acceptedTaxon.getTitleCache()+"*, *"+myName.getName()+"*");
1750 if (acceptedTaxon.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(myName.getName().trim())){
1751 Reference<?> refS = ReferenceFactory.newGeneric();
1752 refS.setTitleCache(refString, true);
1753 // TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1754 // acceptedTaxon.addDescription(td);
1755 // acceptedTaxon.addSource(refSource);
1756 //
1757 // TextData textData = TextData.NewInstance(Feature.CITATION());
1758 //
1759 // textData.addSource(null, null, refS, null);
1760 // td.addElement(textData);
1761 // td.addSource(refSource);
1762 // importer.getDescriptionService().saveOrUpdate(td);
1763
1764
1765 if (!myName.getIdentifier().isEmpty() && (myName.getIdentifier().length()>2)){
1766 setLSID(myName.getIdentifier(), acceptedTaxon);
1767
1768 }
1769
1770 acceptedTaxon.getName().setNomenclaturalReference(refS);
1771 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1772 }
1773 else{
1774 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1775 TaxonNameBase nameTBF = parser.parseFullName(myName.getName(), nomenclaturalCode, myName.getRank());
1776 if (nameTBF.hasProblem() &&
1777 !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1778 // if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1779 nameTBF=solveNameProblem(myName.getOriginalName(), myName.getName(),parser);
1780 }
1781 nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1782 Synonym synonym = Synonym.NewInstance(nameTBF, refMods);
1783
1784
1785 if (!myName.getIdentifier().isEmpty() && (myName.getIdentifier().length()>2)){
1786 String id = myName.getIdentifier().split("__")[0];
1787 String source = myName.getIdentifier().split("__")[1];
1788 if (id.indexOf("lsid")>-1){
1789 try {
1790 LSID lsid = new LSID(id);
1791 synonym.setLsid(lsid);
1792 } catch (MalformedLSIDException e) {
1793 // TODO Auto-generated catch block
1794 e.printStackTrace();
1795 }
1796
1797 }
1798 else{
1799 //TODO ADD ORIGINAL SOURCE ID
1800 Reference<?> re = ReferenceFactory.newGeneric();
1801 re.setTitle(source);
1802
1803 IdentifiableSource os = IdentifiableSource.NewInstance(OriginalSourceType.Import,null,null,re,null);
1804 os.setIdInSource(id);
1805 //
1806 // os.setCitation(re);
1807 synonym.addSource(os);
1808 }
1809 }
1810
1811 Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1812 System.out.println(synonym.getName()+" -- "+synonym.getSec());
1813 boolean synoExist = false;
1814 for (Synonym syn: synonymsSet){
1815 System.out.println(syn.getName()+" -- "+syn.getSec());
1816 boolean a =syn.getName().equals(synonym.getName());
1817 boolean b = syn.getSec().equals(synonym.getSec());
1818 if (a && b) {
1819 synoExist=true;
1820 }
1821 }
1822 if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1823 System.out.println("SYNONYM");
1824 synonym.addSource(OriginalSourceType.Import,null,null,refMods,null);
1825 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),refMods, null);
1826 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1827 }
1828 }
1829 }
1830 }
1831 }
1832
1833
1834
1835 /**
1836 * @param identifier
1837 * @param acceptedTaxon
1838 */
1839 private void setLSID(String identifier, TaxonBase<?> taxon) {
1840 boolean lsidok=false;
1841 String id = identifier.split("__")[0];
1842 String source = identifier.split("__")[1];
1843 if (id.indexOf("lsid")>-1){
1844 try {
1845 LSID lsid = new LSID(id);
1846 taxon.setLsid(lsid);
1847 lsidok=true;
1848 } catch (MalformedLSIDException e) {
1849 logger.warn("Malformed LSID");
1850 }
1851
1852 }
1853 if ((id.indexOf("lsid")<0) || !lsidok){
1854 //ADD ORIGINAL SOURCE ID
1855 IdentifiableSource os = IdentifiableSource.NewInstance(OriginalSourceType.Import);
1856 os.setIdInSource(id);
1857 Reference<?> re = ReferenceFactory.newGeneric();
1858 re.setTitle(source);
1859 os.setCitation(re);
1860 taxon.addSource(os);
1861 }
1862
1863 }
1864
1865 /**
1866 * try to solve a parsing problem for a scientific name
1867 * @param original : the name from the OCR document
1868 * @param name : the tagged version
1869 * @param parser
1870 * @return the corrected TaxonNameBase
1871 */
1872 @SuppressWarnings({ "unchecked", "rawtypes" })
1873 private TaxonNameBase<?,?> solveNameProblem(String original, String name, INonViralNameParser parser) {
1874 Map<String,String> ato = namesMap.get(original);
1875 Rank rank=Rank.UNKNOWN_RANK();
1876
1877 if (ato == null){
1878 rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
1879 }else{
1880 rank = getRank(ato);
1881 }
1882 TaxonNameBase<?,?> nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
1883 // logger.info("RANK: "+rank);
1884 int retry=0;
1885 while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
1886 String fullname = getFullReference(name,nameTBF.getParsingProblems());
1887 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1888 nameTBF = BotanicalName.NewInstance(null);
1889 }
1890 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1891 nameTBF = ZoologicalName.NewInstance(null);
1892 }
1893 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1894 nameTBF= BacterialName.NewInstance(null);
1895 }
1896 parser.parseReferencedName(nameTBF, fullname, rank, false);
1897 retry++;
1898 }
1899 if (retry == 1){
1900 nameTBF.setFullTitleCache(name, true);
1901 // logger.info("FULL TITLE CACHE "+name);
1902 }
1903 return nameTBF;
1904 }
1905
1906 /**
1907 * @param nomenclatureNode: the XML nodes
1908 * @param nametosave: the list of objects to save into the CDM
1909 * @param refMods: the current reference extracted from the MODS
1910 * @return
1911 */
1912 @SuppressWarnings({ "rawtypes", "unused" })
1913 private Taxon extractNomenclature(Node nomenclatureNode, List<TaxonNameBase> nametosave, Reference<?> refMods) {
1914 // logger.info("extractNomenclature");
1915 NodeList children = nomenclatureNode.getChildNodes();
1916 String freetext;
1917 TaxonNameBase nameToBeFilled = null;
1918 Taxon acceptedTaxon = null;
1919 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1920
1921 // String fullContent = nomenclatureNode.getTextContent();
1922
1923 NomenclaturalStatusType statusType = null;
1924 for (int i=0;i<children.getLength();i++){
1925 if(children.item(i).getNodeName().equalsIgnoreCase("tax:status")){
1926 String status = children.item(i).getTextContent().trim();
1927 if (!status.isEmpty()){
1928 try {
1929 statusType = nomStatusString2NomStatus(status);
1930 } catch (UnknownCdmTypeException e) {
1931 logger.warn("Problem with status");
1932 }
1933 }
1934 }
1935 }
1936
1937 boolean containsSynonyms=false;
1938 for (int i=0;i<children.getLength();i++){
1939
1940 if (children.item(i).getNodeName().equalsIgnoreCase("#text")) {
1941 freetext=children.item(i).getTextContent();
1942 }
1943 if (children.item(i).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1944 System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
1945 extractMaterialsDirect(children.item(i), acceptedTaxon, refMods, "collection");
1946 }
1947 if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
1948 if(!containsSynonyms){
1949 MyName myName = new MyName();
1950 try {
1951 myName = extractScientificName(children.item(i));
1952 treatmentMainName = myName.getNewName();
1953 originalTreatmentName = myName.getOriginalName();
1954
1955 } catch (TransformerFactoryConfigurationError e1) {
1956 logger.warn(e1);
1957 } catch (TransformerException e1) {
1958 logger.warn(e1);
1959 }
1960
1961 if (myName.getRank().equals(Rank.UNKNOWN_RANK()) || myName.getRank().isLower(configState.getConfig().getMaxRank())){
1962 maxRankRespected=true;
1963 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1964 nameToBeFilled = BotanicalName.NewInstance(null);
1965 }
1966 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1967 nameToBeFilled = ZoologicalName.NewInstance(null);
1968 }
1969 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1970 nameToBeFilled = BacterialName.NewInstance(null);
1971 }
1972 acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
1973 System.out.println("TreatmentName "+treatmentMainName+" - "+acceptedTaxon);
1974
1975
1976 boolean statusMatch=false;
1977 if(acceptedTaxon !=null ){
1978 statusMatch=compareStatus(acceptedTaxon, statusType);
1979 }
1980 if (acceptedTaxon ==null || (acceptedTaxon != null && !statusMatch)){
1981 System.out.println("devrait pas venir la");
1982 nameToBeFilled = parser.parseFullName(treatmentMainName, nomenclaturalCode, null);
1983 if (nameToBeFilled.hasProblem() &&
1984 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1985 nameToBeFilled = solveNameProblem(originalTreatmentName,treatmentMainName,parser);
1986 }
1987 nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
1988 if (!originalTreatmentName.isEmpty()) {
1989 TaxonNameDescription td = TaxonNameDescription.NewInstance();
1990 td.setTitleCache(originalTreatmentName);
1991 nameToBeFilled.addDescription(td);
1992 }
1993 if(statusType != null) {
1994 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1995 }
1996 nameToBeFilled.addSource(OriginalSourceType.Import,null,null,refMods,null);
1997 acceptedTaxon= new Taxon(nameToBeFilled,(Reference<?>) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
1998 if(!configState.getConfig().doKeepOriginalSecundum()) {
1999 acceptedTaxon.setSec(configState.getConfig().getSecundum());
2000 logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2001 }
2002
2003
2004 if (!myName.getIdentifier().isEmpty() && (myName.getIdentifier().length()>2)){
2005 boolean lsidok=false;
2006 String id = myName.getIdentifier().split("__")[0];
2007 String source = myName.getIdentifier().split("__")[1];
2008 if (id.indexOf("lsid")>-1){
2009 try {
2010 LSID lsid = new LSID(id);
2011 acceptedTaxon.setLsid(lsid);
2012 lsidok=true;
2013 } catch (MalformedLSIDException e) {
2014 logger.warn("Malformed LSID");
2015 }
2016
2017 }
2018 if ((id.indexOf("lsid")<0) || !lsidok){
2019 //TODO ADD ORIGINAL SOURCE ID
2020 IdentifiableSource os = IdentifiableSource.NewInstance(OriginalSourceType.Import);
2021 os.setIdInSource(id);
2022 Reference<?> re = ReferenceFactory.newGeneric();
2023 re.setTitle(source);
2024 os.setCitation(re);
2025 acceptedTaxon.addSource(os);
2026 }
2027 }
2028 /*<<<<<<< .courant
2029 boolean sourceExists=false;
2030 Set<IdentifiableSource> sources = acceptedTaxon.getSources();
2031 for (IdentifiableSource src : sources){
2032 String micro = src.getCitationMicroReference();
2033 Reference r = src.getCitation();
2034 if (r.equals(refMods)) {
2035 sourceExists=true;
2036 }
2037 }
2038 if(!sourceExists) {
2039 acceptedTaxon.addSource(null,null,refMods,null);
2040 }
2041 =======*/
2042
2043 acceptedTaxon.addSource(OriginalSourceType.Import, null,null,refMods,null);
2044 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2045
2046 Taxon parentTaxon = askParent(acceptedTaxon, classification);
2047 if (parentTaxon ==null){
2048 while (parentTaxon == null) {
2049 parentTaxon = createParent(acceptedTaxon, refMods);
2050 classification.addParentChild(parentTaxon, acceptedTaxon, refMods, null);
2051 }
2052 }else{
2053 classification.addParentChild(parentTaxon, acceptedTaxon, refMods, null);
2054 }
2055 }else{
2056 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2057 Set<IdentifiableSource> sources = acceptedTaxon.getSources();
2058 boolean sourcelinked=false;
2059 for (IdentifiableSource source:sources){
2060 if (source.getCitation().getTitle().equalsIgnoreCase(refMods.getTitleCache())) {
2061 sourcelinked=true;
2062 }
2063 }
2064 if (!configState.getConfig().doKeepOriginalSecundum()) {
2065 acceptedTaxon.setSec(configState.getConfig().getSecundum());
2066 logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2067 }
2068 if (!sourcelinked){
2069 acceptedTaxon.addSource(OriginalSourceType.Import, null, null, refMods, null);
2070 }
2071 if (!sourcelinked || !configState.getConfig().doKeepOriginalSecundum()){
2072
2073 if (!myName.getIdentifier().isEmpty() && (myName.getIdentifier().length()>2)){
2074 setLSID(myName.getIdentifier(), acceptedTaxon);
2075 }
2076 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2077 }
2078 }
2079 }else{
2080 maxRankRespected=false;
2081 }
2082 containsSynonyms=true;
2083 }else{
2084 extractSynonyms(children.item(i), nametosave, acceptedTaxon, refMods);
2085 }
2086 }
2087 if (children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group") && maxRankRespected){
2088 reloadClassification();
2089 //extract the References within the document
2090 extractReferences(children.item(i),nametosave,acceptedTaxon,refMods);
2091 }
2092
2093 }
2094 // importer.getClassificationService().saveOrUpdate(classification);
2095 return acceptedTaxon;
2096 }
2097
2098 /**
2099 * @return
2100 */
2101 private boolean compareStatus(Taxon t, NomenclaturalStatusType statusType) {
2102 boolean statusMatch=false;
2103 //found one taxon
2104 Set<NomenclaturalStatus> status = t.getName().getStatus();
2105 if (statusType!=null && status.size()>0){ //the statusType is known for both taxon
2106 for (NomenclaturalStatus st:status){
2107 NomenclaturalStatusType stype = st.getType();
2108 if (stype.toString().equalsIgnoreCase(statusType.toString())) {
2109 statusMatch=true;
2110 }
2111 }
2112 }
2113 else{
2114 if(statusType == null && status.size()==0) {//there is no statusType, we can assume it's the same
2115 statusMatch=true;
2116 }
2117 }
2118 return statusMatch;
2119 }
2120
2121 /**
2122 * @param acceptedTaxon: the current acceptedTaxon
2123 * @param ref: the current reference extracted from the MODS
2124 * @return the parent for the current accepted taxon
2125 */
2126 private Taxon createParent(Taxon acceptedTaxon, Reference<?> ref) {
2127 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2128
2129 List<Rank> rankList = new ArrayList<Rank>();
2130 rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
2131
2132 List<String> rankListStr = new ArrayList<String>();
2133 for (Rank r:rankList) {
2134 rankListStr.add(r.toString());
2135 }
2136 String r="";
2137 String s = acceptedTaxon.getTitleCache();
2138 Taxon tax = null;
2139
2140 int addTaxon = askAddParent(s);
2141 logger.info("ADD TAXON: "+addTaxon);
2142 if (addTaxon == 0){
2143 Taxon tmp = askParent(acceptedTaxon, classification);
2144 if (tmp == null){
2145 s = askSetParent(s);
2146 r = askRank(s,rankListStr);
2147
2148 NonViralName<?> nameToBeFilled = null;
2149 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2150 nameToBeFilled = BotanicalName.NewInstance(null);
2151 }
2152 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2153 nameToBeFilled = ZoologicalName.NewInstance(null);
2154 }
2155 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2156 nameToBeFilled = BacterialName.NewInstance(null);
2157 }
2158 nameToBeFilled.setTitleCache(s);
2159 nameToBeFilled.setRank(getRank(r));
2160
2161 tax = Taxon.NewInstance(nameToBeFilled, ref);
2162 }
2163 else{
2164 tax=tmp;
2165 }
2166
2167 createParent(tax, ref);
2168 // logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
2169 classification.addParentChild(tax, acceptedTaxon, ref, null);
2170 }
2171 else{
2172 classification.addChildTaxon(acceptedTaxon, ref, null);
2173 tax=acceptedTaxon;
2174 }
2175 // logger.info("RETURN: "+tax );
2176 return tax;
2177
2178 }
2179
2180
2181
2182 /**
2183 * @param name
2184 * @throws TransformerFactoryConfigurationError
2185 * @throws TransformerException
2186 * @return a list of possible names
2187 */
2188 private MyName extractScientificName(Node name) throws TransformerFactoryConfigurationError, TransformerException {
2189 // System.out.println("extractScientificName");
2190
2191 String[] rankListToPrint_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:infraspecificepithet","dwc:scientificNameAuthorship"};
2192 List<String> rankListToPrint = new ArrayList<String>();
2193 for (String r : rankListToPrint_tmp) {
2194 rankListToPrint.add(r.toLowerCase());
2195 }
2196
2197 Rank rank = Rank.UNKNOWN_RANK();
2198 NodeList children = name.getChildNodes();
2199 String fullName = "";
2200 String newName="";
2201 String identifier="";
2202 HashMap<String, String> atomisedMap = new HashMap<String, String>();
2203 List<String> atomisedName= new ArrayList<String>();
2204
2205 String rankStr = "";
2206 Rank tmpRank ;
2207
2208 String status="";
2209 NomenclaturalStatusType statusType = null;
2210 for (int i=0;i<children.getLength();i++){
2211 if(children.item(i).getNodeName().equalsIgnoreCase("tax:status") ||
2212 (children.item(i).getNodeName().equalsIgnoreCase("tax:namePart") &&
2213 children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("status"))){
2214 status = children.item(i).getTextContent().trim();
2215 }
2216 }
2217
2218 for (int i=0;i<children.getLength();i++){
2219 if(children.item(i).getNodeName().equalsIgnoreCase("tax:xmldata")){
2220 NodeList atom = children.item(i).getChildNodes();
2221 for (int k=0;k<atom.getLength();k++){
2222 if (atom.item(k).getNodeName().equalsIgnoreCase("tax:xid")){
2223 try{
2224 identifier = atom.item(k).getAttributes().getNamedItem("identifier").getNodeValue();
2225 }catch(Exception e){
2226 System.out.println("pb with identifier, maybe empty");
2227 }
2228 try{
2229 identifier+="__"+atom.item(k).getAttributes().getNamedItem("source").getNodeValue();
2230 }catch(Exception e){
2231 System.out.println("pb with identifier, maybe empty");
2232 }
2233 }
2234 tmpRank = null;
2235 rankStr = atom.item(k).getNodeName().toLowerCase();
2236 // logger.info("RANKSTR:*"+rankStr+"*");
2237 if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2238 rankStr=atom.item(k).getTextContent().trim();
2239 tmpRank = getRank(rankStr);
2240 }
2241 // if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2242 if (tmpRank != null){
2243 rank=tmpRank;
2244 }
2245
2246
2247 atomisedMap.put(rankStr.toLowerCase(),atom.item(k).getTextContent().trim());
2248 if (!atom.item(k).getNodeName().equalsIgnoreCase("dwc:taxonRank") ) {
2249 if (atom.item(k).getNodeName().equalsIgnoreCase("dwc:subgenus") || atom.item(k).getNodeName().equalsIgnoreCase("dwcranks:subgenus")) {
2250 atomisedName.add("("+atom.item(k).getTextContent().trim()+")");
2251 } else{
2252 if(atom.item(k).getNodeName().equalsIgnoreCase("dwcranks:varietyepithet")) {
2253 atomisedName.add("var. "+atom.item(k).getTextContent().trim());
2254 } else{
2255 if(rankListToPrint.contains(atom.item(k).getNodeName().toLowerCase())) {
2256 atomisedName.add(atom.item(k).getTextContent().trim());
2257 }
2258 else{
2259 System.out.println("on a oublie qqn "+atom.item(k).getNodeName());
2260 }
2261 }
2262 }
2263 }
2264 }
2265 }
2266 if(children.item(i).getNodeName().equalsIgnoreCase("#text") && !StringUtils.isBlank(children.item(i).getTextContent())){
2267 // logger.info("name non atomised: "+children.item(i).getTextContent());
2268 fullName = children.item(i).getTextContent().trim();
2269 // logger.info("fullname: "+fullName);
2270 }
2271 }
2272 if (fullName != null){
2273 fullName = fullName.replace("( ", "(");
2274 fullName = fullName.replace(" )",")");
2275
2276 }
2277 if (fullName.trim().isEmpty()){
2278 fullName=StringUtils.join(atomisedName," ");
2279 }
2280
2281 while(fullName.contains(" ")) {
2282 fullName=fullName.replace(" ", " ");
2283 // logger.info("while");
2284 }
2285
2286 namesMap.put(fullName,atomisedMap);
2287 String atomisedNameStr = StringUtils.join(atomisedName," ");
2288 while(atomisedNameStr.contains(" ")) {
2289 atomisedNameStr=atomisedNameStr.replace(" ", " ");
2290 // logger.info("atomisedNameStr: "+atomisedNameStr);
2291 }
2292 atomisedNameStr=atomisedNameStr.trim();
2293
2294 if (fullName != null){
2295 if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2296 newName=getScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2297 } else {
2298 newName=fullName;
2299 }
2300 }
2301 rank = askForRank(newName, rank, nomenclaturalCode);
2302 String[] names = new String[5];
2303 MyName myname = new MyName();
2304 myname.setOriginalName(fullName);
2305 myname.setNewName(newName);
2306 myname.setRank(rank);
2307 myname.setIdentifier(identifier);
2308 myname.setStatus(status);
2309 return myname;
2310
2311 }
2312
2313 /**
2314 * @param classification2
2315 */
2316 public void updateClassification(Classification classification2) {
2317 classification = classification2;
2318 }
2319
2320 public class MyName {
2321 String originalName="";
2322 String newName="";
2323 Rank rank=Rank.UNKNOWN_RANK();
2324 String identifier="";
2325 String status="";
2326
2327 public String getName(){
2328 if (newName.isEmpty()) {
2329 return originalName;
2330 } else {
2331 return newName;
2332 }
2333
2334 }
2335 /**
2336 * @return the fullName
2337 */
2338 public String getOriginalName() {
2339 return originalName;
2340 }
2341 /**
2342 * @param fullName the fullName to set
2343 */
2344 public void setOriginalName(String fullName) {
2345 this.originalName = fullName;
2346 }
2347 /**
2348 * @return the newName
2349 */
2350 public String getNewName() {
2351 return newName;
2352 }
2353 /**
2354 * @param newName the newName to set
2355 */
2356 public void setNewName(String newName) {
2357 this.newName = newName;
2358 }
2359 /**
2360 * @return the rank
2361 */
2362 public Rank getRank() {
2363 return rank;
2364 }
2365 /**
2366 * @param rank the rank to set
2367 */
2368 public void setRank(Rank rank) {
2369 this.rank = rank;
2370 }
2371 /**
2372 * @return the idenfitiger
2373 */
2374 public String getIdentifier() {
2375 return identifier;
2376 }
2377 /**
2378 * @param idenfitiger the idenfitiger to set
2379 */
2380 public void setIdentifier(String identifier) {
2381 this.identifier = identifier;
2382 }
2383 /**
2384 * @return the status
2385 */
2386 public String getStatus() {
2387 return status;
2388 }
2389 /**
2390 * @param status the status to set
2391 */
2392 public void setStatus(String status) {
2393 this.status = status;
2394 }
2395
2396
2397
2398 }
2399
2400 }
2401
2402
2403