cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/taxonx2013/TaxonXTreatmentExtractor.java

   1 // $Id$
   2 /**
   3  * Copyright (C) 2013 EDIT
   4  * European Distributed Institute of Taxonomy
   5  * http://www.e-taxonomy.eu
   6  *
   7  * The contents of this file are subject to the Mozilla Public License Version 1.1
   8  * See LICENSE.TXT at the top of this package for the full license terms.
   9  */
  10 package eu.etaxonomy.cdm.io.taxonx2013;
  11
  12 import java.io.File;
  13 import java.io.FileWriter;
  14 import java.io.IOException;
  15 import java.net.URI;
  16 import java.util.ArrayList;
  17 import java.util.HashMap;
  18 import java.util.List;
  19 import java.util.Map;
  20 import java.util.Set;
  21 import java.util.regex.Pattern;
  22
  23 import javax.xml.transform.TransformerException;
  24 import javax.xml.transform.TransformerFactoryConfigurationError;
  25
  26 import org.apache.commons.lang.StringUtils;
  27 import org.w3c.dom.Node;
  28 import org.w3c.dom.NodeList;
  29
  30 import com.ibm.lsid.MalformedLSIDException;
  31
  32 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
  33 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade.DerivedUnitType;
  34 import eu.etaxonomy.cdm.model.common.CdmBase;
  35 import eu.etaxonomy.cdm.model.common.DefinedTermBase;
  36 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
  37 import eu.etaxonomy.cdm.model.common.LSID;
  38 import eu.etaxonomy.cdm.model.common.Language;
  39 import eu.etaxonomy.cdm.model.description.Feature;
  40 import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
  41 import eu.etaxonomy.cdm.model.description.PolytomousKey;
  42 import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
  43 import eu.etaxonomy.cdm.model.description.TaxonDescription;
  44 import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
  45 import eu.etaxonomy.cdm.model.description.TextData;
  46 import eu.etaxonomy.cdm.model.name.BacterialName;
  47 import eu.etaxonomy.cdm.model.name.BotanicalName;
  48 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
  49 import eu.etaxonomy.cdm.model.name.NonViralName;
  50 import eu.etaxonomy.cdm.model.name.Rank;
  51 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
  52 import eu.etaxonomy.cdm.model.name.ZoologicalName;
  53 import eu.etaxonomy.cdm.model.occurrence.DerivedUnitBase;
  54 import eu.etaxonomy.cdm.model.reference.Reference;
  55 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
  56 import eu.etaxonomy.cdm.model.taxon.Classification;
  57 import eu.etaxonomy.cdm.model.taxon.Synonym;
  58 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
  59 import eu.etaxonomy.cdm.model.taxon.Taxon;
  60 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
  61 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
  62 import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
  63 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
  64 import eu.etaxonomy.cdm.strategy.parser.ParserProblem;
  65
  66 /**
  67  * @author pkelbert
  68  * @date 2 avr. 2013
  69  *
  70  */
  71 /**
  72  * @author pkelbert
  73  * @date 17 juin 2013
  74  *
  75  */
  76 public class TaxonXTreatmentExtractor extends TaxonXExtractor{
  77
  78     private final NomenclaturalCode nomenclaturalCode;
  79     private Classification classification;
  80
  81     private  String treatmentMainName,originalTreatmentName;
  82
  83     private final HashMap<String,Map<String,String>> namesMap = new HashMap<String, Map<String,String>>();
  84
  85
  86     private final Pattern keypattern = Pattern.compile("^(\\d+.*|-\\d+.*)");
  87     private final Pattern keypatternend = Pattern.compile("^.+?\\d$");
  88
  89     private boolean maxRankRespected =false;
  90
  91     /**
  92      * @param nomenclaturalCode
  93      * @param classification
  94      * @param importer
  95      * @param configState
  96      */
  97     public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode, Classification classification, TaxonXImport importer,
  98             TaxonXImportState configState) {
  99         this.nomenclaturalCode=nomenclaturalCode;
 100         this.classification = classification;
 101         this.importer=importer;
 102         this.configState=configState;
 103         prepareCollectors(configState, importer.getAgentService());
 104     }
 105
 106     /**
 107      * extracts all the treament information and save them
 108      * @param treatmentnode: the XML Node
 109      * @param tosave: the list of object to save into the CDM
 110      * @param refMods: the reference extracted from the MODS
 111      * @param sourceName: the URI of the document
 112      */
 113     @SuppressWarnings({ "rawtypes", "unused" })
 114     protected void extractTreatment(Node treatmentnode, List<Object> tosave, Reference<?> refMods, URI sourceName) {
 115         logger.info("extractTreatment");
 116         List<TaxonNameBase> nametosave = new ArrayList<TaxonNameBase>();
 117         NodeList children = treatmentnode.getChildNodes();
 118         Taxon acceptedTaxon =null;
 119         Taxon defaultTaxon =null;
 120         boolean refgroup=false;
 121
 122         for (int i=0;i<children.getLength();i++){
 123             if(children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group")) {
 124                 refgroup=true;
 125             }
 126         }
 127
 128         for (int i=0;i<children.getLength();i++){
 129
 130             if (children.item(i).getNodeName().equalsIgnoreCase("tax:nomenclature")){
 131                 NodeList nomenclature = children.item(i).getChildNodes();
 132                 boolean containsName=false;
 133                 for(int k=0;k<nomenclature.getLength();k++){
 134                     if(nomenclature.item(k).getNodeName().equalsIgnoreCase("tax:name")){
 135                         containsName=true;
 136                         break;
 137                     }
 138                 }
 139                 if (containsName){
 140                     reloadClassification();
 141                     //extract "main" the scientific name
 142                     acceptedTaxon = extractNomenclature(children.item(i),nametosave,refMods);
 143                 }
 144             }
 145             else if (children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group") && maxRankRespected){
 146                 reloadClassification();
 147                 //extract the References within the document
 148                 extractReferences(children.item(i),nametosave,acceptedTaxon,refMods);
 149             }
 150             else if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 151                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected){
 152                 File file = new File("/home/pkelbert/Bureau/multipleTaxonX.txt");
 153                 FileWriter writer;
 154                 try {
 155                     writer = new FileWriter(file ,true);
 156                     writer.write(sourceName+"\n");
 157                     writer.flush();
 158                     writer.close();
 159                 } catch (IOException e1) {
 160                     // TODO Auto-generated catch block
 161                     e1.printStackTrace();
 162                 }
 163                 String multiple = askMultiple(children.item(i));
 164                 if (multiple.equalsIgnoreCase("synonyms")) {
 165                     extractSynonyms(children.item(i),nametosave, acceptedTaxon,refMods);
 166                 }
 167                 else
 168                     if(multiple.equalsIgnoreCase("material examined")){
 169                         extractMaterials(children.item(i),acceptedTaxon, refMods, nametosave);
 170                     }
 171                     else
 172                         if (multiple.equalsIgnoreCase("distribution")){
 173                             extractDistribution(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods);
 174                         }
 175                         else {
 176                             extractSpecificFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,multiple);
 177                         }
 178             }
 179             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 180                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected){
 181                 extractFeature(children.item(i),acceptedTaxon,defaultTaxon, nametosave, refMods, Feature.BIOLOGY_ECOLOGY());
 182             }
 183             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 184                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected){
 185                 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods, Feature.DESCRIPTION());
 186             }
 187             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 188                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected){
 189                 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,Feature.DIAGNOSIS());
 190             }
 191             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 192                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected){
 193                 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods, Feature.DISCUSSION());
 194             }
 195
 196             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 197                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected){
 198                 extractDistribution(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods);
 199             }
 200             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 201                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected){
 202                 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave,refMods,Feature.ETYMOLOGY());
 203             }
 204
 205             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 206                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected){
 207                 extractMaterials(children.item(i),acceptedTaxon, refMods, nametosave);
 208             }
 209
 210             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 211                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected){
 212                 //TODO IGNORE keys for the moment
 213                 //extractKey(children.item(i),acceptedTaxon, nametosave,source, refMods);
 214                 extractSpecificFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,"Keys - unparsed");
 215             }
 216             else{
 217                 logger.info("ANOTHER KIND OF NODES: "+children.item(i).getNodeName()+", "+children.item(i).getAttributes());
 218                 if (children.item(i).getAttributes() !=null) {
 219                     logger.info(children.item(i).getAttributes().item(0));
 220                 }
 221             }
 222         }
 223         //        logger.info("saveUpdateNames");
 224         if (maxRankRespected){
 225             importer.getNameService().saveOrUpdate(nametosave);
 226             importer.getClassificationService().saveOrUpdate(classification);
 227             logger.info("saveUpdateNames-ok");
 228         }
 229     }
 230
 231
 232     /**
 233      * @param keys
 234      * @param acceptedTaxon: the current acceptedTaxon
 235      * @param nametosave: the list of objects to save into the CDM
 236      * @param refMods: the current reference extracted from the MODS
 237      */
 238     @SuppressWarnings("rawtypes")
 239     private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonNameBase> nametosave, Reference<?> refMods) {
 240         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
 241
 242         NodeList children = keys.getChildNodes();
 243         String key="";
 244         PolytomousKey poly =  PolytomousKey.NewInstance();
 245         poly.addSource(null,null,refMods,null);
 246         poly.addTaxonomicScope(acceptedTaxon);
 247         poly.setTitleCache("bloup");
 248         //        poly.addCoveredTaxon(acceptedTaxon);
 249         PolytomousKeyNode root = poly.getRoot();
 250         PolytomousKeyNode previous = null,tmpKey=null;
 251         Taxon taxonKey=null;
 252         List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
 253
 254         //        String fullContent = keys.getTextContent();
 255         for (int i=0;i<children.getLength();i++){
 256             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 257                 NodeList paragraph = children.item(i).getChildNodes();
 258                 key="";
 259                 taxonKey=null;
 260                 for (int j=0;j<paragraph.getLength();j++){
 261                     if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
 262                         if (! paragraph.item(j).getTextContent().trim().isEmpty()){
 263                             key+=paragraph.item(j).getTextContent().trim();
 264                             //                            logger.info("KEY: "+j+"--"+key);
 265                         }
 266                     }
 267                     if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
 268                         taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
 269                     }
 270                 }
 271                 //                logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
 272                 if (keypattern.matcher(key).matches()){
 273                     tmpKey = PolytomousKeyNode.NewInstance(key);
 274                     if (taxonKey!=null) {
 275                         tmpKey.setTaxon(taxonKey);
 276                     }
 277                     polyNodes.add(tmpKey);
 278                     if (previous == null) {
 279                         root.addChild(tmpKey);
 280                     } else {
 281                         previous.addChild(tmpKey);
 282                     }
 283                 }else{
 284                     if (!key.isEmpty()){
 285                         tmpKey=PolytomousKeyNode.NewInstance(key);
 286                         if (taxonKey!=null) {
 287                             tmpKey.setTaxon(taxonKey);
 288                         }
 289                         polyNodes.add(tmpKey);
 290                         if (keypatternend.matcher(key).matches()) {
 291                             root.addChild(tmpKey);
 292                             previous=tmpKey;
 293                         } else{
 294                             previous.addChild(tmpKey);
 295                         }
 296
 297                     }
 298                 }
 299             }
 300         }
 301         importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
 302         importer.getPolytomousKeyService().saveOrUpdate(poly);
 303     }
 304
 305     /**
 306      * @param taxons: the XML Nodegroup
 307      * @param nametosave: the list of objects to save into the CDM
 308      * @param acceptedTaxon: the current accepted Taxon
 309      * @param refMods: the current reference extracted from the MODS
 310      *
 311      * @return Taxon object built
 312      */
 313     @SuppressWarnings({ "rawtypes", "unchecked" })
 314     private Taxon getTaxonFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference<?> refMods) {
 315         //        logger.info("getTaxonFromXML");
 316         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 317
 318         TaxonNameBase nameToBeFilled = null;
 319         String name="";
 320
 321         String[] enames = null;
 322         Rank rank = Rank.UNKNOWN_RANK();
 323         String original="";
 324         String identifier="";
 325
 326         try {
 327             enames = extractScientificName(taxons);
 328             if (enames[1].isEmpty()) {
 329                 name=enames[0];
 330             } else {
 331                 name=enames[1];
 332             }
 333             original=enames[0];
 334             rank = Rank.getRankByName(enames[2]);
 335             identifier = enames[3];
 336         } catch (TransformerFactoryConfigurationError e1) {
 337             logger.warn(e1);
 338         } catch (TransformerException e1) {
 339             logger.warn(e1);
 340         } catch (UnknownCdmTypeException e) {
 341             logger.warn("Rank problem!"+enames[2]);
 342             rank=Rank.UNKNOWN_RANK();
 343         }
 344         INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
 345
 346         nameToBeFilled = parser.parseFullName(name, nomenclaturalCode, rank);
 347         if (nameToBeFilled.hasProblem() &&
 348                 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
 349             //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
 350             nameToBeFilled=solveNameProblem(original, name,parser);
 351         }
 352
 353         nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave);
 354
 355         //        importer.getNameService().saveOrUpdate(nametosave);
 356         Taxon t = importer.getTaxonService().findBestMatchingTaxon(nameToBeFilled.getTitleCache());
 357         if (t ==null){
 358             //            logger.info("BestTaxonService not the best or null");
 359             t= new Taxon(nameToBeFilled,(Reference<?>) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
 360             if (t.getSec() == null) {
 361                 t.setSec(refMods);
 362             }
 363             if(!configState.getConfig().doKeepOriginalSecundum()) {
 364                 t.setSec(configState.getConfig().getSecundum());
 365                 logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
 366             }
 367             t.addSource(null,null,refMods,null);
 368
 369             if (!identifier.isEmpty() && (identifier.length()>2)){
 370                 setLSID(identifier, t);
 371             }
 372
 373             Taxon parentTaxon = askParent(t, classification);
 374             if (parentTaxon ==null){
 375                 while (parentTaxon == null) {
 376                     parentTaxon = createParent(t, refMods);
 377                     classification.addParentChild(parentTaxon, t, refMods, null);
 378                 }
 379             }else{
 380                 classification.addParentChild(parentTaxon, t, refMods, null);
 381             }
 382         }
 383         else{
 384             t = CdmBase.deproxy(t, Taxon.class);
 385         }
 386         if (!configState.getConfig().doKeepOriginalSecundum()) {
 387             t.setSec(configState.getConfig().getSecundum());
 388             logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
 389         }
 390         return t;
 391     }
 392
 393
 394     /**
 395      * @param taxons: the XML Nodegroup
 396      * @param nametosave: the list of objects to save into the CDM
 397      * @param acceptedTaxon: the current accepted Taxon
 398      * @param refMods: the current reference extracted from the MODS
 399      *
 400      * @return Taxon object built
 401      */
 402     @SuppressWarnings({ "rawtypes", "unchecked" })
 403     private TaxonNameBase getTaxonNameBaseFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference<?> refMods) {
 404         //        logger.info("getTaxonFromXML");
 405         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 406
 407         TaxonNameBase nameToBeFilled = null;
 408         String name="";
 409
 410         String[] enames = null;
 411         Rank rank = Rank.UNKNOWN_RANK();
 412         String original="";
 413         String identifier="";
 414
 415         try {
 416             enames = extractScientificName(taxons);
 417             if (enames[1].isEmpty()) {
 418                 name=enames[0];
 419             } else {
 420                 name=enames[1];
 421             }
 422             original=enames[0];
 423             rank = Rank.getRankByName(enames[2]);
 424             identifier = enames[3];
 425         } catch (TransformerFactoryConfigurationError e1) {
 426             logger.warn(e1);
 427         } catch (TransformerException e1) {
 428             logger.warn(e1);
 429         } catch (UnknownCdmTypeException e) {
 430             logger.warn("Rank problem!"+enames[2]);
 431             rank=Rank.UNKNOWN_RANK();
 432         }
 433         INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
 434
 435         nameToBeFilled = parser.parseFullName(name, nomenclaturalCode, rank);
 436         if (nameToBeFilled.hasProblem() &&
 437                 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
 438             //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
 439             nameToBeFilled=solveNameProblem(original, name,parser);
 440         }
 441
 442         nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave);
 443         return nameToBeFilled;
 444
 445     }
 446
 447
 448     @SuppressWarnings("rawtypes")
 449     private TaxonNameBase getTaxonNameBase (TaxonNameBase name, List<TaxonNameBase> nametosave){
 450         List<TaxonNameBase> names = importer.getNameService().list(TaxonNameBase.class, null, null, null, null);
 451         for (TaxonNameBase tb : names){
 452             if (tb.getTitleCache().equalsIgnoreCase(name.getTitleCache())) {
 453                 logger.info("TaxonNameBase FOUND"+name.getTitleCache());
 454                 return tb;
 455             }
 456         }
 457         logger.info("TaxonNameBase NOT FOUND "+name.getTitleCache());
 458         nametosave.add(name);
 459         return name;
 460
 461     }
 462
 463
 464
 465     /**
 466      *
 467      */
 468     private void reloadClassification() {
 469         Classification cl = importer.getClassificationService().find(classification.getUuid());
 470         if (cl != null){
 471             classification=cl;
 472         }else{
 473             importer.getClassificationService().saveOrUpdate(classification);
 474             classification = importer.getClassificationService().find(classification.getUuid());
 475         }
 476
 477     }
 478
 479     /**
 480      * Create a Taxon for the current NameBase, based on the current reference
 481      * @param taxonNameBase
 482      * @param refMods: the current reference extracted from the MODS
 483      * @return Taxon
 484      */
 485     @SuppressWarnings({ "unused", "rawtypes" })
 486     private Taxon getTaxon(TaxonNameBase taxonNameBase, Reference<?> refMods) {
 487         Taxon t = new Taxon(taxonNameBase,null );
 488         if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
 489             t.setSec(configState.getConfig().getSecundum());
 490             logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
 491         }
 492         t.addSource(null,null,refMods,null);
 493         return t;
 494     }
 495
 496     /**
 497      * @param nametosave
 498      * @param distribution: the XML node group
 499      * @param acceptedTaxon: the current accepted Taxon
 500      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
 501      * @param refMods: the current reference extracted from the MODS
 502      */
 503     @SuppressWarnings("rawtypes")
 504     private void extractDistribution(Node distribution, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave, Reference<?> refMods) {
 505         //        logger.info("DISTRIBUTION");
 506         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 507         NodeList children = distribution.getChildNodes();
 508         Map<Integer,List<MySpecimenOrObservation>> specimenOrObservations = new HashMap<Integer, List<MySpecimenOrObservation>>();
 509         Map<Integer,String> descriptionsFulltext = new HashMap<Integer,String>();
 510
 511         for (int i=0;i<children.getLength();i++){
 512             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 513                 NodeList paragraph = children.item(i).getChildNodes();
 514                 for (int j=0;j<paragraph.getLength();j++){
 515                     if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
 516                         if(!paragraph.item(j).getTextContent().trim().isEmpty()) {
 517                             String s =paragraph.item(j).getTextContent().trim();
 518                             if (descriptionsFulltext.get(i) !=null){
 519                                 s = descriptionsFulltext.get(i)+" "+s;
 520                             }
 521                             descriptionsFulltext.put(i, s);
 522                         }
 523                     }
 524                     else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
 525                         String s =getTaxonNameBaseFromXML(paragraph.item(j),nametosave,refMods).toString().split("sec.")[0];
 526                         if (descriptionsFulltext.get(i) !=null){
 527                             s = descriptionsFulltext.get(i)+" "+s;
 528                         }
 529                         descriptionsFulltext.put(i, s);
 530                     }
 531                     else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")){
 532                         MySpecimenOrObservation specimenOrObservation = new MySpecimenOrObservation();
 533                         DerivedUnitBase derivedUnitBase = null;
 534                         specimenOrObservation = extractSpecimenOrObservation(paragraph.item(j), derivedUnitBase, DerivedUnitType.DerivedUnit);
 535                         List<MySpecimenOrObservation> speObsList = specimenOrObservations.get(i);
 536                         if (speObsList == null) {
 537                             speObsList=new ArrayList<MySpecimenOrObservation>();
 538                         }
 539                         speObsList.add(specimenOrObservation);
 540                         specimenOrObservations.put(i,speObsList);
 541
 542                         String s = specimenOrObservation.getDerivedUnitBase().toString();
 543                         if (descriptionsFulltext.get(i) !=null){
 544                             s = descriptionsFulltext.get(i)+" "+s;
 545                         }
 546                         descriptionsFulltext.put(i, s);
 547                     }
 548
 549                 }
 550             }
 551         }
 552
 553         int m=0;
 554         for (int k:descriptionsFulltext.keySet()) {
 555             if (k>m) {
 556                 m=k;
 557             }
 558         }
 559         for (int k:specimenOrObservations.keySet()) {
 560             if (k>m) {
 561                 m=k;
 562             }
 563         }
 564
 565
 566         TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
 567         Feature currentFeature = Feature.DISTRIBUTION();
 568         DerivedUnitBase derivedUnitBase=null;
 569         String descr="";
 570         for (int k=0;k<=m;k++){
 571             if(specimenOrObservations.keySet().contains(k)){
 572                 for (MySpecimenOrObservation soo:specimenOrObservations.get(k) ) {
 573                     derivedUnitBase = soo.getDerivedUnitBase();
 574                     descr=soo.getDescr();
 575
 576                     derivedUnitBase.addSource(null,null,refMods,null);
 577
 578                     importer.getOccurrenceService().saveOrUpdate(derivedUnitBase);
 579
 580                     TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
 581                     acceptedTaxon.addDescription(taxonDescription);
 582
 583
 584                     IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
 585
 586                     Feature feature=null;
 587                         feature = makeFeature(derivedUnitBase);
 588                     if(!StringUtils.isEmpty(descr)) {
 589                         derivedUnitBase.setTitleCache(descr, true);
 590                     }
 591                     indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
 592                     indAssociation.setFeature(feature);
 593                     indAssociation.addSource(null, null, refMods, null);
 594
 595                     taxonDescription.addElement(indAssociation);
 596                     taxonDescription.setTaxon(acceptedTaxon);
 597                     taxonDescription.addSource(null,null,refMods,null);
 598
 599                     importer.getDescriptionService().saveOrUpdate(taxonDescription);
 600                     importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 601                     td.addDescribedSpecimenOrObservation(soo.getDerivedUnitBase());
 602                 }
 603             }
 604
 605             if (descriptionsFulltext.keySet().contains(k)){
 606                 if (!descriptionsFulltext.get(k).isEmpty() && (descriptionsFulltext.get(k).startsWith("Hab.") || descriptionsFulltext.get(k).startsWith("Habitat"))){
 607                     setParticularDescription(descriptionsFulltext.get(k),acceptedTaxon,defaultTaxon, refMods, Feature.HABITAT());
 608                     break;
 609                 }
 610                 else{
 611                     TextData textData = TextData.NewInstance();
 612
 613                     textData.setFeature(currentFeature);
 614                     textData.putText(Language.UNKNOWN_LANGUAGE(), descriptionsFulltext.get(k));
 615                     textData.addSource(null, null, refMods, null);
 616
 617                     td.addElement(textData);
 618                 }
 619             }
 620
 621
 622             if (descriptionsFulltext.keySet().contains(k) || specimenOrObservations.keySet().contains(k)){
 623                 td.addSource(null,null,refMods,null);
 624                 acceptedTaxon.addDescription(td);
 625                 importer.getDescriptionService().saveOrUpdate(td);
 626                 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 627             }
 628         }
 629     }
 630
 631
 632     /**
 633      * @param materials: the XML node group
 634      * @param acceptedTaxon: the current accepted Taxon
 635      * @param refMods: the current reference extracted from the MODS
 636      */
 637     @SuppressWarnings("rawtypes")
 638     private void extractMaterials(Node materials, Taxon acceptedTaxon, Reference<?> refMods,List<TaxonNameBase> nametosave) {
 639         //        logger.info("EXTRACTMATERIALS");
 640         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 641         NodeList children = materials.getChildNodes();
 642         NodeList events = null;
 643         String descr="";
 644
 645         DerivedUnitBase derivedUnitBase=null;
 646         MySpecimenOrObservation myspecimenOrObservation = null;
 647
 648         for (int i=0;i<children.getLength();i++){
 649             String rawAssociation="";
 650             boolean added=false;
 651             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 652                 events = children.item(i).getChildNodes();
 653                 for(int k=0;k<events.getLength();k++){
 654                     if (events.item(k).getNodeName().equalsIgnoreCase("tax:name")){
 655                         String linkedTaxon = getTaxonNameBaseFromXML(events.item(k), nametosave,refMods).toString();//TODO NOT IMPLEMENTED IN THE CDM YET
 656                         rawAssociation+=linkedTaxon.split("sec")[0];
 657                     }
 658                     if (! events.item(k).getNodeName().equalsIgnoreCase("tax:name")
 659                             && !events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
 660                         rawAssociation+= events.item(k).getTextContent().trim();
 661                     }
 662                     if(events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
 663                         if (!containsDistinctLetters(rawAssociation.replaceAll(";",""))) {
 664                             rawAssociation="no description text";
 665                         }
 666                         added=true;
 667                         DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),DerivedUnitType.FieldObservation);
 668                         derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
 669                         derivedUnitBase.addSource(null,null,refMods,null);
 670                         importer.getOccurrenceService().saveOrUpdate(derivedUnitBase);
 671
 672                         myspecimenOrObservation = extractSpecimenOrObservation(events.item(k),derivedUnitBase,DerivedUnitType.FieldObservation);
 673                         derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
 674                         descr=myspecimenOrObservation.getDescr();
 675
 676                         derivedUnitBase.addSource(null,null,refMods,null);
 677
 678                         importer.getOccurrenceService().saveOrUpdate(derivedUnitBase);
 679
 680                         TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
 681                         acceptedTaxon.addDescription(taxonDescription);
 682
 683
 684                         IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
 685
 686                         Feature feature = makeFeature(derivedUnitBase);
 687                         if(!StringUtils.isEmpty(descr)) {
 688                             derivedUnitBase.setTitleCache(descr, true);
 689                         }
 690                         indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
 691                         indAssociation.setFeature(feature);
 692                         indAssociation.addSource(null, null, refMods, null);
 693
 694                         taxonDescription.addElement(indAssociation);
 695                         taxonDescription.setTaxon(acceptedTaxon);
 696                         taxonDescription.addSource(null,null,refMods,null);
 697
 698                         importer.getDescriptionService().saveOrUpdate(taxonDescription);
 699                         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 700                     }
 701                     if (!rawAssociation.isEmpty() && !added){
 702                         TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
 703                         acceptedTaxon.addDescription(taxonDescription);
 704
 705                         IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
 706
 707                         Feature feature = Feature.MATERIALS_EXAMINED();
 708                         if(!StringUtils.isEmpty(rawAssociation)) {
 709                             derivedUnitBase.setTitleCache(rawAssociation, true);
 710                         }
 711                         indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
 712                         indAssociation.setFeature(feature);
 713                         indAssociation.addSource(null, null, refMods, null);
 714
 715                         taxonDescription.addElement(indAssociation);
 716                         taxonDescription.setTaxon(acceptedTaxon);
 717                         taxonDescription.addSource(null,null,refMods,null);
 718
 719                         importer.getDescriptionService().saveOrUpdate(taxonDescription);
 720                         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 721
 722                         rawAssociation="";
 723                     }
 724                 }
 725             }
 726         }
 727     }
 728
 729     /**
 730      * @param materials: the XML node group
 731      * @param acceptedTaxon: the current accepted Taxon
 732      * @param refMods: the current reference extracted from the MODS
 733      */
 734     @SuppressWarnings("rawtypes")
 735     private void extractMaterialsDirect(Node materials, Taxon acceptedTaxon, Reference<?> refMods, String event) {
 736         //        logger.info("EXTRACTMATERIALS");
 737         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 738         String descr="";
 739
 740         DerivedUnitBase derivedUnitBase=null;
 741         MySpecimenOrObservation myspecimenOrObservation = null;
 742
 743
 744         myspecimenOrObservation = extractSpecimenOrObservation(materials,derivedUnitBase, DerivedUnitType.FieldObservation);
 745         derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
 746         descr=myspecimenOrObservation.getDescr();
 747
 748         derivedUnitBase.addSource(null,null,refMods,null);
 749
 750         importer.getOccurrenceService().saveOrUpdate(derivedUnitBase);
 751
 752         TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
 753         acceptedTaxon.addDescription(taxonDescription);
 754
 755
 756         IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
 757
 758         Feature feature=null;
 759         if (event.equalsIgnoreCase("collection")){
 760             feature = makeFeature(derivedUnitBase);
 761         }
 762         else{
 763             feature = Feature.MATERIALS_EXAMINED();
 764         }
 765         if(!StringUtils.isEmpty(descr)) {
 766             derivedUnitBase.setTitleCache(descr, true);
 767         }
 768         indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
 769         indAssociation.setFeature(feature);
 770         indAssociation.addSource(null, null, refMods, null);
 771
 772         taxonDescription.addElement(indAssociation);
 773         taxonDescription.setTaxon(acceptedTaxon);
 774         taxonDescription.addSource(null,null,refMods,null);
 775
 776         importer.getDescriptionService().saveOrUpdate(taxonDescription);
 777         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 778
 779
 780     }
 781
 782
 783     /**
 784      * @param description: the XML node group
 785      * @param acceptedTaxon: the current acceptedTaxon
 786      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
 787      * @param nametosave: the list of objects to save into the CDM
 788      * @param refMods: the current reference extracted from the MODS
 789      * @param featureName: the feature name
 790      */
 791     private void extractSpecificFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
 792             List<TaxonNameBase> nametosave, Reference<?> refMods, String featureName ) {
 793         NodeList children = description.getChildNodes();
 794         NodeList insideNodes ;
 795         String descr ="";
 796         String localdescr="";
 797
 798         //        String fullContent = description.getTextContent();
 799         for (int i=0;i<children.getLength();i++){
 800             localdescr="";
 801             if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
 802                 descr += children.item(i).getTextContent().trim();
 803             }
 804             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 805                 insideNodes=children.item(i).getChildNodes();
 806                 List<String> blabla= new ArrayList<String>();
 807                 for (int j=0;j<insideNodes.getLength();j++){
 808                     if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
 809                         String linkedTaxon = getTaxonNameBaseFromXML(insideNodes.item(j), nametosave,refMods).toString();//TODO NOT IMPLEMENTED IN THE CDM YET
 810                         blabla.add(linkedTaxon.split("sec")[0]);
 811                     }
 812                     if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
 813                         if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
 814                             blabla.add(insideNodes.item(j).getTextContent().trim());
 815                             localdescr += insideNodes.item(j).getTextContent().trim();
 816                         }
 817                     }
 818                 }
 819                 if (!blabla.isEmpty()) {
 820                     List<DefinedTermBase> features = importer.getTermService().list(Feature.class, null,null,null,null);
 821                     Feature currentFeature=null;
 822                     for (DefinedTermBase feature: features){
 823                         String tmpF = ((Feature)feature).getTitleCache();
 824                         if (tmpF.equalsIgnoreCase(featureName)) {
 825                             currentFeature=(Feature)feature;
 826                         }
 827                     }
 828                     if (currentFeature == null) {
 829                         currentFeature=Feature.NewInstance(featureName, featureName, featureName);
 830                         importer.getTermService().saveOrUpdate(currentFeature);
 831                     }
 832                     setParticularDescription(StringUtils.join(blabla," "),acceptedTaxon,defaultTaxon, refMods,currentFeature);
 833                 }
 834             }
 835
 836         }
 837
 838     }
 839
 840
 841
 842
 843     /**
 844      * @param children: the XML node group
 845      * @param nametosave: the list of objects to save into the CDM
 846      * @param acceptedTaxon: the current acceptedTaxon
 847      * @param refMods: the current reference extracted from the MODS
 848      * @param fullContent :the parsed XML content
 849      * @return a list of description (text)
 850      */
 851     @SuppressWarnings("unused")
 852     private List<String> parseParagraph(List<TaxonNameBase> nametosave, Taxon acceptedTaxon, Reference<?> refMods, Node paragraph, Feature feature){
 853         List<String> fullDescription=  new ArrayList<String>();
 854         //        String localdescr;
 855         String descr="";
 856         NodeList insideNodes ;
 857         boolean collectionEvent = false;
 858         List<Node>collectionEvents = new ArrayList<Node>();
 859
 860         NodeList children = paragraph.getChildNodes();
 861
 862         for (int i=0;i<children.getLength();i++){
 863             //            localdescr="";
 864             if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
 865                 descr += children.item(i).getTextContent().trim();
 866             }
 867             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 868                 insideNodes=children.item(i).getChildNodes();
 869                 List<String> blabla= new ArrayList<String>();
 870                 for (int j=0;j<insideNodes.getLength();j++){
 871                     boolean nodeKnown = false;
 872                     if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
 873                         String linkedTaxon = getTaxonNameBaseFromXML(insideNodes.item(j), nametosave,refMods).toString();//TODO NOT IMPLEMENTED IN THE CDM YET
 874                         blabla.add(linkedTaxon.split("sec")[0]);
 875                         nodeKnown=true;
 876                     }
 877                     if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
 878                         if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
 879                             blabla.add(insideNodes.item(j).getTextContent().trim());
 880                             //                            localdescr += insideNodes.item(j).getTextContent().trim();
 881                         }
 882                         nodeKnown=true;
 883                     }
 884                     if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:bibref")) {
 885                         String ref = insideNodes.item(j).getTextContent().trim();
 886                         if (ref.endsWith(";")  && ((ref.length())>1)) {
 887                             ref=ref.substring(0, ref.length()-1)+".";
 888                         }
 889                         Reference<?> reference = ReferenceFactory.newGeneric();
 890                         reference.setTitleCache(ref, true);
 891                         blabla.add(reference.getTitleCache());
 892                         nodeKnown=true;
 893                     }
 894                     if  (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")) {
 895                         collectionEvent=true;
 896                         collectionEvents.add(insideNodes.item(j));
 897                         nodeKnown=true;
 898                     }
 899                     if (!nodeKnown) {
 900                         logger.info("Node not handled yet : "+insideNodes.item(j).getNodeName());
 901                         logger.warn("Node not handled yet : "+insideNodes.item(j).getNodeName());
 902                     }
 903
 904                 }
 905                 if (!blabla.isEmpty()) {
 906                     fullDescription.add(StringUtils.join(blabla," "));
 907                 }
 908             }
 909         }
 910         if (collectionEvent) {
 911             logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
 912             for (Node coll:collectionEvents){
 913                 extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
 914             }
 915         }
 916         return fullDescription;
 917     }
 918
 919
 920     /**
 921      * @param description: the XML node group
 922      * @param acceptedTaxon: the current acceptedTaxon
 923      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
 924      * @param nametosave: the list of objects to save into the CDM
 925      * @param refMods: the current reference extracted from the MODS
 926      * @param feature: the feature to link the data with
 927      */
 928     private void extractFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave, Reference<?> refMods, Feature feature){
 929         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
 930         List<String> fullDescription= parseParagraph( nametosave, acceptedTaxon, refMods, description,feature);
 931
 932         if (!fullDescription.isEmpty()) {
 933             setParticularDescription(StringUtils.join(fullDescription,"<br/>"),acceptedTaxon,defaultTaxon, refMods,feature);
 934         }
 935
 936     }
 937
 938
 939     /**
 940      * @param descr: the XML Nodegroup to parse
 941      * @param acceptedTaxon: the current acceptedTaxon
 942      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
 943      * @param refMods: the current reference extracted from the MODS
 944      * @param currentFeature: the feature name
 945      * @return
 946      */
 947     private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon, Reference<?> refMods, Feature currentFeature) {
 948         //        logger.info("setParticularDescription "+currentFeature);
 949         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 950         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
 951
 952         TextData textData = TextData.NewInstance();
 953         textData.setFeature(currentFeature);
 954         textData.addSource(null,null,refMods,null);
 955
 956         textData.putText(Language.UNKNOWN_LANGUAGE(), descr+"<br/>");
 957
 958         if(! descr.isEmpty() && (acceptedTaxon!=null)){
 959             TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
 960             td.addElement(textData);
 961             td.addSource(null,null,refMods,null);
 962             acceptedTaxon.addDescription(td);
 963             importer.getDescriptionService().saveOrUpdate(td);
 964             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 965         }
 966
 967         if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
 968             try{
 969                 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
 970                 if (tmp!=null) {
 971                     defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
 972                 }else{
 973                     importer.getTaxonService().saveOrUpdate(defaultTaxon);
 974                 }
 975             }catch(Exception e){
 976                 logger.debug("TAXON EXISTS"+defaultTaxon);
 977             }
 978
 979             TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
 980             defaultTaxon.addDescription(td);
 981             td.addElement(textData);
 982             td.addSource(null,null,refMods,null);
 983             importer.getDescriptionService().saveOrUpdate(td);
 984             importer.getTaxonService().saveOrUpdate(defaultTaxon);
 985         }
 986     }
 987
 988
 989
 990     /**
 991      * @param synonyms: the XML Nodegroup to parse
 992      * @param nametosave: the list of objects to save into the CDM
 993      * @param acceptedTaxon: the current acceptedTaxon
 994      * @param refMods: the current reference extracted from the MODS
 995      */
 996     @SuppressWarnings({ "rawtypes", "unchecked" })
 997     private void extractSynonyms(Node synonyms, List<TaxonNameBase> nametosave,Taxon acceptedTaxon, Reference<?> refMods) {
 998         //        logger.info("extractSynonyms: "+acceptedTaxon);
 999         Taxon ttmp = (Taxon) importer.getTaxonService().find(acceptedTaxon.getUuid());
1000         if (ttmp != null) {
1001             acceptedTaxon = CdmBase.deproxy(ttmp,Taxon.class);
1002         }
1003         else{
1004             acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1005         }
1006         NodeList children = synonyms.getChildNodes();
1007         TaxonNameBase nameToBeFilled = null;
1008         List<String> names = new ArrayList<String>();
1009
1010         String identifier="";
1011
1012         for (int i=0;i<children.getLength();i++){
1013             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1014                 NodeList tmp = children.item(i).getChildNodes();
1015                 //                String fullContent = children.item(i).getTextContent();
1016                 for (int j=0; j< tmp.getLength();j++){
1017                     if(tmp.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1018                         String[] enames;
1019                         try {
1020                             enames = extractScientificName(tmp.item(j));
1021                             if (enames[1].isEmpty()) {
1022                                 names.add(enames[0]+"---"+enames[2]+"---"+enames[3]);
1023                             } else {
1024                                 names.add(enames[1]+"---"+enames[2]+"---"+enames[3]);
1025                             }
1026                         } catch (TransformerFactoryConfigurationError e) {
1027                             logger.warn(e);
1028                         } catch (TransformerException e) {
1029                             logger.warn(e);
1030                         }
1031
1032                     }
1033                 }
1034             }
1035         }
1036         for(String name:names){
1037             System.out.println("HANDLE NAME "+name);
1038             Rank rank;
1039             try {
1040                 rank = Rank.getRankByName(name.split("---")[1]);
1041             } catch (UnknownCdmTypeException e) {
1042                 logger.warn("Rank problem!");
1043                 rank=null;
1044             }
1045             try{
1046                 identifier = name.split("---")[2];
1047             }catch(Exception e){logger.warn("identifier empty"); identifier="";}
1048             name = name.split("---")[0];
1049
1050             String original = name;
1051
1052             INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1053             nameToBeFilled = parser.parseFullName(name, nomenclaturalCode, rank);
1054             if (nameToBeFilled.hasProblem() &&
1055                     !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1056                 //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1057                 nameToBeFilled = solveNameProblem(original, name, parser);
1058             }
1059             nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave);
1060             Synonym synonym = Synonym.NewInstance(nameToBeFilled, refMods);
1061
1062
1063             if (!identifier.isEmpty() && (identifier.length()>2)){
1064                 setLSID(identifier, synonym);
1065             }
1066
1067             acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF());
1068             System.out.println("SYNONYM");
1069
1070             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1071         }
1072
1073     }
1074
1075
1076
1077
1078
1079     /**
1080      * @param refgroup: the XML nodes
1081      * @param nametosave: the list of objects to save into the CDM
1082      * @param acceptedTaxon: the current acceptedTaxon
1083      * @param nametosave: the list of objects to save into the CDM
1084      * @param refMods: the current reference extracted from the MODS
1085      * @return the acceptedTaxon (why?)
1086      */
1087     @SuppressWarnings({ "null", "unused" ,"rawtypes" })
1088     private Taxon extractReferences(Node refgroup, List<TaxonNameBase> nametosave, Taxon acceptedTaxon, Reference<?> refMods) {
1089         //        logger.info("extractReferences");
1090         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1091
1092         NodeList children = refgroup.getChildNodes();
1093         NonViralName<?> nameToBeFilled = null;
1094         boolean accepted=true;
1095         for (int i=0;i<children.getLength();i++){
1096             if(children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1097                 NodeList references = children.item(i).getChildNodes();
1098                 int nbRef=0;
1099                 boolean foundBibref=false;
1100                 for (int j=0;j<references.getLength();j++){
1101                     if(references.item(j).getNodeName().equalsIgnoreCase("tax:bibref")){
1102                         foundBibref=true;
1103                         String ref = references.item(j).getTextContent().trim();
1104                         if (ref.endsWith(";")  && ((ref.length())>1)) {
1105                             ref=ref.substring(0, ref.length()-1)+".";
1106                         }
1107                         if (ref.startsWith(treatmentMainName) && !ref.endsWith(treatmentMainName)) {
1108                             ref=ref.replace(treatmentMainName, "");
1109                             ref=ref.trim();
1110                             while (ref.startsWith(".") || ref.startsWith(",")) {
1111                                 ref=ref.replace(".","").replace(",","").trim();
1112                             }
1113                         }
1114
1115                         //                        logger.info("Current reference :"+nbRef+", "+ref+", "+treatmentMainName+"--"+ref.indexOf(treatmentMainName));
1116                         Reference<?> reference = ReferenceFactory.newGeneric();
1117                         reference.setTitleCache(ref, true);
1118
1119                         boolean makeEmpty = false;
1120                         //                        Rank rank = null;
1121                         //                        logger.info("TREATMENTMAINNAME: "+treatmentMainName);
1122                         //                        logger.info("ref: "+ref);
1123                         if (nbRef==0) {
1124                             accepted=true;
1125                         } else {
1126                             accepted=false;
1127                         }
1128
1129                         INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1130                         if (nomenclaturalCode.equals(NomenclaturalCode.ICBN)){
1131                             nameToBeFilled = BotanicalName.NewInstance(null);
1132                         }
1133                         if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1134                             nameToBeFilled = ZoologicalName.NewInstance(null);
1135                         }
1136                         if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1137                             nameToBeFilled = BacterialName.NewInstance(null);
1138                         }
1139                         if (accepted){
1140                             acceptedTaxon.getName().setNomenclaturalReference(reference);
1141                             nameToBeFilled.setNomenclaturalReference(reference);
1142                             acceptedTaxon.addSource(null,null,refMods,null);
1143                         }else{
1144                             TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1145                             acceptedTaxon.addDescription(td);
1146                             acceptedTaxon.addSource(null,null,refMods,null);
1147
1148                             TextData textData = TextData.NewInstance(Feature.CITATION());
1149
1150                             textData.addSource(null, null, reference, null, acceptedTaxon.getName(), ref);
1151                             td.addElement(textData);
1152                             td.addSource(null,null,refMods,null);
1153
1154                             importer.getDescriptionService().saveOrUpdate(td);
1155                         }
1156                         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1157                         //                        logger.warn("BWAAHHHH: "+nameToBeFilled.getParsingProblems()+", "+ref);
1158                         nbRef++;
1159                     }
1160                 }
1161                 if (!foundBibref){
1162                     String refString="";
1163                     String name="";
1164                     String identifier="";
1165                     for (int j=0;j<references.getLength();j++){
1166                         //no bibref tag inside
1167                         logger.info("references.item(j).getNodeName()"+references.item(j).getNodeName());
1168                         if (references.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1169                             String[] enames;
1170                             try {
1171                                 enames = extractScientificName(references.item(j));
1172                                 if (enames[1].isEmpty()) {
1173                                     name=enames[0]+"---"+enames[2]+"---"+enames[3];
1174                                 } else {
1175                                     name=enames[1]+"---"+enames[2]+"---"+enames[3];
1176                                 }
1177                             } catch (TransformerFactoryConfigurationError e) {
1178                                 logger.warn(e);
1179                             } catch (TransformerException e) {
1180                                 logger.warn(e);
1181                             }
1182
1183                             name=name.trim();
1184                         }
1185                         if (references.item(j).getNodeName().equalsIgnoreCase("#text")){
1186                             refString = references.item(j).getTextContent().trim();
1187                         }
1188                         if(references.item(j).getNodeName().equalsIgnoreCase("#text") && name.isEmpty() && !references.item(j).getTextContent().trim().isEmpty()){
1189                             try{
1190                                 identifier = name.split("---")[3];
1191                             }catch(Exception e ){logger.warn("no identifier");identifier="";}
1192                             INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1193                             String fullLineRefName = references.item(j).getTextContent().trim();
1194                             TaxonNameBase nameTBF = parser.parseFullName(fullLineRefName, nomenclaturalCode, Rank.UNKNOWN_RANK());
1195                             if (nameTBF.hasProblem() &&
1196                                     !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1197                                 nameTBF=solveNameProblem(fullLineRefName, fullLineRefName,parser);
1198                             }
1199                             nameTBF = getTaxonNameBase(nameTBF,nametosave);
1200                             Synonym synonym = Synonym.NewInstance(nameTBF, refMods);
1201
1202
1203                             if (!identifier.isEmpty() && (identifier.length()>2)){
1204                                 setLSID(identifier, acceptedTaxon);
1205                             }
1206
1207                             acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF());
1208                             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1209                         }
1210                     }
1211
1212                     if(!name.isEmpty()){
1213                         logger.info("acceptedTaxon and name: *"+acceptedTaxon.getTitleCache()+"*, *"+name+"*");
1214                         if (acceptedTaxon.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(name.split("---")[0].trim())){
1215                             identifier = name.split("---")[3];
1216                             Reference<?> refS = ReferenceFactory.newGeneric();
1217                             refS.setTitleCache(refString, true);
1218                             //                            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1219                             //                            acceptedTaxon.addDescription(td);
1220                             //                            acceptedTaxon.addSource(refSource);
1221                             //
1222                             //                            TextData textData = TextData.NewInstance(Feature.CITATION());
1223                             //
1224                             //                            textData.addSource(null, null, refS, null);
1225                             //                            td.addElement(textData);
1226                             //                            td.addSource(refSource);
1227                             //                            importer.getDescriptionService().saveOrUpdate(td);
1228
1229
1230                             if (!identifier.isEmpty() && (identifier.length()>2)){
1231                                 setLSID(identifier, acceptedTaxon);
1232
1233                             }
1234
1235                             acceptedTaxon.getName().setNomenclaturalReference(refS);
1236                             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1237                         }
1238                         else{
1239                             Rank rank;
1240                             try {
1241                                 rank = Rank.getRankByName(name.split("---")[1]);
1242                             } catch (Exception e) {
1243                                 logger.warn("Rank or name problem!");
1244                                 rank=null;
1245                             }
1246                             name = name.split("---")[0].trim() + refString;
1247                             String original = name;
1248                             try{
1249                                 identifier = name.split("---")[3];
1250                             }
1251                             catch(Exception e){
1252                                 logger.warn("no identifier");
1253                                 identifier="";
1254                             }
1255                             INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1256                             TaxonNameBase nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
1257                             if (nameTBF.hasProblem() &&
1258                                     !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1259                                 //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1260                                 nameTBF=solveNameProblem(original, name,parser);
1261                             }
1262                             nameTBF = getTaxonNameBase(nameTBF,nametosave);
1263                             Synonym synonym = Synonym.NewInstance(nameTBF, refMods);
1264
1265
1266                             if (!identifier.isEmpty() && (identifier.length()>2)){
1267                                 String id = identifier.split("__")[0];
1268                                 String source = identifier.split("__")[1];
1269                                 if (id.indexOf("lsid")>-1){
1270                                     try {
1271                                         LSID lsid = new LSID(id);
1272                                         synonym.setLsid(lsid);
1273                                     } catch (MalformedLSIDException e) {
1274                                         // TODO Auto-generated catch block
1275                                         e.printStackTrace();
1276                                     }
1277
1278                                 }
1279                                 else{
1280                                     //TODO ADD ORIGINAL SOURCE ID
1281                                     IdentifiableSource os = IdentifiableSource.NewInstance();
1282                                     os.setIdInSource(id);
1283                                     Reference<?> re = ReferenceFactory.newGeneric();
1284                                     re.setTitle(source);
1285                                     os.setCitation(re);
1286                                     synonym.addSource(os);
1287                                 }
1288                             }
1289
1290                             acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF());
1291                             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1292                         }
1293                     }
1294                 }
1295             }
1296         }
1297         //        importer.getClassificationService().saveOrUpdate(classification);
1298         return acceptedTaxon;
1299
1300     }
1301
1302     /**
1303      * @param identifier
1304      * @param acceptedTaxon
1305      */
1306     private void setLSID(String identifier, TaxonBase<?> taxon) {
1307         boolean lsidok=false;
1308         String id = identifier.split("__")[0];
1309         String source = identifier.split("__")[1];
1310         if (id.indexOf("lsid")>-1){
1311             try {
1312                 LSID lsid = new LSID(id);
1313                 taxon.setLsid(lsid);
1314                 lsidok=true;
1315             } catch (MalformedLSIDException e) {
1316                 logger.warn("Malformed LSID");
1317             }
1318
1319         }
1320         if ((id.indexOf("lsid")<0) || !lsidok){
1321             //ADD ORIGINAL SOURCE ID
1322             IdentifiableSource os = IdentifiableSource.NewInstance();
1323             os.setIdInSource(id);
1324             Reference<?> re = ReferenceFactory.newGeneric();
1325             re.setTitle(source);
1326             os.setCitation(re);
1327             taxon.addSource(os);
1328         }
1329
1330     }
1331
1332     /**
1333      * try to solve a parsing problem for a scientific name
1334      * @param original : the name from the OCR document
1335      * @param name : the tagged version
1336      * @param parser
1337      * @return the corrected TaxonNameBase
1338      */
1339     @SuppressWarnings({ "unchecked", "rawtypes" })
1340     private TaxonNameBase<?,?> solveNameProblem(String original, String name, INonViralNameParser parser) {
1341         Map<String,String> ato = namesMap.get(original);
1342         Rank rank=Rank.UNKNOWN_RANK();
1343
1344         if (ato == null){
1345             rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
1346         }else{
1347             rank = getRank(ato);
1348         }
1349         TaxonNameBase<?,?> nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
1350         //                logger.info("RANK: "+rank);
1351         int retry=0;
1352         while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
1353             String fullname =  getFullReference(name,nameTBF.getParsingProblems());
1354             if (nomenclaturalCode.equals(NomenclaturalCode.ICBN)){
1355                 nameTBF = BotanicalName.NewInstance(null);
1356             }
1357             if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1358                 nameTBF = ZoologicalName.NewInstance(null);
1359             }
1360             if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1361                 nameTBF= BacterialName.NewInstance(null);
1362             }
1363             parser.parseReferencedName(nameTBF, fullname, rank, false);
1364             retry++;
1365         }
1366         if (retry == 1){
1367             nameTBF.setFullTitleCache(name, true);
1368             //                    logger.info("FULL TITLE CACHE "+name);
1369         }
1370         return nameTBF;
1371     }
1372
1373     /**
1374      * @param nomenclatureNode: the XML nodes
1375      * @param nametosave: the list of objects to save into the CDM
1376      * @param refMods: the current reference extracted from the MODS
1377      * @return
1378      */
1379     @SuppressWarnings({ "rawtypes", "unused" })
1380     private Taxon extractNomenclature(Node nomenclatureNode,  List<TaxonNameBase> nametosave, Reference<?> refMods) {
1381         //        logger.info("extractNomenclature");
1382         NodeList children = nomenclatureNode.getChildNodes();
1383         String freetext;
1384         TaxonNameBase nameToBeFilled = null;
1385         Taxon acceptedTaxon = null;
1386         INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1387         String identifier="";
1388
1389         Rank rank = Rank.UNKNOWN_RANK();
1390         //        String fullContent = nomenclatureNode.getTextContent();
1391         for (int i=0;i<children.getLength();i++){
1392             if (children.item(i).getNodeName().equalsIgnoreCase("#text")) {
1393                 freetext=children.item(i).getTextContent();
1394             }
1395             if (children.item(i).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1396                 System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
1397                 extractMaterialsDirect(children.item(i), acceptedTaxon, refMods, "collection");
1398             }
1399             if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
1400                 String[] names;
1401                 try {
1402                     names = extractScientificName(children.item(i));
1403                     treatmentMainName = names[1];
1404                     originalTreatmentName = names[0];
1405                     rank = Rank.getRankByName(names[2]);
1406                     identifier=names[3];
1407
1408                 } catch (TransformerFactoryConfigurationError e1) {
1409                     logger.warn(e1);
1410                 } catch (TransformerException e1) {
1411                     logger.warn(e1);
1412                 } catch (UnknownCdmTypeException e) {
1413                     logger.warn(e);
1414                 }
1415
1416                 if (rank.equals(Rank.UNKNOWN_RANK()) || rank.isLower(configState.getConfig().getMaxRank())){
1417                     maxRankRespected=true;
1418
1419                     if (nomenclaturalCode.equals(NomenclaturalCode.ICBN)){
1420                         nameToBeFilled = BotanicalName.NewInstance(null);
1421                     }
1422                     if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1423                         nameToBeFilled = ZoologicalName.NewInstance(null);
1424                     }
1425                     if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1426                         nameToBeFilled = BacterialName.NewInstance(null);
1427                     }
1428                     acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
1429                     if (acceptedTaxon ==null ){
1430                         nameToBeFilled = parser.parseFullName(treatmentMainName, nomenclaturalCode, null);
1431                         if (nameToBeFilled.hasProblem() &&
1432                                 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1433                             nameToBeFilled = solveNameProblem(originalTreatmentName,treatmentMainName,parser);
1434                         }
1435                         nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave);
1436                         if (!originalTreatmentName.isEmpty()) {
1437                             TaxonNameDescription td = TaxonNameDescription.NewInstance();
1438                             td.setTitleCache(originalTreatmentName);
1439                             nameToBeFilled.addDescription(td);
1440                         }
1441                         nameToBeFilled.addSource(null,null,refMods,null);
1442                         acceptedTaxon= new Taxon(nameToBeFilled,(Reference<?>) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
1443                         if(!configState.getConfig().doKeepOriginalSecundum()) {
1444                             acceptedTaxon.setSec(configState.getConfig().getSecundum());
1445                             logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
1446                         }
1447
1448
1449                         if (!identifier.isEmpty() && (identifier.length()>2)){
1450                             boolean lsidok=false;
1451                             String id = identifier.split("__")[0];
1452                             String source = identifier.split("__")[1];
1453                             if (id.indexOf("lsid")>-1){
1454                                 try {
1455                                     LSID lsid = new LSID(id);
1456                                     acceptedTaxon.setLsid(lsid);
1457                                     lsidok=true;
1458                                 } catch (MalformedLSIDException e) {
1459                                     logger.warn("Malformed LSID");
1460                                 }
1461
1462                             }
1463                             if ((id.indexOf("lsid")<0) || !lsidok){
1464                                 //TODO ADD ORIGINAL SOURCE ID
1465                                 IdentifiableSource os = IdentifiableSource.NewInstance();
1466                                 os.setIdInSource(id);
1467                                 Reference<?> re = ReferenceFactory.newGeneric();
1468                                 re.setTitle(source);
1469                                 os.setCitation(re);
1470                                 acceptedTaxon.addSource(os);
1471                             }
1472                         }
1473
1474                         acceptedTaxon.addSource(null,null,refMods,null);
1475                         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1476
1477                         Taxon parentTaxon = askParent(acceptedTaxon, classification);
1478                         if (parentTaxon ==null){
1479                             while (parentTaxon == null) {
1480                                 parentTaxon = createParent(acceptedTaxon, refMods);
1481                                 classification.addParentChild(parentTaxon, acceptedTaxon, refMods, null);
1482                             }
1483                         }else{
1484                             classification.addParentChild(parentTaxon, acceptedTaxon, refMods, null);
1485                         }
1486                     }else{
1487                         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1488                         Set<IdentifiableSource> sources = acceptedTaxon.getSources();
1489                         boolean sourcelinked=false;
1490                         for (IdentifiableSource source:sources){
1491                             if (source.getCitation().getTitle().equalsIgnoreCase(refMods.getTitleCache())) {
1492                                 sourcelinked=true;
1493                             }
1494                         }
1495                         if (!configState.getConfig().doKeepOriginalSecundum()) {
1496                             acceptedTaxon.setSec(configState.getConfig().getSecundum());
1497                             logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
1498                         }
1499                         if (!sourcelinked){
1500                             acceptedTaxon.addSource(null, null, refMods, null);
1501                         }
1502                         if (!sourcelinked || !configState.getConfig().doKeepOriginalSecundum()){
1503
1504                             if (!identifier.isEmpty() && (identifier.length()>2)){
1505                                 setLSID(identifier, acceptedTaxon);
1506                             }
1507                             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1508                         }
1509                     }
1510                 }else{
1511                     maxRankRespected=false;
1512                 }
1513             }
1514         }
1515         //        importer.getClassificationService().saveOrUpdate(classification);
1516         return acceptedTaxon;
1517     }
1518
1519     /**
1520      * @param acceptedTaxon: the current acceptedTaxon
1521      * @param ref: the current reference extracted from the MODS
1522      * @return the parent for the current accepted taxon
1523      */
1524     private Taxon createParent(Taxon acceptedTaxon, Reference<?> ref) {
1525         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1526
1527         List<Rank> rankList = new ArrayList<Rank>();
1528         rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
1529
1530         List<String> rankListStr = new ArrayList<String>();
1531         for (Rank r:rankList) {
1532             rankListStr.add(r.toString());
1533         }
1534         String r="";
1535         String s = acceptedTaxon.getTitleCache();
1536         Taxon tax = null;
1537
1538         int addTaxon = askAddParent(s);
1539         logger.info("ADD TAXON: "+addTaxon);
1540         if (addTaxon == 0){
1541             Taxon tmp = askParent(acceptedTaxon, classification);
1542             if (tmp == null){
1543                 s = askSetParent(s);
1544                 r = askRank(s,rankListStr);
1545
1546                 NonViralName<?> nameToBeFilled = null;
1547                 if (nomenclaturalCode.equals(NomenclaturalCode.ICBN)){
1548                     nameToBeFilled = BotanicalName.NewInstance(null);
1549                 }
1550                 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1551                     nameToBeFilled = ZoologicalName.NewInstance(null);
1552                 }
1553                 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1554                     nameToBeFilled = BacterialName.NewInstance(null);
1555                 }
1556                 nameToBeFilled.setTitleCache(s);
1557                 nameToBeFilled.setRank(getRank(r));
1558
1559                 tax = Taxon.NewInstance(nameToBeFilled, ref);
1560             }
1561             else{
1562                 tax=tmp;
1563             }
1564
1565             createParent(tax, ref);
1566             //            logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
1567             classification.addParentChild(tax, acceptedTaxon, ref, null);
1568         }
1569         else{
1570             classification.addChildTaxon(acceptedTaxon, ref, null, null);
1571             tax=acceptedTaxon;
1572         }
1573         //        logger.info("RETURN: "+tax );
1574         return tax;
1575
1576     }
1577
1578
1579
1580     /**
1581      * @param name
1582      * @throws TransformerFactoryConfigurationError
1583      * @throws TransformerException
1584      * @return a list of possible names
1585      */
1586     private String[] extractScientificName(Node name) throws TransformerFactoryConfigurationError, TransformerException {
1587         //        System.out.println("extractScientificName");
1588         Rank rank = Rank.UNKNOWN_RANK();
1589         NodeList children = name.getChildNodes();
1590         String fullName = "";
1591         String newName="";
1592         String identifier="";
1593         HashMap<String, String> atomisedMap = new HashMap<String, String>();
1594         List<String> atomisedName= new ArrayList<String>();
1595
1596         String rankStr = "";
1597         Rank tmpRank ;
1598         for (int i=0;i<children.getLength();i++){
1599             if(children.item(i).getNodeName().equalsIgnoreCase("tax:xmldata")){
1600                 NodeList atom = children.item(i).getChildNodes();
1601                 for (int k=0;k<atom.getLength();k++){
1602                     if (atom.item(k).getNodeName().equalsIgnoreCase("tax:xid")){
1603                         try{
1604                             identifier = atom.item(k).getAttributes().getNamedItem("identifier").getNodeValue();
1605                         }catch(Exception e){
1606                             System.out.println("pb with identifier, maybe empty");
1607                         }
1608                         try{
1609                             identifier+="__"+atom.item(k).getAttributes().getNamedItem("source").getNodeValue();
1610                         }catch(Exception e){
1611                             System.out.println("pb with identifier, maybe empty");
1612                         }
1613                     }
1614                     tmpRank = null;
1615                     rankStr = atom.item(k).getNodeName().toLowerCase();
1616                     //                    logger.info("RANKSTR:*"+rankStr+"*");
1617                     if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
1618                         rankStr=atom.item(k).getTextContent().trim();
1619                         tmpRank = getRank(rankStr);
1620                     }
1621                     //                    if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
1622                     if (tmpRank != null){
1623                         rank=tmpRank;
1624                     }
1625
1626                     atomisedMap.put(rankStr.toLowerCase(),atom.item(k).getTextContent().trim());
1627                     atomisedName.add(atom.item(k).getTextContent().trim());
1628                 }
1629             }
1630             if(children.item(i).getNodeName().equalsIgnoreCase("#text") && !StringUtils.isBlank(children.item(i).getTextContent())){
1631                 //                logger.info("name non atomised: "+children.item(i).getTextContent());
1632                 fullName = children.item(i).getTextContent().trim();
1633                 //                logger.info("fullname: "+fullName);
1634             }
1635         }
1636         if (fullName != null){
1637             fullName = fullName.replace("( ", "(");
1638             fullName = fullName.replace(" )",")");
1639
1640         }
1641         if (fullName.trim().isEmpty()){
1642             fullName=StringUtils.join(atomisedName," ");
1643         }
1644
1645         while(fullName.contains("  ")) {
1646             fullName=fullName.replace("  ", " ");
1647             //            logger.info("while");
1648         }
1649
1650         namesMap.put(fullName,atomisedMap);
1651         String atomisedNameStr = StringUtils.join(atomisedName," ");
1652         while(atomisedNameStr.contains("  ")) {
1653             atomisedNameStr=atomisedNameStr.replace("  ", " ");
1654             //            logger.info("atomisedNameStr: "+atomisedNameStr);
1655         }
1656         atomisedNameStr=atomisedNameStr.trim();
1657
1658         if (fullName != null){
1659             if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
1660                 newName=getScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
1661             } else {
1662                 newName=fullName;
1663             }
1664         }
1665         rank = askForRank(newName, rank, nomenclaturalCode);
1666         String[] names = new String[4];
1667         names[0]=fullName;
1668         names[1]=newName;
1669         names[2]=rank.toString();
1670         names[3]=identifier;
1671         return names;
1672
1673     }
1674
1675     /**
1676      * @param classification2
1677      */
1678     public void updateClassification(Classification classification2) {
1679         classification = classification2;
1680     }
1681
1682
1683 }