cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/taxonx2013/TaxonXTreatmentExtractor.java

   1 // $Id$
   2 /**
   3  * Copyright (C) 2013 EDIT
   4  * European Distributed Institute of Taxonomy
   5  * http://www.e-taxonomy.eu
   6  *
   7  * The contents of this file are subject to the Mozilla Public License Version 1.1
   8  * See LICENSE.TXT at the top of this package for the full license terms.
   9  */
  10 package eu.etaxonomy.cdm.io.taxonx2013;
  11
  12 import java.io.File;
  13 import java.io.FileWriter;
  14 import java.io.IOException;
  15 import java.net.URI;
  16 import java.util.ArrayList;
  17 import java.util.HashMap;
  18 import java.util.List;
  19 import java.util.Map;
  20 import java.util.Set;
  21 import java.util.regex.Pattern;
  22
  23 import javax.xml.transform.TransformerException;
  24 import javax.xml.transform.TransformerFactoryConfigurationError;
  25
  26 import org.apache.commons.lang.StringUtils;
  27 import org.w3c.dom.Node;
  28 import org.w3c.dom.NodeList;
  29
  30 import com.ibm.lsid.MalformedLSIDException;
  31
  32 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
  33 import eu.etaxonomy.cdm.model.common.CdmBase;
  34 import eu.etaxonomy.cdm.model.common.DefinedTermBase;
  35 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
  36 import eu.etaxonomy.cdm.model.common.LSID;
  37 import eu.etaxonomy.cdm.model.common.Language;
  38 import eu.etaxonomy.cdm.model.common.OriginalSourceType;
  39 import eu.etaxonomy.cdm.model.description.Feature;
  40 import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
  41 import eu.etaxonomy.cdm.model.description.PolytomousKey;
  42 import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
  43 import eu.etaxonomy.cdm.model.description.TaxonDescription;
  44 import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
  45 import eu.etaxonomy.cdm.model.description.TextData;
  46 import eu.etaxonomy.cdm.model.name.BacterialName;
  47 import eu.etaxonomy.cdm.model.name.BotanicalName;
  48 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
  49 import eu.etaxonomy.cdm.model.name.NonViralName;
  50 import eu.etaxonomy.cdm.model.name.Rank;
  51 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
  52 import eu.etaxonomy.cdm.model.name.ZoologicalName;
  53 import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
  54 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
  55 import eu.etaxonomy.cdm.model.reference.Reference;
  56 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
  57 import eu.etaxonomy.cdm.model.taxon.Classification;
  58 import eu.etaxonomy.cdm.model.taxon.Synonym;
  59 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
  60 import eu.etaxonomy.cdm.model.taxon.Taxon;
  61 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
  62 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
  63 import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
  64 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
  65 import eu.etaxonomy.cdm.strategy.parser.ParserProblem;
  66
  67 /**
  68  * @author pkelbert
  69  * @date 2 avr. 2013
  70  *
  71  */
  72 public class TaxonXTreatmentExtractor extends TaxonXExtractor{
  73
  74     private final NomenclaturalCode nomenclaturalCode;
  75     private Classification classification;
  76
  77     private  String treatmentMainName,originalTreatmentName;
  78
  79     private final HashMap<String,Map<String,String>> namesMap = new HashMap<String, Map<String,String>>();
  80
  81
  82     private final Pattern keypattern = Pattern.compile("^(\\d+.*|-\\d+.*)");
  83     private final Pattern keypatternend = Pattern.compile("^.+?\\d$");
  84
  85     private boolean maxRankRespected =false;
  86
  87     /**
  88      * @param nomenclaturalCode
  89      * @param classification
  90      * @param importer
  91      * @param configState
  92      */
  93     public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode, Classification classification, TaxonXImport importer,
  94             TaxonXImportState configState) {
  95         this.nomenclaturalCode=nomenclaturalCode;
  96         this.classification = classification;
  97         this.importer=importer;
  98         this.configState=configState;
  99         prepareCollectors(configState, importer.getAgentService());
 100     }
 101
 102     /**
 103      * extracts all the treament information and save them
 104      * @param treatmentnode: the XML Node
 105      * @param tosave: the list of object to save into the CDM
 106      * @param refMods: the reference extracted from the MODS
 107      * @param sourceName: the URI of the document
 108      */
 109     @SuppressWarnings({ "rawtypes", "unused" })
 110     protected void extractTreatment(Node treatmentnode, List<Object> tosave, Reference<?> refMods, URI sourceName) {
 111         logger.info("extractTreatment");
 112         List<TaxonNameBase> nametosave = new ArrayList<TaxonNameBase>();
 113         NodeList children = treatmentnode.getChildNodes();
 114         Taxon acceptedTaxon =null;
 115         Taxon defaultTaxon =null;
 116         boolean refgroup=false;
 117
 118         for (int i=0;i<children.getLength();i++){
 119             if(children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group")) {
 120                 refgroup=true;
 121             }
 122         }
 123
 124         for (int i=0;i<children.getLength();i++){
 125
 126             if (children.item(i).getNodeName().equalsIgnoreCase("tax:nomenclature")){
 127                 NodeList nomenclature = children.item(i).getChildNodes();
 128                 boolean containsName=false;
 129                 for(int k=0;k<nomenclature.getLength();k++){
 130                     if(nomenclature.item(k).getNodeName().equalsIgnoreCase("tax:name")){
 131                         containsName=true;
 132                         break;
 133                     }
 134                 }
 135                 if (containsName){
 136                     reloadClassification();
 137                     //extract "main" the scientific name
 138                     acceptedTaxon = extractNomenclature(children.item(i),nametosave,refMods);
 139                 }
 140             }
 141             else if (children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group") && maxRankRespected){
 142                 reloadClassification();
 143                 //extract the References within the document
 144                 extractReferences(children.item(i),nametosave,acceptedTaxon,refMods);
 145             }
 146             else if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 147                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected){
 148                 File file = new File("/home/pkelbert/Bureau/multipleTaxonX.txt");
 149                 FileWriter writer;
 150                 try {
 151                     writer = new FileWriter(file ,true);
 152                     writer.write(sourceName+"\n");
 153                     writer.flush();
 154                     writer.close();
 155                 } catch (IOException e1) {
 156                     // TODO Auto-generated catch block
 157                     e1.printStackTrace();
 158                 }
 159                 String multiple = askMultiple(children.item(i));
 160                 if (multiple.equalsIgnoreCase("synonyms")) {
 161                     extractSynonyms(children.item(i),nametosave, acceptedTaxon,refMods);
 162                 }
 163                 else
 164                     if(multiple.equalsIgnoreCase("material examined")){
 165                         extractMaterials(children.item(i),acceptedTaxon, refMods, nametosave);
 166                     }
 167                     else
 168                         if (multiple.equalsIgnoreCase("distribution")){
 169                             extractDistribution(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods);
 170                         }
 171                         else {
 172                             extractSpecificFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,multiple);
 173                         }
 174             }
 175             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 176                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected){
 177                 extractFeature(children.item(i),acceptedTaxon,defaultTaxon, nametosave, refMods, Feature.BIOLOGY_ECOLOGY());
 178             }
 179             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 180                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected){
 181                 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods, Feature.DESCRIPTION());
 182             }
 183             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 184                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected){
 185                 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,Feature.DIAGNOSIS());
 186             }
 187             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 188                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected){
 189                 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods, Feature.DISCUSSION());
 190             }
 191
 192             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 193                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected){
 194                 extractDistribution(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods);
 195             }
 196             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 197                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected){
 198                 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave,refMods,Feature.ETYMOLOGY());
 199             }
 200
 201             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 202                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected){
 203                 extractMaterials(children.item(i),acceptedTaxon, refMods, nametosave);
 204             }
 205
 206             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 207                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected){
 208                 //TODO IGNORE keys for the moment
 209                 //extractKey(children.item(i),acceptedTaxon, nametosave,source, refMods);
 210                 extractSpecificFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,"Keys - unparsed");
 211             }
 212             else{
 213                 logger.info("ANOTHER KIND OF NODES: "+children.item(i).getNodeName()+", "+children.item(i).getAttributes());
 214                 if (children.item(i).getAttributes() !=null) {
 215                     logger.info(children.item(i).getAttributes().item(0));
 216                 }
 217             }
 218         }
 219         //        logger.info("saveUpdateNames");
 220         if (maxRankRespected){
 221             importer.getNameService().saveOrUpdate(nametosave);
 222             importer.getClassificationService().saveOrUpdate(classification);
 223             logger.info("saveUpdateNames-ok");
 224         }
 225     }
 226
 227
 228     /**
 229      * @param keys
 230      * @param acceptedTaxon: the current acceptedTaxon
 231      * @param nametosave: the list of objects to save into the CDM
 232      * @param refMods: the current reference extracted from the MODS
 233      */
 234     @SuppressWarnings("rawtypes")
 235     private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonNameBase> nametosave, Reference<?> refMods) {
 236         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
 237
 238         NodeList children = keys.getChildNodes();
 239         String key="";
 240         PolytomousKey poly =  PolytomousKey.NewInstance();
 241         poly.addSource(OriginalSourceType.Import, null,null,refMods,null);
 242         poly.addTaxonomicScope(acceptedTaxon);
 243         poly.setTitleCache("bloup");
 244         //        poly.addCoveredTaxon(acceptedTaxon);
 245         PolytomousKeyNode root = poly.getRoot();
 246         PolytomousKeyNode previous = null,tmpKey=null;
 247         Taxon taxonKey=null;
 248         List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
 249
 250         //        String fullContent = keys.getTextContent();
 251         for (int i=0;i<children.getLength();i++){
 252             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 253                 NodeList paragraph = children.item(i).getChildNodes();
 254                 key="";
 255                 taxonKey=null;
 256                 for (int j=0;j<paragraph.getLength();j++){
 257                     if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
 258                         if (! paragraph.item(j).getTextContent().trim().isEmpty()){
 259                             key+=paragraph.item(j).getTextContent().trim();
 260                             //                            logger.info("KEY: "+j+"--"+key);
 261                         }
 262                     }
 263                     if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
 264                         taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
 265                     }
 266                 }
 267                 //                logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
 268                 if (keypattern.matcher(key).matches()){
 269                     tmpKey = PolytomousKeyNode.NewInstance(key);
 270                     if (taxonKey!=null) {
 271                         tmpKey.setTaxon(taxonKey);
 272                     }
 273                     polyNodes.add(tmpKey);
 274                     if (previous == null) {
 275                         root.addChild(tmpKey);
 276                     } else {
 277                         previous.addChild(tmpKey);
 278                     }
 279                 }else{
 280                     if (!key.isEmpty()){
 281                         tmpKey=PolytomousKeyNode.NewInstance(key);
 282                         if (taxonKey!=null) {
 283                             tmpKey.setTaxon(taxonKey);
 284                         }
 285                         polyNodes.add(tmpKey);
 286                         if (keypatternend.matcher(key).matches()) {
 287                             root.addChild(tmpKey);
 288                             previous=tmpKey;
 289                         } else{
 290                             previous.addChild(tmpKey);
 291                         }
 292
 293                     }
 294                 }
 295             }
 296         }
 297         importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
 298         importer.getPolytomousKeyService().saveOrUpdate(poly);
 299     }
 300
 301     /**
 302      * @param taxons: the XML Nodegroup
 303      * @param nametosave: the list of objects to save into the CDM
 304      * @param acceptedTaxon: the current accepted Taxon
 305      * @param refMods: the current reference extracted from the MODS
 306      *
 307      * @return Taxon object built
 308      */
 309     @SuppressWarnings({ "rawtypes", "unchecked" })
 310     private Taxon getTaxonFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference<?> refMods) {
 311         //        logger.info("getTaxonFromXML");
 312         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 313
 314         TaxonNameBase nameToBeFilled = null;
 315         String name="";
 316
 317         String[] enames = null;
 318         Rank rank = Rank.UNKNOWN_RANK();
 319         String original="";
 320         String identifier="";
 321
 322         try {
 323             enames = extractScientificName(taxons);
 324             if (enames[1].isEmpty()) {
 325                 name=enames[0];
 326             } else {
 327                 name=enames[1];
 328             }
 329             original=enames[0];
 330             rank = Rank.getRankByName(enames[2]);
 331             identifier = enames[3];
 332         } catch (TransformerFactoryConfigurationError e1) {
 333             logger.warn(e1);
 334         } catch (TransformerException e1) {
 335             logger.warn(e1);
 336         } catch (UnknownCdmTypeException e) {
 337             logger.warn("Rank problem!"+enames[2]);
 338             rank=Rank.UNKNOWN_RANK();
 339         }
 340         INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
 341
 342         nameToBeFilled = parser.parseFullName(name, nomenclaturalCode, rank);
 343         if (nameToBeFilled.hasProblem() &&
 344                 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
 345             //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
 346             nameToBeFilled=solveNameProblem(original, name,parser);
 347         }
 348
 349         nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave);
 350
 351         //        importer.getNameService().saveOrUpdate(nametosave);
 352         Taxon t = importer.getTaxonService().findBestMatchingTaxon(nameToBeFilled.getTitleCache());
 353         if (t ==null){
 354             //            logger.info("BestTaxonService not the best or null");
 355             t= new Taxon(nameToBeFilled,(Reference<?>) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
 356             if (t.getSec() == null) {
 357                 t.setSec(refMods);
 358             }
 359             if(!configState.getConfig().doKeepOriginalSecundum()) {
 360                 t.setSec(configState.getConfig().getSecundum());
 361                 logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
 362             }
 363             t.addSource(OriginalSourceType.Import,null,null,refMods,null);
 364
 365             if (!identifier.isEmpty() && (identifier.length()>2)){
 366                 setLSID(identifier, t);
 367             }
 368
 369             Taxon parentTaxon = askParent(t, classification);
 370             if (parentTaxon ==null){
 371                 while (parentTaxon == null) {
 372                     parentTaxon = createParent(t, refMods);
 373                     classification.addParentChild(parentTaxon, t, refMods, null);
 374                 }
 375             }else{
 376                 classification.addParentChild(parentTaxon, t, refMods, null);
 377             }
 378         }
 379         else{
 380             t = CdmBase.deproxy(t, Taxon.class);
 381         }
 382         if (!configState.getConfig().doKeepOriginalSecundum()) {
 383             t.setSec(configState.getConfig().getSecundum());
 384             logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
 385         }
 386         return t;
 387     }
 388
 389
 390     /**
 391      * @param taxons: the XML Nodegroup
 392      * @param nametosave: the list of objects to save into the CDM
 393      * @param acceptedTaxon: the current accepted Taxon
 394      * @param refMods: the current reference extracted from the MODS
 395      *
 396      * @return Taxon object built
 397      */
 398     @SuppressWarnings({ "rawtypes", "unchecked" })
 399     private TaxonNameBase getTaxonNameBaseFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference<?> refMods) {
 400         //        logger.info("getTaxonFromXML");
 401         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 402
 403         TaxonNameBase nameToBeFilled = null;
 404         String name="";
 405
 406         String[] enames = null;
 407         Rank rank = Rank.UNKNOWN_RANK();
 408         String original="";
 409         String identifier="";
 410
 411         try {
 412             enames = extractScientificName(taxons);
 413             if (enames[1].isEmpty()) {
 414                 name=enames[0];
 415             } else {
 416                 name=enames[1];
 417             }
 418             original=enames[0];
 419             rank = Rank.getRankByName(enames[2]);
 420             identifier = enames[3];
 421         } catch (TransformerFactoryConfigurationError e1) {
 422             logger.warn(e1);
 423         } catch (TransformerException e1) {
 424             logger.warn(e1);
 425         } catch (UnknownCdmTypeException e) {
 426             logger.warn("Rank problem!"+enames[2]);
 427             rank=Rank.UNKNOWN_RANK();
 428         }
 429         INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
 430
 431         nameToBeFilled = parser.parseFullName(name, nomenclaturalCode, rank);
 432         if (nameToBeFilled.hasProblem() &&
 433                 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
 434             //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
 435             nameToBeFilled=solveNameProblem(original, name,parser);
 436         }
 437
 438         nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave);
 439         return nameToBeFilled;
 440
 441     }
 442
 443
 444     @SuppressWarnings("rawtypes")
 445     private TaxonNameBase getTaxonNameBase (TaxonNameBase name, List<TaxonNameBase> nametosave){
 446         List<TaxonNameBase> names = importer.getNameService().list(TaxonNameBase.class, null, null, null, null);
 447         for (TaxonNameBase tb : names){
 448             if (tb.getTitleCache().equalsIgnoreCase(name.getTitleCache())) {
 449                 logger.info("TaxonNameBase FOUND"+name.getTitleCache());
 450                 return tb;
 451             }
 452         }
 453         logger.info("TaxonNameBase NOT FOUND "+name.getTitleCache());
 454         nametosave.add(name);
 455         return name;
 456
 457     }
 458
 459
 460
 461     /**
 462      *
 463      */
 464     private void reloadClassification() {
 465         Classification cl = importer.getClassificationService().find(classification.getUuid());
 466         if (cl != null){
 467             classification=cl;
 468         }else{
 469             importer.getClassificationService().saveOrUpdate(classification);
 470             classification = importer.getClassificationService().find(classification.getUuid());
 471         }
 472
 473     }
 474
 475     /**
 476      * Create a Taxon for the current NameBase, based on the current reference
 477      * @param taxonNameBase
 478      * @param refMods: the current reference extracted from the MODS
 479      * @return Taxon
 480      */
 481     @SuppressWarnings({ "unused", "rawtypes" })
 482     private Taxon getTaxon(TaxonNameBase taxonNameBase, Reference<?> refMods) {
 483         Taxon t = new Taxon(taxonNameBase,null );
 484         if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
 485             t.setSec(configState.getConfig().getSecundum());
 486             logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
 487         }
 488         t.addSource(OriginalSourceType.Import,null,null,refMods,null);
 489         return t;
 490     }
 491
 492     /**
 493      * @param nametosave
 494      * @param distribution: the XML node group
 495      * @param acceptedTaxon: the current accepted Taxon
 496      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
 497      * @param refMods: the current reference extracted from the MODS
 498      */
 499     @SuppressWarnings("rawtypes")
 500     private void extractDistribution(Node distribution, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave, Reference<?> refMods) {
 501         //        logger.info("DISTRIBUTION");
 502         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 503         NodeList children = distribution.getChildNodes();
 504         Map<Integer,List<MySpecimenOrObservation>> specimenOrObservations = new HashMap<Integer, List<MySpecimenOrObservation>>();
 505         Map<Integer,String> descriptionsFulltext = new HashMap<Integer,String>();
 506
 507         for (int i=0;i<children.getLength();i++){
 508             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 509                 NodeList paragraph = children.item(i).getChildNodes();
 510                 for (int j=0;j<paragraph.getLength();j++){
 511                     if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
 512                         if(!paragraph.item(j).getTextContent().trim().isEmpty()) {
 513                             String s =paragraph.item(j).getTextContent().trim();
 514                             if (descriptionsFulltext.get(i) !=null){
 515                                 s = descriptionsFulltext.get(i)+" "+s;
 516                             }
 517                             descriptionsFulltext.put(i, s);
 518                         }
 519                     }
 520                     else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
 521                         String s =getTaxonNameBaseFromXML(paragraph.item(j),nametosave,refMods).toString().split("sec.")[0];
 522                         if (descriptionsFulltext.get(i) !=null){
 523                             s = descriptionsFulltext.get(i)+" "+s;
 524                         }
 525                         descriptionsFulltext.put(i, s);
 526                     }
 527                     else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")){
 528                         MySpecimenOrObservation specimenOrObservation = new MySpecimenOrObservation();
 529                         DerivedUnit derivedUnitBase = null;
 530                         specimenOrObservation = extractSpecimenOrObservation(paragraph.item(j), derivedUnitBase, SpecimenOrObservationType.DerivedUnit);
 531                         List<MySpecimenOrObservation> speObsList = specimenOrObservations.get(i);
 532                         if (speObsList == null) {
 533                             speObsList=new ArrayList<MySpecimenOrObservation>();
 534                         }
 535                         speObsList.add(specimenOrObservation);
 536                         specimenOrObservations.put(i,speObsList);
 537
 538                         String s = specimenOrObservation.getDerivedUnitBase().toString();
 539                         if (descriptionsFulltext.get(i) !=null){
 540                             s = descriptionsFulltext.get(i)+" "+s;
 541                         }
 542                         descriptionsFulltext.put(i, s);
 543                     }
 544
 545                 }
 546             }
 547         }
 548
 549         int m=0;
 550         for (int k:descriptionsFulltext.keySet()) {
 551             if (k>m) {
 552                 m=k;
 553             }
 554         }
 555         for (int k:specimenOrObservations.keySet()) {
 556             if (k>m) {
 557                 m=k;
 558             }
 559         }
 560
 561
 562         TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
 563         Feature currentFeature = Feature.DISTRIBUTION();
 564         DerivedUnit derivedUnitBase=null;
 565         String descr="";
 566         for (int k=0;k<=m;k++){
 567             if(specimenOrObservations.keySet().contains(k)){
 568                 for (MySpecimenOrObservation soo:specimenOrObservations.get(k) ) {
 569                     derivedUnitBase = soo.getDerivedUnitBase();
 570                     descr=soo.getDescr();
 571
 572                     derivedUnitBase.addSource(OriginalSourceType.Import, null,null,refMods,null);
 573
 574                     importer.getOccurrenceService().saveOrUpdate(derivedUnitBase);
 575
 576                     TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
 577                     acceptedTaxon.addDescription(taxonDescription);
 578
 579
 580                     IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
 581
 582                     Feature feature=null;
 583                         feature = makeFeature(derivedUnitBase);
 584                     if(!StringUtils.isEmpty(descr)) {
 585                         derivedUnitBase.setTitleCache(descr, true);
 586                     }
 587                     indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
 588                     indAssociation.setFeature(feature);
 589                     indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
 590
 591                     taxonDescription.addElement(indAssociation);
 592                     taxonDescription.setTaxon(acceptedTaxon);
 593                     taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
 594
 595                     importer.getDescriptionService().saveOrUpdate(taxonDescription);
 596                     importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 597                     td.setDescribedSpecimenOrObservation(soo.getDerivedUnitBase());
 598                 }
 599             }
 600
 601             if (descriptionsFulltext.keySet().contains(k)){
 602                 if (!descriptionsFulltext.get(k).isEmpty() && (descriptionsFulltext.get(k).startsWith("Hab.") || descriptionsFulltext.get(k).startsWith("Habitat"))){
 603                     setParticularDescription(descriptionsFulltext.get(k),acceptedTaxon,defaultTaxon, refMods, Feature.HABITAT());
 604                     break;
 605                 }
 606                 else{
 607                     TextData textData = TextData.NewInstance();
 608
 609                     textData.setFeature(currentFeature);
 610                     textData.putText(Language.UNKNOWN_LANGUAGE(), descriptionsFulltext.get(k));
 611                     textData.addSource(OriginalSourceType.Import, null, null, refMods, null);
 612
 613                     td.addElement(textData);
 614                 }
 615             }
 616
 617
 618             if (descriptionsFulltext.keySet().contains(k) || specimenOrObservations.keySet().contains(k)){
 619                 td.addSource(OriginalSourceType.Import, null,null,refMods,null);
 620                 acceptedTaxon.addDescription(td);
 621                 importer.getDescriptionService().saveOrUpdate(td);
 622                 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 623             }
 624         }
 625     }
 626
 627
 628     /**
 629      * @param materials: the XML node group
 630      * @param acceptedTaxon: the current accepted Taxon
 631      * @param refMods: the current reference extracted from the MODS
 632      */
 633     @SuppressWarnings("rawtypes")
 634     private void extractMaterials(Node materials, Taxon acceptedTaxon, Reference<?> refMods,List<TaxonNameBase> nametosave) {
 635         //        logger.info("EXTRACTMATERIALS");
 636         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 637         NodeList children = materials.getChildNodes();
 638         NodeList events = null;
 639         String descr="";
 640
 641         DerivedUnit derivedUnitBase=null;
 642         MySpecimenOrObservation myspecimenOrObservation = null;
 643
 644         for (int i=0;i<children.getLength();i++){
 645             String rawAssociation="";
 646             boolean added=false;
 647             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 648                 events = children.item(i).getChildNodes();
 649                 for(int k=0;k<events.getLength();k++){
 650                     if (events.item(k).getNodeName().equalsIgnoreCase("tax:name")){
 651                         String linkedTaxon = getTaxonNameBaseFromXML(events.item(k), nametosave,refMods).toString();//TODO NOT IMPLEMENTED IN THE CDM YET
 652                         rawAssociation+=linkedTaxon.split("sec")[0];
 653                     }
 654                     if (! events.item(k).getNodeName().equalsIgnoreCase("tax:name")
 655                             && !events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
 656                         rawAssociation+= events.item(k).getTextContent().trim();
 657                     }
 658                     if(events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
 659                         if (!containsDistinctLetters(rawAssociation.replaceAll(";",""))) {
 660                             rawAssociation="no description text";
 661                         }
 662                         added=true;
 663                         DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.FieldUnit);
 664                         derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
 665                         derivedUnitBase.addSource(OriginalSourceType.Import, null,null,refMods,null);
 666                         importer.getOccurrenceService().saveOrUpdate(derivedUnitBase);
 667
 668                         myspecimenOrObservation = extractSpecimenOrObservation(events.item(k),derivedUnitBase,SpecimenOrObservationType.FieldUnit);
 669                         derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
 670                         descr=myspecimenOrObservation.getDescr();
 671
 672                         derivedUnitBase.addSource(OriginalSourceType.Import, null,null,refMods,null);
 673
 674                         importer.getOccurrenceService().saveOrUpdate(derivedUnitBase);
 675
 676                         TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
 677                         acceptedTaxon.addDescription(taxonDescription);
 678
 679
 680                         IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
 681
 682                         Feature feature = makeFeature(derivedUnitBase);
 683                         if(!StringUtils.isEmpty(descr)) {
 684                             derivedUnitBase.setTitleCache(descr, true);
 685                         }
 686                         indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
 687                         indAssociation.setFeature(feature);
 688                         indAssociation.addSource(OriginalSourceType.Import,null, null, refMods, null);
 689
 690                         taxonDescription.addElement(indAssociation);
 691                         taxonDescription.setTaxon(acceptedTaxon);
 692                         taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
 693
 694                         importer.getDescriptionService().saveOrUpdate(taxonDescription);
 695                         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 696                     }
 697                     if (!rawAssociation.isEmpty() && !added){
 698                         TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
 699                         acceptedTaxon.addDescription(taxonDescription);
 700
 701                         IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
 702
 703                         Feature feature = Feature.MATERIALS_EXAMINED();
 704                         if(!StringUtils.isEmpty(rawAssociation)) {
 705                             derivedUnitBase.setTitleCache(rawAssociation, true);
 706                         }
 707                         indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
 708                         indAssociation.setFeature(feature);
 709                         indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
 710
 711                         taxonDescription.addElement(indAssociation);
 712                         taxonDescription.setTaxon(acceptedTaxon);
 713                         taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
 714
 715                         importer.getDescriptionService().saveOrUpdate(taxonDescription);
 716                         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 717
 718                         rawAssociation="";
 719                     }
 720                 }
 721             }
 722         }
 723     }
 724
 725     /**
 726      * @param materials: the XML node group
 727      * @param acceptedTaxon: the current accepted Taxon
 728      * @param refMods: the current reference extracted from the MODS
 729      */
 730     @SuppressWarnings("rawtypes")
 731     private void extractMaterialsDirect(Node materials, Taxon acceptedTaxon, Reference<?> refMods, String event) {
 732         //        logger.info("EXTRACTMATERIALS");
 733         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 734         String descr="";
 735
 736         DerivedUnit derivedUnitBase=null;
 737         MySpecimenOrObservation myspecimenOrObservation = null;
 738
 739
 740         myspecimenOrObservation = extractSpecimenOrObservation(materials,derivedUnitBase, SpecimenOrObservationType.FieldUnit);
 741         derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
 742         descr=myspecimenOrObservation.getDescr();
 743
 744         derivedUnitBase.addSource(OriginalSourceType.Import, null,null,refMods,null);
 745
 746         importer.getOccurrenceService().saveOrUpdate(derivedUnitBase);
 747
 748         TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
 749         acceptedTaxon.addDescription(taxonDescription);
 750
 751
 752         IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
 753
 754         Feature feature=null;
 755         if (event.equalsIgnoreCase("collection")){
 756             feature = makeFeature(derivedUnitBase);
 757         }
 758         else{
 759             feature = Feature.MATERIALS_EXAMINED();
 760         }
 761         if(!StringUtils.isEmpty(descr)) {
 762             derivedUnitBase.setTitleCache(descr, true);
 763         }
 764         indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
 765         indAssociation.setFeature(feature);
 766         indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
 767
 768         taxonDescription.addElement(indAssociation);
 769         taxonDescription.setTaxon(acceptedTaxon);
 770         taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
 771
 772         importer.getDescriptionService().saveOrUpdate(taxonDescription);
 773         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 774
 775
 776     }
 777
 778
 779     /**
 780      * @param description: the XML node group
 781      * @param acceptedTaxon: the current acceptedTaxon
 782      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
 783      * @param nametosave: the list of objects to save into the CDM
 784      * @param refMods: the current reference extracted from the MODS
 785      * @param featureName: the feature name
 786      */
 787     private void extractSpecificFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
 788             List<TaxonNameBase> nametosave, Reference<?> refMods, String featureName ) {
 789         NodeList children = description.getChildNodes();
 790         NodeList insideNodes ;
 791         String descr ="";
 792         String localdescr="";
 793
 794         //        String fullContent = description.getTextContent();
 795         for (int i=0;i<children.getLength();i++){
 796             localdescr="";
 797             if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
 798                 descr += children.item(i).getTextContent().trim();
 799             }
 800             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 801                 insideNodes=children.item(i).getChildNodes();
 802                 List<String> blabla= new ArrayList<String>();
 803                 for (int j=0;j<insideNodes.getLength();j++){
 804                     if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
 805                         String linkedTaxon = getTaxonNameBaseFromXML(insideNodes.item(j), nametosave,refMods).toString();//TODO NOT IMPLEMENTED IN THE CDM YET
 806                         blabla.add(linkedTaxon.split("sec")[0]);
 807                     }
 808                     if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
 809                         if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
 810                             blabla.add(insideNodes.item(j).getTextContent().trim());
 811                             localdescr += insideNodes.item(j).getTextContent().trim();
 812                         }
 813                     }
 814                 }
 815                 if (!blabla.isEmpty()) {
 816                     List<DefinedTermBase> features = importer.getTermService().list(Feature.class, null,null,null,null);
 817                     Feature currentFeature=null;
 818                     for (DefinedTermBase feature: features){
 819                         String tmpF = ((Feature)feature).getTitleCache();
 820                         if (tmpF.equalsIgnoreCase(featureName)) {
 821                             currentFeature=(Feature)feature;
 822                         }
 823                     }
 824                     if (currentFeature == null) {
 825                         currentFeature=Feature.NewInstance(featureName, featureName, featureName);
 826                         importer.getTermService().saveOrUpdate(currentFeature);
 827                     }
 828                     setParticularDescription(StringUtils.join(blabla," "),acceptedTaxon,defaultTaxon, refMods,currentFeature);
 829                 }
 830             }
 831
 832         }
 833
 834     }
 835
 836
 837
 838
 839     /**
 840      * @param children: the XML node group
 841      * @param nametosave: the list of objects to save into the CDM
 842      * @param acceptedTaxon: the current acceptedTaxon
 843      * @param refMods: the current reference extracted from the MODS
 844      * @param fullContent :the parsed XML content
 845      * @return a list of description (text)
 846      */
 847     @SuppressWarnings("unused")
 848     private List<String> parseParagraph(List<TaxonNameBase> nametosave, Taxon acceptedTaxon, Reference<?> refMods, Node paragraph, Feature feature){
 849         List<String> fullDescription=  new ArrayList<String>();
 850         //        String localdescr;
 851         String descr="";
 852         NodeList insideNodes ;
 853         boolean collectionEvent = false;
 854         List<Node>collectionEvents = new ArrayList<Node>();
 855
 856         NodeList children = paragraph.getChildNodes();
 857
 858         for (int i=0;i<children.getLength();i++){
 859             //            localdescr="";
 860             if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
 861                 descr += children.item(i).getTextContent().trim();
 862             }
 863             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 864                 insideNodes=children.item(i).getChildNodes();
 865                 List<String> blabla= new ArrayList<String>();
 866                 for (int j=0;j<insideNodes.getLength();j++){
 867                     boolean nodeKnown = false;
 868                     if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
 869                         String linkedTaxon = getTaxonNameBaseFromXML(insideNodes.item(j), nametosave,refMods).toString();//TODO NOT IMPLEMENTED IN THE CDM YET
 870                         blabla.add(linkedTaxon.split("sec")[0]);
 871                         nodeKnown=true;
 872                     }
 873                     if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
 874                         if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
 875                             blabla.add(insideNodes.item(j).getTextContent().trim());
 876                             //                            localdescr += insideNodes.item(j).getTextContent().trim();
 877                         }
 878                         nodeKnown=true;
 879                     }
 880                     if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:bibref")) {
 881                         String ref = insideNodes.item(j).getTextContent().trim();
 882                         if (ref.endsWith(";")  && ((ref.length())>1)) {
 883                             ref=ref.substring(0, ref.length()-1)+".";
 884                         }
 885                         Reference<?> reference = ReferenceFactory.newGeneric();
 886                         reference.setTitleCache(ref, true);
 887                         blabla.add(reference.getTitleCache());
 888                         nodeKnown=true;
 889                     }
 890                     if  (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")) {
 891                         collectionEvent=true;
 892                         collectionEvents.add(insideNodes.item(j));
 893                         nodeKnown=true;
 894                     }
 895                     if (!nodeKnown) {
 896                         logger.info("Node not handled yet : "+insideNodes.item(j).getNodeName());
 897                         logger.warn("Node not handled yet : "+insideNodes.item(j).getNodeName());
 898                     }
 899
 900                 }
 901                 if (!blabla.isEmpty()) {
 902                     fullDescription.add(StringUtils.join(blabla," "));
 903                 }
 904             }
 905         }
 906         if (collectionEvent) {
 907             logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
 908             for (Node coll:collectionEvents){
 909                 extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
 910             }
 911         }
 912         return fullDescription;
 913     }
 914
 915
 916     /**
 917      * @param description: the XML node group
 918      * @param acceptedTaxon: the current acceptedTaxon
 919      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
 920      * @param nametosave: the list of objects to save into the CDM
 921      * @param refMods: the current reference extracted from the MODS
 922      * @param feature: the feature to link the data with
 923      */
 924     private void extractFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave, Reference<?> refMods, Feature feature){
 925         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
 926         List<String> fullDescription= parseParagraph( nametosave, acceptedTaxon, refMods, description,feature);
 927
 928         if (!fullDescription.isEmpty()) {
 929             setParticularDescription(StringUtils.join(fullDescription,"<br/>"),acceptedTaxon,defaultTaxon, refMods,feature);
 930         }
 931
 932     }
 933
 934
 935     /**
 936      * @param descr: the XML Nodegroup to parse
 937      * @param acceptedTaxon: the current acceptedTaxon
 938      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
 939      * @param refMods: the current reference extracted from the MODS
 940      * @param currentFeature: the feature name
 941      * @return
 942      */
 943     private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon, Reference<?> refMods, Feature currentFeature) {
 944         //        logger.info("setParticularDescription "+currentFeature);
 945         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 946         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
 947
 948         TextData textData = TextData.NewInstance();
 949         textData.setFeature(currentFeature);
 950         textData.addSource(OriginalSourceType.Import, null,null,refMods,null);
 951
 952         textData.putText(Language.UNKNOWN_LANGUAGE(), descr+"<br/>");
 953
 954         if(! descr.isEmpty() && (acceptedTaxon!=null)){
 955             TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
 956             td.addElement(textData);
 957             td.addSource(OriginalSourceType.Import,null,null,refMods,null);
 958             acceptedTaxon.addDescription(td);
 959             importer.getDescriptionService().saveOrUpdate(td);
 960             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 961         }
 962
 963         if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
 964             try{
 965                 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
 966                 if (tmp!=null) {
 967                     defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
 968                 }else{
 969                     importer.getTaxonService().saveOrUpdate(defaultTaxon);
 970                 }
 971             }catch(Exception e){
 972                 logger.debug("TAXON EXISTS"+defaultTaxon);
 973             }
 974
 975             TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
 976             defaultTaxon.addDescription(td);
 977             td.addElement(textData);
 978             td.addSource(OriginalSourceType.Import,null,null,refMods,null);
 979             importer.getDescriptionService().saveOrUpdate(td);
 980             importer.getTaxonService().saveOrUpdate(defaultTaxon);
 981         }
 982     }
 983
 984
 985
 986     /**
 987      * @param synonyms: the XML Nodegroup to parse
 988      * @param nametosave: the list of objects to save into the CDM
 989      * @param acceptedTaxon: the current acceptedTaxon
 990      * @param refMods: the current reference extracted from the MODS
 991      */
 992     @SuppressWarnings({ "rawtypes", "unchecked" })
 993     private void extractSynonyms(Node synonyms, List<TaxonNameBase> nametosave,Taxon acceptedTaxon, Reference<?> refMods) {
 994         //        logger.info("extractSynonyms: "+acceptedTaxon);
 995         Taxon ttmp = (Taxon) importer.getTaxonService().find(acceptedTaxon.getUuid());
 996         if (ttmp != null) {
 997             acceptedTaxon = CdmBase.deproxy(ttmp,Taxon.class);
 998         }
 999         else{
1000             acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1001         }
1002         NodeList children = synonyms.getChildNodes();
1003         TaxonNameBase nameToBeFilled = null;
1004         List<String> names = new ArrayList<String>();
1005
1006         String identifier="";
1007
1008         for (int i=0;i<children.getLength();i++){
1009             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1010                 NodeList tmp = children.item(i).getChildNodes();
1011                 //                String fullContent = children.item(i).getTextContent();
1012                 for (int j=0; j< tmp.getLength();j++){
1013                     if(tmp.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1014                         String[] enames;
1015                         try {
1016                             enames = extractScientificName(tmp.item(j));
1017                             if (enames[1].isEmpty()) {
1018                                 names.add(enames[0]+"---"+enames[2]+"---"+enames[3]);
1019                             } else {
1020                                 names.add(enames[1]+"---"+enames[2]+"---"+enames[3]);
1021                             }
1022                         } catch (TransformerFactoryConfigurationError e) {
1023                             logger.warn(e);
1024                         } catch (TransformerException e) {
1025                             logger.warn(e);
1026                         }
1027
1028                     }
1029                 }
1030             }
1031         }
1032         for(String name:names){
1033             System.out.println("HANDLE NAME "+name);
1034             Rank rank;
1035             try {
1036                 rank = Rank.getRankByName(name.split("---")[1]);
1037             } catch (UnknownCdmTypeException e) {
1038                 logger.warn("Rank problem!");
1039                 rank=null;
1040             }
1041             try{
1042                 identifier = name.split("---")[2];
1043             }catch(Exception e){logger.warn("identifier empty"); identifier="";}
1044             name = name.split("---")[0];
1045
1046             String original = name;
1047
1048             INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1049             nameToBeFilled = parser.parseFullName(name, nomenclaturalCode, rank);
1050             if (nameToBeFilled.hasProblem() &&
1051                     !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1052                 //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1053                 nameToBeFilled = solveNameProblem(original, name, parser);
1054             }
1055             nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave);
1056             Synonym synonym = Synonym.NewInstance(nameToBeFilled, refMods);
1057
1058
1059             if (!identifier.isEmpty() && (identifier.length()>2)){
1060                 setLSID(identifier, synonym);
1061             }
1062
1063             acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF());
1064             System.out.println("SYNONYM");
1065
1066             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1067         }
1068
1069     }
1070
1071
1072
1073
1074
1075     /**
1076      * @param refgroup: the XML nodes
1077      * @param nametosave: the list of objects to save into the CDM
1078      * @param acceptedTaxon: the current acceptedTaxon
1079      * @param nametosave: the list of objects to save into the CDM
1080      * @param refMods: the current reference extracted from the MODS
1081      * @return the acceptedTaxon (why?)
1082      */
1083     @SuppressWarnings({ "null", "unused" ,"rawtypes" })
1084     private Taxon extractReferences(Node refgroup, List<TaxonNameBase> nametosave, Taxon acceptedTaxon, Reference<?> refMods) {
1085         //        logger.info("extractReferences");
1086         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1087
1088         NodeList children = refgroup.getChildNodes();
1089         NonViralName<?> nameToBeFilled = null;
1090         boolean accepted=true;
1091         for (int i=0;i<children.getLength();i++){
1092             if(children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1093                 NodeList references = children.item(i).getChildNodes();
1094                 int nbRef=0;
1095                 boolean foundBibref=false;
1096                 for (int j=0;j<references.getLength();j++){
1097                     if(references.item(j).getNodeName().equalsIgnoreCase("tax:bibref")){
1098                         foundBibref=true;
1099                         String ref = references.item(j).getTextContent().trim();
1100                         if (ref.endsWith(";")  && ((ref.length())>1)) {
1101                             ref=ref.substring(0, ref.length()-1)+".";
1102                         }
1103                         if (ref.startsWith(treatmentMainName) && !ref.endsWith(treatmentMainName)) {
1104                             ref=ref.replace(treatmentMainName, "");
1105                             ref=ref.trim();
1106                             while (ref.startsWith(".") || ref.startsWith(",")) {
1107                                 ref=ref.replace(".","").replace(",","").trim();
1108                             }
1109                         }
1110
1111                         //                        logger.info("Current reference :"+nbRef+", "+ref+", "+treatmentMainName+"--"+ref.indexOf(treatmentMainName));
1112                         Reference<?> reference = ReferenceFactory.newGeneric();
1113                         reference.setTitleCache(ref, true);
1114
1115                         boolean makeEmpty = false;
1116                         //                        Rank rank = null;
1117                         //                        logger.info("TREATMENTMAINNAME: "+treatmentMainName);
1118                         //                        logger.info("ref: "+ref);
1119                         if (nbRef==0) {
1120                             accepted=true;
1121                         } else {
1122                             accepted=false;
1123                         }
1124
1125                         INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1126                         if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1127                             nameToBeFilled = BotanicalName.NewInstance(null);
1128                         }
1129                         if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1130                             nameToBeFilled = ZoologicalName.NewInstance(null);
1131                         }
1132                         if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1133                             nameToBeFilled = BacterialName.NewInstance(null);
1134                         }
1135                         if (accepted){
1136                             acceptedTaxon.getName().setNomenclaturalReference(reference);
1137                             nameToBeFilled.setNomenclaturalReference(reference);
1138                             acceptedTaxon.addSource(OriginalSourceType.Import,null,null,refMods,null);
1139                         }else{
1140                             TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1141                             acceptedTaxon.addDescription(td);
1142                             acceptedTaxon.addSource(OriginalSourceType.Import,null,null,refMods,null);
1143
1144                             TextData textData = TextData.NewInstance(Feature.CITATION());
1145
1146                             textData.addSource(OriginalSourceType.Import, null, null, reference, null, acceptedTaxon.getName(), ref);
1147                             td.addElement(textData);
1148                             td.addSource(OriginalSourceType.Import, null,null,refMods,null);
1149
1150                             importer.getDescriptionService().saveOrUpdate(td);
1151                         }
1152                         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1153                         //                        logger.warn("BWAAHHHH: "+nameToBeFilled.getParsingProblems()+", "+ref);
1154                         nbRef++;
1155                     }
1156                 }
1157                 if (!foundBibref){
1158                     String refString="";
1159                     String name="";
1160                     String identifier="";
1161                     for (int j=0;j<references.getLength();j++){
1162                         //no bibref tag inside
1163                         logger.info("references.item(j).getNodeName()"+references.item(j).getNodeName());
1164                         if (references.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1165                             String[] enames;
1166                             try {
1167                                 enames = extractScientificName(references.item(j));
1168                                 if (enames[1].isEmpty()) {
1169                                     name=enames[0]+"---"+enames[2]+"---"+enames[3];
1170                                 } else {
1171                                     name=enames[1]+"---"+enames[2]+"---"+enames[3];
1172                                 }
1173                             } catch (TransformerFactoryConfigurationError e) {
1174                                 logger.warn(e);
1175                             } catch (TransformerException e) {
1176                                 logger.warn(e);
1177                             }
1178
1179                             name=name.trim();
1180                         }
1181                         if (references.item(j).getNodeName().equalsIgnoreCase("#text")){
1182                             refString = references.item(j).getTextContent().trim();
1183                         }
1184                         if(references.item(j).getNodeName().equalsIgnoreCase("#text") && name.isEmpty() && !references.item(j).getTextContent().trim().isEmpty()){
1185                             try{
1186                                 identifier = name.split("---")[3];
1187                             }catch(Exception e ){logger.warn("no identifier");identifier="";}
1188                             INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1189                             String fullLineRefName = references.item(j).getTextContent().trim();
1190                             TaxonNameBase nameTBF = parser.parseFullName(fullLineRefName, nomenclaturalCode, Rank.UNKNOWN_RANK());
1191                             if (nameTBF.hasProblem() &&
1192                                     !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1193                                 nameTBF=solveNameProblem(fullLineRefName, fullLineRefName,parser);
1194                             }
1195                             nameTBF = getTaxonNameBase(nameTBF,nametosave);
1196                             Synonym synonym = Synonym.NewInstance(nameTBF, refMods);
1197
1198
1199                             if (!identifier.isEmpty() && (identifier.length()>2)){
1200                                 setLSID(identifier, acceptedTaxon);
1201                             }
1202
1203                             acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF());
1204                             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1205                         }
1206                     }
1207
1208                     if(!name.isEmpty()){
1209                         logger.info("acceptedTaxon and name: *"+acceptedTaxon.getTitleCache()+"*, *"+name+"*");
1210                         if (acceptedTaxon.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(name.split("---")[0].trim())){
1211                             identifier = name.split("---")[3];
1212                             Reference<?> refS = ReferenceFactory.newGeneric();
1213                             refS.setTitleCache(refString, true);
1214                             //                            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1215                             //                            acceptedTaxon.addDescription(td);
1216                             //                            acceptedTaxon.addSource(refSource);
1217                             //
1218                             //                            TextData textData = TextData.NewInstance(Feature.CITATION());
1219                             //
1220                             //                            textData.addSource(null, null, refS, null);
1221                             //                            td.addElement(textData);
1222                             //                            td.addSource(refSource);
1223                             //                            importer.getDescriptionService().saveOrUpdate(td);
1224
1225
1226                             if (!identifier.isEmpty() && (identifier.length()>2)){
1227                                 setLSID(identifier, acceptedTaxon);
1228
1229                             }
1230
1231                             acceptedTaxon.getName().setNomenclaturalReference(refS);
1232                             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1233                         }
1234                         else{
1235                             Rank rank;
1236                             try {
1237                                 rank = Rank.getRankByName(name.split("---")[1]);
1238                             } catch (Exception e) {
1239                                 logger.warn("Rank or name problem!");
1240                                 rank=null;
1241                             }
1242                             name = name.split("---")[0].trim() + refString;
1243                             String original = name;
1244                             try{
1245                                 identifier = name.split("---")[3];
1246                             }
1247                             catch(Exception e){
1248                                 logger.warn("no identifier");
1249                                 identifier="";
1250                             }
1251                             INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1252                             TaxonNameBase nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
1253                             if (nameTBF.hasProblem() &&
1254                                     !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1255                                 //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1256                                 nameTBF=solveNameProblem(original, name,parser);
1257                             }
1258                             nameTBF = getTaxonNameBase(nameTBF,nametosave);
1259                             Synonym synonym = Synonym.NewInstance(nameTBF, refMods);
1260
1261
1262                             if (!identifier.isEmpty() && (identifier.length()>2)){
1263                                 String id = identifier.split("__")[0];
1264                                 String source = identifier.split("__")[1];
1265                                 if (id.indexOf("lsid")>-1){
1266                                     try {
1267                                         LSID lsid = new LSID(id);
1268                                         synonym.setLsid(lsid);
1269                                     } catch (MalformedLSIDException e) {
1270                                         // TODO Auto-generated catch block
1271                                         e.printStackTrace();
1272                                     }
1273
1274                                 }
1275                                 else{
1276                                     //TODO ADD ORIGINAL SOURCE ID
1277                                     IdentifiableSource os = IdentifiableSource.NewInstance(OriginalSourceType.Import);
1278                                     os.setIdInSource(id);
1279                                     Reference<?> re = ReferenceFactory.newGeneric();
1280                                     re.setTitle(source);
1281                                     os.setCitation(re);
1282                                     synonym.addSource(os);
1283                                 }
1284                             }
1285
1286                             acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF());
1287                             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1288                         }
1289                     }
1290                 }
1291             }
1292         }
1293         //        importer.getClassificationService().saveOrUpdate(classification);
1294         return acceptedTaxon;
1295
1296     }
1297
1298     /**
1299      * @param identifier
1300      * @param acceptedTaxon
1301      */
1302     private void setLSID(String identifier, TaxonBase<?> taxon) {
1303         boolean lsidok=false;
1304         String id = identifier.split("__")[0];
1305         String source = identifier.split("__")[1];
1306         if (id.indexOf("lsid")>-1){
1307             try {
1308                 LSID lsid = new LSID(id);
1309                 taxon.setLsid(lsid);
1310                 lsidok=true;
1311             } catch (MalformedLSIDException e) {
1312                 logger.warn("Malformed LSID");
1313             }
1314
1315         }
1316         if ((id.indexOf("lsid")<0) || !lsidok){
1317             //ADD ORIGINAL SOURCE ID
1318             IdentifiableSource os = IdentifiableSource.NewInstance(OriginalSourceType.Import);
1319             os.setIdInSource(id);
1320             Reference<?> re = ReferenceFactory.newGeneric();
1321             re.setTitle(source);
1322             os.setCitation(re);
1323             taxon.addSource(os);
1324         }
1325
1326     }
1327
1328     /**
1329      * try to solve a parsing problem for a scientific name
1330      * @param original : the name from the OCR document
1331      * @param name : the tagged version
1332      * @param parser
1333      * @return the corrected TaxonNameBase
1334      */
1335     @SuppressWarnings({ "unchecked", "rawtypes" })
1336     private TaxonNameBase<?,?> solveNameProblem(String original, String name, INonViralNameParser parser) {
1337         Map<String,String> ato = namesMap.get(original);
1338         Rank rank=Rank.UNKNOWN_RANK();
1339
1340         if (ato == null){
1341             rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
1342         }else{
1343             rank = getRank(ato);
1344         }
1345         TaxonNameBase<?,?> nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
1346         //                logger.info("RANK: "+rank);
1347         int retry=0;
1348         while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
1349             String fullname =  getFullReference(name,nameTBF.getParsingProblems());
1350             if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1351                 nameTBF = BotanicalName.NewInstance(null);
1352             }
1353             if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1354                 nameTBF = ZoologicalName.NewInstance(null);
1355             }
1356             if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1357                 nameTBF= BacterialName.NewInstance(null);
1358             }
1359             parser.parseReferencedName(nameTBF, fullname, rank, false);
1360             retry++;
1361         }
1362         if (retry == 1){
1363             nameTBF.setFullTitleCache(name, true);
1364             //                    logger.info("FULL TITLE CACHE "+name);
1365         }
1366         return nameTBF;
1367     }
1368
1369     /**
1370      * @param nomenclatureNode: the XML nodes
1371      * @param nametosave: the list of objects to save into the CDM
1372      * @param refMods: the current reference extracted from the MODS
1373      * @return
1374      */
1375     @SuppressWarnings({ "rawtypes", "unused" })
1376     private Taxon extractNomenclature(Node nomenclatureNode,  List<TaxonNameBase> nametosave, Reference<?> refMods) {
1377         //        logger.info("extractNomenclature");
1378         NodeList children = nomenclatureNode.getChildNodes();
1379         String freetext;
1380         TaxonNameBase nameToBeFilled = null;
1381         Taxon acceptedTaxon = null;
1382         INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1383         String identifier="";
1384
1385         Rank rank = Rank.UNKNOWN_RANK();
1386         //        String fullContent = nomenclatureNode.getTextContent();
1387         for (int i=0;i<children.getLength();i++){
1388             if (children.item(i).getNodeName().equalsIgnoreCase("#text")) {
1389                 freetext=children.item(i).getTextContent();
1390             }
1391             if (children.item(i).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1392                 System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
1393                 extractMaterialsDirect(children.item(i), acceptedTaxon, refMods, "collection");
1394             }
1395             if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
1396                 String[] names;
1397                 try {
1398                     names = extractScientificName(children.item(i));
1399                     treatmentMainName = names[1];
1400                     originalTreatmentName = names[0];
1401                     rank = Rank.getRankByName(names[2]);
1402                     identifier=names[3];
1403
1404                 } catch (TransformerFactoryConfigurationError e1) {
1405                     logger.warn(e1);
1406                 } catch (TransformerException e1) {
1407                     logger.warn(e1);
1408                 } catch (UnknownCdmTypeException e) {
1409                     logger.warn(e);
1410                 }
1411
1412                 if (rank.equals(Rank.UNKNOWN_RANK()) || rank.isLower(configState.getConfig().getMaxRank())){
1413                     maxRankRespected=true;
1414                     if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1415                         nameToBeFilled = BotanicalName.NewInstance(null);
1416                     }
1417                     if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1418                         nameToBeFilled = ZoologicalName.NewInstance(null);
1419                     }
1420                     if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1421                         nameToBeFilled = BacterialName.NewInstance(null);
1422                     }
1423                     acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
1424                     if (acceptedTaxon ==null ){
1425                         nameToBeFilled = parser.parseFullName(treatmentMainName, nomenclaturalCode, null);
1426                         if (nameToBeFilled.hasProblem() &&
1427                                 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1428                             nameToBeFilled = solveNameProblem(originalTreatmentName,treatmentMainName,parser);
1429                         }
1430                         nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave);
1431                         if (!originalTreatmentName.isEmpty()) {
1432                             TaxonNameDescription td = TaxonNameDescription.NewInstance();
1433                             td.setTitleCache(originalTreatmentName);
1434                             nameToBeFilled.addDescription(td);
1435                         }
1436                         nameToBeFilled.addSource(OriginalSourceType.Import,null,null,refMods,null);
1437                         acceptedTaxon= new Taxon(nameToBeFilled,(Reference<?>) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
1438                         if(!configState.getConfig().doKeepOriginalSecundum()) {
1439                             acceptedTaxon.setSec(configState.getConfig().getSecundum());
1440                             logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
1441                         }
1442
1443
1444                         if (!identifier.isEmpty() && (identifier.length()>2)){
1445                             boolean lsidok=false;
1446                             String id = identifier.split("__")[0];
1447                             String source = identifier.split("__")[1];
1448                             if (id.indexOf("lsid")>-1){
1449                                 try {
1450                                     LSID lsid = new LSID(id);
1451                                     acceptedTaxon.setLsid(lsid);
1452                                     lsidok=true;
1453                                 } catch (MalformedLSIDException e) {
1454                                     logger.warn("Malformed LSID");
1455                                 }
1456
1457                             }
1458                             if ((id.indexOf("lsid")<0) || !lsidok){
1459                                 //TODO ADD ORIGINAL SOURCE ID
1460                                 IdentifiableSource os = IdentifiableSource.NewInstance(OriginalSourceType.Import);
1461                                 os.setIdInSource(id);
1462                                 Reference<?> re = ReferenceFactory.newGeneric();
1463                                 re.setTitle(source);
1464                                 os.setCitation(re);
1465                                 acceptedTaxon.addSource(os);
1466                             }
1467                         }
1468
1469                         acceptedTaxon.addSource(OriginalSourceType.Import, null,null,refMods,null);
1470                         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1471
1472                         Taxon parentTaxon = askParent(acceptedTaxon, classification);
1473                         if (parentTaxon ==null){
1474                             while (parentTaxon == null) {
1475                                 parentTaxon = createParent(acceptedTaxon, refMods);
1476                                 classification.addParentChild(parentTaxon, acceptedTaxon, refMods, null);
1477                             }
1478                         }else{
1479                             classification.addParentChild(parentTaxon, acceptedTaxon, refMods, null);
1480                         }
1481                     }else{
1482                         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1483                         Set<IdentifiableSource> sources = acceptedTaxon.getSources();
1484                         boolean sourcelinked=false;
1485                         for (IdentifiableSource source:sources){
1486                             if (source.getCitation().getTitle().equalsIgnoreCase(refMods.getTitleCache())) {
1487                                 sourcelinked=true;
1488                             }
1489                         }
1490                         if (!configState.getConfig().doKeepOriginalSecundum()) {
1491                             acceptedTaxon.setSec(configState.getConfig().getSecundum());
1492                             logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
1493                         }
1494                         if (!sourcelinked){
1495                             acceptedTaxon.addSource(OriginalSourceType.Import, null, null, refMods, null);
1496                         }
1497                         if (!sourcelinked || !configState.getConfig().doKeepOriginalSecundum()){
1498
1499                             if (!identifier.isEmpty() && (identifier.length()>2)){
1500                                 setLSID(identifier, acceptedTaxon);
1501                             }
1502                             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1503                         }
1504                     }
1505                 }else{
1506                     maxRankRespected=false;
1507                 }
1508             }
1509         }
1510         //        importer.getClassificationService().saveOrUpdate(classification);
1511         return acceptedTaxon;
1512     }
1513
1514     /**
1515      * @param acceptedTaxon: the current acceptedTaxon
1516      * @param ref: the current reference extracted from the MODS
1517      * @return the parent for the current accepted taxon
1518      */
1519     private Taxon createParent(Taxon acceptedTaxon, Reference<?> ref) {
1520         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1521
1522         List<Rank> rankList = new ArrayList<Rank>();
1523         rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
1524
1525         List<String> rankListStr = new ArrayList<String>();
1526         for (Rank r:rankList) {
1527             rankListStr.add(r.toString());
1528         }
1529         String r="";
1530         String s = acceptedTaxon.getTitleCache();
1531         Taxon tax = null;
1532
1533         int addTaxon = askAddParent(s);
1534         logger.info("ADD TAXON: "+addTaxon);
1535         if (addTaxon == 0){
1536             Taxon tmp = askParent(acceptedTaxon, classification);
1537             if (tmp == null){
1538                 s = askSetParent(s);
1539                 r = askRank(s,rankListStr);
1540
1541                 NonViralName<?> nameToBeFilled = null;
1542                 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1543                     nameToBeFilled = BotanicalName.NewInstance(null);
1544                 }
1545                 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1546                     nameToBeFilled = ZoologicalName.NewInstance(null);
1547                 }
1548                 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1549                     nameToBeFilled = BacterialName.NewInstance(null);
1550                 }
1551                 nameToBeFilled.setTitleCache(s);
1552                 nameToBeFilled.setRank(getRank(r));
1553
1554                 tax = Taxon.NewInstance(nameToBeFilled, ref);
1555             }
1556             else{
1557                 tax=tmp;
1558             }
1559
1560             createParent(tax, ref);
1561             //            logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
1562             classification.addParentChild(tax, acceptedTaxon, ref, null);
1563         }
1564         else{
1565             classification.addChildTaxon(acceptedTaxon, ref, null);
1566             tax=acceptedTaxon;
1567         }
1568         //        logger.info("RETURN: "+tax );
1569         return tax;
1570
1571     }
1572
1573
1574
1575     /**
1576      * @param name
1577      * @throws TransformerFactoryConfigurationError
1578      * @throws TransformerException
1579      * @return a list of possible names
1580      */
1581     private String[] extractScientificName(Node name) throws TransformerFactoryConfigurationError, TransformerException {
1582         //        System.out.println("extractScientificName");
1583         Rank rank = Rank.UNKNOWN_RANK();
1584         NodeList children = name.getChildNodes();
1585         String fullName = "";
1586         String newName="";
1587         String identifier="";
1588         HashMap<String, String> atomisedMap = new HashMap<String, String>();
1589         List<String> atomisedName= new ArrayList<String>();
1590
1591         String rankStr = "";
1592         Rank tmpRank ;
1593         for (int i=0;i<children.getLength();i++){
1594             if(children.item(i).getNodeName().equalsIgnoreCase("tax:xmldata")){
1595                 NodeList atom = children.item(i).getChildNodes();
1596                 for (int k=0;k<atom.getLength();k++){
1597                     if (atom.item(k).getNodeName().equalsIgnoreCase("tax:xid")){
1598                         try{
1599                             identifier = atom.item(k).getAttributes().getNamedItem("identifier").getNodeValue();
1600                         }catch(Exception e){
1601                             System.out.println("pb with identifier, maybe empty");
1602                         }
1603                         try{
1604                             identifier+="__"+atom.item(k).getAttributes().getNamedItem("source").getNodeValue();
1605                         }catch(Exception e){
1606                             System.out.println("pb with identifier, maybe empty");
1607                         }
1608                     }
1609                     tmpRank = null;
1610                     rankStr = atom.item(k).getNodeName().toLowerCase();
1611                     //                    logger.info("RANKSTR:*"+rankStr+"*");
1612                     if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
1613                         rankStr=atom.item(k).getTextContent().trim();
1614                         tmpRank = getRank(rankStr);
1615                     }
1616                     //                    if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
1617                     if (tmpRank != null){
1618                         rank=tmpRank;
1619                     }
1620
1621                     atomisedMap.put(rankStr.toLowerCase(),atom.item(k).getTextContent().trim());
1622                     atomisedName.add(atom.item(k).getTextContent().trim());
1623                 }
1624             }
1625             if(children.item(i).getNodeName().equalsIgnoreCase("#text") && !StringUtils.isBlank(children.item(i).getTextContent())){
1626                 //                logger.info("name non atomised: "+children.item(i).getTextContent());
1627                 fullName = children.item(i).getTextContent().trim();
1628                 //                logger.info("fullname: "+fullName);
1629             }
1630         }
1631         if (fullName != null){
1632             fullName = fullName.replace("( ", "(");
1633             fullName = fullName.replace(" )",")");
1634
1635         }
1636         if (fullName.trim().isEmpty()){
1637             fullName=StringUtils.join(atomisedName," ");
1638         }
1639
1640         while(fullName.contains("  ")) {
1641             fullName=fullName.replace("  ", " ");
1642             //            logger.info("while");
1643         }
1644
1645         namesMap.put(fullName,atomisedMap);
1646         String atomisedNameStr = StringUtils.join(atomisedName," ");
1647         while(atomisedNameStr.contains("  ")) {
1648             atomisedNameStr=atomisedNameStr.replace("  ", " ");
1649             //            logger.info("atomisedNameStr: "+atomisedNameStr);
1650         }
1651         atomisedNameStr=atomisedNameStr.trim();
1652
1653         if (fullName != null){
1654             if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
1655                 newName=getScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
1656             } else {
1657                 newName=fullName;
1658             }
1659         }
1660         rank = askForRank(newName, rank, nomenclaturalCode);
1661         String[] names = new String[4];
1662         names[0]=fullName;
1663         names[1]=newName;
1664         names[2]=rank.toString();
1665         names[3]=identifier;
1666         return names;
1667
1668     }
1669
1670     /**
1671      * @param classification2
1672      */
1673     public void updateClassification(Classification classification2) {
1674         classification = classification2;
1675     }
1676
1677
1678 }