cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/taxonx2013/TaxonXTreatmentExtractor.java

   1 // $Id$
   2 /**
   3  * Copyright (C) 2013 EDIT
   4  * European Distributed Institute of Taxonomy
   5  * http://www.e-taxonomy.eu
   6  *
   7  * The contents of this file are subject to the Mozilla Public License Version 1.1
   8  * See LICENSE.TXT at the top of this package for the full license terms.
   9  */
  10 package eu.etaxonomy.cdm.io.taxonx2013;
  11
  12 import java.io.File;
  13 import java.io.FileWriter;
  14 import java.io.IOException;
  15 import java.net.URI;
  16 import java.util.ArrayList;
  17 import java.util.HashMap;
  18 import java.util.List;
  19 import java.util.Map;
  20 import java.util.Set;
  21 import java.util.regex.Pattern;
  22
  23 import javax.xml.transform.TransformerException;
  24 import javax.xml.transform.TransformerFactoryConfigurationError;
  25
  26 import org.apache.commons.lang.StringUtils;
  27 import org.w3c.dom.Node;
  28 import org.w3c.dom.NodeList;
  29
  30 import com.ibm.lsid.MalformedLSIDException;
  31
  32 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
  33 import eu.etaxonomy.cdm.model.common.CdmBase;
  34 import eu.etaxonomy.cdm.model.common.DefinedTermBase;
  35 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
  36 import eu.etaxonomy.cdm.model.common.LSID;
  37 import eu.etaxonomy.cdm.model.common.Language;
  38 import eu.etaxonomy.cdm.model.common.OriginalSourceType;
  39 import eu.etaxonomy.cdm.model.description.Feature;
  40 import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
  41 import eu.etaxonomy.cdm.model.description.PolytomousKey;
  42 import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
  43 import eu.etaxonomy.cdm.model.description.TaxonDescription;
  44 import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
  45 import eu.etaxonomy.cdm.model.description.TextData;
  46 import eu.etaxonomy.cdm.model.name.BacterialName;
  47 import eu.etaxonomy.cdm.model.name.BotanicalName;
  48 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
  49 import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
  50 import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
  51 import eu.etaxonomy.cdm.model.name.NonViralName;
  52 import eu.etaxonomy.cdm.model.name.Rank;
  53 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
  54 import eu.etaxonomy.cdm.model.name.ZoologicalName;
  55 import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
  56 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
  57 import eu.etaxonomy.cdm.model.reference.Reference;
  58 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
  59 import eu.etaxonomy.cdm.model.taxon.Classification;
  60 import eu.etaxonomy.cdm.model.taxon.Synonym;
  61 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
  62 import eu.etaxonomy.cdm.model.taxon.Taxon;
  63 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
  64 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
  65 import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
  66 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
  67 import eu.etaxonomy.cdm.strategy.parser.ParserProblem;
  68
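/**
 * Extracts the content of TaxonX treatment nodes (nomenclature, references, descriptive
 * sections, keys, ...) and stores it in the CDM.
 *
 * @author pkelbert
 * @date 2 Apr. 2013
 */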
  74 public class TaxonXTreatmentExtractor extends TaxonXExtractor{
  75
    // Nomenclatural code handed to the name parser whenever a scientific name is parsed.
    private final NomenclaturalCode nomenclaturalCode;
    // Classification the extracted taxa are attached to; replaced by reloadClassification().
    private Classification classification;

    // NOTE(review): presumably the name of the treatment currently processed - confirm against the writers.
    private  String treatmentMainName,originalTreatmentName;

    // NOTE(review): purpose not evident from the declaration; verify against the code that fills it.
    private final HashMap<String,Map<String,String>> namesMap = new HashMap<String, Map<String,String>>();


    // Matches a key line starting with digits, optionally preceded by a "-" (used by extractKey).
    private final Pattern keypattern = Pattern.compile("^(\\d+.*|-\\d+.*)");
    // Matches a key line ending with a digit (used by extractKey).
    private final Pattern keypatternend = Pattern.compile("^.+?\\d$");

    // Gate read by extractTreatment(): every section except the nomenclature is only processed,
    // and the result only saved, while this flag is true.
    private boolean maxRankRespected =false;
  88
  89     /**
  90      * @param nomenclaturalCode
  91      * @param classification
  92      * @param importer
  93      * @param configState
  94      */
  95     public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode, Classification classification, TaxonXImport importer,
  96             TaxonXImportState configState) {
  97         this.nomenclaturalCode=nomenclaturalCode;
  98         this.classification = classification;
  99         this.importer=importer;
 100         this.configState=configState;
 101         prepareCollectors(configState, importer.getAgentService());
 102     }
 103
 104     /**
 105      * extracts all the treament information and save them
 106      * @param treatmentnode: the XML Node
 107      * @param tosave: the list of object to save into the CDM
 108      * @param refMods: the reference extracted from the MODS
 109      * @param sourceName: the URI of the document
 110      */
 111     @SuppressWarnings({ "rawtypes", "unused" })
 112     protected void extractTreatment(Node treatmentnode, List<Object> tosave, Reference<?> refMods, URI sourceName) {
 113         logger.info("extractTreatment");
 114         List<TaxonNameBase> nametosave = new ArrayList<TaxonNameBase>();
 115         NodeList children = treatmentnode.getChildNodes();
 116         Taxon acceptedTaxon =null;
 117         Taxon defaultTaxon =null;
 118         boolean refgroup=false;
 119
 120         for (int i=0;i<children.getLength();i++){
 121             if(children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group")) {
 122                 refgroup=true;
 123             }
 124         }
 125
 126         for (int i=0;i<children.getLength();i++){
 127
 128             if (children.item(i).getNodeName().equalsIgnoreCase("tax:nomenclature")){
 129                 NodeList nomenclature = children.item(i).getChildNodes();
 130                 boolean containsName=false;
 131                 for(int k=0;k<nomenclature.getLength();k++){
 132                     if(nomenclature.item(k).getNodeName().equalsIgnoreCase("tax:name")){
 133                         containsName=true;
 134                         break;
 135                     }
 136                 }
 137                 if (containsName){
 138                     reloadClassification();
 139                     //extract "main" the scientific name
 140                     acceptedTaxon = extractNomenclature(children.item(i),nametosave,refMods);
 141                 }
 142             }
 143             else if (children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group") && maxRankRespected){
 144                 reloadClassification();
 145                 //extract the References within the document
 146                 extractReferences(children.item(i),nametosave,acceptedTaxon,refMods);
 147             }
 148             else if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 149                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected){
 150                 File file = new File("/home/pkelbert/Bureau/multipleTaxonX.txt");
 151                 FileWriter writer;
 152                 try {
 153                     writer = new FileWriter(file ,true);
 154                     writer.write(sourceName+"\n");
 155                     writer.flush();
 156                     writer.close();
 157                 } catch (IOException e1) {
 158                     // TODO Auto-generated catch block
 159                     e1.printStackTrace();
 160                 }
 161                 String multiple = askMultiple(children.item(i));
 162                 if (multiple.equalsIgnoreCase("synonyms")) {
 163                     extractSynonyms(children.item(i),nametosave, acceptedTaxon,refMods);
 164                 }
 165                 else
 166                     if(multiple.equalsIgnoreCase("material examined")){
 167                         extractMaterials(children.item(i),acceptedTaxon, refMods, nametosave);
 168                     }
 169                     else
 170                         if (multiple.equalsIgnoreCase("distribution")){
 171                             extractDistribution(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods);
 172                         }
 173                         else
 174                             if (multiple.equalsIgnoreCase("type status")){
 175                                 extractDescriptionWithReference(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,"TypeStatus");
 176                             }
 177                             else
 178                                 if (multiple.equalsIgnoreCase("vernacular name")){
 179                                     extractDescriptionWithReference(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,Feature.COMMON_NAME().getTitleCache());
 180
 181                                 }
 182                                 else{
 183                                     extractSpecificFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,multiple);
 184                                 }
 185
 186             }
 187             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 188                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected){
 189                 extractFeature(children.item(i),acceptedTaxon,defaultTaxon, nametosave, refMods, Feature.BIOLOGY_ECOLOGY());
 190             }
 191             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 192                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("vernacularName") && maxRankRespected){
 193                 extractDescriptionWithReference(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,Feature.COMMON_NAME().getTitleCache());
 194             }
 195             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 196                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected){
 197                 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods, Feature.DESCRIPTION());
 198             }
 199             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 200                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected){
 201                 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,Feature.DIAGNOSIS());
 202             }
 203             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 204                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected){
 205                 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods, Feature.DISCUSSION());
 206             }
 207             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 208                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("note") && maxRankRespected){
 209                 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods, Feature.DESCRIPTION());
 210             }
 211
 212             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 213                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected){
 214                 extractDistribution(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods);
 215             }
 216             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 217                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected){
 218                 extractFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave,refMods,Feature.ETYMOLOGY());
 219             }
 220
 221             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 222                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected){
 223                 extractMaterials(children.item(i),acceptedTaxon, refMods, nametosave);
 224             }
 225             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:figure") && maxRankRespected){
 226                 extractSpecificFeature(children.item(i),acceptedTaxon,defaultTaxon, nametosave, refMods, "figure");
 227             }
 228             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 229                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("other") && maxRankRespected){
 230                 extractSpecificFeature(children.item(i),acceptedTaxon,defaultTaxon, nametosave, refMods, "table");
 231             }
 232
 233             else if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 234                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected){
 235                 //TODO IGNORE keys for the moment
 236                 //extractKey(children.item(i),acceptedTaxon, nametosave,source, refMods);
 237                 extractSpecificFeature(children.item(i),acceptedTaxon,defaultTaxon,nametosave, refMods,"Keys - unparsed");
 238             }
 239             else{
 240                 if (!children.item(i).getNodeName().equalsIgnoreCase("tax:pb")){
 241                     logger.info("ANOTHER KIND OF NODES: "+children.item(i).getNodeName()+", "+children.item(i).getAttributes());
 242                     if (children.item(i).getAttributes() !=null) {
 243                         logger.info(children.item(i).getAttributes().item(0));
 244                     }
 245                 }
 246             }
 247         }
 248         //        logger.info("saveUpdateNames");
 249         if (maxRankRespected){
 250             importer.getNameService().saveOrUpdate(nametosave);
 251             importer.getClassificationService().saveOrUpdate(classification);
 252             logger.info("saveUpdateNames-ok");
 253         }
 254     }
 255
 256
 257     /**
 258      * @param keys
 259      * @param acceptedTaxon: the current acceptedTaxon
 260      * @param nametosave: the list of objects to save into the CDM
 261      * @param refMods: the current reference extracted from the MODS
 262      */
 263     @SuppressWarnings("rawtypes")
 264     private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonNameBase> nametosave, Reference<?> refMods) {
 265         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
 266
 267         NodeList children = keys.getChildNodes();
 268         String key="";
 269         PolytomousKey poly =  PolytomousKey.NewInstance();
 270         poly.addSource(OriginalSourceType.Import, null,null,refMods,null);
 271         poly.addTaxonomicScope(acceptedTaxon);
 272         poly.setTitleCache("bloup");
 273         //        poly.addCoveredTaxon(acceptedTaxon);
 274         PolytomousKeyNode root = poly.getRoot();
 275         PolytomousKeyNode previous = null,tmpKey=null;
 276         Taxon taxonKey=null;
 277         List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
 278
 279         //        String fullContent = keys.getTextContent();
 280         for (int i=0;i<children.getLength();i++){
 281             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 282                 NodeList paragraph = children.item(i).getChildNodes();
 283                 key="";
 284                 taxonKey=null;
 285                 for (int j=0;j<paragraph.getLength();j++){
 286                     if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
 287                         if (! paragraph.item(j).getTextContent().trim().isEmpty()){
 288                             key+=paragraph.item(j).getTextContent().trim();
 289                             //                            logger.info("KEY: "+j+"--"+key);
 290                         }
 291                     }
 292                     if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
 293                         taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
 294                     }
 295                 }
 296                 //                logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
 297                 if (keypattern.matcher(key).matches()){
 298                     tmpKey = PolytomousKeyNode.NewInstance(key);
 299                     if (taxonKey!=null) {
 300                         tmpKey.setTaxon(taxonKey);
 301                     }
 302                     polyNodes.add(tmpKey);
 303                     if (previous == null) {
 304                         root.addChild(tmpKey);
 305                     } else {
 306                         previous.addChild(tmpKey);
 307                     }
 308                 }else{
 309                     if (!key.isEmpty()){
 310                         tmpKey=PolytomousKeyNode.NewInstance(key);
 311                         if (taxonKey!=null) {
 312                             tmpKey.setTaxon(taxonKey);
 313                         }
 314                         polyNodes.add(tmpKey);
 315                         if (keypatternend.matcher(key).matches()) {
 316                             root.addChild(tmpKey);
 317                             previous=tmpKey;
 318                         } else{
 319                             previous.addChild(tmpKey);
 320                         }
 321
 322                     }
 323                 }
 324             }
 325         }
 326         importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
 327         importer.getPolytomousKeyService().saveOrUpdate(poly);
 328     }
 329
 330     /**
 331      * @param taxons: the XML Nodegroup
 332      * @param nametosave: the list of objects to save into the CDM
 333      * @param acceptedTaxon: the current accepted Taxon
 334      * @param refMods: the current reference extracted from the MODS
 335      *
 336      * @return Taxon object built
 337      */
 338     @SuppressWarnings({ "rawtypes", "unchecked" })
 339     private Taxon getTaxonFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference<?> refMods) {
 340         //        logger.info("getTaxonFromXML");
 341         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 342
 343         TaxonNameBase nameToBeFilled = null;
 344
 345         MyName myname = new MyName();
 346         NomenclaturalStatusType statusType = null;
 347
 348         try {
 349             myname = extractScientificName(taxons);
 350             if (!myname.getStatus().isEmpty()){
 351                 try {
 352                     statusType = nomStatusString2NomStatus(myname.getStatus());
 353                 } catch (UnknownCdmTypeException e) {
 354                     logger.warn("Problem with status");
 355                 }
 356             }
 357
 358         } catch (TransformerFactoryConfigurationError e1) {
 359             logger.warn(e1);
 360         } catch (TransformerException e1) {
 361             logger.warn(e1);
 362         }
 363         INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
 364
 365         nameToBeFilled = parser.parseFullName(myname.getName(), nomenclaturalCode, myname.getRank());
 366         if (nameToBeFilled.hasProblem() &&
 367                 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
 368             //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
 369             nameToBeFilled=solveNameProblem(myname.getOriginalName(), myname.getName(),parser);
 370         }
 371
 372         nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
 373
 374         //        importer.getNameService().saveOrUpdate(nametosave);
 375         Taxon t = importer.getTaxonService().findBestMatchingTaxon(nameToBeFilled.getTitleCache());
 376
 377         boolean statusMatch=false;
 378         if(t !=null ){
 379             statusMatch=compareStatus(t, statusType);
 380         }
 381         if (t ==null || (t != null && !statusMatch)){
 382             if(statusType != null) {
 383                 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
 384             }
 385             t= new Taxon(nameToBeFilled,(Reference<?>) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
 386             if (t.getSec() == null) {
 387                 t.setSec(refMods);
 388             }
 389             if(!configState.getConfig().doKeepOriginalSecundum()) {
 390                 t.setSec(configState.getConfig().getSecundum());
 391                 logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
 392             }
 393             t.addSource(OriginalSourceType.Import,null,null,refMods,null);
 394
 395             /* boolean sourceExists=false;
 396             Set<IdentifiableSource> sources = t.getSources();
 397             for (IdentifiableSource src : sources){
 398                 String micro = src.getCitationMicroReference();
 399                 Reference r = src.getCitation();
 400                 if (r.equals(refMods) && micro == null) {
 401                     sourceExists=true;
 402                 }
 403             }
 404             if(!sourceExists) {
 405                 t.addSource(OriginalSourceType.Import,null,null,refMods,null);
 406             }
 407              */
 408
 409             if (!myname.getIdentifier().isEmpty() && (myname.getIdentifier().length()>2)){
 410                 setLSID(myname.getIdentifier(), t);
 411             }
 412
 413             Taxon parentTaxon = askParent(t, classification);
 414             if (parentTaxon ==null){
 415                 while (parentTaxon == null) {
 416                     parentTaxon = createParent(t, refMods);
 417                     classification.addParentChild(parentTaxon, t, refMods, null);
 418                 }
 419             }else{
 420                 classification.addParentChild(parentTaxon, t, refMods, null);
 421             }
 422         }
 423         else{
 424             t = CdmBase.deproxy(t, Taxon.class);
 425         }
 426         if (!configState.getConfig().doKeepOriginalSecundum()) {
 427             t.setSec(configState.getConfig().getSecundum());
 428             logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
 429         }
 430         return t;
 431     }
 432
 433
 434     /**
 435      * @param taxons: the XML Nodegroup
 436      * @param nametosave: the list of objects to save into the CDM
 437      * @param acceptedTaxon: the current accepted Taxon
 438      * @param refMods: the current reference extracted from the MODS
 439      *
 440      * @return Taxon object built
 441      */
 442     @SuppressWarnings({ "rawtypes", "unchecked" })
 443     private TaxonNameBase getTaxonNameBaseFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference<?> refMods) {
 444         //        logger.info("getTaxonFromXML");
 445         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 446
 447         TaxonNameBase nameToBeFilled = null;
 448
 449         MyName myName=new MyName();
 450
 451         NomenclaturalStatusType statusType = null;
 452         try {
 453             myName = extractScientificName(taxons);
 454             if (!myName.getStatus().isEmpty()){
 455                 try {
 456                     statusType = nomStatusString2NomStatus(myName.getStatus());
 457                 } catch (UnknownCdmTypeException e) {
 458                     logger.warn("Problem with status");
 459                 }
 460             }
 461         } catch (TransformerFactoryConfigurationError e1) {
 462             logger.warn(e1);
 463         } catch (TransformerException e1) {
 464             logger.warn(e1);
 465         }
 466         INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
 467
 468         nameToBeFilled = parser.parseFullName(myName.getName(), nomenclaturalCode, myName.getRank());
 469         if (nameToBeFilled.hasProblem() &&
 470                 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
 471             //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
 472             nameToBeFilled=solveNameProblem(myName.getOriginalName(), myName.getName(),parser);
 473         }
 474
 475         nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
 476         return nameToBeFilled;
 477
 478     }
 479
 480     @SuppressWarnings("rawtypes")
 481     private TaxonNameBase getTaxonNameBase (TaxonNameBase name, List<TaxonNameBase> nametosave, NomenclaturalStatusType statusType){
 482         List<TaxonNameBase> names = importer.getNameService().list(TaxonNameBase.class, null, null, null, null);
 483         for (TaxonNameBase tb : names){
 484             if (tb.getTitleCache().equalsIgnoreCase(name.getTitleCache())) {
 485                 boolean statusMatch=false;
 486                 if(tb !=null ){
 487                     statusMatch=compareStatus(tb, statusType);
 488                 }
 489                 if (!statusMatch){
 490                     if(statusType != null) {
 491                         name.addStatus(NomenclaturalStatus.NewInstance(statusType));
 492                     }
 493                 }else
 494                 {
 495                     logger.info("TaxonNameBase FOUND"+name.getTitleCache());
 496                     return tb;
 497                 }
 498             }
 499         }
 500         logger.info("TaxonNameBase NOT FOUND "+name.getTitleCache());
 501         System.out.println("add name "+name);
 502         nametosave.add(name);
 503         return name;
 504
 505     }
 506
 507
 508
 509     /**
 510      * @param tb
 511      * @param statusType
 512      * @return
 513      */
 514     private boolean compareStatus(TaxonNameBase tb, NomenclaturalStatusType statusType) {
 515         boolean statusMatch=false;
 516         //found one taxon
 517         Set<NomenclaturalStatus> status = tb.getStatus();
 518         if (statusType!=null && status.size()>0){ //the statusType is known for both taxon
 519             for (NomenclaturalStatus st:status){
 520                 NomenclaturalStatusType stype = st.getType();
 521                 if (stype.toString().equalsIgnoreCase(statusType.toString())) {
 522                     statusMatch=true;
 523                 }
 524             }
 525         }
 526         else{
 527             if(statusType == null && status.size()==0) {//there is no statusType, we can assume it's the same
 528                 statusMatch=true;
 529             }
 530         }
 531         return statusMatch;
 532     }
 533
 534     /**
 535      *
 536      */
 537     private void reloadClassification() {
 538         Classification cl = importer.getClassificationService().find(classification.getUuid());
 539         if (cl != null){
 540             classification=cl;
 541         }else{
 542             importer.getClassificationService().saveOrUpdate(classification);
 543             classification = importer.getClassificationService().find(classification.getUuid());
 544         }
 545
 546     }
 547
 548     /**
 549      * Create a Taxon for the current NameBase, based on the current reference
 550      * @param taxonNameBase
 551      * @param refMods: the current reference extracted from the MODS
 552      * @return Taxon
 553      */
 554     @SuppressWarnings({ "unused", "rawtypes" })
 555     private Taxon getTaxon(TaxonNameBase taxonNameBase, Reference<?> refMods) {
 556         Taxon t = new Taxon(taxonNameBase,null );
 557         if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
 558             t.setSec(configState.getConfig().getSecundum());
 559             logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
 560         }
 561         /*<<<<<<< .courant
 562         boolean sourceExists=false;
 563         Set<IdentifiableSource> sources = t.getSources();
 564         for (IdentifiableSource src : sources){
 565             String micro = src.getCitationMicroReference();
 566             Reference r = src.getCitation();
 567             if (r.equals(refMods) && micro == null) {
 568                 sourceExists=true;
 569             }
 570         }
 571         if(!sourceExists) {
 572             t.addSource(null,null,refMods,null);
 573         }
 574 =======*/
 575         t.addSource(OriginalSourceType.Import,null,null,refMods,null);
 576         return t;
 577     }
 578
 579     @SuppressWarnings("rawtypes")
 580     private void  extractDescriptionWithReference(Node typestatus, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave,
 581             Reference<?> refMods, String featureName) {
 582         System.out.println("extractDescriptionWithReference !");
 583         NodeList children = typestatus.getChildNodes();
 584
 585         List<DefinedTermBase> features = importer.getTermService().list(Feature.class, null,null,null,null);
 586         Feature currentFeature=null;
 587         for (DefinedTermBase feature: features){
 588             String tmpF = ((Feature)feature).getTitleCache();
 589             if (tmpF.equalsIgnoreCase(featureName)) {
 590                 currentFeature=(Feature)feature;
 591             }
 592         }
 593         if (currentFeature == null) {
 594             currentFeature=Feature.NewInstance(featureName, featureName, featureName);
 595             importer.getTermService().saveOrUpdate(currentFeature);
 596         }
 597
 598         String r="";String s="";
 599         for (int i=0;i<children.getLength();i++){
 600             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 601                 s+=children.item(i).getTextContent().trim();
 602             }
 603             if (children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
 604                 r+= children.item(i).getTextContent().trim();
 605             }
 606             if (s.indexOf(r)>-1) {
 607                 s=s.split(r)[0];
 608             }
 609         }
 610         Reference<?> currentref =  ReferenceFactory.newGeneric();
 611         if(!r.isEmpty()) {
 612             currentref.setTitle(r);
 613         } else {
 614             currentref=refMods;
 615         }
 616         setParticularDescription(s,acceptedTaxon,defaultTaxon, currentref, refMods,currentFeature);
 617
 618     }
 619
 620     /**
 621      * @param nametosave
 622      * @param distribution: the XML node group
 623      * @param acceptedTaxon: the current accepted Taxon
 624      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
 625      * @param refMods: the current reference extracted from the MODS
 626      */
 627     @SuppressWarnings("rawtypes")
 628     private void extractDistribution(Node distribution, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave, Reference<?> refMods) {
 629         //        logger.info("DISTRIBUTION");
 630         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 631         NodeList children = distribution.getChildNodes();
 632         Map<Integer,List<MySpecimenOrObservation>> specimenOrObservations = new HashMap<Integer, List<MySpecimenOrObservation>>();
 633         Map<Integer,String> descriptionsFulltext = new HashMap<Integer,String>();
 634
 635         for (int i=0;i<children.getLength();i++){
 636             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 637                 NodeList paragraph = children.item(i).getChildNodes();
 638                 for (int j=0;j<paragraph.getLength();j++){
 639                     if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
 640                         if(!paragraph.item(j).getTextContent().trim().isEmpty()) {
 641                             String s =paragraph.item(j).getTextContent().trim();
 642                             if (descriptionsFulltext.get(i) !=null){
 643                                 s = descriptionsFulltext.get(i)+" "+s;
 644                             }
 645                             descriptionsFulltext.put(i, s);
 646                         }
 647                     }
 648                     else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
 649                         String s =getTaxonNameBaseFromXML(paragraph.item(j),nametosave,refMods).toString().split("sec.")[0];
 650                         if (descriptionsFulltext.get(i) !=null){
 651                             s = descriptionsFulltext.get(i)+" "+s;
 652                         }
 653                         descriptionsFulltext.put(i, s);
 654                     }
 655                     else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")){
 656                         MySpecimenOrObservation specimenOrObservation = new MySpecimenOrObservation();
 657                         DerivedUnit derivedUnitBase = null;
 658                         specimenOrObservation = extractSpecimenOrObservation(paragraph.item(j), derivedUnitBase, SpecimenOrObservationType.DerivedUnit);
 659                         List<MySpecimenOrObservation> speObsList = specimenOrObservations.get(i);
 660                         if (speObsList == null) {
 661                             speObsList=new ArrayList<MySpecimenOrObservation>();
 662                         }
 663                         speObsList.add(specimenOrObservation);
 664                         specimenOrObservations.put(i,speObsList);
 665
 666                         String s = specimenOrObservation.getDerivedUnitBase().toString();
 667                         if (descriptionsFulltext.get(i) !=null){
 668                             s = descriptionsFulltext.get(i)+" "+s;
 669                         }
 670                         descriptionsFulltext.put(i, s);
 671                     }
 672
 673                 }
 674             }
 675         }
 676
 677         int m=0;
 678         for (int k:descriptionsFulltext.keySet()) {
 679             if (k>m) {
 680                 m=k;
 681             }
 682         }
 683         for (int k:specimenOrObservations.keySet()) {
 684             if (k>m) {
 685                 m=k;
 686             }
 687         }
 688
 689
 690         TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
 691         Feature currentFeature = Feature.DISTRIBUTION();
 692         DerivedUnit derivedUnitBase=null;
 693         String descr="";
 694         for (int k=0;k<=m;k++){
 695             if(specimenOrObservations.keySet().contains(k)){
 696                 for (MySpecimenOrObservation soo:specimenOrObservations.get(k) ) {
 697                     derivedUnitBase = soo.getDerivedUnitBase();
 698                     descr=soo.getDescr();
 699
 700                     /*<<<<<<< .courant
 701                     boolean sourceExists=false;
 702                     Set<IdentifiableSource> sources = derivedUnitBase.getSources();
 703                     for (IdentifiableSource src : sources){
 704                         String micro = src.getCitationMicroReference();
 705                         Reference r = src.getCitation();
 706                         if (r.equals(refMods) && micro == null) {
 707                             sourceExists=true;
 708                         }
 709                     }
 710                     if(!sourceExists) {
 711                         derivedUnitBase.addSource(null,null,refMods,null);
 712                     }
 713 =======*/
 714                     derivedUnitBase.addSource(OriginalSourceType.Import, null,null,refMods,null);
 715
 716                     importer.getOccurrenceService().saveOrUpdate(derivedUnitBase);
 717
 718                     TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
 719                     acceptedTaxon.addDescription(taxonDescription);
 720
 721
 722                     IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
 723
 724                     Feature feature=null;
 725                     feature = makeFeature(derivedUnitBase);
 726                     if(!StringUtils.isEmpty(descr)) {
 727                         derivedUnitBase.setTitleCache(descr, true);
 728                     }
 729                     indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
 730                     indAssociation.setFeature(feature);
 731                     indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
 732
 733                     /* sourceExists=false;
 734                     Set<DescriptionElementSource> dsources = indAssociation.getSources();
 735                     for (DescriptionElementSource src : dsources){
 736                         String micro = src.getCitationMicroReference();
 737                         Reference r = src.getCitation();
 738                         if (r.equals(refMods) && micro == null) {
 739                             sourceExists=true;
 740                         }
 741                     }
 742                     if(!sourceExists) {
 743                         indAssociation.addSource(null, null, refMods, null);
 744                     }
 745                      */
 746                     indAssociation.addSource(OriginalSourceType.Import, null,null,refMods,null);
 747
 748                     taxonDescription.addElement(indAssociation);
 749                     taxonDescription.setTaxon(acceptedTaxon);
 750                     taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
 751
 752                     /*sourceExists=false;
 753                     sources = taxonDescription.getSources();
 754                     for (IdentifiableSource src : sources){
 755                         String micro = src.getCitationMicroReference();
 756                         Reference r = src.getCitation();
 757                         if (r.equals(refMods) && micro == null) {
 758                             sourceExists=true;
 759                         }
 760                     }
 761                     if(!sourceExists) {
 762                         taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
 763                     }*/
 764                     importer.getDescriptionService().saveOrUpdate(taxonDescription);
 765                     importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 766                     td.setDescribedSpecimenOrObservation(soo.getDerivedUnitBase());
 767                 }
 768             }
 769
 770             if (descriptionsFulltext.keySet().contains(k)){
 771                 if (!descriptionsFulltext.get(k).isEmpty() && (descriptionsFulltext.get(k).startsWith("Hab.") || descriptionsFulltext.get(k).startsWith("Habitat"))){
 772                     setParticularDescription(descriptionsFulltext.get(k),acceptedTaxon,defaultTaxon, refMods, Feature.HABITAT());
 773                     break;
 774                 }
 775                 else{
 776                     TextData textData = TextData.NewInstance();
 777
 778                     textData.setFeature(currentFeature);
 779                     textData.putText(Language.UNKNOWN_LANGUAGE(), descriptionsFulltext.get(k));
 780                     textData.addSource(OriginalSourceType.Import, null, null, refMods, null);
 781
 782                     td.addElement(textData);
 783                 }
 784             }
 785
 786
 787             if (descriptionsFulltext.keySet().contains(k) || specimenOrObservations.keySet().contains(k)){
 788                 /*<<<<<<< .courant
 789                 boolean sourceExists=false;
 790                 Set<IdentifiableSource> sources = td.getSources();
 791                 for (IdentifiableSource src : sources){
 792                     String micro = src.getCitationMicroReference();
 793                     Reference r = src.getCitation();
 794                     if (r.equals(refMods) && micro == null) {
 795                         sourceExists=true;
 796                     }
 797                 }
 798                 if(!sourceExists) {
 799                     td.addSource(null,null,refMods,null);
 800                 }
 801 =======*/
 802                 td.addSource(OriginalSourceType.Import, null,null,refMods,null);
 803                 acceptedTaxon.addDescription(td);
 804                 importer.getDescriptionService().saveOrUpdate(td);
 805                 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 806             }
 807         }
 808     }
 809
 810
 811     /**
 812      * @param materials: the XML node group
 813      * @param acceptedTaxon: the current accepted Taxon
 814      * @param refMods: the current reference extracted from the MODS
 815      */
 816     @SuppressWarnings("rawtypes")
 817     private void extractMaterials(Node materials, Taxon acceptedTaxon, Reference<?> refMods,List<TaxonNameBase> nametosave) {
 818         //        logger.info("EXTRACTMATERIALS");
 819         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 820         NodeList children = materials.getChildNodes();
 821         NodeList events = null;
 822         String descr="";
 823
 824         DerivedUnit derivedUnitBase=null;
 825         MySpecimenOrObservation myspecimenOrObservation = null;
 826
 827         for (int i=0;i<children.getLength();i++){
 828             String rawAssociation="";
 829             boolean added=false;
 830             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 831                 events = children.item(i).getChildNodes();
 832                 for(int k=0;k<events.getLength();k++){
 833                     if (events.item(k).getNodeName().equalsIgnoreCase("tax:name")){
 834                         String linkedTaxon = getTaxonNameBaseFromXML(events.item(k), nametosave,refMods).toString();//TODO NOT IMPLEMENTED IN THE CDM YET
 835                         rawAssociation+=linkedTaxon.split("sec")[0];
 836                     }
 837                     if (! events.item(k).getNodeName().equalsIgnoreCase("tax:name")
 838                             && !events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
 839                         rawAssociation+= events.item(k).getTextContent().trim();
 840                     }
 841                     if(events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
 842                         if (!containsDistinctLetters(rawAssociation.replaceAll(";",""))) {
 843                             rawAssociation="no description text";
 844                         }
 845                         added=true;
 846                         DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
 847                         derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
 848                         /*<<<<<<< .courant
 849                         System.out.println("derivedUnitBase: "+derivedUnitBase);
 850
 851                         boolean sourceExists=false;
 852                         Set<IdentifiableSource> sources = derivedUnitBase.getSources();
 853                         for (IdentifiableSource src : sources){
 854                             String micro = src.getCitationMicroReference();
 855                             Reference r = src.getCitation();
 856                             if (r.equals(refMods) && micro == null) {
 857                                 sourceExists=true;
 858                             }
 859                         }
 860                         if(!sourceExists) {
 861                             derivedUnitBase.addSource(null,null,refMods,null);
 862                         }
 863
 864 =======*/
 865                         derivedUnitBase.addSource(OriginalSourceType.Import, null,null,refMods,null);
 866                         importer.getOccurrenceService().saveOrUpdate(derivedUnitBase);
 867
 868                         myspecimenOrObservation = extractSpecimenOrObservation(events.item(k),derivedUnitBase,SpecimenOrObservationType.DerivedUnit);
 869                         derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
 870                         descr=myspecimenOrObservation.getDescr();
 871
 872                         /*<<<<<<< .courant
 873                         sourceExists=false;
 874                         sources = derivedUnitBase.getSources();
 875                         for (IdentifiableSource src : sources){
 876                             String micro = src.getCitationMicroReference();
 877                             Reference r = src.getCitation();
 878                             if (r.equals(refMods) && micro == null) {
 879                                 sourceExists=true;
 880                             }
 881                         }
 882                         if(!sourceExists) {
 883                             derivedUnitBase.addSource(null,null,refMods,null);
 884                         }
 885 =======*/
 886                         derivedUnitBase.addSource(OriginalSourceType.Import, null,null,refMods,null);
 887
 888                         importer.getOccurrenceService().saveOrUpdate(derivedUnitBase);
 889
 890                         TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
 891                         acceptedTaxon.addDescription(taxonDescription);
 892
 893
 894                         IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
 895
 896                         Feature feature = makeFeature(derivedUnitBase);
 897                         if(!StringUtils.isEmpty(descr)) {
 898                             derivedUnitBase.setTitleCache(descr, true);
 899                         }
 900                         indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
 901                         indAssociation.setFeature(feature);
 902                         indAssociation.addSource(OriginalSourceType.Import,null, null, refMods, null);
 903
 904                         /*sourceExists=false;
 905                         Set<DescriptionElementSource> dsources = indAssociation.getSources();
 906                         for (DescriptionElementSource src : dsources){
 907                             String micro = src.getCitationMicroReference();
 908                             Reference r = src.getCitation();
 909                             if (r.equals(refMods) && micro == null) {
 910                                 sourceExists=true;
 911                             }
 912                         }
 913                         if(!sourceExists) {
 914                             indAssociation.addSource(null, null, refMods, null);
 915                         }
 916                          */
 917
 918                         taxonDescription.addElement(indAssociation);
 919                         taxonDescription.setTaxon(acceptedTaxon);
 920                         taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
 921
 922                         /*sourceExists=false;
 923                         sources = taxonDescription.getSources();
 924                         for (IdentifiableSource src : sources){
 925                             String micro = src.getCitationMicroReference();
 926                             Reference r = src.getCitation();
 927                             if (r.equals(refMods) && micro == null) {
 928                                 sourceExists=true;
 929                             }
 930                         }
 931                         if(!sourceExists) {
 932                             taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
 933                         }*/
 934                         importer.getDescriptionService().saveOrUpdate(taxonDescription);
 935                         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 936                     }
 937                     if (!rawAssociation.isEmpty() && !added){
 938                         DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
 939                         derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
 940
 941                         TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
 942                         acceptedTaxon.addDescription(taxonDescription);
 943
 944                         IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
 945
 946                         Feature feature = Feature.MATERIALS_EXAMINED();
 947                         if(!StringUtils.isEmpty(rawAssociation)) {
 948                             derivedUnitBase.setTitleCache(rawAssociation, true);
 949                         }
 950                         indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
 951                         indAssociation.setFeature(feature);
 952                         indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
 953
 954                         /*boolean sourceExists=false;
 955                         Set<DescriptionElementSource> dsources = indAssociation.getSources();
 956                         for (DescriptionElementSource src : dsources){
 957                             String micro = src.getCitationMicroReference();
 958                             Reference r = src.getCitation();
 959                             if (r.equals(refMods) && micro == null) {
 960                                 sourceExists=true;
 961                             }
 962                         }
 963                         if(!sourceExists) {
 964                             indAssociation.addSource(null, null, refMods, null);
 965                         }*/
 966                         taxonDescription.addElement(indAssociation);
 967                         taxonDescription.setTaxon(acceptedTaxon);
 968                         taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
 969
 970                         /*sourceExists=false;
 971                         Set<IdentifiableSource> sources = taxonDescription.getSources();
 972                         for (IdentifiableSource src : sources){
 973                             String micro = src.getCitationMicroReference();
 974                             Reference r = src.getCitation();
 975                             if (r.equals(refMods) && micro == null) {
 976                                 sourceExists=true;
 977                             }
 978                         }
 979                         if(!sourceExists) {
 980                             taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
 981                         }*/
 982
 983                         importer.getDescriptionService().saveOrUpdate(taxonDescription);
 984                         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 985
 986                         rawAssociation="";
 987                     }
 988                 }
 989             }
 990         }
 991     }
 992
 993     /**
 994      * @param materials: the XML node group
 995      * @param acceptedTaxon: the current accepted Taxon
 996      * @param refMods: the current reference extracted from the MODS
 997      */
 998     @SuppressWarnings("rawtypes")
 999     private String extractMaterialsDirect(Node materials, Taxon acceptedTaxon, Reference<?> refMods, String event) {
1000         //        logger.info("EXTRACTMATERIALS");
1001         //        logger.info("acceptedTaxon: "+acceptedTaxon);
1002         String descr="";
1003
1004         DerivedUnit derivedUnitBase=null;
1005         MySpecimenOrObservation myspecimenOrObservation = null;
1006
1007
1008         myspecimenOrObservation = extractSpecimenOrObservation(materials,derivedUnitBase, SpecimenOrObservationType.DerivedUnit);
1009         derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
1010         descr=myspecimenOrObservation.getDescr();
1011
1012         /*<<<<<<< .courant
1013         boolean sourceExists=false;
1014         Set<IdentifiableSource> sources = derivedUnitBase.getSources();
1015         for (IdentifiableSource src : sources){
1016             String micro = src.getCitationMicroReference();
1017             Reference r = src.getCitation();
1018             if (r.equals(refMods) && micro == null) {
1019                 sourceExists=true;
1020             }
1021         }
1022         if(!sourceExists) {
1023             derivedUnitBase.addSource(null,null,refMods,null);
1024         }
1025 =======*/
1026         derivedUnitBase.addSource(OriginalSourceType.Import, null,null,refMods,null);
1027
1028         importer.getOccurrenceService().saveOrUpdate(derivedUnitBase);
1029
1030         TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
1031         acceptedTaxon.addDescription(taxonDescription);
1032
1033
1034         IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
1035
1036         Feature feature=null;
1037         if (event.equalsIgnoreCase("collection")){
1038             feature = makeFeature(derivedUnitBase);
1039         }
1040         else{
1041             feature = Feature.MATERIALS_EXAMINED();
1042         }
1043         if(!StringUtils.isEmpty(descr)) {
1044             derivedUnitBase.setTitleCache(descr);
1045         }
1046         indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
1047         indAssociation.setFeature(feature);
1048         indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
1049
1050         /* sourceExists=false;
1051         Set<DescriptionElementSource> dsources = indAssociation.getSources();
1052         for (DescriptionElementSource src : dsources){
1053             String micro = src.getCitationMicroReference();
1054             Reference r = src.getCitation();
1055             if (r.equals(refMods) && micro == null) {
1056                 sourceExists=true;
1057             }
1058         }
1059         if(!sourceExists) {
1060             indAssociation.addSource(null, null, refMods, null);
1061         }
1062          */
1063         taxonDescription.addElement(indAssociation);
1064         taxonDescription.setTaxon(acceptedTaxon);
1065         taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
1066
1067         /*  sourceExists=false;
1068         sources = taxonDescription.getSources();
1069         for (IdentifiableSource src : sources){
1070             String micro = src.getCitationMicroReference();
1071             Reference r = src.getCitation();
1072             if (r.equals(refMods) && micro == null) {
1073                 sourceExists=true;
1074             }
1075         }
1076         if(!sourceExists) {
1077             taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
1078         }
1079          */
1080         importer.getDescriptionService().saveOrUpdate(taxonDescription);
1081         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1082
1083         return derivedUnitBase.getTitleCache();
1084
1085     }
1086
1087
1088     /**
1089      * @param description: the XML node group
1090      * @param acceptedTaxon: the current acceptedTaxon
1091      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1092      * @param nametosave: the list of objects to save into the CDM
1093      * @param refMods: the current reference extracted from the MODS
1094      * @param featureName: the feature name
1095      */
1096     private String extractSpecificFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
1097             List<TaxonNameBase> nametosave, Reference<?> refMods, String featureName ) {
1098         //        System.out.println("GRUUUUuu");
1099         NodeList children = description.getChildNodes();
1100         NodeList insideNodes ;
1101         NodeList trNodes;
1102         NodeList tdNodes;
1103         String descr ="";
1104         String localdescr="";
1105         List<String> blabla=null;
1106         List<String> text = new ArrayList<String>();
1107
1108         String table="<table>";
1109         String head="";
1110         String line="";
1111
1112         //        String fullContent = description.getTextContent();
1113         for (int i=0;i<children.getLength();i++){
1114             localdescr="";
1115             if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
1116                 descr += children.item(i).getTextContent().trim();
1117             }
1118             //            if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1119             //                    children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("other") &&
1120             //                    children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1121             if (featureName.equalsIgnoreCase("table")){
1122                 System.out.println("children.item(i).name: "+i+"-- "+children.item(i).getNodeName());
1123                 if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1124                         children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("thead")){
1125                     head="<th>";
1126                     trNodes = children.item(i).getChildNodes();
1127                     for (int k=0;k<trNodes.getLength();k++){
1128                         System.out.println("NB ELEMENTS "+k +"("+trNodes.getLength()+")");
1129                         if (trNodes.item(k).getNodeName().equalsIgnoreCase("tax:div")
1130                                 && trNodes.item(k).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1131
1132                             System.out.println("hop");
1133                             line="<tr>";
1134                             tdNodes=trNodes.item(k).getChildNodes();
1135                             for (int l=0;l<tdNodes.getLength();l++){
1136                                 if (tdNodes.item(l).getNodeName().equalsIgnoreCase("tax:p")){
1137                                     line+="<td>"+tdNodes.item(l).getTextContent()+"</td>";
1138                                 }
1139                             }
1140                             line+="</tr>";
1141                             head+=line;
1142                         }
1143                     }
1144                     head+="</th>";
1145                     table+=head;
1146                     //                    }
1147                     line="<tr>";
1148                     if (children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1149                         line="<tr>";
1150                         tdNodes=children.item(i).getChildNodes();
1151                         for (int l=0;l<tdNodes.getLength();l++){
1152                             if (tdNodes.item(l).getNodeName().equalsIgnoreCase("tax:p")){
1153                                 line+="<td>"+tdNodes.item(l).getTextContent()+"</td>";
1154                             }
1155                         }
1156                     }
1157                     line+="</tr>";
1158                     if (!line.equalsIgnoreCase("<tr></tr>")) {
1159                         table+=line;
1160                     }
1161                 }
1162                 if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1163                         children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1164                     line="<tr>";
1165                     trNodes = children.item(i).getChildNodes();
1166                     for (int k=0;k<trNodes.getLength();k++){
1167                         if (trNodes.item(k).getNodeName().equalsIgnoreCase("tax:p")){
1168                             line+="<td>"+trNodes.item(k).getTextContent()+"</td>";
1169                         }
1170                     }
1171                     line+="</tr>";
1172                     if(!line.equalsIgnoreCase("<tr></tr>")) {
1173                         table+=line;
1174                     }
1175                 }
1176             }
1177             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1178                 insideNodes=children.item(i).getChildNodes();
1179                 blabla= new ArrayList<String>();
1180                 for (int j=0;j<insideNodes.getLength();j++){
1181                     if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1182                         String linkedTaxon = getTaxonNameBaseFromXML(insideNodes.item(j), nametosave,refMods).toString();//TODO NOT IMPLEMENTED IN THE CDM YET
1183                         blabla.add(linkedTaxon.split("sec")[0]);
1184                     }
1185                     if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1186                         if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1187                             blabla.add(insideNodes.item(j).getTextContent().trim());
1188                             localdescr += insideNodes.item(j).getTextContent().trim();
1189                         }
1190                     }
1191                 }
1192                 if (!blabla.isEmpty()) {
1193                     List<DefinedTermBase> features = importer.getTermService().list(Feature.class, null,null,null,null);
1194                     Feature currentFeature=null;
1195                     for (DefinedTermBase feature: features){
1196                         String tmpF = ((Feature)feature).getTitleCache();
1197                         if (tmpF.equalsIgnoreCase(featureName)) {
1198                             currentFeature=(Feature)feature;
1199                         }
1200                     }
1201                     if (currentFeature == null) {
1202                         currentFeature=Feature.NewInstance(featureName, featureName, featureName);
1203                         importer.getTermService().saveOrUpdate(currentFeature);
1204                     }
1205                     setParticularDescription(StringUtils.join(blabla," "),acceptedTaxon,defaultTaxon, refMods,currentFeature);
1206                 }
1207                 text.add(StringUtils.join(blabla," "));
1208             }
1209         }
1210
1211         table+="</table>";
1212         if (!table.equalsIgnoreCase("<table></table>")){
1213             System.out.println("TABLE : "+table);
1214             text.add(table);
1215         }
1216
1217         if (text !=null && !text.isEmpty()) {
1218             return StringUtils.join(text," ");
1219         } else {
1220             return "";
1221         }
1222
1223     }
1224
1225
1226
1227
1228     /**
1229      * @param children: the XML node group
1230      * @param nametosave: the list of objects to save into the CDM
1231      * @param acceptedTaxon: the current acceptedTaxon
1232      * @param refMods: the current reference extracted from the MODS
1233      * @param fullContent :the parsed XML content
1234      * @return a list of description (text)
1235      */
1236     @SuppressWarnings("unused")
1237     private List<String> parseParagraph(List<TaxonNameBase> nametosave, Taxon acceptedTaxon, Reference<?> refMods, Node paragraph, Feature feature){
1238         List<String> fullDescription=  new ArrayList<String>();
1239         //        String localdescr;
1240         String descr="";
1241         NodeList insideNodes ;
1242         boolean collectionEvent = false;
1243         List<Node>collectionEvents = new ArrayList<Node>();
1244
1245         NodeList children = paragraph.getChildNodes();
1246
1247         for (int i=0;i<children.getLength();i++){
1248             //            localdescr="";
1249             if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
1250                 descr += children.item(i).getTextContent().trim();
1251             }
1252             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1253                 insideNodes=children.item(i).getChildNodes();
1254                 List<String> blabla= new ArrayList<String>();
1255                 for (int j=0;j<insideNodes.getLength();j++){
1256                     boolean nodeKnown = false;
1257                     System.out.println("insideNodes.item(j).getNodeName() : "+insideNodes.item(j).getNodeName());
1258                     if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1259                         String linkedTaxon = getTaxonNameBaseFromXML(insideNodes.item(j), nametosave,refMods).toString();//TODO NOT IMPLEMENTED IN THE CDM YET
1260                         blabla.add(linkedTaxon.split("sec")[0]);
1261                         nodeKnown=true;
1262                     }
1263                     if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1264                         if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1265                             blabla.add(insideNodes.item(j).getTextContent().trim());
1266                             //                            localdescr += insideNodes.item(j).getTextContent().trim();
1267                         }
1268                         nodeKnown=true;
1269                     }
1270                     if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:bibref")) {
1271                         String ref = insideNodes.item(j).getTextContent().trim();
1272                         if (ref.endsWith(";")  && ((ref.length())>1)) {
1273                             ref=ref.substring(0, ref.length()-1)+".";
1274                         }
1275                         Reference<?> reference = ReferenceFactory.newGeneric();
1276                         reference.setTitleCache(ref, true);
1277                         blabla.add(reference.getTitleCache());
1278                         nodeKnown=true;
1279                     }
1280                     if  (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:figure")){
1281                         System.out.println("OUHOU");
1282                         String figure = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, nametosave, refMods, "figure");
1283                         blabla.add(figure);
1284                     }
1285                     if(insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:div") &&
1286                             insideNodes.item(j).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("other") &&
1287                             insideNodes.item(j).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1288                         System.out.println("OUI?");
1289                         String table = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, nametosave, refMods, "table");
1290                         blabla.add(table);
1291                     }
1292                     if  (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1293                         logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1294                         String titlecache  = extractMaterialsDirect(insideNodes.item(j), acceptedTaxon, refMods, "collection");
1295                         blabla.add(titlecache);
1296                         collectionEvent=true;
1297                         collectionEvents.add(insideNodes.item(j));
1298                         nodeKnown=true;
1299                     }
1300                     if (!nodeKnown && !insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:pb")) {
1301                         logger.info("Node not handled yet : "+insideNodes.item(j).getNodeName());
1302                     }
1303
1304                 }
1305                 if (!blabla.isEmpty()) {
1306                     fullDescription.add(StringUtils.join(blabla," "));
1307                 }
1308             }
1309             if  (children.item(i).getNodeName().equalsIgnoreCase("tax:figure")){
1310                 String figure = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, nametosave, refMods, "figure");
1311                 fullDescription.add(figure);
1312             }
1313             if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1314                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("other") &&
1315                     children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1316                 String table = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, nametosave, refMods, "table");
1317                 fullDescription.add(table);
1318             }
1319         }
1320         //        if (collectionEvent) {
1321         //            logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1322         //            for (Node coll:collectionEvents){
1323         //                = extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
1324         //            }
1325         //        }
1326         return fullDescription;
1327     }
1328
1329
1330     /**
1331      * @param description: the XML node group
1332      * @param acceptedTaxon: the current acceptedTaxon
1333      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1334      * @param nametosave: the list of objects to save into the CDM
1335      * @param refMods: the current reference extracted from the MODS
1336      * @param feature: the feature to link the data with
1337      */
1338     private void extractFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave, Reference<?> refMods, Feature feature){
1339         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1340         List<String> fullDescription= parseParagraph( nametosave, acceptedTaxon, refMods, description,feature);
1341
1342         if (!fullDescription.isEmpty()) {
1343             setParticularDescription(StringUtils.join(fullDescription,"<br/>"),acceptedTaxon,defaultTaxon, refMods,feature);
1344         }
1345
1346     }
1347
1348
1349     /**
1350      * @param descr: the XML Nodegroup to parse
1351      * @param acceptedTaxon: the current acceptedTaxon
1352      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1353      * @param refMods: the current reference extracted from the MODS
1354      * @param currentFeature: the feature name
1355      * @return
1356      */
1357     private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon, Reference<?> refMods, Feature currentFeature) {
1358         //        logger.info("setParticularDescription "+currentFeature);
1359         //        logger.info("acceptedTaxon: "+acceptedTaxon);
1360         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1361
1362         TextData textData = TextData.NewInstance();
1363         textData.setFeature(currentFeature);
1364         textData.addSource(OriginalSourceType.Import, null,null,refMods,null);
1365
1366         textData.putText(Language.UNKNOWN_LANGUAGE(), descr+"<br/>");
1367
1368         if(! descr.isEmpty() && (acceptedTaxon!=null)){
1369             TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1370             td.addElement(textData);
1371             td.addSource(OriginalSourceType.Import,null,null,refMods,null);
1372             acceptedTaxon.addDescription(td);
1373             importer.getDescriptionService().saveOrUpdate(td);
1374             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1375         }
1376
1377         if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1378             try{
1379                 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1380                 if (tmp!=null) {
1381                     defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1382                 }else{
1383                     importer.getTaxonService().saveOrUpdate(defaultTaxon);
1384                 }
1385             }catch(Exception e){
1386                 logger.debug("TAXON EXISTS"+defaultTaxon);
1387             }
1388
1389             TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1390             defaultTaxon.addDescription(td);
1391             td.addElement(textData);
1392             td.addSource(OriginalSourceType.Import,null,null,refMods,null);
1393             importer.getDescriptionService().saveOrUpdate(td);
1394             importer.getTaxonService().saveOrUpdate(defaultTaxon);
1395         }
1396     }
1397
1398     /**
1399      * @param descr: the XML Nodegroup to parse
1400      * @param acceptedTaxon: the current acceptedTaxon
1401      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1402      * @param refMods: the current reference extracted from the MODS
1403      * @param currentFeature: the feature name
1404      * @return
1405      */
1406     private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon,Reference<?> currentRef, Reference<?> refMods, Feature currentFeature) {
1407         System.out.println("setParticularDescriptionSPecial "+currentFeature);
1408         //        logger.info("acceptedTaxon: "+acceptedTaxon);
1409         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1410
1411         TextData textData = TextData.NewInstance();
1412         textData.setFeature(currentFeature);
1413         textData.addSource(OriginalSourceType.Import,null,null,refMods,null);
1414
1415         textData.putText(Language.UNKNOWN_LANGUAGE(), descr+"<br/>");
1416
1417         if(! descr.isEmpty() && (acceptedTaxon!=null)){
1418             TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1419             td.addElement(textData);
1420             td.addSource(OriginalSourceType.Import,null,null,refMods,null);
1421             if(currentRef != refMods) {
1422                 td.addSource(OriginalSourceType.Import,null,null,currentRef,null);
1423             }
1424             acceptedTaxon.addDescription(td);
1425             importer.getDescriptionService().saveOrUpdate(td);
1426             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1427         }
1428
1429         if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1430             try{
1431                 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1432                 if (tmp!=null) {
1433                     defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1434                 }else{
1435                     importer.getTaxonService().saveOrUpdate(defaultTaxon);
1436                 }
1437             }catch(Exception e){
1438                 logger.debug("TAXON EXISTS"+defaultTaxon);
1439             }
1440
1441             TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1442             defaultTaxon.addDescription(td);
1443             td.addElement(textData);
1444             if(currentRef != refMods) {
1445                 td.addSource(OriginalSourceType.Import,null,null,refMods,null);
1446             }
1447             td.addSource(OriginalSourceType.Import,null,null,currentRef,null);
1448             importer.getDescriptionService().saveOrUpdate(td);
1449             importer.getTaxonService().saveOrUpdate(defaultTaxon);
1450         }
1451     }
1452
1453
1454
1455     /**
1456      * @param synonyms: the XML Nodegroup to parse
1457      * @param nametosave: the list of objects to save into the CDM
1458      * @param acceptedTaxon: the current acceptedTaxon
1459      * @param refMods: the current reference extracted from the MODS
1460      */
1461     @SuppressWarnings({ "rawtypes", "unchecked" })
1462     private void extractSynonyms(Node synonyms, List<TaxonNameBase> nametosave,Taxon acceptedTaxon, Reference<?> refMods) {
1463         //        logger.info("extractSynonyms: "+acceptedTaxon);
1464         Taxon ttmp = (Taxon) importer.getTaxonService().find(acceptedTaxon.getUuid());
1465         if (ttmp != null) {
1466             acceptedTaxon = CdmBase.deproxy(ttmp,Taxon.class);
1467         }
1468         else{
1469             acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1470         }
1471         NodeList children = synonyms.getChildNodes();
1472         TaxonNameBase nameToBeFilled = null;
1473         List<MyName> names = new ArrayList<MyName>();
1474
1475         if(synonyms.getNodeName().equalsIgnoreCase("tax:name")){
1476             MyName myName;
1477             try {
1478                 myName = extractScientificName(synonyms);
1479                 names.add(myName);
1480             } catch (TransformerFactoryConfigurationError e) {
1481                 logger.warn(e);
1482             } catch (TransformerException e) {
1483                 logger.warn(e);
1484             }
1485         }
1486
1487
1488         for (int i=0;i<children.getLength();i++){
1489             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1490                 NodeList tmp = children.item(i).getChildNodes();
1491                 //                String fullContent = children.item(i).getTextContent();
1492                 for (int j=0; j< tmp.getLength();j++){
1493                     if(tmp.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1494                         MyName myName;
1495                         try {
1496                             myName = extractScientificName(tmp.item(j));
1497                             names.add(myName);
1498                         } catch (TransformerFactoryConfigurationError e) {
1499                             logger.warn(e);
1500                         } catch (TransformerException e) {
1501                             logger.warn(e);
1502                         }
1503
1504                     }
1505                 }
1506             }
1507             if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
1508                 MyName myName;
1509                 try {
1510                     myName = extractScientificName(children.item(i));
1511                     names.add(myName);
1512                 } catch (TransformerFactoryConfigurationError e) {
1513                     logger.warn(e);
1514                 } catch (TransformerException e) {
1515                     logger.warn(e);
1516                 }
1517
1518             }
1519         }
1520         NomenclaturalStatusType statusType = null;
1521
1522         for(MyName name:names){
1523             System.out.println("HANDLE NAME "+name);
1524
1525             statusType = null;
1526
1527             if (!name.getStatus().isEmpty()){
1528                 try {
1529                     statusType = nomStatusString2NomStatus(name.getStatus());
1530                 } catch (UnknownCdmTypeException e) {
1531                     logger.warn("Problem with status");
1532                 }
1533             }
1534
1535             INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1536             nameToBeFilled = parser.parseFullName(name.getName(), nomenclaturalCode, name.getRank());
1537             if (nameToBeFilled.hasProblem() &&
1538                     !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1539                 //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1540                 nameToBeFilled = solveNameProblem(name.getOriginalName(), name.getName(), parser);
1541             }
1542             nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
1543             Synonym synonym = Synonym.NewInstance(nameToBeFilled, refMods);
1544
1545
1546             if (!name.getIdentifier().isEmpty() && (name.getIdentifier().length()>2)){
1547                 setLSID(name.getIdentifier(), synonym);
1548             }
1549
1550             Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1551             System.out.println(synonym.getName()+" -- "+synonym.getSec());
1552             boolean synoExist = false;
1553             for (Synonym syn: synonymsSet){
1554                 System.out.println(syn.getName()+" -- "+syn.getSec());
1555                 boolean a =syn.getName().equals(synonym.getName());
1556                 boolean b = syn.getSec().equals(synonym.getSec());
1557                 if (a && b) {
1558                     synoExist=true;
1559                 }
1560             }
1561             if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1562                 System.out.println("SYNONYM");
1563                 synonym.addSource(OriginalSourceType.Import,null,null,refMods,null);
1564                 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),refMods, null);
1565                 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1566             }
1567         }
1568
1569     }
1570
1571
1572
1573
1574
1575     /**
1576      * @param refgroup: the XML nodes
1577      * @param nametosave: the list of objects to save into the CDM
1578      * @param acceptedTaxon: the current acceptedTaxon
1579      * @param nametosave: the list of objects to save into the CDM
1580      * @param refMods: the current reference extracted from the MODS
1581      * @return the acceptedTaxon (why?)
1582      * handle cases where the bibref are inside <p> and outside
1583      */
1584     @SuppressWarnings({ "null", "unused" ,"rawtypes" })
1585     private Taxon extractReferences(Node refgroup, List<TaxonNameBase> nametosave, Taxon acceptedTaxon, Reference<?> refMods) {
1586         //        logger.info("extractReferences");
1587         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1588
1589         NodeList children = refgroup.getChildNodes();
1590         NonViralName<?> nameToBeFilled = null;
1591         if (nomenclaturalCode.equals(NomenclaturalCode.ICNCP)){
1592             nameToBeFilled = BotanicalName.NewInstance(null);
1593         }
1594         if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1595             nameToBeFilled = ZoologicalName.NewInstance(null);
1596         }
1597         if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1598             nameToBeFilled = BacterialName.NewInstance(null);
1599         }
1600
1601         ReferenceBuilder refBuild = new ReferenceBuilder();
1602         for (int i=0;i<children.getLength();i++){
1603             if(children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
1604                 String ref = children.item(i).getTextContent().trim();
1605                 refBuild.builReference(ref, treatmentMainName, nomenclaturalCode,  acceptedTaxon, refMods);
1606                 if (!refBuild.isFoundBibref()){
1607                     extractReferenceRawText(children.item(i).getChildNodes(), nameToBeFilled, nametosave, refMods,acceptedTaxon);
1608                 }
1609             }
1610
1611             if(children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1612                 NodeList references = children.item(i).getChildNodes();
1613                 for (int j=0;j<references.getLength();j++){
1614                     if(references.item(j).getNodeName().equalsIgnoreCase("tax:bibref")){
1615                         String ref = references.item(j).getTextContent().trim();
1616                         refBuild.builReference(ref, treatmentMainName,  nomenclaturalCode,  acceptedTaxon, refMods);
1617                     }
1618                 }
1619                 if (!refBuild.isFoundBibref()){
1620                     extractReferenceRawText(references, nameToBeFilled, nametosave, refMods, acceptedTaxon);
1621                 }
1622             }
1623         }
1624         //        importer.getClassificationService().saveOrUpdate(classification);
1625         return acceptedTaxon;
1626
1627     }
1628
1629     /**
1630      * @param references
1631      * handle cases where the bibref are inside <p> and outside
1632      */
1633     @SuppressWarnings("rawtypes")
1634     private void extractReferenceRawText(NodeList references, NonViralName<?> nameToBeFilled, List<TaxonNameBase> nametosave,
1635             Reference<?> refMods, Taxon acceptedTaxon) {
1636         String refString="";
1637         NomenclaturalStatusType statusType = null;
1638         MyName myName= new MyName();
1639         for (int j=0;j<references.getLength();j++){
1640             //no bibref tag inside
1641             System.out.println("references.item(j).getNodeName()"+references.item(j).getNodeName());
1642             if (references.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1643
1644                 try {
1645                     myName = extractScientificName(references.item(j));
1646                     //                    if (myName.getNewName().isEmpty()) {
1647                     //                        name=myName.getOriginalName()+"---"+myName.getRank()+"---"+myName.getIdentifier()+"---"+myName.getStatus();
1648                     //                    } else {
1649                     //                        name=myName.getNewName()+"---"+myName.getRank()+"---"+myName.getIdentifier()+"---"+myName.getStatus();
1650                     //                    }
1651                 } catch (TransformerFactoryConfigurationError e) {
1652                     logger.warn(e);
1653                 } catch (TransformerException e) {
1654                     logger.warn(e);
1655                 }
1656
1657                 //                name=name.trim();
1658             }
1659             if (references.item(j).getNodeName().equalsIgnoreCase("#text")){
1660                 refString = references.item(j).getTextContent().trim();
1661             }
1662             if(references.item(j).getNodeName().equalsIgnoreCase("#text") && !references.item(j).getTextContent().trim().isEmpty()){
1663                 //
1664                 statusType = null;
1665                 if (!myName.getStatus().isEmpty()){
1666                     try {
1667                         statusType = nomStatusString2NomStatus(myName.getStatus());
1668                     } catch (UnknownCdmTypeException e) {
1669                         logger.warn("Problem with status");
1670                     }
1671                 }
1672
1673                 INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1674                 String fullLineRefName = references.item(j).getTextContent().trim();
1675                 int nameOrRefOrOther=1;
1676                 nameOrRefOrOther=askIfNameContained(fullLineRefName);
1677                 System.out.println("NAMEORREFOR?? "+nameOrRefOrOther);
1678                 if (nameOrRefOrOther==0){
1679                     TaxonNameBase nameTBF = parser.parseFullName(fullLineRefName, nomenclaturalCode, Rank.UNKNOWN_RANK());
1680                     if (nameTBF.hasProblem() &&
1681                             !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1682                         nameTBF=solveNameProblem(fullLineRefName, fullLineRefName,parser);
1683                     }
1684                     nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1685                     Synonym synonym = Synonym.NewInstance(nameTBF, refMods);
1686
1687                     Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1688                     System.out.println(synonym.getName()+" -- "+synonym.getSec());
1689                     boolean synoExist = false;
1690                     for (Synonym syn: synonymsSet){
1691                         System.out.println(syn.getName()+" -- "+syn.getSec());
1692                         boolean a =syn.getName().equals(synonym.getName());
1693                         boolean b = syn.getSec().equals(synonym.getSec());
1694                         if (a && b) {
1695                             synoExist=true;
1696                         }
1697                     }
1698                     if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1699                         System.out.println("SYNONYM");
1700                         synonym.addSource(OriginalSourceType.Import,null,null,refMods,null);
1701                         acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),refMods, null);
1702                         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1703                     }
1704                 }
1705
1706                 if (nameOrRefOrOther==1){
1707                     Reference<?> re = ReferenceFactory.newGeneric();
1708                     re.setTitleCache(fullLineRefName);
1709
1710                     TaxonNameBase nameTBF = parser.parseFullName(myName.getName(), nomenclaturalCode, Rank.UNKNOWN_RANK());
1711                     if (nameTBF.hasProblem() &&
1712                             !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1713                         nameTBF=solveNameProblem(myName.getName(), myName.getName(),parser);
1714                     }
1715                     nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1716                     Synonym synonym = Synonym.NewInstance(nameTBF, re);
1717
1718                     Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1719                     System.out.println(synonym.getName()+" -- "+synonym.getSec());
1720                     boolean synoExist = false;
1721                     for (Synonym syn: synonymsSet){
1722                         System.out.println(syn.getName()+" -- "+syn.getSec());
1723                         boolean a =syn.getName().equals(synonym.getName());
1724                         boolean b = syn.getSec().equals(synonym.getSec());
1725                         if (a && b) {
1726                             synoExist=true;
1727                         }
1728                     }
1729                     if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1730                         System.out.println("SYNONYM");
1731                         synonym.addSource(OriginalSourceType.Import,null,null,refMods,null);
1732                         acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),re, null);
1733                         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1734                     }
1735                 }
1736
1737
1738
1739                 if (!myName.getIdentifier().isEmpty() && (myName.getIdentifier().length()>2)){
1740                     setLSID(myName.getIdentifier(), acceptedTaxon);
1741                 }
1742
1743
1744                 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1745             }
1746
1747
1748             if(!myName.getName().isEmpty()){
1749                 logger.info("acceptedTaxon and name: *"+acceptedTaxon.getTitleCache()+"*, *"+myName.getName()+"*");
1750                 if (acceptedTaxon.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(myName.getName().trim())){
1751                     Reference<?> refS = ReferenceFactory.newGeneric();
1752                     refS.setTitleCache(refString, true);
1753                     //                            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1754                     //                            acceptedTaxon.addDescription(td);
1755                     //                            acceptedTaxon.addSource(refSource);
1756                     //
1757                     //                            TextData textData = TextData.NewInstance(Feature.CITATION());
1758                     //
1759                     //                            textData.addSource(null, null, refS, null);
1760                     //                            td.addElement(textData);
1761                     //                            td.addSource(refSource);
1762                     //                            importer.getDescriptionService().saveOrUpdate(td);
1763
1764
1765                     if (!myName.getIdentifier().isEmpty() && (myName.getIdentifier().length()>2)){
1766                         setLSID(myName.getIdentifier(), acceptedTaxon);
1767
1768                     }
1769
1770                     acceptedTaxon.getName().setNomenclaturalReference(refS);
1771                     importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1772                 }
1773                 else{
1774                     INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1775                     TaxonNameBase nameTBF = parser.parseFullName(myName.getName(), nomenclaturalCode, myName.getRank());
1776                     if (nameTBF.hasProblem() &&
1777                             !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1778                         //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1779                         nameTBF=solveNameProblem(myName.getOriginalName(), myName.getName(),parser);
1780                     }
1781                     nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1782                     Synonym synonym = Synonym.NewInstance(nameTBF, refMods);
1783
1784
1785                     if (!myName.getIdentifier().isEmpty() && (myName.getIdentifier().length()>2)){
1786                         String id = myName.getIdentifier().split("__")[0];
1787                         String source = myName.getIdentifier().split("__")[1];
1788                         if (id.indexOf("lsid")>-1){
1789                             try {
1790                                 LSID lsid = new LSID(id);
1791                                 synonym.setLsid(lsid);
1792                             } catch (MalformedLSIDException e) {
1793                                 // TODO Auto-generated catch block
1794                                 e.printStackTrace();
1795                             }
1796
1797                         }
1798                         else{
1799                             //TODO ADD ORIGINAL SOURCE ID
1800                             Reference<?> re = ReferenceFactory.newGeneric();
1801                             re.setTitle(source);
1802
1803                             IdentifiableSource os = IdentifiableSource.NewInstance(OriginalSourceType.Import,null,null,re,null);
1804                             os.setIdInSource(id);
1805                             //
1806                             //                        os.setCitation(re);
1807                             synonym.addSource(os);
1808                         }
1809                     }
1810
1811                     Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1812                     System.out.println(synonym.getName()+" -- "+synonym.getSec());
1813                     boolean synoExist = false;
1814                     for (Synonym syn: synonymsSet){
1815                         System.out.println(syn.getName()+" -- "+syn.getSec());
1816                         boolean a =syn.getName().equals(synonym.getName());
1817                         boolean b = syn.getSec().equals(synonym.getSec());
1818                         if (a && b) {
1819                             synoExist=true;
1820                         }
1821                     }
1822                     if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1823                         System.out.println("SYNONYM");
1824                         synonym.addSource(OriginalSourceType.Import,null,null,refMods,null);
1825                         acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),refMods, null);
1826                         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1827                     }
1828                 }
1829             }
1830         }
1831     }
1832
1833
1834
1835     /**
1836      * @param identifier
1837      * @param acceptedTaxon
1838      */
1839     private void setLSID(String identifier, TaxonBase<?> taxon) {
1840         boolean lsidok=false;
1841         String id = identifier.split("__")[0];
1842         String source = identifier.split("__")[1];
1843         if (id.indexOf("lsid")>-1){
1844             try {
1845                 LSID lsid = new LSID(id);
1846                 taxon.setLsid(lsid);
1847                 lsidok=true;
1848             } catch (MalformedLSIDException e) {
1849                 logger.warn("Malformed LSID");
1850             }
1851
1852         }
1853         if ((id.indexOf("lsid")<0) || !lsidok){
1854             //ADD ORIGINAL SOURCE ID
1855             IdentifiableSource os = IdentifiableSource.NewInstance(OriginalSourceType.Import);
1856             os.setIdInSource(id);
1857             Reference<?> re = ReferenceFactory.newGeneric();
1858             re.setTitle(source);
1859             os.setCitation(re);
1860             taxon.addSource(os);
1861         }
1862
1863     }
1864
1865     /**
1866      * try to solve a parsing problem for a scientific name
1867      * @param original : the name from the OCR document
1868      * @param name : the tagged version
1869      * @param parser
1870      * @return the corrected TaxonNameBase
1871      */
1872     @SuppressWarnings({ "unchecked", "rawtypes" })
1873     private TaxonNameBase<?,?> solveNameProblem(String original, String name, INonViralNameParser parser) {
1874         Map<String,String> ato = namesMap.get(original);
1875         Rank rank=Rank.UNKNOWN_RANK();
1876
1877         if (ato == null){
1878             rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
1879         }else{
1880             rank = getRank(ato);
1881         }
1882         TaxonNameBase<?,?> nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
1883         //                logger.info("RANK: "+rank);
1884         int retry=0;
1885         while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
1886             String fullname =  getFullReference(name,nameTBF.getParsingProblems());
1887             if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1888                 nameTBF = BotanicalName.NewInstance(null);
1889             }
1890             if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1891                 nameTBF = ZoologicalName.NewInstance(null);
1892             }
1893             if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1894                 nameTBF= BacterialName.NewInstance(null);
1895             }
1896             parser.parseReferencedName(nameTBF, fullname, rank, false);
1897             retry++;
1898         }
1899         if (retry == 1){
1900             nameTBF.setFullTitleCache(name, true);
1901             //                    logger.info("FULL TITLE CACHE "+name);
1902         }
1903         return nameTBF;
1904     }
1905
1906     /**
1907      * @param nomenclatureNode: the XML nodes
1908      * @param nametosave: the list of objects to save into the CDM
1909      * @param refMods: the current reference extracted from the MODS
1910      * @return
1911      */
1912     @SuppressWarnings({ "rawtypes", "unused" })
1913     private Taxon extractNomenclature(Node nomenclatureNode,  List<TaxonNameBase> nametosave, Reference<?> refMods) {
1914         //        logger.info("extractNomenclature");
1915         NodeList children = nomenclatureNode.getChildNodes();
1916         String freetext;
1917         TaxonNameBase nameToBeFilled = null;
1918         Taxon acceptedTaxon = null;
1919         INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1920
1921         //        String fullContent = nomenclatureNode.getTextContent();
1922
1923         NomenclaturalStatusType statusType = null;
1924         for (int i=0;i<children.getLength();i++){
1925             if(children.item(i).getNodeName().equalsIgnoreCase("tax:status")){
1926                 String status = children.item(i).getTextContent().trim();
1927                 if (!status.isEmpty()){
1928                     try {
1929                         statusType = nomStatusString2NomStatus(status);
1930                     } catch (UnknownCdmTypeException e) {
1931                         logger.warn("Problem with status");
1932                     }
1933                 }
1934             }
1935         }
1936
1937         boolean containsSynonyms=false;
1938         for (int i=0;i<children.getLength();i++){
1939
1940             if (children.item(i).getNodeName().equalsIgnoreCase("#text")) {
1941                 freetext=children.item(i).getTextContent();
1942             }
1943             if (children.item(i).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1944                 System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
1945                 extractMaterialsDirect(children.item(i), acceptedTaxon, refMods, "collection");
1946             }
1947             if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
1948                 if(!containsSynonyms){
1949                     MyName myName = new MyName();
1950                     try {
1951                         myName = extractScientificName(children.item(i));
1952                         treatmentMainName = myName.getNewName();
1953                         originalTreatmentName = myName.getOriginalName();
1954
1955                     } catch (TransformerFactoryConfigurationError e1) {
1956                         logger.warn(e1);
1957                     } catch (TransformerException e1) {
1958                         logger.warn(e1);
1959                     }
1960
1961                     if (myName.getRank().equals(Rank.UNKNOWN_RANK()) || myName.getRank().isLower(configState.getConfig().getMaxRank())){
1962                         maxRankRespected=true;
1963                         if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1964                             nameToBeFilled = BotanicalName.NewInstance(null);
1965                         }
1966                         if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1967                             nameToBeFilled = ZoologicalName.NewInstance(null);
1968                         }
1969                         if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1970                             nameToBeFilled = BacterialName.NewInstance(null);
1971                         }
1972                         acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
1973                         System.out.println("TreatmentName "+treatmentMainName+" - "+acceptedTaxon);
1974
1975
1976                         boolean statusMatch=false;
1977                         if(acceptedTaxon !=null ){
1978                             statusMatch=compareStatus(acceptedTaxon, statusType);
1979                         }
1980                         if (acceptedTaxon ==null || (acceptedTaxon != null && !statusMatch)){
1981                             System.out.println("devrait pas venir la");
1982                             nameToBeFilled = parser.parseFullName(treatmentMainName, nomenclaturalCode, null);
1983                             if (nameToBeFilled.hasProblem() &&
1984                                     !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1985                                 nameToBeFilled = solveNameProblem(originalTreatmentName,treatmentMainName,parser);
1986                             }
1987                             nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
1988                             if (!originalTreatmentName.isEmpty()) {
1989                                 TaxonNameDescription td = TaxonNameDescription.NewInstance();
1990                                 td.setTitleCache(originalTreatmentName);
1991                                 nameToBeFilled.addDescription(td);
1992                             }
1993                             if(statusType != null) {
1994                                 nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1995                             }
1996                             nameToBeFilled.addSource(OriginalSourceType.Import,null,null,refMods,null);
1997                             acceptedTaxon= new Taxon(nameToBeFilled,(Reference<?>) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
1998                             if(!configState.getConfig().doKeepOriginalSecundum()) {
1999                                 acceptedTaxon.setSec(configState.getConfig().getSecundum());
2000                                 logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2001                             }
2002
2003
2004                             if (!myName.getIdentifier().isEmpty() && (myName.getIdentifier().length()>2)){
2005                                 boolean lsidok=false;
2006                                 String id = myName.getIdentifier().split("__")[0];
2007                                 String source = myName.getIdentifier().split("__")[1];
2008                                 if (id.indexOf("lsid")>-1){
2009                                     try {
2010                                         LSID lsid = new LSID(id);
2011                                         acceptedTaxon.setLsid(lsid);
2012                                         lsidok=true;
2013                                     } catch (MalformedLSIDException e) {
2014                                         logger.warn("Malformed LSID");
2015                                     }
2016
2017                                 }
2018                                 if ((id.indexOf("lsid")<0) || !lsidok){
2019                                     //TODO ADD ORIGINAL SOURCE ID
2020                                     IdentifiableSource os = IdentifiableSource.NewInstance(OriginalSourceType.Import);
2021                                     os.setIdInSource(id);
2022                                     Reference<?> re = ReferenceFactory.newGeneric();
2023                                     re.setTitle(source);
2024                                     os.setCitation(re);
2025                                     acceptedTaxon.addSource(os);
2026                                 }
2027                             }
2028                             /*<<<<<<< .courant
2029                         boolean sourceExists=false;
2030                         Set<IdentifiableSource> sources = acceptedTaxon.getSources();
2031                         for (IdentifiableSource src : sources){
2032                             String micro = src.getCitationMicroReference();
2033                             Reference r = src.getCitation();
2034                             if (r.equals(refMods)) {
2035                                 sourceExists=true;
2036                             }
2037                         }
2038                         if(!sourceExists) {
2039                             acceptedTaxon.addSource(null,null,refMods,null);
2040                         }
2041 =======*/
2042
2043                             acceptedTaxon.addSource(OriginalSourceType.Import, null,null,refMods,null);
2044                             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2045
2046                             Taxon parentTaxon = askParent(acceptedTaxon, classification);
2047                             if (parentTaxon ==null){
2048                                 while (parentTaxon == null) {
2049                                     parentTaxon = createParent(acceptedTaxon, refMods);
2050                                     classification.addParentChild(parentTaxon, acceptedTaxon, refMods, null);
2051                                 }
2052                             }else{
2053                                 classification.addParentChild(parentTaxon, acceptedTaxon, refMods, null);
2054                             }
2055                         }else{
2056                             acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2057                             Set<IdentifiableSource> sources = acceptedTaxon.getSources();
2058                             boolean sourcelinked=false;
2059                             for (IdentifiableSource source:sources){
2060                                 if (source.getCitation().getTitle().equalsIgnoreCase(refMods.getTitleCache())) {
2061                                     sourcelinked=true;
2062                                 }
2063                             }
2064                             if (!configState.getConfig().doKeepOriginalSecundum()) {
2065                                 acceptedTaxon.setSec(configState.getConfig().getSecundum());
2066                                 logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2067                             }
2068                             if (!sourcelinked){
2069                                 acceptedTaxon.addSource(OriginalSourceType.Import, null, null, refMods, null);
2070                             }
2071                             if (!sourcelinked || !configState.getConfig().doKeepOriginalSecundum()){
2072
2073                                 if (!myName.getIdentifier().isEmpty() && (myName.getIdentifier().length()>2)){
2074                                     setLSID(myName.getIdentifier(), acceptedTaxon);
2075                                 }
2076                                 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2077                             }
2078                         }
2079                     }else{
2080                         maxRankRespected=false;
2081                     }
2082                     containsSynonyms=true;
2083                 }else{
2084                     extractSynonyms(children.item(i), nametosave, acceptedTaxon, refMods);
2085                 }
2086             }
2087             if (children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group") && maxRankRespected){
2088                 reloadClassification();
2089                 //extract the References within the document
2090                 extractReferences(children.item(i),nametosave,acceptedTaxon,refMods);
2091             }
2092
2093         }
2094         //        importer.getClassificationService().saveOrUpdate(classification);
2095         return acceptedTaxon;
2096     }
2097
2098     /**
2099      * @return
2100      */
2101     private boolean compareStatus(Taxon t, NomenclaturalStatusType statusType) {
2102         boolean statusMatch=false;
2103         //found one taxon
2104         Set<NomenclaturalStatus> status = t.getName().getStatus();
2105         if (statusType!=null && status.size()>0){ //the statusType is known for both taxon
2106             for (NomenclaturalStatus st:status){
2107                 NomenclaturalStatusType stype = st.getType();
2108                 if (stype.toString().equalsIgnoreCase(statusType.toString())) {
2109                     statusMatch=true;
2110                 }
2111             }
2112         }
2113         else{
2114             if(statusType == null && status.size()==0) {//there is no statusType, we can assume it's the same
2115                 statusMatch=true;
2116             }
2117         }
2118         return statusMatch;
2119     }
2120
2121     /**
2122      * @param acceptedTaxon: the current acceptedTaxon
2123      * @param ref: the current reference extracted from the MODS
2124      * @return the parent for the current accepted taxon
2125      */
2126     private Taxon createParent(Taxon acceptedTaxon, Reference<?> ref) {
2127         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2128
2129         List<Rank> rankList = new ArrayList<Rank>();
2130         rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
2131
2132         List<String> rankListStr = new ArrayList<String>();
2133         for (Rank r:rankList) {
2134             rankListStr.add(r.toString());
2135         }
2136         String r="";
2137         String s = acceptedTaxon.getTitleCache();
2138         Taxon tax = null;
2139
2140         int addTaxon = askAddParent(s);
2141         logger.info("ADD TAXON: "+addTaxon);
2142         if (addTaxon == 0){
2143             Taxon tmp = askParent(acceptedTaxon, classification);
2144             if (tmp == null){
2145                 s = askSetParent(s);
2146                 r = askRank(s,rankListStr);
2147
2148                 NonViralName<?> nameToBeFilled = null;
2149                 if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2150                     nameToBeFilled = BotanicalName.NewInstance(null);
2151                 }
2152                 if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2153                     nameToBeFilled = ZoologicalName.NewInstance(null);
2154                 }
2155                 if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2156                     nameToBeFilled = BacterialName.NewInstance(null);
2157                 }
2158                 nameToBeFilled.setTitleCache(s);
2159                 nameToBeFilled.setRank(getRank(r));
2160
2161                 tax = Taxon.NewInstance(nameToBeFilled, ref);
2162             }
2163             else{
2164                 tax=tmp;
2165             }
2166
2167             createParent(tax, ref);
2168             //            logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
2169             classification.addParentChild(tax, acceptedTaxon, ref, null);
2170         }
2171         else{
2172             classification.addChildTaxon(acceptedTaxon, ref, null);
2173             tax=acceptedTaxon;
2174         }
2175         //        logger.info("RETURN: "+tax );
2176         return tax;
2177
2178     }
2179
2180
2181
2182     /**
2183      * @param name
2184      * @throws TransformerFactoryConfigurationError
2185      * @throws TransformerException
2186      * @return a list of possible names
2187      */
2188     private MyName extractScientificName(Node name) throws TransformerFactoryConfigurationError, TransformerException {
2189         //        System.out.println("extractScientificName");
2190
2191         String[] rankListToPrint_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:infraspecificepithet","dwc:scientificNameAuthorship"};
2192         List<String> rankListToPrint = new ArrayList<String>();
2193         for (String r : rankListToPrint_tmp) {
2194             rankListToPrint.add(r.toLowerCase());
2195         }
2196
2197         Rank rank = Rank.UNKNOWN_RANK();
2198         NodeList children = name.getChildNodes();
2199         String fullName = "";
2200         String newName="";
2201         String identifier="";
2202         HashMap<String, String> atomisedMap = new HashMap<String, String>();
2203         List<String> atomisedName= new ArrayList<String>();
2204
2205         String rankStr = "";
2206         Rank tmpRank ;
2207
2208         String status="";
2209         NomenclaturalStatusType statusType = null;
2210         for (int i=0;i<children.getLength();i++){
2211             if(children.item(i).getNodeName().equalsIgnoreCase("tax:status") ||
2212                     (children.item(i).getNodeName().equalsIgnoreCase("tax:namePart") &&
2213                             children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("status"))){
2214                 status = children.item(i).getTextContent().trim();
2215             }
2216         }
2217
2218         for (int i=0;i<children.getLength();i++){
2219             if(children.item(i).getNodeName().equalsIgnoreCase("tax:xmldata")){
2220                 NodeList atom = children.item(i).getChildNodes();
2221                 for (int k=0;k<atom.getLength();k++){
2222                     if (atom.item(k).getNodeName().equalsIgnoreCase("tax:xid")){
2223                         try{
2224                             identifier = atom.item(k).getAttributes().getNamedItem("identifier").getNodeValue();
2225                         }catch(Exception e){
2226                             System.out.println("pb with identifier, maybe empty");
2227                         }
2228                         try{
2229                             identifier+="__"+atom.item(k).getAttributes().getNamedItem("source").getNodeValue();
2230                         }catch(Exception e){
2231                             System.out.println("pb with identifier, maybe empty");
2232                         }
2233                     }
2234                     tmpRank = null;
2235                     rankStr = atom.item(k).getNodeName().toLowerCase();
2236                     //                    logger.info("RANKSTR:*"+rankStr+"*");
2237                     if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2238                         rankStr=atom.item(k).getTextContent().trim();
2239                         tmpRank = getRank(rankStr);
2240                     }
2241                     //                    if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2242                     if (tmpRank != null){
2243                         rank=tmpRank;
2244                     }
2245
2246
2247                     atomisedMap.put(rankStr.toLowerCase(),atom.item(k).getTextContent().trim());
2248                     if (!atom.item(k).getNodeName().equalsIgnoreCase("dwc:taxonRank") ) {
2249                         if (atom.item(k).getNodeName().equalsIgnoreCase("dwc:subgenus") || atom.item(k).getNodeName().equalsIgnoreCase("dwcranks:subgenus")) {
2250                             atomisedName.add("("+atom.item(k).getTextContent().trim()+")");
2251                         } else{
2252                             if(atom.item(k).getNodeName().equalsIgnoreCase("dwcranks:varietyepithet")) {
2253                                 atomisedName.add("var. "+atom.item(k).getTextContent().trim());
2254                             } else{
2255                                 if(rankListToPrint.contains(atom.item(k).getNodeName().toLowerCase())) {
2256                                     atomisedName.add(atom.item(k).getTextContent().trim());
2257                                 }
2258                                 else{
2259                                     System.out.println("on a oublie qqn "+atom.item(k).getNodeName());
2260                                 }
2261                             }
2262                         }
2263                     }
2264                 }
2265             }
2266             if(children.item(i).getNodeName().equalsIgnoreCase("#text") && !StringUtils.isBlank(children.item(i).getTextContent())){
2267                 //                logger.info("name non atomised: "+children.item(i).getTextContent());
2268                 fullName = children.item(i).getTextContent().trim();
2269                 //                logger.info("fullname: "+fullName);
2270             }
2271         }
2272         if (fullName != null){
2273             fullName = fullName.replace("( ", "(");
2274             fullName = fullName.replace(" )",")");
2275
2276         }
2277         if (fullName.trim().isEmpty()){
2278             fullName=StringUtils.join(atomisedName," ");
2279         }
2280
2281         while(fullName.contains("  ")) {
2282             fullName=fullName.replace("  ", " ");
2283             //            logger.info("while");
2284         }
2285
2286         namesMap.put(fullName,atomisedMap);
2287         String atomisedNameStr = StringUtils.join(atomisedName," ");
2288         while(atomisedNameStr.contains("  ")) {
2289             atomisedNameStr=atomisedNameStr.replace("  ", " ");
2290             //            logger.info("atomisedNameStr: "+atomisedNameStr);
2291         }
2292         atomisedNameStr=atomisedNameStr.trim();
2293
2294         if (fullName != null){
2295             if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2296                 newName=getScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2297             } else {
2298                 newName=fullName;
2299             }
2300         }
2301         rank = askForRank(newName, rank, nomenclaturalCode);
2302         String[] names = new String[5];
2303         MyName myname = new MyName();
2304         myname.setOriginalName(fullName);
2305         myname.setNewName(newName);
2306         myname.setRank(rank);
2307         myname.setIdentifier(identifier);
2308         myname.setStatus(status);
2309         return myname;
2310
2311     }
2312
2313     /**
2314      * @param classification2
2315      */
2316     public void updateClassification(Classification classification2) {
2317         classification = classification2;
2318     }
2319
2320     public class MyName {
2321         String originalName="";
2322         String newName="";
2323         Rank rank=Rank.UNKNOWN_RANK();
2324         String identifier="";
2325         String status="";
2326
2327         public String getName(){
2328             if (newName.isEmpty()) {
2329                 return originalName;
2330             } else {
2331                 return newName;
2332             }
2333
2334         }
2335         /**
2336          * @return the fullName
2337          */
2338         public String getOriginalName() {
2339             return originalName;
2340         }
2341         /**
2342          * @param fullName the fullName to set
2343          */
2344         public void setOriginalName(String fullName) {
2345             this.originalName = fullName;
2346         }
2347         /**
2348          * @return the newName
2349          */
2350         public String getNewName() {
2351             return newName;
2352         }
2353         /**
2354          * @param newName the newName to set
2355          */
2356         public void setNewName(String newName) {
2357             this.newName = newName;
2358         }
2359         /**
2360          * @return the rank
2361          */
2362         public Rank getRank() {
2363             return rank;
2364         }
2365         /**
2366          * @param rank the rank to set
2367          */
2368         public void setRank(Rank rank) {
2369             this.rank = rank;
2370         }
2371         /**
2372          * @return the idenfitiger
2373          */
2374         public String getIdentifier() {
2375             return identifier;
2376         }
2377         /**
2378          * @param idenfitiger the idenfitiger to set
2379          */
2380         public void setIdentifier(String identifier) {
2381             this.identifier = identifier;
2382         }
2383         /**
2384          * @return the status
2385          */
2386         public String getStatus() {
2387             return status;
2388         }
2389         /**
2390          * @param status the status to set
2391          */
2392         public void setStatus(String status) {
2393             this.status = status;
2394         }
2395
2396
2397
2398     }
2399
2400 }
2401
2402
2403