cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/taxonx2013/TaxonXTreatmentExtractor.java

   1 /**
   2  * Copyright (C) 2013 EDIT
   3  * European Distributed Institute of Taxonomy
   4  * http://www.e-taxonomy.eu
   5  *
   6  * The contents of this file are subject to the Mozilla Public License Version 1.1
   7  * See LICENSE.TXT at the top of this package for the full license terms.
   8  */
   9 package eu.etaxonomy.cdm.io.taxonx2013;
  10
  11 import java.io.BufferedWriter;
  12 import java.io.File;
  13 import java.io.FileWriter;
  14 import java.io.IOException;
  15 import java.net.URI;
  16 import java.util.ArrayList;
  17 import java.util.Arrays;
  18 import java.util.HashMap;
  19 import java.util.List;
  20 import java.util.Map;
  21 import java.util.Set;
  22 import java.util.UUID;
  23 import java.util.regex.Matcher;
  24 import java.util.regex.Pattern;
  25
  26 import javax.xml.transform.TransformerException;
  27 import javax.xml.transform.TransformerFactoryConfigurationError;
  28
  29 import org.apache.commons.lang.StringUtils;
  30 import org.apache.log4j.Logger;
  31 import org.w3c.dom.Node;
  32 import org.w3c.dom.NodeList;
  33
  34 import com.ibm.lsid.MalformedLSIDException;
  35
  36 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
  37 import eu.etaxonomy.cdm.api.service.pager.Pager;
  38 import eu.etaxonomy.cdm.model.agent.AgentBase;
  39 import eu.etaxonomy.cdm.model.agent.Person;
  40 import eu.etaxonomy.cdm.model.common.CdmBase;
  41 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
  42 import eu.etaxonomy.cdm.model.common.LSID;
  43 import eu.etaxonomy.cdm.model.common.Language;
  44 import eu.etaxonomy.cdm.model.common.OriginalSourceType;
  45 import eu.etaxonomy.cdm.model.description.Feature;
  46 import eu.etaxonomy.cdm.model.description.FeatureNode;
  47 import eu.etaxonomy.cdm.model.description.FeatureTree;
  48 import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
  49 import eu.etaxonomy.cdm.model.description.TaxonDescription;
  50 import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
  51 import eu.etaxonomy.cdm.model.description.TextData;
  52 import eu.etaxonomy.cdm.model.name.INonViralName;
  53 import eu.etaxonomy.cdm.model.name.ITaxonNameBase;
  54 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
  55 import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
  56 import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
  57 import eu.etaxonomy.cdm.model.name.Rank;
  58 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
  59 import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
  60 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
  61 import eu.etaxonomy.cdm.model.reference.Reference;
  62 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
  63 import eu.etaxonomy.cdm.model.taxon.Classification;
  64 import eu.etaxonomy.cdm.model.taxon.Synonym;
  65 import eu.etaxonomy.cdm.model.taxon.SynonymType;
  66 import eu.etaxonomy.cdm.model.taxon.Taxon;
  67 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
  68 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
  69 import eu.etaxonomy.cdm.persistence.dto.UuidAndTitleCache;
  70 import eu.etaxonomy.cdm.persistence.query.MatchMode;
  71 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
  72 import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
  73 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
  74 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImplRegExBase;
  75
  76 /**
  77  * @author pkelbert
  78  * @date 2 avr. 2013
  79  *
  80  */
  81 public class TaxonXTreatmentExtractor extends TaxonXExtractor{
  82
    private static final String PUBLICATION_YEAR = "publicationYear";

        private static final Logger logger = Logger.getLogger(TaxonXTreatmentExtractor.class);

    // Feature name handed to extractSpecificFeatureNotStructured for nodes that match no known element.
    private static final String notMarkedUp = "Not marked-up";
    // UUID under which buildFeatureTree() looks up (or creates) the feature tree.
    private static final UUID proIbioTreeUUID = UUID.fromString("2c49f506-c7f7-44de-a8b9-2e695de3769c");
    private static final UUID OtherUUID = UUID.fromString("6465f8aa-2175-446f-807e-7163994b120f");
    private static final UUID NotMarkedUpUUID = UUID.fromString("796fe3a5-2c9c-4a89-b298-7598ca944063");
    private static final boolean skippQuestion = true;

    private final NomenclaturalCode nomenclaturalCode;
    // Replaced by the persisted instance in reloadClassification().
    private Classification classification;

    private  String treatmentMainName,originalTreatmentName;

    private final HashMap<String,Map<String,String>> namesMap = new HashMap<String, Map<String,String>>();


    // Text starting with digits, or with '-' followed by digits.
    // NOTE(review): in this part of the file only the commented-out extractKey uses these two patterns.
    private final Pattern keypattern = Pattern.compile("^(\\d+.*|-\\d+.*)");
    // Text of at least two characters ending with a digit.
    private final Pattern keypatternend = Pattern.compile("^.+?\\d$");

    // Gates saving of names/classification and most node handlers in handleSingleNode.
    private boolean maxRankRespected =false;
    // Features by title cache; filled/used by buildFeatureTree() and returned by getFeaturesUsed().
    private Map<String, Feature> featuresMap;

    // Name currently being processed; set by getTaxonNameBaseFromXML.
    private MyName currentMyName;

    // Set from the constructor's urlSource argument and handed to sourceHandler.
    private Reference sourceUrlRef;

    private String followingText;  //text element immediately following a tax:name in tax:nomenclature TODO move to state
    private String usedFollowingTextPrefix; //the part of the following text which has been used during taxon name creation

    private final TaxonXAddSources sourceHandler = new TaxonXAddSources();
 115
 116     /**
 117      * @param nomenclaturalCode
 118      * @param classification
 119      * @param importer
 120      * @param configState
 121      */
 122     public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode, Classification classification, TaxonXImport importer,
 123             TaxonXImportState configState,Map<String, Feature> featuresMap,  Reference urlSource) {
 124         this.nomenclaturalCode=nomenclaturalCode;
 125         this.classification = classification;
 126         this.importer=importer;
 127         this.state2=configState;
 128         this.featuresMap=featuresMap;
 129         this.sourceUrlRef =urlSource;
 130         prepareCollectors(configState, importer.getAgentService());
 131         this.sourceHandler.setSourceUrlRef(sourceUrlRef);
 132         this.sourceHandler.setImporter(importer);
 133         this.sourceHandler.setConfigState(configState);
 134     }
 135
 136     /**
 137      * extracts all the treament information and save them
 138      * @param treatmentnode: the XML Node
 139      * @param tosave: the list of object to save into the CDM
 140      * @param refMods: the reference extracted from the MODS
 141      * @param sourceName: the URI of the document
 142      */
 143     @SuppressWarnings({ "rawtypes", "unused" })
 144
 145     protected void extractTreatment(Node treatmentnode, Reference refMods, URI sourceName) {        logger.info("extractTreatment");
 146         List<TaxonNameBase> namesToSave = new ArrayList<TaxonNameBase>();
 147         NodeList children = treatmentnode.getChildNodes();
 148         Taxon acceptedTaxon =null;
 149         boolean hasRefgroup=false;
 150
 151         //needed?
 152         for (int i=0;i<children.getLength();i++){
 153             if(children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group")) {
 154                 hasRefgroup=true;
 155             }
 156         }
 157
 158         for (int i=0;i<children.getLength();i++){
 159                 Node child = children.item(i);
 160                 acceptedTaxon = handleSingleNode(refMods, sourceName, namesToSave, child, acceptedTaxon);
 161         }
 162         //        logger.info("saveUpdateNames");
 163         if (maxRankRespected){
 164             importer.getNameService().saveOrUpdate(namesToSave);
 165             importer.getClassificationService().saveOrUpdate(classification);
 166             //logger.info("saveUpdateNames-ok");
 167         }
 168
 169         buildFeatureTree();
 170     }
 171
 172         private Taxon handleSingleNode(Reference refMods, URI sourceName,
 173                         List<TaxonNameBase> namesToSave, Node child, Taxon acceptedTaxon) {
 174                 Taxon defaultTaxon =null;
 175
 176                 String nodeName = child.getNodeName();
 177                 if (nodeName.equalsIgnoreCase("tax:nomenclature")){
 178                     NodeList nomenclatureChildren = child.getChildNodes();
 179                     boolean containsName = false;
 180                     for(int k=0; k<nomenclatureChildren.getLength(); k++){
 181                         if(nomenclatureChildren.item(k).getNodeName().equalsIgnoreCase("tax:name")){
 182                             containsName=true;
 183                             break;
 184                         }
 185                     }
 186                     if (containsName){
 187                         reloadClassification();
 188                         //extract "main" the scientific name
 189                         try{
 190                             acceptedTaxon = extractNomenclature(child, namesToSave, refMods);
 191                         }catch(ClassCastException e){
 192                                 //FIXME exception handling
 193                                 e.printStackTrace();
 194                         }
 195                         //                    System.out.println("acceptedTaxon : "+acceptedTaxon);
 196                     }
 197                 }else if (nodeName.equalsIgnoreCase("tax:ref_group") && maxRankRespected){
 198                     reloadClassification();
 199                     //extract the References within the document
 200                     extractReferences(child, namesToSave ,acceptedTaxon,refMods);
 201                 }else if (nodeName.equalsIgnoreCase("tax:div") &&
 202                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected){
 203                     File file = new File(TaxonXImport.LOG_FOLDER + "multipleTaxonX.txt");
 204                     FileWriter writer;
 205                     try {
 206                         writer = new FileWriter(file ,true);
 207                         writer.write(sourceName+"\n");
 208                         writer.flush();
 209                         writer.close();
 210                     } catch (IOException e1) {
 211                         // TODO Auto-generated catch block
 212                         logger.error(e1.getMessage());
 213                     }
 214                     //                String multiple = askMultiple(children.item(i));
 215                     String multiple = "Other";
 216                     if (multiple.equalsIgnoreCase("other")) {
 217                         extractSpecificFeatureNotStructured(child,acceptedTaxon, defaultTaxon,namesToSave, refMods,multiple);
 218                     }else if (multiple.equalsIgnoreCase("synonyms")) {
 219                         try{
 220                             extractSynonyms(child,acceptedTaxon, refMods, null);
 221                         }catch(NullPointerException e){
 222                             logger.warn("the accepted taxon is maybe null");
 223                         }
 224                     }else if(multiple.equalsIgnoreCase("material examined")){
 225                         extractMaterials(child, acceptedTaxon, refMods, namesToSave);
 226                     }else if (multiple.equalsIgnoreCase("distribution")){
 227                         extractDistribution(child, acceptedTaxon, defaultTaxon, namesToSave, refMods);
 228                     }else if (multiple.equalsIgnoreCase("type status")){
 229                         extractDescriptionWithReference(child, acceptedTaxon, defaultTaxon,refMods, "TypeStatus");
 230                     }else if (multiple.equalsIgnoreCase("vernacular name")){
 231                         extractDescriptionWithReference(child, acceptedTaxon, defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
 232                     }else{
 233                         extractSpecificFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,multiple);
 234                     }
 235                 }
 236                 else if(nodeName.equalsIgnoreCase("tax:div") &&
 237                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected){
 238                     extractFeature(child,acceptedTaxon,defaultTaxon, namesToSave, refMods, Feature.BIOLOGY_ECOLOGY());
 239                 }
 240                 else if(nodeName.equalsIgnoreCase("tax:div") &&
 241                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("vernacularName") && maxRankRespected){
 242                     extractDescriptionWithReference(child, acceptedTaxon,defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
 243                 }
 244                 else if(nodeName.equalsIgnoreCase("tax:div") &&
 245                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected){
 246                     extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DESCRIPTION());
 247                 }
 248                 else if(nodeName.equalsIgnoreCase("tax:div") &&
 249                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected){
 250                     extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,Feature.DIAGNOSIS());
 251                 }
 252                 else if(nodeName.equalsIgnoreCase("tax:div") &&
 253                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected){
 254                     extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DISCUSSION());
 255                 }
 256                 else if(nodeName.equalsIgnoreCase("tax:div") &&
 257                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("note") && maxRankRespected){
 258                     extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DESCRIPTION());
 259                 }
 260                 else if(nodeName.equalsIgnoreCase("tax:div") &&
 261                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected){
 262                     extractDistribution(child,acceptedTaxon,defaultTaxon,namesToSave, refMods);
 263                 }
 264                 else if(nodeName.equalsIgnoreCase("tax:div") &&
 265                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected){
 266                     extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave,refMods,Feature.ETYMOLOGY());
 267                 }
 268                 else if(nodeName.equalsIgnoreCase("tax:div") &&
 269                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected){
 270                     extractMaterials(child,acceptedTaxon, refMods, namesToSave);
 271                 }
 272                 else if(nodeName.equalsIgnoreCase("tax:figure") && maxRankRespected){
 273                     extractSpecificFeature(child,acceptedTaxon,defaultTaxon, namesToSave, refMods, "Figure");
 274                 }
 275                 else if(nodeName.equalsIgnoreCase("tax:div") &&
 276                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") && maxRankRespected){
 277                     extractSpecificFeature(child, acceptedTaxon,defaultTaxon, namesToSave, refMods, "table");
 278                 }else if(nodeName.equalsIgnoreCase("tax:div") &&
 279                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected){
 280                     //TODO IGNORE keys for the moment
 281                     //extractKey(children.item(i),acceptedTaxon, nameToSave,source, refMods);
 282                     extractSpecificFeatureNotStructured(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,"Keys - unparsed");
 283                 }
 284                 else{
 285                     if (! nodeName.equalsIgnoreCase("tax:pb")){
 286                         //logger.info("ANOTHER KIND OF NODES: "+children.item(i).getNodeName()+", "+children.item(i).getAttributes());
 287                         if (child.getAttributes() !=null) {
 288                             logger.info("First Attribute: " + child.getAttributes().item(0));
 289                         }
 290                         extractSpecificFeatureNotStructured(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, notMarkedUp);
 291                     }else{
 292                         //FIXME
 293                         logger.warn("Unhandled");
 294                     }
 295                 }
 296                 return acceptedTaxon;
 297         }
 298
 299
 300     protected Map<String,Feature> getFeaturesUsed(){
 301         return featuresMap;
 302     }
 303     /**
 304      *
 305      */
 306     private void buildFeatureTree() {
 307         logger.info("buildFeatureTree");
 308         FeatureTree proibiospheretree = importer.getFeatureTreeService().find(proIbioTreeUUID);
 309         if (proibiospheretree == null){
 310             List<FeatureTree> trees = importer.getFeatureTreeService().list(FeatureTree.class, null, null, null, null);
 311             if (trees.size()==1) {
 312                 FeatureTree ft = trees.get(0);
 313                 if (featuresMap==null) {
 314                     featuresMap=new HashMap<String, Feature>();
 315                 }
 316                 for (Feature feature: ft.getDistinctFeatures()){
 317                     if(feature!=null) {
 318                         featuresMap.put(feature.getTitleCache(), feature);
 319                     }
 320                 }
 321             }
 322             proibiospheretree = FeatureTree.NewInstance();
 323             proibiospheretree.setUuid(proIbioTreeUUID);
 324         }
 325         //        FeatureNode root = proibiospheretree.getRoot();
 326         FeatureNode root2 = proibiospheretree.getRoot();
 327         if (root2 != null){
 328             int nbChildren = root2.getChildCount()-1;
 329             while (nbChildren>-1){
 330                 try{
 331                     root2.removeChild(nbChildren);
 332                 }catch(Exception e){logger.warn("Can't remove child from FeatureTree "+e);}
 333                 nbChildren --;
 334             }
 335
 336         }
 337
 338         for (Feature feature:featuresMap.values()) {
 339             root2.addChild(FeatureNode.NewInstance(feature));
 340         }
 341         importer.getFeatureTreeService().saveOrUpdate(proibiospheretree);
 342
 343     }
 344
 345
 346     /**
 347      * @param keys
 348      * @param acceptedTaxon: the current acceptedTaxon
 349      * @param nametosave: the list of objects to save into the CDM
 350      * @param refMods: the current reference extracted from the MODS
 351      */
 352     /*   @SuppressWarnings("rawtypes")
 353     private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonNameBase> nametosave, Reference refMods) {
 354         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
 355
 356         NodeList children = keys.getChildNodes();
 357         String key="";
 358         PolytomousKey poly =  PolytomousKey.NewInstance();
 359         poly.addSource(OriginalSourceType.Import, null,null,refMods,null);
 360         poly.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
 361         poly.addTaxonomicScope(acceptedTaxon);
 362         poly.setTitleCache("bloup", true);
 363         //        poly.addCoveredTaxon(acceptedTaxon);
 364         PolytomousKeyNode root = poly.getRoot();
 365         PolytomousKeyNode previous = null,tmpKey=null;
 366         Taxon taxonKey=null;
 367         List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
 368
 369         //        String fullContent = keys.getTextContent();
 370         for (int i=0;i<children.getLength();i++){
 371             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 372                 NodeList paragraph = children.item(i).getChildNodes();
 373                 key="";
 374                 taxonKey=null;
 375                 for (int j=0;j<paragraph.getLength();j++){
 376                     if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
 377                         if (! paragraph.item(j).getTextContent().trim().isEmpty()){
 378                             key+=paragraph.item(j).getTextContent().trim();
 379                             //                            logger.info("KEY: "+j+"--"+key);
 380                         }
 381                     }
 382                     if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
 383                         taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
 384                     }
 385                 }
 386                 //                logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
 387                 if (keypattern.matcher(key).matches()){
 388                     tmpKey = PolytomousKeyNode.NewInstance(key);
 389                     if (taxonKey!=null) {
 390                         tmpKey.setTaxon(taxonKey);
 391                     }
 392                     polyNodes.add(tmpKey);
 393                     if (previous == null) {
 394                         root.addChild(tmpKey);
 395                     } else {
 396                         previous.addChild(tmpKey);
 397                     }
 398                 }else{
 399                     if (!key.isEmpty()){
 400                         tmpKey=PolytomousKeyNode.NewInstance(key);
 401                         if (taxonKey!=null) {
 402                             tmpKey.setTaxon(taxonKey);
 403                         }
 404                         polyNodes.add(tmpKey);
 405                         if (keypatternend.matcher(key).matches()) {
 406                             root.addChild(tmpKey);
 407                             previous=tmpKey;
 408                         } else{
 409                             previous.addChild(tmpKey);
 410                         }
 411
 412                     }
 413                 }
 414             }
 415         }
 416         importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
 417         importer.getPolytomousKeyService().saveOrUpdate(poly);
 418     }
 419 */
 420
 421
 422     /**
 423      * @param taxons: the XML Nodegroup
 424      * @param nametosave: the list of objects to save into the CDM
 425      * @param acceptedTaxon: the current accepted Taxon
 426      * @param refMods: the current reference extracted from the MODS
 427      *
 428      * @return Taxon object built
 429      */
 430     @SuppressWarnings({ "rawtypes", "unused" })
 431     private TaxonNameBase getTaxonNameBaseFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference refMods, boolean isSynonym) {
 432         //        logger.info("getTaxonFromXML");
 433         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 434         logger.info("getTaxonNameBaseFromXML");
 435         TaxonNameBase nameToBeFilled = null;
 436
 437         currentMyName=new MyName(isSynonym);
 438
 439         NomenclaturalStatusType statusType = null;
 440         try {
 441                 String followingText = null;  //needs to be checked if following text is possible
 442             currentMyName = extractScientificName(taxons,refMods, null);
 443         } catch (TransformerFactoryConfigurationError e1) {
 444             logger.warn(e1);
 445         } catch (TransformerException e1) {
 446             logger.warn(e1);
 447         }
 448         /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
 449
 450         nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
 451         if (nameToBeFilled.hasProblem() &&
 452                 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
 453             //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
 454             addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
 455             nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
 456         }
 457
 458         nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
 459          */
 460         nameToBeFilled = currentMyName.getTaxonNameBase();
 461         return nameToBeFilled;
 462
 463     }
 464
 465
 466     /**
 467      *
 468      */
 469     private void reloadClassification() {
 470         logger.info("reloadClassification");
 471         Classification cl = importer.getClassificationService().find(classification.getUuid());
 472         if (cl != null){
 473             classification = cl;
 474         }else{
 475             importer.getClassificationService().saveOrUpdate(classification);
 476             classification = importer.getClassificationService().find(classification.getUuid());
 477         }
 478     }
 479
 480     //    /**
 481     //     * Create a Taxon for the current NameBase, based on the current reference
 482     //     * @param taxonNameBase
 483     //     * @param refMods: the current reference extracted from the MODS
 484     //     * @return Taxon
 485     //     */
 486     //    @SuppressWarnings({ "unused", "rawtypes" })
 487     //    private Taxon getTaxon(TaxonNameBase taxonNameBase, Reference refMods) {
 488     //        Taxon t = new Taxon(taxonNameBase,null );
 489     //        if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
 490     //            t.setSec(configState.getConfig().getSecundum());
 491     //            logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
 492     //        }
 493     //        /*<<<<<<< .courant
 494     //        boolean sourceExists=false;
 495     //        Set<IdentifiableSource> sources = t.getSources();
 496     //        for (IdentifiableSource src : sources){
 497     //            String micro = src.getCitationMicroReference();
 498     //            Reference r = src.getCitation();
 499     //            if (r.equals(refMods) && micro == null) {
 500     //                sourceExists=true;
 501     //            }
 502     //        }
 503     //        if(!sourceExists) {
 504     //            t.addSource(null,null,refMods,null);
 505     //        }
 506     //=======*/
 507     //        t.addSource(OriginalSourceType.Import,null,null,refMods,null);
 508     //        t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
 509     //        return t;
 510     //    }
 511
 512     private void  extractDescriptionWithReference(Node typestatus, Taxon acceptedTaxon, Taxon defaultTaxon, Reference refMods,
 513             String featureName) {
 514         //        System.out.println("extractDescriptionWithReference !");
 515         logger.info("extractDescriptionWithReference");
 516         NodeList children = typestatus.getChildNodes();
 517
 518         Feature currentFeature=getFeatureObjectFromString(featureName);
 519
 520         String r="";String s="";
 521         for (int i=0;i<children.getLength();i++){
 522             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 523                 s+=children.item(i).getTextContent().trim();
 524             }
 525             if (children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
 526                 r+= children.item(i).getTextContent().trim();
 527             }
 528             if (s.indexOf(r)>-1) {
 529                 s=s.split(r)[0];
 530             }
 531         }
 532
 533         Reference currentref =  ReferenceFactory.newGeneric();
 534         if(!r.isEmpty()) {
 535             currentref.setTitleCache(r, true);
 536         } else {
 537             currentref=refMods;
 538         }
 539         setParticularDescription(s,acceptedTaxon,defaultTaxon, currentref, refMods,currentFeature);
 540     }
 541
 542     /**
 543      * @param nametosave
 544      * @param distribution: the XML node group
 545      * @param acceptedTaxon: the current accepted Taxon
 546      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
 547      * @param refMods: the current reference extracted from the MODS
 548      */
 549     @SuppressWarnings("rawtypes")
 550     private void extractDistribution(Node distribution, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave, Reference refMods) {
 551         logger.info("extractDistribution");
 552         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 553         NodeList children = distribution.getChildNodes();
 554         Map<Integer,List<MySpecimenOrObservation>> specimenOrObservations = new HashMap<Integer, List<MySpecimenOrObservation>>();
 555         Map<Integer,String> descriptionsFulltext = new HashMap<Integer,String>();
 556
 557         for (int i=0;i<children.getLength();i++){
 558             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 559                 NodeList paragraph = children.item(i).getChildNodes();
 560                 for (int j=0;j<paragraph.getLength();j++){
 561                     if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
 562                         extractText(descriptionsFulltext, i, paragraph.item(j));
 563                     }
 564                     else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
 565                         extractInLine(nametosave, refMods, descriptionsFulltext, i,paragraph.item(j));
 566                     }
 567                     else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")){
 568                         MySpecimenOrObservation specimenOrObservation = new MySpecimenOrObservation();
 569                         DerivedUnit derivedUnitBase = null;
 570                         specimenOrObservation = extractSpecimenOrObservation(paragraph.item(j), derivedUnitBase, SpecimenOrObservationType.DerivedUnit, null);
 571                         extractTextFromSpecimenOrObservation(specimenOrObservations, descriptionsFulltext, i, specimenOrObservation);
 572                     }
 573                 }
 574             }
 575         }
 576
 577         int m=0;
 578         for (int k:descriptionsFulltext.keySet()) {
 579             if (k>m) {
 580                 m=k;
 581             }
 582         }
 583         for (int k:specimenOrObservations.keySet()) {
 584             if (k>m) {
 585                 m=k;
 586             }
 587         }
 588
 589
 590         if(acceptedTaxon!=null){
 591             TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
 592             Feature currentFeature = Feature.DISTRIBUTION();
 593             //        DerivedUnit derivedUnitBase=null;
 594             //        String descr="";
 595             for (int k=0;k<=m;k++){
 596                 if(specimenOrObservations.keySet().contains(k)){
 597                     for (MySpecimenOrObservation soo:specimenOrObservations.get(k) ) {
 598                         handleAssociation(acceptedTaxon, refMods, td, soo);
 599                     }
 600                 }
 601
 602                 if (descriptionsFulltext.keySet().contains(k)){
 603                     if (!stringIsEmpty(descriptionsFulltext.get(k).trim()) && (descriptionsFulltext.get(k).startsWith("Hab.") || descriptionsFulltext.get(k).startsWith("Habitat"))){
 604                         setParticularDescription(descriptionsFulltext.get(k),acceptedTaxon,defaultTaxon, refMods, Feature.HABITAT());
 605                         break;
 606                     }
 607                     else{
 608                         handleTextData(refMods, descriptionsFulltext, td, currentFeature, k);
 609                     }
 610                 }
 611
 612                 if (descriptionsFulltext.keySet().contains(k) || specimenOrObservations.keySet().contains(k)){
 613                     acceptedTaxon.addDescription(td);
 614                     sourceHandler.addAndSaveSource(refMods, td, null);
 615                     importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 616                 }
 617             }
 618         }
 619     }
 620
 621     /**
 622      * @param refMods
 623      * @param descriptionsFulltext
 624      * @param td
 625      * @param currentFeature
 626      * @param k
 627      */
 628     private void handleTextData(Reference refMods, Map<Integer, String> descriptionsFulltext, TaxonDescription td,
 629             Feature currentFeature, int k) {
 630         //logger.info("handleTextData");
 631         TextData textData = TextData.NewInstance();
 632         textData.setFeature(currentFeature);
 633         textData.putText(Language.UNKNOWN_LANGUAGE(), descriptionsFulltext.get(k));
 634         sourceHandler.addSource(refMods, textData);
 635         td.addElement(textData);
 636     }
 637
 638     /**
 639      * @param acceptedTaxon
 640      * @param refMods
 641      * @param td
 642      * @param soo
 643      */
 644     private void handleAssociation(Taxon acceptedTaxon, Reference refMods, TaxonDescription td, MySpecimenOrObservation soo) {
 645         logger.info("handleAssociation");
 646         String descr=soo.getDescr();
 647         DerivedUnit derivedUnitBase = soo.getDerivedUnitBase();
 648
 649         sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
 650
 651         TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
 652
 653         Feature feature=null;
 654         feature = makeFeature(derivedUnitBase);
 655         if(!StringUtils.isEmpty(descr)) {
 656             derivedUnitBase.setTitleCache(descr, true);
 657         }
 658
 659         IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
 660
 661         taxonDescription.addElement(indAssociation);
 662         sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
 663         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 664         td.setDescribedSpecimenOrObservation(soo.getDerivedUnitBase());
 665     }
 666
 667     /**
 668      * create an individualAssociation
 669      * @param refMods
 670      * @param derivedUnitBase
 671      * @param feature
 672      * @return
 673      */
 674     private IndividualsAssociation createIndividualAssociation(Reference refMods, DerivedUnit derivedUnitBase,
 675             Feature feature) {
 676         logger.info("createIndividualAssociation");
 677         IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
 678         indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
 679         indAssociation.setFeature(feature);
 680         indAssociation = sourceHandler.addSource(refMods, indAssociation);
 681         return indAssociation;
 682     }
 683
 684     /**
 685      * @param specimenOrObservations
 686      * @param descriptionsFulltext
 687      * @param i
 688      * @param specimenOrObservation
 689      */
 690     private void extractTextFromSpecimenOrObservation(Map<Integer, List<MySpecimenOrObservation>> specimenOrObservations,
 691             Map<Integer, String> descriptionsFulltext, int i, MySpecimenOrObservation specimenOrObservation) {
 692         logger.info("extractTextFromSpecimenOrObservation");
 693         List<MySpecimenOrObservation> speObsList = specimenOrObservations.get(i);
 694         if (speObsList == null) {
 695             speObsList=new ArrayList<MySpecimenOrObservation>();
 696         }
 697         speObsList.add(specimenOrObservation);
 698         specimenOrObservations.put(i,speObsList);
 699
 700         String s = specimenOrObservation.getDerivedUnitBase().toString();
 701         if (descriptionsFulltext.get(i) !=null){
 702             s = descriptionsFulltext.get(i)+" "+s;
 703         }
 704         descriptionsFulltext.put(i, s);
 705     }
 706
 707     /**
 708      * Extract the text with the inline link to a taxon
 709      * @param nametosave
 710      * @param refMods
 711      * @param descriptionsFulltext
 712      * @param i
 713      * @param paragraph
 714      */
 715     @SuppressWarnings("rawtypes")
 716     private void extractInLine(List<TaxonNameBase> nametosave, Reference refMods, Map<Integer, String> descriptionsFulltext,
 717             int i, Node paragraph) {
 718         //logger.info("extractInLine");
 719         String inLine=getInlineTextForName(nametosave, refMods, paragraph);
 720         if (descriptionsFulltext.get(i) !=null){
 721             inLine = descriptionsFulltext.get(i)+inLine;
 722         }
 723         descriptionsFulltext.put(i, inLine);
 724     }
 725
 726     /**
 727      * Extract the raw text from a Node
 728      * @param descriptionsFulltext
 729      * @param node
 730      * @param j
 731      */
 732     private void extractText(Map<Integer, String> descriptionsFulltext, int i, Node node) {
 733         //logger.info("extractText");
 734         if(!node.getTextContent().trim().isEmpty()) {
 735             String s =node.getTextContent().trim();
 736             if (descriptionsFulltext.get(i) !=null){
 737                 s = descriptionsFulltext.get(i)+" "+s;
 738             }
 739             descriptionsFulltext.put(i, s);
 740         }
 741     }
 742
 743
 744     /**
 745      * @param materials: the XML node group
 746      * @param acceptedTaxon: the current accepted Taxon
 747      * @param refMods: the current reference extracted from the MODS
 748      */
 749     @SuppressWarnings("rawtypes")
 750     private void extractMaterials(Node materials, Taxon acceptedTaxon, Reference refMods,List<TaxonNameBase> nametosave) {
 751         logger.info("EXTRACTMATERIALS");
 752         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 753         NodeList children = materials.getChildNodes();
 754         NodeList events = null;
 755         //        String descr="";
 756
 757
 758         for (int i=0;i<children.getLength();i++){
 759             String rawAssociation="";
 760             boolean added=false;
 761             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 762                 events = children.item(i).getChildNodes();
 763                 for(int k=0;k<events.getLength();k++){
 764                     if (events.item(k).getNodeName().equalsIgnoreCase("tax:name")){
 765                         String inLine= getInlineTextForName(nametosave, refMods, events.item(k));
 766                         if(!inLine.isEmpty()) {
 767                             rawAssociation+=inLine;
 768                         }
 769                     }
 770                     if (! events.item(k).getNodeName().equalsIgnoreCase("tax:name")
 771                             && !events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
 772                         rawAssociation+= events.item(k).getTextContent().trim();
 773                     }
 774                     if(events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
 775                         if (!containsDistinctLetters(rawAssociation.replaceAll(";",""))) {
 776                             rawAssociation="no description text";
 777                         }
 778                         added=true;
 779                         handleDerivedUnitFacadeAndBase(acceptedTaxon, refMods, events.item(k), rawAssociation);
 780                     }
 781                     if (!rawAssociation.isEmpty() && !added){
 782
 783                         Feature feature = Feature.MATERIALS_EXAMINED();
 784                         featuresMap.put(feature.getTitleCache(),feature);
 785
 786                         TextData textData = createTextData(rawAssociation, refMods, feature);
 787
 788                         if(! rawAssociation.isEmpty() && (acceptedTaxon!=null)){
 789                             TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
 790                             td.addElement(textData);
 791                             acceptedTaxon.addDescription(td);
 792                             sourceHandler.addAndSaveSource(refMods, td, null);
 793                         }
 794                         //                        DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
 795                         //                        derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
 796                         //
 797                         //                        TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
 798                         //                        acceptedTaxon.addDescription(taxonDescription);
 799                         //
 800                         //                        IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
 801                         //
 802                         //                        Feature feature = Feature.MATERIALS_EXAMINED();
 803                         //                        featuresMap.put(feature.getTitleCache(),feature);
 804                         //                        if(!StringUtils.isEmpty(rawAssociation)) {
 805                         //                            derivedUnitBase.setTitleCache(rawAssociation, true);
 806                         //                        }
 807                         //                        indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
 808                         //                        indAssociation.setFeature(feature);
 809                         //                        indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
 810                         //
 811                         //                        /*boolean sourceExists=false;
 812                         //                        Set<DescriptionElementSource> dsources = indAssociation.getSources();
 813                         //                        for (DescriptionElementSource src : dsources){
 814                         //                            String micro = src.getCitationMicroReference();
 815                         //                            Reference r = src.getCitation();
 816                         //                            if (r.equals(refMods) && micro == null) {
 817                         //                                sourceExists=true;
 818                         //                            }
 819                         //                        }
 820                         //                        if(!sourceExists) {
 821                         //                            indAssociation.addSource(null, null, refMods, null);
 822                         //                        }*/
 823                         //                        taxonDescription.addElement(indAssociation);
 824                         //                        taxonDescription.setTaxon(acceptedTaxon);
 825                         //                        taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
 826                         //
 827                         //                        /*sourceExists=false;
 828                         //                        Set<IdentifiableSource> sources = taxonDescription.getSources();
 829                         //                        for (IdentifiableSource src : sources){
 830                         //                            String micro = src.getCitationMicroReference();
 831                         //                            Reference r = src.getCitation();
 832                         //                            if (r.equals(refMods) && micro == null) {
 833                         //                                sourceExists=true;
 834                         //                            }
 835                         //                        }
 836                         //                        if(!sourceExists) {
 837                         //                            taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
 838                         //                        }*/
 839                         //
 840                         //                        importer.getDescriptionService().saveOrUpdate(taxonDescription);
 841                         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 842
 843                         rawAssociation="";
 844                     }
 845                 }
 846             }
 847         }
 848     }
 849
 850     /**
 851      * @param acceptedTaxon
 852      * @param refMods
 853      * @param events
 854      * @param rawAssociation
 855      * @param k
 856      */
 857     private void handleDerivedUnitFacadeAndBase(Taxon acceptedTaxon, Reference refMods, Node event,
 858             String rawAssociation) {
 859         logger.info("handleDerivedUnitFacadeAndBase");
 860         String descr;
 861         DerivedUnit derivedUnitBase;
 862         MySpecimenOrObservation myspecimenOrObservation;
 863         DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
 864         derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
 865
 866         sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
 867
 868         //TODO this may not always be correct, ask user
 869         TaxonNameBase<?,?> typifiableName = acceptedTaxon != null ?  acceptedTaxon.getName() : null;
 870         myspecimenOrObservation = extractSpecimenOrObservation(event,derivedUnitBase,SpecimenOrObservationType.DerivedUnit, typifiableName);
 871         derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
 872         descr=myspecimenOrObservation.getDescr();
 873
 874         sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
 875
 876         TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
 877
 878         Feature feature = makeFeature(derivedUnitBase);
 879         featuresMap.put(feature.getTitleCache(),feature);
 880         if(!StringUtils.isEmpty(descr)) {
 881             derivedUnitBase.setTitleCache(descr, true);
 882         }
 883
 884         IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
 885
 886         taxonDescription.addElement(indAssociation);
 887         sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
 888         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 889     }
 890
 891
 892
 893     /**
 894      * @param currentName
 895      * @param materials: the XML node group
 896      * @param acceptedTaxon: the current accepted Taxon
 897      * @param refMods: the current reference extracted from the MODS
 898      */
 899     private String extractMaterialsDirect(Node materials, Taxon acceptedTaxon, Reference refMods, String event, TaxonNameBase<?,?> currentName) {
 900         logger.info("extractMaterialsDirect");
 901         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 902         String descr="";
 903
 904         DerivedUnit derivedUnitBase=null;
 905         MySpecimenOrObservation myspecimenOrObservation = extractSpecimenOrObservation(materials,derivedUnitBase, SpecimenOrObservationType.DerivedUnit, currentName);
 906         derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
 907
 908         sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
 909
 910         TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
 911
 912         Feature feature=null;
 913         if (event.equalsIgnoreCase("collection")){
 914             feature = makeFeature(derivedUnitBase);
 915         }
 916         else{
 917             feature = Feature.MATERIALS_EXAMINED();
 918         }
 919         featuresMap.put(feature.getTitleCache(),  feature);
 920
 921         descr=myspecimenOrObservation.getDescr();
 922         if(!StringUtils.isEmpty(descr)) {
 923             derivedUnitBase.setTitleCache(descr, true);
 924         }
 925
 926         IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
 927
 928         taxonDescription.addElement(indAssociation);
 929         sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
 930         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 931
 932         return derivedUnitBase.getTitleCache();
 933
 934     }
 935
 936
 937     /**
 938      * @param description: the XML node group
 939      * @param acceptedTaxon: the current acceptedTaxon
 940      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
 941      * @param nametosave: the list of objects to save into the CDM
 942      * @param refMods: the current reference extracted from the MODS
 943      * @param featureName: the feature name
 944      */
 945     @SuppressWarnings({ "rawtypes"})
 946     private String extractSpecificFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
 947             List<TaxonNameBase> nametosave, Reference refMods, String featureName ) {
 948         logger.info("extractSpecificFeature "+featureName);
 949         //        System.out.println("GRUUUUuu");
 950         NodeList children = description.getChildNodes();
 951         NodeList insideNodes ;
 952         NodeList trNodes;
 953         //        String descr ="";
 954         String localdescr="";
 955         List<String> blabla=null;
 956         List<String> text = new ArrayList<String>();
 957
 958         String table="<table>";
 959         String head="";
 960         String line="";
 961
 962         Feature currentFeature=getFeatureObjectFromString(featureName);
 963
 964         //        String fullContent = description.getTextContent();
 965         for (int i=0;i<children.getLength();i++){
 966             //            localdescr="";
 967             if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
 968                 text.add(children.item(i).getTextContent().trim());
 969             }
 970             if (featureName.equalsIgnoreCase("table")){
 971                 if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 972                         children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("thead")){
 973                     head = extractTableHead(children.item(i));
 974                     table+=head;
 975                     line = extractTableLine(children.item(i));
 976                     if (!line.equalsIgnoreCase("<tr></tr>")) {
 977                         table+=line;
 978                     }
 979                 }
 980                 if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 981                         children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
 982                     line = extractTableLineWithColumn(children.item(i).getChildNodes());
 983                     if(!line.equalsIgnoreCase("<tr></tr>")) {
 984                         table+=line;
 985                     }
 986                 }
 987             }
 988             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 989                 insideNodes=children.item(i).getChildNodes();
 990                 blabla= new ArrayList<String>();
 991                 for (int j=0;j<insideNodes.getLength();j++){
 992                     Node insideNode = insideNodes.item(j);
 993                         if (insideNode.getNodeName().equalsIgnoreCase("tax:name")){
 994                         String inlinetext = getInlineTextForName(nametosave, refMods, insideNode);
 995                         if (!inlinetext.isEmpty()) {
 996                             blabla.add(inlinetext);
 997                         }
 998                     }
 999                     else if (insideNode.getNodeName().equalsIgnoreCase("#text")) {
1000                         if(!insideNode.getTextContent().trim().isEmpty()){
1001                             blabla.add(insideNode.getTextContent().trim());
1002                             //                            localdescr += insideNodes.item(j).getTextContent().trim();
1003                         }
1004                     }
1005                 }
1006                 if (!blabla.isEmpty()) {
1007                     String blaStr = StringUtils.join(blabla," ").trim();
1008                     if(!stringIsEmpty(blaStr)) {
1009                         setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1010                         text.add(blaStr);
1011                     }
1012                 }
1013
1014             }
1015             if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1016                 if(!children.item(i).getTextContent().trim().isEmpty()){
1017                     localdescr = children.item(i).getTextContent().trim();
1018                     if(!stringIsEmpty(localdescr)) {
1019                         setParticularDescription(localdescr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1020                     }
1021                 }
1022             }
1023         }
1024
1025         table+="</table>";
1026         if (!table.equalsIgnoreCase("<table></table>")){
1027             //            System.out.println("TABLE : "+table);
1028             text.add(table);
1029         }
1030
1031         if (text !=null && !text.isEmpty()) {
1032             return StringUtils.join(text," ");
1033         } else {
1034             return "";
1035         }
1036
1037     }
1038
1039     /**
1040      * @param children
1041      * @param i
1042      * @return
1043      */
1044     private String extractTableLine(Node child) {
1045         //logger.info("extractTableLine");
1046         String line;
1047         line="<tr>";
1048         if (child.getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1049             line = extractTableLineWithColumn(child.getChildNodes());
1050         }
1051         line+="</tr>";
1052         return line;
1053     }
1054
1055     /**
1056      * @param children
1057      * @param i
1058      * @return
1059      */
1060     private String extractTableHead(Node child) {
1061         //logger.info("extractTableHead");
1062         String head;
1063         String line;
1064         head="<th>";
1065         NodeList trNodes = child.getChildNodes();
1066         for (int k=0;k<trNodes.getLength();k++){
1067             if (trNodes.item(k).getNodeName().equalsIgnoreCase("tax:div")
1068                     && trNodes.item(k).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1069                 line = extractTableLineWithColumn(trNodes.item(k).getChildNodes());
1070                 head+=line;
1071             }
1072         }
1073         head+="</th>";
1074         return head;
1075     }
1076
1077     /**
1078      * build a html table line, with td columns
1079      * @param tdNodes
1080      * @return an html coded line
1081      */
1082     private String extractTableLineWithColumn(NodeList tdNodes) {
1083         //logger.info("extractTableLineWithColumn");
1084         String line;
1085         line="<tr>";
1086         for (int l=0;l<tdNodes.getLength();l++){
1087             if (tdNodes.item(l).getNodeName().equalsIgnoreCase("tax:p")){
1088                 line+="<td>"+tdNodes.item(l).getTextContent()+"</td>";
1089             }
1090         }
1091         line+="</tr>";
1092         return line;
1093     }
1094
1095     /**
1096      * @param description: the XML node group
1097      * @param acceptedTaxon: the current acceptedTaxon
1098      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1099      * @param nametosave: the list of objects to save into the CDM
1100      * @param refMods: the current reference extracted from the MODS
1101      * @param featureName: the feature name
1102      */
1103     @SuppressWarnings({ "unused", "rawtypes" })
1104     private String extractSpecificFeatureNotStructured(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
1105             List<TaxonNameBase> nameToSave, Reference refMods, String featureName ) {
1106         logger.info("extractSpecificFeatureNotStructured " + featureName);
1107         NodeList children = description.getChildNodes();
1108         NodeList insideNodes ;
1109         List<String> blabla= new ArrayList<String>();
1110
1111
1112         Feature currentFeature = getFeatureObjectFromString(featureName);
1113
1114         String fullContent = description.getTextContent();
1115         for (int i=0;i<children.getLength();i++){
1116             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1117                 insideNodes=children.item(i).getChildNodes();
1118                 for (int j=0;j<insideNodes.getLength();j++){
1119                     if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1120                         String inlineText =getInlineTextForName(nameToSave, refMods, insideNodes.item(j));
1121                         if(!inlineText.isEmpty()) {
1122                             blabla.add(inlineText);
1123                         }
1124                     }
1125                     if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1126                         if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1127                             blabla.add(insideNodes.item(j).getTextContent().trim());
1128                         }
1129                     }
1130                 }
1131             }
1132             if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1133                 if(!children.item(i).getTextContent().trim().isEmpty()){
1134                     String localdescr = children.item(i).getTextContent().trim();
1135                     if(!localdescr.isEmpty())
1136                     {
1137                         blabla.add(localdescr);
1138                     }
1139                 }
1140             }
1141         }
1142
1143         if (blabla !=null && !blabla.isEmpty()) {
1144             String blaStr = StringUtils.join(blabla," ").trim();
1145             if (! stringIsEmpty(blaStr)) {
1146                 setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1147                 return blaStr;
1148             } else {
1149                 return "";
1150             }
1151         } else {
1152             return "";
1153         }
1154
1155     }
1156
1157     /**
1158      * @param blaStr
1159      * @return
1160      */
1161     private boolean stringIsEmpty(String blaStr) {
1162         if (blaStr.matches("(\\.|,|;|\\.-)?")){
1163                 return true;
1164         }else{
1165                 return false;
1166         }
1167     }
1168
1169     /**
1170      * @param nametosave
1171      * @param refMods
1172      * @param insideNodes
1173      * @param blabla
1174      * @param j
1175      */
1176     @SuppressWarnings({ "rawtypes" })
1177     private String getInlineTextForName(List<TaxonNameBase> nametosave, Reference refMods, Node insideNode) {
1178         if (true){
1179                 NodeList children = insideNode.getChildNodes();
1180                 String result = "";
1181             for (int i=0;i<children.getLength();i++){
1182                 Node nameChild = children.item(i);
1183                 if(nameChild.getNodeName().equalsIgnoreCase("#text")){
1184                         result += nameChild.getTextContent();
1185                 }else{
1186                         //do nothing
1187                 }
1188             }
1189                 return result.replace("\n", "").trim();
1190         }else{
1191                 TaxonNameBase tnb = getTaxonNameBaseFromXML(insideNode, nametosave,refMods,false);
1192                 //                        Taxon tax = getTaxonFromTxonNameBase(tnb, refMods);
1193                 Taxon tax = currentMyName.getTaxon();
1194                 if(tnb !=null && tax != null){
1195                     String linkedTaxon = tnb.getTitleCache().split("sec")[0];//TODO NOT IMPLEMENTED IN THE CDM YET
1196                     return "<cdm:taxon uuid='"+tax.getUuid()+"'>"+linkedTaxon+"</cdm:taxon>";
1197                 }else if (tnb != null && tax == null){
1198                         //TODO
1199                         return "<cdm:taxonName uuid='" + tnb.getUuid() +"'>" + tnb.getTitleCache().split("sec")[0]  +"</cdm:taxonName>";
1200                 }else{
1201                         logger.warn("Inline text has no content yet");
1202                 }
1203                 return "";
1204         }
1205     }
1206
1207     /**
1208      * @param featureName
1209      * @return
1210      */
1211     @SuppressWarnings("rawtypes")
1212     private Feature getFeatureObjectFromString(String featureName) {
1213         logger.info("getFeatureObjectFromString");
1214         List<Feature> features = importer.getTermService().list(Feature.class, null,null,null,null);
1215         Feature currentFeature=null;
1216         for (Feature feature: features){
1217             String tmpF = feature.getTitleCache();
1218             if (tmpF.equalsIgnoreCase(featureName)) {
1219                 currentFeature=feature;
1220                 //                System.out.println("currentFeatureFromList "+currentFeature.getUuid());
1221             }
1222         }
1223         if (currentFeature == null) {
1224             currentFeature=Feature.NewInstance(featureName, featureName, featureName);
1225             if(featureName.equalsIgnoreCase("Other")){
1226                 currentFeature.setUuid(OtherUUID);
1227             }
1228             if(featureName.equalsIgnoreCase(notMarkedUp)){
1229                 currentFeature.setUuid(NotMarkedUpUUID);
1230             }
1231             importer.getTermService().saveOrUpdate(currentFeature);
1232         }
1233         return currentFeature;
1234     }
1235
1236
1237
1238
1239     /**
1240      * @param children: the XML node group
1241      * @param nametosave: the list of objects to save into the CDM
1242      * @param acceptedTaxon: the current acceptedTaxon
1243      * @param refMods: the current reference extracted from the MODS
1244      * @param fullContent :the parsed XML content
1245      * @return a list of description (text)
1246      */
1247     @SuppressWarnings({ "unused", "rawtypes" })
1248     private List<String> parseParagraph(List<TaxonNameBase> namesToSave, Taxon acceptedTaxon, Reference refMods, Node paragraph, Feature feature){
1249         logger.info("parseParagraph "+feature.toString());
1250         List<String> fullDescription=  new ArrayList<String>();
1251         //        String localdescr;
1252         String descr="";
1253         NodeList insideNodes ;
1254         boolean collectionEvent = false;
1255         List<Node>collectionEvents = new ArrayList<Node>();
1256
1257         NodeList children = paragraph.getChildNodes();
1258
1259         for (int i=0;i<children.getLength();i++){
1260             //            localdescr="";
1261             if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
1262                 descr += children.item(i).getTextContent().trim();
1263             }
1264             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1265                 insideNodes=children.item(i).getChildNodes();
1266                 List<String> blabla= new ArrayList<String>();
1267                 for (int j=0;j<insideNodes.getLength();j++){
1268                     boolean nodeKnown = false;
1269                     //    System.out.println("insideNodes.item(j).getNodeName() : "+insideNodes.item(j).getNodeName());
1270                     if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1271                         String inlineText = getInlineTextForName(namesToSave, refMods, insideNodes.item(j));
1272                         if (!inlineText.isEmpty()) {
1273                             blabla.add(inlineText);
1274                         }
1275                         nodeKnown=true;
1276                     }
1277                     else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1278                         if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1279                             blabla.add(insideNodes.item(j).getTextContent().trim());
1280                             // localdescr += insideNodes.item(j).getTextContent().trim();
1281                         }
1282                         nodeKnown=true;
1283                     }
1284                     else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:bibref")) {
1285                         String ref = insideNodes.item(j).getTextContent().trim();
1286                         if (ref.endsWith(";")  && ((ref.length())>1)) {
1287                             ref=ref.substring(0, ref.length()-1)+".";
1288                         }
1289                         Reference reference = ReferenceFactory.newGeneric();
1290                         reference.setTitleCache(ref, true);
1291                         blabla.add(reference.getTitleCache());
1292                         nodeKnown=true;
1293                     }
1294                     else if  (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:figure")){
1295                         String figure = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "figure");
1296                         blabla.add(figure);
1297                     }
1298                     else if(insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:div") &&
1299                             insideNodes.item(j).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1300                             insideNodes.item(j).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1301                         String table = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "table");
1302                         blabla.add(table);
1303                     }
1304                     else if  (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1305                         //                        logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1306                         String titlecache  = extractMaterialsDirect(insideNodes.item(j), acceptedTaxon, refMods, "collection", null);
1307                         blabla.add(titlecache);
1308                         collectionEvent=true;
1309                         collectionEvents.add(insideNodes.item(j));
1310                         nodeKnown=true;
1311                     }else{
1312                         logger.warn("node not handled yet: " + insideNodes.item(j).getNodeName());
1313                     }
1314
1315                 }
1316                 if (!StringUtils.isBlank(StringUtils.join(blabla," "))) {
1317                     fullDescription.add(StringUtils.join(blabla," "));
1318                 }
1319             }
1320             if  (children.item(i).getNodeName().equalsIgnoreCase("tax:figure")){
1321                 String figure = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "Figure");
1322                 fullDescription.add(figure);
1323             }
1324             if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1325                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1326                     children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1327                 String table = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "table");
1328                 fullDescription.add(table);
1329             }
1330         }
1331
1332         if( !stringIsEmpty(descr.trim())){
1333             Feature currentFeature= getNotMarkedUpFeatureObject();
1334             setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1335         }
1336         //        if (collectionEvent) {
1337         //            logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1338         //            for (Node coll:collectionEvents){
1339         //                = extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
1340         //            }
1341         //        }
1342         return fullDescription;
1343     }
1344
1345
1346     /**
1347      * @param description: the XML node group
1348      * @param acceptedTaxon: the current acceptedTaxon
1349      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1350      * @param nametosave: the list of objects to save into the CDM
1351      * @param refMods: the current reference extracted from the MODS
1352      * @param feature: the feature to link the data with
1353      */
1354     @SuppressWarnings("rawtypes")
1355     private void extractFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> namesToSave, Reference refMods, Feature feature){
1356         logger.info("EXTRACT FEATURE "+feature.toString());
1357         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1358         List<String> fullDescription= parseParagraph( namesToSave, acceptedTaxon, refMods, description,feature);
1359
1360         //        System.out.println("Feature : "+feature.toString()+", "+fullDescription.toString());
1361         if (!fullDescription.isEmpty() &&!stringIsEmpty(StringUtils.join(fullDescription,"\n").trim())) {
1362             setParticularDescription(StringUtils.join(fullDescription,"\n").trim(),acceptedTaxon,defaultTaxon, refMods,feature);
1363         }
1364
1365     }
1366
1367
1368     /**
1369      * @param descr: the XML Nodegroup to parse
1370      * @param acceptedTaxon: the current acceptedTaxon
1371      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1372      * @param refMods: the current reference extracted from the MODS
1373      * @param currentFeature: the feature name
1374      * @return
1375      */
1376     private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon, Reference refMods, Feature currentFeature) {
1377         logger.info("setParticularDescription " + currentFeature.getTitleCache()+", \n blabla : "+descr);
1378
1379         //remove redundant feature title
1380         String featureStr = currentFeature.getTitleCache();
1381         if (!descr.isEmpty() && descr.toLowerCase().startsWith(featureStr.toLowerCase())){
1382                 descr = descr.replaceAll("(?i)" + featureStr + "\\.\\s*", "");
1383         }
1384
1385
1386         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1387         featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1388
1389         TextData textData = createTextData(descr, refMods, currentFeature);
1390
1391         if(acceptedTaxon!=null){
1392             TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1393             td.addElement(textData);
1394             acceptedTaxon.addDescription(td);
1395
1396             sourceHandler.addAndSaveSource(refMods, td, null);
1397             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1398         }
1399
1400         if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1401             try{
1402                 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1403                 if (tmp!=null) {
1404                     defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1405                 }else{
1406                     importer.getTaxonService().saveOrUpdate(defaultTaxon);
1407                 }
1408             }catch(Exception e){
1409                 logger.debug("TAXON EXISTS"+defaultTaxon);
1410             }
1411
1412             TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1413             defaultTaxon.addDescription(td);
1414             td.addElement(textData);
1415             sourceHandler.addAndSaveSource(refMods, td, null);
1416             importer.getTaxonService().saveOrUpdate(defaultTaxon);
1417         }
1418     }
1419
1420     /**
1421      * @param descr
1422      * @param refMods
1423      * @param currentFeature
1424      * @return
1425      */
1426     private TextData createTextData(String descr, Reference refMods, Feature currentFeature) {
1427         //logger.info("createTextData");
1428         TextData textData = TextData.NewInstance();
1429         textData.setFeature(currentFeature);
1430         sourceHandler.addSource(refMods, textData);
1431
1432         textData.putText(Language.UNKNOWN_LANGUAGE(), descr);
1433         return textData;
1434     }
1435
1436
1437
1438     /**
1439      * @param descr: the XML Nodegroup to parse
1440      * @param acceptedTaxon: the current acceptedTaxon
1441      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1442      * @param refMods: the current reference extracted from the MODS
1443      * @param currentFeature: the feature name
1444      * @return
1445      */
1446     private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon,Reference currentRef, Reference refMods, Feature currentFeature) {
1447         //        System.out.println("setParticularDescriptionSPecial "+currentFeature);
1448         //        logger.info("acceptedTaxon: "+acceptedTaxon);
1449         logger.info("setParticularDescription");
1450         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1451
1452         featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1453         TextData textData = createTextData(descr, refMods, currentFeature);
1454
1455         if(! descr.isEmpty() && (acceptedTaxon!=null)){
1456             TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1457             td.addElement(textData);
1458             acceptedTaxon.addDescription(td);
1459
1460             sourceHandler.addAndSaveSource(refMods, td, currentRef);
1461             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1462         }
1463
1464         if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1465             try{
1466                 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1467                 if (tmp!=null) {
1468                     defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1469                 }else{
1470                     importer.getTaxonService().saveOrUpdate(defaultTaxon);
1471                 }
1472             }catch(Exception e){
1473                 logger.debug("TAXON EXISTS"+defaultTaxon);
1474             }
1475
1476             TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1477             defaultTaxon.addDescription(td);
1478             td.addElement(textData);
1479             sourceHandler.addAndSaveSource(currentRef, td,currentRef);
1480             importer.getTaxonService().saveOrUpdate(defaultTaxon);
1481         }
1482     }
1483
1484
1485
1486     /**
1487      * @param synonyms: the XML Nodegroup to parse
1488      * @param nametosave: the list of objects to save into the CDM
1489      * @param acceptedTaxon: the current acceptedTaxon
1490      * @param refMods: the current reference extracted from the MODS
1491      */
1492     @SuppressWarnings({ "rawtypes" })
1493     private void extractSynonyms(Node synonymsNode, Taxon acceptedTaxon,Reference refMods, String followingText) {
1494         logger.info("extractSynonyms");
1495         //System.out.println("extractSynonyms for: "+acceptedTaxon);
1496         Taxon ttmp = (Taxon) importer.getTaxonService().find(acceptedTaxon.getUuid());
1497         if (ttmp != null) {
1498             acceptedTaxon = CdmBase.deproxy(ttmp,Taxon.class);
1499         }
1500         else{
1501             acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1502         }
1503         NodeList children = synonymsNode.getChildNodes();
1504         List<MyName> names = new ArrayList<MyName>();
1505
1506         if(synonymsNode.getNodeName().equalsIgnoreCase("tax:name")){
1507             try {
1508                 MyName myName = extractScientificNameSynonym(synonymsNode, refMods, followingText);
1509                 names.add(myName);
1510             } catch (TransformerFactoryConfigurationError e) {
1511                 logger.warn(e);
1512             } catch (TransformerException e) {
1513                 logger.warn(e);
1514             }
1515         }
1516
1517
1518         for (int i=0;i<children.getLength();i++){
1519             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1520                 NodeList tmp = children.item(i).getChildNodes();
1521                 //                String fullContent = children.item(i).getTextContent();
1522                 for (int j=0; j< tmp.getLength();j++){
1523                     if(tmp.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1524                         try {
1525                                 MyName myName = extractScientificNameSynonym(tmp.item(j),refMods, followingText);
1526                             names.add(myName);
1527                         } catch (TransformerFactoryConfigurationError e) {
1528                             logger.warn(e);
1529                         } catch (TransformerException e) {
1530                             logger.warn(e);
1531                         }
1532                     }
1533                 }
1534             }
1535             if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
1536                 try {
1537                         MyName myName = extractScientificNameSynonym(children.item(i),refMods, followingText);
1538                     names.add(myName);
1539                 } catch (TransformerFactoryConfigurationError e) {
1540                     logger.warn(e);
1541                 } catch (TransformerException e) {
1542                     logger.warn(e);
1543                 }
1544
1545             }
1546         }
1547
1548         for(MyName name:names){
1549                 TaxonNameBase nameToBeFilled = name.getTaxonNameBase();
1550             Synonym synonym = name.getSyno();
1551             addFollowingTextToName(nameToBeFilled, followingText);
1552
1553             /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1554             nameToBeFilled = parser.parseFullName(name.getName(), nomenclaturalCode, name.getRank());
1555             if (nameToBeFilled.hasProblem() &&
1556                     !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1557                 //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1558                 addProblemNameToFile(name.getName(),"",nomenclaturalCode,name.getRank());
1559                 nameToBeFilled = solveNameProblem(name.getOriginalName(), name.getName(), parser,name.getAuthor(), name.getRank());
1560             }
1561             nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
1562              */
1563             if (!name.getIdentifier().isEmpty() && (name.getIdentifier().length()>2)){
1564                 setLSID(name.getIdentifier(), synonym);
1565             }
1566
1567             Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1568             boolean synoExist = false;
1569             for (Synonym syn: synonymsSet){
1570
1571                 boolean a =syn.getName().equals(synonym.getName());
1572                 boolean b = syn.getSec().equals(synonym.getSec());
1573                 if (a && b) {
1574                     synoExist=true;
1575                 }
1576             }
1577             if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1578                 sourceHandler.addSource(refMods, synonym);
1579                 acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1580             }
1581         }
1582         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1583     }
1584
1585
1586     private boolean addFollowingTextToName(ITaxonNameBase nameToBeFilled, String followingText) {
1587         if (nameToBeFilled != null && StringUtils.isNotBlank(followingText)){
1588                 if (! followingText.matches("\\d\\.?")){
1589
1590                         if (followingText.startsWith(",")){
1591                                 followingText = followingText.substring(1).trim();
1592                         }
1593                         nameToBeFilled.setFullTitleCache(nameToBeFilled.getFullTitleCache()+ "," +followingText , true);
1594                 }
1595                 return true;
1596         }
1597         return false;
1598
1599         }
1600
1601         /**
1602      * @param refgroup: the XML nodes
1603      * @param nametosave: the list of objects to save into the CDM
1604      * @param acceptedTaxon: the current acceptedTaxon
1605      * @param nametosave: the list of objects to save into the CDM
1606      * @param refMods: the current reference extracted from the MODS
1607      * @return the acceptedTaxon (why?)
1608      * handle cases where the bibref are inside <p> and outside
1609      */
1610     @SuppressWarnings({ "rawtypes" })
1611     private Taxon extractReferences(Node refgroup, List<TaxonNameBase> nametosave, Taxon acceptedTaxon, Reference refMods) {
1612         logger.info("extractReferences");
1613         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1614
1615         NodeList children = refgroup.getChildNodes();
1616         INonViralName nameToBeFilled = getNonViralNameAccNomenclature();
1617
1618         ReferenceBuilder refBuild = new ReferenceBuilder(sourceHandler);
1619         for (int i=0;i<children.getLength();i++){
1620             if(children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
1621                 String ref = children.item(i).getTextContent().trim();
1622                 refBuild.builReference(ref, treatmentMainName, nomenclaturalCode,  acceptedTaxon, refMods);
1623                 if (!refBuild.isFoundBibref()){
1624                     extractReferenceRawText(children.item(i).getChildNodes(), nameToBeFilled, refMods, acceptedTaxon);
1625                 }
1626             }
1627
1628             if(children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1629                 NodeList references = children.item(i).getChildNodes();
1630                 String descr="";
1631                 for (int j=0;j<references.getLength();j++){
1632                     if(references.item(j).getNodeName().equalsIgnoreCase("tax:bibref")){
1633                         String ref = references.item(j).getTextContent().trim();
1634                         refBuild.builReference(ref, treatmentMainName,  nomenclaturalCode,  acceptedTaxon, refMods);
1635                     }
1636                     else
1637                         if (references.item(j).getNodeName().equalsIgnoreCase("#text")
1638                                 && !references.item(j).getTextContent().trim().isEmpty()){
1639                             descr += references.item(j).getTextContent().trim();
1640                         }
1641
1642                 }
1643                 if (!refBuild.isFoundBibref()){
1644                     //if it's not tagged, put it as row information.
1645                     //                    extractReferenceRawText(references, nameToBeFilled, nametosave, refMods, acceptedTaxon);
1646                     //then put it as a not markup feature if not empty
1647                     if (!stringIsEmpty(descr.trim())){
1648                         Feature currentFeature= getNotMarkedUpFeatureObject();
1649                         setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1650                     }
1651                 }
1652             }
1653         }
1654         //        importer.getClassificationService().saveOrUpdate(classification);
1655         return acceptedTaxon;
1656
1657     }
1658
1659     /**
1660      * get the non viral name according to the current nomenclature
1661      * @return
1662      */
1663
1664     private INonViralName getNonViralNameAccNomenclature() {
1665         return nomenclaturalCode.getNewTaxonNameInstance(null);
1666     }
1667
1668     /**
1669      * @return the feature object for the category "not marked up"
1670      */
1671     private Feature getNotMarkedUpFeatureObject() {
1672         // FIXME use getFeature(uuid ....)
1673         logger.info("getNotMarkedUpFeatureObject");
1674         Feature currentFeature = (Feature)importer.getTermService().find(NotMarkedUpUUID);
1675         if (currentFeature == null) {
1676             currentFeature=Feature.NewInstance(notMarkedUp, notMarkedUp, notMarkedUp);
1677             currentFeature.setUuid(NotMarkedUpUUID);
1678             //TODO use userDefined Feature Vocabulary
1679             Feature.DISTRIBUTION().getVocabulary().addTerm(currentFeature);
1680 //            importer.getTermService().saveOrUpdate(currentFeature);
1681             importer.getVocabularyService().saveOrUpdate(currentFeature.getVocabulary());
1682         }
1683         return currentFeature;
1684     }
1685
1686     /**
1687      * @param references
1688      * handle cases where the bibref are inside <p> and outside
1689      */
1690     @SuppressWarnings("rawtypes")
1691     private void extractReferenceRawText(NodeList references, INonViralName nameToBeFilled, Reference refMods,
1692             Taxon acceptedTaxon) {
1693         logger.info("extractReferenceRawText");
1694         String refString="";
1695         currentMyName= new MyName(true);
1696         for (int j=0;j<references.getLength();j++){
1697             acceptedTaxon=CdmBase.deproxy(acceptedTaxon, Taxon.class);
1698             //no bibref tag inside
1699             //            System.out.println("references.item(j).getNodeName()"+references.item(j).getNodeName());
1700             if (references.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1701
1702                 try {
1703                         String followingText = null;  //needs to be checked if follText is possible
1704                         //TODO create or not create?
1705                     currentMyName = extractScientificName(references.item(j), refMods, followingText);
1706                 } catch (TransformerFactoryConfigurationError e) {
1707                     logger.warn(e);
1708                 } catch (TransformerException e) {
1709                     logger.warn(e);
1710                 }
1711
1712                 //                name=name.trim();
1713             }
1714             if (references.item(j).getNodeName().equalsIgnoreCase("#text")){
1715                 refString = references.item(j).getTextContent().trim();
1716             }
1717             if(references.item(j).getNodeName().equalsIgnoreCase("#text") && !references.item(j).getTextContent().trim().isEmpty()){
1718                 //
1719                if (!currentMyName.getStatus().isEmpty()){
1720                    String nomNovStatus = this.newNameStatus(currentMyName.getStatus());
1721                         if (nomNovStatus != null){
1722                                 nameToBeFilled.setAppendedPhrase(nomNovStatus);
1723                         }else{
1724                            try {
1725                                 NomenclaturalStatusType  statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1726                             nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1727                             } catch (UnknownCdmTypeException e) {
1728                                 addProblematicStatusToFile(currentMyName.getStatus());
1729                                 logger.warn("Problem with status");
1730                             }
1731                         }
1732                 }
1733
1734                 String fullLineRefName = references.item(j).getTextContent().trim();
1735                 int nameOrRefOrOther=2;
1736                 nameOrRefOrOther=askIfNameContained(fullLineRefName);
1737                 if (nameOrRefOrOther==0){
1738                     TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1739                     Synonym synonym = Synonym.NewInstance(nameTBF, refMods);
1740
1741                     Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1742                     //                    System.out.println(synonym.getName()+" -- "+synonym.getSec());
1743                     boolean synoExist = false;
1744                     for (Synonym syn: synonymsSet){
1745                         //                        System.out.println(syn.getName()+" -- "+syn.getSec());
1746                         boolean a =syn.getName().equals(synonym.getName());
1747                         boolean b = syn.getSec().equals(synonym.getSec());
1748                         if (a && b) {
1749                             synoExist=true;
1750                         }
1751                     }
1752                     if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1753                         sourceHandler.addSource(refMods, synonym);
1754
1755                         acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1756                     }
1757                 }
1758
1759                 if (nameOrRefOrOther==1){
1760                     Reference re = ReferenceFactory.newGeneric();
1761                     re.setTitleCache(fullLineRefName, true);
1762
1763                     /* TaxonNameBase nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
1764                     if (nameTBF.hasProblem() &&
1765                             !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1766                         addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
1767                         nameTBF=solveNameProblem(currentMyName.getName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
1768                     }
1769                     nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1770                      */
1771                     TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1772                     Synonym synonym = Synonym.NewInstance(nameTBF, re);
1773
1774                     Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1775                     //                    System.out.println(synonym.getName()+" -- "+synonym.getSec());
1776                     boolean synoExist = false;
1777                     for (Synonym syn: synonymsSet){
1778                         //                        System.out.println(syn.getName()+" -- "+syn.getSec());
1779                         boolean a =syn.getName().equals(synonym.getName());
1780                         boolean b = syn.getSec().equals(synonym.getSec());
1781                         if (a && b) {
1782                             synoExist=true;
1783                         }
1784                     }
1785                     if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1786                         sourceHandler.addSource(refMods, synonym);
1787
1788                         acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1789                     }
1790
1791                 }
1792
1793
1794                 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1795                     setLSID(currentMyName.getIdentifier(), acceptedTaxon);
1796                 }
1797             }
1798
1799             if(!currentMyName.getName().isEmpty()){
1800                 //logger.info("acceptedTaxon and name: *"+acceptedTaxon.getTitleCache()+"*, *"+currentMyName.getName()+"*");
1801                 if (acceptedTaxon.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(currentMyName.getName().trim())){
1802                     Reference refS = ReferenceFactory.newGeneric();
1803                     refS.setTitleCache(refString, true);
1804                     //                            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1805                     //                            acceptedTaxon.addDescription(td);
1806                     //                            acceptedTaxon.addSource(refSource);
1807                     //
1808                     //                            TextData textData = TextData.NewInstance(Feature.CITATION());
1809                     //
1810                     //                            textData.addSource(null, null, refS, null);
1811                     //                            td.addElement(textData);
1812                     //                            td.addSource(refSource);
1813                     //                            importer.getDescriptionService().saveOrUpdate(td);
1814
1815
1816                     if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1817                         setLSID(currentMyName.getIdentifier(), acceptedTaxon);
1818
1819                     }
1820
1821                     acceptedTaxon.getName().setNomenclaturalReference(refS);
1822                 }else{
1823                     TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1824                     Synonym synonym = null;
1825                     if (! currentMyName.getStatus().isEmpty()){
1826                         String nomNovStatus = this.newNameStatus(currentMyName.getStatus());
1827                         if (nomNovStatus != null){
1828                                 nameToBeFilled.setAppendedPhrase(nomNovStatus);
1829                         }else{
1830                                 try {
1831                                     NomenclaturalStatusType statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1832                                     nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1833                                     synonym = Synonym.NewInstance(nameTBF, refMods);
1834                                 } catch (UnknownCdmTypeException e) {
1835                                     addProblematicStatusToFile(currentMyName.getStatus());
1836                                     logger.warn("Problem with status");
1837                                     synonym = Synonym.NewInstance(nameTBF, refMods);
1838                                     synonym.setAppendedPhrase(currentMyName.getStatus());
1839                                 }
1840                         }
1841                     }else{
1842                         synonym =  Synonym.NewInstance(nameTBF, refMods);
1843                     }
1844
1845
1846                     if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1847                         setLSID(currentMyName.getIdentifier(), synonym);
1848                     }
1849
1850                     Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1851                     //                    System.out.println(synonym.getName()+" -- "+synonym.getSec());
1852                     boolean synoExist = false;
1853                     for (Synonym syn: synonymsSet){
1854                         //                        System.out.println(syn.getName()+" -- "+syn.getSec());
1855                         boolean a =syn.getName().equals(synonym.getName());
1856                         boolean b = syn.getSec().equals(synonym.getSec());
1857                         if (a && b) {
1858                             synoExist=true;
1859                         }
1860                     }
1861                     if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1862                         sourceHandler.addSource(refMods, synonym);
1863
1864                         acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1865                     }
1866                 }
1867             }
1868             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1869         }
1870     }
1871
1872
1873
1874     /**
1875      * @param identifier
1876      * @param acceptedTaxon
1877      */
1878     @SuppressWarnings("rawtypes")
1879     private void setLSID(String identifier, TaxonBase<?> taxon) {
1880         //logger.info("setLSID");
1881         //        boolean lsidok=false;
1882         String id = identifier.split("__")[0];
1883         String source = identifier.split("__")[1];
1884         if (id.indexOf("lsid")>-1){
1885             try {
1886                 LSID lsid = new LSID(id);
1887                 taxon.setLsid(lsid);
1888                 //                lsidok=true;
1889             } catch (MalformedLSIDException e) {
1890                 logger.warn("Malformed LSID");
1891             }
1892
1893         }
1894
1895         //logger.info("search reference for LSID");
1896         //  if ((id.indexOf("lsid")<0) || !lsidok){
1897         //ADD ORIGINAL SOURCE ID EVEN IF LSID
1898         Reference re = null;
1899         Pager<Reference> references = importer.getReferenceService().findByTitle(Reference.class, source, MatchMode.EXACT, null, 1, null, null, null);
1900         if( references !=null && references.getCount()>0){
1901             re=references.getRecords().get(0);
1902         }
1903         //logger.info("search reference for LSID-end");
1904         if(re == null){
1905             re = ReferenceFactory.newGeneric();
1906             re.setTitleCache(source, true);
1907             importer.getReferenceService().saveOrUpdate(re);
1908         }
1909         re=CdmBase.deproxy(re, Reference.class);
1910
1911         //logger.info("search source for LSID");
1912         Set<IdentifiableSource> sources = taxon.getSources();
1913         boolean lsidinsource=false;
1914         boolean urlinsource=false;
1915         for (IdentifiableSource src:sources){
1916             if (id.equalsIgnoreCase(src.getIdInSource()) && re.getTitleCache().equals(src.getCitation().getTitleCache())) {
1917                 lsidinsource=true;
1918             }
1919             if (src.getIdInSource() == null && re.getTitleCache().equals(sourceUrlRef.getTitleCache())) {
1920                 urlinsource=true;
1921             }
1922         }
1923         if(!lsidinsource) {
1924             taxon.addSource(OriginalSourceType.Import, id,null,re,null);
1925         }
1926         if(!urlinsource)
1927         {
1928             sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
1929             taxon.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
1930             // }
1931         }
1932
1933     }
1934
1935     /**
1936      * try to solve a parsing problem for a scientific name
1937      * @param original : the name from the OCR document
1938      * @param name : the tagged version
1939      * @param parser
1940      * @return the corrected TaxonNameBase
1941      */
1942     /*   @SuppressWarnings({ "unchecked", "rawtypes" })
1943     private TaxonNameBase<?,?> solveNameProblem(String original, String name, INonViralNameParser parser, String author, Rank rank) {
1944         Map<String,String> ato = namesMap.get(original);
1945         if (ato == null) {
1946             ato = namesMap.get(original+" "+author);
1947         }
1948
1949
1950         if (ato == null && rank.equals(Rank.UNKNOWN_RANK())){
1951             rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
1952         }
1953         if (ato != null && rank.equals(Rank.UNKNOWN_RANK())){
1954             rank = getRank(ato);
1955         }
1956         //        TaxonNameBase<?,?> nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
1957         TaxonNameBase<?,?> nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1958         //                logger.info("RANK: "+rank);
1959         int retry=0;
1960         List<ParserProblem> problems = nameTBF.getParsingProblems();
1961         for (ParserProblem pb:problems) {
1962             System.out.println(pb.toString());
1963         }
1964         while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
1965             addProblemNameToFile(name,author,nomenclaturalCode,rank);
1966             String fullname=name;
1967             if(! skippQuestion) {
1968                 fullname =  getFullReference(name,nameTBF.getParsingProblems());
1969             }
1970             if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1971                 nameTBF = TaxonNameFactory.NewBotanicalInstance(null);
1972             }
1973             if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1974                 nameTBF = TaxonNameFactory.NewZoologicalInstance(null);
1975             }
1976             if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1977                 nameTBF= TaxonNameFactory.NewBacterialInstance(null);
1978             }
1979             parser.parseReferencedName(nameTBF, fullname, rank, false);
1980             retry++;
1981         }
1982         if (retry == 1){
1983             if(author != null){
1984                 if (name.indexOf(author)>-1) {
1985                     nameTBF = parser.parseSimpleName(name.substring(0, name.indexOf(author)), nomenclaturalCode, rank);
1986                 } else {
1987                     nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1988                 }
1989                 if (nameTBF.hasProblem()){
1990                     if (name.indexOf(author)>-1) {
1991                         addProblemNameToFile(name.substring(0, name.indexOf(author)),author,nomenclaturalCode,rank);
1992                     } else {
1993                         addProblemNameToFile(name,author,nomenclaturalCode,rank);
1994                     }
1995                     //                    System.out.println("TBF still has problems "+nameTBF.hasProblem());
1996                     problems = nameTBF.getParsingProblems();
1997                     for (ParserProblem pb:problems) {
1998                         System.out.println(pb.toString());
1999                     }
2000                     nameTBF.setFullTitleCache(name, true);
2001                 }else{
2002                     if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2003                         ((BotanicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2004                     }
2005                     if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2006                         ((ZoologicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2007                     }
2008                     if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2009                         ((BacterialName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2010                     }
2011                 }
2012                 //                    logger.info("FULL TITLE CACHE "+name);
2013             }else{
2014                 nameTBF.setFullTitleCache(name, true);
2015             }
2016         }
2017         return nameTBF;
2018     }
2019
2020      */
2021
2022     /**
2023      * @param nomenclatureNode: the XML nodes
2024      * @param nametosave: the list of objects to save into the CDM
2025      * @param refMods: the current reference extracted from the MODS
2026      * @return
2027      */
2028     @SuppressWarnings({ "rawtypes" })
2029     private Taxon extractNomenclature(Node nomenclatureNode,  List<TaxonNameBase> nametosave, Reference refMods) throws ClassCastException{
2030         refMods=CdmBase.deproxy(refMods, Reference.class);
2031
2032         logger.info("extractNomenclature");
2033         NodeList children = nomenclatureNode.getChildNodes();
2034         String freetext="";
2035         Taxon acceptedTaxon = null;
2036         //   INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2037
2038         //        String fullContent = nomenclatureNode.getTextContent();
2039
2040         NomenclaturalStatusType statusType = null;
2041         String newNameStatus = null;
2042         //TODO
2043         for (int i=0;i<children.getLength();i++){
2044             if(children.item(i).getNodeName().equalsIgnoreCase("tax:status")){
2045                 String status = children.item(i).getTextContent().trim();
2046
2047                 if (!status.isEmpty()){
2048                         if (newNameStatus(status) != null){
2049                                 newNameStatus = newNameStatus(status);
2050                     }else{
2051                             try {
2052                                 statusType = nomStatusString2NomStatus(status);
2053                             } catch (UnknownCdmTypeException e) {
2054         //                      nomNovStatus;
2055                                 addProblematicStatusToFile(status);
2056                                 logger.warn("Problem with status: " + status);
2057                             }
2058                     }
2059                 }
2060             }
2061         }
2062
2063         boolean containsSynonyms=false;
2064         boolean wasSynonym = false;
2065         usedFollowingTextPrefix = null;  //reset
2066
2067         for (int i=0; i<children.getLength(); i++){
2068                 Node childNode = children.item(i);
2069                 String childName = childNode.getNodeName();
2070
2071
2072                 //following text
2073                 followingText = null;
2074                 if ( i + 1 < children.getLength()){
2075                 Node followingTextNode = children.item(i +1);
2076                 if (followingTextNode.getNodeName().equals("#text") && !followingTextNode.getTextContent().matches("\\s*") ){
2077                         followingText = followingTextNode.getTextContent();
2078                 }
2079                 }
2080
2081                 //traverse nodes
2082             if (childName.equalsIgnoreCase("#text")) {
2083                 freetext = childNode.getTextContent().trim();
2084                 if (usedFollowingTextPrefix != null && freetext.startsWith(usedFollowingTextPrefix)){
2085                         freetext = freetext.substring(usedFollowingTextPrefix.length());
2086                 }
2087                 usedFollowingTextPrefix = null;  //reset
2088             }else if (childName.equalsIgnoreCase("tax:collection_event")) {
2089                 //                System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
2090                 extractMaterialsDirect(childNode, acceptedTaxon, refMods, "collection", currentMyName.getTaxonNameBase());
2091             }else if(childName.equalsIgnoreCase("tax:name")){
2092                 INonViralName nameToBeFilled;
2093                 //System.out.println("HANDLE FIRST NAME OF THE LIST");
2094                 if(!containsSynonyms){
2095                         wasSynonym = false;
2096
2097                         //System.out.println("I : "+i);
2098                     currentMyName = new MyName(false);
2099                     try {
2100                         currentMyName = extractScientificName(childNode, refMods, followingText);
2101                         treatmentMainName = currentMyName.getNewName();
2102                         originalTreatmentName = currentMyName.getOriginalName();
2103
2104                     } catch (TransformerFactoryConfigurationError e1) {
2105                         throw new RuntimeException(e1);
2106                     } catch (TransformerException e1) {
2107                         throw new RuntimeException(e1);
2108                     }
2109
2110                     if (currentMyName.getRank().equals(Rank.UNKNOWN_RANK()) || currentMyName.getRank().isLower(state2.getConfig().getMaxRank()) || currentMyName.getRank().equals(state2.getConfig().getMaxRank())){
2111                         maxRankRespected=true;
2112
2113                         nameToBeFilled=currentMyName.getTaxonNameBase();
2114
2115                         //   acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
2116                         acceptedTaxon=currentMyName.getTaxon();
2117                         //System.out.println("TreatmentName "+treatmentMainName+" - "+acceptedTaxon);
2118
2119
2120                         boolean statusMatch=false;
2121                         if(acceptedTaxon !=null ){
2122                             acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2123                             statusMatch=compareStatus(acceptedTaxon, statusType);
2124                             //System.out.println("statusMatch: "+statusMatch);
2125                         }
2126                         if (acceptedTaxon ==null || (acceptedTaxon != null && !statusMatch)){
2127
2128                             nameToBeFilled=currentMyName.getTaxonNameBase();
2129                             if (nameToBeFilled != null){
2130                                 if (!originalTreatmentName.isEmpty()) {
2131                                     TaxonNameDescription td = TaxonNameDescription.NewInstance();
2132                                     td.setTitleCache(originalTreatmentName, true);
2133                                     nameToBeFilled.addDescription(td);
2134                                 }
2135
2136                                 if(statusType != null) {
2137                                     nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
2138                                 }
2139                                 if(newNameStatus != null){
2140                                         nameToBeFilled.setAppendedPhrase(newNameStatus);
2141                                 }
2142                                 sourceHandler.addSource(refMods, nameToBeFilled);
2143
2144                                 if (nameToBeFilled.getNomenclaturalReference() == null) {
2145                                     acceptedTaxon= Taxon.NewInstance(nameToBeFilled,refMods);
2146                                     //System.out.println("NEW ACCEPTED HERE "+nameToBeFilled);
2147                                 }
2148                                 else {
2149                                     acceptedTaxon= Taxon.NewInstance(nameToBeFilled,(Reference) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
2150                                     //System.out.println("NEW ACCEPTED HERE2 "+nameToBeFilled);
2151                                 }
2152
2153                                 sourceHandler.addSource(refMods, acceptedTaxon);
2154
2155                                 if(!state2.getConfig().doKeepOriginalSecundum()) {
2156                                     acceptedTaxon.setSec(state2.getConfig().getSecundum());
2157                                     //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2158                                     //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2159                                 }
2160
2161                                 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2162                                     setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2163                                 }
2164
2165
2166                                 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2167                                 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2168                             }
2169
2170                         }else{
2171                             acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2172                             Set<IdentifiableSource> sources = acceptedTaxon.getSources();
2173                             boolean sourcelinked=false;
2174                             for (IdentifiableSource source:sources){
2175                                 if (source.getCitation().getTitleCache().equalsIgnoreCase(refMods.getTitleCache())) {
2176                                     sourcelinked=true;
2177                                 }
2178                             }
2179                             if (!state2.getConfig().doKeepOriginalSecundum()) {
2180                                 acceptedTaxon.setSec(state2.getConfig().getSecundum());
2181                                 //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2182                                 //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2183                             }
2184                             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2185
2186                             if (!sourcelinked){
2187                                 sourceHandler.addSource(refMods, acceptedTaxon);
2188                             }
2189                             if (!sourcelinked || !state2.getConfig().doKeepOriginalSecundum()){
2190
2191                                 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2192                                     //FIXME are these identifiers really related to taxa, not names? Exiting LSIDs come from Zoobank, urn:lsid:biosci.ohio-state.edu:osuc_concepts:134826 (Ants)
2193                                         setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2194                                 }
2195                                 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2196                             }
2197                         }
2198                     }else{
2199                         maxRankRespected=false;
2200                     }
2201                     containsSynonyms=true;  //all folowing names are handled as synonyms
2202                 }else{
2203                     try{
2204                         extractSynonyms(childNode, acceptedTaxon, refMods, followingText);
2205                         wasSynonym = true;
2206
2207                     }catch(NullPointerException e){
2208                         logger.warn("null pointer exception, the accepted taxon might be null");
2209                     }
2210                 }
2211                 containsSynonyms=true;
2212             }else if (childName.equalsIgnoreCase("tax:ref_group") && maxRankRespected){
2213                 reloadClassification();
2214                 //extract the References within the document
2215                 extractReferences(childNode,nametosave,acceptedTaxon,refMods);
2216             }else if (childName.equalsIgnoreCase("tax:bibref")){
2217                 logger.warn(childName + " still preliminary");
2218
2219                 INonViralName currentName = currentMyName == null ? null : currentMyName.getTaxonNameBase();
2220                 boolean handled = addFollowingTextToName (currentName, childNode.getTextContent() );
2221                 if (! handled){
2222                         setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject());
2223                 }
2224             }else{
2225                 logger.warn(childName + " not yet handled");
2226             }
2227             if(!stringIsEmpty(freetext.trim())) {;
2228                 if (! freetext.matches("\\d\\.?")){
2229                     INonViralName currentName = currentMyName == null ? null : currentMyName.getTaxonNameBase();
2230                         boolean handled = false;
2231                         if (currentName != null && !wasSynonym){
2232                                 handled = addFollowingTextToName (currentName, childNode.getTextContent() );
2233                         }
2234                         if (! handled){
2235                                 setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject());
2236                         }
2237                 }
2238
2239                  freetext = "";
2240             }
2241
2242         }
2243         //importer.getClassificationService().saveOrUpdate(classification);
2244         return acceptedTaxon;
2245     }
2246
2247
2248
2249
2250         /**
2251      * @return
2252      */
2253
2254     private boolean compareStatus(TaxonBase<?> t, NomenclaturalStatusType statusType) {
2255         //logger.info("compareStatus");
2256         boolean statusMatch=false;
2257         //found one taxon
2258         Set<NomenclaturalStatus> status = t.getName().getStatus();
2259         if (statusType!=null && status.size()>0){ //the statusType is known for both taxon
2260             for (NomenclaturalStatus st:status){
2261                 NomenclaturalStatusType stype = st.getType();
2262                 if (stype.toString().equalsIgnoreCase(statusType.toString())) {
2263                     statusMatch=true;
2264                 }
2265             }
2266         }
2267         else{
2268             if(statusType == null && status.size()==0) {//there is no statusType, we can assume it's the same
2269                 statusMatch=true;
2270             }
2271         }
2272         return statusMatch;
2273     }
2274
2275     /**
2276      * @param acceptedTaxon: the current acceptedTaxon
2277      * @param ref: the current reference extracted from the MODS
2278      * @return the parent for the current accepted taxon
2279      */
2280     /*  private Taxon createParent(Taxon acceptedTaxon, Reference ref) {
2281         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2282
2283         List<Rank> rankList = new ArrayList<Rank>();
2284         rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
2285
2286         List<String> rankListStr = new ArrayList<String>();
2287         for (Rank r:rankList) {
2288             rankListStr.add(r.toString());
2289         }
2290         String r="";
2291         String s = acceptedTaxon.getTitleCache();
2292         Taxon tax = null;
2293         if(!skippQuestion){
2294             int addTaxon = askAddParent(s);
2295             logger.info("ADD TAXON: "+addTaxon);
2296             if (addTaxon == 0 ){
2297                 Taxon tmp = askParent(acceptedTaxon, classification);
2298                 if (tmp == null){
2299                     s = askSetParent(s);
2300                     r = askRank(s,rankListStr);
2301
2302                     TaxonNameBase<?,?> nameToBeFilled = null;
2303                     if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2304                         nameToBeFilled = TaxonNameFactory.NewBotanicalInstance(null);
2305                     }
2306                     if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2307                         nameToBeFilled = TaxonNameFactory.NewZoologicalInstance(null);
2308                     }
2309                     if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2310                         nameToBeFilled = TaxonNameFactory.NewBacterialInstance(null);
2311                     }
2312                     nameToBeFilled.setTitleCache(s, true);
2313                     nameToBeFilled.setRank(getRank(r), true);
2314
2315                     tax = Taxon.NewInstance(nameToBeFilled, ref);
2316                 }
2317                 else{
2318                     tax=tmp;
2319                 }
2320
2321                 createParent(tax, ref);
2322                 //            logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
2323                 classification.addParentChild(tax, acceptedTaxon, ref, null);
2324             }
2325             else{
2326                 classification.addChildTaxon(acceptedTaxon, ref, null);
2327                 tax=acceptedTaxon;
2328             }
2329         } else{
2330             classification.addChildTaxon(acceptedTaxon, ref, null);
2331             tax=acceptedTaxon;
2332         }
2333         //        logger.info("RETURN: "+tax );
2334         return tax;
2335
2336     }
2337
2338      */
2339
2340
2341     private MyName  extractScientificNameSynonym(Node name, Reference refMods, String followingText) throws TransformerFactoryConfigurationError, TransformerException {
2342         //System.out.println("extractScientificNameSynonym");
2343         logger.info("extractScientificNameSynonym");
2344         String[] rankListToPrint_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2345         List<String> rankListToPrint = new ArrayList<String>();
2346         for (String r : rankListToPrint_tmp) {
2347             rankListToPrint.add(r.toLowerCase());
2348         }
2349
2350         Rank rank = Rank.UNKNOWN_RANK();
2351         NodeList children = name.getChildNodes();
2352         String originalName="";
2353         String fullName = "";
2354         String newName="";
2355         String identifier="";
2356         HashMap<String, String> atomisedMap = new HashMap<String, String>();
2357         List<String> atomisedName= new ArrayList<String>();
2358
2359         String rankStr = "";
2360         Rank tmpRank ;
2361
2362         String status= extractStatus(children);
2363
2364         for (int i=0;i<children.getLength();i++){
2365             if(children.item(i).getNodeName().equalsIgnoreCase("tax:xmldata")){
2366                 NodeList atom = children.item(i).getChildNodes();
2367                 for (int k=0;k<atom.getLength();k++){
2368                     identifier = extractIdentifier(identifier, atom.item(k));
2369                     tmpRank = null;
2370                     rankStr = atom.item(k).getNodeName().toLowerCase();
2371                     //                    logger.info("RANKSTR:*"+rankStr+"*");
2372                     if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2373                         rankStr=atom.item(k).getTextContent().trim();
2374                         tmpRank = getRank(rankStr);
2375                     }
2376                     //                    if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2377                     if (tmpRank != null){
2378                         rank=tmpRank;
2379                     }
2380                     atomisedMap.put(rankStr.toLowerCase(),atom.item(k).getTextContent().trim());
2381                 }
2382                 addAtomisedNamesToMap(rankListToPrint, rank, atomisedName, atom);
2383             }
2384             if(children.item(i).getNodeName().equalsIgnoreCase("#text") && !StringUtils.isBlank(children.item(i).getTextContent())){
2385                 //                logger.info("name non atomised: "+children.item(i).getTextContent());
2386                 fullName = children.item(i).getTextContent().trim();
2387                 //                logger.info("fullname: "+fullName);
2388             }
2389         }
2390         originalName=fullName;
2391         fullName = cleanName(fullName, atomisedName);
2392         namesMap.put(fullName,atomisedMap);
2393
2394         String atomisedNameStr = getAtomisedNameStr(atomisedName);
2395
2396         if (fullName != null){
2397             //            System.out.println("fullname: "+fullName);
2398             //            System.out.println("atomised: "+atomisedNameStr);
2399             if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2400                 if (skippQuestion){
2401                     //                    String defaultN = "";
2402                     if (atomisedNameStr.length()>fullName.length()) {
2403                         newName=atomisedNameStr;
2404                     } else {
2405                         if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2406                             newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2407                         } else {
2408                             newName=fullName;
2409                         }
2410                     }
2411                 } else {
2412                     newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2413                 }
2414             } else {
2415                 newName=fullName;
2416             }
2417         }
2418         //not really needed
2419         //        rank = askForRank(newName, rank, nomenclaturalCode);
2420         //        System.out.println("atomised: "+atomisedMap.toString());
2421
2422         //        String[] names = new String[5];
2423         MyName myname = new MyName(true);
2424
2425         //System.out.println("Handle "+newName+ "(rank: "+rank+")");
2426         //        System.out.println(atomisedMap.keySet());
2427         fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2428         myname.setOriginalName(fullName);
2429         myname.setNewName(newName);
2430         myname.setRank(rank);
2431         myname.setIdentifier(identifier);
2432         myname.setStatus(status);
2433         myname.setSource(refMods);
2434
2435         //        boolean higherAdded=false;
2436
2437
2438         boolean parseNameManually=false;
2439         INonViralNameParser<?> parser = NonViralNameParserImpl.NewInstance();
2440         ITaxonNameBase  nameToBeFilledTest ;
2441
2442         //if selected the atomised version
2443         if(newName==atomisedNameStr){
2444             nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2445             if (nameToBeFilledTest.hasProblem()){
2446                 addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2447                 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode, rank);
2448                 if (nameToBeFilledTest.hasProblem()){
2449                     addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2450                     parseNameManually=true;
2451                 }
2452             }
2453         }else{
2454             nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2455             if (nameToBeFilledTest.hasProblem()){
2456                 addProblemNameToFile("fullversion",fullName, nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2457                 nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2458                 parseNameManually=true;
2459                 if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2460                     addNameDifferenceToFile(originalName,atomisedNameStr);
2461                 }
2462             }
2463         }
2464
2465         if(parseNameManually){
2466             //System.out.println("DO IT MANUALLY");
2467                 if (this.state2.getConfig().isUseOldUnparsedSynonymExtraction()){
2468                 createUnparsedSynonym(rank, newName, atomisedMap, myname);
2469                 }else{
2470                         createUnparsedSynonymNew(rank, newName, atomisedMap, myname, refMods);;
2471                 }
2472         } else{
2473             //System.out.println("AUTOMATIC!");
2474             //            createAtomisedTaxonString(newName, atomisedMap, myname);
2475             myname.setParsedName(nameToBeFilledTest);
2476             myname.buildTaxon();
2477         }
2478         //System.out.println("RETURN SYNONYM "+myname.getSyno().toString());
2479         return myname;
2480     }
2481
2482
        /**
     * Builds a {@link MyName} from a TaxonX name node.
     * <p>
     * The direct children of <code>name</code> are evaluated as follows:
     * <ul>
     * <li><code>tax:xmldata</code>: every child is passed to {@link #extractIdentifier(String, Node)},
     *     a <code>dwc:taxonRank</code> child sets the rank (if <code>getRank</code> returns a value)
     *     and every child is stored in the atomised map; the printable name parts are collected
     *     via <code>addAtomisedNamesToMap</code></li>
     * <li>non blank <code>#text</code>: its trimmed content is taken as the full name
     *     (the last such child wins)</li>
     * </ul>
     * Afterwards the name string to use (<code>newName</code>) is chosen between the full name and
     * the string joined from the atomised parts, the name is parsed and the result is either
     * handed to <code>createAtomisedTaxon</code> (parsing failed) or stored as parsed name before
     * <code>buildTaxon()</code> is called.
     *
     * @param name the name node to evaluate
     * @param refMods the reference which is set as source of the resulting name
     * @param followingText the text following the name node; passed on to <code>parseWithExtension</code>
     * @throws TransformerFactoryConfigurationError
     * @throws TransformerException
     * @return the {@link MyName} holding the extracted data
     */
    @SuppressWarnings({"rawtypes" })
    private MyName extractScientificName(Node name, Reference refMods, String followingText) throws TransformerFactoryConfigurationError, TransformerException {
        logger.info("extractScientificName");

        //node names (lower case) which addAtomisedNamesToMap adds to the atomised name list as they are
        String[] rankListToPrintLowerCase_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
        List<String> rankListToPrint = Arrays.asList(rankListToPrintLowerCase_tmp);

        Rank rank = Rank.UNKNOWN_RANK();
        NodeList children = name.getChildNodes();
        String originalName = "";
        String fullName = "";
        String newName = "";
        String identifier = "";
        //lower case node name -> trimmed text content of the xmldata children
        HashMap<String, String> atomisedMap = new HashMap<String, String>();
        //name parts in document order, later joined to the atomised name string
        List<String> atomisedNameList= new ArrayList<String>();

        String status= extractStatus(children);

        for (int i=0;i<children.getLength();i++){
                Node nameChild = children.item(i);
            if(nameChild.getNodeName().equalsIgnoreCase("tax:xmldata")){
                NodeList xmlDataChildren = nameChild.getChildNodes();
                for (int k=0;k<xmlDataChildren.getLength();k++){
                        Node xmlDataChild = xmlDataChildren.item(k);
                    //only tax:xid nodes change the identifier, all other nodes return it unchanged
                    identifier = extractIdentifier(identifier, xmlDataChild);
                    String rankStr = xmlDataChild.getNodeName().toLowerCase();
                    if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
                        //from here on rankStr holds the rank text, not the node name,
                        //so the map entry written below is keyed by the rank text itself
                        rankStr=xmlDataChild.getTextContent().trim();
                        Rank tmpRank = getRank(rankStr);
                        if (tmpRank != null){
                            rank=tmpRank;
                        }
                    }
                    //                    if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {

                    atomisedMap.put(rankStr.toLowerCase(),xmlDataChild.getTextContent().trim());
                }
                addAtomisedNamesToMap(rankListToPrint, rank, atomisedNameList, xmlDataChildren);
            }
            else if(nameChild.getNodeName().equalsIgnoreCase("#text") && ! nameChild.getTextContent().matches("\\s*")){
                //                logger.info("name non atomised: "+children.item(i).getTextContent());
                fullName = nameChild.getTextContent().trim();
                //                logger.info("fullname: "+fullName);
            }
        }
        //keep the untouched text for the name difference report further below
        originalName=fullName;
        fullName = cleanName(fullName, atomisedNameList);
        namesMap.put(fullName,atomisedMap);

        String atomisedNameStr = getAtomisedNameStr(atomisedNameList);

        //choose the name string to work with: text version, atomised version or the answer
        //of askWhichScientificName
        if (fullName != null){
            if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
                if (skippQuestion){
                    //no question wanted: the longer atomised string wins ...
                    if (atomisedNameStr.length()>fullName.length()) {
                        newName = atomisedNameStr;
                    } else {
                        //... but a longer text version of a rank below species not starting with "." is still asked for
                        if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
                            newName = askWhichScientificName(fullName, atomisedNameStr, classification.getTitleCache(), name);
                        } else {
                            newName = fullName;
                        }
                    }
                } else {
                    newName=askWhichScientificName(fullName, atomisedNameStr, classification.getTitleCache(), name);
                }
            } else {
                newName=fullName;
            }
        }
        //not really needed
        //        rank = askForRank(newName, rank, nomenclaturalCode);
        //        System.out.println("atomised: "+atomisedMap.toString());

        //        String[] names = new String[5];
        MyName myname = new MyName(false);

        //System.out.println("\n\nBUILD "+newName+ "(rank: "+rank+")");
        //        System.out.println(atomisedMap.keySet());
        //sets the author on myname and may return a shortened name (without the author part)
        fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
        myname.setOriginalName(fullName);
        myname.setNewName(newName);

        myname.setRank(rank);
        myname.setIdentifier(identifier);
        myname.setStatus(status);
        myname.setSource(refMods);

        //        boolean higherAdded=false;


        boolean parseNameManually=false;
        INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
        ITaxonNameBase  nameToBeFilledTest = null;

        //if selected the atomised version
        //NOTE(review): this is a reference comparison (==) on Strings. It is true only if newName
        //is the very same object as atomisedNameStr (e.g. after the direct assignment above);
        //an equal text held by another String object takes the else branch - confirm this is intended
        if(newName==atomisedNameStr){
            nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
            if (nameToBeFilledTest.hasProblem()){
                    addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
                //second try with the text version, only if this fails too the name is handled manually
                nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
                if (nameToBeFilledTest.hasProblem()){
                    addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
                    parseNameManually=true;
                }
            }
        }else{
            nameToBeFilledTest = parseWithExtension(parser, fullName , rank, followingText, atomisedMap);
            if (nameToBeFilledTest.hasProblem()){
                addProblemNameToFile("fullversion",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
                //here the name is handled manually no matter what the second parse returns
                nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
                parseNameManually=true;
                if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
                    addNameDifferenceToFile(originalName,atomisedNameStr);
                }
            }
        }

        //System.out.println("parseNameManually: "+parseNameManually);
        if(parseNameManually){
            createAtomisedTaxon(rank, newName, atomisedMap, myname);
        }
        else{
            createAtomisedTaxonString(newName, atomisedMap, myname);
            myname.setParsedName(nameToBeFilledTest);
            //TODO correct handling of createIfNotExists
                myname.buildTaxon();
        }
        return myname;

    }
2620
2621     private ITaxonNameBase parseWithExtension(INonViralNameParser parser, String atomisedNameStr, Rank rank, String followingText, HashMap<String, String> atomisedMap) {
2622         Object[] nameExtensionResult = getPossibleExtension(followingText, atomisedMap, nomenclaturalCode);
2623
2624         TaxonNameBase name = (TaxonNameBase)parser.parseFullName(atomisedNameStr, nomenclaturalCode, rank);
2625         if (nameExtensionResult != null && nameExtensionResult[0] != null){
2626                 String ext = (String)nameExtensionResult[0];
2627                 TaxonNameBase extName = (TaxonNameBase)parser.parseFullName(atomisedNameStr + " " + ext, nomenclaturalCode, rank);
2628                 if (! extName.hasProblem()){
2629                         name = extName;
2630                         this.usedFollowingTextPrefix = ext;
2631                         //TODO do we need to fill the atomisedMap at all?
2632                         if ((Boolean)(nameExtensionResult[1])){
2633                                 //TODO
2634                         }
2635                         if ((Boolean)(nameExtensionResult[2])){
2636                                 //TODO BasionymYear etc.
2637                                 Integer origYear = name.getPublicationYear();
2638                                 if (origYear != null){
2639                                         atomisedMap.put(PUBLICATION_YEAR, origYear.toString());
2640                                 }
2641                         }
2642                 }
2643         }
2644                 return name;
2645         }
2646
2647         private Object[] getPossibleExtension(String followingText, HashMap<String, String> atomisedMap, NomenclaturalCode nomenclaturalCode) {
2648                 if (StringUtils.isBlank(followingText)){
2649                         return null;
2650                 }
2651
2652         boolean includeAuthor = true;
2653         boolean includeYear = false;
2654                 if (atomisedMap.containsKey("dwc:scientificnameauthorship")){
2655                         includeAuthor = false;
2656                 }
2657         if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2658                 includeYear = true;
2659         }
2660         String patternStr = "";
2661         if (includeAuthor){
2662                 patternStr += NonViralNameParserImplRegExBase.capitalWord;
2663         }
2664         if (includeYear){
2665                 patternStr += "\\s*(,|\\s+)\\s*" + "(17|18|19|20)" + "\\d{2}" ;
2666         }
2667         String match = null;
2668         if (! patternStr.isEmpty()){
2669                 Pattern pattern = Pattern.compile("^" + patternStr);
2670                 Matcher matcher = pattern.matcher(followingText.trim());
2671                 if (matcher.find()){
2672                         match = matcher.group();
2673                 }
2674         }
2675
2676                 return new Object[]{match, includeAuthor, includeYear};
2677         }
2678
2679         /**
2680      * @param atomisedName
2681      * @return
2682      */
2683     private String getAtomisedNameStr(List<String> atomisedName) {
2684         //logger.info("getAtomisedNameStr");
2685         String atomisedNameStr = StringUtils.join(atomisedName," ");
2686         while(atomisedNameStr.contains("  ")) {
2687             atomisedNameStr=atomisedNameStr.replace("  ", " ");
2688         }
2689         atomisedNameStr=atomisedNameStr.trim();
2690         return atomisedNameStr;
2691     }
2692
2693     /**
2694      * @param children
2695      * @param status
2696      * @return
2697      */
2698     private String extractStatus(NodeList children) {
2699         logger.info("extractStatus");
2700         String status="";
2701         for (int i=0;i<children.getLength();i++){
2702             if(children.item(i).getNodeName().equalsIgnoreCase("tax:status") ||
2703                     (children.item(i).getNodeName().equalsIgnoreCase("tax:namePart") &&
2704                             children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("status"))){
2705                 status = children.item(i).getTextContent().trim();
2706             }
2707         }
2708         return status;
2709     }
2710
2711     /**
2712      * @param identifier
2713      * @param atom
2714      * @param k
2715      * @return
2716      */
2717     private String extractIdentifier(String identifier, Node atom) {
2718         //logger.info("extractIdentifier");
2719         if (atom.getNodeName().equalsIgnoreCase("tax:xid")){
2720             try{
2721                 identifier = atom.getAttributes().getNamedItem("identifier").getNodeValue();
2722             }catch(Exception e){
2723                 System.out.println("pb with identifier, maybe empty");
2724             }
2725             try{
2726                 identifier+="__"+atom.getAttributes().getNamedItem("source").getNodeValue();
2727             }catch(Exception e){
2728                 System.out.println("pb with identifier, maybe empty");
2729             }
2730         }
2731         return identifier;
2732     }
2733
2734     /**
2735      * @param rankListToPrint
2736      * @param rank
2737      * @param atomisedName
2738      * @param atom
2739      */
2740     private void addAtomisedNamesToMap(List<String> rankListToPrint, Rank rank, List<String> atomisedName, NodeList atom) {
2741         logger.info("addAtomisedNamesToMap");
2742         for (int k=0;k<atom.getLength();k++){
2743                 Node node = atom.item(k);
2744                 String nodeName = node.getNodeName();
2745             if (! nodeName.equalsIgnoreCase("dwc:taxonRank") ) {  //rank has been handled in higher method
2746                 if (nodeName.equalsIgnoreCase("dwc:subgenus") || nodeName.equalsIgnoreCase("dwcranks:subgenus")) {
2747                     atomisedName.add("("+ node.getTextContent().trim()+")");
2748                 } else if(nodeName.equalsIgnoreCase("dwcranks:varietyepithet") || nodeName.equalsIgnoreCase("dwc:Subspecies") || nodeName.equalsIgnoreCase("dwc:infraspecificepithet")) {
2749                         if(nodeName.equalsIgnoreCase("dwcranks:varietyepithet")){
2750                             atomisedName.add("var. "+node.getTextContent().trim());
2751                         }else if(nodeName.equalsIgnoreCase("dwc:Subspecies") || nodeName.equalsIgnoreCase("dwc:infraspecificepithet")) {
2752                             atomisedName.add("subsp. "+atom.item(k).getTextContent().trim());
2753                         }
2754                 } else if(rankListToPrint.contains(nodeName.toLowerCase())) {
2755                     atomisedName.add(node.getTextContent().trim());
2756                 } else{
2757                     if (rank.isHigher(Rank.GENUS()) && (nodeName.indexOf("dwcranks:")>-1 || nodeName.indexOf("dwc:Family")>-1)) {
2758                         atomisedName.add(node.getTextContent().trim());
2759                     }else if (nodeName.equals("#text")){
2760                         String text = node.getTextContent();
2761                         if (StringUtils.isNotBlank(text)){
2762                                 //TODO handle text
2763                                 logger.warn("name xmldata contains text. This is unhandled");
2764                         }
2765                     }else if (nodeName.matches("(?i)(dwc:Kingdom|dwc:Class|dwc:Order|dwc:Family)")){
2766                         //we currently do not use higher ranks information
2767                     }else{
2768                         //TODO handle unhandled node
2769                         logger.warn("Unhandled node: " + nodeName);
2770                     }
2771                 }
2772             }
2773         }
2774     }
2775
2776     /**
2777      * @param fullName
2778      * @param atomisedName
2779      * @return
2780      */
2781     private String cleanName(String name, List<String> atomisedName) {
2782         //logger.info("cleanName");
2783         String fullName =name;
2784         if (fullName != null){
2785             fullName = fullName.replace("( ", "(");
2786             fullName = fullName.replace(" )",")");
2787
2788             if (fullName.trim().isEmpty()){
2789                 fullName=StringUtils.join(atomisedName," ");
2790             }
2791
2792             while(fullName.contains("  ")) {
2793                 fullName=fullName.replace("  ", " ");
2794                 //            logger.info("while");
2795             }
2796             fullName=fullName.trim();
2797         }
2798         return fullName;
2799     }
2800
2801     /**
2802      * @param rank
2803      * @param fullName
2804      * @param atomisedMap
2805      * @param myname
2806      * @return
2807      */
2808     private String extractAuthorFromNames(Rank rank, String name, HashMap<String, String> atomisedMap, MyName myname) {
2809         logger.info("extractAuthorFromNames");
2810         String fullName=name;
2811         if (atomisedMap.get("dwc:scientificnameauthorship") == null && fullName!=null){
2812             //            System.out.println("rank : "+rank.toString());
2813             if(rank.isHigher(Rank.SPECIES())){
2814                 try{
2815                     String author=null;
2816                     if(atomisedMap.get("dwcranks:subgenus") != null) {
2817                         author = fullName.split(atomisedMap.get("dwcranks:subgenus"))[1].trim();
2818                     }
2819                     if(atomisedMap.get("dwc:subgenus") != null) {
2820                         author = fullName.split(atomisedMap.get("dwc:subgenus"))[1].trim();
2821                     }
2822                     if(author == null) {
2823                         if(atomisedMap.get("dwc:genus") != null) {
2824                             author = fullName.split(atomisedMap.get("dwc:genus"))[1].trim();
2825                         }
2826                     }
2827                     if(author != null){
2828                         fullName = fullName.substring(0, fullName.indexOf(author));
2829                         author=author.replaceAll(",","").trim();
2830                         myname.setAuthor(author);
2831                     }
2832                 }catch(Exception e){
2833                     //could not extract the author
2834                 }
2835             }
2836             if(rank.equals(Rank.SPECIES())){
2837                 try{
2838                     String author=null;
2839                     if(author == null) {
2840                         if(atomisedMap.get("dwc:species") != null) {
2841                             String[] t = fullName.split(atomisedMap.get("dwc:species"));
2842                             //                            System.out.println("NB ELEMENTS "+t.length +"fullName "+fullName+", "+atomisedMap.get("dwc:species"));
2843                             author = fullName.split(atomisedMap.get("dwc:species"))[1].trim();
2844                             //                            System.out.println("AUTEUR "+author);
2845                         }
2846                     }
2847                     if(author != null){
2848                         fullName = fullName.substring(0, fullName.indexOf(author));
2849                         author=author.replaceAll(",","").trim();
2850                         myname.setAuthor(author);
2851                     }
2852                 }catch(Exception e){
2853                     //could not extract the author
2854                 }
2855             }
2856         }else{
2857             myname.setAuthor(atomisedMap.get("dwc:scientificnameauthorship"));
2858         }
2859         return fullName;
2860     }
2861
    /**
     * Copies the atomised name parts as strings to the given name object.
     * <p>
     * For each of the keys family, subfamily, tribe, subtribe, genus, subgenus, species,
     * subspecies / infraspecific epithet, variety and form the value of the atomised map
     * is set on <code>myname</code> if it exists and <code>checkRankValidForImport</code>
     * accepts the according rank. The publication year is set if the map holds it.
     *
     * @param newName the name string; within this method only used in the species section
     * @param atomisedMap the atomised name parts (lower case node name -> text)
     * @param myname the name object to be filled
     */
    private void createAtomisedTaxonString(String newName, HashMap<String, String> atomisedMap, MyName myname) {
        logger.info("createAtomisedTaxonString "+atomisedMap);
        if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
            myname.setFamilyStr(atomisedMap.get("dwc:family"));
        }
        if(atomisedMap.get("dwcranks:subfamily") != null  && checkRankValidForImport(Rank.SUBFAMILY())){
            myname.setSubfamilyStr(atomisedMap.get("dwcranks:subfamily"));
        }
        if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
            myname.setTribeStr(atomisedMap.get("dwcranks:tribe"));
        }
        if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
            myname.setSubtribeStr(atomisedMap.get("dwcranks:subtribe"));
        }
        if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
            myname.setGenusStr(atomisedMap.get("dwc:genus"));
        }
        //subgenus may come with 2 different keys, if both exist "dwc:subgenus" wins
        if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
            myname.setSubgenusStr(atomisedMap.get("dwcranks:subgenus"));
        }
        if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
            myname.setSubgenusStr(atomisedMap.get("dwc:subgenus"));
        }
        if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
            //NOTE(review): the value computed in n is never read within this method and the author
            //changed further below is reset at the end of this section. Still the computation may
            //throw (epithets are used as regular expression in split, [0] on an empty split result)
            //- check if everything except setSpeciesStr can be removed
            String n=newName;
            //cut the name before the infraspecific part and remove the rank marker
            if(atomisedMap.get("dwc:infraspecificepithet") != null) {
                n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
                n=n.replace("subsp.","");
            }
            if(atomisedMap.get("dwc:subspecies") != null) {
                n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
                n=n.replace("subsp.","");
            }
            if(atomisedMap.get("dwcranks:varietyepithet") != null) {
                n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
                n=n.replace("var.","");
                n=n.replace("v.","");
            }
            if(atomisedMap.get("dwcranks:formepithet") != null) {
                //TODO
                System.out.println("TODO FORMA");
                n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
                n=n.replace("forma","");
            }
            n=n.trim();
            //remember the current author, it is set again at the end of this section
            String author = myname.getAuthor();
            //more than 2 words: the first 2 words are kept as name, the rest is taken as author
            if(n.split(" ").length>2){

                String n2=n.split(" ")[0]+" "+n.split(" ")[1];
                String a= "";
                try{
                    a=n.split(n2)[1].trim();
                }catch(Exception e){
                    logger.info("no author in "+n+"?");}

                myname.setAuthor(a);
                //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
                n=n2;

            }

            myname.setSpeciesStr(atomisedMap.get("dwc:species"));
            myname.setAuthor(author);
        }
        //subspecies may come with 2 different keys, if both exist "dwc:infraspecificepithet" wins
        if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
            myname.setSubspeciesStr(atomisedMap.get("dwc:subspecies"));
        }
        if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
            myname.setSubspeciesStr(atomisedMap.get("dwc:infraspecificepithet"));
        }
        if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
            myname.setVarietyStr(atomisedMap.get("dwcranks:varietyepithet"));
        }
        if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
            myname.setFormStr(atomisedMap.get("dwcranks:formepithet"));
        }
        //the year is written to the map by parseWithExtension
        if (atomisedMap.get(PUBLICATION_YEAR) != null){
                myname.setPublicationYear(Integer.valueOf(atomisedMap.get(PUBLICATION_YEAR)));
        }
    }
2947
2948     /**
2949      * @see #createUnparsedSynonymNew(Rank, String, HashMap, MyName)
2950      * @param rank
2951      * @param newName
2952      * @param atomisedMap
2953      * @param myname
2954      */
2955     private void createUnparsedSynonym(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
2956         logger.info("createSynonym");
2957         //System.out.println("createsynonym");
2958         if(rank.equals(Rank.UNKNOWN_RANK())){
2959             myname.setNotParsableTaxon(newName);
2960         }else{
2961                 if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY()) && rank.equals(Rank.FAMILY())){
2962                     myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
2963                 }
2964                 if(atomisedMap.get("dwcranks:subfamily") != null  && checkRankValidForImport(Rank.SUBFAMILY()) && rank.equals(Rank.SUBFAMILY())){
2965                     myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
2966                 }
2967                 if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE()) && rank.equals(Rank.TRIBE())){
2968                     myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
2969                 }
2970                 if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE()) && rank.equals(Rank.SUBTRIBE())){
2971                     myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
2972                 }
2973                 if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS()) && rank.equals(Rank.GENUS())){
2974                     myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
2975                 }
2976                 if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
2977                     myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
2978                 }
2979                 if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
2980                     myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
2981                 }
2982                 if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES()) && rank.equals(Rank.SPECIES())){
2983                     String n=newName;
2984                     if(atomisedMap.get("dwc:infraspecificepithet") != null) {
2985                         n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
2986                         n=n.replace("subsp.","");
2987                     }
2988                     if(atomisedMap.get("dwc:subspecies") != null) {
2989                         n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
2990                         n=n.replace("subsp.","");
2991                     }
2992                     if(atomisedMap.get("dwcranks:varietyepithet") != null) {
2993                         n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
2994                         n=n.replace("var.","");
2995                         n=n.replace("v.","");
2996                     }
2997                     if(atomisedMap.get("dwcranks:formepithet") != null) {
2998                         //TODO
2999                         //System.out.println("TODO FORMA");
3000                         n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3001                         n=n.replace("forma","");
3002                     }
3003                     n=n.trim();
3004                     String author = myname.getAuthor();
3005                     if(n.split(" ").length>2){
3006
3007                         String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3008                         String a="";
3009                         try{
3010                             a= n.split(n2)[1].trim();
3011                         }catch(Exception e){logger.info("no author in "+n);}
3012                         myname.setAuthor(a);
3013                         //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3014                         n=n2;
3015
3016                     }
3017                     Taxon species = myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank);
3018                     myname.setSpecies(species);
3019                     myname.setAuthor(author);
3020                 }
3021                 if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3022                     myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3023                 }
3024                 if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3025                     myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3026                 }
3027                 if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY()) && rank.equals(Rank.VARIETY())){
3028                     myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3029                 }
3030                 if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM()) && rank.equals(Rank.FORM())){
3031                     myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3032                 }
3033         }
3034
3035     }
3036
3037
3038     /**
3039      * @param refMods
3040      * @see #createUnparsedSynonym(Rank, String, HashMap, MyName)
3041      * the original TaxonXImport extracted Synonyms by creating acc Taxa with partial names
3042      * I (AM) do not understand this but don't want to destroy code which maybe works in some cases) there
3043      * I created this switch for old
3044      * for Spiders the new version is preferred
3045      */
3046     private void createUnparsedSynonymNew(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname, Reference refMods) {
3047         logger.info("createSynonym");
3048
3049         INonViralName nameToBeFilled = this.getNonViralNameAccNomenclature();
3050         //System.out.println("createsynonym");
3051         if(rank.equals(Rank.UNKNOWN_RANK())){
3052             //TODO
3053                 myname.setNotParsableTaxon(newName);
3054
3055                 nameToBeFilled.setTitleCache(newName, true);
3056         }else{
3057                 if(atomisedMap.get("dwc:genus") != null ){
3058                         nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwc:genus"));
3059                 }
3060                 if (rank.isSupraGeneric()){
3061                         if (atomisedMap.get("dwcranks:subtribe") != null ){
3062                         nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subtribe"));
3063                 }else if (atomisedMap.get("dwcranks:subtribe") != null ){
3064                         nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subtribe"));
3065                 }else if (atomisedMap.get("dwcranks:tribe") != null ){
3066                         nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:tribe"));
3067                 }else if (atomisedMap.get("dwcranks:subfamily") != null ){
3068                         nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subfamily"));
3069                 }else if (atomisedMap.get("dwc:family") != null ){
3070                         nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwc:family"));
3071                     }else{
3072                         logger.warn("Supra generic rank not yet handled or atomisation not available");
3073                     }
3074                 }
3075                 if (atomisedMap.get("dwcranks:subgenus") != null){
3076                         nameToBeFilled.setInfraGenericEpithet(atomisedMap.get("dwcranks:subgenus"));
3077                 }
3078                 if (atomisedMap.get("dwc:subgenus") != null){
3079                         nameToBeFilled.setInfraGenericEpithet(atomisedMap.get("dwc:subgenus"));
3080                 }
3081                 if (atomisedMap.get("dwc:species") != null){
3082                         nameToBeFilled.setSpecificEpithet(atomisedMap.get("dwc:species"));
3083                 }
3084                 if (atomisedMap.get("dwcranks:formepithet") != null){
3085                         nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwcranks:formepithet"));
3086                 }else if (atomisedMap.get("dwcranks:varietyepithet") != null){
3087                         nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwcranks:varietyepithet"));
3088                 }else if (atomisedMap.get("dwc:infraspecificepithet") != null){
3089                         nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwc:infraspecificepithet"));
3090                 }else if (atomisedMap.get("dwc:subspecies") != null){
3091                         nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwc:subspecies"));
3092                 }
3093             Reference sec = sourceUrlRef;
3094             if(!state2.getConfig().doKeepOriginalSecundum()){
3095                 sec = state2.getConfig().getSecundum();
3096             }
3097                 Synonym syn = Synonym.NewInstance(nameToBeFilled, sec);
3098 //              sourceHandler.addSource(refMods, syn);
3099                 myname.setSyno(syn);
3100                 myname.setSynonym(true);
3101         }
3102         }
3103
3104     /**
3105      * @param rank
3106      * @param newName
3107      * @param atomisedMap
3108      * @param myname
3109      */
3110     private void createAtomisedTaxon(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
3111         logger.info("createAtomisedTaxon "+atomisedMap);
3112         if(rank.equals(Rank.UNKNOWN_RANK())){
3113             myname.setNotParsableTaxon(newName);
3114         }
3115         else{
3116             if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
3117                 myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
3118             }
3119             if(atomisedMap.get("dwcranks:subfamily") != null  && checkRankValidForImport(Rank.SUBFAMILY())){
3120                 myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
3121             }
3122             if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
3123                 myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
3124             }
3125             if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
3126                 myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
3127             }
3128             if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
3129                 myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
3130             }
3131             if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3132                 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
3133             }
3134             if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3135                 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
3136             }
3137             if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
3138                 String n=newName;
3139                 if(atomisedMap.get("dwc:infraspecificepithet") != null) {
3140                     n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
3141                     n=n.replace("subsp.","");
3142                 }
3143                 if(atomisedMap.get("dwc:subspecies") != null) {
3144                     n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
3145                     n=n.replace("subsp.","");
3146                 }
3147                 if(atomisedMap.get("dwcranks:varietyepithet") != null) {
3148                     n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
3149                     n=n.replace("var.","");
3150                     n=n.replace("v.","");
3151                 }
3152                 if(atomisedMap.get("dwcranks:formepithet") != null) {
3153                     //TODO
3154                     //System.out.println("TODO FORMA");
3155                     n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3156                     n=n.replace("forma","");
3157                 }
3158                 n=n.trim();
3159                 String author = myname.getAuthor();
3160                 if(n.split(" ").length>2){
3161                     String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3162                     String a="";
3163                     try{
3164                         a= n.split(n2)[1].trim();
3165                     }catch(Exception e){logger.info("no author  in "+n);}
3166                     myname.setAuthor(a);
3167                     //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3168                     n=n2;
3169
3170                 }
3171
3172                 myname.setSpecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank));
3173                 myname.setAuthor(author);
3174             }
3175             if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3176                 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3177             }
3178             if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3179                 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3180             }
3181             if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
3182                 myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3183             }
3184             if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
3185                 myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3186             }
3187         }
3188     }
3189
3190     /**
3191      * @return
3192      */
3193     private boolean checkRankValidForImport(Rank currentRank) {
3194         //logger.info("checkRankValidForImport");
3195         return currentRank.isLower(state2.getConfig().getMaxRank()) || currentRank.equals(state2.getConfig().getMaxRank());
3196     }
3197
3198
3199
3200     /**
3201      * @param classification2
3202      */
3203     public void updateClassification(Classification classification2) {
3204         //logger.info("updateClassification");
3205         classification = classification2;
3206     }
3207
3208
3209
3210     public class MyName {
3211         /**
3212          * @param isSynonym
3213          */
3214         public MyName(boolean isSynonym) {
3215             super();
3216             this.isSynonym = isSynonym;
3217         }
3218
        // Plain string values of the name; they start out empty (author starts as null).
        String originalName="";
        String newName="";
        // compared with the rank of candidate names in setNotParsableTaxon
        Rank rank=Rank.UNKNOWN_RANK();
        String identifier="";
        String status="";
        // setNotParsableTaxon only assigns an LSID if this is not null
        String author=null;

        // the name object; assigned at the end of setNotParsableTaxon, saved and used by buildTaxon
        TaxonNameBase<?,?> taxonNameBase;

        // set via setSource; passed as reference when Taxon/Synonym instances are created
        Reference refMods ;

        // presumably the per-rank taxa/names set by setters outside this section - TODO confirm
        Taxon family,subfamily,tribe,subtribe,genus,subgenus,species,subspecies, variety,form;
        INonViralName familyName, subfamilyName, tribeName,subtribeName,genusName,subgenusName,speciesName,subspeciesName;
        // per-rank name parts as strings; exposed by the get/set...Str methods and listed by toString
        String familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr;
        Integer publicationYear;


                Taxon higherTaxa;
        Rank higherRank;
        // results of setNotParsableTaxon/buildTaxon: taxon is set for accepted names, syno for synonyms
        private Taxon taxon;
        private Synonym syno;
3240
3241         /**
3242          * @return the syno
3243          */
3244         public Synonym getSyno() {
3245             return syno;
3246         }
3247
3248         @Override
3249         public String toString(){
3250             List<String> tot=new ArrayList<String>();
3251             String[] n= {familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr};
3252             for (String elt:n){
3253                 if (!StringUtils.isEmpty(elt)) {
3254                     tot.add(elt);
3255                 } else {
3256                     tot.add("*");
3257                 }
3258             }
3259             return StringUtils.join(tot," ");
3260         }
3261         /**
3262          * @param syno the syno to set
3263          */
3264         public void setSyno(Synonym syno) {
3265             this.syno = syno;
3266         }
3267
        // Set by the constructor and setSynonym; setNotParsableTaxon and buildTaxon create
        // a Synonym instead of a Taxon when this is true.
        boolean isSynonym=false;
3269
3270         /**
3271          * @return the isSynonym
3272          */
3273         public boolean isSynonym() {
3274             return isSynonym;
3275         }
3276
3277         /**
3278          * @param isSynonym the isSynonym to set
3279          */
3280         public void setSynonym(boolean isSynonym) {
3281             this.isSynonym = isSynonym;
3282         }
3283
3284         public void setSource(Reference re){
3285             refMods=re;
3286         }
3287
3288         /**
3289          * @param string
3290          */
3291         public void setFormStr(String string) {
3292             this.formStr=string;
3293
3294         }
3295         /**
3296          * @param string
3297          */
3298         public void setVarietyStr(String string) {
3299             this.varietyStr=string;
3300
3301         }
3302         /**
3303          * @param string
3304          */
3305         public void setSubspeciesStr(String string) {
3306             this.subspeciesStr=string;
3307
3308         }
3309         /**
3310          * @param string
3311          */
3312         public void setSpeciesStr(String string) {
3313             this.speciesStr=string;
3314
3315         }
3316         /**
3317          * @param string
3318          */
3319         public void setSubgenusStr(String string) {
3320             this.subgenusStr=string;
3321
3322         }
3323         /**
3324          * @param string
3325          */
3326         public void setGenusStr(String string) {
3327             this.genusStr=string;
3328
3329         }
3330         /**
3331          * @param string
3332          */
3333         public void setSubtribeStr(String string) {
3334             this.subtribeStr=string;
3335
3336         }
3337         /**
3338          * @param string
3339          */
3340         public void setTribeStr(String string) {
3341             this.tribeStr=string;
3342
3343         }
3344         /**
3345          * @param string
3346          */
3347         public void setSubfamilyStr(String string) {
3348             this.subfamilyStr=string;
3349
3350         }
3351         /**
3352          * @param string
3353          */
3354         public void setFamilyStr(String string) {
3355             this.familyStr=string;
3356
3357         }
3358         /**
3359          * @return the familyStr
3360          */
3361         public String getFamilyStr() {
3362             return familyStr;
3363         }
3364         /**
3365          * @return the subfamilyStr
3366          */
3367         public String getSubfamilyStr() {
3368             return subfamilyStr;
3369         }
3370         /**
3371          * @return the tribeStr
3372          */
3373         public String getTribeStr() {
3374             return tribeStr;
3375         }
3376         /**
3377          * @return the subtribeStr
3378          */
3379         public String getSubtribeStr() {
3380             return subtribeStr;
3381         }
3382         /**
3383          * @return the genusStr
3384          */
3385         public String getGenusStr() {
3386             return genusStr;
3387         }
3388         /**
3389          * @return the subgenusStr
3390          */
3391         public String getSubgenusStr() {
3392             return subgenusStr;
3393         }
3394         /**
3395          * @return the speciesStr
3396          */
3397         public String getSpeciesStr() {
3398             return speciesStr;
3399         }
3400         /**
3401          * @return the subspeciesStr
3402          */
3403         public String getSubspeciesStr() {
3404             return subspeciesStr;
3405         }
3406         /**
3407          * @return the formStr
3408          */
3409         public String getFormStr() {
3410             return formStr;
3411         }
3412         /**
3413          * @return the varietyStr
3414          */
3415         public String getVarietyStr() {
3416             return varietyStr;
3417         }
3418
3419         public Integer getPublicationYear() {
3420                         return publicationYear;
3421                 }
3422
3423                 public void setPublicationYear(Integer publicationYear) {
3424                         this.publicationYear = publicationYear;
3425                 }
3426
3427         /**
3428          * @param newName2
3429          */
3430         public void setNotParsableTaxon(String newName2) {
3431             //takes too much time
3432             //            List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3433
3434             NomenclaturalStatusType statusType = null;
3435             if (!getStatus().isEmpty()){
3436                 try {
3437                     statusType = nomStatusString2NomStatus(getStatus());
3438                 } catch (UnknownCdmTypeException e) {
3439                     addProblematicStatusToFile(getStatus());
3440                     logger.warn("Problem with status");
3441                 }
3442             }
3443             List<TaxonBase> tmpList = new ArrayList<>();
3444
3445             Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, newName2, MatchMode.BEGINNING, null, null, null, null, null);
3446             tmpList.addAll(taxontest.getRecords());
3447
3448             //logger.info("tmpList returned: "+tmpList.size());
3449
3450
3451             INonViralName identicName = null;
3452             boolean foundIdentic=false;
3453             TaxonBase<?> tmpTaxonBase=null;
3454             //            Taxon tmpPartial=null;
3455             for (TaxonBase<?> tmpb:tmpList){
3456                 if(tmpb !=null){
3457                     TaxonNameBase<?,?> tnb =  tmpb.getName();
3458                     Rank crank=null;
3459                     if (tnb != null){
3460                         if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(newName2) ){
3461                             crank =tnb.getRank();
3462                             if (crank !=null && rank !=null){
3463                                 if (crank.equals(rank)){
3464                                         identicName = tnb;
3465                                         if (isSynonym && tmpb.isInstanceOf(Synonym.class) || !isSynonym && tmpb.isInstanceOf(Taxon.class)){
3466                                                 foundIdentic=true;
3467                                                 tmpTaxonBase=tmpb;
3468                                                 break;
3469                                         }
3470                                 }
3471                             }
3472                         }
3473                     }
3474                 }
3475             }
3476             boolean statusMatch=false;
3477             boolean appendedMatch=false;
3478             if(tmpTaxonBase !=null && foundIdentic){
3479                 statusMatch=compareStatus(tmpTaxonBase, statusType);
3480                 if (!getStatus().isEmpty() && ! (tmpTaxonBase.getAppendedPhrase() == null)) {
3481                     appendedMatch=tmpTaxonBase.getAppendedPhrase().equals(getStatus());
3482                 }
3483                 if (getStatus().isEmpty() && tmpTaxonBase.getAppendedPhrase() == null) {
3484                     appendedMatch=true;
3485                 }
3486
3487             }
3488             if ((tmpTaxonBase == null || !foundIdentic) ||  (tmpTaxonBase != null && !statusMatch) ||  (tmpTaxonBase != null && !appendedMatch && !statusMatch)){
3489
3490                 INonViralName tnb;
3491                 if (identicName == null){
3492                         tnb = getNonViralNameAccNomenclature();
3493                         tnb.setRank(rank);
3494
3495                         if(statusType != null) {
3496                             tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3497                         }
3498                         if(StringUtils.isNotBlank(getStatus())) {
3499                             tnb.setAppendedPhrase(getStatus());
3500                         }
3501                         tnb.setTitleCache(newName2,true);
3502                         tmpTaxonBase = findMatchingTaxon(tnb,refMods);
3503                     }else{
3504                         tnb = identicName;
3505                 }
3506
3507                 if(tmpTaxonBase==null){
3508                     tmpTaxonBase = isSynonym ? Synonym.NewInstance(tnb, refMods) : Taxon.NewInstance(tnb, refMods);
3509                     if(!state2.getConfig().doKeepOriginalSecundum()) {
3510                         tmpTaxonBase.setSec(state2.getConfig().getSecundum());
3511                     }
3512                     //tmptaxonbase.setSec(refMods);
3513                     if(!isSynonym) {
3514                         classification.addChildTaxon((Taxon)tmpTaxonBase, null, null);
3515                         sourceHandler.addSource(refMods, (Taxon)tmpTaxonBase);
3516                     }
3517                 }
3518             }
3519
3520             tmpTaxonBase = CdmBase.deproxy(tmpTaxonBase, TaxonBase.class);
3521             if (author != null) {
3522                 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
3523                     setLSID(getIdentifier(), tmpTaxonBase);
3524                     importer.getTaxonService().saveOrUpdate(tmpTaxonBase);
3525                     tmpTaxonBase = CdmBase.deproxy(tmpTaxonBase, TaxonBase.class);
3526                 }
3527             }
3528             TaxonNameBase<?,?> tnb = CdmBase.deproxy(tmpTaxonBase.getName(), TaxonNameBase.class);
3529
3530             if(!isSynonym) {
3531                 this.taxon=(Taxon)tmpTaxonBase;
3532             } else {
3533                 if (tmpTaxonBase instanceof Taxon){
3534                         logger.warn("Incorrect status");
3535                 }
3536                 this.syno=(Synonym)tmpTaxonBase;
3537             }
3538
3539             taxonNameBase = tnb;
3540
3541         }
3542
3543         /**
3544          *
3545          */
3546         public void buildTaxon() {
3547             //System.out.println("BUILD TAXON");
3548             logger.info("buildTaxon");
3549             NomenclaturalStatusType statusType = null;
3550             if (!getStatus().isEmpty()){
3551                 status = getStatus();
3552                 String newNameStatus = newNameStatus(status);
3553                 if (newNameStatus != null){
3554                         taxonNameBase.setAppendedPhrase(newNameStatus);
3555                 }else{
3556                         try {
3557                                 statusType = nomStatusString2NomStatus(getStatus());
3558                                 taxonNameBase.addStatus(NomenclaturalStatus.NewInstance(statusType));
3559                         } catch (UnknownCdmTypeException e) {
3560                                 addProblematicStatusToFile(getStatus());
3561                                 logger.warn("Problem with status");
3562                         }
3563                 }
3564             }
3565             importer.getNameService().save(taxonNameBase);
3566
3567             TaxonBase<?> tmpTaxonBase;
3568             if (!isSynonym) {
3569                 tmpTaxonBase =Taxon.NewInstance(taxonNameBase, refMods); //sec set null
3570             }
3571             else {
3572                 tmpTaxonBase =Synonym.NewInstance(taxonNameBase, refMods); //sec set null
3573             }
3574             boolean exist = false;
3575             if (!isSynonym){
3576                     for (TaxonNode node : classification.getAllNodes()){
3577                         try{
3578                                 Taxon nodeTaxon = node.getTaxon();
3579                                 boolean titleMatches = nodeTaxon.getTitleCache().equalsIgnoreCase(tmpTaxonBase.getTitleCache());
3580                                 boolean nomStatusMatches = compareStatus(node.getTaxon(), statusType);
3581                                 boolean nodeNameReplaceable = checkNodeNameReplaceable(nodeTaxon, tmpTaxonBase);
3582                             if(titleMatches && nomStatusMatches) {
3583                                 if (!isSynonym) {
3584                                         tmpTaxonBase=CdmBase.deproxy(nodeTaxon, TaxonBase.class);
3585                                     exist =true;
3586                                 } else {
3587                                     logger.info("Found the same name but from another type (taxon/synonym)");
3588                                     TaxonNameBase<?,?> existingTnb = getTaxon().getName();
3589                                 tmpTaxonBase = Synonym.NewInstance(existingTnb, refMods);
3590                                 importer.getTaxonService().saveOrUpdate(tmpTaxonBase);
3591                                 exist =true;
3592                             }
3593                             }else if (nodeNameReplaceable){
3594                                 nodeTaxon.setName(tmpTaxonBase.getName());
3595                                 tmpTaxonBase = nodeTaxon;
3596                                 exist = true;
3597                             }
3598                         }catch(NullPointerException n){logger.warn(" A taxon is either null or its titlecache is null - ignore it?");}
3599                     }
3600             }
3601             if (!exist){
3602
3603                 boolean insertAsExisting =false;
3604                 List<Taxon> existingTaxons=new ArrayList<Taxon>();
3605                 try {
3606                     existingTaxons = getMatchingTaxa(taxonNameBase);
3607                 } catch (Exception e1) {
3608                     e1.printStackTrace();
3609                 }
3610                 double similarityScore=0.0;
3611                 double similarityAuthor=-1;
3612                 String author1="";
3613                 String author2="";
3614                 String t1="";
3615                 String t2="";
3616                 for (Taxon bestMatchingTaxon : existingTaxons){
3617                     //System.out.println("tnbase "+taxonnamebase.getTitleCache());
3618                     //System.out.println("bestex "+bestMatchingTaxon.getTitleCache());
3619                     if(taxonNameBase.getAuthorshipCache()!=null) {
3620                         author1=taxonNameBase.getAuthorshipCache();
3621                     }
3622                     try {
3623                         if(bestMatchingTaxon.getName().getAuthorshipCache()!=null) {
3624                             author2=bestMatchingTaxon.getName().getAuthorshipCache();
3625                         }
3626                     } catch (Exception e) {
3627                         // TODO Auto-generated catch block
3628                         e.printStackTrace();
3629                     }
3630                     try {
3631                         t1=taxonNameBase.getTitleCache();
3632                         if (author1!=null && !StringUtils.isEmpty(author1)) {
3633                             t1=t1.split(Pattern.quote(author1))[0];
3634                         }
3635                     } catch (Exception e) {
3636                         // TODO Auto-generated catch block
3637                         e.printStackTrace();
3638                     }
3639                     try {
3640                         t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
3641                         if (author2!=null && !StringUtils.isEmpty(author2)) {
3642                             t2=t2.split(Pattern.quote(author2))[0];
3643                         }
3644                     } catch (Exception e) {
3645                         // TODO Auto-generated catch block
3646                         e.printStackTrace();
3647                     }
3648
3649                     similarityScore=similarity(t1.trim(), t2.trim());
3650                     //System.out.println("taxonscore "+similarityScore);
3651                     similarityAuthor=similarity(author1.trim(), author2.trim());
3652                     //System.out.println("authorscore "+similarityAuthor);
3653                     insertAsExisting = compareAndCheckTaxon(taxonNameBase, refMods, similarityScore, bestMatchingTaxon, similarityAuthor);
3654                     if(insertAsExisting) {
3655                         tmpTaxonBase=bestMatchingTaxon;
3656                         break;
3657                     }
3658                 }
3659                 if ( !insertAsExisting ){
3660                     if(!state2.getConfig().doKeepOriginalSecundum()) {
3661                         tmpTaxonBase.setSec(state2.getConfig().getSecundum());
3662                     }
3663
3664                     //                    tmptaxonbase.setSec(refMods);
3665                     if (taxonNameBase.getRank().equals(state2.getConfig().getMaxRank())) {
3666                         //System.out.println("****************************"+tmptaxonbase);
3667                         if (!isSynonym) {
3668                             classification.addChildTaxon((Taxon)tmpTaxonBase, refMods, null);
3669                         }
3670                     } else{
3671                         hierarchy = new HashMap<Rank, Taxon>();
3672                         //System.out.println("LOOK FOR PARENT "+taxonnamebase.toString()+", "+tmptaxonbase.toString());
3673                         if (!isSynonym){
3674                             lookForParentNode(taxonNameBase,(Taxon)tmpTaxonBase, refMods,this);
3675                             //System.out.println("HIERARCHY "+hierarchy);
3676                             Taxon parent = buildHierarchy();
3677                             if(!taxonExistsInClassification(parent,(Taxon)tmpTaxonBase)){
3678                                 if(parent !=null) {
3679                                     classification.addParentChild(parent, (Taxon)tmpTaxonBase, refMods, null);
3680                                 } else {
3681                                     classification.addChildTaxon((Taxon)tmpTaxonBase, refMods, null);
3682                                 }
3683                                 importer.getClassificationService().saveOrUpdate(classification);
3684                             }
3685                         }
3686                         //                        Set<TaxonNode> nodeList = classification.getAllNodes();
3687                         //                        for(TaxonNode tn:nodeList) {
3688                         //                            System.out.println(tn.getTaxon());
3689                         //                        }
3690                     }
3691                 }
3692                 importer.getClassificationService().saveOrUpdate(classification);
3693                  if(isSynonym) {
3694                     try{
3695                         Synonym castTest=CdmBase.deproxy(tmpTaxonBase, Synonym.class);
3696                     }catch(Exception e){
3697                         TaxonNameBase<?,?> existingTnb = tmpTaxonBase.getName();
3698                         Synonym castTest = Synonym.NewInstance(existingTnb, refMods);
3699                         importer.getTaxonService().saveOrUpdate(castTest);
3700                         tmpTaxonBase=CdmBase.deproxy(castTest, Synonym.class);
3701                     }
3702                 }
3703             }
3704             if(!isSynonym) {
3705                 taxon=CdmBase.deproxy(tmpTaxonBase, Taxon.class);
3706             } else {
3707                 syno=CdmBase.deproxy(tmpTaxonBase, Synonym.class);
3708             }
3709
3710         }
3711
3712                 private boolean checkNodeNameReplaceable(Taxon nodeTaxon, TaxonBase<?> newTaxon) {
3713                         //TODO preliminary check
3714                         if (newTaxon.isInstanceOf(Synonym.class)){
3715                                 return false;
3716                         }
3717                         INonViralName nodeName = nodeTaxon.getName();
3718                         INonViralName newName = newTaxon.getName();
3719                         if (nodeTaxon.getName() == null ||  newName == null){
3720                                 return false;
3721                         }
3722                         if (nodeTaxon.getDescriptions().size() > 0 || nodeName.getDescriptions().size() > 0 || nodeName.getTypeDesignations().size() > 0 ){
3723                                 return false;
3724                         }
3725                         boolean compare = true;
3726                         for (NomenclaturalStatus status : newName.getStatus() ){
3727                                 compare &= compareStatus(nodeTaxon, status.getType());
3728                         }
3729                         if (! compare){
3730                                 return false;
3731                         }
3732
3733                         if (nodeName.getNameCache() != null && nodeName.getNameCache().equals(newName.getNameCache())){
3734                                 if (nodeName.getNameCache().equals(nodeName.getTitleCache())){
3735                                         if (newName.getNameCache().length() < newName.getTitleCache().length()){
3736                                                 logger.warn("We still need to check, if node was automatically created via hierarchy creation: " + nodeName.getNameCache());
3737                                                 return true;
3738                                         }
3739                                 }
3740                         }
3741
3742                         return false;
3743                 }
3744
3745                 /**
3746          *
3747          */
3748         private Taxon buildHierarchy() {
3749             logger.info("buildHierarchy");
3750             Taxon higherTaxon = null;
3751             //add the maxRank as a root
3752             if(hierarchy.containsKey(state2.getConfig().getMaxRank())){
3753                 Taxon ct=hierarchy.get(state2.getConfig().getMaxRank());
3754                 if(!taxonExistsInClassification(higherTaxon, ct)) {
3755                    classification.addChildTaxon(ct, refMods, null);
3756                 }
3757                 higherTaxon = hierarchy.get(state2.getConfig().getMaxRank());
3758                 //                return higherTaxon;
3759             }
3760             //add the relation to the highertaxon, except if the current rank to add IS the maxRank
3761
3762             //TODO higher Ranks
3763
3764             if(hierarchy.containsKey(Rank.FAMILY()) && !state2.getConfig().getMaxRank().equals(Rank.FAMILY())){
3765                 higherTaxon=saveAndGetHigherTaxon(Rank.FAMILY(),higherTaxon);
3766             }
3767             if(hierarchy.containsKey(Rank.SUBFAMILY()) && !state2.getConfig().getMaxRank().equals(Rank.SUBFAMILY())){
3768                 higherTaxon=saveAndGetHigherTaxon(Rank.SUBFAMILY(),higherTaxon);
3769             }
3770             if(hierarchy.containsKey(Rank.TRIBE())&& !state2.getConfig().getMaxRank().equals(Rank.TRIBE())){
3771                 higherTaxon=saveAndGetHigherTaxon(Rank.TRIBE(),higherTaxon);
3772             }
3773             if(hierarchy.containsKey(Rank.SUBTRIBE())&& !state2.getConfig().getMaxRank().equals(Rank.SUBTRIBE())){
3774                 higherTaxon=saveAndGetHigherTaxon(Rank.SUBTRIBE(),higherTaxon);
3775             }
3776             if(hierarchy.containsKey(Rank.GENUS())&& !state2.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3777                 higherTaxon=saveAndGetHigherTaxon(Rank.GENUS(),higherTaxon);
3778             }
3779             if(hierarchy.containsKey(Rank.SUBGENUS())&& !state2.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3780                 higherTaxon=saveAndGetHigherTaxon(Rank.SUBGENUS(),higherTaxon);
3781             }
3782             importer.getClassificationService().saveOrUpdate(classification);
3783             return higherTaxon;
3784         }
3785
3786         private Taxon saveAndGetHigherTaxon(Rank r, Taxon higherTaxon){
3787             Taxon ct=hierarchy.get(r);
3788             if(!taxonExistsInClassification(higherTaxon,ct )) {
3789                 if(higherTaxon != null && ct!=null) {
3790                     classification.addParentChild(higherTaxon, ct, refMods, null);
3791                 } else
3792                     if(higherTaxon == null && ct !=null) {
3793                         classification.addChildTaxon(ct, refMods, null);
3794                 }
3795             }
3796             return ct;
3797         }
3798
3799         private boolean taxonExistsInClassification(Taxon parent, Taxon child){
3800             logger.info("taxonExistsInClassification");
3801             //            System.out.println("LOOK IF TAXA EXIST "+parent+", "+child);
3802             boolean found=false;
3803             if(parent !=null){
3804                 for (TaxonNode p : classification.getAllNodes()){
3805                     if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
3806                         for (TaxonNode c : p.getChildNodes()) {
3807                             if (c.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3808                                 found=true;
3809                                 break;
3810                             }
3811                         }
3812                     }
3813                 }
3814             }
3815             else{
3816                 for (TaxonNode p : classification.getAllNodes()){
3817                     if(p.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3818                         found=true;
3819                         break;
3820                     }
3821                 }
3822             }
3823             //            System.out.println("LOOK IF TAXA EXIST? "+found);
3824             return found;
3825         }
3826         /**
3827          * @param nameToBeFilledTest
3828          */
3829         public void setParsedName(ITaxonNameBase nameToBeFilledTest) {
3830             this.taxonNameBase = TaxonNameBase.castAndDeproxy(nameToBeFilledTest);
3831
3832         }
3833         //variety dwcranks:varietyEpithet
3834         /**
3835          * @return the author
3836          */
3837         public String getAuthor() {
3838             return author;
3839         }
3840         /**
3841          * @return
3842          */
3843         public Taxon getTaxon() {
3844             return taxon;
3845         }
3846         /**
3847          * @return
3848          */
3849         public TaxonNameBase<?,?> getTaxonNameBase() {
3850             return taxonNameBase;
3851         }
3852
3853         /**
3854          * @param findOrCreateTaxon
3855          */
3856         public void setForm(Taxon form) {
3857             this.form=form;
3858
3859         }
3860         /**
3861          * @param findOrCreateTaxon
3862          */
3863         public void setVariety(Taxon variety) {
3864             this.variety=variety;
3865
3866         }
3867         /**
3868          * @param string
3869          * @return
3870          */
3871         @SuppressWarnings("rawtypes")
3872         public Taxon findOrCreateTaxon(String partialname,String fullname, Rank rank, Rank globalrank) {
3873             logger.info("findOrCreateTaxon");
3874             sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
3875             //takes too much time
3876             //            List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3877             //            logger.info("tmpList returned: "+tmpList.size());
3878
3879             NomenclaturalStatusType statusType = null;
3880             if (!getStatus().isEmpty()){
3881                 try {
3882                     statusType = nomStatusString2NomStatus(getStatus());
3883                 } catch (UnknownCdmTypeException e) {
3884                     addProblematicStatusToFile(getStatus());
3885                     logger.warn("Problem with status");
3886                 }
3887             }
3888
3889             List<TaxonBase> tmpListFiltered = new ArrayList<TaxonBase>();
3890
3891             Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, fullname, MatchMode.BEGINNING, null, null, null, null, null);
3892
3893             tmpListFiltered.addAll(taxontest.getRecords());
3894             taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, partialname, MatchMode.BEGINNING, null, null, null, null, null);
3895             tmpListFiltered.addAll(taxontest.getRecords());
3896
3897             //logger.info("tmpListFiltered returned: "+tmpListFiltered.size());
3898
3899             boolean nameCorrected=false;
3900             if (fullname.indexOf(partialname)<0) {
3901                 nameCorrected=true;
3902             }
3903
3904             boolean foundIdentic=false;
3905             Taxon tmp=null;
3906             for (TaxonBase tmpb:tmpListFiltered){
3907                 if(tmpb !=null){
3908                     TaxonNameBase tnb =  tmpb.getName();
3909                     Rank crank=null;
3910                     if (tnb != null){
3911                          if(globalrank.equals(rank) || (globalrank.isLower(Rank.SPECIES()) && rank.equals(Rank.SPECIES()))){
3912                             if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(fullname) ){
3913                                 crank =tnb.getRank();
3914                                 if (crank !=null && rank !=null){
3915                                     if (crank.equals(rank)){
3916                                         foundIdentic=true;
3917                                         try{
3918                                             tmp=(Taxon)tmpb;
3919                                             break;
3920                                         }catch(Exception e){
3921                                             e.printStackTrace();
3922                                         }
3923                                     }
3924                                 }
3925                             }
3926                             if(nameCorrected){ //for corrected names such as Anochetus -- A. blf-pat
3927                                 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
3928                                     crank =tnb.getRank();
3929                                     if (crank !=null && rank !=null){
3930                                         if (crank.equals(rank)){
3931                                             foundIdentic=true;
3932                                             try{
3933                                                 tmp=(Taxon)tmpb;
3934                                                 break;
3935                                             }catch(Exception e){
3936                                                 e.printStackTrace();
3937                                             }
3938                                         }
3939                                     }
3940                                 }
3941                             }
3942                         }
3943                         else{
3944                             if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
3945                                 crank =tnb.getRank();
3946                                 if (crank !=null && rank !=null){
3947                                     if (crank.equals(rank)){
3948                                         foundIdentic=true;
3949                                         try{
3950                                             tmp=(Taxon)tmpb;
3951                                             break;
3952                                         }catch(Exception e){
3953                                             e.printStackTrace();
3954                                         }
3955                                     }
3956                                 }
3957                             }
3958                         }
3959                     }
3960                 }
3961             }
3962             boolean statusMatch=false;
3963             boolean appendedMatch=false;
3964             if(tmp !=null && foundIdentic){
3965                 statusMatch=compareStatus(tmp, statusType);
3966                 if (!getStatus().isEmpty() && ! (tmp.getAppendedPhrase() == null)) {
3967                     appendedMatch=tmp.getAppendedPhrase().equals(getStatus());
3968                 }
3969                 if (getStatus().isEmpty() && tmp.getAppendedPhrase() == null) {
3970                     appendedMatch=true;
3971                 }
3972
3973             }
3974             if ((tmp == null || !foundIdentic) ||  (tmp != null && !statusMatch) ||  (tmp != null && !appendedMatch && !statusMatch)){
3975
3976                 INonViralName tnb = getNonViralNameAccNomenclature();
3977                 tnb.setRank(rank);
3978
3979                 if(statusType != null) {
3980                     tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3981                 }
3982                 if(StringUtils.isNotBlank(getStatus())) {
3983                     tnb.setAppendedPhrase(getStatus());
3984                 }
3985
3986                 if(rank.equals(Rank.UNKNOWN_RANK())){
3987                     tnb.setTitleCache(fullname, true);
3988                     //                    tnb.setGenusOrUninomial(fullname);
3989                 }
3990                 if(rank.isHigher(Rank.GENUS())) {
3991                     tnb.setGenusOrUninomial(partialname);
3992                 }
3993
3994                 if(rank.isHigher(Rank.SPECIES())) {
3995                     tnb.setTitleCache(partialname, true);
3996                 }
3997
3998                 if (rank.equals(globalrank) && author != null) {
3999
4000                     tnb.setCombinationAuthorship(findOrCreateAuthor(author));
4001                     if (getIdentifier() !=null && !getIdentifier().isEmpty()){
4002                         Taxon taxonLSID = getTaxonByLSID(getIdentifier());
4003                         if (taxonLSID !=null) {
4004                             tmp=taxonLSID;
4005                         }
4006                     }
4007                 }
4008
4009                 if(tmp == null){
4010                     if (rank.equals(Rank.FAMILY())) {
4011                         tmp = buildFamily(tnb);
4012                     }
4013                     if (rank.equals(Rank.SUBFAMILY())) {
4014                         tmp = buildSubfamily(tnb);
4015                     }
4016                     if (rank.equals(Rank.TRIBE())) {
4017                         tmp = buildTribe(tnb);
4018                     }
4019                     if (rank.equals(Rank.SUBTRIBE())) {
4020                         tmp = buildSubtribe(tnb);
4021                     }
4022                     if (rank.equals(Rank.GENUS())) {
4023                         tmp = buildGenus(partialname, tnb);
4024                     }
4025
4026                     if (rank.equals(Rank.SUBGENUS())) {
4027                         tmp = buildSubgenus(partialname, tnb);
4028                     }
4029                     if (rank.equals(Rank.SPECIES())) {
4030                         tmp = buildSpecies(partialname, tnb);
4031                     }
4032
4033                     if (rank.equals(Rank.SUBSPECIES())) {
4034                         tmp = buildSubspecies(partialname, tnb);
4035                     }
4036
4037                     if (rank.equals(Rank.VARIETY())) {
4038                         tmp = buildVariety(fullname, partialname, tnb);
4039                     }
4040
4041                     if (rank.equals(Rank.FORM())) {
4042                         tmp = buildForm(fullname, partialname, tnb);
4043                     }
4044                     if (tmp != null){
4045                         TaxonXTreatmentExtractor.this.sourceHandler.addSource(refMods, tmp);
4046                     }
4047
4048                     importer.getClassificationService().saveOrUpdate(classification);
4049                 }
4050
4051             }
4052
4053             tmp = CdmBase.deproxy(tmp, Taxon.class);
4054             if (rank.equals(globalrank) && author != null) {
4055                 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
4056                     setLSID(getIdentifier(), tmp);
4057                     importer.getTaxonService().saveOrUpdate(tmp);
4058                     tmp = CdmBase.deproxy(tmp, Taxon.class);
4059                 }
4060             }
4061
4062             this.taxon=tmp;
4063
4064             return tmp;
4065         }
4066
4067         /**
4068          * @param tnb
4069          * @return
4070          */
4071         private Taxon buildSubfamily(INonViralName tnb) {
4072             Taxon tmp;
4073             //            tnb.generateTitle();
4074             tmp = findMatchingTaxon(tnb,refMods);
4075             if(tmp ==null){
4076                 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4077                 if(!state2.getConfig().doKeepOriginalSecundum()) {
4078                     tmp.setSec(state2.getConfig().getSecundum());
4079                 }
4080                 //                tmp.setSec(refMods);
4081                 //                sourceHandler.addSource(refMods, tmp);
4082                 if(family != null) {
4083                     classification.addParentChild(family, tmp, null, null);
4084                     higherRank=Rank.FAMILY();
4085                     higherTaxa=family;
4086                 } else {
4087                     //System.out.println("ADDCHILDTAXON SUBFAMILY "+tmp);
4088                     classification.addChildTaxon(tmp, null, null);
4089                 }
4090             }
4091             return tmp;
4092         }
4093         /**
4094          * @param tnb
4095          * @return
4096          */
4097         private Taxon buildFamily(INonViralName tnb) {
4098             Taxon tmp;
4099             //            tnb.generateTitle();
4100             tmp = findMatchingTaxon(tnb,refMods);
4101             if(tmp ==null){
4102                 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4103                 if(!state2.getConfig().doKeepOriginalSecundum()) {
4104                     tmp.setSec(state2.getConfig().getSecundum());
4105                 }
4106                 //                tmp.setSec(refMods);
4107                 //sourceHandler.addSource(refMods, tmp);
4108                 //System.out.println("ADDCHILDTAXON FAMILY "+tmp);
4109                 classification.addChildTaxon(tmp, null, null);
4110             }
4111             return tmp;
4112         }
4113         /**
4114          * @param fullname
4115          * @param tnb
4116          * @return
4117          */
4118         private Taxon buildForm(String fullname, String partialname, INonViralName tnb) {
4119             if (genusName !=null) {
4120                 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4121             }
4122             if (subgenusName !=null) {
4123                 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4124             }
4125             if(speciesName !=null) {
4126                 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4127             }
4128             if(subspeciesName != null) {
4129                 tnb.setInfraSpecificEpithet(subspeciesName.getInfraSpecificEpithet());
4130             }
4131             if(partialname!= null) {
4132                 tnb.setInfraSpecificEpithet(partialname);
4133             }
4134              //TODO how to save form??
4135             tnb.setTitleCache(fullname, true);
4136             Taxon tmp = findMatchingTaxon(tnb,refMods);
4137             if(tmp ==null){
4138                 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4139                 if(!state2.getConfig().doKeepOriginalSecundum()) {
4140                     tmp.setSec(state2.getConfig().getSecundum());
4141                 }
4142                 //                tmp.setSec(refMods);
4143                 //sourceHandler.addSource(refMods, tmp);
4144                 if (subspecies !=null) {
4145                     classification.addParentChild(subspecies, tmp, null, null);
4146                     higherRank=Rank.SUBSPECIES();
4147                     higherTaxa=subspecies;
4148                 } else {
4149                     if (species !=null) {
4150                         classification.addParentChild(species, tmp, null, null);
4151                         higherRank=Rank.SPECIES();
4152                         higherTaxa=species;
4153                     }
4154                     else{
4155                         //                        System.out.println("ADDCHILDTAXON FORM "+tmp);
4156                         classification.addChildTaxon(tmp, null, null);
4157                     }
4158                 }
4159             }
4160             return tmp;
4161         }
4162         /**
4163          * @param fullname
4164          * @param tnb
4165          * @return
4166          */
4167         private Taxon buildVariety(String fullname, String partialname, INonViralName tnb) {
4168             Taxon tmp;
4169             if (genusName !=null) {
4170                 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4171             }
4172             if (subgenusName !=null) {
4173                 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4174             }
4175             if(speciesName !=null) {
4176                 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4177             }
4178             if(subspeciesName != null) {
4179                 tnb.setInfraSpecificEpithet(subspeciesName.getSpecificEpithet());
4180             }
4181             if(partialname != null) {
4182                 tnb.setInfraSpecificEpithet(partialname);
4183             }
4184             //TODO how to save variety?
4185             tnb.setTitleCache(fullname, true);
4186             tmp = findMatchingTaxon(tnb,refMods);
4187             if(tmp ==null){
4188                 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4189                 if(!state2.getConfig().doKeepOriginalSecundum()) {
4190                     tmp.setSec(state2.getConfig().getSecundum());
4191                 }
4192                 //                tmp.setSec(refMods);
4193                 //sourceHandler.addSource(refMods, tmp);
4194                 if (subspecies !=null) {
4195                     classification.addParentChild(subspecies, tmp, null, null);
4196                     higherRank=Rank.SUBSPECIES();
4197                     higherTaxa=subspecies;
4198                 } else {
4199                     if(species !=null) {
4200                         classification.addParentChild(species, tmp, null, null);
4201                         higherRank=Rank.SPECIES();
4202                         higherTaxa=species;
4203                     }
4204                     else{
4205                         //System.out.println("ADDCHILDTAXON VARIETY "+tmp);
4206                         classification.addChildTaxon(tmp, null, null);
4207                     }
4208                 }
4209             }
4210             return tmp;
4211         }
4212         /**
4213          * @param partialname
4214          * @param tnb
4215          * @return
4216          */
4217         private Taxon buildSubspecies(String partialname, INonViralName tnb) {
4218             if (genusName !=null) {
4219                 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4220             }
4221             if (subgenusName !=null) {
4222                 //                            System.out.println("SUB:"+subgenusName.getInfraGenericEpithet());
4223                 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4224             }
4225             if(speciesName !=null) {
4226                 //                            System.out.println("SPE:"+speciesName.getSpecificEpithet());
4227                 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4228             }
4229             tnb.setInfraSpecificEpithet(partialname);
4230             Taxon tmp = findMatchingTaxon(tnb,refMods);
4231             if(tmp ==null){
4232                 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4233                 if(!state2.getConfig().doKeepOriginalSecundum())
4234                  {
4235                     tmp.setSec(state2.getConfig().getSecundum());
4236                 //                tmp.setSec(refMods);
4237                 //sourceHandler.addSource(refMods, tmp);
4238                 }
4239
4240                 if(species != null) {
4241                     classification.addParentChild(species, tmp, null, null);
4242                     higherRank=Rank.SPECIES();
4243                     higherTaxa=species;
4244                 }
4245                 else{
4246                     //System.out.println("ADDCHILDTAXON SUBSPECIES "+tmp);
4247                     classification.addChildTaxon(tmp, null, null);
4248                 }
4249             }
4250             return tmp;
4251         }
4252         /**
4253          * @param partialname
4254          * @param tnb
4255          * @return
4256          */
4257         private Taxon buildSpecies(String partialname, INonViralName tnb) {
4258             if (genusName !=null) {
4259                 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4260             }
4261             if (subgenusName !=null) {
4262                 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4263             }
4264             tnb.setSpecificEpithet(partialname.toLowerCase());
4265             Taxon tmp = findMatchingTaxon(tnb,refMods);
4266             if(tmp ==null){
4267                 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4268                 if(!state2.getConfig().doKeepOriginalSecundum()) {
4269                     tmp.setSec(state2.getConfig().getSecundum());
4270                 }
4271                 //                tmp.setSec(refMods);
4272                 //sourceHandler.addSource(refMods, tmp);
4273                 if (subgenus !=null) {
4274                     classification.addParentChild(subgenus, tmp, null, null);
4275                     higherRank=Rank.SUBGENUS();
4276                     higherTaxa=subgenus;
4277                 } else {
4278                     if (genus !=null) {
4279                         classification.addParentChild(genus, tmp, null, null);
4280                         higherRank=Rank.GENUS();
4281                         higherTaxa=genus;
4282                     }
4283                     else{
4284                         //System.out.println("ADDCHILDTAXON SPECIES "+tmp);
4285                         classification.addChildTaxon(tmp, null, null);
4286                     }
4287                 }
4288             }
4289             return tmp;
4290         }
4291         /**
4292          * @param partialname
4293          * @param tnb
4294          * @return
4295          */
4296         private Taxon buildSubgenus(String partialname, INonViralName tnb) {
4297             tnb.setInfraGenericEpithet(partialname);
4298             if (genusName !=null) {
4299                 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4300             }
4301             Taxon tmp = findMatchingTaxon(tnb,refMods);
4302             if(tmp ==null){
4303                 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4304                 if(!state2.getConfig().doKeepOriginalSecundum()) {
4305                     tmp.setSec(state2.getConfig().getSecundum());
4306                 }
4307                 //                tmp.setSec(refMods);
4308                 //sourceHandler.addSource(refMods, tmp);
4309                 if(genus != null) {
4310                     classification.addParentChild(genus, tmp, null, null);
4311                     higherRank=Rank.GENUS();
4312                     higherTaxa=genus;
4313                 } else{
4314                     //System.out.println("ADDCHILDTAXON SUBGENUS "+tmp);
4315                     classification.addChildTaxon(tmp, null, null);
4316                 }
4317             }
4318             return tmp;
4319         }
4320         /**
4321          * @param partialname
4322          * @param tnb
4323          * @return
4324          */
4325         private Taxon buildGenus(String partialname, INonViralName tnb) {
4326             Taxon tmp;
4327             tnb.setGenusOrUninomial(partialname);
4328
4329
4330             tmp = findMatchingTaxon(tnb,refMods);
4331             if(tmp ==null){
4332                 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4333                 if(!state2.getConfig().doKeepOriginalSecundum())
4334                  {
4335                     tmp.setSec(state2.getConfig().getSecundum());
4336                 //                tmp.setSec(refMods);
4337                 //sourceHandler.addSource(refMods, tmp);
4338                 }
4339
4340                 if(subtribe != null) {
4341                     classification.addParentChild(subtribe, tmp, null, null);
4342                     higherRank=Rank.SUBTRIBE();
4343                     higherTaxa=subtribe;
4344                 } else{
4345                     if(tribe !=null) {
4346                         classification.addParentChild(tribe, tmp, null, null);
4347                         higherRank=Rank.TRIBE();
4348                         higherTaxa=tribe;
4349                     } else{
4350                         if(subfamily !=null) {
4351                             classification.addParentChild(subfamily, tmp, null, null);
4352                             higherRank=Rank.SUBFAMILY();
4353                             higherTaxa=subfamily;
4354                         } else
4355                             if(family !=null) {
4356                                 classification.addParentChild(family, tmp, null, null);
4357                                 higherRank=Rank.FAMILY();
4358                                 higherTaxa=family;
4359                             }
4360                             else{
4361                                 //System.out.println("ADDCHILDTAXON GENUS "+tmp);
4362                                 classification.addChildTaxon(tmp, null, null);
4363                             }
4364                     }
4365                 }
4366             }
4367             return tmp;
4368         }
4369
4370         /**
4371          * @param tnb
4372          * @return
4373          */
4374         private Taxon buildSubtribe(INonViralName tnb) {
4375             Taxon tmp = findMatchingTaxon(tnb,refMods);
4376             if(tmp==null){
4377                 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4378                 if(!state2.getConfig().doKeepOriginalSecundum()) {
4379                     tmp.setSec(state2.getConfig().getSecundum());
4380                 }
4381                 //                tmp.setSec(refMods);
4382                 //sourceHandler.addSource(refMods, tmp);
4383                 if(tribe != null) {
4384                     classification.addParentChild(tribe, tmp, null, null);
4385                     higherRank=Rank.TRIBE();
4386                     higherTaxa=tribe;
4387                 } else{
4388                     //System.out.println("ADDCHILDTAXON SUBTRIBE "+tmp);
4389                     classification.addChildTaxon(tmp, null, null);
4390                 }
4391             }
4392             return tmp;
4393         }
4394         /**
4395          * @param tnb
4396          * @return
4397          */
4398         private Taxon buildTribe(INonViralName tnb) {
4399             Taxon tmp = findMatchingTaxon(tnb,refMods);
4400             if(tmp==null){
4401                 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4402                 if(!state2.getConfig().doKeepOriginalSecundum()) {
4403                     tmp.setSec(state2.getConfig().getSecundum());
4404                 }
4405                 //                tmp.setSec(refMods);
4406                 //sourceHandler.addSource(refMods, tmp);
4407                 if (subfamily !=null) {
4408                     classification.addParentChild(subfamily, tmp, null, null);
4409                     higherRank=Rank.SUBFAMILY();
4410                     higherTaxa=subfamily;
4411                 } else {
4412                     if(family != null) {
4413                         classification.addParentChild(family, tmp, null, null);
4414                         higherRank=Rank.FAMILY();
4415                         higherTaxa=family;
4416                     }
4417                     else{
4418                         //System.out.println("ADDCHILDTAXON TRIBE "+tmp);
4419                         classification.addChildTaxon(tmp, null, null);
4420                     }
4421                 }
4422             }
4423             return tmp;
4424         }
4425
4426         /**
4427          * @param identifier2
4428          * @return
4429          */
4430         @SuppressWarnings("rawtypes")
4431         private Taxon getTaxonByLSID(String identifier) {
4432             //logger.info("getTaxonByLSID");
4433             //            boolean lsidok=false;
4434             String id = identifier.split("__")[0];
4435             //            String source = identifier.split("__")[1];
4436             LSID lsid = null;
4437             if (id.indexOf("lsid")>-1){
4438                 try {
4439                     lsid = new LSID(id);
4440                     //                    lsidok=true;
4441                 } catch (MalformedLSIDException e) {
4442                     logger.warn("Malformed LSID");
4443                 }
4444             }
4445             if (lsid !=null){
4446                 List<Taxon> taxa = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
4447                 LSID currentlsid=null;
4448                 for (Taxon t:taxa){
4449                     currentlsid = t.getLsid();
4450                     if (currentlsid !=null){
4451                         if (currentlsid.getLsid().equals(lsid.getLsid())){
4452                             try{
4453                                 return t;
4454                             }
4455                             catch(Exception e){logger.warn("Exception occurred while comparing LSIDs "+e );}
4456                         }
4457                     }
4458                 }
4459             }
4460             return null;
4461         }
4462         /**
4463          * @param author2
4464          * @return
4465          */
4466         @SuppressWarnings("rawtypes")
4467         private Person findOrCreateAuthor(String author2) {
4468             //logger.info("findOrCreateAuthor");
4469             List<UuidAndTitleCache<Person>> hiberPersons = importer.getAgentService().getPersonUuidAndTitleCache();
4470             for (UuidAndTitleCache<Person> hibernateP:hiberPersons){
4471                 if(hibernateP.getTitleCache().equals(author2)) {
4472                     AgentBase existing = importer.getAgentService().find(hibernateP.getUuid());
4473                     return CdmBase.deproxy(existing, Person.class);
4474                 }
4475             }
4476             Person p = Person.NewInstance();
4477             p.setTitleCache(author2,true);
4478             importer.getAgentService().saveOrUpdate(p);
4479             return CdmBase.deproxy(p, Person.class);
4480         }
        /**
         * Sets the author string of this name.
         *
         * @param author the author to set
         */
        public void setAuthor(String author) {
            this.author = author;
        }
4487
4488         /**
4489          * @return the higherTaxa
4490          */
4491         public Taxon getHigherTaxa() {
4492             return higherTaxa;
4493         }
        /**
         * Sets the higher taxon recorded for this name.
         *
         * @param higherTaxa the higher taxon to set
         */
        public void setHigherTaxa(Taxon higherTaxa) {
            this.higherTaxa = higherTaxa;
        }
4500         /**
4501          * @return the higherRank
4502          */
4503         public Rank getHigherRank() {
4504             return higherRank;
4505         }
        /**
         * Sets the rank recorded for the higher taxon.
         *
         * @param higherRank the higher rank to set
         */
        public void setHigherRank(Rank higherRank) {
            this.higherRank = higherRank;
        }
4512         public String getName(){
4513             if (newName.isEmpty()) {
4514                 return originalName;
4515             } else {
4516                 return newName;
4517             }
4518
4519         }
4520         /**
4521          * @return the fullName
4522          */
4523         public String getOriginalName() {
4524             return originalName;
4525         }
        /**
         * Sets the original name string.
         *
         * @param fullName the name to store as original name
         */
        public void setOriginalName(String fullName) {
            this.originalName = fullName;
        }
4532         /**
4533          * @return the newName
4534          */
4535         public String getNewName() {
4536             return newName;
4537         }
        /**
         * Sets the new name string.
         *
         * @param newName the new name to set
         */
        public void setNewName(String newName) {
            this.newName = newName;
        }
4544         /**
4545          * @return the rank
4546          */
4547         public Rank getRank() {
4548             return rank;
4549         }
        /**
         * Sets the rank of this name.
         *
         * @param rank the rank to set
         */
        public void setRank(Rank rank) {
            this.rank = rank;
        }
4556         /**
4557          * @return the idenfitiger
4558          */
4559         public String getIdentifier() {
4560             return identifier;
4561         }
        /**
         * Sets the identifier string of this name.
         *
         * @param identifier the identifier to set
         */
        public void setIdentifier(String identifier) {
            this.identifier = identifier;
        }
4568         /**
4569          * @return the status
4570          */
4571         public String getStatus() {
4572             if (status == null) {
4573                 return "";
4574             }
4575             return status;
4576         }
        /**
         * Sets the status string; {@link #getStatus()} reports a {@code null} status as "".
         *
         * @param status the status to set
         */
        public void setStatus(String status) {
            this.status = status;
        }
4583         /**
4584          * @return the family
4585          */
4586         public Taxon getFamily() {
4587             return family;
4588         }
4589         /**
4590          * @param family the family to set
4591          */
4592         @SuppressWarnings("rawtypes")
4593         public void setFamily(Taxon family) {
4594             this.family = family;
4595             familyName = CdmBase.deproxy(family.getName());
4596         }
4597         /**
4598          * @return the subfamily
4599          */
4600         public Taxon getSubfamily() {
4601             return subfamily;
4602         }
4603         /**
4604          * @param subfamily the subfamily to set
4605          */
4606         @SuppressWarnings("rawtypes")
4607         public void setSubfamily(Taxon subfamily) {
4608             this.subfamily = subfamily;
4609             subfamilyName = CdmBase.deproxy(subfamily.getName());
4610         }
4611         /**
4612          * @return the tribe
4613          */
4614         public Taxon getTribe() {
4615             return tribe;
4616         }
4617         /**
4618          * @param tribe the tribe to set
4619          */
4620         @SuppressWarnings("rawtypes")
4621         public void setTribe(Taxon tribe) {
4622             this.tribe = tribe;
4623             tribeName = CdmBase.deproxy(tribe.getName());
4624         }
4625         /**
4626          * @return the subtribe
4627          */
4628         public Taxon getSubtribe() {
4629             return subtribe;
4630         }
4631         /**
4632          * @param subtribe the subtribe to set
4633          */
4634         @SuppressWarnings("rawtypes")
4635         public void setSubtribe(Taxon subtribe) {
4636             this.subtribe = subtribe;
4637             subtribeName =CdmBase.deproxy(subtribe.getName());
4638         }
4639         /**
4640          * @return the genus
4641          */
4642         public Taxon getGenus() {
4643             return genus;
4644         }
4645         /**
4646          * @param genus the genus to set
4647          */
4648         @SuppressWarnings("rawtypes")
4649         public void setGenus(Taxon genus) {
4650             if (genus != null){
4651                         this.genus = genus;
4652                     genusName = CdmBase.deproxy(genus.getName());
4653             }
4654         }
4655         /**
4656          * @return the subgenus
4657          */
4658         public Taxon getSubgenus() {
4659             return subgenus;
4660         }
4661         /**
4662          * @param subgenus the subgenus to set
4663          */
4664         @SuppressWarnings("rawtypes")
4665         public void setSubgenus(Taxon subgenus) {
4666             this.subgenus = subgenus;
4667             subgenusName = CdmBase.deproxy(subgenus.getName());
4668         }
4669         /**
4670          * @return the species
4671          */
4672         public Taxon getSpecies() {
4673             return species;
4674         }
4675         /**
4676          * @param species the species to set
4677          */
4678         public void setSpecies(Taxon species) {
4679                 if (species != null){
4680                     this.species = species;
4681                     speciesName = CdmBase.deproxy(species.getName());
4682                 }
4683         }
4684         /**
4685          * @return the subspecies
4686          */
4687         public Taxon getSubspecies() {
4688             return subspecies;
4689         }
4690         /**
4691          * @param subspecies the subspecies to set
4692          */
4693         @SuppressWarnings("rawtypes")
4694         public void setSubspecies(Taxon subspecies) {
4695             this.subspecies = subspecies;
4696             subspeciesName = CdmBase.deproxy(subspecies.getName());
4697
4698         }
4699
4700
4701
4702     }
4703
4704
4705     /**
4706      * @param status
4707      */
4708     private void addProblematicStatusToFile(String status) {
4709         try{
4710             FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "StatusUnknown_"+classification.getTitleCache()+".txt",true);
4711             BufferedWriter out = new BufferedWriter(fstream);
4712             out.write(status+"\n");
4713             //Close the output stream
4714             out.close();
4715         }catch (Exception e){//Catch exception if any
4716             System.err.println("Error: " + e.getMessage());
4717         }
4718
4719     }
4720
4721
4722
4723     /**
4724      * @param tnb
4725      * @return
4726      */
4727     private Taxon findMatchingTaxon(INonViralName tnb, Reference refMods) {
4728         logger.info("findMatchingTaxon");
4729         Taxon tmp=null;
4730
4731         refMods=CdmBase.deproxy(refMods, Reference.class);
4732         boolean insertAsExisting =false;
4733         List<Taxon> existingTaxa = new ArrayList<Taxon>();
4734         try {
4735             existingTaxa = getMatchingTaxa(tnb);
4736         } catch (Exception e1) {
4737             // TODO Auto-generated catch block
4738             e1.printStackTrace();
4739         }
4740         double similarityScore=0.0;
4741         double similarityAuthor=-1;
4742         String author1="";
4743         String author2="";
4744         String t1="";
4745         String t2="";
4746         for (Taxon bestMatchingTaxon : existingTaxa){
4747             if (!existingTaxa.isEmpty() && state2.getConfig().isInteractWithUser() && !insertAsExisting) {
4748                 //                System.out.println("tnb "+tnb.getTitleCache());
4749                 //                System.out.println("ext "+bestMatchingTaxon.getTitleCache());
4750                 try {
4751                     if(tnb.getAuthorshipCache()!=null) {
4752                         author1=tnb.getAuthorshipCache();
4753                     }
4754                 } catch (Exception e) {
4755                     // TODO Auto-generated catch block
4756                     e.printStackTrace();
4757                 }
4758                 try {
4759                     if(bestMatchingTaxon.getName().getAuthorshipCache()!=null) {
4760                         author2=bestMatchingTaxon.getName().getAuthorshipCache();
4761                     }
4762                 } catch (Exception e) {
4763                     // TODO Auto-generated catch block
4764                     e.printStackTrace();
4765                 }
4766                 try {
4767                     t1=tnb.getTitleCache().split("sec.")[0].trim();
4768                     if (author1!=null && !StringUtils.isEmpty(author1)) {
4769                         t1=t1.split(Pattern.quote(author1))[0];
4770                     }
4771                 } catch (Exception e) {
4772                     // TODO Auto-generated catch block
4773                     e.printStackTrace();
4774                 }
4775                 try {
4776                     t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
4777                     if (author2!=null && !StringUtils.isEmpty(author2)) {
4778                         t2=t2.split(Pattern.quote(author2))[0];
4779                     }
4780                 } catch (Exception e) {
4781                     // TODO Auto-generated catch block
4782                     e.printStackTrace();
4783                 }
4784                 similarityScore=similarity(t1.trim(), t2.trim());
4785                 //                System.out.println("taxascore: "+similarityScore);
4786                 similarityAuthor=similarity(author1.trim(), author2.trim());
4787                 //                System.out.println("authorscore: "+similarityAuthor);
4788                 insertAsExisting = compareAndCheckTaxon(tnb, refMods, similarityScore, bestMatchingTaxon,similarityAuthor);
4789             }
4790             if(insertAsExisting) {
4791                 //System.out.println("KEEP "+bestMatchingTaxon.toString());
4792                 tmp=bestMatchingTaxon;
4793                 sourceHandler.addSource(refMods, tmp);
4794                 return tmp;
4795             }
4796         }
4797         return tmp;
4798     }
4799
4800
4801     /**
4802      * @param tnb
4803      * @param refMods
4804      * @param similarityScore
4805      * @param bestMatchingTaxon
4806      * @param similarityAuthor
4807      * @return
4808      */
4809     private boolean compareAndCheckTaxon(INonViralName tnb, Reference refMods, double similarityScore,
4810             Taxon bestMatchingTaxon, double similarityAuthor) {
4811         //logger.info("compareAndCheckTaxon");
4812         boolean insertAsExisting;
4813         //        if (tnb.getTitleCache().split("sec.")[0].equalsIgnoreCase("Chenopodium") && bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium album")>-1) {
4814         //            insertAsExisting=false;
4815         //        } else{
4816         //a small hack/automatisation for Chenopodium only
4817         if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase("Chenopodium") &&
4818                 bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium L.")>-1) {
4819             insertAsExisting=true;
4820         } else {
4821             insertAsExisting=askIfReuseBestMatchingTaxon(tnb, bestMatchingTaxon, refMods, similarityScore,similarityAuthor);
4822         }
4823         //        }
4824
4825         logDecision(tnb, bestMatchingTaxon, insertAsExisting, refMods);
4826         return insertAsExisting;
4827     }
4828
4829     /**
4830      * @return
4831      */
4832     @SuppressWarnings("rawtypes")
4833     private List<Taxon> getMatchingTaxa(ITaxonNameBase tnb) {
4834         //logger.info("getMatchingTaxon");
4835         if (tnb.getTitleCache() == null){
4836                 tnb.setTitleCache(tnb.toString(), tnb.isProtectedTitleCache());
4837         }
4838
4839         Pager<TaxonBase> pager=importer.getTaxonService().findByTitle(TaxonBase.class, tnb.getTitleCache().split("sec.")[0].trim(), MatchMode.BEGINNING, null, null, null, null, null);
4840         List<TaxonBase>records = pager.getRecords();
4841
4842         List<Taxon> existingTaxons = new ArrayList<Taxon>();
4843         for (TaxonBase r:records){
4844             try{
4845                 Taxon bestMatchingTaxon = (Taxon)r;
4846                 //                System.out.println("best: "+bestMatchingTaxon.getTitleCache());
4847                 if(compareTaxonNameLength(bestMatchingTaxon.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4848                     existingTaxons.add(bestMatchingTaxon);
4849                 }
4850             }catch(ClassCastException e){logger.warn("classcast exception, might be a synonym, ignore it");}
4851         }
4852         Taxon bmt = importer.getTaxonService().findBestMatchingTaxon(tnb.getTitleCache());
4853         if (!existingTaxons.contains(bmt) && bmt!=null) {
4854             if(compareTaxonNameLength(bmt.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4855                 existingTaxons.add(bmt);
4856             }
4857         }
4858         return existingTaxons;
4859     }
4860
4861     /**
4862      * Check if the found Taxon can reasonnably be the same
4863      * example: with and without author should match, but the subspecies should not be suggested for a genus
4864      * */
4865     private boolean compareTaxonNameLength(String f, String o){
4866         //logger.info("compareTaxonNameLength");
4867         boolean lengthOk=false;
4868         int sizeF = f.length();
4869         int sizeO = o.length();
4870         if (sizeO>=sizeF) {
4871             lengthOk=true;
4872         }
4873         if(sizeF>sizeO) {
4874             if (sizeF-sizeO>10) {
4875                 lengthOk=false;
4876             } else {
4877                 lengthOk=true;
4878             }
4879         }
4880
4881         //        System.out.println(lengthOk+": compare "+f+" ("+f.length()+") and "+o+" ("+o.length()+")");
4882         return lengthOk;
4883     }
4884
4885     private double similarity(String s1, String s2) {
4886         //logger.info("similarity");
4887         //System.out.println("similarity *"+s1+"* vs. *"+s2+"*");
4888         if(!StringUtils.isEmpty(s1) && !StringUtils.isEmpty(s2)){
4889             String l1=s1.toLowerCase().trim();
4890             String l2=s2.toLowerCase().trim();
4891             if (l1.length() < l2.length()) { // s1 should always be bigger
4892                 String swap = l1; l1 = l2; l2 = swap;
4893             }
4894             int bigLen = l1.length();
4895             if (bigLen == 0) { return 1.0; /* both strings are zero length */ }
4896             return (bigLen - computeEditDistance(l1, l2)) / (double) bigLen;
4897         }
4898         else{
4899             if(s1!=null && s2!=null){
4900                 if (s1.equalsIgnoreCase(s2)) {
4901                     return 1;
4902                 }
4903             }
4904             return -1;
4905         }
4906     }
4907
4908     private int computeEditDistance(String s1, String s2) {
4909         //logger.info("computeEditDistance");
4910         int[] costs = new int[s2.length() + 1];
4911         for (int i = 0; i <= s1.length(); i++) {
4912             int lastValue = i;
4913             for (int j = 0; j <= s2.length(); j++) {
4914                 if (i == 0) {
4915                     costs[j] = j;
4916                 } else {
4917                     if (j > 0) {
4918                         int newValue = costs[j - 1];
4919                         if (s1.charAt(i - 1) != s2.charAt(j - 1)) {
4920                             newValue = Math.min(Math.min(newValue, lastValue),
4921                                     costs[j]) + 1;
4922                         }
4923                         costs[j - 1] = lastValue;
4924                         lastValue = newValue;
4925                     }
4926                 }
4927             }
4928             if (i > 0) {
4929                 costs[s2.length()] = lastValue;
4930             }
4931         }
4932         return costs[s2.length()];
4933     }
4934
    // Parent taxa keyed by rank; the handle*Hierarchy methods put each resolved parent here.
    Map<Rank, Taxon> hierarchy = new HashMap<Rank, Taxon>();
4936     /**
4937      * @param taxonNameBase
4938      */
4939     @SuppressWarnings("rawtypes")
4940     public void lookForParentNode(INonViralName taxonNameBase, Taxon tax, Reference ref, MyName myName) {
4941         logger.info("lookForParentNode "+taxonNameBase.getTitleCache()+" for "+myName.toString());
4942         //System.out.println("LOOK FOR PARENT NODE "+taxonnamebase.toString()+"; "+tax.toString()+"; "+taxonnamebase.getRank());
4943         INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
4944         if (taxonNameBase.getRank().equals(Rank.FORM())){
4945             handleFormHierarchy(ref, myName, parser);
4946         }
4947         else if (taxonNameBase.getRank().equals(Rank.VARIETY())){
4948             handleVarietyHierarchy(ref, myName, parser);
4949         }
4950         else if (taxonNameBase.getRank().equals(Rank.SUBSPECIES())){
4951             handleSubSpeciesHierarchy(ref, myName, parser);
4952         }
4953         else if (taxonNameBase.getRank().equals(Rank.SPECIES())){
4954             handleSpeciesHierarchy(ref, myName, parser);
4955         }
4956         else if (taxonNameBase.getRank().equals(Rank.SUBGENUS())){
4957             handleSubgenusHierarchy(ref, myName, parser);
4958         }
4959
4960         if (taxonNameBase.getRank().equals(Rank.GENUS())){
4961             handleGenusHierarchy(ref, myName, parser);
4962         }
4963         if (taxonNameBase.getRank().equals(Rank.SUBTRIBE())){
4964             handleSubtribeHierarchy(ref, myName, parser);
4965         }
4966         if (taxonNameBase.getRank().equals(Rank.TRIBE())){
4967             handleTribeHierarchy(ref, myName, parser);
4968         }
4969
4970         if (taxonNameBase.getRank().equals(Rank.SUBFAMILY())){
4971             handleSubfamilyHierarchy(ref, myName, parser);
4972         }
4973     }
4974
4975     /**
4976      * @param ref
4977      * @param myName
4978      * @param parser
4979      */
4980     private void handleSubfamilyHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
4981         System.out.println("handleSubfamilyHierarchy");
4982         String parentStr = myName.getFamilyStr();
4983         Rank r = Rank.FAMILY();
4984         if(parentStr!=null){
4985
4986             Taxon parent = null;
4987             Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, parentStr, MatchMode.BEGINNING, null, null, null, null, null);
4988             for(TaxonBase tb:taxontest.getRecords()){
4989                 try {
4990                     if (tb.getName().getRank().equals(r)) {
4991                         parent=CdmBase.deproxy(tb, Taxon.class);
4992                     }
4993                     break;
4994                 } catch (Exception e) {
4995                     // TODO Auto-generated catch block
4996                     e.printStackTrace();
4997                 }
4998             }
4999             if(parent == null) {
5000                 INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5001                 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5002                 if(tmp ==null)
5003                 {
5004                     parent=Taxon.NewInstance(parentNameName, ref);
5005                     importer.getTaxonService().save(parent);
5006                     parent = CdmBase.deproxy(parent, Taxon.class);
5007                 } else {
5008                     parent=tmp;
5009                 }
5010                 lookForParentNode(parentNameName, parent, ref,myName);
5011
5012             }
5013             hierarchy.put(r,parent);
5014         }
5015     }
5016
5017     /**
5018      * @param ref
5019      * @param myName
5020      * @param parser
5021      */
5022     private void handleTribeHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5023         String parentStr = myName.getSubfamilyStr();
5024         Rank r = Rank.SUBFAMILY();
5025         if (parentStr == null){
5026             parentStr = myName.getFamilyStr();
5027             r = Rank.FAMILY();
5028         }
5029         if(parentStr!=null){
5030             INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5031             Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5032             //                    importer.getTaxonService().save(parent);
5033             //                    parent = CdmBase.deproxy(parent, Taxon.class);
5034
5035             boolean parentDoesNotExists = true;
5036             for (TaxonNode p : classification.getAllNodes()){
5037                 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5038                     parentDoesNotExists = false;
5039                     parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5040                     break;
5041                 }
5042             }
5043             //                if(parentDoesNotExists) {
5044             //                    importer.getTaxonService().save(parent);
5045             //                    parent = CdmBase.deproxy(parent, Taxon.class);
5046             //                    lookForParentNode(parentNameName, parent, ref,myName);
5047             //                }
5048             if(parentDoesNotExists) {
5049                 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5050                 if(tmp ==null)
5051                 {
5052                     parent=Taxon.NewInstance(parentNameName, ref);
5053                     importer.getTaxonService().save(parent);
5054                     parent = CdmBase.deproxy(parent, Taxon.class);
5055                 } else {
5056                     parent=tmp;
5057                 }
5058                 lookForParentNode(parentNameName, parent, ref,myName);
5059
5060             }
5061             hierarchy.put(r,parent);
5062         }
5063     }
5064
5065     /**
5066      * @param ref
5067      * @param myName
5068      * @param parser
5069      */
5070     private void handleSubtribeHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5071         String parentStr = myName.getTribeStr();
5072         Rank r = Rank.TRIBE();
5073         if (parentStr == null){
5074             parentStr = myName.getSubfamilyStr();
5075             r = Rank.SUBFAMILY();
5076         }
5077         if (parentStr == null){
5078             parentStr = myName.getFamilyStr();
5079             r = Rank.FAMILY();
5080         }
5081         if(parentStr!=null){
5082             INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5083             Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5084             //                    importer.getTaxonService().save(parent);
5085             //                    parent = CdmBase.deproxy(parent, Taxon.class);
5086
5087             boolean parentDoesNotExists = true;
5088             for (TaxonNode p : classification.getAllNodes()){
5089                 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5090                     parentDoesNotExists = false;
5091                     parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5092
5093                     break;
5094                 }
5095             }
5096             //                if(parentDoesNotExists) {
5097             //                    importer.getTaxonService().save(parent);
5098             //                    parent = CdmBase.deproxy(parent, Taxon.class);
5099             //                    lookForParentNode(parentNameName, parent, ref,myName);
5100             //                }
5101             if(parentDoesNotExists) {
5102                 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5103                 if(tmp ==null)
5104                 {
5105                     parent=Taxon.NewInstance(parentNameName, ref);
5106                     importer.getTaxonService().save(parent);
5107                     parent = CdmBase.deproxy(parent, Taxon.class);
5108                 } else {
5109                     parent=tmp;
5110                 }
5111                 lookForParentNode(parentNameName, parent, ref,myName);
5112
5113             }
5114             hierarchy.put(r,parent);
5115         }
5116     }
5117
5118     /**
5119      * @param ref
5120      * @param myName
5121      * @param parser
5122      */
5123     private void handleGenusHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5124         String parentStr = myName.getSubtribeStr();
5125         Rank r = Rank.SUBTRIBE();
5126         if (parentStr == null){
5127             parentStr = myName.getTribeStr();
5128             r = Rank.TRIBE();
5129         }
5130         if (parentStr == null){
5131             parentStr = myName.getSubfamilyStr();
5132             r = Rank.SUBFAMILY();
5133         }
5134         if (parentStr == null){
5135             parentStr = myName.getFamilyStr();
5136             r = Rank.FAMILY();
5137         }
5138         if(parentStr!=null){
5139             INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5140             Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5141             //                    importer.getTaxonService().save(parent);
5142             //                    parent = CdmBase.deproxy(parent, Taxon.class);
5143
5144             boolean parentDoesNotExist = true;
5145             for (TaxonNode p : classification.getAllNodes()){
5146                 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5147                     //                        System.out.println(p.getTaxon().getUuid());
5148                     //                        System.out.println(parent.getUuid());
5149                     parentDoesNotExist = false;
5150                     parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5151                     break;
5152                 }
5153             }
5154             //                if(parentDoesNotExists) {
5155             //                    importer.getTaxonService().save(parent);
5156             //                    parent = CdmBase.deproxy(parent, Taxon.class);
5157             //                    lookForParentNode(parentNameName, parent, ref,myName);
5158             //                }
5159             if(parentDoesNotExist) {
5160                 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5161                 if(tmp ==null){
5162
5163                     parent=Taxon.NewInstance(parentNameName, ref);
5164                     importer.getTaxonService().save(parent);
5165                     parent = CdmBase.deproxy(parent, Taxon.class);
5166                 } else {
5167                     parent=tmp;
5168                 }
5169                 lookForParentNode(parentNameName, parent, ref,myName);
5170
5171             }
5172             hierarchy.put(r,parent);
5173         }
5174     }
5175
5176     /**
5177      * @param ref
5178      * @param myName
5179      * @param parser
5180      */
5181     private void handleSubgenusHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5182         String parentStr = myName.getGenusStr();
5183         Rank r = Rank.GENUS();
5184
5185         if(parentStr==null){
5186             parentStr = myName.getSubtribeStr();
5187             r = Rank.SUBTRIBE();
5188         }
5189         if (parentStr == null){
5190             parentStr = myName.getTribeStr();
5191             r = Rank.TRIBE();
5192         }
5193         if (parentStr == null){
5194             parentStr = myName.getSubfamilyStr();
5195             r = Rank.SUBFAMILY();
5196         }
5197         if (parentStr == null){
5198             parentStr = myName.getFamilyStr();
5199             r = Rank.FAMILY();
5200         }
5201         if(parentStr!=null){
5202             INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5203             Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5204             //                    importer.getTaxonService().save(parent);
5205             //                    parent = CdmBase.deproxy(parent, Taxon.class);
5206
5207             boolean parentDoesNotExists = true;
5208             for (TaxonNode p : classification.getAllNodes()){
5209                 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5210                     //                        System.out.println(p.getTaxon().getUuid());
5211                     //                        System.out.println(parent.getUuid());
5212                     parentDoesNotExists = false;
5213                     parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5214                     break;
5215                 }
5216             }
5217             //                if(parentDoesNotExists) {
5218             //                    importer.getTaxonService().save(parent);
5219             //                    parent = CdmBase.deproxy(parent, Taxon.class);
5220             //                    lookForParentNode(parentNameName, parent, ref,myName);
5221             //                }
5222             if(parentDoesNotExists) {
5223                 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5224                 if(tmp ==null)
5225                 {
5226                     parent=Taxon.NewInstance(parentNameName, ref);
5227                     importer.getTaxonService().save(parent);
5228                     parent = CdmBase.deproxy(parent, Taxon.class);
5229                 } else {
5230                     parent=tmp;
5231                 }
5232                 lookForParentNode(parentNameName, parent, ref,myName);
5233
5234             }
5235             hierarchy.put(r,parent);
5236         }
5237     }
5238
5239     /**
5240      * @param ref
5241      * @param myName
5242      * @param parser
5243      */
5244     private void handleSpeciesHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5245         String parentStr = myName.getSubgenusStr();
5246         Rank r = Rank.SUBGENUS();
5247
5248         if(parentStr==null){
5249             parentStr = myName.getGenusStr();
5250             r = Rank.GENUS();
5251         }
5252
5253         if(parentStr==null){
5254             parentStr = myName.getSubtribeStr();
5255             r = Rank.SUBTRIBE();
5256         }
5257         if (parentStr == null){
5258             parentStr = myName.getTribeStr();
5259             r = Rank.TRIBE();
5260         }
5261         if (parentStr == null){
5262             parentStr = myName.getSubfamilyStr();
5263             r = Rank.SUBFAMILY();
5264         }
5265         if (parentStr == null){
5266             parentStr = myName.getFamilyStr();
5267             r = Rank.FAMILY();
5268         }
5269         if(parentStr!=null){
5270             Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5271             //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5272             hierarchy.put(r,parent);
5273         }
5274     }
5275
5276     /**
5277      * @param ref
5278      * @param myName
5279      * @param parser
5280      */
5281     private void handleSubSpeciesHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5282         String parentStr = myName.getSpeciesStr();
5283         Rank r = Rank.SPECIES();
5284
5285
5286         if(parentStr==null){
5287             parentStr = myName.getSubgenusStr();
5288             r = Rank.SUBGENUS();
5289         }
5290
5291         if(parentStr==null){
5292             parentStr = myName.getGenusStr();
5293             r = Rank.GENUS();
5294         }
5295
5296         if(parentStr==null){
5297             parentStr = myName.getSubtribeStr();
5298             r = Rank.SUBTRIBE();
5299         }
5300         if (parentStr == null){
5301             parentStr = myName.getTribeStr();
5302             r = Rank.TRIBE();
5303         }
5304         if (parentStr == null){
5305             parentStr = myName.getSubfamilyStr();
5306             r = Rank.SUBFAMILY();
5307         }
5308         if (parentStr == null){
5309             parentStr = myName.getFamilyStr();
5310             r = Rank.FAMILY();
5311         }
5312         if(parentStr!=null){
5313             Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5314             //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5315             hierarchy.put(r,parent);
5316         }
5317     }
5318
5319
5320     /**
5321      * @param ref
5322      * @param myName
5323      * @param parser
5324      */
5325     private void handleFormHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5326         String parentStr = myName.getSubspeciesStr();
5327         Rank r = Rank.SUBSPECIES();
5328
5329
5330         if(parentStr==null){
5331             parentStr = myName.getSpeciesStr();
5332             r = Rank.SPECIES();
5333         }
5334
5335         if(parentStr==null){
5336             parentStr = myName.getSubgenusStr();
5337             r = Rank.SUBGENUS();
5338         }
5339
5340         if(parentStr==null){
5341             parentStr = myName.getGenusStr();
5342             r = Rank.GENUS();
5343         }
5344
5345         if(parentStr==null){
5346             parentStr = myName.getSubtribeStr();
5347             r = Rank.SUBTRIBE();
5348         }
5349         if (parentStr == null){
5350             parentStr = myName.getTribeStr();
5351             r = Rank.TRIBE();
5352         }
5353         if (parentStr == null){
5354             parentStr = myName.getSubfamilyStr();
5355             r = Rank.SUBFAMILY();
5356         }
5357         if (parentStr == null){
5358             parentStr = myName.getFamilyStr();
5359             r = Rank.FAMILY();
5360         }
5361         if(parentStr!=null){
5362             Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5363             //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5364             hierarchy.put(r,parent);
5365         }
5366     }
5367
5368     /**
5369      * @param ref
5370      * @param myName
5371      * @param parser
5372      */
5373     private void handleVarietyHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5374         String parentStr = myName.getSubspeciesStr();
5375         Rank r = Rank.SUBSPECIES();
5376
5377         if(parentStr==null){
5378             parentStr = myName.getSpeciesStr();
5379             r = Rank.SPECIES();
5380         }
5381
5382         if(parentStr==null){
5383             parentStr = myName.getSubgenusStr();
5384             r = Rank.SUBGENUS();
5385         }
5386
5387         if(parentStr==null){
5388             parentStr = myName.getGenusStr();
5389             r = Rank.GENUS();
5390         }
5391
5392         if(parentStr==null){
5393             parentStr = myName.getSubtribeStr();
5394             r = Rank.SUBTRIBE();
5395         }
5396         if (parentStr == null){
5397             parentStr = myName.getTribeStr();
5398             r = Rank.TRIBE();
5399         }
5400         if (parentStr == null){
5401             parentStr = myName.getSubfamilyStr();
5402             r = Rank.SUBFAMILY();
5403         }
5404         if (parentStr == null){
5405             parentStr = myName.getFamilyStr();
5406             r = Rank.FAMILY();
5407         }
5408         if(parentStr!=null){
5409             Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5410             //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5411             hierarchy.put(r,parent);
5412         }
5413     }
5414
5415     /**
5416      * @param ref
5417      * @param myName
5418      * @param parser
5419      * @param parentStr
5420      * @param r
5421      * @return
5422      */
5423     private Taxon handleParentName(Reference ref, MyName myName, INonViralNameParser<?> parser, String parentStr, Rank r) {
5424         INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5425         Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5426         //                    importer.getTaxonService().save(parent);
5427         //                    parent = CdmBase.deproxy(parent, Taxon.class);
5428
5429         boolean parentDoesNotExists = true;
5430         for (TaxonNode p : classification.getAllNodes()){
5431             if(p.getTaxon().getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(parent.getTitleCache().split("sec.")[0].trim())) {
5432                 //                        System.out.println(p.getTaxon().getUuid());
5433                 //                        System.out.println(parent.getUuid());
5434                 parentDoesNotExists = false;
5435                 parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5436                 break;
5437             }
5438         }
5439         if(parentDoesNotExists) {
5440             Taxon tmp = findMatchingTaxon(parentNameName,ref);
5441             //                    System.out.println("FOUND PARENT "+tmp.toString()+" for "+parentNameName.toString());
5442             if(tmp ==null){
5443
5444                 parent=Taxon.NewInstance(parentNameName, ref);
5445                 importer.getTaxonService().save(parent);
5446
5447             } else {
5448                 parent=tmp;
5449             }
5450             lookForParentNode(parentNameName, parent, ref,myName);
5451
5452         }
5453         return parent;
5454     }
5455
5456     private void addNameDifferenceToFile(String originalname, String atomisedname){
5457         try{
5458             FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NamesDifferent_"+classification.getTitleCache()+".txt",true);
5459             BufferedWriter out = new BufferedWriter(fstream);
5460             out.write(originalname+" (original) versus "+replaceNull(atomisedname)+" (atomised) \n");
5461             //Close the output stream
5462             out.close();
5463         }catch (Exception e){//Catch exception if any
5464             System.err.println("Error: " + e.getMessage());
5465         }
5466     }
5467     /**
5468      * @param name
5469      * @param author
5470      * @param nomenclaturalCode2
5471      * @param rank
5472      */
5473     private void addProblemNameToFile(String name, String author, NomenclaturalCode nomenclaturalCode2, Rank rank) {
5474         try{
5475             FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NameNotParsed.txt",true);
5476             BufferedWriter out = new BufferedWriter(fstream);
5477             out.write(name+"\t"+replaceNull(author)+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\n");
5478             //Close the output stream
5479             out.close();
5480         }catch (Exception e){//Catch exception if any
5481             System.err.println("Error: " + e.getMessage());
5482         }
5483     }
5484
5485
5486     /**
5487      * @param tnb
5488      * @param bestMatchingTaxon
5489      * @param insertAsExisting
5490      * @param refMods
5491      */
5492     private void logDecision(INonViralName tnb, Taxon bestMatchingTaxon, boolean insertAsExisting, Reference refMods) {
5493         try{
5494             FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "Decisions_"+classification.toString()+".txt", true);
5495             BufferedWriter out = new BufferedWriter(fstream);
5496             out.write(tnb.getTitleCache() + " sec. " + refMods + "\t" + bestMatchingTaxon.getTitleCache() + "\t" + insertAsExisting + "\n");
5497             //Close the output stream
5498             out.close();
5499         }catch (Exception e){//Catch exception if any
5500             System.err.println("Error: " + e.getMessage());
5501         }
5502     }
5503
5504
5505     @SuppressWarnings("unused")
5506     private String replaceNull(Object in){
5507         if (in == null) {
5508             return "";
5509         }
5510         if (in.getClass().equals(NomenclaturalCode.class)) {
5511             return ((NomenclaturalCode)in).getTitleCache();
5512         }
5513         return in.toString();
5514     }
5515
5516     /**
5517      * @param fullName
5518      * @param nomenclaturalCode2
5519      * @param rank
5520      */
5521     private void addProblemNameToFile(String type, String name, NomenclaturalCode nomenclaturalCode2, Rank rank, String problems) {
5522         try{
5523             FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NameNotParsed_"+classification.getTitleCache()+".txt",true);
5524             BufferedWriter out = new BufferedWriter(fstream);
5525             out.write(type+"\t"+name+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\t"+problems+"\n");
5526             //Close the output stream
5527             out.close();
5528         }catch (Exception e){//Catch exception if any
5529             System.err.println("Error: " + e.getMessage());
5530         }
5531
5532     }
5533
5534 }
5535
5536
5537