cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/taxonx2013/TaxonXTreatmentExtractor.java

   1 /**
   2  * Copyright (C) 2013 EDIT
   3  * European Distributed Institute of Taxonomy
   4  * http://www.e-taxonomy.eu
   5  *
   6  * The contents of this file are subject to the Mozilla Public License Version 1.1
   7  * See LICENSE.TXT at the top of this package for the full license terms.
   8  */
   9 package eu.etaxonomy.cdm.io.taxonx2013;
  10
  11 import java.io.BufferedWriter;
  12 import java.io.File;
  13 import java.io.FileWriter;
  14 import java.io.IOException;
  15 import java.net.URI;
  16 import java.util.ArrayList;
  17 import java.util.Arrays;
  18 import java.util.HashMap;
  19 import java.util.List;
  20 import java.util.Map;
  21 import java.util.Set;
  22 import java.util.UUID;
  23 import java.util.regex.Matcher;
  24 import java.util.regex.Pattern;
  25
  26 import javax.xml.transform.TransformerException;
  27 import javax.xml.transform.TransformerFactoryConfigurationError;
  28
  29 import org.apache.commons.lang.StringUtils;
  30 import org.apache.log4j.Logger;
  31 import org.w3c.dom.Node;
  32 import org.w3c.dom.NodeList;
  33
  34 import com.ibm.lsid.MalformedLSIDException;
  35
  36 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
  37 import eu.etaxonomy.cdm.api.service.pager.Pager;
  38 import eu.etaxonomy.cdm.model.agent.AgentBase;
  39 import eu.etaxonomy.cdm.model.agent.Person;
  40 import eu.etaxonomy.cdm.model.common.CdmBase;
  41 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
  42 import eu.etaxonomy.cdm.model.common.LSID;
  43 import eu.etaxonomy.cdm.model.common.Language;
  44 import eu.etaxonomy.cdm.model.description.Feature;
  45 import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
  46 import eu.etaxonomy.cdm.model.description.TaxonDescription;
  47 import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
  48 import eu.etaxonomy.cdm.model.description.TextData;
  49 import eu.etaxonomy.cdm.model.name.INonViralName;
  50 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
  51 import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
  52 import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
  53 import eu.etaxonomy.cdm.model.name.Rank;
  54 import eu.etaxonomy.cdm.model.name.TaxonName;
  55 import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
  56 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
  57 import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
  58 import eu.etaxonomy.cdm.model.reference.Reference;
  59 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
  60 import eu.etaxonomy.cdm.model.taxon.Classification;
  61 import eu.etaxonomy.cdm.model.taxon.Synonym;
  62 import eu.etaxonomy.cdm.model.taxon.SynonymType;
  63 import eu.etaxonomy.cdm.model.taxon.Taxon;
  64 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
  65 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
  66 import eu.etaxonomy.cdm.model.term.FeatureNode;
  67 import eu.etaxonomy.cdm.model.term.FeatureTree;
  68 import eu.etaxonomy.cdm.persistence.dto.UuidAndTitleCache;
  69 import eu.etaxonomy.cdm.persistence.query.MatchMode;
  70 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
  71 import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
  72 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
  73 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImplRegExBase;
  74
  75 /**
  76  * @author pkelbert
  77  * @since 2 avr. 2013
  78  *
  79  */
  80 public class TaxonXTreatmentExtractor extends TaxonXExtractor{
  81
  82     private static final String PUBLICATION_YEAR = "publicationYear";
  83
  84         private static final Logger logger = Logger.getLogger(TaxonXTreatmentExtractor.class);
  85
  86     private static final String notMarkedUp = "Not marked-up";
  87     private static final UUID proIbioTreeUUID = UUID.fromString("2c49f506-c7f7-44de-a8b9-2e695de3769c");
  88     private static final UUID OtherUUID = UUID.fromString("6465f8aa-2175-446f-807e-7163994b120f");
  89     private static final UUID NotMarkedUpUUID = UUID.fromString("796fe3a5-2c9c-4a89-b298-7598ca944063");
  90     private static final boolean skippQuestion = true;
  91
  92     private final NomenclaturalCode nomenclaturalCode;
  93     private Classification classification;
  94
  95     private  String treatmentMainName,originalTreatmentName;
  96
  97     private final HashMap<String,Map<String,String>> namesMap = new HashMap<String, Map<String,String>>();
  98
  99
 100     private final Pattern keypattern = Pattern.compile("^(\\d+.*|-\\d+.*)");
 101     private final Pattern keypatternend = Pattern.compile("^.+?\\d$");
 102
 103     private boolean maxRankRespected =false;
 104     private Map<String, Feature> featuresMap;
 105
 106     private MyName currentMyName;
 107
 108     private Reference sourceUrlRef;
 109
 110     private String followingText;  //text element immediately following a tax:name in tax:nomenclature TODO move do state
 111     private String usedFollowingTextPrefix; //the part of the following text which has been used during taxon name creation
 112
 113     private final TaxonXAddSources sourceHandler = new TaxonXAddSources();
 114
 115     /**
 116      * @param nomenclaturalCode
 117      * @param classification
 118      * @param importer
 119      * @param configState
 120      */
 121     public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode, Classification classification, TaxonXImport importer,
 122             TaxonXImportState configState,Map<String, Feature> featuresMap,  Reference urlSource) {
 123         this.nomenclaturalCode=nomenclaturalCode;
 124         this.classification = classification;
 125         this.importer=importer;
 126         this.state2=configState;
 127         this.featuresMap=featuresMap;
 128         this.sourceUrlRef =urlSource;
 129         prepareCollectors(configState, importer.getAgentService());
 130         this.sourceHandler.setSourceUrlRef(sourceUrlRef);
 131         this.sourceHandler.setImporter(importer);
 132         this.sourceHandler.setConfigState(configState);
 133     }
 134
 135     /**
 136      * extracts all the treament information and save them
 137      * @param treatmentnode: the XML Node
 138      * @param tosave: the list of object to save into the CDM
 139      * @param refMods: the reference extracted from the MODS
 140      * @param sourceName: the URI of the document
 141      */
 142     @SuppressWarnings({ "rawtypes", "unused" })
 143
 144     protected void extractTreatment(Node treatmentnode, Reference refMods, URI sourceName) {        logger.info("extractTreatment");
 145         List<TaxonName> namesToSave = new ArrayList<TaxonName>();
 146         NodeList children = treatmentnode.getChildNodes();
 147         Taxon acceptedTaxon =null;
 148         boolean hasRefgroup=false;
 149
 150         //needed?
 151         for (int i=0;i<children.getLength();i++){
 152             if(children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group")) {
 153                 hasRefgroup=true;
 154             }
 155         }
 156
 157         for (int i=0;i<children.getLength();i++){
 158                 Node child = children.item(i);
 159                 acceptedTaxon = handleSingleNode(refMods, sourceName, namesToSave, child, acceptedTaxon);
 160         }
 161         //        logger.info("saveUpdateNames");
 162         if (maxRankRespected){
 163             importer.getNameService().saveOrUpdate(namesToSave);
 164             importer.getClassificationService().saveOrUpdate(classification);
 165             //logger.info("saveUpdateNames-ok");
 166         }
 167
 168         buildFeatureTree();
 169     }
 170
 171         private Taxon handleSingleNode(Reference refMods, URI sourceName,
 172                         List<TaxonName> namesToSave, Node child, Taxon acceptedTaxon) {
 173                 Taxon defaultTaxon =null;
 174
 175                 String nodeName = child.getNodeName();
 176                 if (nodeName.equalsIgnoreCase("tax:nomenclature")){
 177                     NodeList nomenclatureChildren = child.getChildNodes();
 178                     boolean containsName = false;
 179                     for(int k=0; k<nomenclatureChildren.getLength(); k++){
 180                         if(nomenclatureChildren.item(k).getNodeName().equalsIgnoreCase("tax:name")){
 181                             containsName=true;
 182                             break;
 183                         }
 184                     }
 185                     if (containsName){
 186                         reloadClassification();
 187                         //extract "main" the scientific name
 188                         try{
 189                             acceptedTaxon = extractNomenclature(child, namesToSave, refMods);
 190                         }catch(ClassCastException e){
 191                                 //FIXME exception handling
 192                                 e.printStackTrace();
 193                         }
 194                         //                    System.out.println("acceptedTaxon : "+acceptedTaxon);
 195                     }
 196                 }else if (nodeName.equalsIgnoreCase("tax:ref_group") && maxRankRespected){
 197                     reloadClassification();
 198                     //extract the References within the document
 199                     extractReferences(child, namesToSave ,acceptedTaxon,refMods);
 200                 }else if (nodeName.equalsIgnoreCase("tax:div") &&
 201                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected){
 202                     File file = new File(TaxonXImport.LOG_FOLDER + "multipleTaxonX.txt");
 203                     FileWriter writer;
 204                     try {
 205                         writer = new FileWriter(file ,true);
 206                         writer.write(sourceName+"\n");
 207                         writer.flush();
 208                         writer.close();
 209                     } catch (IOException e1) {
 210                         // TODO Auto-generated catch block
 211                         logger.error(e1.getMessage());
 212                     }
 213                     //                String multiple = askMultiple(children.item(i));
 214                     String multiple = "Other";
 215                     if (multiple.equalsIgnoreCase("other")) {
 216                         extractSpecificFeatureNotStructured(child,acceptedTaxon, defaultTaxon,namesToSave, refMods,multiple);
 217                     }else if (multiple.equalsIgnoreCase("synonyms")) {
 218                         try{
 219                             extractSynonyms(child,acceptedTaxon, refMods, null);
 220                         }catch(NullPointerException e){
 221                             logger.warn("the accepted taxon is maybe null");
 222                         }
 223                     }else if(multiple.equalsIgnoreCase("material examined")){
 224                         extractMaterials(child, acceptedTaxon, refMods, namesToSave);
 225                     }else if (multiple.equalsIgnoreCase("distribution")){
 226                         extractDistribution(child, acceptedTaxon, defaultTaxon, namesToSave, refMods);
 227                     }else if (multiple.equalsIgnoreCase("type status")){
 228                         extractDescriptionWithReference(child, acceptedTaxon, defaultTaxon,refMods, "TypeStatus");
 229                     }else if (multiple.equalsIgnoreCase("vernacular name")){
 230                         extractDescriptionWithReference(child, acceptedTaxon, defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
 231                     }else{
 232                         extractSpecificFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,multiple);
 233                     }
 234                 }
 235                 else if(nodeName.equalsIgnoreCase("tax:div") &&
 236                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected){
 237                     extractFeature(child,acceptedTaxon,defaultTaxon, namesToSave, refMods, Feature.BIOLOGY_ECOLOGY());
 238                 }
 239                 else if(nodeName.equalsIgnoreCase("tax:div") &&
 240                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("vernacularName") && maxRankRespected){
 241                     extractDescriptionWithReference(child, acceptedTaxon,defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
 242                 }
 243                 else if(nodeName.equalsIgnoreCase("tax:div") &&
 244                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected){
 245                     extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DESCRIPTION());
 246                 }
 247                 else if(nodeName.equalsIgnoreCase("tax:div") &&
 248                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected){
 249                     extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,Feature.DIAGNOSIS());
 250                 }
 251                 else if(nodeName.equalsIgnoreCase("tax:div") &&
 252                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected){
 253                     extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DISCUSSION());
 254                 }
 255                 else if(nodeName.equalsIgnoreCase("tax:div") &&
 256                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("note") && maxRankRespected){
 257                     extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DESCRIPTION());
 258                 }
 259                 else if(nodeName.equalsIgnoreCase("tax:div") &&
 260                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected){
 261                     extractDistribution(child,acceptedTaxon,defaultTaxon,namesToSave, refMods);
 262                 }
 263                 else if(nodeName.equalsIgnoreCase("tax:div") &&
 264                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected){
 265                     extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave,refMods,Feature.ETYMOLOGY());
 266                 }
 267                 else if(nodeName.equalsIgnoreCase("tax:div") &&
 268                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected){
 269                     extractMaterials(child,acceptedTaxon, refMods, namesToSave);
 270                 }
 271                 else if(nodeName.equalsIgnoreCase("tax:figure") && maxRankRespected){
 272                     extractSpecificFeature(child,acceptedTaxon,defaultTaxon, namesToSave, refMods, "Figure");
 273                 }
 274                 else if(nodeName.equalsIgnoreCase("tax:div") &&
 275                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") && maxRankRespected){
 276                     extractSpecificFeature(child, acceptedTaxon,defaultTaxon, namesToSave, refMods, "table");
 277                 }else if(nodeName.equalsIgnoreCase("tax:div") &&
 278                                 child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected){
 279                     //TODO IGNORE keys for the moment
 280                     //extractKey(children.item(i),acceptedTaxon, nameToSave,source, refMods);
 281                     extractSpecificFeatureNotStructured(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,"Keys - unparsed");
 282                 }
 283                 else{
 284                     if (! nodeName.equalsIgnoreCase("tax:pb")){
 285                         //logger.info("ANOTHER KIND OF NODES: "+children.item(i).getNodeName()+", "+children.item(i).getAttributes());
 286                         if (child.getAttributes() !=null) {
 287                             logger.info("First Attribute: " + child.getAttributes().item(0));
 288                         }
 289                         extractSpecificFeatureNotStructured(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, notMarkedUp);
 290                     }else{
 291                         //FIXME
 292                         logger.warn("Unhandled");
 293                     }
 294                 }
 295                 return acceptedTaxon;
 296         }
 297
 298
 299     protected Map<String,Feature> getFeaturesUsed(){
 300         return featuresMap;
 301     }
 302     /**
 303      *
 304      */
 305     private void buildFeatureTree() {
 306         logger.info("buildFeatureTree");
 307         FeatureTree proibiospheretree = importer.getFeatureTreeService().find(proIbioTreeUUID);
 308         if (proibiospheretree == null){
 309             List<FeatureTree> trees = importer.getFeatureTreeService().list(FeatureTree.class, null, null, null, null);
 310             if (trees.size()==1) {
 311                 FeatureTree<Feature> ft = trees.get(0);
 312                 if (featuresMap==null) {
 313                     featuresMap=new HashMap<String, Feature>();
 314                 }
 315                 for (Feature feature: ft.getDistinctFeatures()){
 316                     if(feature!=null) {
 317                         featuresMap.put(feature.getTitleCache(), feature);
 318                     }
 319                 }
 320             }
 321             proibiospheretree = FeatureTree.NewInstance();
 322             proibiospheretree.setUuid(proIbioTreeUUID);
 323         }
 324         //        FeatureNode root = proibiospheretree.getRoot();
 325         FeatureNode root2 = proibiospheretree.getRoot();
 326         if (root2 != null){
 327             int nbChildren = root2.getChildCount()-1;
 328             while (nbChildren>-1){
 329                 try{
 330                     root2.removeChild(nbChildren);
 331                 }catch(Exception e){logger.warn("Can't remove child from FeatureTree "+e);}
 332                 nbChildren --;
 333             }
 334
 335         }
 336
 337         for (Feature feature:featuresMap.values()) {
 338             root2.addChild(FeatureNode.NewInstance(feature));
 339         }
 340         importer.getFeatureTreeService().saveOrUpdate(proibiospheretree);
 341
 342     }
 343
 344
 345     /**
 346      * @param keys
 347      * @param acceptedTaxon: the current acceptedTaxon
 348      * @param nametosave: the list of objects to save into the CDM
 349      * @param refMods: the current reference extracted from the MODS
 350      */
 351     /*   @SuppressWarnings("rawtypes")
 352     private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonName> nametosave, Reference refMods) {
 353         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
 354
 355         NodeList children = keys.getChildNodes();
 356         String key="";
 357         PolytomousKey poly =  PolytomousKey.NewInstance();
 358         poly.addSource(OriginalSourceType.Import, null,null,refMods,null);
 359         poly.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
 360         poly.addTaxonomicScope(acceptedTaxon);
 361         poly.setTitleCache("bloup", true);
 362         //        poly.addCoveredTaxon(acceptedTaxon);
 363         PolytomousKeyNode root = poly.getRoot();
 364         PolytomousKeyNode previous = null,tmpKey=null;
 365         Taxon taxonKey=null;
 366         List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
 367
 368         //        String fullContent = keys.getTextContent();
 369         for (int i=0;i<children.getLength();i++){
 370             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 371                 NodeList paragraph = children.item(i).getChildNodes();
 372                 key="";
 373                 taxonKey=null;
 374                 for (int j=0;j<paragraph.getLength();j++){
 375                     if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
 376                         if (! paragraph.item(j).getTextContent().trim().isEmpty()){
 377                             key+=paragraph.item(j).getTextContent().trim();
 378                             //                            logger.info("KEY: "+j+"--"+key);
 379                         }
 380                     }
 381                     if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
 382                         taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
 383                     }
 384                 }
 385                 //                logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
 386                 if (keypattern.matcher(key).matches()){
 387                     tmpKey = PolytomousKeyNode.NewInstance(key);
 388                     if (taxonKey!=null) {
 389                         tmpKey.setTaxon(taxonKey);
 390                     }
 391                     polyNodes.add(tmpKey);
 392                     if (previous == null) {
 393                         root.addChild(tmpKey);
 394                     } else {
 395                         previous.addChild(tmpKey);
 396                     }
 397                 }else{
 398                     if (!key.isEmpty()){
 399                         tmpKey=PolytomousKeyNode.NewInstance(key);
 400                         if (taxonKey!=null) {
 401                             tmpKey.setTaxon(taxonKey);
 402                         }
 403                         polyNodes.add(tmpKey);
 404                         if (keypatternend.matcher(key).matches()) {
 405                             root.addChild(tmpKey);
 406                             previous=tmpKey;
 407                         } else{
 408                             previous.addChild(tmpKey);
 409                         }
 410
 411                     }
 412                 }
 413             }
 414         }
 415         importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
 416         importer.getPolytomousKeyService().saveOrUpdate(poly);
 417     }
 418 */
 419
 420
 421     /**
 422      * @param taxons: the XML Nodegroup
 423      * @param nametosave: the list of objects to save into the CDM
 424      * @param acceptedTaxon: the current accepted Taxon
 425      * @param refMods: the current reference extracted from the MODS
 426      *
 427      * @return Taxon object built
 428      */
 429     @SuppressWarnings({ "rawtypes", "unused" })
 430     private TaxonName getTaxonNameFromXML(Node taxons, List<TaxonName> nametosave, Reference refMods, boolean isSynonym) {
 431         //        logger.info("getTaxonFromXML");
 432         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 433         logger.info("getTaxonNameFromXML");
 434         TaxonName nameToBeFilled = null;
 435
 436         currentMyName=new MyName(isSynonym);
 437
 438         NomenclaturalStatusType statusType = null;
 439         try {
 440                 String followingText = null;  //needs to be checked if following text is possible
 441             currentMyName = extractScientificName(taxons,refMods, null);
 442         } catch (TransformerFactoryConfigurationError e1) {
 443             logger.warn(e1);
 444         } catch (TransformerException e1) {
 445             logger.warn(e1);
 446         }
 447         /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
 448
 449         nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
 450         if (nameToBeFilled.hasProblem() &&
 451                 !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
 452             //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
 453             addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
 454             nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
 455         }
 456
 457         nameToBeFilled = getTaxonName(nameToBeFilled,nametosave,statusType);
 458          */
 459         nameToBeFilled = currentMyName.getTaxonName();
 460         return nameToBeFilled;
 461
 462     }
 463
 464
 465     /**
 466      *
 467      */
 468     private void reloadClassification() {
 469         logger.info("reloadClassification");
 470         Classification cl = importer.getClassificationService().find(classification.getUuid());
 471         if (cl != null){
 472             classification = cl;
 473         }else{
 474             importer.getClassificationService().saveOrUpdate(classification);
 475             classification = importer.getClassificationService().find(classification.getUuid());
 476         }
 477     }
 478
 479     //    /**
 480     //     * Create a Taxon for the current NameBase, based on the current reference
 481     //     * @param taxonName
 482     //     * @param refMods: the current reference extracted from the MODS
 483     //     * @return Taxon
 484     //     */
 485     //    @SuppressWarnings({ "unused", "rawtypes" })
 486     //    private Taxon getTaxon(TaxonName taxonName, Reference refMods) {
 487     //        Taxon t = new Taxon(taxonName,null );
 488     //        if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
 489     //            t.setSec(configState.getConfig().getSecundum());
 490     //            logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
 491     //        }
 492     //        /*<<<<<<< .courant
 493     //        boolean sourceExists=false;
 494     //        Set<IdentifiableSource> sources = t.getSources();
 495     //        for (IdentifiableSource src : sources){
 496     //            String micro = src.getCitationMicroReference();
 497     //            Reference r = src.getCitation();
 498     //            if (r.equals(refMods) && micro == null) {
 499     //                sourceExists=true;
 500     //            }
 501     //        }
 502     //        if(!sourceExists) {
 503     //            t.addSource(null,null,refMods,null);
 504     //        }
 505     //=======*/
 506     //        t.addSource(OriginalSourceType.Import,null,null,refMods,null);
 507     //        t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
 508     //        return t;
 509     //    }
 510
 511     private void  extractDescriptionWithReference(Node typestatus, Taxon acceptedTaxon, Taxon defaultTaxon, Reference refMods,
 512             String featureName) {
 513         //        System.out.println("extractDescriptionWithReference !");
 514         logger.info("extractDescriptionWithReference");
 515         NodeList children = typestatus.getChildNodes();
 516
 517         Feature currentFeature=getFeatureObjectFromString(featureName);
 518
 519         String r="";String s="";
 520         for (int i=0;i<children.getLength();i++){
 521             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 522                 s+=children.item(i).getTextContent().trim();
 523             }
 524             if (children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
 525                 r+= children.item(i).getTextContent().trim();
 526             }
 527             if (s.indexOf(r)>-1) {
 528                 s=s.split(r)[0];
 529             }
 530         }
 531
 532         Reference currentref =  ReferenceFactory.newGeneric();
 533         if(!r.isEmpty()) {
 534             currentref.setTitleCache(r, true);
 535         } else {
 536             currentref=refMods;
 537         }
 538         setParticularDescription(s,acceptedTaxon,defaultTaxon, currentref, refMods,currentFeature);
 539     }
 540
 541     /**
 542      * @param nametosave
 543      * @param distribution: the XML node group
 544      * @param acceptedTaxon: the current accepted Taxon
 545      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
 546      * @param refMods: the current reference extracted from the MODS
 547      */
 548     @SuppressWarnings("rawtypes")
 549     private void extractDistribution(Node distribution, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonName> nametosave, Reference refMods) {
 550         logger.info("extractDistribution");
 551         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 552         NodeList children = distribution.getChildNodes();
 553         Map<Integer,List<MySpecimenOrObservation>> specimenOrObservations = new HashMap<Integer, List<MySpecimenOrObservation>>();
 554         Map<Integer,String> descriptionsFulltext = new HashMap<Integer,String>();
 555
 556         for (int i=0;i<children.getLength();i++){
 557             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 558                 NodeList paragraph = children.item(i).getChildNodes();
 559                 for (int j=0;j<paragraph.getLength();j++){
 560                     if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
 561                         extractText(descriptionsFulltext, i, paragraph.item(j));
 562                     }
 563                     else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
 564                         extractInLine(nametosave, refMods, descriptionsFulltext, i,paragraph.item(j));
 565                     }
 566                     else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")){
 567                         MySpecimenOrObservation specimenOrObservation = new MySpecimenOrObservation();
 568                         DerivedUnit derivedUnitBase = null;
 569                         specimenOrObservation = extractSpecimenOrObservation(paragraph.item(j), derivedUnitBase, SpecimenOrObservationType.DerivedUnit, null);
 570                         extractTextFromSpecimenOrObservation(specimenOrObservations, descriptionsFulltext, i, specimenOrObservation);
 571                     }
 572                 }
 573             }
 574         }
 575
 576         int m=0;
 577         for (int k:descriptionsFulltext.keySet()) {
 578             if (k>m) {
 579                 m=k;
 580             }
 581         }
 582         for (int k:specimenOrObservations.keySet()) {
 583             if (k>m) {
 584                 m=k;
 585             }
 586         }
 587
 588
 589         if(acceptedTaxon!=null){
 590             TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
 591             Feature currentFeature = Feature.DISTRIBUTION();
 592             //        DerivedUnit derivedUnitBase=null;
 593             //        String descr="";
 594             for (int k=0;k<=m;k++){
 595                 if(specimenOrObservations.keySet().contains(k)){
 596                     for (MySpecimenOrObservation soo:specimenOrObservations.get(k) ) {
 597                         handleAssociation(acceptedTaxon, refMods, td, soo);
 598                     }
 599                 }
 600
 601                 if (descriptionsFulltext.keySet().contains(k)){
 602                     if (!stringIsEmpty(descriptionsFulltext.get(k).trim()) && (descriptionsFulltext.get(k).startsWith("Hab.") || descriptionsFulltext.get(k).startsWith("Habitat"))){
 603                         setParticularDescription(descriptionsFulltext.get(k),acceptedTaxon,defaultTaxon, refMods, Feature.HABITAT());
 604                         break;
 605                     }
 606                     else{
 607                         handleTextData(refMods, descriptionsFulltext, td, currentFeature, k);
 608                     }
 609                 }
 610
 611                 if (descriptionsFulltext.keySet().contains(k) || specimenOrObservations.keySet().contains(k)){
 612                     acceptedTaxon.addDescription(td);
 613                     sourceHandler.addAndSaveSource(refMods, td, null);
 614                     importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 615                 }
 616             }
 617         }
 618     }
 619
 620     /**
 621      * @param refMods
 622      * @param descriptionsFulltext
 623      * @param td
 624      * @param currentFeature
 625      * @param k
 626      */
 627     private void handleTextData(Reference refMods, Map<Integer, String> descriptionsFulltext, TaxonDescription td,
 628             Feature currentFeature, int k) {
 629         //logger.info("handleTextData");
 630         TextData textData = TextData.NewInstance();
 631         textData.setFeature(currentFeature);
 632         textData.putText(Language.UNKNOWN_LANGUAGE(), descriptionsFulltext.get(k));
 633         sourceHandler.addSource(refMods, textData);
 634         td.addElement(textData);
 635     }
 636
 637     /**
 638      * @param acceptedTaxon
 639      * @param refMods
 640      * @param td
 641      * @param soo
 642      */
 643     private void handleAssociation(Taxon acceptedTaxon, Reference refMods, TaxonDescription td, MySpecimenOrObservation soo) {
 644         logger.info("handleAssociation");
 645         String descr=soo.getDescr();
 646         DerivedUnit derivedUnitBase = soo.getDerivedUnitBase();
 647
 648         sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
 649
 650         TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
 651
 652         Feature feature=null;
 653         feature = makeFeature(derivedUnitBase);
 654         if(!StringUtils.isEmpty(descr)) {
 655             derivedUnitBase.setTitleCache(descr, true);
 656         }
 657
 658         IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
 659
 660         taxonDescription.addElement(indAssociation);
 661         sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
 662         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 663         td.setDescribedSpecimenOrObservation(soo.getDerivedUnitBase());
 664     }
 665
 666     /**
 667      * create an individualAssociation
 668      * @param refMods
 669      * @param derivedUnitBase
 670      * @param feature
 671      * @return
 672      */
 673     private IndividualsAssociation createIndividualAssociation(Reference refMods, DerivedUnit derivedUnitBase,
 674             Feature feature) {
 675         logger.info("createIndividualAssociation");
 676         IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
 677         indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
 678         indAssociation.setFeature(feature);
 679         indAssociation = sourceHandler.addSource(refMods, indAssociation);
 680         return indAssociation;
 681     }
 682
 683     /**
 684      * @param specimenOrObservations
 685      * @param descriptionsFulltext
 686      * @param i
 687      * @param specimenOrObservation
 688      */
 689     private void extractTextFromSpecimenOrObservation(Map<Integer, List<MySpecimenOrObservation>> specimenOrObservations,
 690             Map<Integer, String> descriptionsFulltext, int i, MySpecimenOrObservation specimenOrObservation) {
 691         logger.info("extractTextFromSpecimenOrObservation");
 692         List<MySpecimenOrObservation> speObsList = specimenOrObservations.get(i);
 693         if (speObsList == null) {
 694             speObsList=new ArrayList<MySpecimenOrObservation>();
 695         }
 696         speObsList.add(specimenOrObservation);
 697         specimenOrObservations.put(i,speObsList);
 698
 699         String s = specimenOrObservation.getDerivedUnitBase().toString();
 700         if (descriptionsFulltext.get(i) !=null){
 701             s = descriptionsFulltext.get(i)+" "+s;
 702         }
 703         descriptionsFulltext.put(i, s);
 704     }
 705
 706     /**
 707      * Extract the text with the inline link to a taxon
 708      * @param nametosave
 709      * @param refMods
 710      * @param descriptionsFulltext
 711      * @param i
 712      * @param paragraph
 713      */
 714     @SuppressWarnings("rawtypes")
 715     private void extractInLine(List<TaxonName> nametosave, Reference refMods, Map<Integer, String> descriptionsFulltext,
 716             int i, Node paragraph) {
 717         //logger.info("extractInLine");
 718         String inLine=getInlineTextForName(nametosave, refMods, paragraph);
 719         if (descriptionsFulltext.get(i) !=null){
 720             inLine = descriptionsFulltext.get(i)+inLine;
 721         }
 722         descriptionsFulltext.put(i, inLine);
 723     }
 724
 725     /**
 726      * Extract the raw text from a Node
 727      * @param descriptionsFulltext
 728      * @param node
 729      * @param j
 730      */
 731     private void extractText(Map<Integer, String> descriptionsFulltext, int i, Node node) {
 732         //logger.info("extractText");
 733         if(!node.getTextContent().trim().isEmpty()) {
 734             String s =node.getTextContent().trim();
 735             if (descriptionsFulltext.get(i) !=null){
 736                 s = descriptionsFulltext.get(i)+" "+s;
 737             }
 738             descriptionsFulltext.put(i, s);
 739         }
 740     }
 741
 742
 743     /**
 744      * @param materials: the XML node group
 745      * @param acceptedTaxon: the current accepted Taxon
 746      * @param refMods: the current reference extracted from the MODS
 747      */
 748     @SuppressWarnings("rawtypes")
 749     private void extractMaterials(Node materials, Taxon acceptedTaxon, Reference refMods,List<TaxonName> nametosave) {
 750         logger.info("EXTRACTMATERIALS");
 751         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 752         NodeList children = materials.getChildNodes();
 753         NodeList events = null;
 754         //        String descr="";
 755
 756
 757         for (int i=0;i<children.getLength();i++){
 758             String rawAssociation="";
 759             boolean added=false;
 760             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 761                 events = children.item(i).getChildNodes();
 762                 for(int k=0;k<events.getLength();k++){
 763                     if (events.item(k).getNodeName().equalsIgnoreCase("tax:name")){
 764                         String inLine= getInlineTextForName(nametosave, refMods, events.item(k));
 765                         if(!inLine.isEmpty()) {
 766                             rawAssociation+=inLine;
 767                         }
 768                     }
 769                     if (! events.item(k).getNodeName().equalsIgnoreCase("tax:name")
 770                             && !events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
 771                         rawAssociation+= events.item(k).getTextContent().trim();
 772                     }
 773                     if(events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
 774                         if (!containsDistinctLetters(rawAssociation.replaceAll(";",""))) {
 775                             rawAssociation="no description text";
 776                         }
 777                         added=true;
 778                         handleDerivedUnitFacadeAndBase(acceptedTaxon, refMods, events.item(k), rawAssociation);
 779                     }
 780                     if (!rawAssociation.isEmpty() && !added){
 781
 782                         Feature feature = Feature.MATERIALS_EXAMINED();
 783                         featuresMap.put(feature.getTitleCache(),feature);
 784
 785                         TextData textData = createTextData(rawAssociation, refMods, feature);
 786
 787                         if(! rawAssociation.isEmpty() && (acceptedTaxon!=null)){
 788                             TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
 789                             td.addElement(textData);
 790                             acceptedTaxon.addDescription(td);
 791                             sourceHandler.addAndSaveSource(refMods, td, null);
 792                         }
 793                         //                        DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
 794                         //                        derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
 795                         //
 796                         //                        TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
 797                         //                        acceptedTaxon.addDescription(taxonDescription);
 798                         //
 799                         //                        IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
 800                         //
 801                         //                        Feature feature = Feature.MATERIALS_EXAMINED();
 802                         //                        featuresMap.put(feature.getTitleCache(),feature);
 803                         //                        if(!StringUtils.isEmpty(rawAssociation)) {
 804                         //                            derivedUnitBase.setTitleCache(rawAssociation, true);
 805                         //                        }
 806                         //                        indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
 807                         //                        indAssociation.setFeature(feature);
 808                         //                        indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
 809                         //
 810                         //                        /*boolean sourceExists=false;
 811                         //                        Set<DescriptionElementSource> dsources = indAssociation.getSources();
 812                         //                        for (DescriptionElementSource src : dsources){
 813                         //                            String micro = src.getCitationMicroReference();
 814                         //                            Reference r = src.getCitation();
 815                         //                            if (r.equals(refMods) && micro == null) {
 816                         //                                sourceExists=true;
 817                         //                            }
 818                         //                        }
 819                         //                        if(!sourceExists) {
 820                         //                            indAssociation.addSource(null, null, refMods, null);
 821                         //                        }*/
 822                         //                        taxonDescription.addElement(indAssociation);
 823                         //                        taxonDescription.setTaxon(acceptedTaxon);
 824                         //                        taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
 825                         //
 826                         //                        /*sourceExists=false;
 827                         //                        Set<IdentifiableSource> sources = taxonDescription.getSources();
 828                         //                        for (IdentifiableSource src : sources){
 829                         //                            String micro = src.getCitationMicroReference();
 830                         //                            Reference r = src.getCitation();
 831                         //                            if (r.equals(refMods) && micro == null) {
 832                         //                                sourceExists=true;
 833                         //                            }
 834                         //                        }
 835                         //                        if(!sourceExists) {
 836                         //                            taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
 837                         //                        }*/
 838                         //
 839                         //                        importer.getDescriptionService().saveOrUpdate(taxonDescription);
 840                         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 841
 842                         rawAssociation="";
 843                     }
 844                 }
 845             }
 846         }
 847     }
 848
 849     /**
 850      * @param acceptedTaxon
 851      * @param refMods
 852      * @param events
 853      * @param rawAssociation
 854      * @param k
 855      */
 856     private void handleDerivedUnitFacadeAndBase(Taxon acceptedTaxon, Reference refMods, Node event,
 857             String rawAssociation) {
 858         logger.info("handleDerivedUnitFacadeAndBase");
 859         String descr;
 860         DerivedUnit derivedUnitBase;
 861         MySpecimenOrObservation myspecimenOrObservation;
 862         DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
 863         derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
 864
 865         sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
 866
 867         //TODO this may not always be correct, ask user
 868         TaxonName typifiableName = acceptedTaxon != null ?  acceptedTaxon.getName() : null;
 869         myspecimenOrObservation = extractSpecimenOrObservation(event,derivedUnitBase,SpecimenOrObservationType.DerivedUnit, typifiableName);
 870         derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
 871         descr=myspecimenOrObservation.getDescr();
 872
 873         sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
 874
 875         TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
 876
 877         Feature feature = makeFeature(derivedUnitBase);
 878         featuresMap.put(feature.getTitleCache(),feature);
 879         if(!StringUtils.isEmpty(descr)) {
 880             derivedUnitBase.setTitleCache(descr, true);
 881         }
 882
 883         IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
 884
 885         taxonDescription.addElement(indAssociation);
 886         sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
 887         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 888     }
 889
 890
 891
 892     /**
 893      * @param currentName
 894      * @param materials: the XML node group
 895      * @param acceptedTaxon: the current accepted Taxon
 896      * @param refMods: the current reference extracted from the MODS
 897      */
 898     private String extractMaterialsDirect(Node materials, Taxon acceptedTaxon, Reference refMods, String event, TaxonName currentName) {
 899         logger.info("extractMaterialsDirect");
 900         //        logger.info("acceptedTaxon: "+acceptedTaxon);
 901         String descr="";
 902
 903         DerivedUnit derivedUnitBase=null;
 904         MySpecimenOrObservation myspecimenOrObservation = extractSpecimenOrObservation(materials,derivedUnitBase, SpecimenOrObservationType.DerivedUnit, currentName);
 905         derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
 906
 907         sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
 908
 909         TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
 910
 911         Feature feature=null;
 912         if (event.equalsIgnoreCase("collection")){
 913             feature = makeFeature(derivedUnitBase);
 914         }
 915         else{
 916             feature = Feature.MATERIALS_EXAMINED();
 917         }
 918         featuresMap.put(feature.getTitleCache(),  feature);
 919
 920         descr=myspecimenOrObservation.getDescr();
 921         if(!StringUtils.isEmpty(descr)) {
 922             derivedUnitBase.setTitleCache(descr, true);
 923         }
 924
 925         IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
 926
 927         taxonDescription.addElement(indAssociation);
 928         sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
 929         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
 930
 931         return derivedUnitBase.getTitleCache();
 932
 933     }
 934
 935
 936     /**
 937      * @param description: the XML node group
 938      * @param acceptedTaxon: the current acceptedTaxon
 939      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
 940      * @param nametosave: the list of objects to save into the CDM
 941      * @param refMods: the current reference extracted from the MODS
 942      * @param featureName: the feature name
 943      */
 944     @SuppressWarnings({ "rawtypes"})
 945     private String extractSpecificFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
 946             List<TaxonName> nametosave, Reference refMods, String featureName ) {
 947         logger.info("extractSpecificFeature "+featureName);
 948         //        System.out.println("GRUUUUuu");
 949         NodeList children = description.getChildNodes();
 950         NodeList insideNodes ;
 951         NodeList trNodes;
 952         //        String descr ="";
 953         String localdescr="";
 954         List<String> blabla=null;
 955         List<String> text = new ArrayList<String>();
 956
 957         String table="<table>";
 958         String head="";
 959         String line="";
 960
 961         Feature currentFeature=getFeatureObjectFromString(featureName);
 962
 963         //        String fullContent = description.getTextContent();
 964         for (int i=0;i<children.getLength();i++){
 965             //            localdescr="";
 966             if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
 967                 text.add(children.item(i).getTextContent().trim());
 968             }
 969             if (featureName.equalsIgnoreCase("table")){
 970                 if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 971                         children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("thead")){
 972                     head = extractTableHead(children.item(i));
 973                     table+=head;
 974                     line = extractTableLine(children.item(i));
 975                     if (!line.equalsIgnoreCase("<tr></tr>")) {
 976                         table+=line;
 977                     }
 978                 }
 979                 if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
 980                         children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
 981                     line = extractTableLineWithColumn(children.item(i).getChildNodes());
 982                     if(!line.equalsIgnoreCase("<tr></tr>")) {
 983                         table+=line;
 984                     }
 985                 }
 986             }
 987             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
 988                 insideNodes=children.item(i).getChildNodes();
 989                 blabla= new ArrayList<String>();
 990                 for (int j=0;j<insideNodes.getLength();j++){
 991                     Node insideNode = insideNodes.item(j);
 992                         if (insideNode.getNodeName().equalsIgnoreCase("tax:name")){
 993                         String inlinetext = getInlineTextForName(nametosave, refMods, insideNode);
 994                         if (!inlinetext.isEmpty()) {
 995                             blabla.add(inlinetext);
 996                         }
 997                     }
 998                     else if (insideNode.getNodeName().equalsIgnoreCase("#text")) {
 999                         if(!insideNode.getTextContent().trim().isEmpty()){
1000                             blabla.add(insideNode.getTextContent().trim());
1001                             //                            localdescr += insideNodes.item(j).getTextContent().trim();
1002                         }
1003                     }
1004                 }
1005                 if (!blabla.isEmpty()) {
1006                     String blaStr = StringUtils.join(blabla," ").trim();
1007                     if(!stringIsEmpty(blaStr)) {
1008                         setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1009                         text.add(blaStr);
1010                     }
1011                 }
1012
1013             }
1014             if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1015                 if(!children.item(i).getTextContent().trim().isEmpty()){
1016                     localdescr = children.item(i).getTextContent().trim();
1017                     if(!stringIsEmpty(localdescr)) {
1018                         setParticularDescription(localdescr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1019                     }
1020                 }
1021             }
1022         }
1023
1024         table+="</table>";
1025         if (!table.equalsIgnoreCase("<table></table>")){
1026             //            System.out.println("TABLE : "+table);
1027             text.add(table);
1028         }
1029
1030         if (text !=null && !text.isEmpty()) {
1031             return StringUtils.join(text," ");
1032         } else {
1033             return "";
1034         }
1035
1036     }
1037
1038     /**
1039      * @param children
1040      * @param i
1041      * @return
1042      */
1043     private String extractTableLine(Node child) {
1044         //logger.info("extractTableLine");
1045         String line;
1046         line="<tr>";
1047         if (child.getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1048             line = extractTableLineWithColumn(child.getChildNodes());
1049         }
1050         line+="</tr>";
1051         return line;
1052     }
1053
1054     /**
1055      * @param children
1056      * @param i
1057      * @return
1058      */
1059     private String extractTableHead(Node child) {
1060         //logger.info("extractTableHead");
1061         String head;
1062         String line;
1063         head="<th>";
1064         NodeList trNodes = child.getChildNodes();
1065         for (int k=0;k<trNodes.getLength();k++){
1066             if (trNodes.item(k).getNodeName().equalsIgnoreCase("tax:div")
1067                     && trNodes.item(k).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1068                 line = extractTableLineWithColumn(trNodes.item(k).getChildNodes());
1069                 head+=line;
1070             }
1071         }
1072         head+="</th>";
1073         return head;
1074     }
1075
1076     /**
1077      * build a html table line, with td columns
1078      * @param tdNodes
1079      * @return an html coded line
1080      */
1081     private String extractTableLineWithColumn(NodeList tdNodes) {
1082         //logger.info("extractTableLineWithColumn");
1083         String line;
1084         line="<tr>";
1085         for (int l=0;l<tdNodes.getLength();l++){
1086             if (tdNodes.item(l).getNodeName().equalsIgnoreCase("tax:p")){
1087                 line+="<td>"+tdNodes.item(l).getTextContent()+"</td>";
1088             }
1089         }
1090         line+="</tr>";
1091         return line;
1092     }
1093
1094     /**
1095      * @param description: the XML node group
1096      * @param acceptedTaxon: the current acceptedTaxon
1097      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1098      * @param nametosave: the list of objects to save into the CDM
1099      * @param refMods: the current reference extracted from the MODS
1100      * @param featureName: the feature name
1101      */
1102     @SuppressWarnings({ "unused", "rawtypes" })
1103     private String extractSpecificFeatureNotStructured(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
1104             List<TaxonName> nameToSave, Reference refMods, String featureName ) {
1105         logger.info("extractSpecificFeatureNotStructured " + featureName);
1106         NodeList children = description.getChildNodes();
1107         NodeList insideNodes ;
1108         List<String> blabla= new ArrayList<String>();
1109
1110
1111         Feature currentFeature = getFeatureObjectFromString(featureName);
1112
1113         String fullContent = description.getTextContent();
1114         for (int i=0;i<children.getLength();i++){
1115             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1116                 insideNodes=children.item(i).getChildNodes();
1117                 for (int j=0;j<insideNodes.getLength();j++){
1118                     if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1119                         String inlineText =getInlineTextForName(nameToSave, refMods, insideNodes.item(j));
1120                         if(!inlineText.isEmpty()) {
1121                             blabla.add(inlineText);
1122                         }
1123                     }
1124                     if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1125                         if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1126                             blabla.add(insideNodes.item(j).getTextContent().trim());
1127                         }
1128                     }
1129                 }
1130             }
1131             if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1132                 if(!children.item(i).getTextContent().trim().isEmpty()){
1133                     String localdescr = children.item(i).getTextContent().trim();
1134                     if(!localdescr.isEmpty())
1135                     {
1136                         blabla.add(localdescr);
1137                     }
1138                 }
1139             }
1140         }
1141
1142         if (blabla !=null && !blabla.isEmpty()) {
1143             String blaStr = StringUtils.join(blabla," ").trim();
1144             if (! stringIsEmpty(blaStr)) {
1145                 setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1146                 return blaStr;
1147             } else {
1148                 return "";
1149             }
1150         } else {
1151             return "";
1152         }
1153
1154     }
1155
1156     /**
1157      * @param blaStr
1158      * @return
1159      */
1160     private boolean stringIsEmpty(String blaStr) {
1161         if (blaStr.matches("(\\.|,|;|\\.-)?")){
1162                 return true;
1163         }else{
1164                 return false;
1165         }
1166     }
1167
1168     /**
1169      * @param nametosave
1170      * @param refMods
1171      * @param insideNodes
1172      * @param blabla
1173      * @param j
1174      */
1175     @SuppressWarnings({ "rawtypes" })
1176     private String getInlineTextForName(List<TaxonName> nametosave, Reference refMods, Node insideNode) {
1177         if (true){
1178                 NodeList children = insideNode.getChildNodes();
1179                 String result = "";
1180             for (int i=0;i<children.getLength();i++){
1181                 Node nameChild = children.item(i);
1182                 if(nameChild.getNodeName().equalsIgnoreCase("#text")){
1183                         result += nameChild.getTextContent();
1184                 }else{
1185                         //do nothing
1186                 }
1187             }
1188                 return result.replace("\n", "").trim();
1189         }else{
1190                 TaxonName tnb = getTaxonNameFromXML(insideNode, nametosave,refMods,false);
1191                 //                        Taxon tax = getTaxonFromTxonNameBase(tnb, refMods);
1192                 Taxon tax = currentMyName.getTaxon();
1193                 if(tnb !=null && tax != null){
1194                     String linkedTaxon = tnb.getTitleCache().split("sec")[0];//TODO NOT IMPLEMENTED IN THE CDM YET
1195                     return "<cdm:taxon uuid='"+tax.getUuid()+"'>"+linkedTaxon+"</cdm:taxon>";
1196                 }else if (tnb != null && tax == null){
1197                         //TODO
1198                         return "<cdm:taxonName uuid='" + tnb.getUuid() +"'>" + tnb.getTitleCache().split("sec")[0]  +"</cdm:taxonName>";
1199                 }else{
1200                         logger.warn("Inline text has no content yet");
1201                 }
1202                 return "";
1203         }
1204     }
1205
1206     /**
1207      * @param featureName
1208      * @return
1209      */
1210     @SuppressWarnings("rawtypes")
1211     private Feature getFeatureObjectFromString(String featureName) {
1212         logger.info("getFeatureObjectFromString");
1213         List<Feature> features = importer.getTermService().list(Feature.class, null,null,null,null);
1214         Feature currentFeature=null;
1215         for (Feature feature: features){
1216             String tmpF = feature.getTitleCache();
1217             if (tmpF.equalsIgnoreCase(featureName)) {
1218                 currentFeature=feature;
1219                 //                System.out.println("currentFeatureFromList "+currentFeature.getUuid());
1220             }
1221         }
1222         if (currentFeature == null) {
1223             currentFeature=Feature.NewInstance(featureName, featureName, featureName);
1224             if(featureName.equalsIgnoreCase("Other")){
1225                 currentFeature.setUuid(OtherUUID);
1226             }
1227             if(featureName.equalsIgnoreCase(notMarkedUp)){
1228                 currentFeature.setUuid(NotMarkedUpUUID);
1229             }
1230             importer.getTermService().saveOrUpdate(currentFeature);
1231         }
1232         return currentFeature;
1233     }
1234
1235
1236
1237
1238     /**
1239      * @param children: the XML node group
1240      * @param nametosave: the list of objects to save into the CDM
1241      * @param acceptedTaxon: the current acceptedTaxon
1242      * @param refMods: the current reference extracted from the MODS
1243      * @param fullContent :the parsed XML content
1244      * @return a list of description (text)
1245      */
1246     @SuppressWarnings({ "unused", "rawtypes" })
1247     private List<String> parseParagraph(List<TaxonName> namesToSave, Taxon acceptedTaxon, Reference refMods, Node paragraph, Feature feature){
1248         logger.info("parseParagraph "+feature.toString());
1249         List<String> fullDescription=  new ArrayList<String>();
1250         //        String localdescr;
1251         String descr="";
1252         NodeList insideNodes ;
1253         boolean collectionEvent = false;
1254         List<Node>collectionEvents = new ArrayList<Node>();
1255
1256         NodeList children = paragraph.getChildNodes();
1257
1258         for (int i=0;i<children.getLength();i++){
1259             //            localdescr="";
1260             if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
1261                 descr += children.item(i).getTextContent().trim();
1262             }
1263             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1264                 insideNodes=children.item(i).getChildNodes();
1265                 List<String> blabla= new ArrayList<String>();
1266                 for (int j=0;j<insideNodes.getLength();j++){
1267                     boolean nodeKnown = false;
1268                     //    System.out.println("insideNodes.item(j).getNodeName() : "+insideNodes.item(j).getNodeName());
1269                     if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1270                         String inlineText = getInlineTextForName(namesToSave, refMods, insideNodes.item(j));
1271                         if (!inlineText.isEmpty()) {
1272                             blabla.add(inlineText);
1273                         }
1274                         nodeKnown=true;
1275                     }
1276                     else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1277                         if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1278                             blabla.add(insideNodes.item(j).getTextContent().trim());
1279                             // localdescr += insideNodes.item(j).getTextContent().trim();
1280                         }
1281                         nodeKnown=true;
1282                     }
1283                     else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:bibref")) {
1284                         String ref = insideNodes.item(j).getTextContent().trim();
1285                         if (ref.endsWith(";")  && ((ref.length())>1)) {
1286                             ref=ref.substring(0, ref.length()-1)+".";
1287                         }
1288                         Reference reference = ReferenceFactory.newGeneric();
1289                         reference.setTitleCache(ref, true);
1290                         blabla.add(reference.getTitleCache());
1291                         nodeKnown=true;
1292                     }
1293                     else if  (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:figure")){
1294                         String figure = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "figure");
1295                         blabla.add(figure);
1296                     }
1297                     else if(insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:div") &&
1298                             insideNodes.item(j).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1299                             insideNodes.item(j).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1300                         String table = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "table");
1301                         blabla.add(table);
1302                     }
1303                     else if  (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1304                         //                        logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1305                         String titlecache  = extractMaterialsDirect(insideNodes.item(j), acceptedTaxon, refMods, "collection", null);
1306                         blabla.add(titlecache);
1307                         collectionEvent=true;
1308                         collectionEvents.add(insideNodes.item(j));
1309                         nodeKnown=true;
1310                     }else{
1311                         logger.warn("node not handled yet: " + insideNodes.item(j).getNodeName());
1312                     }
1313
1314                 }
1315                 if (!StringUtils.isBlank(StringUtils.join(blabla," "))) {
1316                     fullDescription.add(StringUtils.join(blabla," "));
1317                 }
1318             }
1319             if  (children.item(i).getNodeName().equalsIgnoreCase("tax:figure")){
1320                 String figure = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "Figure");
1321                 fullDescription.add(figure);
1322             }
1323             if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1324                     children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1325                     children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1326                 String table = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "table");
1327                 fullDescription.add(table);
1328             }
1329         }
1330
1331         if( !stringIsEmpty(descr.trim())){
1332             Feature currentFeature= getNotMarkedUpFeatureObject();
1333             setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1334         }
1335         //        if (collectionEvent) {
1336         //            logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1337         //            for (Node coll:collectionEvents){
1338         //                = extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
1339         //            }
1340         //        }
1341         return fullDescription;
1342     }
1343
1344
1345     /**
1346      * @param description: the XML node group
1347      * @param acceptedTaxon: the current acceptedTaxon
1348      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1349      * @param nametosave: the list of objects to save into the CDM
1350      * @param refMods: the current reference extracted from the MODS
1351      * @param feature: the feature to link the data with
1352      */
1353     @SuppressWarnings("rawtypes")
1354     private void extractFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonName> namesToSave, Reference refMods, Feature feature){
1355         logger.info("EXTRACT FEATURE "+feature.toString());
1356         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1357         List<String> fullDescription= parseParagraph( namesToSave, acceptedTaxon, refMods, description,feature);
1358
1359         //        System.out.println("Feature : "+feature.toString()+", "+fullDescription.toString());
1360         if (!fullDescription.isEmpty() &&!stringIsEmpty(StringUtils.join(fullDescription,"\n").trim())) {
1361             setParticularDescription(StringUtils.join(fullDescription,"\n").trim(),acceptedTaxon,defaultTaxon, refMods,feature);
1362         }
1363
1364     }
1365
1366
1367     /**
1368      * @param descr: the XML Nodegroup to parse
1369      * @param acceptedTaxon: the current acceptedTaxon
1370      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1371      * @param refMods: the current reference extracted from the MODS
1372      * @param currentFeature: the feature name
1373      * @return
1374      */
1375     private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon, Reference refMods, Feature currentFeature) {
1376         logger.info("setParticularDescription " + currentFeature.getTitleCache()+", \n blabla : "+descr);
1377
1378         //remove redundant feature title
1379         String featureStr = currentFeature.getTitleCache();
1380         if (!descr.isEmpty() && descr.toLowerCase().startsWith(featureStr.toLowerCase())){
1381                 descr = descr.replaceAll("(?i)" + featureStr + "\\.\\s*", "");
1382         }
1383
1384
1385         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1386         featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1387
1388         TextData textData = createTextData(descr, refMods, currentFeature);
1389
1390         if(acceptedTaxon!=null){
1391             TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1392             td.addElement(textData);
1393             acceptedTaxon.addDescription(td);
1394
1395             sourceHandler.addAndSaveSource(refMods, td, null);
1396             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1397         }
1398
1399         if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1400             try{
1401                 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1402                 if (tmp!=null) {
1403                     defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1404                 }else{
1405                     importer.getTaxonService().saveOrUpdate(defaultTaxon);
1406                 }
1407             }catch(Exception e){
1408                 logger.debug("TAXON EXISTS"+defaultTaxon);
1409             }
1410
1411             TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1412             defaultTaxon.addDescription(td);
1413             td.addElement(textData);
1414             sourceHandler.addAndSaveSource(refMods, td, null);
1415             importer.getTaxonService().saveOrUpdate(defaultTaxon);
1416         }
1417     }
1418
1419     /**
1420      * @param descr
1421      * @param refMods
1422      * @param currentFeature
1423      * @return
1424      */
1425     private TextData createTextData(String descr, Reference refMods, Feature currentFeature) {
1426         //logger.info("createTextData");
1427         TextData textData = TextData.NewInstance();
1428         textData.setFeature(currentFeature);
1429         sourceHandler.addSource(refMods, textData);
1430
1431         textData.putText(Language.UNKNOWN_LANGUAGE(), descr);
1432         return textData;
1433     }
1434
1435
1436
1437     /**
1438      * @param descr: the XML Nodegroup to parse
1439      * @param acceptedTaxon: the current acceptedTaxon
1440      * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1441      * @param refMods: the current reference extracted from the MODS
1442      * @param currentFeature: the feature name
1443      * @return
1444      */
1445     private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon,Reference currentRef, Reference refMods, Feature currentFeature) {
1446         //        System.out.println("setParticularDescriptionSPecial "+currentFeature);
1447         //        logger.info("acceptedTaxon: "+acceptedTaxon);
1448         logger.info("setParticularDescription");
1449         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1450
1451         featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1452         TextData textData = createTextData(descr, refMods, currentFeature);
1453
1454         if(! descr.isEmpty() && (acceptedTaxon!=null)){
1455             TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1456             td.addElement(textData);
1457             acceptedTaxon.addDescription(td);
1458
1459             sourceHandler.addAndSaveSource(refMods, td, currentRef);
1460             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1461         }
1462
1463         if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1464             try{
1465                 Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1466                 if (tmp!=null) {
1467                     defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1468                 }else{
1469                     importer.getTaxonService().saveOrUpdate(defaultTaxon);
1470                 }
1471             }catch(Exception e){
1472                 logger.debug("TAXON EXISTS"+defaultTaxon);
1473             }
1474
1475             TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1476             defaultTaxon.addDescription(td);
1477             td.addElement(textData);
1478             sourceHandler.addAndSaveSource(currentRef, td,currentRef);
1479             importer.getTaxonService().saveOrUpdate(defaultTaxon);
1480         }
1481     }
1482
1483
1484
1485     /**
1486      * @param synonyms: the XML Nodegroup to parse
1487      * @param nametosave: the list of objects to save into the CDM
1488      * @param acceptedTaxon: the current acceptedTaxon
1489      * @param refMods: the current reference extracted from the MODS
1490      */
1491     @SuppressWarnings({ "rawtypes" })
1492     private void extractSynonyms(Node synonymsNode, Taxon acceptedTaxon,Reference refMods, String followingText) {
1493         logger.info("extractSynonyms");
1494         //System.out.println("extractSynonyms for: "+acceptedTaxon);
1495         Taxon ttmp = (Taxon) importer.getTaxonService().find(acceptedTaxon.getUuid());
1496         if (ttmp != null) {
1497             acceptedTaxon = CdmBase.deproxy(ttmp,Taxon.class);
1498         }
1499         else{
1500             acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1501         }
1502         NodeList children = synonymsNode.getChildNodes();
1503         List<MyName> names = new ArrayList<MyName>();
1504
1505         if(synonymsNode.getNodeName().equalsIgnoreCase("tax:name")){
1506             try {
1507                 MyName myName = extractScientificNameSynonym(synonymsNode, refMods, followingText);
1508                 names.add(myName);
1509             } catch (TransformerFactoryConfigurationError e) {
1510                 logger.warn(e);
1511             } catch (TransformerException e) {
1512                 logger.warn(e);
1513             }
1514         }
1515
1516
1517         for (int i=0;i<children.getLength();i++){
1518             if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1519                 NodeList tmp = children.item(i).getChildNodes();
1520                 //                String fullContent = children.item(i).getTextContent();
1521                 for (int j=0; j< tmp.getLength();j++){
1522                     if(tmp.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1523                         try {
1524                                 MyName myName = extractScientificNameSynonym(tmp.item(j),refMods, followingText);
1525                             names.add(myName);
1526                         } catch (TransformerFactoryConfigurationError e) {
1527                             logger.warn(e);
1528                         } catch (TransformerException e) {
1529                             logger.warn(e);
1530                         }
1531                     }
1532                 }
1533             }
1534             if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
1535                 try {
1536                         MyName myName = extractScientificNameSynonym(children.item(i),refMods, followingText);
1537                     names.add(myName);
1538                 } catch (TransformerFactoryConfigurationError e) {
1539                     logger.warn(e);
1540                 } catch (TransformerException e) {
1541                     logger.warn(e);
1542                 }
1543
1544             }
1545         }
1546
1547         for(MyName name:names){
1548                 TaxonName nameToBeFilled = name.getTaxonName();
1549             Synonym synonym = name.getSyno();
1550             addFollowingTextToName(nameToBeFilled, followingText);
1551
1552             /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1553             nameToBeFilled = parser.parseFullName(name.getName(), nomenclaturalCode, name.getRank());
1554             if (nameToBeFilled.hasProblem() &&
1555                     !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1556                 //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1557                 addProblemNameToFile(name.getName(),"",nomenclaturalCode,name.getRank());
1558                 nameToBeFilled = solveNameProblem(name.getOriginalName(), name.getName(), parser,name.getAuthor(), name.getRank());
1559             }
1560             nameToBeFilled = getTaxonName(nameToBeFilled,nametosave,statusType);
1561              */
1562             if (!name.getIdentifier().isEmpty() && (name.getIdentifier().length()>2)){
1563                 setLSID(name.getIdentifier(), synonym);
1564             }
1565
1566             Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1567             boolean synoExist = false;
1568             for (Synonym syn: synonymsSet){
1569
1570                 boolean a =syn.getName().equals(synonym.getName());
1571                 boolean b = syn.getSec().equals(synonym.getSec());
1572                 if (a && b) {
1573                     synoExist=true;
1574                 }
1575             }
1576             if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1577                 sourceHandler.addSource(refMods, synonym);
1578                 acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1579             }
1580         }
1581         importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1582     }
1583
1584
1585     private boolean addFollowingTextToName(TaxonName nameToBeFilled, String followingText) {
1586         if (nameToBeFilled != null && StringUtils.isNotBlank(followingText)){
1587                 if (! followingText.matches("\\d\\.?")){
1588
1589                         if (followingText.startsWith(",")){
1590                                 followingText = followingText.substring(1).trim();
1591                         }
1592                         nameToBeFilled.setFullTitleCache(nameToBeFilled.getFullTitleCache()+ "," +followingText , true);
1593                 }
1594                 return true;
1595         }
1596         return false;
1597
1598         }
1599
    /**
     * Extracts the references of a reference group; handles the cases where the
     * bibref elements are inside a {@code tax:p} element as well as outside of it.
     *
     * @param refgroup the XML nodes
     * @param nametosave the list of objects to save into the CDM
     * @param acceptedTaxon the current acceptedTaxon
     * @param refMods the current reference extracted from the MODS
     * @return the (deproxied) acceptedTaxon (why?)
     */
1609     @SuppressWarnings({ "rawtypes" })
1610     private Taxon extractReferences(Node refgroup, List<TaxonName> nametosave, Taxon acceptedTaxon, Reference refMods) {
1611         logger.info("extractReferences");
1612         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1613
1614         NodeList children = refgroup.getChildNodes();
1615         INonViralName nameToBeFilled = getNonViralNameAccNomenclature();
1616
1617         ReferenceBuilder refBuild = new ReferenceBuilder(sourceHandler);
1618         for (int i=0;i<children.getLength();i++){
1619             if(children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
1620                 String ref = children.item(i).getTextContent().trim();
1621                 refBuild.builReference(ref, treatmentMainName, nomenclaturalCode,  acceptedTaxon, refMods);
1622                 if (!refBuild.isFoundBibref()){
1623                     extractReferenceRawText(children.item(i).getChildNodes(), nameToBeFilled, refMods, acceptedTaxon);
1624                 }
1625             }
1626
1627             if(children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1628                 NodeList references = children.item(i).getChildNodes();
1629                 String descr="";
1630                 for (int j=0;j<references.getLength();j++){
1631                     if(references.item(j).getNodeName().equalsIgnoreCase("tax:bibref")){
1632                         String ref = references.item(j).getTextContent().trim();
1633                         refBuild.builReference(ref, treatmentMainName,  nomenclaturalCode,  acceptedTaxon, refMods);
1634                     }
1635                     else
1636                         if (references.item(j).getNodeName().equalsIgnoreCase("#text")
1637                                 && !references.item(j).getTextContent().trim().isEmpty()){
1638                             descr += references.item(j).getTextContent().trim();
1639                         }
1640
1641                 }
1642                 if (!refBuild.isFoundBibref()){
1643                     //if it's not tagged, put it as row information.
1644                     //                    extractReferenceRawText(references, nameToBeFilled, nametosave, refMods, acceptedTaxon);
1645                     //then put it as a not markup feature if not empty
1646                     if (!stringIsEmpty(descr.trim())){
1647                         Feature currentFeature= getNotMarkedUpFeatureObject();
1648                         setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1649                     }
1650                 }
1651             }
1652         }
1653         //        importer.getClassificationService().saveOrUpdate(classification);
1654         return acceptedTaxon;
1655
1656     }
1657
1658     /**
1659      * get the non viral name according to the current nomenclature
1660      * @return
1661      */
1662
1663     private INonViralName getNonViralNameAccNomenclature() {
1664         return nomenclaturalCode.getNewTaxonNameInstance(null);
1665     }
1666
1667     /**
1668      * @return the feature object for the category "not marked up"
1669      */
1670     private Feature getNotMarkedUpFeatureObject() {
1671         // FIXME use getFeature(uuid ....)
1672         logger.info("getNotMarkedUpFeatureObject");
1673         Feature currentFeature = (Feature)importer.getTermService().find(NotMarkedUpUUID);
1674         if (currentFeature == null) {
1675             currentFeature=Feature.NewInstance(notMarkedUp, notMarkedUp, notMarkedUp);
1676             currentFeature.setUuid(NotMarkedUpUUID);
1677             //TODO use userDefined Feature Vocabulary
1678             Feature.DISTRIBUTION().getVocabulary().addTerm(currentFeature);
1679 //            importer.getTermService().saveOrUpdate(currentFeature);
1680             importer.getVocabularyService().saveOrUpdate(currentFeature.getVocabulary());
1681         }
1682         return currentFeature;
1683     }
1684
1685     /**
1686      * @param references
1687      * handle cases where the bibref are inside <p> and outside
1688      */
1689     @SuppressWarnings("rawtypes")
1690     private void extractReferenceRawText(NodeList references, INonViralName nameToBeFilled, Reference refMods,
1691             Taxon acceptedTaxon) {
1692         logger.info("extractReferenceRawText");
1693         String refString="";
1694         currentMyName= new MyName(true);
1695         for (int j=0;j<references.getLength();j++){
1696             acceptedTaxon=CdmBase.deproxy(acceptedTaxon, Taxon.class);
1697             //no bibref tag inside
1698             //            System.out.println("references.item(j).getNodeName()"+references.item(j).getNodeName());
1699             if (references.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1700
1701                 try {
1702                         String followingText = null;  //needs to be checked if follText is possible
1703                         //TODO create or not create?
1704                     currentMyName = extractScientificName(references.item(j), refMods, followingText);
1705                 } catch (TransformerFactoryConfigurationError e) {
1706                     logger.warn(e);
1707                 } catch (TransformerException e) {
1708                     logger.warn(e);
1709                 }
1710
1711                 //                name=name.trim();
1712             }
1713             if (references.item(j).getNodeName().equalsIgnoreCase("#text")){
1714                 refString = references.item(j).getTextContent().trim();
1715             }
1716             if(references.item(j).getNodeName().equalsIgnoreCase("#text") && !references.item(j).getTextContent().trim().isEmpty()){
1717                 //
1718                if (!currentMyName.getStatus().isEmpty()){
1719                    String nomNovStatus = this.newNameStatus(currentMyName.getStatus());
1720                         if (nomNovStatus != null){
1721                                 nameToBeFilled.setAppendedPhrase(nomNovStatus);
1722                         }else{
1723                            try {
1724                                 NomenclaturalStatusType  statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1725                             nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1726                             } catch (UnknownCdmTypeException e) {
1727                                 addProblematicStatusToFile(currentMyName.getStatus());
1728                                 logger.warn("Problem with status");
1729                             }
1730                         }
1731                 }
1732
1733                 String fullLineRefName = references.item(j).getTextContent().trim();
1734                 int nameOrRefOrOther=2;
1735                 nameOrRefOrOther=askIfNameContained(fullLineRefName);
1736                 if (nameOrRefOrOther==0){
1737                     TaxonName nameTBF = currentMyName.getTaxonName();
1738                     Synonym synonym = Synonym.NewInstance(nameTBF, refMods);
1739
1740                     Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1741                     //                    System.out.println(synonym.getName()+" -- "+synonym.getSec());
1742                     boolean synoExist = false;
1743                     for (Synonym syn: synonymsSet){
1744                         //                        System.out.println(syn.getName()+" -- "+syn.getSec());
1745                         boolean a =syn.getName().equals(synonym.getName());
1746                         boolean b = syn.getSec().equals(synonym.getSec());
1747                         if (a && b) {
1748                             synoExist=true;
1749                         }
1750                     }
1751                     if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1752                         sourceHandler.addSource(refMods, synonym);
1753
1754                         acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1755                     }
1756                 }
1757
1758                 if (nameOrRefOrOther==1){
1759                     Reference re = ReferenceFactory.newGeneric();
1760                     re.setTitleCache(fullLineRefName, true);
1761
1762                     /* TaxonName nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
1763                     if (nameTBF.hasProblem() &&
1764                             !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1765                         addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
1766                         nameTBF=solveNameProblem(currentMyName.getName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
1767                     }
1768                     nameTBF = getTaxonName(nameTBF,nametosave,statusType);
1769                      */
1770                     TaxonName nameTBF = currentMyName.getTaxonName();
1771                     Synonym synonym = Synonym.NewInstance(nameTBF, re);
1772
1773                     Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1774                     //                    System.out.println(synonym.getName()+" -- "+synonym.getSec());
1775                     boolean synoExist = false;
1776                     for (Synonym syn: synonymsSet){
1777                         //                        System.out.println(syn.getName()+" -- "+syn.getSec());
1778                         boolean a =syn.getName().equals(synonym.getName());
1779                         boolean b = syn.getSec().equals(synonym.getSec());
1780                         if (a && b) {
1781                             synoExist=true;
1782                         }
1783                     }
1784                     if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1785                         sourceHandler.addSource(refMods, synonym);
1786
1787                         acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1788                     }
1789
1790                 }
1791
1792
1793                 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1794                     setLSID(currentMyName.getIdentifier(), acceptedTaxon);
1795                 }
1796             }
1797
1798             if(!currentMyName.getName().isEmpty()){
1799                 //logger.info("acceptedTaxon and name: *"+acceptedTaxon.getTitleCache()+"*, *"+currentMyName.getName()+"*");
1800                 if (acceptedTaxon.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(currentMyName.getName().trim())){
1801                     Reference refS = ReferenceFactory.newGeneric();
1802                     refS.setTitleCache(refString, true);
1803                     //                            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1804                     //                            acceptedTaxon.addDescription(td);
1805                     //                            acceptedTaxon.addSource(refSource);
1806                     //
1807                     //                            TextData textData = TextData.NewInstance(Feature.CITATION());
1808                     //
1809                     //                            textData.addSource(null, null, refS, null);
1810                     //                            td.addElement(textData);
1811                     //                            td.addSource(refSource);
1812                     //                            importer.getDescriptionService().saveOrUpdate(td);
1813
1814
1815                     if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1816                         setLSID(currentMyName.getIdentifier(), acceptedTaxon);
1817
1818                     }
1819
1820                     acceptedTaxon.getName().setNomenclaturalReference(refS);
1821                 }else{
1822                     TaxonName nameTBF = currentMyName.getTaxonName();
1823                     Synonym synonym = null;
1824                     if (! currentMyName.getStatus().isEmpty()){
1825                         String nomNovStatus = this.newNameStatus(currentMyName.getStatus());
1826                         if (nomNovStatus != null){
1827                                 nameToBeFilled.setAppendedPhrase(nomNovStatus);
1828                         }else{
1829                                 try {
1830                                     NomenclaturalStatusType statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1831                                     nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1832                                     synonym = Synonym.NewInstance(nameTBF, refMods);
1833                                 } catch (UnknownCdmTypeException e) {
1834                                     addProblematicStatusToFile(currentMyName.getStatus());
1835                                     logger.warn("Problem with status");
1836                                     synonym = Synonym.NewInstance(nameTBF, refMods);
1837                                     synonym.setAppendedPhrase(currentMyName.getStatus());
1838                                 }
1839                         }
1840                     }else{
1841                         synonym =  Synonym.NewInstance(nameTBF, refMods);
1842                     }
1843
1844
1845                     if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1846                         setLSID(currentMyName.getIdentifier(), synonym);
1847                     }
1848
1849                     Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1850                     //                    System.out.println(synonym.getName()+" -- "+synonym.getSec());
1851                     boolean synoExist = false;
1852                     for (Synonym syn: synonymsSet){
1853                         //                        System.out.println(syn.getName()+" -- "+syn.getSec());
1854                         boolean a =syn.getName().equals(synonym.getName());
1855                         boolean b = syn.getSec().equals(synonym.getSec());
1856                         if (a && b) {
1857                             synoExist=true;
1858                         }
1859                     }
1860                     if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1861                         sourceHandler.addSource(refMods, synonym);
1862
1863                         acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1864                     }
1865                 }
1866             }
1867             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1868         }
1869     }
1870
1871
1872
1873     /**
1874      * @param identifier
1875      * @param acceptedTaxon
1876      */
1877     @SuppressWarnings("rawtypes")
1878     private void setLSID(String identifier, TaxonBase<?> taxon) {
1879         //logger.info("setLSID");
1880         //        boolean lsidok=false;
1881         String id = identifier.split("__")[0];
1882         String source = identifier.split("__")[1];
1883         if (id.indexOf("lsid")>-1){
1884             try {
1885                 LSID lsid = new LSID(id);
1886                 taxon.setLsid(lsid);
1887                 //                lsidok=true;
1888             } catch (MalformedLSIDException e) {
1889                 logger.warn("Malformed LSID");
1890             }
1891
1892         }
1893
1894         //logger.info("search reference for LSID");
1895         //  if ((id.indexOf("lsid")<0) || !lsidok){
1896         //ADD ORIGINAL SOURCE ID EVEN IF LSID
1897         Reference re = null;
1898         Pager<Reference> references = importer.getReferenceService().findByTitleWithRestrictions(Reference.class, source, MatchMode.EXACT, null, 1, null, null, null);
1899         if( references !=null && references.getCount()>0){
1900             re=references.getRecords().get(0);
1901         }
1902         //logger.info("search reference for LSID-end");
1903         if(re == null){
1904             re = ReferenceFactory.newGeneric();
1905             re.setTitleCache(source, true);
1906             importer.getReferenceService().saveOrUpdate(re);
1907         }
1908         re=CdmBase.deproxy(re, Reference.class);
1909
1910         //logger.info("search source for LSID");
1911         Set<IdentifiableSource> sources = taxon.getSources();
1912         boolean lsidinsource=false;
1913         boolean urlinsource=false;
1914         for (IdentifiableSource src:sources){
1915             if (id.equalsIgnoreCase(src.getIdInSource()) && re.getTitleCache().equals(src.getCitation().getTitleCache())) {
1916                 lsidinsource=true;
1917             }
1918             if (src.getIdInSource() == null && re.getTitleCache().equals(sourceUrlRef.getTitleCache())) {
1919                 urlinsource=true;
1920             }
1921         }
1922         if(!lsidinsource) {
1923             taxon.addSource(OriginalSourceType.Import, id,null,re,null);
1924         }
1925         if(!urlinsource)
1926         {
1927             sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
1928             taxon.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
1929             // }
1930         }
1931
1932     }
1933
1934     /**
1935      * try to solve a parsing problem for a scientific name
1936      * @param original : the name from the OCR document
1937      * @param name : the tagged version
1938      * @param parser
1939      * @return the corrected TaxonName
1940      */
1941     /*   @SuppressWarnings({ "unchecked", "rawtypes" })
1942     private TaxonName solveNameProblem(String original, String name, INonViralNameParser parser, String author, Rank rank) {
1943         Map<String,String> ato = namesMap.get(original);
1944         if (ato == null) {
1945             ato = namesMap.get(original+" "+author);
1946         }
1947
1948
1949         if (ato == null && rank.equals(Rank.UNKNOWN_RANK())){
1950             rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
1951         }
1952         if (ato != null && rank.equals(Rank.UNKNOWN_RANK())){
1953             rank = getRank(ato);
1954         }
1955         //        TaxonName nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
1956         TaxonName nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1957         //                logger.info("RANK: "+rank);
1958         int retry=0;
1959         List<ParserProblem> problems = nameTBF.getParsingProblems();
1960         for (ParserProblem pb:problems) {
1961             System.out.println(pb.toString());
1962         }
1963         while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
1964             addProblemNameToFile(name,author,nomenclaturalCode,rank);
1965             String fullname=name;
1966             if(! skippQuestion) {
1967                 fullname =  getFullReference(name,nameTBF.getParsingProblems());
1968             }
1969             if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1970                 nameTBF = TaxonNameFactory.NewBotanicalInstance(null);
1971             }
1972             if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1973                 nameTBF = TaxonNameFactory.NewZoologicalInstance(null);
1974             }
1975             if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1976                 nameTBF= TaxonNameFactory.NewBacterialInstance(null);
1977             }
1978             parser.parseReferencedName(nameTBF, fullname, rank, false);
1979             retry++;
1980         }
1981         if (retry == 1){
1982             if(author != null){
1983                 if (name.indexOf(author)>-1) {
1984                     nameTBF = parser.parseSimpleName(name.substring(0, name.indexOf(author)), nomenclaturalCode, rank);
1985                 } else {
1986                     nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1987                 }
1988                 if (nameTBF.hasProblem()){
1989                     if (name.indexOf(author)>-1) {
1990                         addProblemNameToFile(name.substring(0, name.indexOf(author)),author,nomenclaturalCode,rank);
1991                     } else {
1992                         addProblemNameToFile(name,author,nomenclaturalCode,rank);
1993                     }
1994                     //                    System.out.println("TBF still has problems "+nameTBF.hasProblem());
1995                     problems = nameTBF.getParsingProblems();
1996                     for (ParserProblem pb:problems) {
1997                         System.out.println(pb.toString());
1998                     }
1999                     nameTBF.setFullTitleCache(name, true);
2000                 }else{
2001                     if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2002                         ((BotanicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2003                     }
2004                     if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2005                         ((ZoologicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2006                     }
2007                     if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2008                         ((BacterialName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2009                     }
2010                 }
2011                 //                    logger.info("FULL TITLE CACHE "+name);
2012             }else{
2013                 nameTBF.setFullTitleCache(name, true);
2014             }
2015         }
2016         return nameTBF;
2017     }
2018
2019      */
2020
2021     /**
2022      * @param nomenclatureNode: the XML nodes
2023      * @param nametosave: the list of objects to save into the CDM
2024      * @param refMods: the current reference extracted from the MODS
2025      * @return
2026      */
2027     @SuppressWarnings({ "rawtypes" })
2028     private Taxon extractNomenclature(Node nomenclatureNode,  List<TaxonName> nametosave, Reference refMods) throws ClassCastException{
2029         refMods=CdmBase.deproxy(refMods, Reference.class);
2030
2031         logger.info("extractNomenclature");
2032         NodeList children = nomenclatureNode.getChildNodes();
2033         String freetext="";
2034         Taxon acceptedTaxon = null;
2035         //   INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2036
2037         //        String fullContent = nomenclatureNode.getTextContent();
2038
2039         NomenclaturalStatusType statusType = null;
2040         String newNameStatus = null;
2041         //TODO
2042         for (int i=0;i<children.getLength();i++){
2043             if(children.item(i).getNodeName().equalsIgnoreCase("tax:status")){
2044                 String status = children.item(i).getTextContent().trim();
2045
2046                 if (!status.isEmpty()){
2047                         if (newNameStatus(status) != null){
2048                                 newNameStatus = newNameStatus(status);
2049                     }else{
2050                             try {
2051                                 statusType = nomStatusString2NomStatus(status);
2052                             } catch (UnknownCdmTypeException e) {
2053         //                      nomNovStatus;
2054                                 addProblematicStatusToFile(status);
2055                                 logger.warn("Problem with status: " + status);
2056                             }
2057                     }
2058                 }
2059             }
2060         }
2061
2062         boolean containsSynonyms=false;
2063         boolean wasSynonym = false;
2064         usedFollowingTextPrefix = null;  //reset
2065
2066         for (int i=0; i<children.getLength(); i++){
2067                 Node childNode = children.item(i);
2068                 String childName = childNode.getNodeName();
2069
2070
2071                 //following text
2072                 followingText = null;
2073                 if ( i + 1 < children.getLength()){
2074                 Node followingTextNode = children.item(i +1);
2075                 if (followingTextNode.getNodeName().equals("#text") && !followingTextNode.getTextContent().matches("\\s*") ){
2076                         followingText = followingTextNode.getTextContent();
2077                 }
2078                 }
2079
2080                 //traverse nodes
2081             if (childName.equalsIgnoreCase("#text")) {
2082                 freetext = childNode.getTextContent().trim();
2083                 if (usedFollowingTextPrefix != null && freetext.startsWith(usedFollowingTextPrefix)){
2084                         freetext = freetext.substring(usedFollowingTextPrefix.length());
2085                 }
2086                 usedFollowingTextPrefix = null;  //reset
2087             }else if (childName.equalsIgnoreCase("tax:collection_event")) {
2088                 //                System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
2089                 extractMaterialsDirect(childNode, acceptedTaxon, refMods, "collection", currentMyName.getTaxonName());
2090             }else if(childName.equalsIgnoreCase("tax:name")){
2091                 INonViralName nameToBeFilled;
2092                 //System.out.println("HANDLE FIRST NAME OF THE LIST");
2093                 if(!containsSynonyms){
2094                         wasSynonym = false;
2095
2096                         //System.out.println("I : "+i);
2097                     currentMyName = new MyName(false);
2098                     try {
2099                         currentMyName = extractScientificName(childNode, refMods, followingText);
2100                         treatmentMainName = currentMyName.getNewName();
2101                         originalTreatmentName = currentMyName.getOriginalName();
2102
2103                     } catch (TransformerFactoryConfigurationError e1) {
2104                         throw new RuntimeException(e1);
2105                     } catch (TransformerException e1) {
2106                         throw new RuntimeException(e1);
2107                     }
2108
2109                     if (currentMyName.getRank().equals(Rank.UNKNOWN_RANK()) || currentMyName.getRank().isLower(state2.getConfig().getMaxRank()) || currentMyName.getRank().equals(state2.getConfig().getMaxRank())){
2110                         maxRankRespected=true;
2111
2112                         nameToBeFilled=currentMyName.getTaxonName();
2113
2114                         //   acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
2115                         acceptedTaxon=currentMyName.getTaxon();
2116                         //System.out.println("TreatmentName "+treatmentMainName+" - "+acceptedTaxon);
2117
2118
2119                         boolean statusMatch=false;
2120                         if(acceptedTaxon !=null ){
2121                             acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2122                             statusMatch=compareStatus(acceptedTaxon, statusType);
2123                             //System.out.println("statusMatch: "+statusMatch);
2124                         }
2125                         if (acceptedTaxon ==null || (acceptedTaxon != null && !statusMatch)){
2126
2127                             nameToBeFilled=currentMyName.getTaxonName();
2128                             if (nameToBeFilled != null){
2129                                 if (!originalTreatmentName.isEmpty()) {
2130                                     TaxonNameDescription td = TaxonNameDescription.NewInstance();
2131                                     td.setTitleCache(originalTreatmentName, true);
2132                                     nameToBeFilled.addDescription(td);
2133                                 }
2134
2135                                 if(statusType != null) {
2136                                     nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
2137                                 }
2138                                 if(newNameStatus != null){
2139                                         nameToBeFilled.setAppendedPhrase(newNameStatus);
2140                                 }
2141                                 sourceHandler.addSource(refMods, TaxonName.castAndDeproxy(nameToBeFilled));
2142
2143                                 if (nameToBeFilled.getNomenclaturalReference() == null) {
2144                                     acceptedTaxon= Taxon.NewInstance(nameToBeFilled,refMods);
2145                                     //System.out.println("NEW ACCEPTED HERE "+nameToBeFilled);
2146                                 }
2147                                 else {
2148                                     acceptedTaxon= Taxon.NewInstance(nameToBeFilled,nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
2149                                     //System.out.println("NEW ACCEPTED HERE2 "+nameToBeFilled);
2150                                 }
2151
2152                                 sourceHandler.addSource(refMods, acceptedTaxon);
2153
2154                                 if(!state2.getConfig().doKeepOriginalSecundum()) {
2155                                     acceptedTaxon.setSec(state2.getConfig().getSecundum());
2156                                     //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2157                                     //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2158                                 }
2159
2160                                 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2161                                     setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2162                                 }
2163
2164
2165                                 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2166                                 acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2167                             }
2168
2169                         }else{
2170                             acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2171                             Set<IdentifiableSource> sources = acceptedTaxon.getSources();
2172                             boolean sourcelinked=false;
2173                             for (IdentifiableSource source:sources){
2174                                 if (source.getCitation().getTitleCache().equalsIgnoreCase(refMods.getTitleCache())) {
2175                                     sourcelinked=true;
2176                                 }
2177                             }
2178                             if (!state2.getConfig().doKeepOriginalSecundum()) {
2179                                 acceptedTaxon.setSec(state2.getConfig().getSecundum());
2180                                 //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2181                                 //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2182                             }
2183                             importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2184
2185                             if (!sourcelinked){
2186                                 sourceHandler.addSource(refMods, acceptedTaxon);
2187                             }
2188                             if (!sourcelinked || !state2.getConfig().doKeepOriginalSecundum()){
2189
2190                                 if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2191                                     //FIXME are these identifiers really related to taxa, not names? Exiting LSIDs come from Zoobank, urn:lsid:biosci.ohio-state.edu:osuc_concepts:134826 (Ants)
2192                                         setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2193                                 }
2194                                 importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2195                             }
2196                         }
2197                     }else{
2198                         maxRankRespected=false;
2199                     }
2200                     containsSynonyms=true;  //all folowing names are handled as synonyms
2201                 }else{
2202                     try{
2203                         extractSynonyms(childNode, acceptedTaxon, refMods, followingText);
2204                         wasSynonym = true;
2205
2206                     }catch(NullPointerException e){
2207                         logger.warn("null pointer exception, the accepted taxon might be null");
2208                     }
2209                 }
2210                 containsSynonyms=true;
2211             }else if (childName.equalsIgnoreCase("tax:ref_group") && maxRankRespected){
2212                 reloadClassification();
2213                 //extract the References within the document
2214                 extractReferences(childNode,nametosave,acceptedTaxon,refMods);
2215             }else if (childName.equalsIgnoreCase("tax:bibref")){
2216                 logger.warn(childName + " still preliminary");
2217
2218                 TaxonName currentName = currentMyName == null ? null : currentMyName.getTaxonName();
2219                 boolean handled = addFollowingTextToName (currentName, childNode.getTextContent() );
2220                 if (! handled){
2221                         setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject());
2222                 }
2223             }else{
2224                 logger.warn(childName + " not yet handled");
2225             }
2226             if(!stringIsEmpty(freetext.trim())) {;
2227                 if (! freetext.matches("\\d\\.?")){
2228                     TaxonName currentName = currentMyName == null ? null : currentMyName.getTaxonName();
2229                         boolean handled = false;
2230                         if (currentName != null && !wasSynonym){
2231                                 handled = addFollowingTextToName (currentName, childNode.getTextContent() );
2232                         }
2233                         if (! handled){
2234                                 setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject());
2235                         }
2236                 }
2237
2238                  freetext = "";
2239             }
2240
2241         }
2242         //importer.getClassificationService().saveOrUpdate(classification);
2243         return acceptedTaxon;
2244     }
2245
2246
2247
2248
2249         /**
2250      * @return
2251      */
2252
2253     private boolean compareStatus(TaxonBase<?> t, NomenclaturalStatusType statusType) {
2254         //logger.info("compareStatus");
2255         boolean statusMatch=false;
2256         //found one taxon
2257         Set<NomenclaturalStatus> status = t.getName().getStatus();
2258         if (statusType!=null && status.size()>0){ //the statusType is known for both taxon
2259             for (NomenclaturalStatus st:status){
2260                 NomenclaturalStatusType stype = st.getType();
2261                 if (stype.toString().equalsIgnoreCase(statusType.toString())) {
2262                     statusMatch=true;
2263                 }
2264             }
2265         }
2266         else{
2267             if(statusType == null && status.size()==0) {//there is no statusType, we can assume it's the same
2268                 statusMatch=true;
2269             }
2270         }
2271         return statusMatch;
2272     }
2273
2274     /**
2275      * @param acceptedTaxon: the current acceptedTaxon
2276      * @param ref: the current reference extracted from the MODS
2277      * @return the parent for the current accepted taxon
2278      */
2279     /*  private Taxon createParent(Taxon acceptedTaxon, Reference ref) {
2280         acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2281
2282         List<Rank> rankList = new ArrayList<Rank>();
2283         rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
2284
2285         List<String> rankListStr = new ArrayList<String>();
2286         for (Rank r:rankList) {
2287             rankListStr.add(r.toString());
2288         }
2289         String r="";
2290         String s = acceptedTaxon.getTitleCache();
2291         Taxon tax = null;
2292         if(!skippQuestion){
2293             int addTaxon = askAddParent(s);
2294             logger.info("ADD TAXON: "+addTaxon);
2295             if (addTaxon == 0 ){
2296                 Taxon tmp = askParent(acceptedTaxon, classification);
2297                 if (tmp == null){
2298                     s = askSetParent(s);
2299                     r = askRank(s,rankListStr);
2300
2301                     TaxonName nameToBeFilled = null;
2302                     if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2303                         nameToBeFilled = TaxonNameFactory.NewBotanicalInstance(null);
2304                     }
2305                     if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2306                         nameToBeFilled = TaxonNameFactory.NewZoologicalInstance(null);
2307                     }
2308                     if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2309                         nameToBeFilled = TaxonNameFactory.NewBacterialInstance(null);
2310                     }
2311                     nameToBeFilled.setTitleCache(s, true);
2312                     nameToBeFilled.setRank(getRank(r), true);
2313
2314                     tax = Taxon.NewInstance(nameToBeFilled, ref);
2315                 }
2316                 else{
2317                     tax=tmp;
2318                 }
2319
2320                 createParent(tax, ref);
2321                 //            logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
2322                 classification.addParentChild(tax, acceptedTaxon, ref, null);
2323             }
2324             else{
2325                 classification.addChildTaxon(acceptedTaxon, ref, null);
2326                 tax=acceptedTaxon;
2327             }
2328         } else{
2329             classification.addChildTaxon(acceptedTaxon, ref, null);
2330             tax=acceptedTaxon;
2331         }
2332         //        logger.info("RETURN: "+tax );
2333         return tax;
2334
2335     }
2336
2337      */
2338
2339
2340     private MyName  extractScientificNameSynonym(Node name, Reference refMods, String followingText) throws TransformerFactoryConfigurationError, TransformerException {
2341         //System.out.println("extractScientificNameSynonym");
2342         logger.info("extractScientificNameSynonym");
2343         String[] rankListToPrint_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2344         List<String> rankListToPrint = new ArrayList<String>();
2345         for (String r : rankListToPrint_tmp) {
2346             rankListToPrint.add(r.toLowerCase());
2347         }
2348
2349         Rank rank = Rank.UNKNOWN_RANK();
2350         NodeList children = name.getChildNodes();
2351         String originalName="";
2352         String fullName = "";
2353         String newName="";
2354         String identifier="";
2355         HashMap<String, String> atomisedMap = new HashMap<String, String>();
2356         List<String> atomisedName= new ArrayList<String>();
2357
2358         String rankStr = "";
2359         Rank tmpRank ;
2360
2361         String status= extractStatus(children);
2362
2363         for (int i=0;i<children.getLength();i++){
2364             if(children.item(i).getNodeName().equalsIgnoreCase("tax:xmldata")){
2365                 NodeList atom = children.item(i).getChildNodes();
2366                 for (int k=0;k<atom.getLength();k++){
2367                     identifier = extractIdentifier(identifier, atom.item(k));
2368                     tmpRank = null;
2369                     rankStr = atom.item(k).getNodeName().toLowerCase();
2370                     //                    logger.info("RANKSTR:*"+rankStr+"*");
2371                     if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2372                         rankStr=atom.item(k).getTextContent().trim();
2373                         tmpRank = getRank(rankStr);
2374                     }
2375                     //                    if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2376                     if (tmpRank != null){
2377                         rank=tmpRank;
2378                     }
2379                     atomisedMap.put(rankStr.toLowerCase(),atom.item(k).getTextContent().trim());
2380                 }
2381                 addAtomisedNamesToMap(rankListToPrint, rank, atomisedName, atom);
2382             }
2383             if(children.item(i).getNodeName().equalsIgnoreCase("#text") && !StringUtils.isBlank(children.item(i).getTextContent())){
2384                 //                logger.info("name non atomised: "+children.item(i).getTextContent());
2385                 fullName = children.item(i).getTextContent().trim();
2386                 //                logger.info("fullname: "+fullName);
2387             }
2388         }
2389         originalName=fullName;
2390         fullName = cleanName(fullName, atomisedName);
2391         namesMap.put(fullName,atomisedMap);
2392
2393         String atomisedNameStr = getAtomisedNameStr(atomisedName);
2394
2395         if (fullName != null){
2396             //            System.out.println("fullname: "+fullName);
2397             //            System.out.println("atomised: "+atomisedNameStr);
2398             if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2399                 if (skippQuestion){
2400                     //                    String defaultN = "";
2401                     if (atomisedNameStr.length()>fullName.length()) {
2402                         newName=atomisedNameStr;
2403                     } else {
2404                         if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2405                             newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2406                         } else {
2407                             newName=fullName;
2408                         }
2409                     }
2410                 } else {
2411                     newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2412                 }
2413             } else {
2414                 newName=fullName;
2415             }
2416         }
2417         //not really needed
2418         //        rank = askForRank(newName, rank, nomenclaturalCode);
2419         //        System.out.println("atomised: "+atomisedMap.toString());
2420
2421         //        String[] names = new String[5];
2422         MyName myname = new MyName(true);
2423
2424         //System.out.println("Handle "+newName+ "(rank: "+rank+")");
2425         //        System.out.println(atomisedMap.keySet());
2426         fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2427         myname.setOriginalName(fullName);
2428         myname.setNewName(newName);
2429         myname.setRank(rank);
2430         myname.setIdentifier(identifier);
2431         myname.setStatus(status);
2432         myname.setSource(refMods);
2433
2434         //        boolean higherAdded=false;
2435
2436
2437         boolean parseNameManually=false;
2438         INonViralNameParser<?> parser = NonViralNameParserImpl.NewInstance();
2439         TaxonName nameToBeFilledTest ;
2440
2441         //if selected the atomised version
2442         if(newName==atomisedNameStr){
2443             nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2444             if (nameToBeFilledTest.hasProblem()){
2445                 addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2446                 nameToBeFilledTest = (TaxonName)parser.parseFullName(fullName, nomenclaturalCode, rank);
2447                 if (nameToBeFilledTest.hasProblem()){
2448                     addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2449                     parseNameManually=true;
2450                 }
2451             }
2452         }else{
2453             nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2454             if (nameToBeFilledTest.hasProblem()){
2455                 addProblemNameToFile("fullversion",fullName, nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2456                 nameToBeFilledTest = (TaxonName)parser.parseFullName(fullName, nomenclaturalCode,rank);
2457                 parseNameManually=true;
2458                 if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2459                     addNameDifferenceToFile(originalName,atomisedNameStr);
2460                 }
2461             }
2462         }
2463
2464         if(parseNameManually){
2465             //System.out.println("DO IT MANUALLY");
2466                 if (this.state2.getConfig().isUseOldUnparsedSynonymExtraction()){
2467                 createUnparsedSynonym(rank, newName, atomisedMap, myname);
2468                 }else{
2469                         createUnparsedSynonymNew(rank, newName, atomisedMap, myname, refMods);;
2470                 }
2471         } else{
2472             //System.out.println("AUTOMATIC!");
2473             //            createAtomisedTaxonString(newName, atomisedMap, myname);
2474             myname.setParsedName(nameToBeFilledTest);
2475             myname.buildTaxon();
2476         }
2477         //System.out.println("RETURN SYNONYM "+myname.getSyno().toString());
2478         return myname;
2479     }
2480
2481
2482         /**
2483      * @param name
2484      * @throws TransformerFactoryConfigurationError
2485      * @throws TransformerException
2486      * @return a list of possible names
2487      */
2488     @SuppressWarnings({"rawtypes" })
2489     private MyName extractScientificName(Node name, Reference refMods, String followingText) throws TransformerFactoryConfigurationError, TransformerException {
2490         logger.info("extractScientificName");
2491
2492         String[] rankListToPrintLowerCase_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2493         List<String> rankListToPrint = Arrays.asList(rankListToPrintLowerCase_tmp);
2494
2495         Rank rank = Rank.UNKNOWN_RANK();
2496         NodeList children = name.getChildNodes();
2497         String originalName = "";
2498         String fullName = "";
2499         String newName = "";
2500         String identifier = "";
2501         HashMap<String, String> atomisedMap = new HashMap<String, String>();
2502         List<String> atomisedNameList= new ArrayList<String>();
2503
2504         String status= extractStatus(children);
2505
2506         for (int i=0;i<children.getLength();i++){
2507                 Node nameChild = children.item(i);
2508             if(nameChild.getNodeName().equalsIgnoreCase("tax:xmldata")){
2509                 NodeList xmlDataChildren = nameChild.getChildNodes();
2510                 for (int k=0;k<xmlDataChildren.getLength();k++){
2511                         Node xmlDataChild = xmlDataChildren.item(k);
2512                     identifier = extractIdentifier(identifier, xmlDataChild);
2513                     String rankStr = xmlDataChild.getNodeName().toLowerCase();
2514                     if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2515                         rankStr=xmlDataChild.getTextContent().trim();
2516                         Rank tmpRank = getRank(rankStr);
2517                         if (tmpRank != null){
2518                             rank=tmpRank;
2519                         }
2520                     }
2521                     //                    if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2522
2523                     atomisedMap.put(rankStr.toLowerCase(),xmlDataChild.getTextContent().trim());
2524                 }
2525                 addAtomisedNamesToMap(rankListToPrint, rank, atomisedNameList, xmlDataChildren);
2526             }
2527             else if(nameChild.getNodeName().equalsIgnoreCase("#text") && ! nameChild.getTextContent().matches("\\s*")){
2528                 //                logger.info("name non atomised: "+children.item(i).getTextContent());
2529                 fullName = nameChild.getTextContent().trim();
2530                 //                logger.info("fullname: "+fullName);
2531             }
2532         }
2533         originalName=fullName;
2534         fullName = cleanName(fullName, atomisedNameList);
2535         namesMap.put(fullName,atomisedMap);
2536
2537         String atomisedNameStr = getAtomisedNameStr(atomisedNameList);
2538
2539         if (fullName != null){
2540             if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2541                 if (skippQuestion){
2542                     if (atomisedNameStr.length()>fullName.length()) {
2543                         newName = atomisedNameStr;
2544                     } else {
2545                         if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2546                             newName = askWhichScientificName(fullName, atomisedNameStr, classification.getTitleCache(), name);
2547                         } else {
2548                             newName = fullName;
2549                         }
2550                     }
2551                 } else {
2552                     newName=askWhichScientificName(fullName, atomisedNameStr, classification.getTitleCache(), name);
2553                 }
2554             } else {
2555                 newName=fullName;
2556             }
2557         }
2558         //not really needed
2559         //        rank = askForRank(newName, rank, nomenclaturalCode);
2560         //        System.out.println("atomised: "+atomisedMap.toString());
2561
2562         //        String[] names = new String[5];
2563         MyName myname = new MyName(false);
2564
2565         //System.out.println("\n\nBUILD "+newName+ "(rank: "+rank+")");
2566         //        System.out.println(atomisedMap.keySet());
2567         fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2568         myname.setOriginalName(fullName);
2569         myname.setNewName(newName);
2570
2571         myname.setRank(rank);
2572         myname.setIdentifier(identifier);
2573         myname.setStatus(status);
2574         myname.setSource(refMods);
2575
2576         //        boolean higherAdded=false;
2577
2578
2579         boolean parseNameManually=false;
2580         INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2581         TaxonName  nameToBeFilledTest = null;
2582
2583         //if selected the atomised version
2584         if(newName==atomisedNameStr){
2585             nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2586             if (nameToBeFilledTest.hasProblem()){
2587                     addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2588                 nameToBeFilledTest = (TaxonName)parser.parseFullName(fullName, nomenclaturalCode,rank);
2589                 if (nameToBeFilledTest.hasProblem()){
2590                     addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2591                     parseNameManually=true;
2592                 }
2593             }
2594         }else{
2595             nameToBeFilledTest = parseWithExtension(parser, fullName , rank, followingText, atomisedMap);
2596             if (nameToBeFilledTest.hasProblem()){
2597                 addProblemNameToFile("fullversion",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2598                 nameToBeFilledTest = (TaxonName)parser.parseFullName(fullName, nomenclaturalCode,rank);
2599                 parseNameManually=true;
2600                 if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2601                     addNameDifferenceToFile(originalName,atomisedNameStr);
2602                 }
2603             }
2604         }
2605
2606         //System.out.println("parseNameManually: "+parseNameManually);
2607         if(parseNameManually){
2608             createAtomisedTaxon(rank, newName, atomisedMap, myname);
2609         }
2610         else{
2611             createAtomisedTaxonString(newName, atomisedMap, myname);
2612             myname.setParsedName(nameToBeFilledTest);
2613             //TODO correct handling of createIfNotExists
2614                 myname.buildTaxon();
2615         }
2616         return myname;
2617
2618     }
2619
2620     private TaxonName parseWithExtension(INonViralNameParser parser, String atomisedNameStr, Rank rank, String followingText, HashMap<String, String> atomisedMap) {
2621         Object[] nameExtensionResult = getPossibleExtension(followingText, atomisedMap, nomenclaturalCode);
2622
2623         TaxonName name = (TaxonName)parser.parseFullName(atomisedNameStr, nomenclaturalCode, rank);
2624         if (nameExtensionResult != null && nameExtensionResult[0] != null){
2625                 String ext = (String)nameExtensionResult[0];
2626                 TaxonName extName = (TaxonName)parser.parseFullName(atomisedNameStr + " " + ext, nomenclaturalCode, rank);
2627                 if (! extName.hasProblem()){
2628                         name = extName;
2629                         this.usedFollowingTextPrefix = ext;
2630                         //TODO do we need to fill the atomisedMap at all?
2631                         if ((Boolean)(nameExtensionResult[1])){
2632                                 //TODO
2633                         }
2634                         if ((Boolean)(nameExtensionResult[2])){
2635                                 //TODO BasionymYear etc.
2636                                 Integer origYear = name.getPublicationYear();
2637                                 if (origYear != null){
2638                                         atomisedMap.put(PUBLICATION_YEAR, origYear.toString());
2639                                 }
2640                         }
2641                 }
2642         }
2643                 return name;
2644         }
2645
2646         private Object[] getPossibleExtension(String followingText, HashMap<String, String> atomisedMap, NomenclaturalCode nomenclaturalCode) {
2647                 if (StringUtils.isBlank(followingText)){
2648                         return null;
2649                 }
2650
2651         boolean includeAuthor = true;
2652         boolean includeYear = false;
2653                 if (atomisedMap.containsKey("dwc:scientificnameauthorship")){
2654                         includeAuthor = false;
2655                 }
2656         if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2657                 includeYear = true;
2658         }
2659         String patternStr = "";
2660         if (includeAuthor){
2661                 patternStr += NonViralNameParserImplRegExBase.capitalWord;
2662         }
2663         if (includeYear){
2664                 patternStr += "\\s*(,|\\s+)\\s*" + "(17|18|19|20)" + "\\d{2}" ;
2665         }
2666         String match = null;
2667         if (! patternStr.isEmpty()){
2668                 Pattern pattern = Pattern.compile("^" + patternStr);
2669                 Matcher matcher = pattern.matcher(followingText.trim());
2670                 if (matcher.find()){
2671                         match = matcher.group();
2672                 }
2673         }
2674
2675                 return new Object[]{match, includeAuthor, includeYear};
2676         }
2677
2678         /**
2679      * @param atomisedName
2680      * @return
2681      */
2682     private String getAtomisedNameStr(List<String> atomisedName) {
2683         //logger.info("getAtomisedNameStr");
2684         String atomisedNameStr = StringUtils.join(atomisedName," ");
2685         while(atomisedNameStr.contains("  ")) {
2686             atomisedNameStr=atomisedNameStr.replace("  ", " ");
2687         }
2688         atomisedNameStr=atomisedNameStr.trim();
2689         return atomisedNameStr;
2690     }
2691
2692     /**
2693      * @param children
2694      * @param status
2695      * @return
2696      */
2697     private String extractStatus(NodeList children) {
2698         logger.info("extractStatus");
2699         String status="";
2700         for (int i=0;i<children.getLength();i++){
2701             if(children.item(i).getNodeName().equalsIgnoreCase("tax:status") ||
2702                     (children.item(i).getNodeName().equalsIgnoreCase("tax:namePart") &&
2703                             children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("status"))){
2704                 status = children.item(i).getTextContent().trim();
2705             }
2706         }
2707         return status;
2708     }
2709
2710     /**
2711      * @param identifier
2712      * @param atom
2713      * @param k
2714      * @return
2715      */
2716     private String extractIdentifier(String identifier, Node atom) {
2717         //logger.info("extractIdentifier");
2718         if (atom.getNodeName().equalsIgnoreCase("tax:xid")){
2719             try{
2720                 identifier = atom.getAttributes().getNamedItem("identifier").getNodeValue();
2721             }catch(Exception e){
2722                 System.out.println("pb with identifier, maybe empty");
2723             }
2724             try{
2725                 identifier+="__"+atom.getAttributes().getNamedItem("source").getNodeValue();
2726             }catch(Exception e){
2727                 System.out.println("pb with identifier, maybe empty");
2728             }
2729         }
2730         return identifier;
2731     }
2732
2733     /**
2734      * @param rankListToPrint
2735      * @param rank
2736      * @param atomisedName
2737      * @param atom
2738      */
2739     private void addAtomisedNamesToMap(List<String> rankListToPrint, Rank rank, List<String> atomisedName, NodeList atom) {
2740         logger.info("addAtomisedNamesToMap");
2741         for (int k=0;k<atom.getLength();k++){
2742                 Node node = atom.item(k);
2743                 String nodeName = node.getNodeName();
2744             if (! nodeName.equalsIgnoreCase("dwc:taxonRank") ) {  //rank has been handled in higher method
2745                 if (nodeName.equalsIgnoreCase("dwc:subgenus") || nodeName.equalsIgnoreCase("dwcranks:subgenus")) {
2746                     atomisedName.add("("+ node.getTextContent().trim()+")");
2747                 } else if(nodeName.equalsIgnoreCase("dwcranks:varietyepithet") || nodeName.equalsIgnoreCase("dwc:Subspecies") || nodeName.equalsIgnoreCase("dwc:infraspecificepithet")) {
2748                         if(nodeName.equalsIgnoreCase("dwcranks:varietyepithet")){
2749                             atomisedName.add("var. "+node.getTextContent().trim());
2750                         }else if(nodeName.equalsIgnoreCase("dwc:Subspecies") || nodeName.equalsIgnoreCase("dwc:infraspecificepithet")) {
2751                             atomisedName.add("subsp. "+atom.item(k).getTextContent().trim());
2752                         }
2753                 } else if(rankListToPrint.contains(nodeName.toLowerCase())) {
2754                     atomisedName.add(node.getTextContent().trim());
2755                 } else{
2756                     if (rank.isHigher(Rank.GENUS()) && (nodeName.indexOf("dwcranks:")>-1 || nodeName.indexOf("dwc:Family")>-1)) {
2757                         atomisedName.add(node.getTextContent().trim());
2758                     }else if (nodeName.equals("#text")){
2759                         String text = node.getTextContent();
2760                         if (StringUtils.isNotBlank(text)){
2761                                 //TODO handle text
2762                                 logger.warn("name xmldata contains text. This is unhandled");
2763                         }
2764                     }else if (nodeName.matches("(?i)(dwc:Kingdom|dwc:Class|dwc:Order|dwc:Family)")){
2765                         //we currently do not use higher ranks information
2766                     }else{
2767                         //TODO handle unhandled node
2768                         logger.warn("Unhandled node: " + nodeName);
2769                     }
2770                 }
2771             }
2772         }
2773     }
2774
2775     /**
2776      * @param fullName
2777      * @param atomisedName
2778      * @return
2779      */
2780     private String cleanName(String name, List<String> atomisedName) {
2781         //logger.info("cleanName");
2782         String fullName =name;
2783         if (fullName != null){
2784             fullName = fullName.replace("( ", "(");
2785             fullName = fullName.replace(" )",")");
2786
2787             if (fullName.trim().isEmpty()){
2788                 fullName=StringUtils.join(atomisedName," ");
2789             }
2790
2791             while(fullName.contains("  ")) {
2792                 fullName=fullName.replace("  ", " ");
2793                 //            logger.info("while");
2794             }
2795             fullName=fullName.trim();
2796         }
2797         return fullName;
2798     }
2799
2800     /**
2801      * @param rank
2802      * @param fullName
2803      * @param atomisedMap
2804      * @param myname
2805      * @return
2806      */
2807     private String extractAuthorFromNames(Rank rank, String name, HashMap<String, String> atomisedMap, MyName myname) {
2808         logger.info("extractAuthorFromNames");
2809         String fullName=name;
2810         if (atomisedMap.get("dwc:scientificnameauthorship") == null && fullName!=null){
2811             //            System.out.println("rank : "+rank.toString());
2812             if(rank.isHigher(Rank.SPECIES())){
2813                 try{
2814                     String author=null;
2815                     if(atomisedMap.get("dwcranks:subgenus") != null) {
2816                         author = fullName.split(atomisedMap.get("dwcranks:subgenus"))[1].trim();
2817                     }
2818                     if(atomisedMap.get("dwc:subgenus") != null) {
2819                         author = fullName.split(atomisedMap.get("dwc:subgenus"))[1].trim();
2820                     }
2821                     if(author == null) {
2822                         if(atomisedMap.get("dwc:genus") != null) {
2823                             author = fullName.split(atomisedMap.get("dwc:genus"))[1].trim();
2824                         }
2825                     }
2826                     if(author != null){
2827                         fullName = fullName.substring(0, fullName.indexOf(author));
2828                         author=author.replaceAll(",","").trim();
2829                         myname.setAuthor(author);
2830                     }
2831                 }catch(Exception e){
2832                     //could not extract the author
2833                 }
2834             }
2835             if(rank.equals(Rank.SPECIES())){
2836                 try{
2837                     String author=null;
2838                     if(author == null) {
2839                         if(atomisedMap.get("dwc:species") != null) {
2840                             String[] t = fullName.split(atomisedMap.get("dwc:species"));
2841                             //                            System.out.println("NB ELEMENTS "+t.length +"fullName "+fullName+", "+atomisedMap.get("dwc:species"));
2842                             author = fullName.split(atomisedMap.get("dwc:species"))[1].trim();
2843                             //                            System.out.println("AUTEUR "+author);
2844                         }
2845                     }
2846                     if(author != null){
2847                         fullName = fullName.substring(0, fullName.indexOf(author));
2848                         author=author.replaceAll(",","").trim();
2849                         myname.setAuthor(author);
2850                     }
2851                 }catch(Exception e){
2852                     //could not extract the author
2853                 }
2854             }
2855         }else{
2856             myname.setAuthor(atomisedMap.get("dwc:scientificnameauthorship"));
2857         }
2858         return fullName;
2859     }
2860
2861     /**
2862      * @param newName
2863      * @param atomisedMap
2864      * @param myname
2865      */
2866     private void createAtomisedTaxonString(String newName, HashMap<String, String> atomisedMap, MyName myname) {
2867         logger.info("createAtomisedTaxonString "+atomisedMap);
2868         if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
2869             myname.setFamilyStr(atomisedMap.get("dwc:family"));
2870         }
2871         if(atomisedMap.get("dwcranks:subfamily") != null  && checkRankValidForImport(Rank.SUBFAMILY())){
2872             myname.setSubfamilyStr(atomisedMap.get("dwcranks:subfamily"));
2873         }
2874         if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
2875             myname.setTribeStr(atomisedMap.get("dwcranks:tribe"));
2876         }
2877         if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
2878             myname.setSubtribeStr(atomisedMap.get("dwcranks:subtribe"));
2879         }
2880         if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
2881             myname.setGenusStr(atomisedMap.get("dwc:genus"));
2882         }
2883         if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2884             myname.setSubgenusStr(atomisedMap.get("dwcranks:subgenus"));
2885         }
2886         if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2887             myname.setSubgenusStr(atomisedMap.get("dwc:subgenus"));
2888         }
2889         if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
2890             String n=newName;
2891             if(atomisedMap.get("dwc:infraspecificepithet") != null) {
2892                 n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
2893                 n=n.replace("subsp.","");
2894             }
2895             if(atomisedMap.get("dwc:subspecies") != null) {
2896                 n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
2897                 n=n.replace("subsp.","");
2898             }
2899             if(atomisedMap.get("dwcranks:varietyepithet") != null) {
2900                 n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
2901                 n=n.replace("var.","");
2902                 n=n.replace("v.","");
2903             }
2904             if(atomisedMap.get("dwcranks:formepithet") != null) {
2905                 //TODO
2906                 System.out.println("TODO FORMA");
2907                 n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
2908                 n=n.replace("forma","");
2909             }
2910             n=n.trim();
2911             String author = myname.getAuthor();
2912             if(n.split(" ").length>2){
2913
2914                 String n2=n.split(" ")[0]+" "+n.split(" ")[1];
2915                 String a= "";
2916                 try{
2917                     a=n.split(n2)[1].trim();
2918                 }catch(Exception e){
2919                     logger.info("no author in "+n+"?");}
2920
2921                 myname.setAuthor(a);
2922                 //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
2923                 n=n2;
2924
2925             }
2926
2927             myname.setSpeciesStr(atomisedMap.get("dwc:species"));
2928             myname.setAuthor(author);
2929         }
2930         if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
2931             myname.setSubspeciesStr(atomisedMap.get("dwc:subspecies"));
2932         }
2933         if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
2934             myname.setSubspeciesStr(atomisedMap.get("dwc:infraspecificepithet"));
2935         }
2936         if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
2937             myname.setVarietyStr(atomisedMap.get("dwcranks:varietyepithet"));
2938         }
2939         if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
2940             myname.setFormStr(atomisedMap.get("dwcranks:formepithet"));
2941         }
2942         if (atomisedMap.get(PUBLICATION_YEAR) != null){
2943                 myname.setPublicationYear(Integer.valueOf(atomisedMap.get(PUBLICATION_YEAR)));
2944         }
2945     }
2946
2947     /**
2948      * @see #createUnparsedSynonymNew(Rank, String, HashMap, MyName)
2949      * @param rank
2950      * @param newName
2951      * @param atomisedMap
2952      * @param myname
2953      */
2954     private void createUnparsedSynonym(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
2955         logger.info("createSynonym");
2956         //System.out.println("createsynonym");
2957         if(rank.equals(Rank.UNKNOWN_RANK())){
2958             myname.setNotParsableTaxon(newName);
2959         }else{
2960                 if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY()) && rank.equals(Rank.FAMILY())){
2961                     myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
2962                 }
2963                 if(atomisedMap.get("dwcranks:subfamily") != null  && checkRankValidForImport(Rank.SUBFAMILY()) && rank.equals(Rank.SUBFAMILY())){
2964                     myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
2965                 }
2966                 if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE()) && rank.equals(Rank.TRIBE())){
2967                     myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
2968                 }
2969                 if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE()) && rank.equals(Rank.SUBTRIBE())){
2970                     myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
2971                 }
2972                 if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS()) && rank.equals(Rank.GENUS())){
2973                     myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
2974                 }
2975                 if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
2976                     myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
2977                 }
2978                 if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
2979                     myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
2980                 }
2981                 if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES()) && rank.equals(Rank.SPECIES())){
2982                     String n=newName;
2983                     if(atomisedMap.get("dwc:infraspecificepithet") != null) {
2984                         n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
2985                         n=n.replace("subsp.","");
2986                     }
2987                     if(atomisedMap.get("dwc:subspecies") != null) {
2988                         n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
2989                         n=n.replace("subsp.","");
2990                     }
2991                     if(atomisedMap.get("dwcranks:varietyepithet") != null) {
2992                         n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
2993                         n=n.replace("var.","");
2994                         n=n.replace("v.","");
2995                     }
2996                     if(atomisedMap.get("dwcranks:formepithet") != null) {
2997                         //TODO
2998                         //System.out.println("TODO FORMA");
2999                         n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3000                         n=n.replace("forma","");
3001                     }
3002                     n=n.trim();
3003                     String author = myname.getAuthor();
3004                     if(n.split(" ").length>2){
3005
3006                         String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3007                         String a="";
3008                         try{
3009                             a= n.split(n2)[1].trim();
3010                         }catch(Exception e){logger.info("no author in "+n);}
3011                         myname.setAuthor(a);
3012                         //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3013                         n=n2;
3014
3015                     }
3016                     Taxon species = myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank);
3017                     myname.setSpecies(species);
3018                     myname.setAuthor(author);
3019                 }
3020                 if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3021                     myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3022                 }
3023                 if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3024                     myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3025                 }
3026                 if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY()) && rank.equals(Rank.VARIETY())){
3027                     myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3028                 }
3029                 if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM()) && rank.equals(Rank.FORM())){
3030                     myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3031                 }
3032         }
3033
3034     }
3035
3036
3037     /**
3038      * @param refMods
3039      * @see #createUnparsedSynonym(Rank, String, HashMap, MyName)
3040      * the original TaxonXImport extracted Synonyms by creating acc Taxa with partial names
3041      * I (AM) do not understand this but don't want to destroy code which maybe works in some cases) there
3042      * I created this switch for old
3043      * for Spiders the new version is preferred
3044      */
3045     private void createUnparsedSynonymNew(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname, Reference refMods) {
3046         logger.info("createSynonym");
3047
3048         INonViralName nameToBeFilled = this.getNonViralNameAccNomenclature();
3049         //System.out.println("createsynonym");
3050         if(rank.equals(Rank.UNKNOWN_RANK())){
3051             //TODO
3052                 myname.setNotParsableTaxon(newName);
3053
3054                 nameToBeFilled.setTitleCache(newName, true);
3055         }else{
3056                 if(atomisedMap.get("dwc:genus") != null ){
3057                         nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwc:genus"));
3058                 }
3059                 if (rank.isSupraGeneric()){
3060                         if (atomisedMap.get("dwcranks:subtribe") != null ){
3061                         nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subtribe"));
3062                 }else if (atomisedMap.get("dwcranks:subtribe") != null ){
3063                         nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subtribe"));
3064                 }else if (atomisedMap.get("dwcranks:tribe") != null ){
3065                         nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:tribe"));
3066                 }else if (atomisedMap.get("dwcranks:subfamily") != null ){
3067                         nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subfamily"));
3068                 }else if (atomisedMap.get("dwc:family") != null ){
3069                         nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwc:family"));
3070                     }else{
3071                         logger.warn("Supra generic rank not yet handled or atomisation not available");
3072                     }
3073                 }
3074                 if (atomisedMap.get("dwcranks:subgenus") != null){
3075                         nameToBeFilled.setInfraGenericEpithet(atomisedMap.get("dwcranks:subgenus"));
3076                 }
3077                 if (atomisedMap.get("dwc:subgenus") != null){
3078                         nameToBeFilled.setInfraGenericEpithet(atomisedMap.get("dwc:subgenus"));
3079                 }
3080                 if (atomisedMap.get("dwc:species") != null){
3081                         nameToBeFilled.setSpecificEpithet(atomisedMap.get("dwc:species"));
3082                 }
3083                 if (atomisedMap.get("dwcranks:formepithet") != null){
3084                         nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwcranks:formepithet"));
3085                 }else if (atomisedMap.get("dwcranks:varietyepithet") != null){
3086                         nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwcranks:varietyepithet"));
3087                 }else if (atomisedMap.get("dwc:infraspecificepithet") != null){
3088                         nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwc:infraspecificepithet"));
3089                 }else if (atomisedMap.get("dwc:subspecies") != null){
3090                         nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwc:subspecies"));
3091                 }
3092             Reference sec = sourceUrlRef;
3093             if(!state2.getConfig().doKeepOriginalSecundum()){
3094                 sec = state2.getConfig().getSecundum();
3095             }
3096                 Synonym syn = Synonym.NewInstance(nameToBeFilled, sec);
3097 //              sourceHandler.addSource(refMods, syn);
3098                 myname.setSyno(syn);
3099                 myname.setSynonym(true);
3100         }
3101         }
3102
3103     /**
3104      * @param rank
3105      * @param newName
3106      * @param atomisedMap
3107      * @param myname
3108      */
3109     private void createAtomisedTaxon(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
3110         logger.info("createAtomisedTaxon "+atomisedMap);
3111         if(rank.equals(Rank.UNKNOWN_RANK())){
3112             myname.setNotParsableTaxon(newName);
3113         }
3114         else{
3115             if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
3116                 myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
3117             }
3118             if(atomisedMap.get("dwcranks:subfamily") != null  && checkRankValidForImport(Rank.SUBFAMILY())){
3119                 myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
3120             }
3121             if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
3122                 myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
3123             }
3124             if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
3125                 myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
3126             }
3127             if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
3128                 myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
3129             }
3130             if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3131                 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
3132             }
3133             if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3134                 myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
3135             }
3136             if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
3137                 String n=newName;
3138                 if(atomisedMap.get("dwc:infraspecificepithet") != null) {
3139                     n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
3140                     n=n.replace("subsp.","");
3141                 }
3142                 if(atomisedMap.get("dwc:subspecies") != null) {
3143                     n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
3144                     n=n.replace("subsp.","");
3145                 }
3146                 if(atomisedMap.get("dwcranks:varietyepithet") != null) {
3147                     n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
3148                     n=n.replace("var.","");
3149                     n=n.replace("v.","");
3150                 }
3151                 if(atomisedMap.get("dwcranks:formepithet") != null) {
3152                     //TODO
3153                     //System.out.println("TODO FORMA");
3154                     n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3155                     n=n.replace("forma","");
3156                 }
3157                 n=n.trim();
3158                 String author = myname.getAuthor();
3159                 if(n.split(" ").length>2){
3160                     String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3161                     String a="";
3162                     try{
3163                         a= n.split(n2)[1].trim();
3164                     }catch(Exception e){logger.info("no author  in "+n);}
3165                     myname.setAuthor(a);
3166                     //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3167                     n=n2;
3168
3169                 }
3170
3171                 myname.setSpecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank));
3172                 myname.setAuthor(author);
3173             }
3174             if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3175                 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3176             }
3177             if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3178                 myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3179             }
3180             if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
3181                 myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3182             }
3183             if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
3184                 myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3185             }
3186         }
3187     }
3188
3189     /**
3190      * @return
3191      */
3192     private boolean checkRankValidForImport(Rank currentRank) {
3193         //logger.info("checkRankValidForImport");
3194         return currentRank.isLower(state2.getConfig().getMaxRank()) || currentRank.equals(state2.getConfig().getMaxRank());
3195     }
3196
3197
3198
3199     /**
3200      * @param classification2
3201      */
3202     public void updateClassification(Classification classification2) {
3203         //logger.info("updateClassification");
3204         classification = classification2;
3205     }
3206
3207
3208
3209     public class MyName {
3210         /**
3211          * @param isSynonym
3212          */
3213         public MyName(boolean isSynonym) {
3214             super();
3215             this.isSynonym = isSynonym;
3216         }
3217
        // name strings as handled by the import
        String originalName="";
        String newName="";
        // rank of the name; setNotParsableTaxon() only accepts an existing name of the same rank
        Rank rank=Rank.UNKNOWN_RANK();
        // identifier of the name; setNotParsableTaxon() passes it to setLSID() if longer than 2 characters
        String identifier="";
        // status string of the name (read through getStatus() by setNotParsableTaxon() and buildTaxon())
        String status="";
        // author string; setNotParsableTaxon() only sets the LSID if this is not null
        String author=null;

        // the name object used by buildTaxon() and assigned by setNotParsableTaxon()
        TaxonName taxonName;

        // reference passed on when new Taxon / Synonym instances are created, see setSource()
        Reference refMods ;

        // NOTE(review): presumably one taxon / name / verbatim string per rank part; only the
        // *Str accessors and toString() are visible in this part of the class
        Taxon family,subfamily,tribe,subtribe,genus,subgenus,species,subspecies, variety,form;
        INonViralName familyName, subfamilyName, tribeName,subtribeName,genusName,subgenusName,speciesName,subspeciesName;
        String familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr;
        Integer publicationYear;


                Taxon higherTaxa;
        Rank higherRank;
        // results of setNotParsableTaxon() / buildTaxon(): taxon for accepted names, syno for synonyms
        private Taxon taxon;
        private Synonym syno;
3239
3240         /**
3241          * @return the syno
3242          */
3243         public Synonym getSyno() {
3244             return syno;
3245         }
3246
3247         @Override
3248         public String toString(){
3249             List<String> tot=new ArrayList<String>();
3250             String[] n= {familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr};
3251             for (String elt:n){
3252                 if (!StringUtils.isEmpty(elt)) {
3253                     tot.add(elt);
3254                 } else {
3255                     tot.add("*");
3256                 }
3257             }
3258             return StringUtils.join(tot," ");
3259         }
3260         /**
3261          * @param syno the syno to set
3262          */
3263         public void setSyno(Synonym syno) {
3264             this.syno = syno;
3265         }
3266
3267         boolean isSynonym=false;
3268
3269         /**
3270          * @return the isSynonym
3271          */
3272         public boolean isSynonym() {
3273             return isSynonym;
3274         }
3275
3276         /**
3277          * @param isSynonym the isSynonym to set
3278          */
3279         public void setSynonym(boolean isSynonym) {
3280             this.isSynonym = isSynonym;
3281         }
3282
3283         public void setSource(Reference re){
3284             refMods=re;
3285         }
3286
3287         /**
3288          * @param string
3289          */
3290         public void setFormStr(String string) {
3291             this.formStr=string;
3292
3293         }
3294         /**
3295          * @param string
3296          */
3297         public void setVarietyStr(String string) {
3298             this.varietyStr=string;
3299
3300         }
3301         /**
3302          * @param string
3303          */
3304         public void setSubspeciesStr(String string) {
3305             this.subspeciesStr=string;
3306
3307         }
3308         /**
3309          * @param string
3310          */
3311         public void setSpeciesStr(String string) {
3312             this.speciesStr=string;
3313
3314         }
3315         /**
3316          * @param string
3317          */
3318         public void setSubgenusStr(String string) {
3319             this.subgenusStr=string;
3320
3321         }
3322         /**
3323          * @param string
3324          */
3325         public void setGenusStr(String string) {
3326             this.genusStr=string;
3327
3328         }
3329         /**
3330          * @param string
3331          */
3332         public void setSubtribeStr(String string) {
3333             this.subtribeStr=string;
3334
3335         }
3336         /**
3337          * @param string
3338          */
3339         public void setTribeStr(String string) {
3340             this.tribeStr=string;
3341
3342         }
3343         /**
3344          * @param string
3345          */
3346         public void setSubfamilyStr(String string) {
3347             this.subfamilyStr=string;
3348
3349         }
3350         /**
3351          * @param string
3352          */
3353         public void setFamilyStr(String string) {
3354             this.familyStr=string;
3355
3356         }
3357         /**
3358          * @return the familyStr
3359          */
3360         public String getFamilyStr() {
3361             return familyStr;
3362         }
3363         /**
3364          * @return the subfamilyStr
3365          */
3366         public String getSubfamilyStr() {
3367             return subfamilyStr;
3368         }
3369         /**
3370          * @return the tribeStr
3371          */
3372         public String getTribeStr() {
3373             return tribeStr;
3374         }
3375         /**
3376          * @return the subtribeStr
3377          */
3378         public String getSubtribeStr() {
3379             return subtribeStr;
3380         }
3381         /**
3382          * @return the genusStr
3383          */
3384         public String getGenusStr() {
3385             return genusStr;
3386         }
3387         /**
3388          * @return the subgenusStr
3389          */
3390         public String getSubgenusStr() {
3391             return subgenusStr;
3392         }
3393         /**
3394          * @return the speciesStr
3395          */
3396         public String getSpeciesStr() {
3397             return speciesStr;
3398         }
3399         /**
3400          * @return the subspeciesStr
3401          */
3402         public String getSubspeciesStr() {
3403             return subspeciesStr;
3404         }
3405         /**
3406          * @return the formStr
3407          */
3408         public String getFormStr() {
3409             return formStr;
3410         }
3411         /**
3412          * @return the varietyStr
3413          */
3414         public String getVarietyStr() {
3415             return varietyStr;
3416         }
3417
3418         public Integer getPublicationYear() {
3419                         return publicationYear;
3420                 }
3421
3422                 public void setPublicationYear(Integer publicationYear) {
3423                         this.publicationYear = publicationYear;
3424                 }
3425
3426         /**
3427          * @param newName2
3428          */
3429         public void setNotParsableTaxon(String newName2) {
3430             //takes too much time
3431             //            List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3432
3433             NomenclaturalStatusType statusType = null;
3434             if (!getStatus().isEmpty()){
3435                 try {
3436                     statusType = nomStatusString2NomStatus(getStatus());
3437                 } catch (UnknownCdmTypeException e) {
3438                     addProblematicStatusToFile(getStatus());
3439                     logger.warn("Problem with status");
3440                 }
3441             }
3442             List<TaxonBase> tmpList = new ArrayList<>();
3443
3444             Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitleWithRestrictions(TaxonBase.class, newName2, MatchMode.BEGINNING, null, null, null, null, null);
3445             tmpList.addAll(taxontest.getRecords());
3446
3447             //logger.info("tmpList returned: "+tmpList.size());
3448
3449
3450             INonViralName identicName = null;
3451             boolean foundIdentic=false;
3452             TaxonBase<?> tmpTaxonBase=null;
3453             //            Taxon tmpPartial=null;
3454             for (TaxonBase<?> tmpb:tmpList){
3455                 if(tmpb !=null){
3456                     TaxonName tnb =  tmpb.getName();
3457                     Rank crank=null;
3458                     if (tnb != null){
3459                         if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(newName2) ){
3460                             crank =tnb.getRank();
3461                             if (crank !=null && rank !=null){
3462                                 if (crank.equals(rank)){
3463                                         identicName = tnb;
3464                                         if (isSynonym && tmpb.isInstanceOf(Synonym.class) || !isSynonym && tmpb.isInstanceOf(Taxon.class)){
3465                                                 foundIdentic=true;
3466                                                 tmpTaxonBase=tmpb;
3467                                                 break;
3468                                         }
3469                                 }
3470                             }
3471                         }
3472                     }
3473                 }
3474             }
3475             boolean statusMatch=false;
3476             boolean appendedMatch=false;
3477             if(tmpTaxonBase !=null && foundIdentic){
3478                 statusMatch=compareStatus(tmpTaxonBase, statusType);
3479                 if (!getStatus().isEmpty() && ! (tmpTaxonBase.getAppendedPhrase() == null)) {
3480                     appendedMatch=tmpTaxonBase.getAppendedPhrase().equals(getStatus());
3481                 }
3482                 if (getStatus().isEmpty() && tmpTaxonBase.getAppendedPhrase() == null) {
3483                     appendedMatch=true;
3484                 }
3485
3486             }
3487             if ((tmpTaxonBase == null || !foundIdentic) ||  (tmpTaxonBase != null && !statusMatch) ||  (tmpTaxonBase != null && !appendedMatch && !statusMatch)){
3488
3489                 INonViralName tnb;
3490                 if (identicName == null){
3491                         tnb = getNonViralNameAccNomenclature();
3492                         tnb.setRank(rank);
3493
3494                         if(statusType != null) {
3495                             tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3496                         }
3497                         if(StringUtils.isNotBlank(getStatus())) {
3498                             tnb.setAppendedPhrase(getStatus());
3499                         }
3500                         tnb.setTitleCache(newName2,true);
3501                         tmpTaxonBase = findMatchingTaxon(tnb,refMods);
3502                     }else{
3503                         tnb = identicName;
3504                 }
3505
3506                 if(tmpTaxonBase==null){
3507                     tmpTaxonBase = isSynonym ? Synonym.NewInstance(tnb, refMods) : Taxon.NewInstance(tnb, refMods);
3508                     if(!state2.getConfig().doKeepOriginalSecundum()) {
3509                         tmpTaxonBase.setSec(state2.getConfig().getSecundum());
3510                     }
3511                     //tmptaxonbase.setSec(refMods);
3512                     if(!isSynonym) {
3513                         classification.addChildTaxon((Taxon)tmpTaxonBase, null, null);
3514                         sourceHandler.addSource(refMods, (Taxon)tmpTaxonBase);
3515                     }
3516                 }
3517             }
3518
3519             tmpTaxonBase = CdmBase.deproxy(tmpTaxonBase, TaxonBase.class);
3520             if (author != null) {
3521                 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
3522                     setLSID(getIdentifier(), tmpTaxonBase);
3523                     importer.getTaxonService().saveOrUpdate(tmpTaxonBase);
3524                     tmpTaxonBase = CdmBase.deproxy(tmpTaxonBase, TaxonBase.class);
3525                 }
3526             }
3527             TaxonName tnb = CdmBase.deproxy(tmpTaxonBase.getName(), TaxonName.class);
3528
3529             if(!isSynonym) {
3530                 this.taxon=(Taxon)tmpTaxonBase;
3531             } else {
3532                 if (tmpTaxonBase instanceof Taxon){
3533                         logger.warn("Incorrect status");
3534                 }
3535                 this.syno=(Synonym)tmpTaxonBase;
3536             }
3537
3538             taxonName = tnb;
3539
3540         }
3541
3542         /**
3543          *
3544          */
3545         public void buildTaxon() {
3546             //System.out.println("BUILD TAXON");
3547             logger.info("buildTaxon");
3548             NomenclaturalStatusType statusType = null;
3549             if (!getStatus().isEmpty()){
3550                 status = getStatus();
3551                 String newNameStatus = newNameStatus(status);
3552                 if (newNameStatus != null){
3553                         taxonName.setAppendedPhrase(newNameStatus);
3554                 }else{
3555                         try {
3556                                 statusType = nomStatusString2NomStatus(getStatus());
3557                                 taxonName.addStatus(NomenclaturalStatus.NewInstance(statusType));
3558                         } catch (UnknownCdmTypeException e) {
3559                                 addProblematicStatusToFile(getStatus());
3560                                 logger.warn("Problem with status");
3561                         }
3562                 }
3563             }
3564             importer.getNameService().save(taxonName);
3565
3566             TaxonBase<?> tmpTaxonBase;
3567             if (!isSynonym) {
3568                 tmpTaxonBase =Taxon.NewInstance(taxonName, refMods); //sec set null
3569             }
3570             else {
3571                 tmpTaxonBase =Synonym.NewInstance(taxonName, refMods); //sec set null
3572             }
3573             boolean exist = false;
3574             if (!isSynonym){
3575                     for (TaxonNode node : classification.getAllNodes()){
3576                         try{
3577                                 Taxon nodeTaxon = node.getTaxon();
3578                                 boolean titleMatches = nodeTaxon.getTitleCache().equalsIgnoreCase(tmpTaxonBase.getTitleCache());
3579                                 boolean nomStatusMatches = compareStatus(node.getTaxon(), statusType);
3580                                 boolean nodeNameReplaceable = checkNodeNameReplaceable(nodeTaxon, tmpTaxonBase);
3581                             if(titleMatches && nomStatusMatches) {
3582                                 if (!isSynonym) {
3583                                         tmpTaxonBase=CdmBase.deproxy(nodeTaxon, TaxonBase.class);
3584                                     exist =true;
3585                                 } else {
3586                                     logger.info("Found the same name but from another type (taxon/synonym)");
3587                                     TaxonName existingTnb = getTaxon().getName();
3588                                 tmpTaxonBase = Synonym.NewInstance(existingTnb, refMods);
3589                                 importer.getTaxonService().saveOrUpdate(tmpTaxonBase);
3590                                 exist =true;
3591                             }
3592                             }else if (nodeNameReplaceable){
3593                                 nodeTaxon.setName(tmpTaxonBase.getName());
3594                                 tmpTaxonBase = nodeTaxon;
3595                                 exist = true;
3596                             }
3597                         }catch(NullPointerException n){logger.warn(" A taxon is either null or its titlecache is null - ignore it?");}
3598                     }
3599             }
3600             if (!exist){
3601
3602                 boolean insertAsExisting =false;
3603                 List<Taxon> existingTaxons=new ArrayList<Taxon>();
3604                 try {
3605                     existingTaxons = getMatchingTaxa(taxonName);
3606                 } catch (Exception e1) {
3607                     e1.printStackTrace();
3608                 }
3609                 double similarityScore=0.0;
3610                 double similarityAuthor=-1;
3611                 String author1="";
3612                 String author2="";
3613                 String t1="";
3614                 String t2="";
3615                 for (Taxon bestMatchingTaxon : existingTaxons){
3616                     //System.out.println("tnbase "+taxonname.getTitleCache());
3617                     //System.out.println("bestex "+bestMatchingTaxon.getTitleCache());
3618                     if(taxonName.getAuthorshipCache()!=null) {
3619                         author1=taxonName.getAuthorshipCache();
3620                     }
3621                     try {
3622                         if(bestMatchingTaxon.getName().getAuthorshipCache()!=null) {
3623                             author2=bestMatchingTaxon.getName().getAuthorshipCache();
3624                         }
3625                     } catch (Exception e) {
3626                         // TODO Auto-generated catch block
3627                         e.printStackTrace();
3628                     }
3629                     try {
3630                         t1=taxonName.getTitleCache();
3631                         if (author1!=null && !StringUtils.isEmpty(author1)) {
3632                             t1=t1.split(Pattern.quote(author1))[0];
3633                         }
3634                     } catch (Exception e) {
3635                         // TODO Auto-generated catch block
3636                         e.printStackTrace();
3637                     }
3638                     try {
3639                         t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
3640                         if (author2!=null && !StringUtils.isEmpty(author2)) {
3641                             t2=t2.split(Pattern.quote(author2))[0];
3642                         }
3643                     } catch (Exception e) {
3644                         // TODO Auto-generated catch block
3645                         e.printStackTrace();
3646                     }
3647
3648                     similarityScore=similarity(t1.trim(), t2.trim());
3649                     //System.out.println("taxonscore "+similarityScore);
3650                     similarityAuthor=similarity(author1.trim(), author2.trim());
3651                     //System.out.println("authorscore "+similarityAuthor);
3652                     insertAsExisting = compareAndCheckTaxon(taxonName, refMods, similarityScore, bestMatchingTaxon, similarityAuthor);
3653                     if(insertAsExisting) {
3654                         tmpTaxonBase=bestMatchingTaxon;
3655                         break;
3656                     }
3657                 }
3658                 if ( !insertAsExisting ){
3659                     if(!state2.getConfig().doKeepOriginalSecundum()) {
3660                         tmpTaxonBase.setSec(state2.getConfig().getSecundum());
3661                     }
3662
3663                     //                    tmptaxonbase.setSec(refMods);
3664                     if (taxonName.getRank().equals(state2.getConfig().getMaxRank())) {
3665                         //System.out.println("****************************"+tmptaxonbase);
3666                         if (!isSynonym) {
3667                             classification.addChildTaxon((Taxon)tmpTaxonBase, refMods, null);
3668                         }
3669                     } else{
3670                         hierarchy = new HashMap<Rank, Taxon>();
3671                         //System.out.println("LOOK FOR PARENT "+taxonname.toString()+", "+tmptaxonbase.toString());
3672                         if (!isSynonym){
3673                             lookForParentNode(taxonName,(Taxon)tmpTaxonBase, refMods,this);
3674                             //System.out.println("HIERARCHY "+hierarchy);
3675                             Taxon parent = buildHierarchy();
3676                             if(!taxonExistsInClassification(parent,(Taxon)tmpTaxonBase)){
3677                                 if(parent !=null) {
3678                                     classification.addParentChild(parent, (Taxon)tmpTaxonBase, refMods, null);
3679                                 } else {
3680                                     classification.addChildTaxon((Taxon)tmpTaxonBase, refMods, null);
3681                                 }
3682                                 importer.getClassificationService().saveOrUpdate(classification);
3683                             }
3684                         }
3685                         //                        Set<TaxonNode> nodeList = classification.getAllNodes();
3686                         //                        for(TaxonNode tn:nodeList) {
3687                         //                            System.out.println(tn.getTaxon());
3688                         //                        }
3689                     }
3690                 }
3691                 importer.getClassificationService().saveOrUpdate(classification);
3692                  if(isSynonym) {
3693                     try{
3694                         Synonym castTest=CdmBase.deproxy(tmpTaxonBase, Synonym.class);
3695                     }catch(Exception e){
3696                         TaxonName existingTnb = tmpTaxonBase.getName();
3697                         Synonym castTest = Synonym.NewInstance(existingTnb, refMods);
3698                         importer.getTaxonService().saveOrUpdate(castTest);
3699                         tmpTaxonBase=CdmBase.deproxy(castTest, Synonym.class);
3700                     }
3701                 }
3702             }
3703             if(!isSynonym) {
3704                 taxon=CdmBase.deproxy(tmpTaxonBase, Taxon.class);
3705             } else {
3706                 syno=CdmBase.deproxy(tmpTaxonBase, Synonym.class);
3707             }
3708
3709         }
3710
3711                 private boolean checkNodeNameReplaceable(Taxon nodeTaxon, TaxonBase<?> newTaxon) {
3712                         //TODO preliminary check
3713                         if (newTaxon.isInstanceOf(Synonym.class)){
3714                                 return false;
3715                         }
3716                         INonViralName nodeName = nodeTaxon.getName();
3717                         INonViralName newName = newTaxon.getName();
3718                         if (nodeTaxon.getName() == null ||  newName == null){
3719                                 return false;
3720                         }
3721                         if (nodeTaxon.getDescriptions().size() > 0 || nodeName.getDescriptions().size() > 0 || nodeName.getTypeDesignations().size() > 0 ){
3722                                 return false;
3723                         }
3724                         boolean compare = true;
3725                         for (NomenclaturalStatus status : newName.getStatus() ){
3726                                 compare &= compareStatus(nodeTaxon, status.getType());
3727                         }
3728                         if (! compare){
3729                                 return false;
3730                         }
3731
3732                         if (nodeName.getNameCache() != null && nodeName.getNameCache().equals(newName.getNameCache())){
3733                                 if (nodeName.getNameCache().equals(nodeName.getTitleCache())){
3734                                         if (newName.getNameCache().length() < newName.getTitleCache().length()){
3735                                                 logger.warn("We still need to check, if node was automatically created via hierarchy creation: " + nodeName.getNameCache());
3736                                                 return true;
3737                                         }
3738                                 }
3739                         }
3740
3741                         return false;
3742                 }
3743
3744                 /**
3745          *
3746          */
3747         private Taxon buildHierarchy() {
3748             logger.info("buildHierarchy");
3749             Taxon higherTaxon = null;
3750             //add the maxRank as a root
3751             if(hierarchy.containsKey(state2.getConfig().getMaxRank())){
3752                 Taxon ct=hierarchy.get(state2.getConfig().getMaxRank());
3753                 if(!taxonExistsInClassification(higherTaxon, ct)) {
3754                    classification.addChildTaxon(ct, refMods, null);
3755                 }
3756                 higherTaxon = hierarchy.get(state2.getConfig().getMaxRank());
3757                 //                return higherTaxon;
3758             }
3759             //add the relation to the highertaxon, except if the current rank to add IS the maxRank
3760
3761             //TODO higher Ranks
3762
3763             if(hierarchy.containsKey(Rank.FAMILY()) && !state2.getConfig().getMaxRank().equals(Rank.FAMILY())){
3764                 higherTaxon=saveAndGetHigherTaxon(Rank.FAMILY(),higherTaxon);
3765             }
3766             if(hierarchy.containsKey(Rank.SUBFAMILY()) && !state2.getConfig().getMaxRank().equals(Rank.SUBFAMILY())){
3767                 higherTaxon=saveAndGetHigherTaxon(Rank.SUBFAMILY(),higherTaxon);
3768             }
3769             if(hierarchy.containsKey(Rank.TRIBE())&& !state2.getConfig().getMaxRank().equals(Rank.TRIBE())){
3770                 higherTaxon=saveAndGetHigherTaxon(Rank.TRIBE(),higherTaxon);
3771             }
3772             if(hierarchy.containsKey(Rank.SUBTRIBE())&& !state2.getConfig().getMaxRank().equals(Rank.SUBTRIBE())){
3773                 higherTaxon=saveAndGetHigherTaxon(Rank.SUBTRIBE(),higherTaxon);
3774             }
3775             if(hierarchy.containsKey(Rank.GENUS())&& !state2.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3776                 higherTaxon=saveAndGetHigherTaxon(Rank.GENUS(),higherTaxon);
3777             }
3778             if(hierarchy.containsKey(Rank.SUBGENUS())&& !state2.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3779                 higherTaxon=saveAndGetHigherTaxon(Rank.SUBGENUS(),higherTaxon);
3780             }
3781             importer.getClassificationService().saveOrUpdate(classification);
3782             return higherTaxon;
3783         }
3784
3785         private Taxon saveAndGetHigherTaxon(Rank r, Taxon higherTaxon){
3786             Taxon ct=hierarchy.get(r);
3787             if(!taxonExistsInClassification(higherTaxon,ct )) {
3788                 if(higherTaxon != null && ct!=null) {
3789                     classification.addParentChild(higherTaxon, ct, refMods, null);
3790                 } else
3791                     if(higherTaxon == null && ct !=null) {
3792                         classification.addChildTaxon(ct, refMods, null);
3793                 }
3794             }
3795             return ct;
3796         }
3797
3798         private boolean taxonExistsInClassification(Taxon parent, Taxon child){
3799             logger.info("taxonExistsInClassification");
3800             //            System.out.println("LOOK IF TAXA EXIST "+parent+", "+child);
3801             boolean found=false;
3802             if(parent !=null){
3803                 for (TaxonNode p : classification.getAllNodes()){
3804                     if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
3805                         for (TaxonNode c : p.getChildNodes()) {
3806                             if (c.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3807                                 found=true;
3808                                 break;
3809                             }
3810                         }
3811                     }
3812                 }
3813             }
3814             else{
3815                 for (TaxonNode p : classification.getAllNodes()){
3816                     if(p.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3817                         found=true;
3818                         break;
3819                     }
3820                 }
3821             }
3822             //            System.out.println("LOOK IF TAXA EXIST? "+found);
3823             return found;
3824         }
3825         /**
3826          * @param nameToBeFilledTest
3827          */
3828         public void setParsedName(TaxonName nameToBeFilledTest) {
3829             this.taxonName = TaxonName.castAndDeproxy(nameToBeFilledTest);
3830
3831         }
3832         //variety dwcranks:varietyEpithet
3833         /**
3834          * @return the author
3835          */
3836         public String getAuthor() {
3837             return author;
3838         }
3839         /**
3840          * @return
3841          */
3842         public Taxon getTaxon() {
3843             return taxon;
3844         }
3845         /**
3846          * @return
3847          */
3848         public TaxonName getTaxonName() {
3849             return taxonName;
3850         }
3851
3852         /**
3853          * @param findOrCreateTaxon
3854          */
3855         public void setForm(Taxon form) {
3856             this.form=form;
3857
3858         }
3859         /**
3860          * @param findOrCreateTaxon
3861          */
3862         public void setVariety(Taxon variety) {
3863             this.variety=variety;
3864
3865         }
3866         /**
3867          * @param string
3868          * @return
3869          */
3870         @SuppressWarnings("rawtypes")
3871         public Taxon findOrCreateTaxon(String partialname,String fullname, Rank rank, Rank globalrank) {
3872             logger.info("findOrCreateTaxon");
3873             sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
3874             //takes too much time
3875             //            List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3876             //            logger.info("tmpList returned: "+tmpList.size());
3877
3878             NomenclaturalStatusType statusType = null;
3879             if (!getStatus().isEmpty()){
3880                 try {
3881                     statusType = nomStatusString2NomStatus(getStatus());
3882                 } catch (UnknownCdmTypeException e) {
3883                     addProblematicStatusToFile(getStatus());
3884                     logger.warn("Problem with status");
3885                 }
3886             }
3887
3888             List<TaxonBase> tmpListFiltered = new ArrayList<TaxonBase>();
3889
3890             Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitleWithRestrictions(TaxonBase.class, fullname, MatchMode.BEGINNING, null, null, null, null, null);
3891
3892             tmpListFiltered.addAll(taxontest.getRecords());
3893             taxontest = importer.getTaxonService().findByTitleWithRestrictions(TaxonBase.class, partialname, MatchMode.BEGINNING, null, null, null, null, null);
3894             tmpListFiltered.addAll(taxontest.getRecords());
3895
3896             //logger.info("tmpListFiltered returned: "+tmpListFiltered.size());
3897
3898             boolean nameCorrected=false;
3899             if (fullname.indexOf(partialname)<0) {
3900                 nameCorrected=true;
3901             }
3902
3903             boolean foundIdentic=false;
3904             Taxon tmp=null;
3905             for (TaxonBase tmpb:tmpListFiltered){
3906                 if(tmpb !=null){
3907                     TaxonName tnb =  tmpb.getName();
3908                     Rank crank=null;
3909                     if (tnb != null){
3910                          if(globalrank.equals(rank) || (globalrank.isLower(Rank.SPECIES()) && rank.equals(Rank.SPECIES()))){
3911                             if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(fullname) ){
3912                                 crank =tnb.getRank();
3913                                 if (crank !=null && rank !=null){
3914                                     if (crank.equals(rank)){
3915                                         foundIdentic=true;
3916                                         try{
3917                                             tmp=(Taxon)tmpb;
3918                                             break;
3919                                         }catch(Exception e){
3920                                             e.printStackTrace();
3921                                         }
3922                                     }
3923                                 }
3924                             }
3925                             if(nameCorrected){ //for corrected names such as Anochetus -- A. blf-pat
3926                                 if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
3927                                     crank =tnb.getRank();
3928                                     if (crank !=null && rank !=null){
3929                                         if (crank.equals(rank)){
3930                                             foundIdentic=true;
3931                                             try{
3932                                                 tmp=(Taxon)tmpb;
3933                                                 break;
3934                                             }catch(Exception e){
3935                                                 e.printStackTrace();
3936                                             }
3937                                         }
3938                                     }
3939                                 }
3940                             }
3941                         }
3942                         else{
3943                             if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
3944                                 crank =tnb.getRank();
3945                                 if (crank !=null && rank !=null){
3946                                     if (crank.equals(rank)){
3947                                         foundIdentic=true;
3948                                         try{
3949                                             tmp=(Taxon)tmpb;
3950                                             break;
3951                                         }catch(Exception e){
3952                                             e.printStackTrace();
3953                                         }
3954                                     }
3955                                 }
3956                             }
3957                         }
3958                     }
3959                 }
3960             }
3961             boolean statusMatch=false;
3962             boolean appendedMatch=false;
3963             if(tmp !=null && foundIdentic){
3964                 statusMatch=compareStatus(tmp, statusType);
3965                 if (!getStatus().isEmpty() && ! (tmp.getAppendedPhrase() == null)) {
3966                     appendedMatch=tmp.getAppendedPhrase().equals(getStatus());
3967                 }
3968                 if (getStatus().isEmpty() && tmp.getAppendedPhrase() == null) {
3969                     appendedMatch=true;
3970                 }
3971
3972             }
3973             if ((tmp == null || !foundIdentic) ||  (tmp != null && !statusMatch) ||  (tmp != null && !appendedMatch && !statusMatch)){
3974
3975                 INonViralName tnb = getNonViralNameAccNomenclature();
3976                 tnb.setRank(rank);
3977
3978                 if(statusType != null) {
3979                     tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3980                 }
3981                 if(StringUtils.isNotBlank(getStatus())) {
3982                     tnb.setAppendedPhrase(getStatus());
3983                 }
3984
3985                 if(rank.equals(Rank.UNKNOWN_RANK())){
3986                     tnb.setTitleCache(fullname, true);
3987                     //                    tnb.setGenusOrUninomial(fullname);
3988                 }
3989                 if(rank.isHigher(Rank.GENUS())) {
3990                     tnb.setGenusOrUninomial(partialname);
3991                 }
3992
3993                 if(rank.isHigher(Rank.SPECIES())) {
3994                     tnb.setTitleCache(partialname, true);
3995                 }
3996
3997                 if (rank.equals(globalrank) && author != null) {
3998
3999                     tnb.setCombinationAuthorship(findOrCreateAuthor(author));
4000                     if (getIdentifier() !=null && !getIdentifier().isEmpty()){
4001                         Taxon taxonLSID = getTaxonByLSID(getIdentifier());
4002                         if (taxonLSID !=null) {
4003                             tmp=taxonLSID;
4004                         }
4005                     }
4006                 }
4007
4008                 if(tmp == null){
4009                     if (rank.equals(Rank.FAMILY())) {
4010                         tmp = buildFamily(tnb);
4011                     }
4012                     if (rank.equals(Rank.SUBFAMILY())) {
4013                         tmp = buildSubfamily(tnb);
4014                     }
4015                     if (rank.equals(Rank.TRIBE())) {
4016                         tmp = buildTribe(tnb);
4017                     }
4018                     if (rank.equals(Rank.SUBTRIBE())) {
4019                         tmp = buildSubtribe(tnb);
4020                     }
4021                     if (rank.equals(Rank.GENUS())) {
4022                         tmp = buildGenus(partialname, tnb);
4023                     }
4024
4025                     if (rank.equals(Rank.SUBGENUS())) {
4026                         tmp = buildSubgenus(partialname, tnb);
4027                     }
4028                     if (rank.equals(Rank.SPECIES())) {
4029                         tmp = buildSpecies(partialname, tnb);
4030                     }
4031
4032                     if (rank.equals(Rank.SUBSPECIES())) {
4033                         tmp = buildSubspecies(partialname, tnb);
4034                     }
4035
4036                     if (rank.equals(Rank.VARIETY())) {
4037                         tmp = buildVariety(fullname, partialname, tnb);
4038                     }
4039
4040                     if (rank.equals(Rank.FORM())) {
4041                         tmp = buildForm(fullname, partialname, tnb);
4042                     }
4043                     if (tmp != null){
4044                         TaxonXTreatmentExtractor.this.sourceHandler.addSource(refMods, tmp);
4045                     }
4046
4047                     importer.getClassificationService().saveOrUpdate(classification);
4048                 }
4049
4050             }
4051
4052             tmp = CdmBase.deproxy(tmp, Taxon.class);
4053             if (rank.equals(globalrank) && author != null) {
4054                 if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
4055                     setLSID(getIdentifier(), tmp);
4056                     importer.getTaxonService().saveOrUpdate(tmp);
4057                     tmp = CdmBase.deproxy(tmp, Taxon.class);
4058                 }
4059             }
4060
4061             this.taxon=tmp;
4062
4063             return tmp;
4064         }
4065
4066         /**
4067          * @param tnb
4068          * @return
4069          */
4070         private Taxon buildSubfamily(INonViralName tnb) {
4071             Taxon tmp;
4072             //            tnb.generateTitle();
4073             tmp = findMatchingTaxon(tnb,refMods);
4074             if(tmp ==null){
4075                 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4076                 if(!state2.getConfig().doKeepOriginalSecundum()) {
4077                     tmp.setSec(state2.getConfig().getSecundum());
4078                 }
4079                 //                tmp.setSec(refMods);
4080                 //                sourceHandler.addSource(refMods, tmp);
4081                 if(family != null) {
4082                     classification.addParentChild(family, tmp, null, null);
4083                     higherRank=Rank.FAMILY();
4084                     higherTaxa=family;
4085                 } else {
4086                     //System.out.println("ADDCHILDTAXON SUBFAMILY "+tmp);
4087                     classification.addChildTaxon(tmp, null, null);
4088                 }
4089             }
4090             return tmp;
4091         }
4092         /**
4093          * @param tnb
4094          * @return
4095          */
4096         private Taxon buildFamily(INonViralName tnb) {
4097             Taxon tmp;
4098             //            tnb.generateTitle();
4099             tmp = findMatchingTaxon(tnb,refMods);
4100             if(tmp ==null){
4101                 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4102                 if(!state2.getConfig().doKeepOriginalSecundum()) {
4103                     tmp.setSec(state2.getConfig().getSecundum());
4104                 }
4105                 //                tmp.setSec(refMods);
4106                 //sourceHandler.addSource(refMods, tmp);
4107                 //System.out.println("ADDCHILDTAXON FAMILY "+tmp);
4108                 classification.addChildTaxon(tmp, null, null);
4109             }
4110             return tmp;
4111         }
4112         /**
4113          * @param fullname
4114          * @param tnb
4115          * @return
4116          */
4117         private Taxon buildForm(String fullname, String partialname, INonViralName tnb) {
4118             if (genusName !=null) {
4119                 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4120             }
4121             if (subgenusName !=null) {
4122                 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4123             }
4124             if(speciesName !=null) {
4125                 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4126             }
4127             if(subspeciesName != null) {
4128                 tnb.setInfraSpecificEpithet(subspeciesName.getInfraSpecificEpithet());
4129             }
4130             if(partialname!= null) {
4131                 tnb.setInfraSpecificEpithet(partialname);
4132             }
4133              //TODO how to save form??
4134             tnb.setTitleCache(fullname, true);
4135             Taxon tmp = findMatchingTaxon(tnb,refMods);
4136             if(tmp ==null){
4137                 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4138                 if(!state2.getConfig().doKeepOriginalSecundum()) {
4139                     tmp.setSec(state2.getConfig().getSecundum());
4140                 }
4141                 //                tmp.setSec(refMods);
4142                 //sourceHandler.addSource(refMods, tmp);
4143                 if (subspecies !=null) {
4144                     classification.addParentChild(subspecies, tmp, null, null);
4145                     higherRank=Rank.SUBSPECIES();
4146                     higherTaxa=subspecies;
4147                 } else {
4148                     if (species !=null) {
4149                         classification.addParentChild(species, tmp, null, null);
4150                         higherRank=Rank.SPECIES();
4151                         higherTaxa=species;
4152                     }
4153                     else{
4154                         //                        System.out.println("ADDCHILDTAXON FORM "+tmp);
4155                         classification.addChildTaxon(tmp, null, null);
4156                     }
4157                 }
4158             }
4159             return tmp;
4160         }
4161         /**
4162          * @param fullname
4163          * @param tnb
4164          * @return
4165          */
4166         private Taxon buildVariety(String fullname, String partialname, INonViralName tnb) {
4167             Taxon tmp;
4168             if (genusName !=null) {
4169                 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4170             }
4171             if (subgenusName !=null) {
4172                 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4173             }
4174             if(speciesName !=null) {
4175                 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4176             }
4177             if(subspeciesName != null) {
4178                 tnb.setInfraSpecificEpithet(subspeciesName.getSpecificEpithet());
4179             }
4180             if(partialname != null) {
4181                 tnb.setInfraSpecificEpithet(partialname);
4182             }
4183             //TODO how to save variety?
4184             tnb.setTitleCache(fullname, true);
4185             tmp = findMatchingTaxon(tnb,refMods);
4186             if(tmp ==null){
4187                 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4188                 if(!state2.getConfig().doKeepOriginalSecundum()) {
4189                     tmp.setSec(state2.getConfig().getSecundum());
4190                 }
4191                 //                tmp.setSec(refMods);
4192                 //sourceHandler.addSource(refMods, tmp);
4193                 if (subspecies !=null) {
4194                     classification.addParentChild(subspecies, tmp, null, null);
4195                     higherRank=Rank.SUBSPECIES();
4196                     higherTaxa=subspecies;
4197                 } else {
4198                     if(species !=null) {
4199                         classification.addParentChild(species, tmp, null, null);
4200                         higherRank=Rank.SPECIES();
4201                         higherTaxa=species;
4202                     }
4203                     else{
4204                         //System.out.println("ADDCHILDTAXON VARIETY "+tmp);
4205                         classification.addChildTaxon(tmp, null, null);
4206                     }
4207                 }
4208             }
4209             return tmp;
4210         }
4211         /**
4212          * @param partialname
4213          * @param tnb
4214          * @return
4215          */
4216         private Taxon buildSubspecies(String partialname, INonViralName tnb) {
4217             if (genusName !=null) {
4218                 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4219             }
4220             if (subgenusName !=null) {
4221                 //                            System.out.println("SUB:"+subgenusName.getInfraGenericEpithet());
4222                 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4223             }
4224             if(speciesName !=null) {
4225                 //                            System.out.println("SPE:"+speciesName.getSpecificEpithet());
4226                 tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4227             }
4228             tnb.setInfraSpecificEpithet(partialname);
4229             Taxon tmp = findMatchingTaxon(tnb,refMods);
4230             if(tmp ==null){
4231                 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4232                 if(!state2.getConfig().doKeepOriginalSecundum())
4233                  {
4234                     tmp.setSec(state2.getConfig().getSecundum());
4235                 //                tmp.setSec(refMods);
4236                 //sourceHandler.addSource(refMods, tmp);
4237                 }
4238
4239                 if(species != null) {
4240                     classification.addParentChild(species, tmp, null, null);
4241                     higherRank=Rank.SPECIES();
4242                     higherTaxa=species;
4243                 }
4244                 else{
4245                     //System.out.println("ADDCHILDTAXON SUBSPECIES "+tmp);
4246                     classification.addChildTaxon(tmp, null, null);
4247                 }
4248             }
4249             return tmp;
4250         }
4251         /**
4252          * @param partialname
4253          * @param tnb
4254          * @return
4255          */
4256         private Taxon buildSpecies(String partialname, INonViralName tnb) {
4257             if (genusName !=null) {
4258                 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4259             }
4260             if (subgenusName !=null) {
4261                 tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4262             }
4263             tnb.setSpecificEpithet(partialname.toLowerCase());
4264             Taxon tmp = findMatchingTaxon(tnb,refMods);
4265             if(tmp ==null){
4266                 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4267                 if(!state2.getConfig().doKeepOriginalSecundum()) {
4268                     tmp.setSec(state2.getConfig().getSecundum());
4269                 }
4270                 //                tmp.setSec(refMods);
4271                 //sourceHandler.addSource(refMods, tmp);
4272                 if (subgenus !=null) {
4273                     classification.addParentChild(subgenus, tmp, null, null);
4274                     higherRank=Rank.SUBGENUS();
4275                     higherTaxa=subgenus;
4276                 } else {
4277                     if (genus !=null) {
4278                         classification.addParentChild(genus, tmp, null, null);
4279                         higherRank=Rank.GENUS();
4280                         higherTaxa=genus;
4281                     }
4282                     else{
4283                         //System.out.println("ADDCHILDTAXON SPECIES "+tmp);
4284                         classification.addChildTaxon(tmp, null, null);
4285                     }
4286                 }
4287             }
4288             return tmp;
4289         }
4290         /**
4291          * @param partialname
4292          * @param tnb
4293          * @return
4294          */
4295         private Taxon buildSubgenus(String partialname, INonViralName tnb) {
4296             tnb.setInfraGenericEpithet(partialname);
4297             if (genusName !=null) {
4298                 tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4299             }
4300             Taxon tmp = findMatchingTaxon(tnb,refMods);
4301             if(tmp ==null){
4302                 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4303                 if(!state2.getConfig().doKeepOriginalSecundum()) {
4304                     tmp.setSec(state2.getConfig().getSecundum());
4305                 }
4306                 //                tmp.setSec(refMods);
4307                 //sourceHandler.addSource(refMods, tmp);
4308                 if(genus != null) {
4309                     classification.addParentChild(genus, tmp, null, null);
4310                     higherRank=Rank.GENUS();
4311                     higherTaxa=genus;
4312                 } else{
4313                     //System.out.println("ADDCHILDTAXON SUBGENUS "+tmp);
4314                     classification.addChildTaxon(tmp, null, null);
4315                 }
4316             }
4317             return tmp;
4318         }
4319         /**
4320          * @param partialname
4321          * @param tnb
4322          * @return
4323          */
4324         private Taxon buildGenus(String partialname, INonViralName tnb) {
4325             Taxon tmp;
4326             tnb.setGenusOrUninomial(partialname);
4327
4328
4329             tmp = findMatchingTaxon(tnb,refMods);
4330             if(tmp ==null){
4331                 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4332                 if(!state2.getConfig().doKeepOriginalSecundum())
4333                  {
4334                     tmp.setSec(state2.getConfig().getSecundum());
4335                 //                tmp.setSec(refMods);
4336                 //sourceHandler.addSource(refMods, tmp);
4337                 }
4338
4339                 if(subtribe != null) {
4340                     classification.addParentChild(subtribe, tmp, null, null);
4341                     higherRank=Rank.SUBTRIBE();
4342                     higherTaxa=subtribe;
4343                 } else{
4344                     if(tribe !=null) {
4345                         classification.addParentChild(tribe, tmp, null, null);
4346                         higherRank=Rank.TRIBE();
4347                         higherTaxa=tribe;
4348                     } else{
4349                         if(subfamily !=null) {
4350                             classification.addParentChild(subfamily, tmp, null, null);
4351                             higherRank=Rank.SUBFAMILY();
4352                             higherTaxa=subfamily;
4353                         } else
4354                             if(family !=null) {
4355                                 classification.addParentChild(family, tmp, null, null);
4356                                 higherRank=Rank.FAMILY();
4357                                 higherTaxa=family;
4358                             }
4359                             else{
4360                                 //System.out.println("ADDCHILDTAXON GENUS "+tmp);
4361                                 classification.addChildTaxon(tmp, null, null);
4362                             }
4363                     }
4364                 }
4365             }
4366             return tmp;
4367         }
4368
4369         /**
4370          * @param tnb
4371          * @return
4372          */
4373         private Taxon buildSubtribe(INonViralName tnb) {
4374             Taxon tmp = findMatchingTaxon(tnb,refMods);
4375             if(tmp==null){
4376                 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4377                 if(!state2.getConfig().doKeepOriginalSecundum()) {
4378                     tmp.setSec(state2.getConfig().getSecundum());
4379                 }
4380                 //                tmp.setSec(refMods);
4381                 //sourceHandler.addSource(refMods, tmp);
4382                 if(tribe != null) {
4383                     classification.addParentChild(tribe, tmp, null, null);
4384                     higherRank=Rank.TRIBE();
4385                     higherTaxa=tribe;
4386                 } else{
4387                     //System.out.println("ADDCHILDTAXON SUBTRIBE "+tmp);
4388                     classification.addChildTaxon(tmp, null, null);
4389                 }
4390             }
4391             return tmp;
4392         }
4393         /**
4394          * @param tnb
4395          * @return
4396          */
4397         private Taxon buildTribe(INonViralName tnb) {
4398             Taxon tmp = findMatchingTaxon(tnb,refMods);
4399             if(tmp==null){
4400                 tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4401                 if(!state2.getConfig().doKeepOriginalSecundum()) {
4402                     tmp.setSec(state2.getConfig().getSecundum());
4403                 }
4404                 //                tmp.setSec(refMods);
4405                 //sourceHandler.addSource(refMods, tmp);
4406                 if (subfamily !=null) {
4407                     classification.addParentChild(subfamily, tmp, null, null);
4408                     higherRank=Rank.SUBFAMILY();
4409                     higherTaxa=subfamily;
4410                 } else {
4411                     if(family != null) {
4412                         classification.addParentChild(family, tmp, null, null);
4413                         higherRank=Rank.FAMILY();
4414                         higherTaxa=family;
4415                     }
4416                     else{
4417                         //System.out.println("ADDCHILDTAXON TRIBE "+tmp);
4418                         classification.addChildTaxon(tmp, null, null);
4419                     }
4420                 }
4421             }
4422             return tmp;
4423         }
4424
4425         /**
4426          * @param identifier2
4427          * @return
4428          */
4429         @SuppressWarnings("rawtypes")
4430         private Taxon getTaxonByLSID(String identifier) {
4431             //logger.info("getTaxonByLSID");
4432             //            boolean lsidok=false;
4433             String id = identifier.split("__")[0];
4434             //            String source = identifier.split("__")[1];
4435             LSID lsid = null;
4436             if (id.indexOf("lsid")>-1){
4437                 try {
4438                     lsid = new LSID(id);
4439                     //                    lsidok=true;
4440                 } catch (MalformedLSIDException e) {
4441                     logger.warn("Malformed LSID");
4442                 }
4443             }
4444             if (lsid !=null){
4445                 List<Taxon> taxa = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
4446                 LSID currentlsid=null;
4447                 for (Taxon t:taxa){
4448                     currentlsid = t.getLsid();
4449                     if (currentlsid !=null){
4450                         if (currentlsid.getLsid().equals(lsid.getLsid())){
4451                             try{
4452                                 return t;
4453                             }
4454                             catch(Exception e){logger.warn("Exception occurred while comparing LSIDs "+e );}
4455                         }
4456                     }
4457                 }
4458             }
4459             return null;
4460         }
4461         /**
4462          * @param author2
4463          * @return
4464          */
4465         @SuppressWarnings("rawtypes")
4466         private Person findOrCreateAuthor(String author2) {
4467             //logger.info("findOrCreateAuthor");
4468             List<UuidAndTitleCache<Person>> hiberPersons = importer.getAgentService().getPersonUuidAndTitleCache();
4469             for (UuidAndTitleCache<Person> hibernateP:hiberPersons){
4470                 if(hibernateP.getTitleCache().equals(author2)) {
4471                     AgentBase existing = importer.getAgentService().find(hibernateP.getUuid());
4472                     return CdmBase.deproxy(existing, Person.class);
4473                 }
4474             }
4475             Person p = Person.NewInstance();
4476             p.setTitleCache(author2,true);
4477             importer.getAgentService().saveOrUpdate(p);
4478             return CdmBase.deproxy(p, Person.class);
4479         }
        /**
         * Sets the author.
         *
         * @param author the author to set
         */
        public void setAuthor(String author) {
            this.author = author;
        }
4486
4487         /**
4488          * @return the higherTaxa
4489          */
4490         public Taxon getHigherTaxa() {
4491             return higherTaxa;
4492         }
        /**
         * Sets the higher taxon.
         *
         * @param higherTaxa the higherTaxa to set
         */
        public void setHigherTaxa(Taxon higherTaxa) {
            this.higherTaxa = higherTaxa;
        }
4499         /**
4500          * @return the higherRank
4501          */
4502         public Rank getHigherRank() {
4503             return higherRank;
4504         }
        /**
         * Sets the rank of the higher taxon.
         *
         * @param higherRank the higherRank to set
         */
        public void setHigherRank(Rank higherRank) {
            this.higherRank = higherRank;
        }
4511         public String getName(){
4512             if (newName.isEmpty()) {
4513                 return originalName;
4514             } else {
4515                 return newName;
4516             }
4517
4518         }
4519         /**
4520          * @return the fullName
4521          */
4522         public String getOriginalName() {
4523             return originalName;
4524         }
        /**
         * Sets the original name.
         *
         * @param fullName the value to store as originalName
         */
        public void setOriginalName(String fullName) {
            this.originalName = fullName;
        }
4531         /**
4532          * @return the newName
4533          */
4534         public String getNewName() {
4535             return newName;
4536         }
        /**
         * Sets the new name.
         *
         * @param newName the newName to set
         */
        public void setNewName(String newName) {
            this.newName = newName;
        }
4543         /**
4544          * @return the rank
4545          */
4546         public Rank getRank() {
4547             return rank;
4548         }
        /**
         * Sets the rank.
         *
         * @param rank the rank to set
         */
        public void setRank(Rank rank) {
            this.rank = rank;
        }
4555         /**
4556          * @return the idenfitiger
4557          */
4558         public String getIdentifier() {
4559             return identifier;
4560         }
        /**
         * Sets the identifier.
         *
         * @param identifier the identifier to set
         */
        public void setIdentifier(String identifier) {
            this.identifier = identifier;
        }
4567         /**
4568          * @return the status
4569          */
4570         public String getStatus() {
4571             if (status == null) {
4572                 return "";
4573             }
4574             return status;
4575         }
        /**
         * Sets the status.
         *
         * @param status the status to set
         */
        public void setStatus(String status) {
            this.status = status;
        }
4582         /**
4583          * @return the family
4584          */
4585         public Taxon getFamily() {
4586             return family;
4587         }
4588         /**
4589          * @param family the family to set
4590          */
4591         @SuppressWarnings("rawtypes")
4592         public void setFamily(Taxon family) {
4593             this.family = family;
4594             familyName = CdmBase.deproxy(family.getName());
4595         }
4596         /**
4597          * @return the subfamily
4598          */
4599         public Taxon getSubfamily() {
4600             return subfamily;
4601         }
4602         /**
4603          * @param subfamily the subfamily to set
4604          */
4605         @SuppressWarnings("rawtypes")
4606         public void setSubfamily(Taxon subfamily) {
4607             this.subfamily = subfamily;
4608             subfamilyName = CdmBase.deproxy(subfamily.getName());
4609         }
4610         /**
4611          * @return the tribe
4612          */
4613         public Taxon getTribe() {
4614             return tribe;
4615         }
4616         /**
4617          * @param tribe the tribe to set
4618          */
4619         @SuppressWarnings("rawtypes")
4620         public void setTribe(Taxon tribe) {
4621             this.tribe = tribe;
4622             tribeName = CdmBase.deproxy(tribe.getName());
4623         }
4624         /**
4625          * @return the subtribe
4626          */
4627         public Taxon getSubtribe() {
4628             return subtribe;
4629         }
4630         /**
4631          * @param subtribe the subtribe to set
4632          */
4633         @SuppressWarnings("rawtypes")
4634         public void setSubtribe(Taxon subtribe) {
4635             this.subtribe = subtribe;
4636             subtribeName =CdmBase.deproxy(subtribe.getName());
4637         }
4638         /**
4639          * @return the genus
4640          */
4641         public Taxon getGenus() {
4642             return genus;
4643         }
4644         /**
4645          * @param genus the genus to set
4646          */
4647         @SuppressWarnings("rawtypes")
4648         public void setGenus(Taxon genus) {
4649             if (genus != null){
4650                         this.genus = genus;
4651                     genusName = CdmBase.deproxy(genus.getName());
4652             }
4653         }
4654         /**
4655          * @return the subgenus
4656          */
4657         public Taxon getSubgenus() {
4658             return subgenus;
4659         }
4660         /**
4661          * @param subgenus the subgenus to set
4662          */
4663         @SuppressWarnings("rawtypes")
4664         public void setSubgenus(Taxon subgenus) {
4665             this.subgenus = subgenus;
4666             subgenusName = CdmBase.deproxy(subgenus.getName());
4667         }
4668         /**
4669          * @return the species
4670          */
4671         public Taxon getSpecies() {
4672             return species;
4673         }
4674         /**
4675          * @param species the species to set
4676          */
4677         public void setSpecies(Taxon species) {
4678                 if (species != null){
4679                     this.species = species;
4680                     speciesName = CdmBase.deproxy(species.getName());
4681                 }
4682         }
4683         /**
4684          * @return the subspecies
4685          */
4686         public Taxon getSubspecies() {
4687             return subspecies;
4688         }
4689         /**
4690          * @param subspecies the subspecies to set
4691          */
4692         @SuppressWarnings("rawtypes")
4693         public void setSubspecies(Taxon subspecies) {
4694             this.subspecies = subspecies;
4695             subspeciesName = CdmBase.deproxy(subspecies.getName());
4696
4697         }
4698
4699
4700
4701     }
4702
4703
4704     /**
4705      * @param status
4706      */
4707     private void addProblematicStatusToFile(String status) {
4708         try{
4709             FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "StatusUnknown_"+classification.getTitleCache()+".txt",true);
4710             BufferedWriter out = new BufferedWriter(fstream);
4711             out.write(status+"\n");
4712             //Close the output stream
4713             out.close();
4714         }catch (Exception e){//Catch exception if any
4715             System.err.println("Error: " + e.getMessage());
4716         }
4717
4718     }
4719
4720
4721
4722     /**
4723      * @param tnb
4724      * @return
4725      */
4726     private Taxon findMatchingTaxon(INonViralName tnb, Reference refMods) {
4727         logger.info("findMatchingTaxon");
4728         Taxon tmp=null;
4729
4730         refMods=CdmBase.deproxy(refMods, Reference.class);
4731         boolean insertAsExisting =false;
4732         List<Taxon> existingTaxa = new ArrayList<Taxon>();
4733         try {
4734             existingTaxa = getMatchingTaxa(TaxonName.castAndDeproxy(tnb));
4735         } catch (Exception e1) {
4736             // TODO Auto-generated catch block
4737             e1.printStackTrace();
4738         }
4739         double similarityScore=0.0;
4740         double similarityAuthor=-1;
4741         String author1="";
4742         String author2="";
4743         String t1="";
4744         String t2="";
4745         for (Taxon bestMatchingTaxon : existingTaxa){
4746             if (!existingTaxa.isEmpty() && state2.getConfig().isInteractWithUser() && !insertAsExisting) {
4747                 //                System.out.println("tnb "+tnb.getTitleCache());
4748                 //                System.out.println("ext "+bestMatchingTaxon.getTitleCache());
4749                 try {
4750                     if(tnb.getAuthorshipCache()!=null) {
4751                         author1=tnb.getAuthorshipCache();
4752                     }
4753                 } catch (Exception e) {
4754                     // TODO Auto-generated catch block
4755                     e.printStackTrace();
4756                 }
4757                 try {
4758                     if(bestMatchingTaxon.getName().getAuthorshipCache()!=null) {
4759                         author2=bestMatchingTaxon.getName().getAuthorshipCache();
4760                     }
4761                 } catch (Exception e) {
4762                     // TODO Auto-generated catch block
4763                     e.printStackTrace();
4764                 }
4765                 try {
4766                     t1=tnb.getTitleCache().split("sec.")[0].trim();
4767                     if (author1!=null && !StringUtils.isEmpty(author1)) {
4768                         t1=t1.split(Pattern.quote(author1))[0];
4769                     }
4770                 } catch (Exception e) {
4771                     // TODO Auto-generated catch block
4772                     e.printStackTrace();
4773                 }
4774                 try {
4775                     t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
4776                     if (author2!=null && !StringUtils.isEmpty(author2)) {
4777                         t2=t2.split(Pattern.quote(author2))[0];
4778                     }
4779                 } catch (Exception e) {
4780                     // TODO Auto-generated catch block
4781                     e.printStackTrace();
4782                 }
4783                 similarityScore=similarity(t1.trim(), t2.trim());
4784                 //                System.out.println("taxascore: "+similarityScore);
4785                 similarityAuthor=similarity(author1.trim(), author2.trim());
4786                 //                System.out.println("authorscore: "+similarityAuthor);
4787                 insertAsExisting = compareAndCheckTaxon(tnb, refMods, similarityScore, bestMatchingTaxon,similarityAuthor);
4788             }
4789             if(insertAsExisting) {
4790                 //System.out.println("KEEP "+bestMatchingTaxon.toString());
4791                 tmp=bestMatchingTaxon;
4792                 sourceHandler.addSource(refMods, tmp);
4793                 return tmp;
4794             }
4795         }
4796         return tmp;
4797     }
4798
4799
4800     /**
4801      * @param tnb
4802      * @param refMods
4803      * @param similarityScore
4804      * @param bestMatchingTaxon
4805      * @param similarityAuthor
4806      * @return
4807      */
4808     private boolean compareAndCheckTaxon(INonViralName tnb, Reference refMods, double similarityScore,
4809             Taxon bestMatchingTaxon, double similarityAuthor) {
4810         //logger.info("compareAndCheckTaxon");
4811         boolean insertAsExisting;
4812         //        if (tnb.getTitleCache().split("sec.")[0].equalsIgnoreCase("Chenopodium") && bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium album")>-1) {
4813         //            insertAsExisting=false;
4814         //        } else{
4815         //a small hack/automatisation for Chenopodium only
4816         if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase("Chenopodium") &&
4817                 bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium L.")>-1) {
4818             insertAsExisting=true;
4819         } else {
4820             insertAsExisting=askIfReuseBestMatchingTaxon(tnb, bestMatchingTaxon, refMods, similarityScore,similarityAuthor);
4821         }
4822         //        }
4823
4824         logDecision(tnb, bestMatchingTaxon, insertAsExisting, refMods);
4825         return insertAsExisting;
4826     }
4827
4828     /**
4829      * @return
4830      */
4831     @SuppressWarnings("rawtypes")
4832     private List<Taxon> getMatchingTaxa(TaxonName tnb) {
4833         //logger.info("getMatchingTaxon");
4834         if (tnb.getTitleCache() == null){
4835                 tnb.setTitleCache(tnb.toString(), tnb.isProtectedTitleCache());
4836         }
4837
4838         Pager<TaxonBase> pager=importer.getTaxonService().findByTitleWithRestrictions(TaxonBase.class, tnb.getTitleCache().split("sec.")[0].trim(), MatchMode.BEGINNING, null, null, null, null, null);
4839         List<TaxonBase>records = pager.getRecords();
4840
4841         List<Taxon> existingTaxons = new ArrayList<Taxon>();
4842         for (TaxonBase r:records){
4843             try{
4844                 Taxon bestMatchingTaxon = (Taxon)r;
4845                 //                System.out.println("best: "+bestMatchingTaxon.getTitleCache());
4846                 if(compareTaxonNameLength(bestMatchingTaxon.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4847                     existingTaxons.add(bestMatchingTaxon);
4848                 }
4849             }catch(ClassCastException e){logger.warn("classcast exception, might be a synonym, ignore it");}
4850         }
4851         Taxon bmt = importer.getTaxonService().findBestMatchingTaxon(tnb.getTitleCache());
4852         if (!existingTaxons.contains(bmt) && bmt!=null) {
4853             if(compareTaxonNameLength(bmt.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4854                 existingTaxons.add(bmt);
4855             }
4856         }
4857         return existingTaxons;
4858     }
4859
4860     /**
4861      * Check if the found Taxon can reasonnably be the same
4862      * example: with and without author should match, but the subspecies should not be suggested for a genus
4863      * */
4864     private boolean compareTaxonNameLength(String f, String o){
4865         //logger.info("compareTaxonNameLength");
4866         boolean lengthOk=false;
4867         int sizeF = f.length();
4868         int sizeO = o.length();
4869         if (sizeO>=sizeF) {
4870             lengthOk=true;
4871         }
4872         if(sizeF>sizeO) {
4873             if (sizeF-sizeO>10) {
4874                 lengthOk=false;
4875             } else {
4876                 lengthOk=true;
4877             }
4878         }
4879
4880         //        System.out.println(lengthOk+": compare "+f+" ("+f.length()+") and "+o+" ("+o.length()+")");
4881         return lengthOk;
4882     }
4883
4884     private double similarity(String s1, String s2) {
4885         //logger.info("similarity");
4886         //System.out.println("similarity *"+s1+"* vs. *"+s2+"*");
4887         if(!StringUtils.isEmpty(s1) && !StringUtils.isEmpty(s2)){
4888             String l1=s1.toLowerCase().trim();
4889             String l2=s2.toLowerCase().trim();
4890             if (l1.length() < l2.length()) { // s1 should always be bigger
4891                 String swap = l1; l1 = l2; l2 = swap;
4892             }
4893             int bigLen = l1.length();
4894             if (bigLen == 0) { return 1.0; /* both strings are zero length */ }
4895             return (bigLen - computeEditDistance(l1, l2)) / (double) bigLen;
4896         }
4897         else{
4898             if(s1!=null && s2!=null){
4899                 if (s1.equalsIgnoreCase(s2)) {
4900                     return 1;
4901                 }
4902             }
4903             return -1;
4904         }
4905     }
4906
4907     private int computeEditDistance(String s1, String s2) {
4908         //logger.info("computeEditDistance");
4909         int[] costs = new int[s2.length() + 1];
4910         for (int i = 0; i <= s1.length(); i++) {
4911             int lastValue = i;
4912             for (int j = 0; j <= s2.length(); j++) {
4913                 if (i == 0) {
4914                     costs[j] = j;
4915                 } else {
4916                     if (j > 0) {
4917                         int newValue = costs[j - 1];
4918                         if (s1.charAt(i - 1) != s2.charAt(j - 1)) {
4919                             newValue = Math.min(Math.min(newValue, lastValue),
4920                                     costs[j]) + 1;
4921                         }
4922                         costs[j - 1] = lastValue;
4923                         lastValue = newValue;
4924                     }
4925                 }
4926             }
4927             if (i > 0) {
4928                 costs[s2.length()] = lastValue;
4929             }
4930         }
4931         return costs[s2.length()];
4932     }
4933
    /** Parent taxa stored by the hierarchy handlers, keyed by the rank they were resolved for. */
    Map<Rank, Taxon> hierarchy = new HashMap<Rank, Taxon>();
4935     /**
4936      * @param taxonName
4937      */
4938     @SuppressWarnings("rawtypes")
4939     public void lookForParentNode(INonViralName taxonName, Taxon tax, Reference ref, MyName myName) {
4940         logger.info("lookForParentNode "+taxonName.getTitleCache()+" for "+myName.toString());
4941         //System.out.println("LOOK FOR PARENT NODE "+taxonname.toString()+"; "+tax.toString()+"; "+taxonname.getRank());
4942         INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
4943         if (taxonName.getRank().equals(Rank.FORM())){
4944             handleFormHierarchy(ref, myName, parser);
4945         }
4946         else if (taxonName.getRank().equals(Rank.VARIETY())){
4947             handleVarietyHierarchy(ref, myName, parser);
4948         }
4949         else if (taxonName.getRank().equals(Rank.SUBSPECIES())){
4950             handleSubSpeciesHierarchy(ref, myName, parser);
4951         }
4952         else if (taxonName.getRank().equals(Rank.SPECIES())){
4953             handleSpeciesHierarchy(ref, myName, parser);
4954         }
4955         else if (taxonName.getRank().equals(Rank.SUBGENUS())){
4956             handleSubgenusHierarchy(ref, myName, parser);
4957         }
4958
4959         if (taxonName.getRank().equals(Rank.GENUS())){
4960             handleGenusHierarchy(ref, myName, parser);
4961         }
4962         if (taxonName.getRank().equals(Rank.SUBTRIBE())){
4963             handleSubtribeHierarchy(ref, myName, parser);
4964         }
4965         if (taxonName.getRank().equals(Rank.TRIBE())){
4966             handleTribeHierarchy(ref, myName, parser);
4967         }
4968
4969         if (taxonName.getRank().equals(Rank.SUBFAMILY())){
4970             handleSubfamilyHierarchy(ref, myName, parser);
4971         }
4972     }
4973
4974     /**
4975      * @param ref
4976      * @param myName
4977      * @param parser
4978      */
4979     private void handleSubfamilyHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
4980         System.out.println("handleSubfamilyHierarchy");
4981         String parentStr = myName.getFamilyStr();
4982         Rank r = Rank.FAMILY();
4983         if(parentStr!=null){
4984
4985             Taxon parent = null;
4986             Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitleWithRestrictions(TaxonBase.class, parentStr, MatchMode.BEGINNING, null, null, null, null, null);
4987             for(TaxonBase tb:taxontest.getRecords()){
4988                 try {
4989                     if (tb.getName().getRank().equals(r)) {
4990                         parent=CdmBase.deproxy(tb, Taxon.class);
4991                     }
4992                     break;
4993                 } catch (Exception e) {
4994                     // TODO Auto-generated catch block
4995                     e.printStackTrace();
4996                 }
4997             }
4998             if(parent == null) {
4999                 INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5000                 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5001                 if(tmp ==null)
5002                 {
5003                     parent=Taxon.NewInstance(parentNameName, ref);
5004                     importer.getTaxonService().save(parent);
5005                     parent = CdmBase.deproxy(parent, Taxon.class);
5006                 } else {
5007                     parent=tmp;
5008                 }
5009                 lookForParentNode(parentNameName, parent, ref,myName);
5010
5011             }
5012             hierarchy.put(r,parent);
5013         }
5014     }
5015
5016     /**
5017      * @param ref
5018      * @param myName
5019      * @param parser
5020      */
5021     private void handleTribeHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5022         String parentStr = myName.getSubfamilyStr();
5023         Rank r = Rank.SUBFAMILY();
5024         if (parentStr == null){
5025             parentStr = myName.getFamilyStr();
5026             r = Rank.FAMILY();
5027         }
5028         if(parentStr!=null){
5029             INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5030             Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5031             //                    importer.getTaxonService().save(parent);
5032             //                    parent = CdmBase.deproxy(parent, Taxon.class);
5033
5034             boolean parentDoesNotExists = true;
5035             for (TaxonNode p : classification.getAllNodes()){
5036                 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5037                     parentDoesNotExists = false;
5038                     parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5039                     break;
5040                 }
5041             }
5042             //                if(parentDoesNotExists) {
5043             //                    importer.getTaxonService().save(parent);
5044             //                    parent = CdmBase.deproxy(parent, Taxon.class);
5045             //                    lookForParentNode(parentNameName, parent, ref,myName);
5046             //                }
5047             if(parentDoesNotExists) {
5048                 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5049                 if(tmp ==null)
5050                 {
5051                     parent=Taxon.NewInstance(parentNameName, ref);
5052                     importer.getTaxonService().save(parent);
5053                     parent = CdmBase.deproxy(parent, Taxon.class);
5054                 } else {
5055                     parent=tmp;
5056                 }
5057                 lookForParentNode(parentNameName, parent, ref,myName);
5058
5059             }
5060             hierarchy.put(r,parent);
5061         }
5062     }
5063
5064     /**
5065      * @param ref
5066      * @param myName
5067      * @param parser
5068      */
5069     private void handleSubtribeHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5070         String parentStr = myName.getTribeStr();
5071         Rank r = Rank.TRIBE();
5072         if (parentStr == null){
5073             parentStr = myName.getSubfamilyStr();
5074             r = Rank.SUBFAMILY();
5075         }
5076         if (parentStr == null){
5077             parentStr = myName.getFamilyStr();
5078             r = Rank.FAMILY();
5079         }
5080         if(parentStr!=null){
5081             INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5082             Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5083             //                    importer.getTaxonService().save(parent);
5084             //                    parent = CdmBase.deproxy(parent, Taxon.class);
5085
5086             boolean parentDoesNotExists = true;
5087             for (TaxonNode p : classification.getAllNodes()){
5088                 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5089                     parentDoesNotExists = false;
5090                     parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5091
5092                     break;
5093                 }
5094             }
5095             //                if(parentDoesNotExists) {
5096             //                    importer.getTaxonService().save(parent);
5097             //                    parent = CdmBase.deproxy(parent, Taxon.class);
5098             //                    lookForParentNode(parentNameName, parent, ref,myName);
5099             //                }
5100             if(parentDoesNotExists) {
5101                 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5102                 if(tmp ==null)
5103                 {
5104                     parent=Taxon.NewInstance(parentNameName, ref);
5105                     importer.getTaxonService().save(parent);
5106                     parent = CdmBase.deproxy(parent, Taxon.class);
5107                 } else {
5108                     parent=tmp;
5109                 }
5110                 lookForParentNode(parentNameName, parent, ref,myName);
5111
5112             }
5113             hierarchy.put(r,parent);
5114         }
5115     }
5116
5117     /**
5118      * @param ref
5119      * @param myName
5120      * @param parser
5121      */
5122     private void handleGenusHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5123         String parentStr = myName.getSubtribeStr();
5124         Rank r = Rank.SUBTRIBE();
5125         if (parentStr == null){
5126             parentStr = myName.getTribeStr();
5127             r = Rank.TRIBE();
5128         }
5129         if (parentStr == null){
5130             parentStr = myName.getSubfamilyStr();
5131             r = Rank.SUBFAMILY();
5132         }
5133         if (parentStr == null){
5134             parentStr = myName.getFamilyStr();
5135             r = Rank.FAMILY();
5136         }
5137         if(parentStr!=null){
5138             INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5139             Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5140             //                    importer.getTaxonService().save(parent);
5141             //                    parent = CdmBase.deproxy(parent, Taxon.class);
5142
5143             boolean parentDoesNotExist = true;
5144             for (TaxonNode p : classification.getAllNodes()){
5145                 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5146                     //                        System.out.println(p.getTaxon().getUuid());
5147                     //                        System.out.println(parent.getUuid());
5148                     parentDoesNotExist = false;
5149                     parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5150                     break;
5151                 }
5152             }
5153             //                if(parentDoesNotExists) {
5154             //                    importer.getTaxonService().save(parent);
5155             //                    parent = CdmBase.deproxy(parent, Taxon.class);
5156             //                    lookForParentNode(parentNameName, parent, ref,myName);
5157             //                }
5158             if(parentDoesNotExist) {
5159                 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5160                 if(tmp ==null){
5161
5162                     parent=Taxon.NewInstance(parentNameName, ref);
5163                     importer.getTaxonService().save(parent);
5164                     parent = CdmBase.deproxy(parent, Taxon.class);
5165                 } else {
5166                     parent=tmp;
5167                 }
5168                 lookForParentNode(parentNameName, parent, ref,myName);
5169
5170             }
5171             hierarchy.put(r,parent);
5172         }
5173     }
5174
5175     /**
5176      * @param ref
5177      * @param myName
5178      * @param parser
5179      */
5180     private void handleSubgenusHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5181         String parentStr = myName.getGenusStr();
5182         Rank r = Rank.GENUS();
5183
5184         if(parentStr==null){
5185             parentStr = myName.getSubtribeStr();
5186             r = Rank.SUBTRIBE();
5187         }
5188         if (parentStr == null){
5189             parentStr = myName.getTribeStr();
5190             r = Rank.TRIBE();
5191         }
5192         if (parentStr == null){
5193             parentStr = myName.getSubfamilyStr();
5194             r = Rank.SUBFAMILY();
5195         }
5196         if (parentStr == null){
5197             parentStr = myName.getFamilyStr();
5198             r = Rank.FAMILY();
5199         }
5200         if(parentStr!=null){
5201             INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5202             Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5203             //                    importer.getTaxonService().save(parent);
5204             //                    parent = CdmBase.deproxy(parent, Taxon.class);
5205
5206             boolean parentDoesNotExists = true;
5207             for (TaxonNode p : classification.getAllNodes()){
5208                 if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5209                     //                        System.out.println(p.getTaxon().getUuid());
5210                     //                        System.out.println(parent.getUuid());
5211                     parentDoesNotExists = false;
5212                     parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5213                     break;
5214                 }
5215             }
5216             //                if(parentDoesNotExists) {
5217             //                    importer.getTaxonService().save(parent);
5218             //                    parent = CdmBase.deproxy(parent, Taxon.class);
5219             //                    lookForParentNode(parentNameName, parent, ref,myName);
5220             //                }
5221             if(parentDoesNotExists) {
5222                 Taxon tmp = findMatchingTaxon(parentNameName,ref);
5223                 if(tmp ==null)
5224                 {
5225                     parent=Taxon.NewInstance(parentNameName, ref);
5226                     importer.getTaxonService().save(parent);
5227                     parent = CdmBase.deproxy(parent, Taxon.class);
5228                 } else {
5229                     parent=tmp;
5230                 }
5231                 lookForParentNode(parentNameName, parent, ref,myName);
5232
5233             }
5234             hierarchy.put(r,parent);
5235         }
5236     }
5237
5238     /**
5239      * @param ref
5240      * @param myName
5241      * @param parser
5242      */
5243     private void handleSpeciesHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5244         String parentStr = myName.getSubgenusStr();
5245         Rank r = Rank.SUBGENUS();
5246
5247         if(parentStr==null){
5248             parentStr = myName.getGenusStr();
5249             r = Rank.GENUS();
5250         }
5251
5252         if(parentStr==null){
5253             parentStr = myName.getSubtribeStr();
5254             r = Rank.SUBTRIBE();
5255         }
5256         if (parentStr == null){
5257             parentStr = myName.getTribeStr();
5258             r = Rank.TRIBE();
5259         }
5260         if (parentStr == null){
5261             parentStr = myName.getSubfamilyStr();
5262             r = Rank.SUBFAMILY();
5263         }
5264         if (parentStr == null){
5265             parentStr = myName.getFamilyStr();
5266             r = Rank.FAMILY();
5267         }
5268         if(parentStr!=null){
5269             Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5270             //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5271             hierarchy.put(r,parent);
5272         }
5273     }
5274
5275     /**
5276      * @param ref
5277      * @param myName
5278      * @param parser
5279      */
5280     private void handleSubSpeciesHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5281         String parentStr = myName.getSpeciesStr();
5282         Rank r = Rank.SPECIES();
5283
5284
5285         if(parentStr==null){
5286             parentStr = myName.getSubgenusStr();
5287             r = Rank.SUBGENUS();
5288         }
5289
5290         if(parentStr==null){
5291             parentStr = myName.getGenusStr();
5292             r = Rank.GENUS();
5293         }
5294
5295         if(parentStr==null){
5296             parentStr = myName.getSubtribeStr();
5297             r = Rank.SUBTRIBE();
5298         }
5299         if (parentStr == null){
5300             parentStr = myName.getTribeStr();
5301             r = Rank.TRIBE();
5302         }
5303         if (parentStr == null){
5304             parentStr = myName.getSubfamilyStr();
5305             r = Rank.SUBFAMILY();
5306         }
5307         if (parentStr == null){
5308             parentStr = myName.getFamilyStr();
5309             r = Rank.FAMILY();
5310         }
5311         if(parentStr!=null){
5312             Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5313             //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5314             hierarchy.put(r,parent);
5315         }
5316     }
5317
5318
5319     /**
5320      * @param ref
5321      * @param myName
5322      * @param parser
5323      */
5324     private void handleFormHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5325         String parentStr = myName.getSubspeciesStr();
5326         Rank r = Rank.SUBSPECIES();
5327
5328
5329         if(parentStr==null){
5330             parentStr = myName.getSpeciesStr();
5331             r = Rank.SPECIES();
5332         }
5333
5334         if(parentStr==null){
5335             parentStr = myName.getSubgenusStr();
5336             r = Rank.SUBGENUS();
5337         }
5338
5339         if(parentStr==null){
5340             parentStr = myName.getGenusStr();
5341             r = Rank.GENUS();
5342         }
5343
5344         if(parentStr==null){
5345             parentStr = myName.getSubtribeStr();
5346             r = Rank.SUBTRIBE();
5347         }
5348         if (parentStr == null){
5349             parentStr = myName.getTribeStr();
5350             r = Rank.TRIBE();
5351         }
5352         if (parentStr == null){
5353             parentStr = myName.getSubfamilyStr();
5354             r = Rank.SUBFAMILY();
5355         }
5356         if (parentStr == null){
5357             parentStr = myName.getFamilyStr();
5358             r = Rank.FAMILY();
5359         }
5360         if(parentStr!=null){
5361             Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5362             //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5363             hierarchy.put(r,parent);
5364         }
5365     }
5366
5367     /**
5368      * @param ref
5369      * @param myName
5370      * @param parser
5371      */
5372     private void handleVarietyHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5373         String parentStr = myName.getSubspeciesStr();
5374         Rank r = Rank.SUBSPECIES();
5375
5376         if(parentStr==null){
5377             parentStr = myName.getSpeciesStr();
5378             r = Rank.SPECIES();
5379         }
5380
5381         if(parentStr==null){
5382             parentStr = myName.getSubgenusStr();
5383             r = Rank.SUBGENUS();
5384         }
5385
5386         if(parentStr==null){
5387             parentStr = myName.getGenusStr();
5388             r = Rank.GENUS();
5389         }
5390
5391         if(parentStr==null){
5392             parentStr = myName.getSubtribeStr();
5393             r = Rank.SUBTRIBE();
5394         }
5395         if (parentStr == null){
5396             parentStr = myName.getTribeStr();
5397             r = Rank.TRIBE();
5398         }
5399         if (parentStr == null){
5400             parentStr = myName.getSubfamilyStr();
5401             r = Rank.SUBFAMILY();
5402         }
5403         if (parentStr == null){
5404             parentStr = myName.getFamilyStr();
5405             r = Rank.FAMILY();
5406         }
5407         if(parentStr!=null){
5408             Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5409             //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5410             hierarchy.put(r,parent);
5411         }
5412     }
5413
5414     /**
5415      * @param ref
5416      * @param myName
5417      * @param parser
5418      * @param parentStr
5419      * @param r
5420      * @return
5421      */
5422     private Taxon handleParentName(Reference ref, MyName myName, INonViralNameParser<?> parser, String parentStr, Rank r) {
5423         INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5424         Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5425         //                    importer.getTaxonService().save(parent);
5426         //                    parent = CdmBase.deproxy(parent, Taxon.class);
5427
5428         boolean parentDoesNotExists = true;
5429         for (TaxonNode p : classification.getAllNodes()){
5430             if(p.getTaxon().getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(parent.getTitleCache().split("sec.")[0].trim())) {
5431                 //                        System.out.println(p.getTaxon().getUuid());
5432                 //                        System.out.println(parent.getUuid());
5433                 parentDoesNotExists = false;
5434                 parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5435                 break;
5436             }
5437         }
5438         if(parentDoesNotExists) {
5439             Taxon tmp = findMatchingTaxon(parentNameName,ref);
5440             //                    System.out.println("FOUND PARENT "+tmp.toString()+" for "+parentNameName.toString());
5441             if(tmp ==null){
5442
5443                 parent=Taxon.NewInstance(parentNameName, ref);
5444                 importer.getTaxonService().save(parent);
5445
5446             } else {
5447                 parent=tmp;
5448             }
5449             lookForParentNode(parentNameName, parent, ref,myName);
5450
5451         }
5452         return parent;
5453     }
5454
5455     private void addNameDifferenceToFile(String originalname, String atomisedname){
5456         try{
5457             FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NamesDifferent_"+classification.getTitleCache()+".txt",true);
5458             BufferedWriter out = new BufferedWriter(fstream);
5459             out.write(originalname+" (original) versus "+replaceNull(atomisedname)+" (atomised) \n");
5460             //Close the output stream
5461             out.close();
5462         }catch (Exception e){//Catch exception if any
5463             System.err.println("Error: " + e.getMessage());
5464         }
5465     }
5466     /**
5467      * @param name
5468      * @param author
5469      * @param nomenclaturalCode2
5470      * @param rank
5471      */
5472     private void addProblemNameToFile(String name, String author, NomenclaturalCode nomenclaturalCode2, Rank rank) {
5473         try{
5474             FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NameNotParsed.txt",true);
5475             BufferedWriter out = new BufferedWriter(fstream);
5476             out.write(name+"\t"+replaceNull(author)+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\n");
5477             //Close the output stream
5478             out.close();
5479         }catch (Exception e){//Catch exception if any
5480             System.err.println("Error: " + e.getMessage());
5481         }
5482     }
5483
5484
5485     /**
5486      * @param tnb
5487      * @param bestMatchingTaxon
5488      * @param insertAsExisting
5489      * @param refMods
5490      */
5491     private void logDecision(INonViralName tnb, Taxon bestMatchingTaxon, boolean insertAsExisting, Reference refMods) {
5492         try{
5493             FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "Decisions_"+classification.toString()+".txt", true);
5494             BufferedWriter out = new BufferedWriter(fstream);
5495             out.write(tnb.getTitleCache() + " sec. " + refMods + "\t" + bestMatchingTaxon.getTitleCache() + "\t" + insertAsExisting + "\n");
5496             //Close the output stream
5497             out.close();
5498         }catch (Exception e){//Catch exception if any
5499             System.err.println("Error: " + e.getMessage());
5500         }
5501     }
5502
5503
5504     @SuppressWarnings("unused")
5505     private String replaceNull(Object in){
5506         if (in == null) {
5507             return "";
5508         }
5509         if (in.getClass().equals(NomenclaturalCode.class)) {
5510             return ((NomenclaturalCode)in).getTitleCache();
5511         }
5512         return in.toString();
5513     }
5514
5515     /**
5516      * @param fullName
5517      * @param nomenclaturalCode2
5518      * @param rank
5519      */
5520     private void addProblemNameToFile(String type, String name, NomenclaturalCode nomenclaturalCode2, Rank rank, String problems) {
5521         try{
5522             FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NameNotParsed_"+classification.getTitleCache()+".txt",true);
5523             BufferedWriter out = new BufferedWriter(fstream);
5524             out.write(type+"\t"+name+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\t"+problems+"\n");
5525             //Close the output stream
5526             out.close();
5527         }catch (Exception e){//Catch exception if any
5528             System.err.println("Error: " + e.getMessage());
5529         }
5530
5531     }
5532
5533 }
5534
5535
5536