#5448 Remove non-existing hybrid category
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / redlist / gefaesspflanzen / RedListGefaesspflanzenImportNames.java
index 9713e4980e3e37411d27fe40e914c478da4be832..3be4cfd60e6dfc5cec4d9a27ebba86ec081ce62f 100644 (file)
@@ -34,13 +34,17 @@ import eu.etaxonomy.cdm.model.common.Annotation;
 import eu.etaxonomy.cdm.model.common.AnnotationType;
 import eu.etaxonomy.cdm.model.common.CdmBase;
 import eu.etaxonomy.cdm.model.common.Language;
+import eu.etaxonomy.cdm.model.common.OrderedTermVocabulary;
 import eu.etaxonomy.cdm.model.description.CommonTaxonName;
 import eu.etaxonomy.cdm.model.description.TaxonDescription;
 import eu.etaxonomy.cdm.model.name.BotanicalName;
+import eu.etaxonomy.cdm.model.name.CultivarPlantName;
+import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
 import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
 import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
 import eu.etaxonomy.cdm.model.name.NonViralName;
 import eu.etaxonomy.cdm.model.name.Rank;
+import eu.etaxonomy.cdm.model.name.RankClass;
 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
 import eu.etaxonomy.cdm.model.taxon.Synonym;
 import eu.etaxonomy.cdm.model.taxon.Taxon;
@@ -134,7 +138,7 @@ public class RedListGefaesspflanzenImportNames extends DbImportBase<RedListGefae
         importAuthors(state, rs, name);
 
         //---TAXON---
-        TaxonBase<?> taxonBase = importTaxon(rs, name);
+        TaxonBase<?> taxonBase = importTaxon(rs, name, state);
         if(taxonBase==null){
             RedListUtil.logMessage(id, "Taxon for name "+name+" could not be created.", logger);
             return;
@@ -187,7 +191,7 @@ public class RedListGefaesspflanzenImportNames extends DbImportBase<RedListGefae
         }
     }
 
-    private TaxonBase<?> importTaxon(ResultSet rs, NonViralName<?> name) throws SQLException {
+    private TaxonBase<?> importTaxon(ResultSet rs, NonViralName<?> name, RedListGefaesspflanzenImportState state) throws SQLException {
 
         long id = rs.getLong(RedListUtil.NAMNR);
         String taxNameString = rs.getString(RedListUtil.TAXNAME);
@@ -233,7 +237,7 @@ public class RedListGefaesspflanzenImportNames extends DbImportBase<RedListGefae
         addAnnotation(RedListUtil.WISSK+": "+wisskString, taxonBase);
 
         //check taxon name consistency
-        checkTaxonNameConsistency(id, taxNameString, hybString, taxonBase);
+        checkTaxonConsistency(id, taxNameString, hybString, taxonBase, state);
         return taxonBase;
     }
 
@@ -251,6 +255,7 @@ public class RedListGefaesspflanzenImportNames extends DbImportBase<RedListGefae
         String zusatzString = rs.getString(RedListUtil.ZUSATZ);
         String authorKombString = rs.getString(RedListUtil.AUTOR_KOMB);
         String authorBasiString = rs.getString(RedListUtil.AUTOR_BASI);
+        String hybString = rs.getString(RedListUtil.HYB);
 
         //combination author
         if(authorKombString.contains(RedListUtil.EX)){
@@ -318,8 +323,7 @@ public class RedListGefaesspflanzenImportNames extends DbImportBase<RedListGefae
 
         //check authorship consistency
         String authorString = rs.getString(RedListUtil.AUTOR);
-        String authorshipCache = name.getAuthorshipCache();
-        checkAuthorShipConsistency(id, nomZusatzString, taxZusatzString, zusatzString, authorString, authorshipCache);
+        checkNameConsistency(id, nomZusatzString, taxZusatzString, zusatzString, authorString, hybString, name);
     }
 
     private NonViralName<?> importName(RedListGefaesspflanzenImportState state, ResultSet rs, Set<TaxonNameBase> namesToSave) throws SQLException {
@@ -338,104 +342,122 @@ public class RedListGefaesspflanzenImportNames extends DbImportBase<RedListGefae
             RedListUtil.logMessage(id, "No name found!", logger);
         }
 
+        NonViralName<?> name = null;
         Rank rank = makeRank(id, state, rangString, ep3String!=null);
-        NonViralName<?> name = BotanicalName.NewInstance(rank);
+        //cultivar
+        if(rank!= null && rank.equals(Rank.CULTIVAR())){
+            CultivarPlantName cultivar = CultivarPlantName.NewInstance(rank);
+            cultivar.setGenusOrUninomial(ep1String);
+            cultivar.setSpecificEpithet(ep2String);
+            cultivar.setCultivarName(ep3String);
+            name = cultivar;
+        }
+        //botanical names
+        else{
+            name = BotanicalName.NewInstance(rank);
 
-        //ep1 should always be present
-        if(CdmUtils.isBlank(ep1String)){
-            RedListUtil.logMessage(id, RedListUtil.EPI1+" is empty!", logger);
-        }
-        name.setGenusOrUninomial(ep1String);
-        if(CdmUtils.isNotBlank(ep2String)){
-            if(rank!=null && rank.isInfraGenericButNotSpeciesGroup()){
-                name.setInfraGenericEpithet(ep2String);
-            }
-            else{
-                name.setSpecificEpithet(ep2String);
+            //ep1 should always be present
+            if(CdmUtils.isBlank(ep1String)){
+                RedListUtil.logMessage(id, RedListUtil.EPI1+" is empty!", logger);
             }
-        }
-        if(CdmUtils.isNotBlank(ep3String)){
-            name.setInfraSpecificEpithet(ep3String);
-        }
-        //nomenclatural status
-        if(CdmUtils.isNotBlank(nomZusatzString)){
-            NomenclaturalStatusType statusType = makeNomenclaturalStatus(id, state, nomZusatzString);
-            if(statusType!=null){
-                NomenclaturalStatus status = NomenclaturalStatus.NewInstance(statusType);
-                //special case for invalid names where the DB entry contains
-                //additional information in brackets e.g. "nom. inval. (sine basion.)"
-                if(statusType.equals(NomenclaturalStatusType.INVALID())){
-                    Pattern pattern = Pattern.compile("\\((.*?)\\)");
-                    Matcher matcher = pattern.matcher(nomZusatzString);
-                    if (matcher.find()){
-                        status.setRuleConsidered(matcher.group(1));
-                    }
+            name.setGenusOrUninomial(ep1String);
+            if(CdmUtils.isNotBlank(ep2String)){
+                if(rank!=null && rank.isInfraGenericButNotSpeciesGroup()){
+                    name.setInfraGenericEpithet(ep2String);
+                }
+                else{
+                    name.setSpecificEpithet(ep2String);
                 }
-                name.addStatus(status);
             }
-        }
-        //hybrid
-        if(CdmUtils.isNotBlank(hybString)){
-            //save hybrid formula
-            if(CdmUtils.isNotBlank(formelString)){
-                Annotation annotation = Annotation.NewDefaultLanguageInstance(formelString);
-                annotation.setAnnotationType(AnnotationType.TECHNICAL());
-                name.addAnnotation(annotation);
+            if(CdmUtils.isNotBlank(ep3String)){
+                name.setInfraSpecificEpithet(ep3String);
             }
 
-            if(hybString.equals(RedListUtil.HYB_X)){
-                name.setBinomHybrid(true);
-            }
-            else if(hybString.equals(RedListUtil.HYB_G)){
-                name.setMonomHybrid(true);
+
+            //nomenclatural status
+            if(CdmUtils.isNotBlank(nomZusatzString)){
+                NomenclaturalStatusType statusType = makeNomenclaturalStatus(id, state, nomZusatzString);
+                if(statusType!=null){
+                    NomenclaturalStatus status = NomenclaturalStatus.NewInstance(statusType);
+                    //special case for invalid names where the DB entry contains
+                    //additional information in brackets e.g. "nom. inval. (sine basion.)"
+                    if(statusType.equals(NomenclaturalStatusType.INVALID())){
+                        Pattern pattern = Pattern.compile("\\((.*?)\\)");
+                        Matcher matcher = pattern.matcher(nomZusatzString);
+                        if (matcher.find()){
+                            status.setRuleConsidered(matcher.group(1));
+                        }
+                    }
+                    name.addStatus(status);
+                }
             }
-            else if(hybString.equals(RedListUtil.HYB_XF)){
-                name.setHybridFormula(true);
-                if(ep1String.contains(RedListUtil.HYB_SIGN)){
-                    RedListUtil.logMessage(id, "EPI1 has hybrid signs but with flag: "+RedListUtil.HYB_XF, logger);
+            //hybrid
+            if(CdmUtils.isNotBlank(hybString)){
+                //save hybrid formula
+                if(CdmUtils.isNotBlank(formelString)){
+                    Annotation annotation = Annotation.NewDefaultLanguageInstance(formelString);
+                    annotation.setAnnotationType(AnnotationType.TECHNICAL());
+                    name.addAnnotation(annotation);
                 }
-                else if(ep2String.contains(RedListUtil.HYB_SIGN)){
-                    String[] split = ep2String.split(RedListUtil.HYB_SIGN);
-                    if(split.length!=2){
-                        RedListUtil.logMessage(id, "Multiple hybrid signs found in "+ep2String, logger);
+                //more than two hybrids not yet handled by name parser
+                //TODO: use parser when implemented to fully support hybrids
+                if(taxNameString.split(RedListUtil.HYB_SIGN).length>2){
+                    name = BotanicalName.NewInstance(rank);
+                    name.setTitleCache(taxNameString, true);
+                }
+                else if(hybString.equals(RedListUtil.HYB_X)){
+                    name.setBinomHybrid(true);
+                }
+                else if(hybString.equals(RedListUtil.HYB_G)){
+                    name.setMonomHybrid(true);
+                }
+                else if(hybString.equals(RedListUtil.HYB_XF)){
+                    name.setHybridFormula(true);
+                    if(ep1String.contains(RedListUtil.HYB_SIGN)){
+                        RedListUtil.logMessage(id, "EPI1 has hybrid signs but with flag: "+RedListUtil.HYB_XF, logger);
+                    }
+                    else if(ep2String.contains(RedListUtil.HYB_SIGN)){
+                        String[] split = ep2String.split(RedListUtil.HYB_SIGN);
+                        String hybridFormula1 = ep1String+" "+split[0].trim();
+                        String hybridFormula2 = ep1String+" "+split[1].trim();
+                        //check if the specific epithets are from the same genus or not like e.g. EPI2 = pratensis × Lolium multiflorum
+                        String[] secondHybrid = split[1].trim().split(" ");
+                        if(secondHybrid.length>1 && secondHybrid[0].matches("[A-Z].*")){
+                            hybridFormula2 = split[1];
+                        }
+                        if(CdmUtils.isNotBlank(ep3String)){
+                            hybridFormula1 += " "+ep3String;
+                            hybridFormula2 += " "+ep3String;
+                        }
+                        String fullFormula = hybridFormula1+" "+RedListUtil.HYB_SIGN+" "+hybridFormula2;
+                        name = NonViralNameParserImpl.NewInstance().parseFullName(fullFormula, NomenclaturalCode.ICNAFP, rank);
                     }
-                    String hybridFormula1 = ep1String+" "+split[0].trim();
-                    String hybridFormula2 = ep1String+" "+split[1].trim();
-                    if(CdmUtils.isNotBlank(ep3String)){
-                        hybridFormula1 += " "+ep3String;
-                        hybridFormula2 += " "+ep3String;
+                    else if(ep3String.contains(RedListUtil.HYB_SIGN)){
+                        String[] split = ep3String.split(RedListUtil.HYB_SIGN);
+                        String hybridFormula1 = ep1String+" "+ep2String+" "+split[0];
+                        String hybridFormula2 = ep1String+" "+ep2String+" "+split[1];
+                        String fullFormula = hybridFormula1+" "+RedListUtil.HYB_SIGN+" "+hybridFormula2;
+                        name = NonViralNameParserImpl.NewInstance().parseFullName(fullFormula, NomenclaturalCode.ICNAFP, rank);
                     }
-                    String fullFormula = hybridFormula1+" "+RedListUtil.HYB_SIGN+" "+hybridFormula2;
-                    name = NonViralNameParserImpl.NewInstance().parseFullName(fullFormula);
                 }
-                else if(ep3String.contains(RedListUtil.HYB_SIGN)){
-                    String[] split = ep3String.split(RedListUtil.HYB_SIGN);
-                    if(split.length!=2){
-                        RedListUtil.logMessage(id, "Multiple hybrid signs found in "+ep3String, logger);
+                else if(hybString.equals(RedListUtil.HYB_N)){
+                    name = NonViralNameParserImpl.NewInstance().parseFullName(taxNameString, NomenclaturalCode.ICNAFP, rank);
+                }
+                else if(hybString.equals(RedListUtil.HYB_GF)){
+                    if(ep1String.contains(RedListUtil.HYB_SIGN)){
+                        name = NonViralNameParserImpl.NewInstance().parseFullName(ep1String, NomenclaturalCode.ICNAFP, rank);
+                    }
+                    else{
+                        RedListUtil.logMessage(id, "HYB is "+hybString+" but "+RedListUtil.HYB+" does not contain "+RedListUtil.HYB_SIGN, logger);
                     }
-                    String hybridFormula1 = ep1String+" "+ep2String+" "+split[0];
-                    String hybridFormula2 = ep1String+" "+ep2String+" "+split[1];
-                    String fullFormula = hybridFormula1+" "+RedListUtil.HYB_SIGN+" "+hybridFormula2;
-                    name = NonViralNameParserImpl.NewInstance().parseFullName(fullFormula);
                 }
-            }
-            else if(hybString.equals(RedListUtil.HYB_N)){
-                name = NonViralNameParserImpl.NewInstance().parseFullName(ep1String+" "+ep2String+" nothosubsp. "+ep3String);
-            }
-            else if(hybString.equals(RedListUtil.HYB_GF)){
-                if(ep1String.contains(RedListUtil.HYB_SIGN)){
-                    name = NonViralNameParserImpl.NewInstance().parseFullName(ep1String);
+                else if(hybString.equals(RedListUtil.HYB_XS)){
+                    //nothing to do
                 }
                 else{
-                    RedListUtil.logMessage(id, "HYB is "+hybString+" but "+RedListUtil.HYB+" does not contain "+RedListUtil.HYB_SIGN, logger);
+                    logger.error("HYB value "+hybString+" not yet handled");
                 }
             }
-            else if(hybString.equals(RedListUtil.HYB_XS)){
-                //nothing to do
-            }
-            else{
-                logger.error("HYB value "+hybString+" not yet handled");
-            }
         }
         //add source
         ImportHelper.setOriginalSource(name, state.getTransactionalSourceReference(), id, RedListUtil.NAME_NAMESPACE);
@@ -444,8 +466,18 @@ public class RedListGefaesspflanzenImportNames extends DbImportBase<RedListGefae
         return name;
     }
 
-    private void checkAuthorShipConsistency(long id, String nomZusatzString, String taxZusatzString,
-            String zusatzString, String authorString, String authorshipCache) {
+    private void checkNameConsistency(long id, String nomZusatzString, String taxZusatzString,
+            String zusatzString, String authorString, String hybString, NonViralName<?> name) {
+        String authorshipCache = name.getAuthorshipCache();
+        //FIXME: remove split length check when name parser can parse multiple hybrid parents
+        if(hybString.equals(RedListUtil.HYB_XF) && name.getTitleCache().split(RedListUtil.HYB_SIGN).length==2){
+            if(name.getHybridChildRelations().isEmpty()){
+                RedListUtil.logMessage(id, "Hybrid formula but no hybrid child relations: "+name.getTitleCache(), logger);
+                return;
+            }
+            return;
+        }
+
         if(CdmUtils.isNotBlank(zusatzString)){
             authorString = authorString.replace(", "+zusatzString, "");
         }
@@ -470,25 +502,16 @@ public class RedListGefaesspflanzenImportNames extends DbImportBase<RedListGefae
         }
     }
 
-    private void checkTaxonNameConsistency(long id, String taxNameString, String hybString, TaxonBase<?> taxonBase) {
-        if(hybString.equals(RedListUtil.HYB_XF)){
-            if(HibernateProxyHelper.deproxy(taxonBase.getName(),NonViralName.class).getHybridChildRelations().isEmpty()){
-                RedListUtil.logMessage(id, "Hybrid name but no hybrid child relations: "+taxonBase.getTitleCache(), logger);
-                return;
-            }
-            return;
-        }
-
-
+    private void checkTaxonConsistency(long id, String taxNameString, String hybString, TaxonBase<?> taxonBase, RedListGefaesspflanzenImportState state) {
         String nameCache = HibernateProxyHelper.deproxy(taxonBase.getName(), NonViralName.class).getNameCache().trim();
         taxNameString = taxNameString.trim();
-        taxNameString.replaceAll(" +", " ");
+        taxNameString = taxNameString.replaceAll(" +", " ");
 
         if(taxNameString.endsWith("agg.")){
             taxNameString = taxNameString.replace("agg.", "aggr.");
         }
 
-        if(hybString.equals(RedListUtil.HYB_X)){
+        if(hybString.equals(RedListUtil.HYB_X) || hybString.equals(RedListUtil.HYB_N)){
             taxNameString = taxNameString.replace(" "+RedListUtil.HYB_SIGN+" ", " "+RedListUtil.HYB_SIGN);//hybrid sign has no space after it in titleCache for binomial hybrids
             taxNameString = taxNameString.replace(" x ", " "+RedListUtil.HYB_SIGN);//in some cases a standard 'x' is used
         }
@@ -499,7 +522,7 @@ public class RedListGefaesspflanzenImportNames extends DbImportBase<RedListGefae
             taxNameString = taxNameString.replace(" "+RedListUtil.HYB_SIGN, " x");
         }
 
-        if(taxNameString.endsWith("- Gruppe")){String a ="Festuca ×xx Lolium <-> Festuca ×× Lolium";
+        if(taxNameString.endsWith("- Gruppe")){
             taxNameString = taxNameString.replaceAll("- Gruppe", "species group");
         }
         if(taxNameString.endsWith("- group")){
@@ -507,14 +530,22 @@ public class RedListGefaesspflanzenImportNames extends DbImportBase<RedListGefae
         }
 
         taxNameString = taxNameString.replace("[ranglos]", "[unranked]");
+        if(taxonBase.getName().getRank()!=null){
+            if(taxonBase.getName().getRank().equals(Rank.PROLES())){
+                taxNameString = taxNameString.replace("proles", "prol.");
+            }
+            else if(taxonBase.getName().getRank().equals(state.getRank(RedListUtil.uuidRankCollectionSpecies))){
+                taxNameString = taxNameString.replace("\"Sammelart\"", "\"Coll. Species\"");
+            }
+        }
         if(STRICT_TITLE_CHECK){
             if(!taxNameString.trim().equals(nameCache)){
-                RedListUtil.logMessage(id, "Taxon name inconsistent! taxon.titleCache <-> Column "+RedListUtil.TAXNAME+": "+nameCache+" <-> "+taxNameString, logger);
+                RedListUtil.logMessage(id, "Taxon name inconsistent! taxon.nameCache <-> Column "+RedListUtil.TAXNAME+": "+nameCache+" <-> "+taxNameString, logger);
             }
         }
         else{
             if(!taxNameString.startsWith(nameCache)){
-                RedListUtil.logMessage(id, "Taxon name inconsistent! taxon.titleCache <-> Column "+RedListUtil.TAXNAME+": "+nameCache+" <-> "+taxNameString, logger);
+                RedListUtil.logMessage(id, "Taxon name inconsistent! taxon.nameCache <-> Column "+RedListUtil.TAXNAME+": "+nameCache+" <-> "+taxNameString, logger);
             }
         }
     }
@@ -531,6 +562,9 @@ public class RedListGefaesspflanzenImportNames extends DbImportBase<RedListGefae
                     return Rank.UNRANKED_INFRAGENERIC();
                 }
             }
+            else if(rankStr.equals("SAM")){
+                return getRank(state, RedListUtil.uuidRankCollectionSpecies, "Collective Species", "Collective Species", "\"Coll. Species\"", (OrderedTermVocabulary<Rank>) Rank.GENUS().getVocabulary(), null, RankClass.SpeciesGroup);
+            }
             else{
                 rank = state.getTransformer().getRankByKey(rankStr);
             }