#5448 Remove non-existent hybrid category
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / redlist / gefaesspflanzen / RedListGefaesspflanzenImportNames.java
index 52979ad33df907d2cbb73af907991ed05017ebb8..3be4cfd60e6dfc5cec4d9a27ebba86ec081ce62f 100644 (file)
@@ -34,14 +34,17 @@ import eu.etaxonomy.cdm.model.common.Annotation;
 import eu.etaxonomy.cdm.model.common.AnnotationType;
 import eu.etaxonomy.cdm.model.common.CdmBase;
 import eu.etaxonomy.cdm.model.common.Language;
+import eu.etaxonomy.cdm.model.common.OrderedTermVocabulary;
 import eu.etaxonomy.cdm.model.description.CommonTaxonName;
 import eu.etaxonomy.cdm.model.description.TaxonDescription;
 import eu.etaxonomy.cdm.model.name.BotanicalName;
 import eu.etaxonomy.cdm.model.name.CultivarPlantName;
+import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
 import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
 import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
 import eu.etaxonomy.cdm.model.name.NonViralName;
 import eu.etaxonomy.cdm.model.name.Rank;
+import eu.etaxonomy.cdm.model.name.RankClass;
 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
 import eu.etaxonomy.cdm.model.taxon.Synonym;
 import eu.etaxonomy.cdm.model.taxon.Taxon;
@@ -135,7 +138,7 @@ public class RedListGefaesspflanzenImportNames extends DbImportBase<RedListGefae
         importAuthors(state, rs, name);
 
         //---TAXON---
-        TaxonBase<?> taxonBase = importTaxon(rs, name);
+        TaxonBase<?> taxonBase = importTaxon(rs, name, state);
         if(taxonBase==null){
             RedListUtil.logMessage(id, "Taxon for name "+name+" could not be created.", logger);
             return;
@@ -188,7 +191,7 @@ public class RedListGefaesspflanzenImportNames extends DbImportBase<RedListGefae
         }
     }
 
-    private TaxonBase<?> importTaxon(ResultSet rs, NonViralName<?> name) throws SQLException {
+    private TaxonBase<?> importTaxon(ResultSet rs, NonViralName<?> name, RedListGefaesspflanzenImportState state) throws SQLException {
 
         long id = rs.getLong(RedListUtil.NAMNR);
         String taxNameString = rs.getString(RedListUtil.TAXNAME);
@@ -234,7 +237,7 @@ public class RedListGefaesspflanzenImportNames extends DbImportBase<RedListGefae
         addAnnotation(RedListUtil.WISSK+": "+wisskString, taxonBase);
 
         //check taxon name consistency
-        checkTaxonNameConsistency(id, taxNameString, hybString, taxonBase);
+        checkTaxonConsistency(id, taxNameString, hybString, taxonBase, state);
         return taxonBase;
     }
 
@@ -252,6 +255,7 @@ public class RedListGefaesspflanzenImportNames extends DbImportBase<RedListGefae
         String zusatzString = rs.getString(RedListUtil.ZUSATZ);
         String authorKombString = rs.getString(RedListUtil.AUTOR_KOMB);
         String authorBasiString = rs.getString(RedListUtil.AUTOR_BASI);
+        String hybString = rs.getString(RedListUtil.HYB);
 
         //combination author
         if(authorKombString.contains(RedListUtil.EX)){
@@ -319,8 +323,7 @@ public class RedListGefaesspflanzenImportNames extends DbImportBase<RedListGefae
 
         //check authorship consistency
         String authorString = rs.getString(RedListUtil.AUTOR);
-        String authorshipCache = name.getAuthorshipCache();
-        checkAuthorShipConsistency(id, nomZusatzString, taxZusatzString, zusatzString, authorString, authorshipCache);
+        checkNameConsistency(id, nomZusatzString, taxZusatzString, zusatzString, authorString, hybString, name);
     }
 
     private NonViralName<?> importName(RedListGefaesspflanzenImportState state, ResultSet rs, Set<TaxonNameBase> namesToSave) throws SQLException {
@@ -366,94 +369,91 @@ public class RedListGefaesspflanzenImportNames extends DbImportBase<RedListGefae
                     name.setSpecificEpithet(ep2String);
                 }
             }
-        }
-        //hybrid
-        if(CdmUtils.isNotBlank(hybString)){
-            //save hybrid formula
-            if(CdmUtils.isNotBlank(formelString)){
-                Annotation annotation = Annotation.NewDefaultLanguageInstance(formelString);
-                annotation.setAnnotationType(AnnotationType.TECHNICAL());
-                name.addAnnotation(annotation);
+            if(CdmUtils.isNotBlank(ep3String)){
+                name.setInfraSpecificEpithet(ep3String);
             }
 
-            if(hybString.equals(RedListUtil.HYB_X)){
-                name.setBinomHybrid(true);
-                if(CdmUtils.isNotBlank(ep3String)){
-                    name.setInfraSpecificEpithet(ep3String);
-                }
-                //nomenclatural status
-                if(CdmUtils.isNotBlank(nomZusatzString)){
-                    NomenclaturalStatusType statusType = makeNomenclaturalStatus(id, state, nomZusatzString);
-                    if(statusType!=null){
-                        NomenclaturalStatus status = NomenclaturalStatus.NewInstance(statusType);
-                        //special case for invalid names where the DB entry contains
-                        //additional information in brackets e.g. "nom. inval. (sine basion.)"
-                        if(statusType.equals(NomenclaturalStatusType.INVALID())){
-                            Pattern pattern = Pattern.compile("\\((.*?)\\)");
-                            Matcher matcher = pattern.matcher(nomZusatzString);
-                            if (matcher.find()){
-                                status.setRuleConsidered(matcher.group(1));
-                            }
+
+            //nomenclatural status
+            if(CdmUtils.isNotBlank(nomZusatzString)){
+                NomenclaturalStatusType statusType = makeNomenclaturalStatus(id, state, nomZusatzString);
+                if(statusType!=null){
+                    NomenclaturalStatus status = NomenclaturalStatus.NewInstance(statusType);
+                    //special case for invalid names where the DB entry contains
+                    //additional information in brackets e.g. "nom. inval. (sine basion.)"
+                    if(statusType.equals(NomenclaturalStatusType.INVALID())){
+                        Pattern pattern = Pattern.compile("\\((.*?)\\)");
+                        Matcher matcher = pattern.matcher(nomZusatzString);
+                        if (matcher.find()){
+                            status.setRuleConsidered(matcher.group(1));
                         }
-                        name.addStatus(status);
                     }
+                    name.addStatus(status);
                 }
-                //hybrid
-                if(CdmUtils.isNotBlank(hybString)){
-                    if(hybString.equals(RedListUtil.HYB_X)){
-                        name.setBinomHybrid(true);
-                    }
-                    else if(hybString.equals(RedListUtil.HYB_G)){
-                        name.setMonomHybrid(true);
+            }
+            //hybrid
+            if(CdmUtils.isNotBlank(hybString)){
+                //save hybrid formula
+                if(CdmUtils.isNotBlank(formelString)){
+                    Annotation annotation = Annotation.NewDefaultLanguageInstance(formelString);
+                    annotation.setAnnotationType(AnnotationType.TECHNICAL());
+                    name.addAnnotation(annotation);
+                }
+                //more than two hybrids not yet handled by name parser
+                //TODO: use parser when implemented to fully support hybrids
+                if(taxNameString.split(RedListUtil.HYB_SIGN).length>2){
+                    name = BotanicalName.NewInstance(rank);
+                    name.setTitleCache(taxNameString, true);
+                }
+                else if(hybString.equals(RedListUtil.HYB_X)){
+                    name.setBinomHybrid(true);
+                }
+                else if(hybString.equals(RedListUtil.HYB_G)){
+                    name.setMonomHybrid(true);
+                }
+                else if(hybString.equals(RedListUtil.HYB_XF)){
+                    name.setHybridFormula(true);
+                    if(ep1String.contains(RedListUtil.HYB_SIGN)){
+                        RedListUtil.logMessage(id, "EPI1 has hybrid signs but with flag: "+RedListUtil.HYB_XF, logger);
                     }
-                    else if(hybString.equals(RedListUtil.HYB_XF)){
-                        name.setHybridFormula(true);
-                        if(ep1String.contains(RedListUtil.HYB_SIGN)){
-                            RedListUtil.logMessage(id, "EPI1 has hybrid signs but with flag: "+RedListUtil.HYB_XF, logger);
+                    else if(ep2String.contains(RedListUtil.HYB_SIGN)){
+                        String[] split = ep2String.split(RedListUtil.HYB_SIGN);
+                        String hybridFormula1 = ep1String+" "+split[0].trim();
+                        String hybridFormula2 = ep1String+" "+split[1].trim();
+                        //check if the specific epithets are from the same genus or not like e.g. EPI2 = pratensis × Lolium multiflorum
+                        String[] secondHybrid = split[1].trim().split(" ");
+                        if(secondHybrid.length>1 && secondHybrid[0].matches("[A-Z].*")){
+                            hybridFormula2 = split[1];
                         }
-                        else if(ep2String.contains(RedListUtil.HYB_SIGN)){
-                            String[] split = ep2String.split(RedListUtil.HYB_SIGN);
-                            if(split.length!=2){
-                                RedListUtil.logMessage(id, "Multiple hybrid signs found in "+ep2String, logger);
-                            }
-                            String hybridFormula1 = ep1String+" "+split[0].trim();
-                            String hybridFormula2 = ep1String+" "+split[1].trim();
-                            if(CdmUtils.isNotBlank(ep3String)){
-                                hybridFormula1 += " "+ep3String;
-                                hybridFormula2 += " "+ep3String;
-                            }
-                            String fullFormula = hybridFormula1+" "+RedListUtil.HYB_SIGN+" "+hybridFormula2;
-                            name = NonViralNameParserImpl.NewInstance().parseFullName(fullFormula);
+                        if(CdmUtils.isNotBlank(ep3String)){
+                            hybridFormula1 += " "+ep3String;
+                            hybridFormula2 += " "+ep3String;
                         }
-                        else if(ep3String.contains(RedListUtil.HYB_SIGN)){
-                            String[] split = ep3String.split(RedListUtil.HYB_SIGN);
-                            if(split.length!=2){
-                                RedListUtil.logMessage(id, "Multiple hybrid signs found in "+ep3String, logger);
-                            }
-                            String hybridFormula1 = ep1String+" "+ep2String+" "+split[0];
-                            String hybridFormula2 = ep1String+" "+ep2String+" "+split[1];
-                            String fullFormula = hybridFormula1+" "+RedListUtil.HYB_SIGN+" "+hybridFormula2;
-                            name = NonViralNameParserImpl.NewInstance().parseFullName(fullFormula);
-                        }
-                    }
-                    else if(hybString.equals(RedListUtil.HYB_N)){
-                        name = NonViralNameParserImpl.NewInstance().parseFullName(ep1String+" "+ep2String+" nothosubsp. "+ep3String);
+                        String fullFormula = hybridFormula1+" "+RedListUtil.HYB_SIGN+" "+hybridFormula2;
+                        name = NonViralNameParserImpl.NewInstance().parseFullName(fullFormula, NomenclaturalCode.ICNAFP, rank);
                     }
-                    else if(hybString.equals(RedListUtil.HYB_GF)){
-                        if(ep1String.contains(RedListUtil.HYB_SIGN)){
-                            name = NonViralNameParserImpl.NewInstance().parseFullName(ep1String);
-                        }
-                        else{
-                            RedListUtil.logMessage(id, "HYB is "+hybString+" but "+RedListUtil.HYB+" does not contain "+RedListUtil.HYB_SIGN, logger);
-                        }
+                    else if(ep3String.contains(RedListUtil.HYB_SIGN)){
+                        String[] split = ep3String.split(RedListUtil.HYB_SIGN);
+                        String hybridFormula1 = ep1String+" "+ep2String+" "+split[0];
+                        String hybridFormula2 = ep1String+" "+ep2String+" "+split[1];
+                        String fullFormula = hybridFormula1+" "+RedListUtil.HYB_SIGN+" "+hybridFormula2;
+                        name = NonViralNameParserImpl.NewInstance().parseFullName(fullFormula, NomenclaturalCode.ICNAFP, rank);
                     }
-                    else if(hybString.equals(RedListUtil.HYB_XS)){
-                        //nothing to do
+                }
+                else if(hybString.equals(RedListUtil.HYB_N)){
+                    name = NonViralNameParserImpl.NewInstance().parseFullName(taxNameString, NomenclaturalCode.ICNAFP, rank);
+                }
+                else if(hybString.equals(RedListUtil.HYB_GF)){
+                    if(ep1String.contains(RedListUtil.HYB_SIGN)){
+                        name = NonViralNameParserImpl.NewInstance().parseFullName(ep1String, NomenclaturalCode.ICNAFP, rank);
                     }
                     else{
-                        logger.error("HYB value "+hybString+" not yet handled");
+                        RedListUtil.logMessage(id, "HYB is "+hybString+" but "+RedListUtil.HYB+" does not contain "+RedListUtil.HYB_SIGN, logger);
                     }
                 }
+                else if(hybString.equals(RedListUtil.HYB_XS)){
+                    //nothing to do
+                }
                 else{
                     logger.error("HYB value "+hybString+" not yet handled");
                 }
@@ -466,8 +466,18 @@ public class RedListGefaesspflanzenImportNames extends DbImportBase<RedListGefae
         return name;
     }
 
-    private void checkAuthorShipConsistency(long id, String nomZusatzString, String taxZusatzString,
-            String zusatzString, String authorString, String authorshipCache) {
+    private void checkNameConsistency(long id, String nomZusatzString, String taxZusatzString,
+            String zusatzString, String authorString, String hybString, NonViralName<?> name) {
+        String authorshipCache = name.getAuthorshipCache();
+        //FIXME: remove split length check when name parser can parse multiple hybrid parents
+        if(hybString.equals(RedListUtil.HYB_XF) && name.getTitleCache().split(RedListUtil.HYB_SIGN).length==2){
+            if(name.getHybridChildRelations().isEmpty()){
+                RedListUtil.logMessage(id, "Hybrid formula but no hybrid child relations: "+name.getTitleCache(), logger);
+                return;
+            }
+            return;
+        }
+
         if(CdmUtils.isNotBlank(zusatzString)){
             authorString = authorString.replace(", "+zusatzString, "");
         }
@@ -492,25 +502,16 @@ public class RedListGefaesspflanzenImportNames extends DbImportBase<RedListGefae
         }
     }
 
-    private void checkTaxonNameConsistency(long id, String taxNameString, String hybString, TaxonBase<?> taxonBase) {
-        if(hybString.equals(RedListUtil.HYB_XF)){
-            if(HibernateProxyHelper.deproxy(taxonBase.getName(),NonViralName.class).getHybridChildRelations().isEmpty()){
-                RedListUtil.logMessage(id, "Hybrid name but no hybrid child relations: "+taxonBase.getTitleCache(), logger);
-                return;
-            }
-            return;
-        }
-
-
+    private void checkTaxonConsistency(long id, String taxNameString, String hybString, TaxonBase<?> taxonBase, RedListGefaesspflanzenImportState state) {
         String nameCache = HibernateProxyHelper.deproxy(taxonBase.getName(), NonViralName.class).getNameCache().trim();
         taxNameString = taxNameString.trim();
-        taxNameString.replaceAll(" +", " ");
+        taxNameString = taxNameString.replaceAll(" +", " ");
 
         if(taxNameString.endsWith("agg.")){
             taxNameString = taxNameString.replace("agg.", "aggr.");
         }
 
-        if(hybString.equals(RedListUtil.HYB_X)){
+        if(hybString.equals(RedListUtil.HYB_X) || hybString.equals(RedListUtil.HYB_N)){
             taxNameString = taxNameString.replace(" "+RedListUtil.HYB_SIGN+" ", " "+RedListUtil.HYB_SIGN);//hybrid sign has no space after it in titleCache for binomial hybrids
             taxNameString = taxNameString.replace(" x ", " "+RedListUtil.HYB_SIGN);//in some cases a standard 'x' is used
         }
@@ -521,7 +522,7 @@ public class RedListGefaesspflanzenImportNames extends DbImportBase<RedListGefae
             taxNameString = taxNameString.replace(" "+RedListUtil.HYB_SIGN, " x");
         }
 
-        if(taxNameString.endsWith("- Gruppe")){String a ="Festuca ×xx Lolium <-> Festuca ×× Lolium";
+        if(taxNameString.endsWith("- Gruppe")){
             taxNameString = taxNameString.replaceAll("- Gruppe", "species group");
         }
         if(taxNameString.endsWith("- group")){
@@ -529,14 +530,22 @@ public class RedListGefaesspflanzenImportNames extends DbImportBase<RedListGefae
         }
 
         taxNameString = taxNameString.replace("[ranglos]", "[unranked]");
+        if(taxonBase.getName().getRank()!=null){
+            if(taxonBase.getName().getRank().equals(Rank.PROLES())){
+                taxNameString = taxNameString.replace("proles", "prol.");
+            }
+            else if(taxonBase.getName().getRank().equals(state.getRank(RedListUtil.uuidRankCollectionSpecies))){
+                taxNameString = taxNameString.replace("\"Sammelart\"", "\"Coll. Species\"");
+            }
+        }
         if(STRICT_TITLE_CHECK){
             if(!taxNameString.trim().equals(nameCache)){
-                RedListUtil.logMessage(id, "Taxon name inconsistent! taxon.titleCache <-> Column "+RedListUtil.TAXNAME+": "+nameCache+" <-> "+taxNameString, logger);
+                RedListUtil.logMessage(id, "Taxon name inconsistent! taxon.nameCache <-> Column "+RedListUtil.TAXNAME+": "+nameCache+" <-> "+taxNameString, logger);
             }
         }
         else{
             if(!taxNameString.startsWith(nameCache)){
-                RedListUtil.logMessage(id, "Taxon name inconsistent! taxon.titleCache <-> Column "+RedListUtil.TAXNAME+": "+nameCache+" <-> "+taxNameString, logger);
+                RedListUtil.logMessage(id, "Taxon name inconsistent! taxon.nameCache <-> Column "+RedListUtil.TAXNAME+": "+nameCache+" <-> "+taxNameString, logger);
             }
         }
     }
@@ -553,6 +562,9 @@ public class RedListGefaesspflanzenImportNames extends DbImportBase<RedListGefae
                     return Rank.UNRANKED_INFRAGENERIC();
                 }
             }
+            else if(rankStr.equals("SAM")){
+                return getRank(state, RedListUtil.uuidRankCollectionSpecies, "Collective Species", "Collective Species", "\"Coll. Species\"", (OrderedTermVocabulary<Rank>) Rank.GENUS().getVocabulary(), null, RankClass.SpeciesGroup);
+            }
             else{
                 rank = state.getTransformer().getRankByKey(rankStr);
             }