ref #1444, ref #8508, ref #8509 improve rank handling in ERMS pipeline
authorAndreas Müller <a.mueller@bgbm.org>
Tue, 17 Sep 2019 12:18:16 +0000 (14:18 +0200)
committerAndreas Müller <a.mueller@bgbm.org>
Tue, 17 Sep 2019 12:18:16 +0000 (14:18 +0200)
cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/erms/ErmsTaxonImport.java
cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/erms/ErmsTransformer.java
cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/out/PesiTransformer.java

index d2bb275ef638e83181c3d63f2d2c3eb66ac7b32c..0c34b2e7aeaa2d689abba9fbb4ad578b4bef3eb8 100644 (file)
@@ -71,12 +71,11 @@ public class ErmsTaxonImport
                super(pluralString, dbTableName, cdmTargetClass);\r
        }\r
 \r
-//     @Override\r
-//     protected String getIdQuery() {\r
-//             String strQuery = " SELECT id FROM tu WHERE id < 300000 " ;\r
-//             return strQuery;\r
-//     }\r
-\r
+       @Override\r
+       protected String getIdQuery() {\r
+               String strQuery = " SELECT id FROM tu WHERE id NOT IN (147415) " ;  //for now we exclude Monera as it has no children and it is unclear what classification it has. In ERMS it is an alternative accepted name, but according to https://en.wikipedia.org/wiki/Monera it is a super taxon of bacteria.\r
+               return strQuery;\r
+       }\r
 \r
        @Override\r
     protected DbImportMapping<ErmsImportState, ErmsImportConfigurator> getMapping() {\r
index 4e775c26d8c3fef23e5f4e271e91c0efaa10a6e2..c03396d6b3c130e6a55584e76034696d4e846099 100644 (file)
@@ -167,7 +167,6 @@ public final class ErmsTransformer extends InputTransformerBase {
        public static final UUID uuidMarkerFreshwater = UUID.fromString("1190b182-e1d3-4986-8cc3-a6de3c115cf7");\r
        public static final UUID uuidMarkerTerrestrial = UUID.fromString("5ed92edb-e2c6-48da-8367-6e82071c888f");\r
 \r
-\r
        public static NomenclaturalCode kingdomId2NomCode(Integer kingdomId){\r
                switch (kingdomId){\r
                        case 1: return null;\r
@@ -176,12 +175,11 @@ public final class ErmsTransformer extends InputTransformerBase {
                        case 4: return NomenclaturalCode.ICNAFP;  //Fungi\r
                        case 5: return NomenclaturalCode.ICZN ;  //Protozoa\r
                        case 6: return NomenclaturalCode.ICNB ;  //Bacteria\r
-                       case 7: return NomenclaturalCode.ICNAFP;  //Chromista   ??\r
-                       case 147415: return NomenclaturalCode.ICNB;  //Monera\r
+                       case 7: return NomenclaturalCode.ICZN;  //Chromista??\r
+                       //-> formatting of infrageneric taxa and available ranks (rank table) let me assume that ICZN is most suitable                  case 147415: return NomenclaturalCode.ICNB;  //Monera\r
+                       //at the same time formatting of subsp. (with marker!) behaves like ICNAFP so this is unclear\r
                        default: return null;\r
-\r
                }\r
-\r
        }\r
 \r
        @Override\r
index 995d4f3addd56874a865f7bad35ad64e27ccbcbb..36d650b5f87b6f0b71b07e19486d474b2d5266e2 100644 (file)
@@ -208,13 +208,13 @@ public final class PesiTransformer extends ExportTransformerBase{
        public static String STR_NAMESPACE_POTENTIAL_COMBINATION = "Potential combination from TAX_ID:";\r
 \r
        // Kingdoms\r
-       public static int KINGDOM_NULL = 0;\r
-       public static int KINGDOM_ANIMALIA = 2;\r
-       public static int KINGDOM_PLANTAE = 3;\r
-       public static int KINGDOM_FUNGI = 4;\r
-       public static int KINGDOM_PROTOZOA = 5;\r
-       public static int KINGDOM_BACTERIA = 6;\r
-       public static int KINGDOM_CHROMISTA = 7;\r
+       public static final int KINGDOM_NULL = 0;\r
+       public static final int KINGDOM_ANIMALIA = 2;\r
+       public static final int KINGDOM_PLANTAE = 3;\r
+       public static final int KINGDOM_FUNGI = 4;\r
+       public static final int KINGDOM_PROTOZOA = 5;\r
+       public static final int KINGDOM_BACTERIA = 6;\r
+       public static final int KINGDOM_CHROMISTA = 7;\r
 \r
        // Kingdoms\r
        public static Map<String, Integer> pesiKingdomMap = new HashMap<>();\r
@@ -229,7 +229,8 @@ public final class PesiTransformer extends ExportTransformerBase{
        // Animalia Ranks\r
        public static int Animalia_Kingdom = 10;\r
        public static int Animalia_Subkingdom = 20;\r
-       public static int Animalia_Superphylum = 23;\r
+       public static int Animalia_Infrakingdom = 25;\r
+       public static int Animalia_Superphylum = 28;\r
        public static int Animalia_Phylum = 30;\r
        public static int Animalia_Subphylum = 40;\r
        public static int Animalia_Infraphylum = 45;\r
@@ -259,6 +260,7 @@ public final class PesiTransformer extends ExportTransformerBase{
        public static int Animalia_Variety = 240;\r
        public static int Animalia_Subvariety = 250;\r
        public static int Animalia_Forma = 260;\r
+       public static int Animalia_Subform = 270;\r
 \r
        // Plantae Ranks\r
        public static int Plantae_Kingdom = 10;\r
@@ -327,6 +329,7 @@ public final class PesiTransformer extends ExportTransformerBase{
        //Protozoa Ranks\r
        public static int Protozoa_Kingdom = 10;\r
        public static int Protozoa_Subkingdom = 20;\r
+       public static int Protozoa_Infrakingdom = 35;\r
        public static int Protozoa_Phylum = 30;\r
        public static int Protozoa_Subphylum = 40;\r
        public static int Protozoa_Superclass = 50;\r
@@ -380,6 +383,7 @@ public final class PesiTransformer extends ExportTransformerBase{
        public static int Chromista_Infrakingdom = 25;\r
        public static int Chromista_Phylum = 30;\r
        public static int Chromista_Subphylum = 40;\r
+       public static int Chromista_Infraphylum = 45;\r
        public static int Chromista_Superclass = 50;\r
        public static int Chromista_Class = 60;\r
        public static int Chromista_Subclass = 70;\r
@@ -1331,8 +1335,6 @@ public final class PesiTransformer extends ExportTransformerBase{
                return result;\r
        }\r
 \r
-\r
-\r
        @Override\r
        public Object getKeyByLanguage(Language language) throws UndefinedTransformerMethodException {\r
                return language2LanguageId(language);\r
@@ -1347,7 +1349,6 @@ public final class PesiTransformer extends ExportTransformerBase{
                }\r
        }\r
 \r
-\r
        /**\r
         * Returns the identifier of the given Language.\r
         */\r
@@ -1576,6 +1577,7 @@ public final class PesiTransformer extends ExportTransformerBase{
                } else if (feature.getUuid().equals(BerlinModelTransformer.uuidFeatureConservationStatus)){\r
                        return NoteCategory_Conservation_Status;\r
 \r
+\r
                //E+M\r
                } else if (feature.getUuid().equals(BerlinModelTransformer.uuidFeatureDistrEM)){\r
                        return NoteCategory_general_distribution_euromed;\r
@@ -1615,13 +1617,12 @@ public final class PesiTransformer extends ExportTransformerBase{
 \r
        /**\r
         * Returns the string representation for a given rank.\r
-        * @param rank\r
-        * @param pesiKingdomId\r
-        * @return\r
         */\r
        public String getCacheByRankAndKingdom(Rank rank, Integer pesiKingdomId) {\r
-               if (rank == null || pesiKingdomId == null){\r
+               if (rank == null){\r
                        return null;\r
+               }else if (pesiKingdomId == null && rank.equals(Rank.DOMAIN())){  //might be Superdomain in future\r
+                   return this.rankCacheMap.get(0).get(0);\r
                }else{\r
                    Map<Integer, String> rankMap = this.rankCacheMap.get(pesiKingdomId);\r
                    if (rankMap != null){\r
@@ -1665,7 +1666,7 @@ public final class PesiTransformer extends ExportTransformerBase{
 //             } else if (nomenclaturalCode.equals(NomenclaturalCode.)) { // Biota\r
 //                     result =\r
                } else {\r
-                       logger.error("NomenclaturalCode not yet considered: " + nomenclaturalCode.getUuid() + " (" +  nomenclaturalCode.getTitleCache() + ")");\r
+                       logger.error("NomenclaturalCode not yet considered: " + nomenclaturalCode.getUuid() + " (" +  nomenclaturalCode.getTitleCache() + ")");\r
                }\r
                return result;\r
        }\r
@@ -1687,7 +1688,9 @@ public final class PesiTransformer extends ExportTransformerBase{
                                result = Animalia_Kingdom;\r
             } else if (rank.equals(Rank.SUBKINGDOM())) {\r
                                result = Animalia_Subkingdom;\r
-                       } else if (rank.equals(Rank.SUPERPHYLUM())) {\r
+            } else if (rank.equals(Rank.INFRAKINGDOM())) {\r
+                result = Animalia_Infrakingdom;\r
+            } else if (rank.equals(Rank.SUPERPHYLUM())) {\r
                                result = Animalia_Superphylum;\r
                        } else if (rank.equals(Rank.PHYLUM())) {\r
                                result = Animalia_Phylum;\r
@@ -1745,7 +1748,9 @@ public final class PesiTransformer extends ExportTransformerBase{
                                result = Animalia_Subvariety;\r
                        } else if (rank.equals(Rank.FORM())) {\r
                                result = Animalia_Forma;\r
-                       } else {\r
+                       } else if (rank.equals(Rank.SUBFORM())) {\r
+                result = Animalia_Subform;\r
+            } else {\r
                                //TODO Exception\r
                                logger.warn("Rank for Kingdom Animalia not yet supported in CDM: "+ rank.getLabel());\r
                                return null;\r
@@ -1833,40 +1838,43 @@ public final class PesiTransformer extends ExportTransformerBase{
                                logger.warn("Rank for Kingdom Plantae not yet supported in CDM: "+ rank.getLabel());\r
                                return null;\r
                        }\r
-               } else if (pesiKingdomId != null && pesiKingdomId.intValue() == KINGDOM_BACTERIA) {\r
-                       if (rank.equals(Rank.KINGDOM())) { result = Bacteria_Kingdom; }\r
-                       else if (rank.equals(Rank.SUBKINGDOM())) { result = Bacteria_Subkingdom; }\r
-                       else if (rank.equals(Rank.PHYLUM())) { result = Bacteria_Phylum; }\r
-                       else if (rank.equals(Rank.SUBPHYLUM())) { result = Bacteria_Subphylum; }\r
-                       else if (rank.equals(Rank.SUPERCLASS())) { result = Bacteria_Superclass; }\r
-                       else if (rank.equals(Rank.CLASS())) { result = Bacteria_Class; }\r
-                       else if (rank.equals(Rank.SUBCLASS())) { result = Bacteria_Subclass; }\r
-                       else if (rank.equals(Rank.INFRACLASS())) { result = Bacteria_Infraclass; }\r
-                       else if (rank.equals(Rank.SUPERORDER())) { result = Bacteria_Superorder; }\r
-                       else if (rank.equals(Rank.ORDER())) { result = Bacteria_Order; }\r
-                       else if (rank.equals(Rank.SUBORDER())) { result = Bacteria_Suborder; }\r
-                       else if (rank.equals(Rank.INFRAORDER())) { result = Bacteria_Infraorder; }\r
-                       else if (rank.equals(Rank.SUPERFAMILY())) { result = Bacteria_Superfamily; }\r
-                       else if (rank.equals(Rank.FAMILY())) { result = Bacteria_Family; }\r
-                       else if (rank.equals(Rank.SUBFAMILY())) { result = Bacteria_Subfamily; }\r
-                       else if (rank.equals(Rank.TRIBE())) { result = Bacteria_Tribe; }\r
-                       else if (rank.equals(Rank.SUBTRIBE())) { result = Bacteria_Subtribe; }\r
-                       else if (rank.equals(Rank.GENUS())) { result = Bacteria_Genus; }\r
-                       else if (rank.equals(Rank.SUBGENUS())) { result = Bacteria_Subgenus; }\r
-                       else if (rank.equals(Rank.SPECIES())) { result = Bacteria_Species; }\r
-                       else if (rank.equals(Rank.SUBSPECIES())) { result = Bacteria_Subspecies; }\r
-                       else if (rank.equals(Rank.VARIETY())) { result = Bacteria_Variety; }\r
-                       else if (rank.equals(Rank.FORM())) { result = Bacteria_Forma; }\r
+        }else if (pesiKingdomId != null && pesiKingdomId.intValue() == KINGDOM_PROTOZOA) {\r
+            if (rank.equals(Rank.KINGDOM())) { result = Protozoa_Kingdom; }\r
+            else if (rank.equals(Rank.SUBKINGDOM())) { result = Protozoa_Subkingdom; }\r
+            else if (rank.equals(Rank.INFRAKINGDOM())) { result = Protozoa_Infrakingdom; }\r
+            else if (rank.equals(Rank.PHYLUM())) { result = Protozoa_Phylum; }\r
+            else if (rank.equals(Rank.SUBPHYLUM())) { result = Protozoa_Subphylum; }\r
+            else if (rank.equals(Rank.CLASS())) { result = Protozoa_Class; }\r
+            else if (rank.equals(Rank.SUBCLASS())) { result = Protozoa_Subclass; }\r
+            else if (rank.equals(Rank.ORDER())) { result = Protozoa_Order; }\r
+            else if (rank.equals(Rank.SUBORDER())) { result = Protozoa_Suborder; }\r
+            else if (rank.equals(Rank.INFRAORDER())) { result = Protozoa_Infraorder; }\r
+            else if (rank.equals(Rank.SUPERFAMILY())) { result = Protozoa_Superfamily; }\r
+            else if (rank.equals(Rank.FAMILY())) { result = Protozoa_Family; }\r
+            else if (rank.equals(Rank.SUBFAMILY())) { result = Protozoa_Subfamily; }\r
+            else if (rank.equals(Rank.TRIBE())) { result = Protozoa_Tribe; }\r
+            else if (rank.equals(Rank.SUBTRIBE())) { result = Protozoa_Subtribe; }\r
+            else if (rank.equals(Rank.GENUS())) { result = Protozoa_Genus; }\r
+            else if (rank.equals(Rank.SUBGENUS())) { result = Protozoa_Subgenus; }\r
+            else if (rank.equals(Rank.SPECIES())) { result = Protozoa_Species; }\r
+            else if (rank.equals(Rank.SUBSPECIES())) { result = Protozoa_Subspecies; }\r
+            else if (rank.equals(Rank.VARIETY())) { result = Protozoa_Variety; }\r
+            else if (rank.equals(Rank.FORM())) { result = Protozoa_Forma; }\r
+            else {\r
+                //TODO Exception\r
+                logger.warn("Rank for Kingdom Protozoa not yet supported in CDM: "+ rank.getLabel());\r
+                return null;\r
+            }\r
                } else if (pesiKingdomId != null && pesiKingdomId.intValue() == KINGDOM_FUNGI) {\r
             if (rank.equals(Rank.KINGDOM())) { result = Fungi_Kingdom; }\r
             else if (rank.equals(Rank.SUBKINGDOM())) { result = Fungi_Subkingdom; }\r
+            else if (rank.equals(Rank.PHYLUM())) { result =  Fungi_Division;}  //Phylum and Division are the same in ICNAFP\r
             else if (rank.equals(Rank.DIVISION())) { result =  Fungi_Division;}\r
             else if (rank.equals(Rank.SUBDIVISION())) { result =  Fungi_Subdivision;}\r
             else if (rank.equals(Rank.CLASS())) { result = Fungi_Class; }\r
             else if (rank.equals(Rank.SUBCLASS())) { result = Fungi_Subclass; }\r
             else if (rank.equals(Rank.ORDER())) { result = Fungi_Order; }\r
             else if (rank.equals(Rank.SUBORDER())) { result = Fungi_Suborder; }\r
-\r
             else if (rank.equals(Rank.FAMILY())) { result = Fungi_Family; }\r
             else if (rank.equals(Rank.SUBFAMILY())) { result = Fungi_Subfamily; }\r
             else if (rank.equals(Rank.TRIBE())) { result = Fungi_Tribe; }\r
@@ -1881,12 +1889,51 @@ public final class PesiTransformer extends ExportTransformerBase{
             else if (rank.equals(Rank.SUBVARIETY())) { result = Fungi_Subvariety; }\r
             else if (rank.equals(Rank.FORM())) { result = Fungi_Forma; }\r
             else if (rank.equals(Rank.SUBFORM())) { result = Fungi_Subforma;}\r
+            else {\r
+                //TODO Exception\r
+                logger.warn("Rank for Kingdom Fungi not yet supported in CDM: "+ rank.getLabel());\r
+                return null;\r
+            }\r
+        } else if (pesiKingdomId != null && pesiKingdomId.intValue() == KINGDOM_BACTERIA) {\r
+            if (rank.equals(Rank.KINGDOM())) { result = Bacteria_Kingdom; }\r
+            else if (rank.equals(Rank.SUBKINGDOM())) { result = Bacteria_Subkingdom; }\r
+            else if (rank.equals(Rank.PHYLUM())) { result = Bacteria_Phylum; }\r
+            else if (rank.equals(Rank.SUBPHYLUM())) { result = Bacteria_Subphylum; }\r
+            else if (rank.equals(Rank.SUPERCLASS())) { result = Bacteria_Superclass; }\r
+            else if (rank.equals(Rank.CLASS())) { result = Bacteria_Class; }\r
+            else if (rank.equals(Rank.SUBCLASS())) { result = Bacteria_Subclass; }\r
+            else if (rank.equals(Rank.INFRACLASS())) { result = Bacteria_Infraclass; }\r
+            else if (rank.equals(Rank.SUPERORDER())) { result = Bacteria_Superorder; }\r
+            else if (rank.equals(Rank.ORDER())) { result = Bacteria_Order; }\r
+            else if (rank.equals(Rank.SUBORDER())) { result = Bacteria_Suborder; }\r
+            else if (rank.equals(Rank.INFRAORDER())) { result = Bacteria_Infraorder; }\r
+            else if (rank.equals(Rank.SUPERFAMILY())) { result = Bacteria_Superfamily; }\r
+            else if (rank.equals(Rank.FAMILY())) { result = Bacteria_Family; }\r
+            else if (rank.equals(Rank.SUBFAMILY())) { result = Bacteria_Subfamily; }\r
+            else if (rank.equals(Rank.TRIBE())) { result = Bacteria_Tribe; }\r
+            else if (rank.equals(Rank.SUBTRIBE())) { result = Bacteria_Subtribe; }\r
+            else if (rank.equals(Rank.GENUS())) { result = Bacteria_Genus; }\r
+            else if (rank.equals(Rank.SUBGENUS())) { result = Bacteria_Subgenus; }\r
+            else if (rank.equals(Rank.SPECIES())) { result = Bacteria_Species; }\r
+            else if (rank.equals(Rank.SUBSPECIES())) { result = Bacteria_Subspecies; }\r
+            else if (rank.equals(Rank.VARIETY())) { result = Bacteria_Variety; }\r
+            else if (rank.equals(Rank.FORM())) { result = Bacteria_Forma; }\r
+            else {\r
+                //TODO Exception\r
+                logger.warn("Rank for Kingdom Bacteria not yet supported in CDM: "+ rank.getLabel());\r
+                return null;\r
+            }\r
         }else if (pesiKingdomId != null && pesiKingdomId.intValue() == KINGDOM_CHROMISTA) {\r
             if (rank.equals(Rank.KINGDOM())) { result = Chromista_Kingdom; }\r
             else if (rank.equals(Rank.SUBKINGDOM())) { result = Chromista_Subkingdom; }\r
-\r
+            else if (rank.equals(Rank.INFRAKINGDOM())) { result = Chromista_Infrakingdom; }\r
+            else if (rank.equals(Rank.PHYLUM())) { result = Chromista_Phylum; }\r
+            else if (rank.equals(Rank.SUBPHYLUM())) { result = Chromista_Subphylum; }\r
+            else if (rank.equals(Rank.INFRAPHYLUM())) { result = Chromista_Infraphylum; }\r
+            else if (rank.equals(Rank.SUPERCLASS())) { result = Chromista_Superclass; }\r
             else if (rank.equals(Rank.CLASS())) { result = Chromista_Class; }\r
             else if (rank.equals(Rank.SUBCLASS())) { result = Chromista_Subclass; }\r
+            else if (rank.equals(Rank.SUPERORDER())) { result = Chromista_Superorder; }\r
             else if (rank.equals(Rank.ORDER())) { result = Chromista_Order; }\r
             else if (rank.equals(Rank.SUBORDER())) { result = Chromista_Suborder; }\r
             else if (rank.equals(Rank.INFRAORDER())) { result = Chromista_Infraorder; }\r
@@ -1904,9 +1951,11 @@ public final class PesiTransformer extends ExportTransformerBase{
             else if (rank.equals(Rank.VARIETY())) { result = Chromista_Variety; }\r
             else if (rank.equals(Rank.SUBVARIETY())) { result = Chromista_Subvariety; }\r
             else if (rank.equals(Rank.FORM())) { result = Chromista_Forma; }\r
-\r
-\r
-\r
+            else {\r
+                //TODO Exception\r
+                logger.warn("Rank for Kingdom Chromista not yet supported in CDM: "+ rank.getLabel());\r
+                return null;\r
+            }\r
         }else{\r
                        //TODO Exception\r
                        logger.warn("Kingdom not yet supported in CDM: "+ pesiKingdomId);\r
@@ -2486,6 +2535,5 @@ public final class PesiTransformer extends ExportTransformerBase{
 \r
     public static Integer pesiKingdomId(String titleCache) {\r
         return pesiKingdomMap.get(titleCache);\r
-\r
     }\r
 }\r