Fauna Europaea import: Heterotypic synonyms
author a.babadshanjan <a.babadshanjan@localhost>
Wed, 16 Sep 2009 07:35:51 +0000 (07:35 +0000)
committer a.babadshanjan <a.babadshanjan@localhost>
Wed, 16 Sep 2009 07:35:51 +0000 (07:35 +0000)
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/faunaEuropaea/FaunaEuropaeaImportBase.java
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/faunaEuropaea/FaunaEuropaeaRelTaxonIncludeImport.java

index 8150e15a803ecf84f8e640abad452dc85771dd37..449d592170e1805d6b8af6d8f6ad6a7f2e9ee20c 100644 (file)
@@ -136,69 +136,4 @@ implements ICdmImport<FaunaEuropaeaImportConfigurator,FaunaEuropaeaImportState>
                return tree;\r
        }\r
 \r
-       \r
-       protected boolean saveTaxa(FaunaEuropaeaImportState state,\r
-                       int highestTaxonIndex, int limit) {\r
-\r
-               Map<String, MapWrapper<? extends CdmBase>> stores = state.getStores();\r
-               MapWrapper<TaxonBase> taxonStore = (MapWrapper<TaxonBase>)stores.get(ICdmIO.TAXON_STORE);\r
-               TransactionStatus txStatus = null;\r
-\r
-               int n = 0;\r
-               int nbrOfTaxa = highestTaxonIndex;\r
-//             int nbrOfTaxa = taxonStore.size();\r
-               boolean success = true;\r
-\r
-               if(logger.isInfoEnabled()) { logger.info("Saving taxa ..."); }\r
-\r
-               if (nbrOfTaxa < limit) {             // TODO: test with critical values\r
-                       limit = nbrOfTaxa;\r
-               } else {\r
-                       n = nbrOfTaxa / limit;\r
-               }\r
-\r
-               if(logger.isInfoEnabled()) { \r
-                       logger.info("number of taxa = " + taxonStore.size() \r
-                                       + ", highest taxon index = " + highestTaxonIndex \r
-                                       + ", limit = " + limit\r
-                                       + ", n = " + n); \r
-               }\r
-\r
-               // save taxa in blocks of <=limit\r
-               \r
-               for (int j = 1; j <= n + 1; j++)\r
-               {\r
-                       int offset = j - 1;\r
-                       int start = offset * limit;\r
-\r
-                       if(logger.isInfoEnabled()) { \r
-                               logger.info("Saving taxa: " + start + " - " + (start + limit - 1)); \r
-                       }\r
-\r
-                       if(logger.isInfoEnabled()) { \r
-                               logger.info("index = " + j \r
-                                               + ", offset = " + offset\r
-                                               + ", start = " + start); \r
-                       }\r
-                       \r
-                       if (j == n + 1) {\r
-                               limit = nbrOfTaxa - n * limit;\r
-                               if(logger.isInfoEnabled()) { \r
-                                       logger.info("n = " + n + ", limit = " + limit); \r
-                               }\r
-                       }\r
-\r
-                       txStatus = startTransaction();\r
-               \r
-                       Collection<TaxonBase> taxonMapPart = taxonStore.objects(start, limit);\r
-                       getTaxonService().saveTaxonAll(taxonMapPart);\r
-                       taxonMapPart = null;\r
-                       taxonStore.removeObjects(start, limit);\r
-                       \r
-                       commitTransaction(txStatus);\r
-\r
-               }\r
-               \r
-               return success;\r
-       }\r
 }\r
index d7dfa4e6e4616367dfdcb99d8b69d4223b0cf8b8..9d56b5bfcb5c68be9c794686853e0c9aa310e5a7 100644 (file)
@@ -31,6 +31,8 @@ import eu.etaxonomy.cdm.io.profiler.ProfilerController;
 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;\r
 import eu.etaxonomy.cdm.model.common.CdmBase;\r
 import eu.etaxonomy.cdm.model.reference.ReferenceBase;\r
+import eu.etaxonomy.cdm.model.taxon.Synonym;\r
+import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;\r
 import eu.etaxonomy.cdm.model.taxon.Taxon;\r
 import eu.etaxonomy.cdm.model.taxon.TaxonBase;\r
 import eu.etaxonomy.cdm.model.taxon.TaxonomicTree;\r
@@ -48,19 +50,24 @@ public class FaunaEuropaeaRelTaxonIncludeImport extends FaunaEuropaeaImportBase
        public static final String OS_NAMESPACE_TAXON = "Taxon";\r
        private static final Logger logger = Logger.getLogger(FaunaEuropaeaRelTaxonIncludeImport.class);\r
 \r
-       /* Max number of taxa to retrieve (for test purposes) */\r
-       private int maxTaxa = 0;\r
-       /* Max number of taxa to be saved in CDM DB with one service call */\r
        private int limit = 5000; // TODO: Make configurable\r
        /* Max number of taxa to be retrieved from CDM DB with one service call */\r
-       private int limitRetrieve = 10000; // TODO: Make configurable\r
-       /* Highest taxon index in the FauEu database */\r
-       private int highestTaxonIndex = 0;\r
-       /* Number of times method buildParentName() has been called for one taxon */\r
-       private int callCount = 0;\r
-       //private Map<Integer, FaunaEuropaeaTaxon> fauEuTaxonMap = new HashMap();\r
+       private ReferenceBase<?> sourceRef = null;\r
 \r
+       \r
+       /**\r
+        * @return the source reference held by this import step, or null if none has been set\r
+        */\r
+       private ReferenceBase<?> getSourceRef() {\r
+               return this.sourceRef;\r
+       }\r
 \r
+       /**\r
+        * @param sourceRef the source reference to hold for later retrieval via getSourceRef()\r
+        */\r
+       private void setSourceRef(ReferenceBase<?> sourceRef) {\r
+               this.sourceRef = sourceRef;\r
+       }\r
 \r
        /* (non-Javadoc)\r
         * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)\r
@@ -109,17 +116,15 @@ public class FaunaEuropaeaRelTaxonIncludeImport extends FaunaEuropaeaImportBase
 \r
                if(logger.isInfoEnabled()) { logger.info("Start making taxonomically included relationships..."); }\r
 \r
-               //ReferenceBase<?> sourceRef = state.getConfig().getSourceReference();\r
                TransactionStatus txStatus = startTransaction();\r
                \r
                TaxonBase taxon = getTaxonService().getTaxonByUuid(UUID.fromString("ac7b30dc-6207-4c71-9752-ee0fb838a271"));\r
-               ReferenceBase<?> sourceRef = taxon.getSec();\r
-               TaxonomicTree tree = getTaxonomicTreeFor(state, sourceRef);\r
+               setSourceRef(taxon.getSec());\r
 \r
+               TaxonomicTree tree = getTaxonomicTreeFor(state, sourceRef);\r
                commitTransaction(txStatus);\r
                \r
                ProfilerController.memorySnapshot();\r
-               \r
                if (state.getConfig().isDoTaxonomicallyIncluded()) {\r
                        success = processParentsChildren(state);\r
                }\r
@@ -128,10 +133,9 @@ public class FaunaEuropaeaRelTaxonIncludeImport extends FaunaEuropaeaImportBase
                        success = processMisappliedNames(state);\r
                }\r
                ProfilerController.memorySnapshot();\r
-//             if (state.getConfig().isDoHeterotypicSynonyms()) {\r
-//                     success = processHeterotypicSynonyms(state);\r
-//             }\r
-               \r
+               if (state.getConfig().isDoHeterotypicSynonyms()) {\r
+                       success = processHeterotypicSynonyms(state);\r
+               }\r
                ProfilerController.memorySnapshot();\r
 \r
                logger.info("End making taxa...");\r
@@ -171,13 +175,6 @@ public class FaunaEuropaeaRelTaxonIncludeImport extends FaunaEuropaeaImportBase
                        \r
                try {\r
 \r
-//                     String strQuery = \r
-//                             " SELECT dbo.Taxon.UUID AS ChildUuid, Parent.UUID AS ParentUuid " +\r
-//                             " FROM dbo.Taxon INNER JOIN dbo.Taxon AS Parent " +\r
-//                             " ON dbo.Taxon.TAX_TAX_IDPARENT = Parent.TAX_ID " +\r
-//                             " WHERE (dbo.Taxon.TAX_VALID <> 0) AND (dbo.Taxon.TAX_AUT_ID <> " + A_AUCT + " OR dbo.Taxon.TAX_AUT_ID IS NULL )" +\r
-//                             " ORDER BY dbo.Taxon.TAX_RNK_ID ASC";\r
-\r
                        ResultSet rs = source.getResultSet(countQuery);\r
                        rs.next();\r
                        int count = rs.getInt(1);\r
@@ -329,16 +326,107 @@ public class FaunaEuropaeaRelTaxonIncludeImport extends FaunaEuropaeaImportBase
        }\r
 \r
        \r
+       /**\r
+        * Retrieves synonym/accepted-taxon UUID pairs from the FauEu source database and\r
+        * passes them to createHeterotypicSynonyms() in batches, one transaction per batch.\r
+        * <p>\r
+        * Selected are rows with TAX_VALID = 0 whose TAX_AUT_ID differs from A_AUCT, each\r
+        * joined to the row referenced by its TAX_TAX_IDPARENT.\r
+        *\r
+        * @param state import state providing the configuration and the source connection\r
+        * @return false if a batch reported a failure or an SQLException occurred, true otherwise\r
+        */\r
+       private boolean processHeterotypicSynonyms(FaunaEuropaeaImportState state) {\r
+               FaunaEuropaeaImportConfigurator fauEuConfig = state.getConfig();\r
+               Source source = fauEuConfig.getSource();\r
+               // batch size (this local shadows the field of the same name)\r
+               int limit = fauEuConfig.getLimitSave();\r
+               if (limit <= 0) {\r
+                       limit = this.limit; // a non-positive size would break the modulo test below\r
+               }\r
+               TransactionStatus txStatus = null;\r
+               Map<UUID, UUID> childParentMap = null;\r
+               int i = 0;\r
+               boolean success = true;\r
+\r
+               String fromClause = " FROM Taxon INNER JOIN Taxon AS Parent " +\r
+               " ON Taxon.TAX_TAX_IDPARENT = Parent.TAX_ID " +\r
+               " WHERE (Taxon.TAX_VALID = 0) AND (Taxon.TAX_AUT_ID <> " + A_AUCT + ")";\r
+               String countQuery = " SELECT count(*) " + fromClause;\r
+               String selectQuery = " SELECT Taxon.UUID AS SynonymUuid, Parent.UUID AS AcceptedUuid "\r
+                       + fromClause + " ORDER BY dbo.Taxon.TAX_RNK_ID ASC ";\r
+\r
+               try {\r
+                       ResultSet rs = source.getResultSet(countQuery);\r
+                       rs.next();\r
+                       int count = rs.getInt(1);\r
+                       rs = source.getResultSet(selectQuery);\r
+                       if (logger.isInfoEnabled()) {\r
+                               logger.info("Number of rows: " + count);\r
+                               logger.info("Count Query: " + countQuery);\r
+                               logger.info("Select Query: " + selectQuery);\r
+                       }\r
+\r
+                       while (rs.next()) {\r
+                               // first row of a batch: open its transaction and a fresh map\r
+                               if (childParentMap == null) {\r
+                                       txStatus = startTransaction();\r
+                                       childParentMap = new HashMap<UUID, UUID>(limit);\r
+                                       if (logger.isInfoEnabled()) {\r
+                                               logger.info("Synonyms retrieved: " + i);\r
+                                       }\r
+                               }\r
+                               i++;\r
+                               UUID childUuid = UUID.fromString(rs.getString("SynonymUuid"));\r
+                               UUID parentUuid = UUID.fromString(rs.getString("AcceptedUuid"));\r
+                               if (!childParentMap.containsKey(childUuid)) {\r
+                                       childParentMap.put(childUuid, parentUuid);\r
+                               } else if (logger.isDebugEnabled()) {\r
+                                       logger.debug("Duplicated child UUID (" + childUuid + ")");\r
+                               }\r
+\r
+                               // batch full: persist it and close its transaction\r
+                               if ((i % limit) == 0) {\r
+                                       success &= saveSynonymBatch(state, childParentMap, txStatus, i);\r
+                                       childParentMap = null;\r
+                               }\r
+                       }\r
+\r
+                       // trailing partial batch; flushed here rather than by comparing i with count,\r
+                       // so it is saved even if the two queries disagree on the number of rows\r
+                       if (childParentMap != null) {\r
+                               success &= saveSynonymBatch(state, childParentMap, txStatus, i);\r
+                       }\r
+\r
+               } catch (SQLException e) {\r
+                       // NOTE(review): a transaction opened for the current batch is not closed here\r
+                       logger.error("SQLException:" +  e);\r
+                       success = false;\r
+               }\r
+               return success;\r
+       }\r
+\r
+       /* Creates the synonym relationships of one batch, then commits the batch transaction. */\r
+       private boolean saveSynonymBatch(FaunaEuropaeaImportState state, Map<UUID, UUID> batch,\r
+                       TransactionStatus txStatus, int rowsRead) {\r
+               boolean batchOk = createHeterotypicSynonyms(state, batch);\r
+               commitTransaction(txStatus);\r
+               if (logger.isInfoEnabled()) {\r
+                       logger.info("i = " + rowsRead + " - Transaction committed");\r
+               }\r
+               return batchOk;\r
+       }\r
+       \r
        /* Creates parent-child relationships.\r
         * Parent-child pairs are retrieved in blocks via findByUUID(Set<UUID>) from CDM DB. \r
         */\r
        private boolean createParentChildRelationships(FaunaEuropaeaImportState state, Map<UUID, UUID> childParentMap) {\r
 \r
-               TaxonBase taxon = getTaxonService().getTaxonByUuid(UUID.fromString("ac7b30dc-6207-4c71-9752-ee0fb838a271"));\r
-               ReferenceBase<?> sourceRef = taxon.getSec();\r
+//             TaxonBase taxon = getTaxonService().getTaxonByUuid(UUID.fromString("ac7b30dc-6207-4c71-9752-ee0fb838a271"));\r
+//             ReferenceBase<?> sourceRef = taxon.getSec();\r
                boolean success = true;\r
                \r
-                       TaxonomicTree tree = getTaxonomicTreeFor(state, sourceRef);\r
+                       TaxonomicTree tree = getTaxonomicTreeFor(state, getSourceRef());\r
                        \r
                        Set<TaxonBase> childSet = new HashSet<TaxonBase>(limit);\r
                        \r
@@ -468,8 +556,8 @@ public class FaunaEuropaeaRelTaxonIncludeImport extends FaunaEuropaeaImportBase
                return success;\r
        }\r
 \r
-       /* Creates parent-child relationships.\r
-        * Parent-child pairs are retrieved in blocks via findByUUID(Set<UUID>) from CDM DB. \r
+       /* Creates misapplied name relationships.\r
+        * Misapplied name-accepted taxon pairs are retrieved in blocks via findByUUID(Set<UUID>) from CDM DB. \r
         */\r
        private boolean createMisappliedNameRelationships(FaunaEuropaeaImportState state, Map<UUID, UUID> fromToMap) {\r
 \r
@@ -477,8 +565,6 @@ public class FaunaEuropaeaRelTaxonIncludeImport extends FaunaEuropaeaImportBase
                ReferenceBase<?> sourceRef = taxon.getSec();\r
                boolean success = true;\r
                \r
-                       TaxonomicTree tree = getTaxonomicTreeFor(state, sourceRef);\r
-                       \r
                        Set<TaxonBase> misappliedNameSet = new HashSet<TaxonBase>(limit);\r
                        \r
                        Set<UUID> misappliedNamesSet = fromToMap.keySet();\r
@@ -598,8 +684,124 @@ public class FaunaEuropaeaRelTaxonIncludeImport extends FaunaEuropaeaImportBase
                        misappliedNameSet = null;\r
                        misappliedNames = null;\r
                        acceptedTaxa = null;\r
-                       tree = null;\r
                \r
                return success;\r
        }\r
+\r
+       \r
+       /**\r
+        * Creates heterotypic synonym relationships for one batch of UUID pairs.\r
+        * Synonyms and accepted taxa are each retrieved in one block via findByUuid(Set<UUID>)\r
+        * from CDM DB; every synonym found is added to its mapped accepted taxon and the\r
+        * synonyms are then saved with saveTaxonAll().\r
+        * <p>\r
+        * A pair that cannot be processed is logged and skipped; the remaining pairs are still\r
+        * handled.\r
+        *\r
+        * @param state the import state (not read by this method)\r
+        * @param fromToMap maps each synonym UUID to the UUID of its accepted taxon\r
+        * @return false if at least one pair could not be processed, true otherwise\r
+        */\r
+       private boolean createHeterotypicSynonyms(FaunaEuropaeaImportState state, Map<UUID, UUID> fromToMap) {\r
+\r
+               boolean success = true;\r
+               if (fromToMap == null || fromToMap.isEmpty()) {\r
+                       return success; // nothing to relate; also avoids a lookup with an empty UUID set\r
+               }\r
+\r
+               Set<UUID> synonymUuidSet = fromToMap.keySet();\r
+               Set<UUID> acceptedTaxaUuidSet = new HashSet<UUID>(fromToMap.values());\r
+\r
+               if (logger.isInfoEnabled()) {\r
+                       logger.info("Start reading synonyms names and accepted taxa");\r
+               }\r
+               List<TaxonBase> synonyms = getTaxonService().findByUuid(synonymUuidSet);\r
+               List<TaxonBase> acceptedTaxa = getTaxonService().findByUuid(acceptedTaxaUuidSet);\r
+               Map<UUID, TaxonBase> acceptedTaxaMap = new HashMap<UUID, TaxonBase>(acceptedTaxa.size());\r
+               for (TaxonBase taxonBase : acceptedTaxa){\r
+                       acceptedTaxaMap.put(taxonBase.getUuid(), taxonBase);\r
+               }\r
+\r
+               if (logger.isInfoEnabled()) {\r
+                       logger.info("End reading synonyms names and accepted taxa");\r
+               }\r
+\r
+               if (logger.isTraceEnabled()) {\r
+                       for (UUID uuid : synonymUuidSet) {\r
+                               logger.trace("synonym uuid query: " + uuid);\r
+                       }\r
+                       for (UUID uuid : acceptedTaxaUuidSet) {\r
+                               logger.trace("accepted taxon uuid query: " + uuid);\r
+                       }\r
+                       for (TaxonBase tb : synonyms) {\r
+                               logger.trace("synonym uuid result: " + tb.getUuid());\r
+                       }\r
+                       for (TaxonBase tb : acceptedTaxa) {\r
+                               logger.trace("accepted taxon uuid result: " + tb.getUuid());\r
+                       }\r
+               }\r
+\r
+               Set<TaxonBase> synonymSet = new HashSet<TaxonBase>(synonyms.size());\r
+\r
+               for (TaxonBase synonymTaxonBase : synonyms) {\r
+\r
+                       // reset per synonym so that the error message in the catch block never reports\r
+                       // UUIDs left over from a previous iteration\r
+                       UUID synonymUuid = null;\r
+                       UUID mappedAcceptedTaxonUuid = null;\r
+\r
+                       try {\r
+                               Synonym synonym = synonymTaxonBase.deproxy(synonymTaxonBase, Synonym.class);\r
+                               synonymUuid = synonym.getUuid();\r
+                               mappedAcceptedTaxonUuid = fromToMap.get(synonymUuid);\r
+\r
+                               TaxonBase acceptedTaxonBase = acceptedTaxaMap.get(mappedAcceptedTaxonUuid);\r
+                               if (acceptedTaxonBase == null) {\r
+                                       // without this check the deproxy() call below fails with a NullPointerException\r
+                                       logger.warn("Accepted taxon (" + mappedAcceptedTaxonUuid\r
+                                                       + ") not found for synonym (" + synonymUuid + ")");\r
+                                       success = false;\r
+                                       continue;\r
+                               }\r
+                               if (logger.isDebugEnabled()) {\r
+                                       logger.debug("Parent (" + mappedAcceptedTaxonUuid + ") found for child (" + synonymUuid + ")");\r
+                               }\r
+                               Taxon acceptedTaxon = acceptedTaxonBase.deproxy(acceptedTaxonBase, Taxon.class);\r
+\r
+                               //TODO: in case original genus exists must add synonym to original genus instead of to accepted taxon\r
+                               acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.HETEROTYPIC_SYNONYM_OF());\r
+\r
+                               if (logger.isDebugEnabled()) {\r
+                                       logger.debug("Accepted taxon - synonym (" + mappedAcceptedTaxonUuid + " - " + synonymUuid + \r
+                                       ") relationship created");\r
+                               }\r
+\r
+                               if (synonymSet.add(synonym)) {\r
+                                       if (logger.isTraceEnabled()) {\r
+                                               logger.trace("Synonym (" + synonymUuid + ") added to Set");\r
+                                       }\r
+                               } else if (logger.isDebugEnabled()) {\r
+                                       logger.debug("Duplicated synonym (" + synonymUuid + ")");\r
+                               }\r
+\r
+                       } catch (Exception e) {\r
+                               logger.error("Error creating synonym relationship: accepted taxon-synonym (" + \r
+                                               mappedAcceptedTaxonUuid + "-" + synonymUuid + ")", e);\r
+                               success = false;\r
+                       }\r
+               }\r
+\r
+               // only the synonyms are handed to the service; NOTE(review): presumably the changed\r
+               // accepted taxa are persisted along with them - confirm\r
+               if (logger.isInfoEnabled()) {\r
+                       logger.info("Start saving synonymSet");\r
+               }\r
+               getTaxonService().saveTaxonAll(synonymSet);\r
+               if (logger.isInfoEnabled()) {\r
+                       logger.info("End saving synonymSet");\r
+               }\r
+\r
+               return success;\r
+       }\r
+\r
 }\r