merge pesi->trunk
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / berlinModel / in / BerlinModelFactsImport.java
index 259902d70687734b05041631d364a2bdce8d38a9..6b669ff16a14ec4867fe71677019692204ef7bd0 100644 (file)
@@ -9,13 +9,16 @@
 \r
 package eu.etaxonomy.cdm.io.berlinModel.in;\r
 \r
+import java.io.IOException;\r
 import java.net.MalformedURLException;\r
 import java.net.URISyntaxException;\r
 import java.net.URL;\r
 import java.sql.ResultSet;\r
 import java.sql.SQLException;\r
 import java.util.Collection;\r
+import java.util.HashMap;\r
 import java.util.HashSet;\r
+import java.util.Map;\r
 import java.util.Set;\r
 \r
 import org.apache.log4j.Logger;\r
@@ -24,10 +27,13 @@ import org.springframework.stereotype.Component;
 import eu.etaxonomy.cdm.common.CdmUtils;\r
 import eu.etaxonomy.cdm.common.mediaMetaData.ImageMetaData;\r
 import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;\r
-import eu.etaxonomy.cdm.io.common.ICdmIO;\r
+import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelFactsImportValidator;\r
+import eu.etaxonomy.cdm.io.common.IOValidator;\r
 import eu.etaxonomy.cdm.io.common.MapWrapper;\r
+import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;\r
 import eu.etaxonomy.cdm.io.common.Source;\r
 import eu.etaxonomy.cdm.model.common.Annotation;\r
+import eu.etaxonomy.cdm.model.common.CdmBase;\r
 import eu.etaxonomy.cdm.model.common.DescriptionElementSource;\r
 import eu.etaxonomy.cdm.model.common.Language;\r
 import eu.etaxonomy.cdm.model.common.Marker;\r
@@ -54,25 +60,21 @@ import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
 public class BerlinModelFactsImport  extends BerlinModelImportBase {\r
        private static final Logger logger = Logger.getLogger(BerlinModelFactsImport.class);\r
 \r
+       public static final String NAMESPACE = "Fact";\r
+       \r
        public static final String SEQUENCE_PREFIX = "ORDER: ";\r
        \r
        private int modCount = 10000;\r
+       private static final String pluralString = "facts";\r
+       private static final String dbTableName = "Fact";\r
+\r
+       //FIXME don't use as class variable\r
+       private MapWrapper<Feature> featureMap;\r
        \r
        public BerlinModelFactsImport(){\r
                super();\r
        }\r
 \r
-       /* (non-Javadoc)\r
-        * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)\r
-        */\r
-       @Override\r
-       protected boolean doCheck(BerlinModelImportState state){\r
-               boolean result = true;\r
-               BerlinModelImportConfigurator bmiConfig = state.getConfig();\r
-               logger.warn("Checking for Facts not yet fully implemented");\r
-               result &= checkDesignationRefsExist(bmiConfig);\r
-               return result;\r
-       }\r
 \r
        private TermVocabulary<Feature> getFeatureVocabulary(){\r
                try {\r
@@ -127,9 +129,7 @@ public class BerlinModelFactsImport  extends BerlinModelImportBase {
 //                                     RankRestrictionFk       int     Checked\r
                                }\r
                                                                \r
-                       //      featureMap.put(factCategoryId, feature);\r
                                result.put(factCategoryId, feature);\r
-       \r
                        }\r
                        Collection<Feature> col = result.getAllValues();\r
                        getTermService().save((Collection)col);\r
@@ -140,40 +140,49 @@ public class BerlinModelFactsImport  extends BerlinModelImportBase {
                }\r
 \r
        }\r
-       \r
 \r
        /* (non-Javadoc)\r
-        * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doInvoke(eu.etaxonomy.cdm.io.common.IImportConfigurator, eu.etaxonomy.cdm.api.application.CdmApplicationController, java.util.Map)\r
+        * Stores the result of invokeFactCategories(state.getConfig()) in the featureMap\r
+        * field and then returns whatever super.doInvoke(state) returns.\r
+        * NOTE(review): the record handling itself now lives in doPartition; presumably\r
+        * super.doInvoke drives the partitioning - confirm in BerlinModelImportBase.\r
+        *\r
+        * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#doInvoke(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)\r
         */\r
        @Override\r
        protected boolean doInvoke(BerlinModelImportState state) {\r
-               boolean result = true;\r
-               \r
-               MapWrapper<TaxonBase> taxonMap = (MapWrapper<TaxonBase>)state.getStore(ICdmIO.TAXON_STORE);\r
-               MapWrapper<ReferenceBase> referenceMap = (MapWrapper<ReferenceBase>)state.getStore(ICdmIO.REFERENCE_STORE);\r
-               MapWrapper<ReferenceBase> nomRefMap = (MapWrapper<ReferenceBase>)state.getStore(ICdmIO.NOMREF_STORE);\r
-               \r
-               Set<TaxonBase> taxonStore = new HashSet<TaxonBase>();\r
-               \r
+               featureMap = invokeFactCategories(state.getConfig());\r
+               return super.doInvoke(state);\r
+       }\r
+       \r
+\r
+       /* (non-Javadoc)\r
+        * Returns the SQL statement used to read Fact records.\r
+        * The query selects all Fact columns plus PTaxon.RIdentifier (aliased as taxonId)\r
+        * and RefDetail.Details, restricts the rows to the FactIds standing in for\r
+        * ID_LIST_TOKEN and orders them by Sequence. The config parameter is not read here.\r
+        * NOTE(review): ID_LIST_TOKEN is presumably replaced by the caller with the ids of\r
+        * the current partition - confirm in BerlinModelImportBase.\r
+        *\r
+        * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)\r
+        */\r
+       @Override\r
+       protected String getRecordQuery(BerlinModelImportConfigurator config) {\r
+               String strQuery = \r
+                       " SELECT Fact.*, PTaxon.RIdentifier as taxonId, RefDetail.Details " + \r
+                       " FROM Fact " +\r
+               " INNER JOIN PTaxon ON Fact.PTNameFk = PTaxon.PTNameFk AND Fact.PTRefFk = PTaxon.PTRefFk " +\r
+               " LEFT OUTER JOIN RefDetail ON Fact.FactRefDetailFk = RefDetail.RefDetailId AND Fact.FactRefFk = RefDetail.RefFk " +\r
+               " WHERE (FactId IN (" + ID_LIST_TOKEN + "))" + \r
+                " ORDER By Sequence";\r
+               return strQuery;\r
+       }\r
+       \r
+\r
+       /* (non-Javadoc)\r
+        * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#doPartition(eu.etaxonomy.cdm.io.berlinModel.in.ResultSetPartitioner, eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)\r
+        */\r
+       public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) {\r
+               boolean success = true ;\r
                BerlinModelImportConfigurator config = state.getConfig();\r
-               Source source = config.getSource();\r
-               \r
-               logger.info("start makeFacts ...");\r
+               Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();\r
+               Map<String, TaxonBase> taxonMap = (Map<String, TaxonBase>) partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE);\r
+               Map<String, ReferenceBase> biblioRefMap = (Map<String, ReferenceBase>) partitioner.getObjectMap(BerlinModelReferenceImport.BIBLIO_REFERENCE_NAMESPACE);\r
+               Map<String, ReferenceBase> nomRefMap = (Map<String, ReferenceBase>) partitioner.getObjectMap(BerlinModelReferenceImport.NOM_REFERENCE_NAMESPACE);\r
+\r
+               ResultSet rs = partitioner.getResultSet();\r
                \r
-               MapWrapper<Feature> featureMap = invokeFactCategories(config);\r
+               ReferenceBase<?> sourceRef = state.getConfig().getSourceReference();\r
                \r
-               try {\r
-                       //get data from database\r
-                       String strQuery = \r
-                                       " SELECT Fact.*, PTaxon.RIdentifier as taxonId, RefDetail.Details " + \r
-                                       " FROM Fact " +\r
-                       " INNER JOIN PTaxon ON Fact.PTNameFk = PTaxon.PTNameFk AND Fact.PTRefFk = PTaxon.PTRefFk " +\r
-                       " LEFT OUTER JOIN RefDetail ON Fact.FactRefDetailFk = RefDetail.RefDetailId AND Fact.FactRefFk = RefDetail.RefFk " +\r
-                       " WHERE (1=1)" + \r
-                        " ORDER By Sequence";\r
-                       ResultSet rs = source.getResultSet(strQuery) ;\r
-                       ReferenceBase<?> sourceRef = state.getConfig().getSourceReference();\r
-                       \r
+               try{\r
                        int i = 0;\r
                        //for each fact\r
                        while (rs.next()){\r
@@ -184,7 +193,6 @@ public class BerlinModelFactsImport  extends BerlinModelImportBase {
                                        Object taxonIdObj = rs.getObject("taxonId");\r
                                        int taxonId = rs.getInt("taxonId");\r
                                        Object factRefFkObj = rs.getObject("factRefFk");\r
-                                       int factRefFk = rs.getInt("factRefFk");\r
                                        Object categoryFkObj = rs.getObject("factCategoryFk");\r
                                        Integer categoryFk = rs.getInt("factCategoryFk");\r
                                        String details = rs.getString("Details");\r
@@ -198,14 +206,14 @@ public class BerlinModelFactsImport  extends BerlinModelImportBase {
                                        \r
                                        if (taxonBase == null){\r
                                                logger.warn("Taxon for Fact " + factId + " does not exist in store");\r
-                                               result = false;\r
+                                               success = false;\r
                                        }else{\r
                                                Taxon taxon;\r
                                                if ( taxonBase instanceof Taxon ) {\r
                                                        taxon = (Taxon) taxonBase;\r
                                                }else{\r
                                                        logger.warn("TaxonBase " + (taxonIdObj==null?"(null)":taxonIdObj) + " for Fact " + factId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName());\r
-                                                       result = false;\r
+                                                       success = false;\r
                                                        continue;\r
                                                }\r
                                                \r
@@ -232,7 +240,7 @@ public class BerlinModelFactsImport  extends BerlinModelImportBase {
                                                        }\r
                                                        if (taxonDescription == null){\r
                                                                taxonDescription = TaxonDescription.NewInstance();\r
-                                                               taxonDescription.setTitleCache(sourceRef == null ? null:sourceRef.getTitleCache());\r
+                                                               taxonDescription.setTitleCache(sourceRef == null ? null : sourceRef.getTitleCache());\r
                                                                taxon.addDescription(taxonDescription);\r
                                                        }\r
                                                }\r
@@ -240,7 +248,7 @@ public class BerlinModelFactsImport  extends BerlinModelImportBase {
                                                //textData\r
                                                TextData textData = null;\r
                                                boolean newTextData = true;\r
-\r
+       \r
                                                // For Cichorieae DB: If fact category is 31 (Systematics) and there is already a Systematics TextData \r
                                                // description element append the fact text to the existing TextData\r
                                                if(categoryFk == 31) {\r
@@ -263,9 +271,9 @@ public class BerlinModelFactsImport  extends BerlinModelImportBase {
                                                        }\r
                                                }\r
                                                \r
-                                               if(newTextData == true) { textData = TextData.NewInstance(); }\r
-\r
-                                               \r
+                                               if(newTextData == true) { \r
+                                                       textData = TextData.NewInstance(); \r
+                                               }\r
                                                \r
                                                //for diptera database\r
                                                if (categoryFk == 99 && notes.contains("<OriginalName>")){\r
@@ -283,21 +291,17 @@ public class BerlinModelFactsImport  extends BerlinModelImportBase {
                                                        textData.setType(feature);\r
                                                }\r
                                                \r
-                                               //\r
-                                               ReferenceBase citation;\r
+                                               //reference\r
+                                               ReferenceBase citation = null;\r
+                                               String factRefFk = String.valueOf(factRefFkObj);\r
                                                if (factRefFkObj != null){\r
-                                                       citation = referenceMap.get(factRefFk); \r
-                                                       if (citation == null){\r
-                                                               citation = nomRefMap.get(factRefFk);\r
-                                                       }\r
-                                                       if (citation == null && (factRefFk != 0)){\r
-                                                               logger.warn("Citation not found in referenceMap: " + factRefFk);\r
-                                                               result = false;\r
-                                                       }\r
-                                               }else{\r
-                                                       citation = null;\r
+                                                       citation = getReferenceOnlyFromMaps(\r
+                                                                       biblioRefMap, nomRefMap, factRefFk);    \r
+                                               }\r
+                                               if (citation == null && (factRefFkObj != null)){\r
+                                                       logger.warn("Citation not found in referenceMap: " + factRefFk);\r
+                                                       success = false;\r
                                                }\r
-\r
                                                if (citation != null || CdmUtils.isNotEmpty(details)){\r
                                                        DescriptionElementSource originalSource = DescriptionElementSource.NewInstance();\r
                                                        originalSource.setCitation(citation);\r
@@ -323,18 +327,18 @@ public class BerlinModelFactsImport  extends BerlinModelImportBase {
                                                }\r
                                                \r
                                                //                                              if (categoryFkObj == FACT_DESCRIPTION){\r
-//                                                     //;\r
-//                                             }else if (categoryFkObj == FACT_OBSERVATION){\r
-//                                                     //;\r
-//                                             }else if (categoryFkObj == FACT_DISTRIBUTION_EM){\r
-//                                                     //\r
-//                                             }else {\r
-//                                                     //TODO\r
-//                                                     //logger.warn("FactCategory " + categoryFk + " not yet implemented");\r
-//                                             }\r
+       //                                              //;\r
+       //                                      }else if (categoryFkObj == FACT_OBSERVATION){\r
+       //                                              //;\r
+       //                                      }else if (categoryFkObj == FACT_DISTRIBUTION_EM){\r
+       //                                              //\r
+       //                                      }else {\r
+       //                                              //TODO\r
+       //                                              //logger.warn("FactCategory " + categoryFk + " not yet implemented");\r
+       //                                      }\r
                                                \r
                                                //notes\r
-                                               doCreatedUpdatedNotes(state, textData, rs, "Fact");\r
+                                               doCreatedUpdatedNotes(state, textData, rs);\r
                                                \r
                                                //TODO\r
                                                //Designation References -> unclear how to map to CDM\r
@@ -343,27 +347,88 @@ public class BerlinModelFactsImport  extends BerlinModelImportBase {
                                                //sequence -> textData is not an identifiable entity therefore extensions are not possible\r
                                                //fact category better\r
                                                \r
-                                               taxonStore.add(taxon);\r
+                                               taxaToSave.add(taxon);\r
                                        }\r
                                } catch (Exception re){\r
                                        logger.error("An exception occurred during the facts import");\r
-                                       result = false;\r
+                                       re.printStackTrace();\r
+                                       success = false;\r
                                }\r
                                //put\r
                        }\r
                        logger.info("Facts handled: " + (i-1));\r
-                       logger.info("Taxa to save: " + taxonStore.size());\r
-                       getTaxonService().save(taxonStore);     \r
+                       logger.info("Taxa to save: " + taxaToSave.size());\r
+                       getTaxonService().save(taxaToSave);     \r
+               }catch(SQLException e){\r
+                       throw new RuntimeException(e);\r
+               }\r
+               return success;\r
+       }\r
+\r
+       /* (non-Javadoc)\r
+        * Collects the objects referenced by the Fact rows in rs, grouped by namespace.\r
+        * The method iterates rs to its end and passes the columns taxonId, FactRefFk,\r
+        * PTDesignationRefFk, FactRefDetailFk and PTDesignationRefDetailFk to\r
+        * handleForeignKey, filling one id set each for taxa, references and refDetails.\r
+        * It then calls getCommonService().getSourcedObjectsByIdInSource once per namespace\r
+        * (taxon, nom reference, biblio reference, nom refDetail, biblio refDetail) and\r
+        * puts each returned map into the result under that namespace. Both reference\r
+        * namespaces are queried with the same reference id set, and both refDetail\r
+        * namespaces with the same refDetail id set.\r
+        * A SQLException is rethrown wrapped in a RuntimeException.\r
+        *\r
+        * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)\r
+        */\r
+       public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {\r
+               String nameSpace;\r
+               Class cdmClass;\r
+               Set<String> idSet;\r
+               Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();\r
+               \r
+               try{\r
+                       Set<String> taxonIdSet = new HashSet<String>();\r
+                       Set<String> referenceIdSet = new HashSet<String>();\r
+                       Set<String> refDetailIdSet = new HashSet<String>();\r
+                       while (rs.next()){\r
+                               handleForeignKey(rs, taxonIdSet, "taxonId");\r
+                               handleForeignKey(rs, referenceIdSet, "FactRefFk");\r
+                               handleForeignKey(rs, referenceIdSet, "PTDesignationRefFk");\r
+                               handleForeignKey(rs, refDetailIdSet, "FactRefDetailFk");\r
+                               handleForeignKey(rs, refDetailIdSet, "PTDesignationRefDetailFk");\r
+                       }\r
                        \r
-                       logger.info("end makeFacts ..." + getSuccessString(result));\r
-                       return result;\r
+                       //taxon map\r
+                       nameSpace = BerlinModelTaxonImport.NAMESPACE;\r
+                       cdmClass = TaxonBase.class;\r
+                       idSet = taxonIdSet;\r
+                       Map<String, TaxonBase> taxonMap = (Map<String, TaxonBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);\r
+                       result.put(nameSpace, taxonMap);\r
+\r
+\r
+                       //nom reference map\r
+                       nameSpace = BerlinModelReferenceImport.NOM_REFERENCE_NAMESPACE;\r
+                       cdmClass = ReferenceBase.class;\r
+                       idSet = referenceIdSet;\r
+                       Map<String, ReferenceBase> nomReferenceMap = (Map<String, ReferenceBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);\r
+                       result.put(nameSpace, nomReferenceMap);\r
+\r
+                       //biblio reference map\r
+                       nameSpace = BerlinModelReferenceImport.BIBLIO_REFERENCE_NAMESPACE;\r
+                       cdmClass = ReferenceBase.class;\r
+                       idSet = referenceIdSet;\r
+                       Map<String, ReferenceBase> biblioReferenceMap = (Map<String, ReferenceBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);\r
+                       result.put(nameSpace, biblioReferenceMap);\r
+                       \r
+                       //nom refDetail map\r
+                       nameSpace = BerlinModelRefDetailImport.NOM_REFDETAIL_NAMESPACE;\r
+                       cdmClass = ReferenceBase.class;\r
+                       idSet = refDetailIdSet;\r
+                       Map<String, ReferenceBase> nomRefDetailMap= (Map<String, ReferenceBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);\r
+                       result.put(nameSpace, nomRefDetailMap);\r
+                       \r
+                       //biblio refDetail map\r
+                       nameSpace = BerlinModelRefDetailImport.BIBLIO_REFDETAIL_NAMESPACE;\r
+                       cdmClass = ReferenceBase.class;\r
+                       idSet = refDetailIdSet;\r
+                       Map<String, ReferenceBase> biblioRefDetailMap= (Map<String, ReferenceBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);\r
+                       result.put(nameSpace, biblioRefDetailMap);\r
+       \r
                } catch (SQLException e) {\r
-                       logger.error("SQLException:" +  e);\r
-                       return false;\r
+                       throw new RuntimeException(e);\r
                }\r
-\r
+               return result;\r
        }\r
        \r
+       \r
        /**\r
         * @param state \r
         * @param media \r
@@ -385,11 +450,12 @@ public class BerlinModelFactsImport  extends BerlinModelImportBase {
                        return null;\r
                }\r
                try {\r
-                       imageMetaData.readMetaData(url.toURI());\r
+                       imageMetaData.readMetaData(url.toURI(), 0);\r
                }\r
                catch(URISyntaxException e){\r
                        e.printStackTrace();\r
                }\r
+               \r
                MediaRepresentation mediaRepresentation = MediaRepresentation.NewInstance(imageMetaData.getMimeType(), null);\r
                media.addRepresentation(mediaRepresentation);\r
                ImageFile image = ImageFile.NewInstance(uri, size, imageMetaData);\r
@@ -400,9 +466,9 @@ public class BerlinModelFactsImport  extends BerlinModelImportBase {
                return taxonDescription;\r
        }\r
 \r
-       private TaxonBase getTaxon(MapWrapper<TaxonBase> taxonMap, Object taxonIdObj, Integer taxonId){\r
+       private TaxonBase getTaxon(Map<String, TaxonBase> taxonMap, Object taxonIdObj, Integer taxonId){\r
                if (taxonIdObj != null){\r
-                       return taxonMap.get(taxonId);\r
+                       return taxonMap.get(String.valueOf(taxonId));\r
                }else{\r
                        return null;\r
                }\r
@@ -418,29 +484,30 @@ public class BerlinModelFactsImport  extends BerlinModelImportBase {
                \r
        }\r
        \r
-       private boolean checkDesignationRefsExist(BerlinModelImportConfigurator config){\r
-               try {\r
-                       boolean result = true;\r
-                       Source source = config.getSource();\r
-                       String strQueryArticlesWithoutJournal = "SELECT Count(*) as n " +\r
-                                       " FROM Fact " +\r
-                                       " WHERE (NOT (PTDesignationRefFk IS NULL) ) OR " +\r
-                      " (NOT (PTDesignationRefDetailFk IS NULL) )";\r
-                       ResultSet rs = source.getResultSet(strQueryArticlesWithoutJournal);\r
-                       rs.next();\r
-                       int count = rs.getInt("n");\r
-                       if (count > 0){\r
-                               System.out.println("========================================================");\r
-                               logger.warn("There are "+count+" Facts with not empty designation references. Designation references are not imported.");\r
-                               \r
-                               System.out.println("========================================================");\r
-                       }\r
-                       return result;\r
-               } catch (SQLException e) {\r
-                       e.printStackTrace();\r
-                       return false;\r
-               }\r
 \r
+       /* (non-Javadoc)\r
+        * Creates a new BerlinModelFactsImportValidator and returns the result of its\r
+        * validate(state) call; no checks are performed in this class itself.\r
+        *\r
+        * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)\r
+        */\r
+       @Override\r
+       protected boolean doCheck(BerlinModelImportState state){\r
+               IOValidator<BerlinModelImportState> validator = new BerlinModelFactsImportValidator();\r
+               return validator.validate(state);\r
+       }\r
+       \r
+       /* (non-Javadoc)\r
+        * Returns the value of the dbTableName constant of this class.\r
+        *\r
+        * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getTableName()\r
+        */\r
+       @Override\r
+       protected String getTableName() {\r
+               return dbTableName;\r
+       }\r
+       \r
+       /* (non-Javadoc)\r
+        * Returns the value of the pluralString constant of this class.\r
+        *\r
+        * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getPluralString()\r
+        */\r
+       @Override\r
+       public String getPluralString() {\r
+               return pluralString;\r
        }\r
        \r
        /* (non-Javadoc)\r
@@ -450,4 +517,5 @@ public class BerlinModelFactsImport  extends BerlinModelImportBase {
                return ! state.getConfig().isDoFacts();\r
        }\r
 \r
+\r
 }\r