Use of the new DescriptionBase attribute 'descriptiveSystem'
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / excel / distribution / DistributionImporter.java
index c15e0852841583ddf85b7d172636e63ab09e8058..69b66c6ec2b66c72c8ba17fddc3e918d4f4de819 100644 (file)
@@ -1,28 +1,19 @@
 package eu.etaxonomy.cdm.io.excel.distribution;\r
 \r
-import java.io.FileInputStream;\r
+import java.io.FileNotFoundException;\r
 import java.util.ArrayList;\r
 import java.util.HashMap;\r
-import java.util.HashSet;\r
 import java.util.List;\r
 import java.util.Map;\r
 import java.util.Set;\r
-import java.util.StringTokenizer;\r
-import java.util.regex.Matcher;\r
-import java.util.regex.Pattern;\r
 \r
 import org.apache.log4j.Logger;\r
-import org.apache.poi.hssf.usermodel.HSSFCell;\r
-import org.apache.poi.hssf.usermodel.HSSFRow;\r
-import org.apache.poi.hssf.usermodel.HSSFSheet;\r
-import org.apache.poi.hssf.usermodel.HSSFWorkbook;\r
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;\r
+import org.springframework.stereotype.Component;\r
 import org.springframework.transaction.TransactionStatus;\r
 \r
 import eu.etaxonomy.cdm.api.application.CdmApplicationController;\r
+import eu.etaxonomy.cdm.common.CdmUtils;\r
 import eu.etaxonomy.cdm.common.ExcelUtils;\r
-import eu.etaxonomy.cdm.database.DbSchemaValidation;\r
-import eu.etaxonomy.cdm.database.ICdmDataSource;\r
 import eu.etaxonomy.cdm.io.common.CdmIoBase;\r
 import eu.etaxonomy.cdm.io.common.ICdmIO;\r
 import eu.etaxonomy.cdm.io.common.IImportConfigurator;\r
@@ -36,32 +27,29 @@ import eu.etaxonomy.cdm.model.description.TaxonDescription;
 import eu.etaxonomy.cdm.model.location.NamedArea;\r
 import eu.etaxonomy.cdm.model.location.TdwgArea;\r
 import eu.etaxonomy.cdm.model.name.TaxonNameBase;\r
-import eu.etaxonomy.cdm.model.reference.ReferenceBase;\r
-import eu.etaxonomy.cdm.model.taxon.Synonym;\r
 import eu.etaxonomy.cdm.model.taxon.Taxon;\r
-import eu.etaxonomy.cdm.model.taxon.TaxonBase;\r
 \r
+@Component\r
 public class DistributionImporter extends CdmIoBase<IImportConfigurator> implements ICdmIO<IImportConfigurator> {\r
 \r
     /* used */\r
-    private static String EDIT_NAME_COLUMN = "EDIT";\r
-    private static String TDWG_DISTRIBUTION_COLUMN = "TDWG";\r
-    private static String STATUS_COLUMN = "Status";\r
-    private static String LITERATURE_NUMBER_COLUMN = "Lit.";\r
-    private static String LITERATURE_COLUMN = "Literature";\r
+    private static final String EDIT_NAME_COLUMN = "EDIT";\r
+    private static final String TDWG_DISTRIBUTION_COLUMN = "TDWG";\r
+    private static final String STATUS_COLUMN = "Status";\r
+    private static final String LITERATURE_NUMBER_COLUMN = "Lit.";\r
+    private static final String LITERATURE_COLUMN = "Literature";\r
     /* not yet used */\r
-    private static String VERNACULAR_NAME_COLUMN = "Vernacular";\r
-    private static String HABITAT_COLUMN = "Habitat";\r
-    private static String ISO_DISTRIBUTION_COLUMN = "ISO";\r
-    private static String NOTES_COLUMN = "Notes";\r
-    private static String PAGE_NUMBER_COLUMN = "Page";\r
-    private static String INFO_COLUMN = "Info";\r
+//    private static final String VERNACULAR_NAME_COLUMN = "Vernacular";\r
+//    private static final String HABITAT_COLUMN = "Habitat";\r
+//    private static final String CONTROL_COLUMN = "Control";\r
+//    private static final String TRANSLATED_COLUMN = "Translated";\r
+//    private static final String ISO_DISTRIBUTION_COLUMN = "ISO";\r
+//    private static final String NOTES_COLUMN = "Notes";\r
+//    private static final String PAGE_NUMBER_COLUMN = "Page";\r
+//    private static final String INFO_COLUMN = "Info";\r
     \r
-    private static String SEPARATOR = ",";\r
-\r
        private static final Logger logger = Logger.getLogger(DistributionImporter.class);\r
        \r
-       private CdmApplicationController appCtr = null;\r
        // Stores already processed descriptions\r
        Map<Taxon, TaxonDescription> myDescriptions = new HashMap<Taxon, TaxonDescription>();\r
 \r
@@ -69,28 +57,32 @@ public class DistributionImporter extends CdmIoBase<IImportConfigurator> impleme
        protected boolean doInvoke(IImportConfigurator config,\r
                        Map<String, MapWrapper<? extends CdmBase>> stores) {\r
                \r
-       logger.debug("Importing distribution data");\r
-       appCtr = config.getCdmAppController();\r
+               if (logger.isDebugEnabled()) { logger.debug("Importing distribution data"); }\r
        \r
                // read and save all rows of the excel worksheet\r
-       ArrayList<HashMap<String, String>> recordList = ExcelUtils.parseXLS(config.getSourceNameString());\r
+               ArrayList<HashMap<String, String>> recordList;\r
+       try{\r
+               recordList = ExcelUtils.parseXLS(config.getSourceNameString());\r
+               } catch (FileNotFoundException e1) {\r
+                       logger.error("File not found: " + (String)config.getSource());\r
+                       return false;\r
+               }\r
        if (recordList != null) {\r
                HashMap<String,String> record = null;\r
-               TransactionStatus txStatus = appCtr.startTransaction();\r
+               TransactionStatus txStatus = startTransaction();\r
 \r
                for (int i = 0; i < recordList.size(); i++) {\r
                        record = recordList.get(i);\r
-                       analyzeRecord(config.getDestination(), record);\r
+                       analyzeRecord(record);\r
                }\r
-               appCtr.commitTransaction(txStatus);\r
+               commitTransaction(txStatus);\r
        }\r
        \r
                try {\r
-               appCtr.close();\r
-                       logger.debug("End test distribution data import"); \r
+                       if (logger.isDebugEnabled()) { logger.debug("End distribution data import"); }\r
                                \r
                } catch (Exception e) {\r
-               logger.error("Error clsing the application context");\r
+               logger.error("Error closing the application context");\r
                e.printStackTrace();\r
                }\r
        \r
@@ -98,7 +90,10 @@ public class DistributionImporter extends CdmIoBase<IImportConfigurator> impleme
        }\r
                        \r
 \r
-    private void analyzeRecord(ICdmDataSource db, HashMap record) {\r
+       /** \r
+        *  Reads the data of one Excel sheet row\r
+        */\r
+    private void analyzeRecord(HashMap<String,String> record) {\r
        /*\r
         * Relevant columns:\r
         * Name (EDIT)\r
@@ -109,7 +104,6 @@ public class DistributionImporter extends CdmIoBase<IImportConfigurator> impleme
        */\r
        \r
         String editName = "";\r
-        String distribution = "";\r
         ArrayList<String> distributionList = new ArrayList<String>();\r
         String status = "";\r
         String literatureNumber = "";\r
@@ -121,24 +115,26 @@ public class DistributionImporter extends CdmIoBase<IImportConfigurator> impleme
                \r
                String value = (String) record.get(key);\r
                if (!value.equals("")) {\r
-//                     logger.debug("Key = " + key);\r
-                       logger.debug(key + ": '" + value + "'");\r
+                       if (logger.isDebugEnabled()) { logger.debug(key + ": '" + value + "'"); }\r
                }\r
                \r
                if (key.contains(EDIT_NAME_COLUMN)) {\r
-                       editName = (String) removeDuplicateWhitespace(value.trim());\r
+                       editName = (String) CdmUtils.removeDuplicateWhitespace(value.trim());\r
                        \r
                        } else if(key.contains(TDWG_DISTRIBUTION_COLUMN)) {\r
-                               distributionList =  buildList(value);\r
+                               distributionList =  CdmUtils.buildList(value);\r
                                \r
                        } else if(key.contains(STATUS_COLUMN)) {\r
-                               status = (String) removeDuplicateWhitespace(value.trim());\r
+                               status = (String) CdmUtils.removeDuplicateWhitespace(value.trim());\r
                                \r
                        } else if(key.contains(LITERATURE_NUMBER_COLUMN)) {\r
-                               literatureNumber = (String) removeDuplicateWhitespace(value.trim());\r
+                               literatureNumber = (String) CdmUtils.removeDuplicateWhitespace(value.trim());\r
                                \r
                        } else if(key.contains(LITERATURE_COLUMN)) {\r
-                               literature = (String) removeDuplicateWhitespace(value.trim());\r
+                               literature = (String) CdmUtils.removeDuplicateWhitespace(value.trim());\r
+                               \r
+                       } else {\r
+                               logger.warn("Column " + key + " ignored");\r
                        }\r
        }\r
        \r
@@ -150,22 +146,22 @@ public class DistributionImporter extends CdmIoBase<IImportConfigurator> impleme
     \r
     \r
        /** \r
-        *  Stores distribution data in the DB\r
+        *  Stores the data of one Excel sheet row in the database\r
         */\r
     private void saveRecord(String taxonName, ArrayList<String> distributionList,\r
                String status, String literatureNumber, String literature) {\r
 \r
                try {\r
                // get the matching names from the DB\r
-               List<TaxonNameBase<?,?>> taxonNameBases = appCtr.getNameService().findNamesByTitle(taxonName);\r
+               List<TaxonNameBase<?,?>> taxonNameBases = getNameService().findNamesByTitle(taxonName);\r
                if (taxonNameBases.isEmpty()) {\r
                        logger.error("Taxon name '" + taxonName + "' not found in DB");\r
                } else {\r
-                       logger.debug("Taxon found");\r
+                       if (logger.isDebugEnabled()) { logger.debug("Taxon found"); }\r
                }\r
 \r
                // get the taxa for the matching names\r
-               for(TaxonNameBase dbTaxonName: taxonNameBases) {\r
+               for(TaxonNameBase<?,?> dbTaxonName: taxonNameBases) {\r
 \r
                        Set<Taxon> taxa = dbTaxonName.getTaxa();\r
                        if (taxa.isEmpty()) {\r
@@ -196,7 +192,8 @@ public class DistributionImporter extends CdmIoBase<IImportConfigurator> impleme
                                } else {\r
                                        presenceAbsenceStatus = PresenceTerm.getPresenceTermByAbbreviation(status);\r
                                }\r
-                               // TODO: Handle absence case\r
+                               // TODO: Handle absence case. \r
+                               // This case has not yet occurred in the excel input file, though.\r
                                        \r
                                /* Set to true if taxon needs to be saved if at least one new distribution exists */\r
                                boolean save = false;\r
@@ -217,7 +214,9 @@ public class DistributionImporter extends CdmIoBase<IImportConfigurator> impleme
                                                        if (descriptionElement instanceof Distribution) {\r
                                                                if (namedArea == ((Distribution)descriptionElement).getArea()) {\r
                                                                        ignore = true;\r
-                                                               logger.debug("Distribution ignored: " + distribution);\r
+                                                                       if (logger.isDebugEnabled()) { \r
+                                                                               logger.debug("Distribution ignored: " + distribution); \r
+                                                                       }\r
                                                                break;\r
                                                                }\r
                                                        }\r
@@ -227,14 +226,16 @@ public class DistributionImporter extends CdmIoBase<IImportConfigurator> impleme
                                                        save = true;\r
                                                        Distribution newDistribution = Distribution.NewInstance(namedArea, presenceAbsenceStatus);\r
                                                        myDescription.addElement(newDistribution);\r
-                                                       logger.debug("Distribution created: " + newDistribution.toString());\r
+                                                       if (logger.isDebugEnabled()) { \r
+                                                               logger.debug("Distribution created: " + newDistribution.toString());\r
+                                                       }\r
                                                }\r
                                                }\r
                                        }\r
                                }\r
                                if (save == true) {\r
-                                       appCtr.getTaxonService().saveTaxon(taxon);\r
-                                       logger.debug("Taxon saved");\r
+                                       getTaxonService().saveTaxon(taxon);\r
+                                       if (logger.isDebugEnabled()) { logger.debug("Taxon saved"); }\r
                                }\r
                        }\r
                } \r
@@ -245,37 +246,6 @@ public class DistributionImporter extends CdmIoBase<IImportConfigurator> impleme
     }\r
     \r
     \r
-    /** Returns a version of the input where all contiguous\r
-     * whitespace characters are replaced with a single\r
-     * space. Line terminators are treated like whitespace.\r
-     * \r
-     * @param inputStr\r
-     * @return\r
-     */\r
-    private static CharSequence removeDuplicateWhitespace(CharSequence inputStr) {\r
-       \r
-        String patternStr = "\\s+";\r
-        String replaceStr = " ";\r
-        Pattern pattern = Pattern.compile(patternStr);\r
-        Matcher matcher = pattern.matcher(inputStr);\r
-        return matcher.replaceAll(replaceStr);\r
-    }\r
-    \r
-\r
-    /** Builds a list of strings by splitting an input string\r
-     * with delimiters whitespace, comma, or semicolon\r
-     * @param value\r
-     * @return\r
-     */\r
-    private ArrayList<String> buildList(String value) {\r
-\r
-       ArrayList<String> resultList = new ArrayList<String>();\r
-       for (String tag : value.split("[\\s,;]+")) {\r
-               resultList.add(tag);\r
-       }\r
-        return resultList;\r
-    }\r
-\r
        @Override\r
        protected boolean doCheck(IImportConfigurator config) {\r
                boolean result = true;\r