Use of the new DescriptionBase attribute 'descriptiveSystem'
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / excel / distribution / DistributionImporter.java
index 9bdc9336a785868683923b41ed0873333ec1333c..69b66c6ec2b66c72c8ba17fddc3e918d4f4de819 100644 (file)
@@ -1,84 +1,88 @@
 package eu.etaxonomy.cdm.io.excel.distribution;\r
 \r
-import java.io.FileInputStream;\r
+import java.io.FileNotFoundException;\r
 import java.util.ArrayList;\r
 import java.util.HashMap;\r
+import java.util.List;\r
 import java.util.Map;\r
 import java.util.Set;\r
-import java.util.StringTokenizer;\r
 \r
 import org.apache.log4j.Logger;\r
-import org.apache.poi.hssf.usermodel.HSSFCell;\r
-import org.apache.poi.hssf.usermodel.HSSFRow;\r
-import org.apache.poi.hssf.usermodel.HSSFSheet;\r
-import org.apache.poi.hssf.usermodel.HSSFWorkbook;\r
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;\r
+import org.springframework.stereotype.Component;\r
 import org.springframework.transaction.TransactionStatus;\r
 \r
 import eu.etaxonomy.cdm.api.application.CdmApplicationController;\r
-import eu.etaxonomy.cdm.database.DbSchemaValidation;\r
-import eu.etaxonomy.cdm.database.ICdmDataSource;\r
+import eu.etaxonomy.cdm.common.CdmUtils;\r
+import eu.etaxonomy.cdm.common.ExcelUtils;\r
 import eu.etaxonomy.cdm.io.common.CdmIoBase;\r
 import eu.etaxonomy.cdm.io.common.ICdmIO;\r
 import eu.etaxonomy.cdm.io.common.IImportConfigurator;\r
 import eu.etaxonomy.cdm.io.common.MapWrapper;\r
 import eu.etaxonomy.cdm.model.common.CdmBase;\r
+import eu.etaxonomy.cdm.model.description.DescriptionElementBase;\r
+import eu.etaxonomy.cdm.model.description.Distribution;\r
+import eu.etaxonomy.cdm.model.description.PresenceAbsenceTermBase;\r
+import eu.etaxonomy.cdm.model.description.PresenceTerm;\r
+import eu.etaxonomy.cdm.model.description.TaxonDescription;\r
+import eu.etaxonomy.cdm.model.location.NamedArea;\r
+import eu.etaxonomy.cdm.model.location.TdwgArea;\r
+import eu.etaxonomy.cdm.model.name.TaxonNameBase;\r
+import eu.etaxonomy.cdm.model.taxon.Taxon;\r
 \r
-public class DistributionImporter extends CdmIoBase implements ICdmIO {\r
+@Component\r
+public class DistributionImporter extends CdmIoBase<IImportConfigurator> implements ICdmIO<IImportConfigurator> {\r
 \r
     /* used */\r
-    private static String EDIT_NAME_COLUMN = "EDIT";\r
-    private static String TDWG_DISTRIBUTION_COLUMN = "TDWG";\r
-    private static String STATUS_COLUMN = "Status";\r
-    private static String LITERATURE_NUMBER_COLUMN = "Lit.";\r
-    private static String LITERATURE_COLUMN = "Literature";\r
+    private static final String EDIT_NAME_COLUMN = "EDIT";\r
+    private static final String TDWG_DISTRIBUTION_COLUMN = "TDWG";\r
+    private static final String STATUS_COLUMN = "Status";\r
+    private static final String LITERATURE_NUMBER_COLUMN = "Lit.";\r
+    private static final String LITERATURE_COLUMN = "Literature";\r
     /* not yet used */\r
-    private static String VERNACULAR_NAME_COLUMN = "Vernacular";\r
-    private static String HABITAT_COLUMN = "Habitat";\r
-    private static String ISO_DISTRIBUTION_COLUMN = "ISO";\r
-    private static String NOTES_COLUMN = "Notes";\r
-    private static String PAGE_NUMBER_COLUMN = "Page";\r
-    private static String INFO_COLUMN = "Info";\r
+//    private static final String VERNACULAR_NAME_COLUMN = "Vernacular";\r
+//    private static final String HABITAT_COLUMN = "Habitat";\r
+//    private static final String CONTROL_COLUMN = "Control";\r
+//    private static final String TRANSLATED_COLUMN = "Translated";\r
+//    private static final String ISO_DISTRIBUTION_COLUMN = "ISO";\r
+//    private static final String NOTES_COLUMN = "Notes";\r
+//    private static final String PAGE_NUMBER_COLUMN = "Page";\r
+//    private static final String INFO_COLUMN = "Info";\r
     \r
-    private static String SEPARATOR = ",";\r
-\r
        private static final Logger logger = Logger.getLogger(DistributionImporter.class);\r
        \r
-       private CdmApplicationController appCtr = null;\r
+       // Stores already processed descriptions\r
+       Map<Taxon, TaxonDescription> myDescriptions = new HashMap<Taxon, TaxonDescription>();\r
 \r
        @Override\r
        protected boolean doInvoke(IImportConfigurator config,\r
                        Map<String, MapWrapper<? extends CdmBase>> stores) {\r
                \r
-       logger.debug("Importing distribution data");\r
-\r
-               try {\r
-                       appCtr = \r
-                               CdmApplicationController.NewInstance(config.getDestination(), DbSchemaValidation.VALIDATE, true);\r
-\r
-               } catch (Exception e) {\r
-                       logger.error("Error creating application controller");\r
-                       e.printStackTrace();\r
-                       System.exit(1);\r
+               if (logger.isDebugEnabled()) { logger.debug("Importing distribution data"); }\r
+       \r
+               // read and save all rows of the excel worksheet\r
+               ArrayList<HashMap<String, String>> recordList;\r
+       try{\r
+               recordList = ExcelUtils.parseXLS(config.getSourceNameString());\r
+               } catch (FileNotFoundException e1) {\r
+                       logger.error("File not found: " + (String)config.getSource());\r
+                       return false;\r
                }\r
-               \r
-       // read and save all rows of the excel worksheet\r
-       ArrayList<HashMap<String, String>> recordList = parseXLS(config.getSourceNameString());\r
        if (recordList != null) {\r
                HashMap<String,String> record = null;\r
+               TransactionStatus txStatus = startTransaction();\r
+\r
                for (int i = 0; i < recordList.size(); i++) {\r
                        record = recordList.get(i);\r
-                       analyzeRecord(config.getDestination(), record);\r
-//                     config.setDbSchemaValidation(DbSchemaValidation.UPDATE);\r
+                       analyzeRecord(record);\r
                }\r
+               commitTransaction(txStatus);\r
        }\r
        \r
                try {\r
-               appCtr.close();\r
-                       logger.debug("End test distribution data import"); \r
+                       if (logger.isDebugEnabled()) { logger.debug("End distribution data import"); }\r
                                \r
                } catch (Exception e) {\r
-               logger.error("Error clsing the application context");\r
+               logger.error("Error closing the application context");\r
                e.printStackTrace();\r
                }\r
        \r
@@ -86,7 +90,10 @@ public class DistributionImporter extends CdmIoBase implements ICdmIO {
        }\r
                        \r
 \r
-    private void analyzeRecord(ICdmDataSource db, HashMap record) {\r
+       /** \r
+        *  Reads the data of one Excel sheet row\r
+        */\r
+    private void analyzeRecord(HashMap<String,String> record) {\r
        /*\r
         * Relevant columns:\r
         * Name (EDIT)\r
@@ -96,16 +103,9 @@ public class DistributionImporter extends CdmIoBase implements ICdmIO {
         * Literature\r
        */\r
        \r
-       /*\r
-        * Find taxon by name\r
-        * TdwgArea.getAreaByTdwgAbbreviation()\r
-        */\r
-       \r
         String editName = "";\r
-        String distribution = "";\r
         ArrayList<String> distributionList = new ArrayList<String>();\r
         String status = "";\r
-        ArrayList<String> statusList = new ArrayList<String>();\r
         String literatureNumber = "";\r
         String literature = "";\r
         \r
@@ -115,120 +115,137 @@ public class DistributionImporter extends CdmIoBase implements ICdmIO {
                \r
                String value = (String) record.get(key);\r
                if (!value.equals("")) {\r
-                       logger.debug("Key = " + key);\r
-                       logger.debug("Value = " + value);\r
+                       if (logger.isDebugEnabled()) { logger.debug(key + ": '" + value + "'"); }\r
                }\r
                \r
                if (key.contains(EDIT_NAME_COLUMN)) {\r
-                       editName = value;\r
-//             logger.debug("Name = " + editName);\r
+                       editName = (String) CdmUtils.removeDuplicateWhitespace(value.trim());\r
                        \r
                        } else if(key.contains(TDWG_DISTRIBUTION_COLUMN)) {\r
-                               distributionList =  buildList(value);\r
+                               distributionList =  CdmUtils.buildList(value);\r
                                \r
                        } else if(key.contains(STATUS_COLUMN)) {\r
-                               statusList = buildList(value);\r
+                               status = (String) CdmUtils.removeDuplicateWhitespace(value.trim());\r
                                \r
                        } else if(key.contains(LITERATURE_NUMBER_COLUMN)) {\r
-                               literatureNumber = value;\r
-//             logger.debug("Literature number = " + literatureNumber);\r
+                               literatureNumber = (String) CdmUtils.removeDuplicateWhitespace(value.trim());\r
                                \r
                        } else if(key.contains(LITERATURE_COLUMN)) {\r
-                               literature = value;\r
-//             logger.debug("Literatur = " + literature);\r
+                               literature = (String) CdmUtils.removeDuplicateWhitespace(value.trim());\r
+                               \r
+                       } else {\r
+                               logger.warn("Column " + key + " ignored");\r
                        }\r
        }\r
        \r
        // Store the data of this record in the DB\r
-       saveRecord(editName, distributionList, statusList, literatureNumber, literature);\r
+       if (!editName.equals("")) {\r
+               saveRecord(editName, distributionList, status, literatureNumber, literature);\r
+       }\r
     }\r
     \r
     \r
        /** \r
-        *  Stores distribution data in the DB\r
+        *  Stores the data of one Excel sheet row in the database\r
         */\r
     private void saveRecord(String taxonName, ArrayList<String> distributionList,\r
-               ArrayList<String> statusList, String literatureNumber, String literature) {\r
+               String status, String literatureNumber, String literature) {\r
 \r
                try {\r
-               TransactionStatus txStatOne = appCtr.startTransaction();\r
-               \r
-//                     appCtr.getNameService().\r
-       \r
-                               \r
-               } catch (Exception e) {\r
-               logger.error("Error");\r
-               e.printStackTrace();\r
-               }\r
-    }\r
-    \r
-    \r
-    private ArrayList<String> buildList(String value) {\r
-       \r
-       ArrayList<String> resultList = new ArrayList<String>();\r
-       StringTokenizer st = new StringTokenizer(value, SEPARATOR);\r
-        while (st.hasMoreTokens()) {\r
-               String listElement = st.nextToken();\r
-            resultList.add(listElement);\r
-               logger.debug("Next token = " + listElement);\r
-        }\r
-        return resultList;\r
-    }\r
-    \r
+               // get the matching names from the DB\r
+               List<TaxonNameBase<?,?>> taxonNameBases = getNameService().findNamesByTitle(taxonName);\r
+               if (taxonNameBases.isEmpty()) {\r
+                       logger.error("Taxon name '" + taxonName + "' not found in DB");\r
+               } else {\r
+                       if (logger.isDebugEnabled()) { logger.debug("Taxon found"); }\r
+               }\r
 \r
-    /** Reads all rows of an Excel worksheet */\r
-    private static ArrayList<HashMap<String, String>> parseXLS(String fileName) {\r
-       \r
-       ArrayList<HashMap<String, String>> recordList = new ArrayList<HashMap<String, String>>();\r
-\r
-       try {\r
-               POIFSFileSystem fs = new POIFSFileSystem(new FileInputStream(fileName));\r
-               HSSFWorkbook wb = new HSSFWorkbook(fs);\r
-               HSSFSheet sheet = wb.getSheetAt(0);\r
-               HSSFRow row;\r
-               HSSFCell cell;\r
-\r
-               int rows; // No of rows\r
-               rows = sheet.getPhysicalNumberOfRows();\r
-\r
-               int cols = 0; // No of columns\r
-               int tmp = 0;\r
-\r
-               // This trick ensures that we get the data properly even if it doesn't start from first few rows\r
-               for(int i = 0; i < 10 || i < rows; i++) {\r
-                       row = sheet.getRow(i);\r
-                       if(row != null) {\r
-                               tmp = sheet.getRow(i).getPhysicalNumberOfCells();\r
-                               if(tmp > cols) cols = tmp;\r
+               // get the taxa for the matching names\r
+               for(TaxonNameBase<?,?> dbTaxonName: taxonNameBases) {\r
+\r
+                       Set<Taxon> taxa = dbTaxonName.getTaxa();\r
+                       if (taxa.isEmpty()) {\r
+                               logger.warn("No taxon found for name '" + taxonName + "'");\r
+                       } else if (taxa.size() > 1) {\r
+                               logger.warn("More than one taxa found for name '" + taxonName + "'");\r
                        }\r
-               }\r
-               HashMap<String, String> headers = null;\r
-               ArrayList<String> columns = new ArrayList<String>();\r
-               row = sheet.getRow(0);\r
-               for (int c = 0; c < cols; c++){\r
-                       cell = row.getCell(c);\r
-                       columns.add(cell.toString());\r
-               }\r
-               for(int r = 1; r < rows; r++) {\r
-                       row = sheet.getRow(r);\r
-                       headers = new HashMap<String, String>();\r
-                       if(row != null) {\r
-                               for(int c = 0; c < cols; c++) {\r
-                                       cell = row.getCell((short)c);\r
-                                       if(cell != null) {\r
-                                               headers.put(columns.get(c), cell.toString());\r
+\r
+                       for(Taxon taxon: taxa) {\r
+\r
+                               TaxonDescription myDescription = null;\r
+\r
+                               // If we have created a description for this taxon earlier, take this one.\r
+                               // Otherwise, create a new description.\r
+                               // We don't update any existing descriptions in the database at this point.\r
+                               if (myDescriptions.containsKey(taxon)) {\r
+                                       myDescription = myDescriptions.get(taxon);\r
+                               } else {\r
+                                       myDescription = TaxonDescription.NewInstance(taxon);\r
+                                       taxon.addDescription(myDescription);\r
+                                       myDescriptions.put(taxon, myDescription);\r
+                               }\r
+\r
+                               // Status\r
+                               PresenceAbsenceTermBase<?> presenceAbsenceStatus = PresenceTerm.NewInstance();\r
+                               if (status.equals("")) {\r
+                                       presenceAbsenceStatus = PresenceTerm.NATIVE();\r
+                               } else {\r
+                                       presenceAbsenceStatus = PresenceTerm.getPresenceTermByAbbreviation(status);\r
+                               }\r
+                               // TODO: Handle absence case. \r
+                               // This case has not yet occurred in the excel input file, though.\r
+                                       \r
+                               /* Set to true as soon as at least one new distribution is added, i.e. the taxon needs to be saved */\r
+                               boolean save = false;\r
+                               \r
+                               // TDWG areas\r
+                               for (String distribution: distributionList) {\r
+\r
+                        /* Set to true if a distribution for this area already exists in the description */\r
+                                       boolean ignore = false;\r
+                                       \r
+                                       if(!distribution.equals("")) {\r
+                                               NamedArea namedArea = TdwgArea.getAreaByTdwgAbbreviation(distribution);\r
+                                               TaxonDescription taxonDescription = myDescriptions.get(taxon);\r
+                                               if (namedArea != null) {    \r
+                                               // Check against existing distributions and ignore the ones that occur multiple times\r
+                                               Set<DescriptionElementBase> myDescriptionElements = taxonDescription.getElements();\r
+                                               for(DescriptionElementBase descriptionElement : myDescriptionElements) {\r
+                                                       if (descriptionElement instanceof Distribution) {\r
+                                                               if (namedArea == ((Distribution)descriptionElement).getArea()) {\r
+                                                                       ignore = true;\r
+                                                                       if (logger.isDebugEnabled()) { \r
+                                                                               logger.debug("Distribution ignored: " + distribution); \r
+                                                                       }\r
+                                                               break;\r
+                                                               }\r
+                                                       }\r
+                                               }\r
+                                               // Create a new distribution if none exists yet for this area\r
+                                               if (ignore == false) {\r
+                                                       save = true;\r
+                                                       Distribution newDistribution = Distribution.NewInstance(namedArea, presenceAbsenceStatus);\r
+                                                       myDescription.addElement(newDistribution);\r
+                                                       if (logger.isDebugEnabled()) { \r
+                                                               logger.debug("Distribution created: " + newDistribution.toString());\r
+                                                       }\r
+                                               }\r
+                                               }\r
                                        }\r
                                }\r
+                               if (save == true) {\r
+                                       getTaxonService().saveTaxon(taxon);\r
+                                       if (logger.isDebugEnabled()) { logger.debug("Taxon saved"); }\r
+                               }\r
                        }\r
-                       recordList.add(headers);\r
-               }\r
-\r
-       } catch(Exception ioe) {\r
-               ioe.printStackTrace();\r
+               } \r
+       } catch (Exception e) {\r
+               logger.error("Error");\r
+               e.printStackTrace();\r
        }\r
-       return recordList;\r
     }\r
-\r
+    \r
+    \r
        @Override\r
        protected boolean doCheck(IImportConfigurator config) {\r
                boolean result = true;\r