package eu.etaxonomy.cdm.io.excel.distribution;\r
\r
-import java.io.FileInputStream;\r
+import java.io.FileNotFoundException;\r
import java.util.ArrayList;\r
import java.util.HashMap;\r
+import java.util.List;\r
import java.util.Map;\r
import java.util.Set;\r
-import java.util.StringTokenizer;\r
\r
import org.apache.log4j.Logger;\r
-import org.apache.poi.hssf.usermodel.HSSFCell;\r
-import org.apache.poi.hssf.usermodel.HSSFRow;\r
-import org.apache.poi.hssf.usermodel.HSSFSheet;\r
-import org.apache.poi.hssf.usermodel.HSSFWorkbook;\r
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;\r
+import org.springframework.stereotype.Component;\r
import org.springframework.transaction.TransactionStatus;\r
\r
import eu.etaxonomy.cdm.api.application.CdmApplicationController;\r
-import eu.etaxonomy.cdm.database.DbSchemaValidation;\r
-import eu.etaxonomy.cdm.database.ICdmDataSource;\r
+import eu.etaxonomy.cdm.common.CdmUtils;\r
+import eu.etaxonomy.cdm.common.ExcelUtils;\r
import eu.etaxonomy.cdm.io.common.CdmIoBase;\r
import eu.etaxonomy.cdm.io.common.ICdmIO;\r
import eu.etaxonomy.cdm.io.common.IImportConfigurator;\r
import eu.etaxonomy.cdm.io.common.MapWrapper;\r
import eu.etaxonomy.cdm.model.common.CdmBase;\r
+import eu.etaxonomy.cdm.model.description.DescriptionElementBase;\r
+import eu.etaxonomy.cdm.model.description.Distribution;\r
+import eu.etaxonomy.cdm.model.description.PresenceAbsenceTermBase;\r
+import eu.etaxonomy.cdm.model.description.PresenceTerm;\r
+import eu.etaxonomy.cdm.model.description.TaxonDescription;\r
+import eu.etaxonomy.cdm.model.location.NamedArea;\r
+import eu.etaxonomy.cdm.model.location.TdwgArea;\r
+import eu.etaxonomy.cdm.model.name.TaxonNameBase;\r
+import eu.etaxonomy.cdm.model.taxon.Taxon;\r
\r
-public class DistributionImporter extends CdmIoBase implements ICdmIO {\r
+@Component\r
+public class DistributionImporter extends CdmIoBase<IImportConfigurator> implements ICdmIO<IImportConfigurator> {\r
\r
/* used */\r
- private static String EDIT_NAME_COLUMN = "EDIT";\r
- private static String TDWG_DISTRIBUTION_COLUMN = "TDWG";\r
- private static String STATUS_COLUMN = "Status";\r
- private static String LITERATURE_NUMBER_COLUMN = "Lit.";\r
- private static String LITERATURE_COLUMN = "Literature";\r
+ private static final String EDIT_NAME_COLUMN = "EDIT";\r
+ private static final String TDWG_DISTRIBUTION_COLUMN = "TDWG";\r
+ private static final String STATUS_COLUMN = "Status";\r
+ private static final String LITERATURE_NUMBER_COLUMN = "Lit.";\r
+ private static final String LITERATURE_COLUMN = "Literature";\r
/* not yet used */\r
- private static String VERNACULAR_NAME_COLUMN = "Vernacular";\r
- private static String HABITAT_COLUMN = "Habitat";\r
- private static String ISO_DISTRIBUTION_COLUMN = "ISO";\r
- private static String NOTES_COLUMN = "Notes";\r
- private static String PAGE_NUMBER_COLUMN = "Page";\r
- private static String INFO_COLUMN = "Info";\r
+// private static final String VERNACULAR_NAME_COLUMN = "Vernacular";\r
+// private static final String HABITAT_COLUMN = "Habitat";\r
+// private static final String CONTROL_COLUMN = "Control";\r
+// private static final String TRANSLATED_COLUMN = "Translated";\r
+// private static final String ISO_DISTRIBUTION_COLUMN = "ISO";\r
+// private static final String NOTES_COLUMN = "Notes";\r
+// private static final String PAGE_NUMBER_COLUMN = "Page";\r
+// private static final String INFO_COLUMN = "Info";\r
\r
- private static String SEPARATOR = ",";\r
-\r
private static final Logger logger = Logger.getLogger(DistributionImporter.class);\r
\r
- private CdmApplicationController appCtr = null;\r
+ // Descriptions created by this importer, keyed by their taxon, so that all rows of one taxon share one description\r
+ Map<Taxon, TaxonDescription> myDescriptions = new HashMap<Taxon, TaxonDescription>();\r
\r
+ /**\r
+ * Imports distribution data from the Excel file named by the configurator.\r
+ * All rows are parsed first and are then analyzed and stored within a single transaction.\r
+ *\r
+ * @param config import configuration; its source name is handed to the Excel parser\r
+ * @param stores not used by the statements of this method\r
+ * @return <code>false</code> if the Excel file cannot be found\r
+ */\r
@Override\r
protected boolean doInvoke(IImportConfigurator config,\r
Map<String, MapWrapper<? extends CdmBase>> stores) {\r
\r
- logger.debug("Importing distribution data");\r
-\r
- try {\r
- appCtr = \r
- CdmApplicationController.NewInstance(config.getDestination(), DbSchemaValidation.VALIDATE, true);\r
-\r
- } catch (Exception e) {\r
- logger.error("Error creating application controller");\r
- e.printStackTrace();\r
- System.exit(1);\r
+ if (logger.isDebugEnabled()) { logger.debug("Importing distribution data"); }\r
+ \r
+ // This importer is a component instance that may be invoked more than once:\r
+ // drop descriptions cached by a previous run, their transaction has already been committed.\r
+ myDescriptions.clear();\r
+ \r
+ // read and save all rows of the excel worksheet\r
+ ArrayList<HashMap<String, String>> recordList;\r
+ try{\r
+ recordList = ExcelUtils.parseXLS(config.getSourceNameString());\r
+ } catch (FileNotFoundException e1) {\r
+ // Report the same source name that was handed to the parser; casting getSource() to String may itself fail\r
+ logger.error("File not found: " + config.getSourceNameString(), e1);\r
+ return false;\r
}\r
- \r
- // read and save all rows of the excel worksheet\r
- ArrayList<HashMap<String, String>> recordList = parseXLS(config.getSourceNameString());\r
if (recordList != null) {\r
HashMap<String,String> record = null;\r
+ TransactionStatus txStatus = startTransaction();\r
+\r
for (int i = 0; i < recordList.size(); i++) {\r
record = recordList.get(i);\r
- analyzeRecord(config.getDestination(), record);\r
-// config.setDbSchemaValidation(DbSchemaValidation.UPDATE);\r
+ analyzeRecord(record);\r
}\r
+ commitTransaction(txStatus);\r
}\r
\r
- try {\r
- appCtr.close();\r
- logger.debug("End test distribution data import"); \r
+ if (logger.isDebugEnabled()) { logger.debug("End distribution data import"); }\r
\r
- } catch (Exception e) {\r
- logger.error("Error clsing the application context");\r
- e.printStackTrace();\r
- }\r
\r
}\r
\r
\r
- private void analyzeRecord(ICdmDataSource db, HashMap record) {\r
+ /** \r
+ * Reads the data of one Excel sheet row.\r
+ * The values of the name, TDWG distribution, status, literature number and literature\r
+ * columns are extracted; a column is recognised by its key containing the respective\r
+ * column constant. Every other column is reported with a warning.\r
+ * The row is handed to saveRecord only if its name is not empty.\r
+ *\r
+ * @param record the cell values of one row (presumably keyed by column header - to be confirmed)\r
+ */\r
+ private void analyzeRecord(HashMap<String,String> record) {\r
/*\r
* Relevant columns:\r
* Name (EDIT)\r
* Literature\r
*/\r
\r
- /*\r
- * Find taxon by name\r
- * TdwgArea.getAreaByTdwgAbbreviation()\r
- */\r
- \r
String editName = "";\r
- String distribution = "";\r
ArrayList<String> distributionList = new ArrayList<String>();\r
String status = "";\r
- ArrayList<String> statusList = new ArrayList<String>();\r
String literatureNumber = "";\r
String literature = "";\r
\r
\r
String value = (String) record.get(key);\r
if (!value.equals("")) {\r
- logger.debug("Key = " + key);\r
- logger.debug("Value = " + value);\r
+ if (logger.isDebugEnabled()) { logger.debug(key + ": '" + value + "'"); }\r
}\r
\r
if (key.contains(EDIT_NAME_COLUMN)) {\r
- editName = value;\r
-// logger.debug("Name = " + editName);\r
+ editName = (String) CdmUtils.removeDuplicateWhitespace(value.trim());\r
\r
} else if(key.contains(TDWG_DISTRIBUTION_COLUMN)) {\r
- distributionList = buildList(value);\r
+ distributionList = CdmUtils.buildList(value);\r
\r
} else if(key.contains(STATUS_COLUMN)) {\r
- statusList = buildList(value);\r
+ status = (String) CdmUtils.removeDuplicateWhitespace(value.trim());\r
\r
} else if(key.contains(LITERATURE_NUMBER_COLUMN)) {\r
- literatureNumber = value;\r
-// logger.debug("Literature number = " + literatureNumber);\r
+ literatureNumber = (String) CdmUtils.removeDuplicateWhitespace(value.trim());\r
\r
} else if(key.contains(LITERATURE_COLUMN)) {\r
- literature = value;\r
-// logger.debug("Literatur = " + literature);\r
+ literature = (String) CdmUtils.removeDuplicateWhitespace(value.trim());\r
+ \r
+ } else {\r
+ logger.warn("Column " + key + " ignored");\r
}\r
}\r
\r
// Store the data of this record in the DB\r
- saveRecord(editName, distributionList, statusList, literatureNumber, literature);\r
+ if (!editName.equals("")) {\r
+ saveRecord(editName, distributionList, status, literatureNumber, literature);\r
+ }\r
}\r
\r
\r
/** \r
- * Stores distribution data in the DB\r
+ * Stores the data of one Excel sheet row in the database.\r
+ * For every taxon that belongs to a name matching <code>taxonName</code>, a distribution\r
+ * with the given status is added for each TDWG area that is not yet contained in the\r
+ * description this importer created for the taxon. A taxon is saved only if at least\r
+ * one distribution was added. Failures are logged and do not abort the import.\r
+ *\r
+ * @param taxonName title of the taxon name to look up\r
+ * @param distributionList TDWG area abbreviations; empty entries are skipped\r
+ * @param status abbreviation of the presence status; an empty string means native\r
+ * @param literatureNumber currently not evaluated\r
+ * @param literature currently not evaluated\r
*/\r
private void saveRecord(String taxonName, ArrayList<String> distributionList,\r
- ArrayList<String> statusList, String literatureNumber, String literature) {\r
+ String status, String literatureNumber, String literature) {\r
\r
try {\r
- TransactionStatus txStatOne = appCtr.startTransaction();\r
- \r
-// appCtr.getNameService().\r
- \r
- \r
- } catch (Exception e) {\r
- logger.error("Error");\r
- e.printStackTrace();\r
- }\r
- }\r
- \r
- \r
- private ArrayList<String> buildList(String value) {\r
- \r
- ArrayList<String> resultList = new ArrayList<String>();\r
- StringTokenizer st = new StringTokenizer(value, SEPARATOR);\r
- while (st.hasMoreTokens()) {\r
- String listElement = st.nextToken();\r
- resultList.add(listElement);\r
- logger.debug("Next token = " + listElement);\r
- }\r
- return resultList;\r
- }\r
- \r
+ // Status: the same for all taxa of this row, so it is resolved once. An empty cell defaults to native.\r
+ // TODO: Handle absence case. \r
+ // This case has not yet occurred in the excel input file, though.\r
+ PresenceAbsenceTermBase<?> presenceAbsenceStatus;\r
+ if (status.equals("")) {\r
+ presenceAbsenceStatus = PresenceTerm.NATIVE();\r
+ } else {\r
+ presenceAbsenceStatus = PresenceTerm.getPresenceTermByAbbreviation(status);\r
+ if (presenceAbsenceStatus == null) {\r
+ logger.warn("No presence term found for status '" + status + "'");\r
+ }\r
+ }\r
+\r
+ // get the matching names from the DB\r
+ List<TaxonNameBase<?,?>> taxonNameBases = getNameService().findNamesByTitle(taxonName);\r
+ if (taxonNameBases.isEmpty()) {\r
+ logger.error("Taxon name '" + taxonName + "' not found in DB");\r
+ } else {\r
+ if (logger.isDebugEnabled()) { logger.debug("Taxon found"); }\r
+ }\r
\r
- /** Reads all rows of an Excel worksheet */\r
- private static ArrayList<HashMap<String, String>> parseXLS(String fileName) {\r
- \r
- ArrayList<HashMap<String, String>> recordList = new ArrayList<HashMap<String, String>>();\r
-\r
- try {\r
- POIFSFileSystem fs = new POIFSFileSystem(new FileInputStream(fileName));\r
- HSSFWorkbook wb = new HSSFWorkbook(fs);\r
- HSSFSheet sheet = wb.getSheetAt(0);\r
- HSSFRow row;\r
- HSSFCell cell;\r
-\r
- int rows; // No of rows\r
- rows = sheet.getPhysicalNumberOfRows();\r
-\r
- int cols = 0; // No of columns\r
- int tmp = 0;\r
-\r
- // This trick ensures that we get the data properly even if it doesn't start from first few rows\r
- for(int i = 0; i < 10 || i < rows; i++) {\r
- row = sheet.getRow(i);\r
- if(row != null) {\r
- tmp = sheet.getRow(i).getPhysicalNumberOfCells();\r
- if(tmp > cols) cols = tmp;\r
+ // get the taxa for the matching names\r
+ for(TaxonNameBase<?,?> dbTaxonName: taxonNameBases) {\r
+\r
+ Set<Taxon> taxa = dbTaxonName.getTaxa();\r
+ if (taxa.isEmpty()) {\r
+ logger.warn("No taxon found for name '" + taxonName + "'");\r
+ } else if (taxa.size() > 1) {\r
+ logger.warn("More than one taxa found for name '" + taxonName + "'");\r
}\r
- }\r
- HashMap<String, String> headers = null;\r
- ArrayList<String> columns = new ArrayList<String>();\r
- row = sheet.getRow(0);\r
- for (int c = 0; c < cols; c++){\r
- cell = row.getCell(c);\r
- columns.add(cell.toString());\r
- }\r
- for(int r = 1; r < rows; r++) {\r
- row = sheet.getRow(r);\r
- headers = new HashMap<String, String>();\r
- if(row != null) {\r
- for(int c = 0; c < cols; c++) {\r
- cell = row.getCell((short)c);\r
- if(cell != null) {\r
- headers.put(columns.get(c), cell.toString());\r
+\r
+ for(Taxon taxon: taxa) {\r
+\r
+ // If we have created a description for this taxon earlier, take this one.\r
+ // Otherwise, create a new description.\r
+ // We don't update any existing descriptions in the database at this point.\r
+ TaxonDescription myDescription = myDescriptions.get(taxon);\r
+ if (myDescription == null) {\r
+ myDescription = TaxonDescription.NewInstance(taxon);\r
+ taxon.addDescription(myDescription);\r
+ myDescriptions.put(taxon, myDescription);\r
+ }\r
+\r
+ /* Set to true as soon as at least one new distribution was added, i.e. the taxon needs to be saved */\r
+ boolean save = false;\r
+ \r
+ // TDWG areas\r
+ for (String distribution: distributionList) {\r
+\r
+ // Tolerate whitespace around the separators of the area list\r
+ String areaAbbreviation = distribution.trim();\r
+ if(!areaAbbreviation.equals("")) {\r
+ NamedArea namedArea = TdwgArea.getAreaByTdwgAbbreviation(areaAbbreviation);\r
+ if (namedArea == null) {\r
+ logger.warn("Unknown TDWG area '" + areaAbbreviation + "' ignored for name '" + taxonName + "'");\r
+ } else {\r
+ // Check against the distributions of this description and ignore areas that occur multiple times\r
+ boolean alreadyPresent = false;\r
+ for(DescriptionElementBase descriptionElement : myDescription.getElements()) {\r
+ if (descriptionElement instanceof Distribution\r
+ && namedArea.equals(((Distribution)descriptionElement).getArea())) {\r
+ alreadyPresent = true;\r
+ break;\r
+ }\r
+ }\r
+ if (alreadyPresent) {\r
+ if (logger.isDebugEnabled()) { logger.debug("Distribution ignored: " + areaAbbreviation); }\r
+ } else {\r
+ // Create new distribution if not yet exist\r
+ save = true;\r
+ Distribution newDistribution = Distribution.NewInstance(namedArea, presenceAbsenceStatus);\r
+ myDescription.addElement(newDistribution);\r
+ if (logger.isDebugEnabled()) { \r
+ logger.debug("Distribution created: " + newDistribution.toString());\r
+ }\r
+ }\r
+ }\r
}\r
}\r
+ if (save) {\r
+ getTaxonService().saveTaxon(taxon);\r
+ if (logger.isDebugEnabled()) { logger.debug("Taxon saved"); }\r
+ }\r
}\r
- recordList.add(headers);\r
- }\r
-\r
- } catch(Exception ioe) {\r
- ioe.printStackTrace();\r
+ } \r
+ } catch (Exception e) {\r
+ // Keep the import running for the remaining rows, but report which row failed and why\r
+ logger.error("Error saving distribution data for taxon name '" + taxonName + "'", e);\r
}\r
- return recordList;\r
}\r
-\r
+ \r
+ \r
@Override\r
protected boolean doCheck(IImportConfigurator config) {\r
boolean result = true;\r