package eu.etaxonomy.cdm.io.excel.distribution;\r
\r
-import java.io.FileInputStream;\r
+import java.io.FileNotFoundException;\r
import java.util.ArrayList;\r
import java.util.HashMap;\r
-import java.util.HashSet;\r
import java.util.List;\r
import java.util.Map;\r
import java.util.Set;\r
-import java.util.StringTokenizer;\r
-import java.util.regex.Matcher;\r
-import java.util.regex.Pattern;\r
\r
import org.apache.log4j.Logger;\r
-import org.apache.poi.hssf.usermodel.HSSFCell;\r
-import org.apache.poi.hssf.usermodel.HSSFRow;\r
-import org.apache.poi.hssf.usermodel.HSSFSheet;\r
-import org.apache.poi.hssf.usermodel.HSSFWorkbook;\r
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;\r
+import org.springframework.stereotype.Component;\r
import org.springframework.transaction.TransactionStatus;\r
\r
import eu.etaxonomy.cdm.api.application.CdmApplicationController;\r
+import eu.etaxonomy.cdm.common.CdmUtils;\r
import eu.etaxonomy.cdm.common.ExcelUtils;\r
-import eu.etaxonomy.cdm.database.DbSchemaValidation;\r
-import eu.etaxonomy.cdm.database.ICdmDataSource;\r
import eu.etaxonomy.cdm.io.common.CdmIoBase;\r
import eu.etaxonomy.cdm.io.common.ICdmIO;\r
import eu.etaxonomy.cdm.io.common.IImportConfigurator;\r
import eu.etaxonomy.cdm.model.location.NamedArea;\r
import eu.etaxonomy.cdm.model.location.TdwgArea;\r
import eu.etaxonomy.cdm.model.name.TaxonNameBase;\r
-import eu.etaxonomy.cdm.model.reference.ReferenceBase;\r
-import eu.etaxonomy.cdm.model.taxon.Synonym;\r
import eu.etaxonomy.cdm.model.taxon.Taxon;\r
-import eu.etaxonomy.cdm.model.taxon.TaxonBase;\r
\r
+@Component\r
public class DistributionImporter extends CdmIoBase<IImportConfigurator> implements ICdmIO<IImportConfigurator> {\r
\r
/* used */\r
- private static String EDIT_NAME_COLUMN = "EDIT";\r
- private static String TDWG_DISTRIBUTION_COLUMN = "TDWG";\r
- private static String STATUS_COLUMN = "Status";\r
- private static String LITERATURE_NUMBER_COLUMN = "Lit.";\r
- private static String LITERATURE_COLUMN = "Literature";\r
+ private static final String EDIT_NAME_COLUMN = "EDIT";\r
+ private static final String TDWG_DISTRIBUTION_COLUMN = "TDWG";\r
+ private static final String STATUS_COLUMN = "Status";\r
+ private static final String LITERATURE_NUMBER_COLUMN = "Lit.";\r
+ private static final String LITERATURE_COLUMN = "Literature";\r
/* not yet used */\r
- private static String VERNACULAR_NAME_COLUMN = "Vernacular";\r
- private static String HABITAT_COLUMN = "Habitat";\r
- private static String ISO_DISTRIBUTION_COLUMN = "ISO";\r
- private static String NOTES_COLUMN = "Notes";\r
- private static String PAGE_NUMBER_COLUMN = "Page";\r
- private static String INFO_COLUMN = "Info";\r
+// private static final String VERNACULAR_NAME_COLUMN = "Vernacular";\r
+// private static final String HABITAT_COLUMN = "Habitat";\r
+// private static final String CONTROL_COLUMN = "Control";\r
+// private static final String TRANSLATED_COLUMN = "Translated";\r
+// private static final String ISO_DISTRIBUTION_COLUMN = "ISO";\r
+// private static final String NOTES_COLUMN = "Notes";\r
+// private static final String PAGE_NUMBER_COLUMN = "Page";\r
+// private static final String INFO_COLUMN = "Info";\r
\r
- private static String SEPARATOR = ",";\r
-\r
private static final Logger logger = Logger.getLogger(DistributionImporter.class);\r
\r
- private CdmApplicationController appCtr = null;\r
// Stores already processed descriptions\r
Map<Taxon, TaxonDescription> myDescriptions = new HashMap<Taxon, TaxonDescription>();\r
\r
protected boolean doInvoke(IImportConfigurator config,\r
Map<String, MapWrapper<? extends CdmBase>> stores) {\r
\r
- logger.debug("Importing distribution data");\r
- appCtr = config.getCdmAppController();\r
+ if (logger.isDebugEnabled()) { logger.debug("Importing distribution data"); }\r
\r
// read and save all rows of the excel worksheet\r
- ArrayList<HashMap<String, String>> recordList = ExcelUtils.parseXLS(config.getSourceNameString());\r
+ ArrayList<HashMap<String, String>> recordList;\r
+ try{\r
+ recordList = ExcelUtils.parseXLS(config.getSourceNameString());\r
+ } catch (FileNotFoundException e1) {\r
+ // Report the same path that was handed to the parser; casting getSource() to\r
+ // String could itself throw here if the configured source is not a String.\r
+ // The exception is passed along so its details are not lost.\r
+ logger.error("File not found: " + config.getSourceNameString(), e1);\r
+ return false;\r
+ }\r
if (recordList != null) {\r
HashMap<String,String> record = null;\r
- TransactionStatus txStatus = appCtr.startTransaction();\r
+ TransactionStatus txStatus = startTransaction();\r
\r
for (int i = 0; i < recordList.size(); i++) {\r
record = recordList.get(i);\r
- analyzeRecord(config.getDestination(), record);\r
+ analyzeRecord(record);\r
}\r
- appCtr.commitTransaction(txStatus);\r
+ commitTransaction(txStatus);\r
}\r
\r
try {\r
- appCtr.close();\r
- logger.debug("End test distribution data import"); \r
+ if (logger.isDebugEnabled()) { logger.debug("End distribution data import"); }\r
\r
} catch (Exception e) {\r
- logger.error("Error clsing the application context");\r
+ logger.error("Error closing the application context");\r
e.printStackTrace();\r
}\r
\r
}\r
\r
\r
- private void analyzeRecord(ICdmDataSource db, HashMap record) {\r
+ /**\r
+ * Reads the data of one Excel sheet row.\r
+ *\r
+ * @param record the row's cell values, keyed by column header\r
+ */\r
+ private void analyzeRecord(HashMap<String,String> record) {\r
/*\r
* Relevant columns:\r
* Name (EDIT)\r
*/\r
\r
String editName = "";\r
- String distribution = "";\r
ArrayList<String> distributionList = new ArrayList<String>();\r
String status = "";\r
String literatureNumber = "";\r
\r
String value = (String) record.get(key);\r
if (!value.equals("")) {\r
-// logger.debug("Key = " + key);\r
- logger.debug(key + ": '" + value + "'");\r
+ if (logger.isDebugEnabled()) { logger.debug(key + ": '" + value + "'"); }\r
}\r
\r
if (key.contains(EDIT_NAME_COLUMN)) {\r
- editName = (String) removeDuplicateWhitespace(value.trim());\r
+ editName = (String) CdmUtils.removeDuplicateWhitespace(value.trim());\r
\r
} else if(key.contains(TDWG_DISTRIBUTION_COLUMN)) {\r
- distributionList = buildList(value);\r
+ distributionList = CdmUtils.buildList(value);\r
\r
} else if(key.contains(STATUS_COLUMN)) {\r
- status = (String) removeDuplicateWhitespace(value.trim());\r
+ status = (String) CdmUtils.removeDuplicateWhitespace(value.trim());\r
\r
} else if(key.contains(LITERATURE_NUMBER_COLUMN)) {\r
- literatureNumber = (String) removeDuplicateWhitespace(value.trim());\r
+ literatureNumber = (String) CdmUtils.removeDuplicateWhitespace(value.trim());\r
\r
} else if(key.contains(LITERATURE_COLUMN)) {\r
- literature = (String) removeDuplicateWhitespace(value.trim());\r
+ literature = (String) CdmUtils.removeDuplicateWhitespace(value.trim());\r
+ \r
+ } else {\r
+ logger.warn("Column " + key + " ignored");\r
}\r
}\r
\r
\r
\r
/** \r
- * Stores distribution data in the DB\r
+ * Stores the data of one Excel sheet row in the database\r
*/\r
private void saveRecord(String taxonName, ArrayList<String> distributionList,\r
String status, String literatureNumber, String literature) {\r
\r
try {\r
// get the matching names from the DB\r
- List<TaxonNameBase<?,?>> taxonNameBases = appCtr.getNameService().findNamesByTitle(taxonName);\r
+ List<TaxonNameBase<?,?>> taxonNameBases = getNameService().findNamesByTitle(taxonName);\r
if (taxonNameBases.isEmpty()) {\r
logger.error("Taxon name '" + taxonName + "' not found in DB");\r
} else {\r
- logger.debug("Taxon found");\r
+ if (logger.isDebugEnabled()) { logger.debug("Taxon found"); }\r
}\r
\r
// get the taxa for the matching names\r
- for(TaxonNameBase dbTaxonName: taxonNameBases) {\r
+ for(TaxonNameBase<?,?> dbTaxonName: taxonNameBases) {\r
\r
Set<Taxon> taxa = dbTaxonName.getTaxa();\r
if (taxa.isEmpty()) {\r
} else {\r
presenceAbsenceStatus = PresenceTerm.getPresenceTermByAbbreviation(status);\r
}\r
- // TODO: Handle absence case\r
+ // TODO: Handle absence case.\r
+ // This case has not yet occurred in the Excel input file, though.\r
\r
/* Set to true if taxon needs to be saved if at least one new distribution exists */\r
boolean save = false;\r
if (descriptionElement instanceof Distribution) {\r
if (namedArea == ((Distribution)descriptionElement).getArea()) {\r
ignore = true;\r
- logger.debug("Distribution ignored: " + distribution);\r
+ if (logger.isDebugEnabled()) { \r
+ logger.debug("Distribution ignored: " + distribution); \r
+ }\r
break;\r
}\r
}\r
save = true;\r
Distribution newDistribution = Distribution.NewInstance(namedArea, presenceAbsenceStatus);\r
myDescription.addElement(newDistribution);\r
- logger.debug("Distribution created: " + newDistribution.toString());\r
+ if (logger.isDebugEnabled()) { \r
+ logger.debug("Distribution created: " + newDistribution.toString());\r
+ }\r
}\r
}\r
}\r
}\r
if (save == true) {\r
- appCtr.getTaxonService().saveTaxon(taxon);\r
- logger.debug("Taxon saved");\r
+ getTaxonService().saveTaxon(taxon);\r
+ if (logger.isDebugEnabled()) { logger.debug("Taxon saved"); }\r
}\r
}\r
} \r
}\r
\r
\r
- /** Returns a version of the input where all contiguous\r
- * whitespace characters are replaced with a single\r
- * space. Line terminators are treated like whitespace.\r
- * \r
- * @param inputStr\r
- * @return\r
- */\r
- private static CharSequence removeDuplicateWhitespace(CharSequence inputStr) {\r
- \r
- String patternStr = "\\s+";\r
- String replaceStr = " ";\r
- Pattern pattern = Pattern.compile(patternStr);\r
- Matcher matcher = pattern.matcher(inputStr);\r
- return matcher.replaceAll(replaceStr);\r
- }\r
- \r
-\r
- /** Builds a list of strings by splitting an input string\r
- * with delimiters whitespace, comma, or semicolon\r
- * @param value\r
- * @return\r
- */\r
- private ArrayList<String> buildList(String value) {\r
-\r
- ArrayList<String> resultList = new ArrayList<String>();\r
- for (String tag : value.split("[\\s,;]+")) {\r
- resultList.add(tag);\r
- }\r
- return resultList;\r
- }\r
-\r
@Override\r
protected boolean doCheck(IImportConfigurator config) {\r
boolean result = true;\r