From 1b92e94a6ddc8d8e810ecf162103c7b0c0cc7e8e Mon Sep 17 00:00:00 2001 From: =?utf8?q?Andreas=20M=C3=BCller?= Date: Tue, 1 Mar 2011 18:49:51 +0000 Subject: [PATCH] Final version of Cyprus Distribution import --- .../cdm/app/cyprus/CyprusActivator.java | 14 +- .../io/cyprus/CyprusDistributionImport.java | 326 +++++++++++------- .../io/cyprus/CyprusImportConfigurator.java | 2 +- .../cdm/io/cyprus/CyprusTransformer.java | 1 + 4 files changed, 217 insertions(+), 126 deletions(-) diff --git a/app-import/src/main/java/eu/etaxonomy/cdm/app/cyprus/CyprusActivator.java b/app-import/src/main/java/eu/etaxonomy/cdm/app/cyprus/CyprusActivator.java index c85d8f26..bf7a5607 100644 --- a/app-import/src/main/java/eu/etaxonomy/cdm/app/cyprus/CyprusActivator.java +++ b/app-import/src/main/java/eu/etaxonomy/cdm/app/cyprus/CyprusActivator.java @@ -27,7 +27,6 @@ import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException; import eu.etaxonomy.cdm.io.cyprus.CyprusImportConfigurator; import eu.etaxonomy.cdm.io.cyprus.CyprusTransformer; import eu.etaxonomy.cdm.model.agent.Person; -import eu.etaxonomy.cdm.model.agent.Team; import eu.etaxonomy.cdm.model.description.Feature; import eu.etaxonomy.cdm.model.description.FeatureNode; import eu.etaxonomy.cdm.model.description.FeatureTree; @@ -43,15 +42,16 @@ public class CyprusActivator { private static final Logger logger = Logger.getLogger(CyprusActivator.class); //database validation status (create, update, validate ...) 
- static DbSchemaValidation hbm2dll = DbSchemaValidation.CREATE; + static DbSchemaValidation hbm2dll = DbSchemaValidation.VALIDATE; static final URI source = cyprus_distribution(); +// static final URI source = cyprus_local(); -// static final ICdmDataSource cdmDestination = CdmDestinations.localH2(); +// static final ICdmDataBSource cdmDestination = CdmDestinations.localH2(); // static final ICdmDataSource cdmDestination = CdmDestinations.cdm_local_postgres_CdmTest(); - static final ICdmDataSource cdmDestination = CdmDestinations.cdm_test_local_mysql(); +// static final ICdmDataSource cdmDestination = CdmDestinations.cdm_test_local_mysql(); // static final ICdmDataSource cdmDestination = CdmDestinations.cdm_cyprus_dev(); -// static final ICdmDataSource cdmDestination = CdmDestinations.cdm_cyprus_production(); + static final ICdmDataSource cdmDestination = CdmDestinations.cdm_cyprus_production(); //feature tree uuid @@ -77,8 +77,6 @@ public class CyprusActivator { config.setCheck(check); config.setDoDistribution(doDistribution); config.setDoTaxa(doTaxa); -// config.setDefaultLanguageUuid(defaultLanguageUuid); -// config.setDoPrintKeys(doPrintKeys); config.setDbSchemaValidation(hbm2dll); CdmDefaultImport myImport = new CdmDefaultImport(); @@ -158,7 +156,7 @@ public class CyprusActivator { public static URI cyprus_distribution() { URI sourceUrl; try { - sourceUrl = new URI("file:/C:/localCopy/Data/zypern/Zypern_distribution.xls"); + sourceUrl = new URI("file:/C:/localCopy/Data/zypern/Zypern_distribution_RH_corr.xls"); return sourceUrl; } catch (URISyntaxException e) { e.printStackTrace(); diff --git a/app-import/src/main/java/eu/etaxonomy/cdm/io/cyprus/CyprusDistributionImport.java b/app-import/src/main/java/eu/etaxonomy/cdm/io/cyprus/CyprusDistributionImport.java index 5c481bcd..a3797067 100644 --- a/app-import/src/main/java/eu/etaxonomy/cdm/io/cyprus/CyprusDistributionImport.java +++ 
b/app-import/src/main/java/eu/etaxonomy/cdm/io/cyprus/CyprusDistributionImport.java @@ -9,10 +9,11 @@ package eu.etaxonomy.cdm.io.cyprus; -import java.util.Arrays; +import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.UUID; @@ -21,12 +22,14 @@ import org.apache.commons.lang.StringUtils; import org.apache.log4j.Logger; import org.springframework.stereotype.Component; +import eu.etaxonomy.cdm.api.service.pager.Pager; import eu.etaxonomy.cdm.common.CdmUtils; import eu.etaxonomy.cdm.io.common.mapping.IInputTransformer; import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException; import eu.etaxonomy.cdm.io.excel.common.ExcelImporterBase; import eu.etaxonomy.cdm.model.common.CdmBase; import eu.etaxonomy.cdm.model.common.TermVocabulary; +import eu.etaxonomy.cdm.model.description.DescriptionBase; import eu.etaxonomy.cdm.model.description.DescriptionElementBase; import eu.etaxonomy.cdm.model.description.Distribution; import eu.etaxonomy.cdm.model.description.PresenceAbsenceTermBase; @@ -35,14 +38,12 @@ import eu.etaxonomy.cdm.model.description.TaxonDescription; import eu.etaxonomy.cdm.model.location.NamedArea; import eu.etaxonomy.cdm.model.location.NamedAreaLevel; import eu.etaxonomy.cdm.model.location.NamedAreaType; +import eu.etaxonomy.cdm.model.location.TdwgArea; +import eu.etaxonomy.cdm.model.location.WaterbodyOrCountry; import eu.etaxonomy.cdm.model.name.BotanicalName; -import eu.etaxonomy.cdm.model.name.Rank; -import eu.etaxonomy.cdm.model.name.TaxonNameBase; import eu.etaxonomy.cdm.model.reference.Reference; import eu.etaxonomy.cdm.model.reference.ReferenceFactory; import eu.etaxonomy.cdm.model.taxon.Taxon; -import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser; -import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl; /** * @author a.babadshanjan @@ -54,9 +55,6 @@ import 
eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl; public class CyprusDistributionImport extends ExcelImporterBase { private static final Logger logger = Logger.getLogger(CyprusDistributionImport.class); - public static Set validMarkers = new HashSet(Arrays.asList(new String[]{"", "valid", "accepted", "a", "v", "t"})); - public static Set synonymMarkers = new HashSet(Arrays.asList(new String[]{"", "invalid", "synonym", "s", "i"})); - /* (non-Javadoc) * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase) */ @@ -74,64 +72,11 @@ public class CyprusDistributionImport extends ExcelImporterBase record, CyprusImportState state) { - - boolean success = true; - Set keys = record.keySet(); - - CyprusDistributionRow cyprusDistributionRow = new CyprusDistributionRow(); - state.setCyprusDistributionRow(cyprusDistributionRow); - - for (String originalKey: keys) { - Integer index = 0; - String indexedKey = CdmUtils.removeDuplicateWhitespace(originalKey.trim()).toString(); - String[] split = indexedKey.split("_"); - String key = split[0]; - if (split.length > 1){ - String indexString = split[1]; - try { - index = Integer.valueOf(indexString); - } catch (NumberFormatException e) { - String message = "Index must be integer"; - logger.error(message); - continue; - } - } - - String value = (String) record.get(indexedKey); - if (! 
StringUtils.isBlank(value)) { - if (logger.isDebugEnabled()) { logger.debug(key + ": " + value); } - value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString(); - }else{ - continue; - } - - - if (key.equalsIgnoreCase(SPECIES_COLUMN)) { - cyprusDistributionRow.setSpecies(value); - - } else if(key.equalsIgnoreCase(DISTRIBUTION_COLUMN)) { - cyprusDistributionRow.setDistribution(value); - - } else if(key.equalsIgnoreCase(REFERENCE_COLUMN)) { - cyprusDistributionRow.setReference(value); - - } else { - success = false; - logger.error("Unexpected column header " + key); - } - } - return success; - } - - private static INonViralNameParser nameParser = NonViralNameParserImpl.NewInstance(); + private Map taxonWithAuthorStore = new HashMap(); + private Map taxonNameOnlyStore = new HashMap(); + + private boolean areasCreated = false; private Map divisions = new HashMap(); @@ -141,35 +86,17 @@ public class CyprusDistributionImport extends ExcelImporterBase status = getStatus(taxon); + status = removeDoubtfulStatus(status); + removeDistributions(taxon); + for (int i = 1; i <= 8; i++){ if (distributionStr.contains(String.valueOf(i))){ NamedArea area = this.divisions.get(String.valueOf(i)); - PresenceAbsenceTermBase status = getStatus(taxon); - status = removeDoubtfulStatus(status); - removeDistributions(taxon); Distribution distribution = Distribution.NewInstance(area, status); distribution.addSource(null, null, ref, null); description.addElement(distribution); } } } + + private TaxonDescription getNewDescription(CyprusImportState state, Taxon taxon) { + Reference excelRef = state.getConfig().getSourceReference(); + TaxonDescription desc = TaxonDescription.NewInstance(taxon, false); + desc.setTitleCache(excelRef.getTitleCache() + " for " + taxon.getTitleCache(), true); + desc.addSource(null, null, excelRef, null); + return desc; + } private PresenceAbsenceTermBase indigenousStatus; private PresenceAbsenceTermBase casualStatus; @@ -230,6 +173,9 @@ public class 
CyprusDistributionImport extends ExcelImporterBase questionableStatus; private PresenceAbsenceTermBase removeDoubtfulStatus(PresenceAbsenceTermBase status) { + if (status == null){ + return null; + } if (status.getUuid().equals(CyprusTransformer.indigenousDoubtfulUuid)){ status = indigenousStatus; }else if (status.getUuid().equals(CyprusTransformer.casualDoubtfulUuid)){ @@ -273,20 +219,26 @@ public class CyprusDistributionImport extends ExcelImporterBase removeDistributions(Taxon taxon) { - Set result = new HashSet(); + private void removeDistributions(Taxon taxon) { + Set toRemove = new HashSet(); for (TaxonDescription desc : taxon.getDescriptions()){ if (desc.isImageGallery() == NO_IMAGE_GALLERY ){ Iterator iterator = desc.getElements().iterator(); while (iterator.hasNext()){ DescriptionElementBase element = iterator.next(); if (element.isInstanceOf(Distribution.class)){ - iterator.remove(); + toRemove.add(CdmBase.deproxy(element, Distribution.class)); +// iterator.remove(); } } } } - return result; + for (Distribution distribution : toRemove){ + DescriptionBase desc = distribution.getInDescription(); + desc.removeElement(distribution); + getDescriptionService().saveOrUpdate(desc); + } + return; } /** @@ -321,30 +273,170 @@ public class CyprusDistributionImport extends ExcelImporterBase taxonStore = new HashMap(); - - private Taxon getTaxon(CyprusImportState state, String taxonStr) { Taxon result; - if (taxonStore.get(taxonStr) != null){ - result = taxonStore.get(taxonStr); - }else{ + if (taxonWithAuthorStore.get(taxonStr) != null){ + result = taxonWithAuthorStore.get(taxonStr); + }else if(taxonNameOnlyStore.get(taxonStr) != null){ + result = taxonNameOnlyStore.get(taxonStr); + }else { // result = getTaxonService().findBestMatchingTaxon(taxonStr); - TaxonNameBase name = BotanicalName.NewInstance(Rank.SPECIES()); - name.setTitleCache(taxonStr, true); - - result = Taxon.NewInstance(name, null); - if (result == null){ +// TaxonNameBase name = 
BotanicalName.NewInstance(Rank.SPECIES()); +// name.setTitleCache(taxonStr, true); +// +// result = Taxon.NewInstance(name, null); +// if (result == null){ logger.warn("Taxon not found: " + taxonStr); - }else{ - taxonStore.put(taxonStr, result); - } +// }else{ +// taxonStore.put(taxonStr, result); +// } + result = null; } return result; } + + /** + * + */ + private void loadTaxa() { + List propertyPaths = new ArrayList(); + propertyPaths.add("*.name"); + List taxonList = (List)getTaxonService().list(Taxon.class, null, null, null, propertyPaths); + for (Taxon taxon: taxonList){ + if (taxon.getTaxonNodes().size() == 0){ + continue; + } + String nameTitle = taxon.getName().getTitleCache(); + String nameCache = CdmBase.deproxy(taxon.getName(), BotanicalName.class).getNameCache(); + Taxon returnValue = taxonWithAuthorStore.put(nameTitle, taxon); + if (returnValue != null){ + logger.warn("Duplicate titleCache entry for taxon: " + nameTitle); + } + returnValue = taxonNameOnlyStore.put(nameCache, taxon); + if (returnValue != null){ + logger.warn("Duplicate nameCache entry for taxon: " + nameCache); + } + } + } + + /** + * @param meikle1977List + */ + private void loadReferences() { + Pager meikle1977List = getReferenceService().findByTitle(Reference.class, "R. D. Meikle, Flora of Cyprus 1. 1977", null, null, null, null, null, null); + + if (meikle1977List.getCount() != 1){ + logger.error("There is not exactly 1 Meikle 1977 reference"); + }else{ + refMeikle1977 = meikle1977List.getRecords().iterator().next(); + } + + Pager meikle1985List = getReferenceService().findByTitle(Reference.class, "R. D. Meikle, Flora of Cyprus 2. 
1985", null, null, null, null, null, null); + if (meikle1985List.getCount() != 1){ + logger.error("There is not exactly 1 Meikle 1985 reference"); + }else{ + refMeikle1985 = meikle1985List.getRecords().iterator().next(); + } + } + + /** + * + */ + private void loadStatus() { + indigenousStatus = (PresenceTerm)getTermService().find(CyprusTransformer.indigenousUuid); + casualStatus = (PresenceTerm)getTermService().find(CyprusTransformer.casualUuid); + nonInvasiveStatus = (PresenceTerm)getTermService().find(CyprusTransformer.nonInvasiveUuid); + invasiveStatus = (PresenceTerm)getTermService().find(CyprusTransformer.invasiveUuid); + questionableStatus = (PresenceTerm)getTermService().find(CyprusTransformer.questionableUuid); + } + + /** + * @param state + * @param transformer + * @throws UndefinedTransformerMethodException + */ + private void makeNewDivisions(CyprusImportState state, + IInputTransformer transformer) + throws UndefinedTransformerMethodException { + NamedAreaType areaType = NamedAreaType.NATURAL_AREA(); + NamedAreaLevel areaLevel = (NamedAreaLevel)getTermService().find(CyprusTransformer.uuidCyprusDivisionsAreaLevel); + if (areaLevel == null){ + areaLevel = NamedAreaLevel.NewInstance("Cyprus Division", "Cyprus Division", null); + getTermService().save(areaLevel); + } + + TermVocabulary areaVocabulary = getVocabulary(CyprusTransformer.uuidCyprusDivisionsVocabulary, "Cyprus devisions", "Cyprus divisions", null, null, true); + TdwgArea tdwg4Cyprus = (TdwgArea)getTermService().find(UUID.fromString("9d447b51-e363-4dde-ae40-84c55679983c")); + WaterbodyOrCountry isoCountryCyprus = (WaterbodyOrCountry)getTermService().find(UUID.fromString("4b13d6b8-7eca-4d42-8172-f2018051ca19")); + + for(int i = 1; i <= 8; i++){ + UUID divisionUuid = transformer.getNamedAreaUuid(String.valueOf(i)); + NamedArea division = this.getNamedArea(state, divisionUuid, "Division " + i, "Cyprus: Division " + i, String.valueOf(i), areaType, areaLevel, areaVocabulary); +
divisions.put(String.valueOf(i), division); + tdwg4Cyprus.addIncludes(division); + isoCountryCyprus.addIncludes(division); + getTermService().save(division); + } + } + + + /* (non-Javadoc) + * @see eu.etaxonomy.cdm.io.excel.common.ExcelImporterBase#analyzeRecord(java.util.HashMap, eu.etaxonomy.cdm.io.excel.common.ExcelImportState) + */ + @Override + protected boolean analyzeRecord(HashMap record, CyprusImportState state) { + + boolean success = true; + Set keys = record.keySet(); + + CyprusDistributionRow cyprusDistributionRow = new CyprusDistributionRow(); + state.setCyprusDistributionRow(cyprusDistributionRow); + + for (String originalKey: keys) { + String indexedKey = CdmUtils.removeDuplicateWhitespace(originalKey.trim()).toString(); + String[] split = indexedKey.split("_"); + String key = split[0]; + if (split.length > 1){ + String indexString = split[1]; + try { + Integer.valueOf(indexString); + } catch (NumberFormatException e) { + String message = "Index must be integer"; + logger.error(message); + continue; + } + } + + String value = (String) record.get(indexedKey); + if (! 
StringUtils.isBlank(value)) { + if (logger.isDebugEnabled()) { logger.debug(key + ": " + value); } + value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString(); + }else{ + continue; + } + + + if (key.equalsIgnoreCase(SPECIES_COLUMN)) { + cyprusDistributionRow.setSpecies(value); + + } else if(key.equalsIgnoreCase(DISTRIBUTION_COLUMN)) { + cyprusDistributionRow.setDistribution(value); + + } else if(key.equalsIgnoreCase(REFERENCE_COLUMN)) { + cyprusDistributionRow.setReference(value); + + } else { + success = false; + logger.error("Unexpected column header " + key); + } + } + return success; + } + + /** * */ diff --git a/app-import/src/main/java/eu/etaxonomy/cdm/io/cyprus/CyprusImportConfigurator.java b/app-import/src/main/java/eu/etaxonomy/cdm/io/cyprus/CyprusImportConfigurator.java index 17f5ddf4..dc96dbc4 100644 --- a/app-import/src/main/java/eu/etaxonomy/cdm/io/cyprus/CyprusImportConfigurator.java +++ b/app-import/src/main/java/eu/etaxonomy/cdm/io/cyprus/CyprusImportConfigurator.java @@ -35,7 +35,7 @@ public class CyprusImportConfigurator extends ExcelImportConfiguratorBase implem private UUID uuidCyprusReference = UUID.fromString("b5281cd3-9d5d-4ae2-8d55-b62a592ce846"); - private String cyprusReferenceTitle = "Cyprus"; + private String cyprusReferenceTitle = "Cyprus Distributions Excel Import"; private boolean doDistribution; private boolean isDoTaxa; diff --git a/app-import/src/main/java/eu/etaxonomy/cdm/io/cyprus/CyprusTransformer.java b/app-import/src/main/java/eu/etaxonomy/cdm/io/cyprus/CyprusTransformer.java index e6a1e1fd..0dec1e3c 100644 --- a/app-import/src/main/java/eu/etaxonomy/cdm/io/cyprus/CyprusTransformer.java +++ b/app-import/src/main/java/eu/etaxonomy/cdm/io/cyprus/CyprusTransformer.java @@ -51,6 +51,7 @@ public final class CyprusTransformer extends InputTransformerBase { //Named Area - divisions public static final UUID uuidCyprusDivisionsVocabulary = UUID.fromString("2119f610-1f93-4d87-af28-40aeefaca100"); + public static final UUID 
uuidCyprusDivisionsAreaLevel = UUID.fromString("ff52bbd9-f73d-4476-af39-f3991fa892bd"); public static final UUID uuidDivision1 = UUID.fromString("ab17eee9-1abb-4ce9-a9a2-563f840cdbfc"); public static final UUID uuidDivision2 = UUID.fromString("c3606165-efb7-4224-a168-63e009eb4aa5"); -- 2.34.1