From acf4e805b2ce1366f135cc4a2faf9c4050121ac1 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Andreas=20M=C3=BCller?=
Date: Fri, 23 Dec 2016 17:18:08 +0100
Subject: [PATCH] ref #6286 first incomplete version of Flora Hellenica import

---
 .../cdm/app/common/CdmDestinations.java | 8 +
 .../greece/ChecklistHellenicaActivator.java | 82 ++++++
 .../FloraHellenicaImportConfigurator.java | 60 ++++
 .../io/greece/FloraHellenicaTaxonImport.java | 263 ++++++++++++++++++
 .../io/greece/FloraHellenicaTransformer.java | 26 ++
 5 files changed, 439 insertions(+)
 create mode 100644 app-import/src/main/java/eu/etaxonomy/cdm/app/greece/ChecklistHellenicaActivator.java
 create mode 100644 app-import/src/main/java/eu/etaxonomy/cdm/io/greece/FloraHellenicaImportConfigurator.java
 create mode 100644 app-import/src/main/java/eu/etaxonomy/cdm/io/greece/FloraHellenicaTaxonImport.java
 create mode 100644 app-import/src/main/java/eu/etaxonomy/cdm/io/greece/FloraHellenicaTransformer.java

diff --git a/app-import/src/main/java/eu/etaxonomy/cdm/app/common/CdmDestinations.java b/app-import/src/main/java/eu/etaxonomy/cdm/app/common/CdmDestinations.java
index bc15b825..d822255f 100644
--- a/app-import/src/main/java/eu/etaxonomy/cdm/app/common/CdmDestinations.java
+++ b/app-import/src/main/java/eu/etaxonomy/cdm/app/common/CdmDestinations.java
@@ -284,6 +284,14 @@ public class CdmDestinations {
         return makeDestination(dbType, cdmServer, cdmDB, -1, cdmUserName, null);
     }
 
+    public static ICdmDataSource cdm_greece_checklist_production(){  //destination used by the Flora Hellenica (Greek checklist) import, see #6286
+        DatabaseTypeEnum dbType = DatabaseTypeEnum.MySQL;
+        String cdmServer = "160.45.63.171";
+        String cdmDB = "cdm_production_flora_hellenica";
+        String cdmUserName = "edit";
+        return makeDestination(dbType, cdmServer, cdmDB, -1, cdmUserName, null);  //NOTE(review): -1 and null are presumably placeholders for port and password - confirm against makeDestination
+    }
+
     public static ICdmDataSource cdm_mexico_rubiaceae_production(){
         DatabaseTypeEnum dbType = DatabaseTypeEnum.MySQL;
         String cdmServer = "160.45.63.171";
diff --git 
a/app-import/src/main/java/eu/etaxonomy/cdm/app/greece/ChecklistHellenicaActivator.java b/app-import/src/main/java/eu/etaxonomy/cdm/app/greece/ChecklistHellenicaActivator.java
new file mode 100644
index 00000000..73b4d763
--- /dev/null
+++ b/app-import/src/main/java/eu/etaxonomy/cdm/app/greece/ChecklistHellenicaActivator.java
@@ -0,0 +1,101 @@
+/**
+* Copyright (C) 2016 EDIT
+* European Distributed Institute of Taxonomy
+* http://www.e-taxonomy.eu
+*
+* The contents of this file are subject to the Mozilla Public License Version 1.1
+* See LICENSE.TXT at the top of this package for the full license terms.
+*/
+package eu.etaxonomy.cdm.app.greece;
+
+import java.net.URI;
+import java.util.UUID;
+
+import org.apache.log4j.Logger;
+
+import eu.etaxonomy.cdm.app.common.CdmDestinations;
+import eu.etaxonomy.cdm.database.DbSchemaValidation;
+import eu.etaxonomy.cdm.database.ICdmDataSource;
+import eu.etaxonomy.cdm.io.common.CdmDefaultImport;
+import eu.etaxonomy.cdm.io.common.IImportConfigurator.CHECK;
+import eu.etaxonomy.cdm.io.greece.FloraHellenicaImportConfigurator;
+
+/**
+ * Command line starter for the Flora Hellenica (Greek checklist) Excel import.
+ *
+ * Source file, destination database and classification are fixed by the
+ * constants of this class; {@link #main(String[])} runs one import and exits.
+ *
+ * @author a.mueller
+ * @date 13.12.2016
+ */
+public class ChecklistHellenicaActivator {
+    @SuppressWarnings("unused")
+    private static final Logger logger = Logger.getLogger(ChecklistHellenicaActivator.class);
+
+    //database validation status (create, update, validate ...)
+    //NOTE(review): CREATE presumably (re)creates the schema - confirm that this is
+    //intended while cdmDestination points to the production database
+    static final DbSchemaValidation hbm2dll = DbSchemaValidation.CREATE;
+
+//    static final ICdmDataSource cdmDestination = CdmDestinations.localH2();
+//    static final ICdmDataSource cdmDestination = CdmDestinations.cdm_test_local_mysql_test();
+    static final ICdmDataSource cdmDestination = CdmDestinations.cdm_greece_checklist_production();
+
+    //feature tree uuid
+    public static final UUID featureTreeUuid = UUID.fromString("9e1e0e81-7475-4b28-8619-b7f42cd760b6");
+
+    //classification
+    static final UUID classificationUuid = UUID.fromString("e537d69a-c2d9-4ac6-8f79-5b5e3dd5c154");
+    private static final String classificationName = "Greek Checklist";
+
+    //check - import
+    static final CHECK check = CHECK.IMPORT_WITHOUT_CHECK;
+
+    //currently not passed to the configurator, see the commented setter in doImport()
+    boolean doVocabularies = (hbm2dll == DbSchemaValidation.CREATE);
+
+    /**
+     * Configures the import and runs it against the given destination.
+     *
+     * @param cdmDestination the database to import into
+     */
+    private void doImport(ICdmDataSource cdmDestination){
+
+        URI source = greekChecklist();
+
+        //make Source
+        FloraHellenicaImportConfigurator config = FloraHellenicaImportConfigurator.NewInstance(source, cdmDestination);
+        config.setClassificationUuid(classificationUuid);
+        config.setClassificationName(classificationName);
+        config.setCheck(check);
+//        config.setDoDistribution(doDistribution);
+//        config.setDoTaxa(doTaxa);
+        config.setDbSchemaValidation(hbm2dll);
+//        config.setSourceReferenceTitle(sourceReferenceTitle);
+//        config.setDoVocabularies(doVocabularies);
+
+        //typed with the configurator instead of using the raw type
+        CdmDefaultImport<FloraHellenicaImportConfigurator> myImport = new CdmDefaultImport<>();
+
+        myImport.invoke(config);
+    }
+
+    /**
+     * @return the location of the Excel file to import
+     */
+    private URI greekChecklist(){
+        return URI.create("file:////BGBM-PESIHPC/Greece/VPG_FINAL_June_2016.xlsx");
+    }
+
+    /**
+     * Runs the import against {@link #cdmDestination} and terminates the JVM.
+     *
+     * @param args not used
+     */
+    public static void main(String[] args) {
+        ChecklistHellenicaActivator me = new ChecklistHellenicaActivator();
+        me.doImport(cdmDestination);
+        System.exit(0);
+    }
+}
diff --git a/app-import/src/main/java/eu/etaxonomy/cdm/io/greece/FloraHellenicaImportConfigurator.java b/app-import/src/main/java/eu/etaxonomy/cdm/io/greece/FloraHellenicaImportConfigurator.java
new file mode 100644
index 00000000..b8d13169
--- /dev/null
+++ b/app-import/src/main/java/eu/etaxonomy/cdm/io/greece/FloraHellenicaImportConfigurator.java
@@ -0,0 +1,76 @@
+/**
+* Copyright (C) 2016 EDIT
+* European Distributed Institute of Taxonomy
+* http://www.e-taxonomy.eu
+*
+* The contents of this file are subject to the Mozilla Public License Version 1.1
+* See LICENSE.TXT at the top of this package for the full license terms.
+*/
+package eu.etaxonomy.cdm.io.greece;
+
+import java.net.URI;
+
+import eu.etaxonomy.cdm.database.ICdmDataSource;
+import eu.etaxonomy.cdm.io.common.ImportStateBase;
+import eu.etaxonomy.cdm.io.common.mapping.IInputTransformer;
+import eu.etaxonomy.cdm.io.excel.common.ExcelImportConfiguratorBase;
+import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
+import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
+import eu.etaxonomy.cdm.model.reference.Reference;
+
+/**
+ * Configurator for the Flora Hellenica Excel import.
+ *
+ * Uses {@link FloraHellenicaTransformer} as input transformer, the ICNAFP
+ * nomenclatural code and {@link FloraHellenicaTaxonImport} as its only
+ * import class.
+ *
+ * @author a.mueller
+ * @date 14.12.2016
+ */
+public class FloraHellenicaImportConfigurator extends ExcelImportConfiguratorBase{
+
+    private static final long serialVersionUID = 3782414424818991629L;
+    private static final IInputTransformer defaultTransformer = new FloraHellenicaTransformer();
+
+    //TODO neither read nor written in this class yet
+    private Reference secReference;
+
+    /**
+     * Factory method.
+     *
+     * @param source the location of the Excel file to import
+     * @param destination the database to import into
+     * @return a new configurator for the given source and destination
+     */
+    public static FloraHellenicaImportConfigurator NewInstance(URI source, ICdmDataSource destination) {
+        return new FloraHellenicaImportConfigurator(source, destination);
+    }
+
+    private FloraHellenicaImportConfigurator(URI source, ICdmDataSource destination) {
+        super(source, destination, defaultTransformer);
+        setNomenclaturalCode(NomenclaturalCode.ICNAFP);
+        //NOTE(review): source and destination are already passed to super(),
+        //these two calls are presumably redundant - confirm before removing
+        setSource(source);
+        setDestination(destination);
+    }
+
+    /**
+     * @return a new {@link SimpleExcelTaxonImportState} created for this configurator
+     */
+    @Override
+    public ImportStateBase getNewState() {
+        return new SimpleExcelTaxonImportState<>(this);
+    }
+
+    /**
+     * Sets {@link FloraHellenicaTaxonImport} as the single import class.
+     */
+    @Override
+    protected void makeIoClassList() {
+        ioClassList = new Class[]{
+                FloraHellenicaTaxonImport.class
+        };
+    }
+}
diff --git 
a/app-import/src/main/java/eu/etaxonomy/cdm/io/greece/FloraHellenicaTaxonImport.java b/app-import/src/main/java/eu/etaxonomy/cdm/io/greece/FloraHellenicaTaxonImport.java
new file mode 100644
index 00000000..3ab3fd68
--- /dev/null
+++ b/app-import/src/main/java/eu/etaxonomy/cdm/io/greece/FloraHellenicaTaxonImport.java
@@ -0,0 +1,319 @@
+/**
+* Copyright (C) 2016 EDIT
+* European Distributed Institute of Taxonomy
+* http://www.e-taxonomy.eu
+*
+* The contents of this file are subject to the Mozilla Public License Version 1.1
+* See LICENSE.TXT at the top of this package for the full license terms.
+*/
+package eu.etaxonomy.cdm.io.greece;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Set;
+import java.util.UUID;
+
+import org.apache.log4j.Logger;
+import org.springframework.stereotype.Component;
+
+import eu.etaxonomy.cdm.common.CdmUtils;
+import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
+import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
+import eu.etaxonomy.cdm.model.common.Language;
+import eu.etaxonomy.cdm.model.name.BotanicalName;
+import eu.etaxonomy.cdm.model.name.Rank;
+import eu.etaxonomy.cdm.model.reference.Reference;
+import eu.etaxonomy.cdm.model.taxon.Classification;
+import eu.etaxonomy.cdm.model.taxon.ITaxonTreeNode;
+import eu.etaxonomy.cdm.model.taxon.Taxon;
+import eu.etaxonomy.cdm.model.taxon.TaxonNode;
+import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
+
+/**
+ * Imports the taxa of the Flora Hellenica checklist from the worksheet named
+ * by {@link #getWorksheetName()}.
+ *
+ * Each row yields one species or subspecies taxon that is added below the
+ * node of its genus. Genus and family nodes are created on first use; families
+ * are added below a "Plantae" node of a newly created classification.
+ *
+ * @author a.mueller
+ * @date 14.12.2016
+ */
+@Component
+public class FloraHellenicaTaxonImport extends SimpleExcelTaxonImport{
+    private static final long serialVersionUID = -6291948918967763381L;
+    private static final Logger logger = Logger.getLogger(FloraHellenicaTaxonImport.class);
+
+    //NOTE(review): the project types SimpleExcelTaxonImport and SimpleExcelTaxonImportState
+    //are used without type arguments in this class - confirm whether they declare any
+
+    private static final UUID rootUuid = UUID.fromString("aa667b0b-b417-470e-a9b0-ef9409a3431e");
+    private static final UUID plantaeUuid = UUID.fromString("4f151932-ab97-4d81-b88e-46fe82cd3e88");
+
+    //column headers expected in the worksheet, any other header is reported by firstPass()
+    private static final List<String> expectedKeys = Arrays.asList(new String[]{
+            "No","Family","Genus","Species","Species Author","Subspecies","Subspecies Author",
+            "IoI","NPi","SPi","Pe","StE","EC","NC","NE","NAe","WAe","Kik","KK","EAe",
+            "Stat","Ch","Lf","Hab A","Hab C","Hab G","Hab H","Hab M","Hab P","Hab R","Hab W","comment TR"
+    });
+
+    //TODO lastFamily and lastSpecies are not evaluated yet
+    private String lastFamily;
+    private String lastGenus;
+    private String lastSpecies;
+    private NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
+
+    //"Plantae" node of the classification, created on first call of getClassification()
+    private TaxonNode rootNode;
+
+    @Override
+    protected String getWorksheetName() {
+        return "6616 taxa";
+    }
+
+    /**
+     * Creates the taxon for the current row and adds it below its genus.
+     *
+     * Rows without a genus are skipped with a warning. A genus node is only
+     * created when the genus differs from the genus of the previously handled
+     * row, so rows are expected to be grouped by genus.
+     *
+     * @param state the import state holding the current row
+     */
+    @Override
+    protected void firstPass(SimpleExcelTaxonImportState state) {
+        String line = state.getCurrentLine() + ": ";
+        HashMap<String, String> record = state.getOriginalRecord();
+
+        Set<String> keys = record.keySet();
+        for (String key: keys) {
+            if (! expectedKeys.contains(key)){
+                logger.warn(line + "Unexpected Key: " + key);
+            }
+        }
+
+        //not strictly necessary here, the family is resolved again when a genus node is created
+        TaxonNode familyTaxon = getFamilyTaxon(record, state);
+        if (familyTaxon == null){
+            logger.warn(line + "Family not created: " + record.get("Family"));
+        }
+
+        String genusStr = getValue(record, "Genus");
+        String speciesStr = getValue(record, "Species");
+        String speciesAuthorStr = getValue(record, "Species Author");
+        String subSpeciesStr = getValue(record, "Subspecies");
+        String subSpeciesAuthorStr = getValue(record, "Subspecies Author");
+        if (isBlank(genusStr)){
+            //without a genus neither the name nor the parent node can be determined
+            logger.warn(line + "Genus is missing, row not imported");
+            return;
+        }
+
+        boolean isSubSpecies = isNotBlank(subSpeciesStr);
+        //subSpeciesStr is the receiver as speciesStr may be null
+        boolean isAutonym = isSubSpecies && subSpeciesStr.equals(speciesStr);
+        if (isSubSpecies && ! isAutonym && isBlank(subSpeciesAuthorStr)){
+            logger.warn(line + "Non-Autonym subspecies has no author");
+        }else if (isSubSpecies && isAutonym && isNotBlank(subSpeciesAuthorStr)){
+            logger.warn(line + "Autonym subspecies has subspecies author");
+        }
+
+        String[] nameParts;
+        if (!isSubSpecies){
+            nameParts = new String[]{genusStr, speciesStr, speciesAuthorStr};
+        }else if (!isAutonym){
+            nameParts = new String[]{genusStr, speciesStr, "subsp. " + subSpeciesStr, subSpeciesAuthorStr};
+        }else{
+            nameParts = new String[]{genusStr, speciesStr, speciesAuthorStr, "subsp. " + subSpeciesStr};
+        }
+
+        String nameStr = CdmUtils.concat(" ", nameParts);
+        Rank rank = isSubSpecies ? Rank.SUBSPECIES() : Rank.SPECIES();
+        BotanicalName name = (BotanicalName)parser.parseFullName(nameStr, state.getConfig().getNomenclaturalCode(), rank);
+        if (name.isProtectedTitleCache()){
+            logger.warn(line + "Name could not be parsed: " + nameStr);
+        }
+        Taxon taxon = Taxon.NewInstance(name, getSecReference(state));
+
+        //TODO subspecies are added below the genus, not below their species
+        boolean genusAsBefore = genusStr.equals(lastGenus);
+        TaxonNode parent = getParent(state, genusStr);
+        if (parent != null){
+            if (genusAsBefore){
+                //everything as expected
+                addChild(state, parent, taxon);
+            }else{
+                logger.warn(line + "Unexpected non-missing parent");
+            }
+        }else if (genusAsBefore){
+            logger.warn(line + "Unexpected missing genus parent");
+        }else{
+            parent = makeGenusNode(state, record, genusStr);
+            if (parent == null){
+                logger.warn(line + "Genus node not created, taxon not added: " + nameStr);
+            }else{
+                addChild(state, parent, taxon);
+            }
+        }
+        if (!isSubSpecies){
+            state.putHigherTaxon(makeSpeciesKey(genusStr, speciesStr, speciesAuthorStr), taxon);
+        }
+
+        this.lastGenus = genusStr;
+        this.lastSpecies = speciesStr;
+    }
+
+    /**
+     * Adds the taxon as child of the given parent node and saves the new node.
+     *
+     * @param state the import state
+     * @param parent the node to add the taxon to
+     * @param child the taxon to add
+     */
+    private void addChild(SimpleExcelTaxonImportState state, TaxonNode parent, Taxon child) {
+        TaxonNode newNode = parent.addChildTaxon(child, getSecReference(state), null);
+        getTaxonNodeService().save(newNode);
+    }
+
+    /**
+     * Builds the key under which a species taxon is registered in the state.
+     *
+     * @param genusStr the genus name
+     * @param speciesStr the species epithet
+     * @param speciesAuthorStr the species author
+     * @return the parts concatenated by CdmUtils.concat with a blank as separator
+     */
+    private String makeSpeciesKey(String genusStr, String speciesStr, String speciesAuthorStr) {
+        return CdmUtils.concat(" ", new String[]{genusStr, speciesStr, speciesAuthorStr});
+    }
+
+    /**
+     * Creates and saves the node of a genus below the family of the given row
+     * and registers the genus taxon in the state under its name.
+     *
+     * @param state the import state
+     * @param record the current row
+     * @param genusStr the genus name
+     * @return the new genus node, or {@code null} if no family node is available
+     */
+    private TaxonNode makeGenusNode(SimpleExcelTaxonImportState state,
+            HashMap<String, String> record, String genusStr) {
+        TaxonNode family = getFamilyTaxon(record, state);
+        if (family == null){
+            //nothing to add the genus to
+            return null;
+        }
+        BotanicalName name = BotanicalName.NewInstance(Rank.GENUS());
+        name.setGenusOrUninomial(genusStr);
+        Taxon genus = Taxon.NewInstance(name, getSecReference(state));
+        TaxonNode genusNode = family.addChildTaxon(genus, getSecReference(state), null);
+        state.putHigherTaxon(genusStr, genus);
+        genus.addSource(makeOriginalSource(state));
+        getTaxonNodeService().save(genusNode);
+        return genusNode;
+    }
+
+    /**
+     * @param state the import state
+     * @return the sec reference for new taxa; always {@code null} for now
+     */
+    private Reference getSecReference(SimpleExcelTaxonImportState state) {
+        // TODO provide the sec reference
+        return null;
+    }
+
+    /**
+     * @param state the import state
+     * @param parentStr the key the parent taxon was registered with
+     * @return a node of the registered taxon, or {@code null} if there is none
+     */
+    private TaxonNode getParent(SimpleExcelTaxonImportState state, String parentStr) {
+        return firstNode(state.getHigherTaxon(parentStr));
+    }
+
+    /**
+     * @param taxon the taxon, may be {@code null}
+     * @return one node of the taxon, or {@code null} if the taxon is
+     *  {@code null} or has no nodes
+     */
+    private TaxonNode firstNode(Taxon taxon) {
+        if (taxon == null || taxon.getTaxonNodes().isEmpty()){
+            return null;
+        }
+        return taxon.getTaxonNodes().iterator().next();
+    }
+
+    /**
+     * Returns the node of the family of the given row. The family is created
+     * below the node returned by {@link #getClassification(SimpleExcelTaxonImportState)}
+     * if it is not yet registered in the state.
+     *
+     * @param record the current row
+     * @param state the import state
+     * @return the family node, or {@code null} if none is available
+     */
+    private TaxonNode getFamilyTaxon(HashMap<String, String> record, SimpleExcelTaxonImportState state) {
+        String familyStr = getValue(record, "Family");
+        if (familyStr == null){
+            return null;
+        }
+        familyStr = familyStr.trim();
+
+        Taxon family = state.getHigherTaxon(familyStr);
+        TaxonNode familyNode;
+        if (family != null){
+            familyNode = firstNode(family);
+        }else{
+            BotanicalName name = makeFamilyName(state, familyStr);
+            Reference sec = getSecReference(state);
+            family = Taxon.NewInstance(name, sec);
+            ITaxonTreeNode plantaeNode = getClassification(state);
+            familyNode = plantaeNode.addChildTaxon(family, sec, null);
+            state.putHigherTaxon(familyStr, family);
+            getTaxonNodeService().save(familyNode);
+        }
+
+        return familyNode;
+    }
+
+    /**
+     * @param state the import state
+     * @param famStr the family name
+     * @return a new family name with an original source attached
+     */
+    private BotanicalName makeFamilyName(SimpleExcelTaxonImportState state, String famStr) {
+        BotanicalName name = BotanicalName.NewInstance(Rank.FAMILY());
+        name.setGenusOrUninomial(famStr);
+        name.addSource(makeOriginalSource(state));
+        return name;
+    }
+
+    /**
+     * Creates and saves the classification with a "Plantae" node on first call.
+     *
+     * @param state the import state
+     * @return the "Plantae" node
+     */
+    private TaxonNode getClassification(SimpleExcelTaxonImportState state) {
+        if (rootNode == null){
+            Reference sec = getSecReference(state);
+            String classificationName = state.getConfig().getClassificationName();
+            Language language = Language.DEFAULT();
+            Classification classification = Classification.NewInstance(classificationName, sec, language);
+            classification.setUuid(state.getConfig().getClassificationUuid());
+            classification.getRootNode().setUuid(rootUuid);
+
+            BotanicalName plantaeName = BotanicalName.NewInstance(Rank.KINGDOM());
+            plantaeName.setGenusOrUninomial("Plantae");
+            Taxon plantae = Taxon.NewInstance(plantaeName, sec);
+            TaxonNode plantaeNode = classification.addChildTaxon(plantae, null, null);
+            plantaeNode.setUuid(plantaeUuid);
+            getClassificationService().save(classification);
+
+            rootNode = plantaeNode;
+        }
+        return rootNode;
+    }
+}
diff --git a/app-import/src/main/java/eu/etaxonomy/cdm/io/greece/FloraHellenicaTransformer.java b/app-import/src/main/java/eu/etaxonomy/cdm/io/greece/FloraHellenicaTransformer.java
new file mode 100644
index 00000000..3d0245b5
--- /dev/null
+++ b/app-import/src/main/java/eu/etaxonomy/cdm/io/greece/FloraHellenicaTransformer.java
@@ -0,0 +1,26 @@
+/**
+* Copyright (C) 2016 EDIT
+* European Distributed Institute of Taxonomy
+* http://www.e-taxonomy.eu
+*
+* The contents of this file are subject to the Mozilla Public License Version 1.1
+* See LICENSE.TXT at the top of this package for the full license terms.
+*/
+package eu.etaxonomy.cdm.io.greece;
+
+import org.apache.log4j.Logger;
+
+import eu.etaxonomy.cdm.io.common.mapping.InputTransformerBase;
+
+/**
+ * @author a.mueller
+ * @date 14.12.2016
+ *
+ */
+public class FloraHellenicaTransformer extends InputTransformerBase{
+
+    private static final long serialVersionUID = -3400280126782787668L;
+    @SuppressWarnings("unused")
+    private static final Logger logger = Logger.getLogger(FloraHellenicaTransformer.class);
+
+}
-- 
2.34.1