From ba5dcea5b85e4b1615e9bfc6bbfa328bd7de3a75 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Andreas=20M=C3=BCller?= Date: Tue, 15 May 2018 17:12:57 +0200 Subject: [PATCH] ref #7334 adapt DwCA and CDM light export to new pro parte synonym handling, also add misapplied name handling to CDM light --- .../CdmLightClassificationExport.java | 112 ++++++++++++++---- .../cdm/io/cdmLight/CdmLightExportTable.java | 2 + .../cdm/io/common/CdmExportBase.java | 10 ++ .../cdm/io/dwca/out/DwcaTaxonExport.java | 60 +++++++--- .../io/cdmLight/out/CdmLightExportTest.java | 24 ++-- 5 files changed, 152 insertions(+), 56 deletions(-) diff --git a/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/cdmLight/CdmLightClassificationExport.java b/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/cdmLight/CdmLightClassificationExport.java index a2c318a135..a00759f14c 100755 --- a/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/cdmLight/CdmLightClassificationExport.java +++ b/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/cdmLight/CdmLightClassificationExport.java @@ -76,6 +76,8 @@ import eu.etaxonomy.cdm.model.reference.ReferenceType; import eu.etaxonomy.cdm.model.taxon.Synonym; import eu.etaxonomy.cdm.model.taxon.Taxon; import eu.etaxonomy.cdm.model.taxon.TaxonNode; +import eu.etaxonomy.cdm.model.taxon.TaxonRelationship; +import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType; import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException; /** @@ -145,17 +147,17 @@ public class CdmLightClassificationExport @SuppressWarnings("unchecked") TaxonNodeOutStreamPartitioner partitioner - = TaxonNodeOutStreamPartitioner.NewInstance( - this, state, state.getConfig().getTaxonNodeFilter(), - 100, monitor, null); + = TaxonNodeOutStreamPartitioner.NewInstance( + this, state, state.getConfig().getTaxonNodeFilter(), + 100, monitor, null); monitor.subTask("Start partitioning"); TaxonNode node = partitioner.next(); while (node != null){ - handleTaxonNode(state, node); - node = partitioner.next(); + handleTaxonNode(state, 
node); + node = partitioner.next(); } @@ -175,28 +177,27 @@ public class CdmLightClassificationExport * @param classificationUuid */ private void handleTaxonNode(CdmLightExportState state, TaxonNode taxonNode) { - try { -// TaxonNode taxonNode = getTaxonNodeService().find(taxonNodeUuid); if (taxonNode == null){ String message = "TaxonNode for given taxon node UUID not found. "; //TODO state.getResult().addWarning(message); }else{ - TaxonNode root = taxonNode; - if (root.hasTaxon()){ - handleTaxon(state, root); - }else{ -// for (TaxonNode child : root.getChildNodes()){ -// handleTaxon(state, child); -// //TODO progress monitor -// } + try { + TaxonNode root = taxonNode; + if (root.hasTaxon()){ + handleTaxon(state, root); + }else{ + // for (TaxonNode child : root.getChildNodes()){ + // handleTaxon(state, child); + // //TODO progress monitor + // } + } + } catch (Exception e) { + state.getResult().addException(e, "An unexpected error occurred when handling classification " + + taxonNode.getUuid() + ": " + e.getMessage() + e.getStackTrace()); } } - } catch (Exception e) { - state.getResult().addException(e, "An unexpected error occurred when handling classification " + - taxonNode.getUuid() + ": " + e.getMessage() + e.getStackTrace()); - } } /** @@ -223,7 +224,12 @@ public class CdmLightClassificationExport for (Synonym syn : taxon.getSynonyms()){ handleSynonym(state, syn); } - + for (TaxonRelationship rel : taxon.getProParteAndPartialSynonymRelations()){ + handleProPartePartialMisapplied(state, rel); + } + for (TaxonRelationship rel : taxon.getMisappliedNameRelations()){ + handleProPartePartialMisapplied(state, rel); + } CdmLightExportTable table = CdmLightExportTable.TAXON; String[] csvLine = new String[table.getSize()]; @@ -256,6 +262,7 @@ public class CdmLightClassificationExport } } + /** * @param state * @param taxon @@ -704,11 +711,6 @@ public class CdmLightClassificationExport csvLine[table.getIndex(CdmLightExportTable.NAME_FK)] = getId(state, name); 
csvLine[table.getIndex(CdmLightExportTable.SEC_REFERENCE_FK)] = getId(state, synonym.getSec()); csvLine[table.getIndex(CdmLightExportTable.SEC_REFERENCE)] = getTitleCache(synonym.getSec()); - if (synonym.isProParte()) { - csvLine[table.getIndex(CdmLightExportTable.IS_PRO_PARTE)] = "1"; - }else { - csvLine[table.getIndex(CdmLightExportTable.IS_PRO_PARTE)] = "0"; - } state.getProcessor().put(table, synonym, csvLine); } catch (Exception e) { @@ -718,6 +720,66 @@ public class CdmLightClassificationExport } + /** + * Handles misapplied names (including pro parte and partial ones) as well as + * pro parte and partial synonyms. + * @param state + * @param rel + */ + private void handleProPartePartialMisapplied(CdmLightExportState state, TaxonRelationship rel) { + try { + Taxon ppSyonym = rel.getFromTaxon(); + if (isUnpublished(state.getConfig(), ppSyonym)){ + return; + } + TaxonName name = ppSyonym.getName(); + handleName(state, name); + + CdmLightExportTable table = CdmLightExportTable.SYNONYM; + String[] csvLine = new String[table.getSize()]; + + csvLine[table.getIndex(CdmLightExportTable.SYNONYM_ID)] = getId(state, rel); + csvLine[table.getIndex(CdmLightExportTable.TAXON_FK)] = getId(state, rel.getToTaxon()); + csvLine[table.getIndex(CdmLightExportTable.NAME_FK)] = getId(state, name); + + //TODO pro parte synonyms have two references, the synonym relationship reference + //and the sec reference of the Taxon representing the synonym. + //As we currently do have only 1 reference column in CDM light the synonym relationship + //reference is used here.
This is according to how pro parte synonyms were mapped to + //concept relationships in #7334 + Reference secRef = rel.getCitation(); + csvLine[table.getIndex(CdmLightExportTable.SEC_REFERENCE_FK)] = getId(state, secRef); + csvLine[table.getIndex(CdmLightExportTable.SEC_REFERENCE)] = getTitleCache(secRef); + +// Reference secRef = ppSyonym.getSec(); +// csvLine[table.getIndex(CdmLightExportTable.SEC_REFERENCE_FK)] = getId(state, secRef); +// csvLine[table.getIndex(CdmLightExportTable.SEC_REFERENCE)] = getTitleCache(secRef); +// Reference synSecRef = rel.getCitation(); +// csvLine[table.getIndex(CdmLightExportTable.SYN_SEC_REFERENCE_FK)] = getId(state, secRef); +// csvLine[table.getIndex(CdmLightExportTable.SYN_SEC_REFERENCE)] = getTitleCache(secRef); + + //pro parte type + TaxonRelationshipType type = rel.getType(); + csvLine[table.getIndex(CdmLightExportTable.IS_PRO_PARTE)] = type.isProParte()? "1":"0"; + csvLine[table.getIndex(CdmLightExportTable.IS_PARTIAL)] = type.isPartial()? "1":"0"; + csvLine[table.getIndex(CdmLightExportTable.IS_MISAPPLIED)] = type.isAnyMisappliedName()? "1":"0"; + if (type.isPartial()) { + String message = "Partial synonyms/misapplied names not yet handled by CDM light. 
Created " + + "pro parte synonym/misapplied name instead for " + rel.getId(); + state.getResult().addWarning(message, "handleProParteSynonym", ppSyonym.getTitleCache()); + csvLine[table.getIndex(CdmLightExportTable.IS_PRO_PARTE)] = "1"; + } + + state.getProcessor().put(table, ppSyonym, csvLine); + } catch (Exception e) { + state.getResult().addException(e, "An unexpected error occurred when handling " + + "pro parte/partial synonym relationship " + + cdmBaseStr(rel) + ": " + e.getMessage()); + } + + } + + /** * @param state * @param name diff --git a/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/cdmLight/CdmLightExportTable.java b/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/cdmLight/CdmLightExportTable.java index 7663940e13..0d94ac65e9 100644 --- a/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/cdmLight/CdmLightExportTable.java +++ b/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/cdmLight/CdmLightExportTable.java @@ -46,6 +46,8 @@ public enum CdmLightExportTable { protected static final String SEC_REFERENCE_FK = "SecReference_FK"; protected static final String SEC_REFERENCE = "SecReference"; protected static final String IS_PRO_PARTE = "IsProParteSynonym"; + protected static final String IS_PARTIAL = "IsPartial"; + protected static final String IS_MISAPPLIED = "IsMisapplied"; //Reference protected static final String REFERENCE_ID = "Reference_ID"; protected static final String BIBLIO_SHORT_CITATION = "BibliographicShortCitation"; diff --git a/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/common/CdmExportBase.java b/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/common/CdmExportBase.java index 30d316dd1e..58cd3cfb76 100644 --- a/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/common/CdmExportBase.java +++ b/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/common/CdmExportBase.java @@ -184,4 +184,14 @@ public abstract class CdmExportBasetrue if neither pro parte synonym or misapplied name has state publish nor + * taxon node filter includes unpublished taxa. 
+ */ + protected boolean isUnpublished(CONFIG config, Taxon relatedSynonymOrMisappliedName) { + return ! (relatedSynonymOrMisappliedName.isPublish() + || config.getTaxonNodeFilter().isIncludeUnpublished()); + } + } diff --git a/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/out/DwcaTaxonExport.java b/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/out/DwcaTaxonExport.java index 2227938b45..ab32982955 100644 --- a/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/out/DwcaTaxonExport.java +++ b/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/out/DwcaTaxonExport.java @@ -80,7 +80,7 @@ public class DwcaTaxonExport extends DwcaDataExportBase { TaxonName basionym = name.getBasionym(); Classification classification = node.getClassification(); if (! state.recordExists(file, taxon)){ - handleTaxonBase(state, record, taxon, name, taxon, parent, basionym, classification, null, false, false); + handleTaxonBase(state, record, taxon, name, taxon, parent, basionym, classification, null); PrintWriter writer = createPrintWriter(state, file); record.write(state, writer); state.addExistingRecord(file, taxon); @@ -90,6 +90,8 @@ public class DwcaTaxonExport extends DwcaDataExportBase { //synonyms if (state.getConfig().isDoSynonyms()){ handleSynonyms(state, taxon, file, classification, metaRecord); + //pro parte synonyms + handleProparteSynonyms(state, taxon, file, classification, metaRecord); } //misapplied names @@ -97,6 +99,8 @@ public class DwcaTaxonExport extends DwcaDataExportBase { handleMisapplication(state, taxon, file, classification, metaRecord); } + + } catch (Exception e) { String message = "Unexpected exception: " + e.getMessage(); state.getResult().addException(e, message); @@ -117,8 +121,6 @@ public class DwcaTaxonExport extends DwcaDataExportBase { DwcaTaxonRecord record = new DwcaTaxonRecord(metaRecord, state.getConfig()); SynonymType type = synonym.getType(); - boolean isProParte = synonym.isProParte(); - boolean isPartial = synonym.isPartial(); if (type == null){
// should not happen type = SynonymType.SYNONYM_OF(); } @@ -128,7 +130,7 @@ public class DwcaTaxonExport extends DwcaDataExportBase { TaxonName basionym = name.getBasionym(); if (! state.recordExists(file, synonym)){ - handleTaxonBase(state, record, synonym, name, taxon, parent, basionym, classification, type, isProParte, isPartial); + handleTaxonBase(state, record, synonym, name, taxon, parent, basionym, classification, type); PrintWriter writer = createPrintWriter(state, file); record.write(state, writer); state.addExistingRecord(file, synonym); @@ -150,7 +152,7 @@ public class DwcaTaxonExport extends DwcaDataExportBase { if (! state.recordExists(file, misappliedName)){ handleTaxonBase(state, record, misappliedName, name, taxon, parent, basionym, classification, - misappliedNameRel.getType(), false, false); + misappliedNameRel.getType()); PrintWriter writer = createPrintWriter(state, file); record.write(state, writer); state.addExistingRecord(file, misappliedName); @@ -158,6 +160,30 @@ public class DwcaTaxonExport extends DwcaDataExportBase { } } + private void handleProparteSynonyms(DwcaTaxExportState state, Taxon taxon, + DwcaTaxExportFile file, Classification classification, DwcaMetaDataRecord metaRecord) throws FileNotFoundException, UnsupportedEncodingException, IOException { + + Set proParteRels = taxon.getProParteAndPartialSynonymRelations(); + for (TaxonRelationship proParteRel : proParteRels ){ + DwcaTaxonRecord record = new DwcaTaxonRecord(metaRecord, state.getConfig()); + Taxon proParteSynonym = proParteRel.getFromTaxon(); + TaxonName name = proParteSynonym.getName(); + //???? + Taxon parent = null; + TaxonName basionym = name.getBasionym(); + + if (! 
state.recordExists(file, proParteSynonym)){ + handleTaxonBase(state, record, proParteSynonym, name, taxon, parent, basionym, classification, + proParteRel.getType()); + PrintWriter writer = createPrintWriter(state, file); + record.write(state, writer); + state.addExistingRecord(file, proParteSynonym); + } + } + } + + + /** * @param state * @param record @@ -173,7 +199,7 @@ public class DwcaTaxonExport extends DwcaDataExportBase { */ private void handleTaxonBase(DwcaTaxExportState state, DwcaTaxonRecord record, TaxonBase taxonBase, TaxonName name, Taxon acceptedTaxon, Taxon parent, TaxonName basionym, Classification classification, - RelationshipTermBase relType, boolean isProParte, boolean isPartial) { + RelationshipTermBase relType) { record.setId(taxonBase.getId()); record.setUuid(taxonBase.getUuid()); @@ -256,7 +282,7 @@ public class DwcaTaxonExport extends DwcaDataExportBase { record.setNomenclaturalCode(name.getNameType()); // ??? TODO Misapplied Names, inferred synonyms - handleTaxonomicStatus(record, name, relType, isProParte, isPartial); + handleTaxonomicStatus(record, name, relType); handleNomStatus(record, taxonBase, name); // TODO we need to differentiate technical @@ -368,8 +394,7 @@ public class DwcaTaxonExport extends DwcaDataExportBase { * @param isProParte */ private void handleTaxonomicStatus(DwcaTaxonRecord record, - INonViralName name, RelationshipTermBase type, - boolean isProParte, boolean isPartial) { + INonViralName name, RelationshipTermBase type) { if (type == null){ record.setTaxonomicStatus(name.getNomenclaturalCode().acceptedTaxonStatusLabel()); }else{ @@ -378,18 +403,21 @@ public class DwcaTaxonExport extends DwcaDataExportBase { status = "heterotypicSynonym"; }else if(type.equals(SynonymType.HOMOTYPIC_SYNONYM_OF())){ status = "homotypicSynonym"; + }else if(type.equals(TaxonRelationshipType.PRO_PARTE_SYNONYM_FOR())){ + status = "proParteSynonym"; + }else if(type.equals(TaxonRelationshipType.PARTIAL_SYNONYM_FOR())){ + String message = 
"Partial synonym is not part of the gbif toxonomic status vocabulary"; + logger.warn(message); + status = "proParteMisapplied"; }else if(type.equals(TaxonRelationshipType.MISAPPLIED_NAME_FOR())){ status = "misapplied"; }else if(type.equals(TaxonRelationshipType.PRO_PARTE_MISAPPLIED_NAME_FOR())){ status = "proParteMisapplied"; + }else if(type.equals(TaxonRelationshipType.PARTIAL_MISAPPLIED_NAME_FOR())){ + String message = "Partial misapplied names are not part of the gbif toxonomic status vocabulary"; + logger.warn(message); + status = "partialMisapplied"; } - if (isProParte){ - status = "proParteSynonym"; - }else if (isPartial){ - String message = "Partial synonym is not part of the gbif toxonomic status vocabulary"; - logger.warn(message); - status = "partialSynonym"; - } record.setTaxonomicStatus(status); } diff --git a/cdmlib-io/src/test/java/eu/etaxonomy/cdm/io/cdmLight/out/CdmLightExportTest.java b/cdmlib-io/src/test/java/eu/etaxonomy/cdm/io/cdmLight/out/CdmLightExportTest.java index 3eb455c422..360c5ccd9b 100755 --- a/cdmlib-io/src/test/java/eu/etaxonomy/cdm/io/cdmLight/out/CdmLightExportTest.java +++ b/cdmlib-io/src/test/java/eu/etaxonomy/cdm/io/cdmLight/out/CdmLightExportTest.java @@ -10,7 +10,6 @@ package eu.etaxonomy.cdm.io.cdmLight.out; import java.io.BufferedReader; import java.io.ByteArrayInputStream; -import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; @@ -160,7 +159,6 @@ public class CdmLightExportTest extends CdmTransactionalIntegrationTest{ @DataSet(loadStrategy=CleanSweepInsertLoadStrategy.class, value="/eu/etaxonomy/cdm/database/BlankDataSet.xml") public void testFullTreeWithUnpublished(){ - CdmLightExportConfigurator config = new CdmLightExportConfigurator(null); config.setTarget(TARGET.EXPORT_DATA); config.getTaxonNodeFilter().setIncludeUnpublished(true); @@ -194,8 +192,8 @@ public class CdmLightExportTest extends CdmTransactionalIntegrationTest{ } Assert.assertTrue("There 
should be 1 synonym", count == 2); } catch (IOException e) { - // TODO Auto-generated catch block e.printStackTrace(); + Assert.fail("IO Exception thrown during test."); } byte[] taxon = data.get(CdmLightExportTable.TAXON.getTableName()); Assert.assertNotNull("Taxon table must not be null", taxon); @@ -243,7 +241,6 @@ public class CdmLightExportTest extends CdmTransactionalIntegrationTest{ @DataSet(loadStrategy=CleanSweepInsertLoadStrategy.class, value="/eu/etaxonomy/cdm/database/BlankDataSet.xml") public void testFullData(){ - File destinationFolder = null; CdmLightExportConfigurator config = new CdmLightExportConfigurator(null); config.setTarget(TARGET.EXPORT_DATA); @@ -270,18 +267,15 @@ public class CdmLightExportTest extends CdmTransactionalIntegrationTest{ } Assert.assertTrue("There should be 4 references", count == 5); try{ - stream = new ByteArrayInputStream(data.get(CdmLightExportTable.SYNONYM.getTableName())); - Assert.fail("There should not be a synonym table, because the only synonym is not public."); + stream = new ByteArrayInputStream(data.get(CdmLightExportTable.SYNONYM.getTableName())); + Assert.fail("There should not be a synonym table, because the only synonym is not public."); }catch(NullPointerException e){ - + //OK, should be thrown } } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); + throw new RuntimeException(e); } - - } /** @@ -301,7 +295,7 @@ public class CdmLightExportTest extends CdmTransactionalIntegrationTest{ TaxonName familyName = parser.parseReferencedName("Family L., Sp. Pl. 3: 22. 
1752", NomenclaturalCode.ICNAFP, Rank.FAMILY()); setUuid(familyName,"e983cc5e-4c77-4c80-8cb0-73d43df31ef7"); - setUuid((Reference)familyName.getNomenclaturalReference(), "b0dd7f4a-0c7f-4372-bc5d-3b676363bc63"); + setUuid(familyName.getNomenclaturalReference(), "b0dd7f4a-0c7f-4372-bc5d-3b676363bc63"); Taxon family = Taxon.NewInstance(familyName, sec1); setUuid(family,"3162e136-f2e2-4f9a-9010-3f35908fbae1"); TaxonNode node1 = classification.addChildTaxon(family, sec1, "22"); @@ -311,7 +305,7 @@ public class CdmLightExportTest extends CdmTransactionalIntegrationTest{ TaxonName genusName = parser.parseReferencedName("Genus Humb., The book of botany 3: 22. 1804", NomenclaturalCode.ICNAFP, Rank.GENUS()); setUuid(genusName,"5e83cc5e-4c77-4d80-8cb0-73d63df35ee3"); - setUuid((Reference)genusName.getNomenclaturalReference(), "5ed27f4a-6c7f-4372-bc5d-3b67636abc52"); + setUuid(genusName.getNomenclaturalReference(), "5ed27f4a-6c7f-4372-bc5d-3b67636abc52"); Taxon genus = Taxon.NewInstance(genusName, sec1); setUuid(genus,"3f52e136-f2e1-4f9a-9010-2f35908fbd39"); @@ -323,7 +317,7 @@ public class CdmLightExportTest extends CdmTransactionalIntegrationTest{ TaxonName speciesName = parser.parseReferencedName("Genus species Mill., The book of botany 3: 22. 1804", NomenclaturalCode.ICNAFP, Rank.SPECIES()); setUuid(speciesName,"f983cc5e-4c77-4c80-8cb0-73d43df31ee9"); - setUuid((Reference)speciesName.getNomenclaturalReference(), "a0dd7f4a-0c7f-4372-bc5d-3b676363bc0e"); + setUuid(speciesName.getNomenclaturalReference(), "a0dd7f4a-0c7f-4372-bc5d-3b676363bc0e"); Taxon species = Taxon.NewInstance(speciesName, sec1); setUuid(species,"9182e136-f2e2-4f9a-9010-3f35908fb5e0"); TaxonName synonymName = parser.parseReferencedName("Genus synonym Mill., The book of botany 3: 22. 1804", NomenclaturalCode.ICNAFP, Rank.SPECIES()); @@ -340,7 +334,7 @@ public class CdmLightExportTest extends CdmTransactionalIntegrationTest{ TaxonName subspeciesName = parser.parseReferencedName("Genus species subsp. 
subspec Mill., The book of botany 3: 22. 1804", NomenclaturalCode.ICNAFP, Rank.SUBSPECIES()); setUuid(subspeciesName,"3483cc5e-4c77-4c80-8cb0-73d43df31ee3"); - setUuid((Reference)subspeciesName.getNomenclaturalReference(), "b8dd7f4a-0c7f-4372-bc5d-3b676363bc0f"); + setUuid(subspeciesName.getNomenclaturalReference(), "b8dd7f4a-0c7f-4372-bc5d-3b676363bc0f"); Taxon subspecies = Taxon.NewInstance(subspeciesName, sec1); setUuid(subspecies, "b2c86698-500e-4efb-b9ae-6bb6e701d4bc"); -- 2.34.1