package eu.etaxonomy.cdm.io.dwca.in;\r
\r
import java.util.ArrayList;\r
+import java.util.HashSet;\r
import java.util.List;\r
import java.util.Map;\r
import java.util.Set;\r
import eu.etaxonomy.cdm.model.common.CdmBase;\r
import eu.etaxonomy.cdm.model.common.IdentifiableSource;\r
import eu.etaxonomy.cdm.model.common.LSID;\r
-import eu.etaxonomy.cdm.model.common.OriginalSourceBase;\r
+import eu.etaxonomy.cdm.model.common.OriginalSourceType;\r
+import eu.etaxonomy.cdm.model.name.BotanicalName;\r
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;\r
import eu.etaxonomy.cdm.model.name.NonViralName;\r
import eu.etaxonomy.cdm.model.name.Rank;\r
import eu.etaxonomy.cdm.model.name.TaxonNameBase;\r
+import eu.etaxonomy.cdm.model.name.ZoologicalName;\r
import eu.etaxonomy.cdm.model.reference.Reference;\r
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;\r
import eu.etaxonomy.cdm.model.taxon.Classification;\r
import eu.etaxonomy.cdm.model.taxon.Synonym;\r
import eu.etaxonomy.cdm.model.taxon.Taxon;\r
import eu.etaxonomy.cdm.model.taxon.TaxonBase;\r
+import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;\r
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;\r
-import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;\r
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;\r
\r
/**\r
private static Logger logger = Logger.getLogger(DwcTaxonCsv2CdmTaxonConverter.class);\r
\r
private static final String ID = "id";\r
- // key for for case that no dataset information is supplied, TODO use something better\r
+ // temporary key for the case that no dataset information is supplied, TODO use something better\r
public static final String NO_DATASET = "no_dataset_jli773oebhjklw";\r
\r
+ private NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();\r
\r
/**\r
* Creates a taxon converter working on the given import state.\r
* @param state the import state; with this change it is handed to the superclass constructor instead of being assigned to a field here\r
*/\r
public DwcTaxonCsv2CdmTaxonConverter(DwcaImportState state) {\r
- super();\r
- this.state = state;\r
+ super(state);\r
}\r
\r
\r
public IReader<MappedCdmBase> map(CsvStreamItem csvTaxonRecord){\r
List<MappedCdmBase> resultList = new ArrayList<MappedCdmBase>(); \r
\r
- //TODO source reference\r
- Reference<?> sourceReference = null;\r
+ //TODO what if not transactional? \r
+ Reference<?> sourceReference = state.getTransactionalSourceReference();\r
String sourceReferenceDetail = null;\r
\r
//taxon\r
\r
//original source\r
String id = csvTaxonRecord.get(ID);\r
- IdentifiableSource source = taxonBase.addSource(id, "Taxon", sourceReference, sourceReferenceDetail);\r
+ IdentifiableSource source = taxonBase.addImportSource(id, "Taxon", sourceReference, sourceReferenceDetail);\r
MappedCdmBase mappedSource = new MappedCdmBase(csvTaxonRecord.get(ID), source);\r
resultList.add(mappedSource);\r
csvTaxonRecord.remove(ID);\r
NomenclaturalCode nomCode = getNomCode(csvTaxonRecord);\r
Rank rank = getRank(csvTaxonRecord, nomCode);\r
\r
- //name\r
- TaxonNameBase<?,?> name = getScientificName(csvTaxonRecord, nomCode, rank, resultList);\r
+ //name && name published in\r
+ TaxonNameBase<?,?> name = getScientificName(csvTaxonRecord, nomCode, rank, resultList, sourceReference);\r
taxonBase.setName(name);\r
\r
- //sec\r
- Reference<?> sec = getNameAccordingTo(csvTaxonRecord, resultList);\r
- taxonBase.setSec(sec);\r
+ //nameAccordingTo\r
+ MappedCdmBase<Reference> sec = getNameAccordingTo(csvTaxonRecord, resultList);\r
+ if (sec == null && state.getConfig().isUseSourceReferenceAsSec()){\r
+ sec = new MappedCdmBase<Reference>(state.getTransactionalSourceReference());\r
+ }\r
+ if (sec != null){\r
+ taxonBase.setSec(sec.getCdmBase());\r
+ }\r
\r
//classification\r
handleDataset(csvTaxonRecord, taxonBase, resultList, sourceReference, sourceReferenceDetail);\r
\r
\r
// Handles the dataset columns (DWC_DATASET_ID / DWC_DATASET_NAME) of a taxon record.\r
// Depending on the configuration the dataset is imported as a Classification, as the secundum\r
// reference of the taxon, or as an original source of the taxon; for any other setting a warning\r
// event is fired. Both dataset terms are removed from the item at the end so that unused\r
// attributes can be detected later.\r
// NOTE(review): part of the classification branch is not visible in this fragment.\r
private void handleDataset(CsvStreamItem item, TaxonBase<?> taxonBase, List<MappedCdmBase> resultList, Reference<?> sourceReference, String sourceReferecenDetail) {\r
- if (state.getConfig().isDatasetsAsClassifications()){\r
- String datasetId = CdmUtils.Nz(item.get(TermUri.DWC_DATASET_ID)).trim();\r
- String datasetName = CdmUtils.Nz(item.get(TermUri.DWC_DATASET_NAME)).trim();\r
- if (CdmUtils.areBlank(datasetId, datasetName) ){\r
+ TermUri idTerm = TermUri.DWC_DATASET_ID;\r
+ TermUri strTerm = TermUri.DWC_DATASET_NAME;\r
+ \r
+ if (config.isDatasetsAsClassifications()){\r
+ String datasetId = CdmUtils.Nz(item.get(idTerm)).trim();\r
+ String datasetName = CdmUtils.Nz(item.get(strTerm)).trim();\r
+ if (CdmUtils.areBlank(datasetId, datasetName) ){\r
datasetId = NO_DATASET;\r
}\r
\r
//check id\r
- boolean classificationExists = state.exists(TermUri.DWC_DATASET_ID.toString() , datasetId, Classification.class);\r
+ boolean classificationExists = state.exists(idTerm.toString() , datasetId, Classification.class);\r
\r
//check name\r
if (!classificationExists){\r
- classificationExists = state.exists(TermUri.DWC_DATASET_NAME.toString() , datasetName, Classification.class);\r
+ classificationExists = state.exists(strTerm.toString() , datasetName, Classification.class);\r
}\r
\r
//if not exists, create new\r
String classificationId = StringUtils.isBlank(datasetId)? datasetName : datasetId;\r
Classification classification = Classification.NewInstance(classificationName);\r
//source\r
- IdentifiableSource source = classification.addSource(classificationId, "Dataset", sourceReference, sourceReferecenDetail);\r
+ IdentifiableSource source = classification.addSource(OriginalSourceType.Lineage, classificationId, "Dataset", sourceReference, sourceReferecenDetail);\r
//add to result\r
- resultList.add(new MappedCdmBase(TermUri.DWC_DATASET_ID, datasetId, classification));\r
- resultList.add(new MappedCdmBase(TermUri.DWC_DATASET_NAME, datasetName, classification));\r
+ resultList.add(new MappedCdmBase(idTerm, datasetId, classification));\r
+ resultList.add(new MappedCdmBase(strTerm, datasetName, classification));\r
resultList.add(new MappedCdmBase(source));\r
//TODO this is not so nice but currently necessary as classifications are requested in the same partition\r
- state.putMapping(TermUri.DWC_DATASET_ID.toString(), classificationId, classification);\r
- state.putMapping(TermUri.DWC_DATASET_NAME.toString(), classificationName, classification);\r
+ state.putMapping(idTerm.toString(), classificationId, classification);\r
+ state.putMapping(strTerm.toString(), classificationName, classification);\r
+ }\r
+ }else if (config.isDatasetsAsSecundumReference() || config.isDatasetsAsOriginalSource()){\r
+ MappedCdmBase<Reference> mappedCitation = getReference(item, resultList, idTerm, strTerm, true);\r
+ if (mappedCitation != null){\r
+ Reference<?> ref = mappedCitation.getCdmBase();\r
+ if (config.isDatasetsAsSecundumReference()){\r
+ //dataset as secundum reference\r
+ taxonBase.setSec(ref);\r
+ }else{\r
+ //dataset as original source\r
+ taxonBase.addSource(OriginalSourceType.Lineage, null, null, ref, null);\r
+ }\r
}\r
}else{\r
- //dataset as original source\r
- TermUri idTerm = TermUri.DWC_DATASET_ID;\r
- TermUri strTerm = TermUri.DWC_DATASET_NAME;\r
- Reference<?> citation = getReference(item, resultList, idTerm, strTerm);\r
- taxonBase.addSource(null, null, citation, null);\r
+ String message = "DatasetUse type not yet implemented. Can't import dataset information.";\r
+ fireWarningEvent(message, item, 4);\r
}\r
\r
//remove to later check if all attributes were used\r
- item.remove(TermUri.DWC_DATASET_ID);\r
- item.remove(TermUri.DWC_DATASET_NAME);\r
+ item.remove(idTerm);\r
+ item.remove(strTerm);\r
\r
}\r
\r
return id;\r
}\r
\r
// Resolves the nameAccordingTo (secundum) reference of a record.\r
// New version: returns null when datasets are configured to be used as secundum reference; otherwise\r
// delegates to getReference(...) with the DWC_NAME_ACCORDING_TO_ID / DWC_NAME_ACCORDING_TO terms and\r
// idIsInternal = false, returning the mapped reference instead of the bare Reference.\r
- private Reference<?> getNameAccordingTo(CsvStreamItem item, List<MappedCdmBase> resultList) {\r
- TermUri idTerm = TermUri.DWC_NAME_ACCORDING_TO_ID;\r
- TermUri strTerm = TermUri.DWC_NAME_ACCORDING_TO;\r
- Reference<?> secRef = getReference(item, resultList, idTerm, strTerm);\r
- return secRef;\r
- \r
+ private MappedCdmBase<Reference> getNameAccordingTo(CsvStreamItem item, List<MappedCdmBase> resultList) {\r
+ if (config.isDatasetsAsSecundumReference()){\r
+ //TODO store nameAccordingTo info somewhere else or let the user define where to store it.\r
+ return null;\r
+ }else{\r
+ TermUri idTerm = TermUri.DWC_NAME_ACCORDING_TO_ID;\r
+ TermUri strTerm = TermUri.DWC_NAME_ACCORDING_TO;\r
+ MappedCdmBase<Reference> secRef = getReference(item, resultList, idTerm, strTerm, false);\r
+ return secRef;\r
+ }\r
}\r
\r
// Determines the nomenclatural code for a record. Only the tail of the method is visible in this fragment.\r
// Visible part, new version: the DWC_KINGDOM value, if present, is compared case-insensitively\r
// (Plantae/Fungi -> ICNAFP, Animalia/Protozoa -> ICZN); if no code has been found by then, the code from\r
// the import configuration is used when one is set. The result can therefore still be null.\r
private NomenclaturalCode getNomCode(CsvStreamItem item) {\r
}\r
// by Kingdom\r
String strKingdom = getValue(item, TermUri.DWC_KINGDOM);\r
- if (strKingdom.equalsIgnoreCase("Plantae")){\r
- nomCode = NomenclaturalCode.ICBN;\r
- }else if (strKingdom.equalsIgnoreCase("Animalia")){\r
- nomCode = NomenclaturalCode.ICZN;\r
- }else if (strKingdom.equalsIgnoreCase("Fungi")){\r
- nomCode = NomenclaturalCode.ICBN;\r
+ if (strKingdom != null){\r
+ if (strKingdom.equalsIgnoreCase("Plantae")){\r
+ nomCode = NomenclaturalCode.ICNAFP;\r
+ }else if (strKingdom.equalsIgnoreCase("Fungi")){\r
+ nomCode = NomenclaturalCode.ICNAFP;\r
+ }else if (strKingdom.equalsIgnoreCase("Animalia")){\r
+ nomCode = NomenclaturalCode.ICZN;\r
+ }else if (strKingdom.equalsIgnoreCase("Protozoa")){\r
+ nomCode = NomenclaturalCode.ICZN;\r
+ }\r
}\r
+ \r
//TODO further kingdoms\r
if (nomCode == null){\r
//TODO warning\r
+ if (config.getNomenclaturalCode() != null){\r
+ nomCode = config.getNomenclaturalCode();\r
+ }\r
}\r
return nomCode;\r
}\r
\r
\r
- private TaxonNameBase<?,?> getScientificName(CsvStreamItem item, NomenclaturalCode nomCode, Rank rank, List<MappedCdmBase> resultList) {\r
+ private TaxonNameBase<?,?> getScientificName(CsvStreamItem item, NomenclaturalCode nomCode, Rank rank, List<MappedCdmBase> resultList, Reference sourceReference) {\r
TaxonNameBase<?,?> name = null;\r
String strScientificName = getValue(item, TermUri.DWC_SCIENTIFIC_NAME);\r
//Name\r
if (strScientificName != null){\r
- INonViralNameParser<?> parser = NonViralNameParserImpl.NewInstance();\r
name = parser.parseFullName(strScientificName, nomCode, rank);\r
- if (rank != null && name != null && name.getRank() != null && \r
- ! rank.equals(name.getRank())){\r
- String message = "Parsed rank %s (%s) differs from rank %s given by fields 'taxonRank' or 'verbatimTaxonRank'";\r
- message = String.format(message, name.getRank().getTitleCache(), strScientificName, rank.getTitleCache());\r
- fireWarningEvent(message, item, 4);\r
+ if ( rank != null && name != null && name.getRank() != null && ! rank.equals(name.getRank())){\r
+ if (config.isValidateRankConsistency()){\r
+ String message = "Parsed rank %s (%s) differs from rank %s given by fields 'taxonRank' or 'verbatimTaxonRank'";\r
+ message = String.format(message, name.getRank().getTitleCache(), strScientificName, rank.getTitleCache());\r
+ fireWarningEvent(message, item, 4);\r
+ }\r
}\r
checkAuthorship(name, item);\r
resultList.add(new MappedCdmBase(TermUri.DWC_SCIENTIFIC_NAME, strScientificName, name));\r
//By ID\r
String strScientificNameId = getValue(item, TermUri.DWC_SCIENTIFIC_NAME_ID);\r
if (strScientificNameId != null){\r
- if (state.getConfig().isScientificNameIdAsOriginalSourceId()){\r
+ if (config.isScientificNameIdAsOriginalSourceId()){\r
if (name != null){\r
- Reference<?> sourceReference = null; //FIXME\r
- IdentifiableSource source = IdentifiableSource.NewInstance(strScientificNameId, TermUri.DWC_SCIENTIFIC_NAME_ID.toString(), sourceReference, null);\r
+ IdentifiableSource source = IdentifiableSource.NewDataImportInstance(strScientificNameId, \r
+ TermUri.DWC_SCIENTIFIC_NAME_ID.toString(), sourceReference);\r
name.addSource(source);\r
}\r
}else{\r
//namePublishedIn\r
TermUri idTerm = TermUri.DWC_NAME_PUBLISHED_IN_ID;\r
TermUri strTerm = TermUri.DWC_NAME_PUBLISHED_IN;\r
- Reference<?> nomRef = getReference(item, resultList, idTerm, strTerm);\r
+ MappedCdmBase<Reference> nomRef = getReference(item, resultList, idTerm, strTerm, false);\r
\r
if (name != null){\r
if (nomRef != null){\r
- name.setNomenclaturalReference(nomRef); //check if name already has a nomRef, shouldn't be the case usually\r
+ name.setNomenclaturalReference(nomRef.getCdmBase()); //check if name already has a nomRef, shouldn't be the case usually\r
}\r
}else{\r
if (nomRef != null){\r
}\r
\r
\r
- private Reference<?> getReference(CsvStreamItem item, List<MappedCdmBase> resultList, TermUri idTerm, TermUri strTerm) {\r
+ /**\r
+ * General method to handle references used for multiple attributes.\r
+ * @param item\r
+ * @param resultList\r
+ * @param idTerm\r
+ * @param strTerm\r
+ * @param idIsInternal\r
+ * @return\r
+ */\r
+ private MappedCdmBase<Reference> getReference(CsvStreamItem item, List<MappedCdmBase> resultList, TermUri idTerm, TermUri strTerm, boolean idIsInternal) {\r
+ Reference<?> newRef = null;\r
+ Reference<?> sourceCitation = null;\r
\r
- Reference result = null;\r
+ MappedCdmBase<Reference> result = null;\r
if (exists(idTerm, item) || exists(strTerm, item)){\r
- String nomRefId = CdmUtils.Nz(item.get(idTerm)).trim();\r
- String nomRefStr = CdmUtils.Nz(item.get(strTerm)).trim();\r
- if (StringUtils.isNotBlank(nomRefId)){\r
- List<Reference> nomRefs = state.get(idTerm.toString(), nomRefId, Reference.class);\r
- if (nomRefs.size() == 0){\r
- //references should already exist in store if not linking to external links like URLs\r
- String message = "External namePublishedInIDs are not yet supported";\r
- fireWarningEvent(message, item, 4);\r
+ String refId = CdmUtils.Nz(item.get(idTerm)).trim();\r
+ String refStr = CdmUtils.Nz(item.get(strTerm)).trim();\r
+ if (StringUtils.isNotBlank(refId)){\r
+ List<Reference> references = state.get(idTerm.toString(), refId, Reference.class);\r
+ if (references.size() == 0){\r
+ if (! idIsInternal){\r
+ //references should already exist in store if not linking to external links like URLs\r
+ String message = "External namePublishedInIDs are not yet supported";\r
+ fireWarningEvent(message, item, 4);\r
+ }else{\r
+ newRef = ReferenceFactory.newGeneric(); //TODO handle other types if possible\r
+ newRef.addImportSource(refId, idTerm.toString(), sourceCitation, null);\r
+ MappedCdmBase<Reference> idResult = new MappedCdmBase<Reference>(idTerm, refId, newRef);\r
+ resultList.add(idResult);\r
+ }\r
}else{\r
//TODO handle list.size > 1 , do we need a list here ?\r
- result = nomRefs.get(0);\r
+ result = new MappedCdmBase<Reference>(idTerm, refId , references.get(0));\r
}\r
}\r
if (result == null){\r
- List<Reference> nomRefs = state.get(strTerm.toString(), nomRefStr, Reference.class);\r
+ List<Reference> nomRefs = state.get(strTerm.toString(), refStr, Reference.class);\r
if (nomRefs.size() > 0){\r
//TODO handle list.size > 1 , do we need a list here ?\r
- result = nomRefs.get(0);\r
+ result = new MappedCdmBase<Reference>(strTerm, refStr , nomRefs.get(0));\r
}else{\r
// new Reference\r
- result = ReferenceFactory.newGeneric(); //TODO handle other types if possible\r
- result.setTitleCache(nomRefStr, true);\r
+ if (newRef == null){\r
+ newRef = ReferenceFactory.newGeneric(); //TODO handle other types if possible\r
+ }\r
+ newRef.setTitleCache(refStr, true);\r
//TODO distinguish available year, authorship, etc. if\r
- resultList.add(new MappedCdmBase(strTerm, nomRefStr, result));\r
+ result = new MappedCdmBase<Reference>(strTerm, refStr, newRef);\r
+ resultList.add(result);\r
}\r
}\r
}\r
\r
if (! nvName.isProtectedTitleCache()){\r
if (StringUtils.isBlank(nvName.getAuthorshipCache())){\r
- //TODO some more sophisticated stuff can be done here like parsing etc.\r
- nvName.setAuthorshipCache(strAuthors);\r
- //TODO warning (scientific name should always include authorship)\r
+ if (nvName.isInstanceOf(BotanicalName.class) || nvName.isInstanceOf(ZoologicalName.class)){\r
+ //TODO can't we also parse NonViralNames correctly ?\r
+ try {\r
+ parser.parseAuthors(nvName, strAuthors);\r
+ } catch (StringNotParsableException e) {\r
+ nvName.setAuthorshipCache(strAuthors);\r
+ } \r
+ }else{\r
+ nvName.setAuthorshipCache(strAuthors);\r
+ }\r
+ //TODO throw warning (scientific name should always include authorship) by DwC definition\r
}\r
}\r
\r
}\r
\r
\r
+ /**\r
+ * Creates an empty taxon object with a given status.\r
+ * @param item\r
+ * @return\r
+ */\r
private TaxonBase<?> getTaxonBase(CsvStreamItem item) {\r
TaxonNameBase<?,?> name = null;\r
Reference<?> sec = null;\r
TaxonBase<?> result;\r
String taxStatus = item.get(TermUri.DWC_TAXONOMIC_STATUS);\r
String status = "";\r
- boolean isMissaplied = false;\r
+ \r
if (taxStatus != null){\r
- if (taxStatus.matches("accepted|valid")){\r
+ if (taxStatus.matches("accepted.*|valid")){\r
status += "A";\r
- }else if (taxStatus.matches(".*synonym|invalid")){\r
+ } else if (taxStatus.matches(".*synonym|invalid|not accepted")){ //not accepted comes from scratchpads\r
status += "S";\r
- }if (taxStatus.matches("misapplied")){\r
+ } else if (taxStatus.matches("misapplied.*")){\r
status += "M";\r
- }else{\r
+ } else{\r
status += "?";\r
}\r
item.remove(TermUri.DWC_TAXONOMIC_STATUS);\r
message = String.format(message, status);\r
fireWarningEvent(message, item, 6);\r
}\r
- }else if (status.contains("S")){\r
+ } else if (status.contains("S")){\r
result = Synonym.NewInstance(name, sec);\r
- }else{\r
+ } else{\r
result = Taxon.NewUnknownStatusInstance(name, sec);\r
}\r
\r
Set<String> keySet = getKeySet(key, fkMap);\r
keySet.add(value);\r
}\r
- if (state.getConfig().isDeduplicateNamePublishedIn()){\r
+ if (config.isDeduplicateNamePublishedIn()){\r
if ( hasValue(value = item.get(key = TermUri.DWC_NAME_PUBLISHED_IN.toString()))){\r
Set<String> keySet = getKeySet(key, fkMap);\r
keySet.add(value);\r
}\r
\r
//nameAccordingTo\r
- if ( hasValue(value = item.get(key = TermUri.DWC_NAME_ACCORDING_TO_ID.toString()))){\r
- Set<String> keySet = getKeySet(key, fkMap);\r
- keySet.add(value);\r
- }\r
- if ( hasValue(value = item.get(key = TermUri.DWC_NAME_ACCORDING_TO.toString()))){\r
- Set<String> keySet = getKeySet(key, fkMap);\r
- keySet.add(value);\r
- }\r
- \r
- \r
- //dataset\r
- if (! state.getConfig().isDatasetsAsClassifications()){\r
- //nameAccordingTo\r
- if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_ID.toString()))){\r
+ if (! config.isDatasetsAsSecundumReference()){\r
+ if ( hasValue(value = item.get(key = TermUri.DWC_NAME_ACCORDING_TO_ID.toString()))){\r
Set<String> keySet = getKeySet(key, fkMap);\r
keySet.add(value);\r
}\r
- if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_NAME.toString()))){\r
+ if ( hasValue(value = item.get(key = TermUri.DWC_NAME_ACCORDING_TO.toString()))){\r
Set<String> keySet = getKeySet(key, fkMap);\r
keySet.add(value);\r
}\r
}\r
\r
+ //dataset\r
+ if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_ID.toString()))){\r
+ Set<String> keySet = getKeySet(key, fkMap);\r
+ keySet.add(value);\r
+ }\r
+ if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_NAME.toString()))){\r
+ Set<String> keySet = getKeySet(key, fkMap);\r
+ keySet.add(value);\r
+ }\r
+ \r
+ }\r
+ \r
+ \r
// Collects the source namespaces this converter requires: the namePublishedIn and dataset terms are\r
// always included, the nameAccordingTo terms only if datasets are not used as secundum reference.\r
// Returns a freshly created HashSet of the term URIs' string forms.\r
+ @Override\r
+ public Set<String> requiredSourceNamespaces() {\r
+ Set<String> result = new HashSet<String>();\r
+ result.add(TermUri.DWC_NAME_PUBLISHED_IN_ID.toString());\r
+ result.add(TermUri.DWC_NAME_PUBLISHED_IN.toString());\r
+ if (!config.isDatasetsAsSecundumReference()){\r
+ result.add(TermUri.DWC_NAME_ACCORDING_TO_ID.toString());\r
+ result.add(TermUri.DWC_NAME_ACCORDING_TO.toString());\r
+ }\r
+ result.add(TermUri.DWC_DATASET_ID.toString());\r
+ result.add(TermUri.DWC_DATASET_NAME.toString());\r
+ return result;\r
}\r
\r
//** ***************************** TO STRING *********************************************/\r