1
|
/**
|
2
|
* Copyright (C) 2009 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.io.dwca.in;
|
10
|
|
11
|
import java.util.ArrayList;
|
12
|
import java.util.HashSet;
|
13
|
import java.util.List;
|
14
|
import java.util.Map;
|
15
|
import java.util.Set;
|
16
|
|
17
|
import org.apache.commons.lang.StringUtils;
|
18
|
import org.apache.log4j.Logger;
|
19
|
|
20
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
21
|
import eu.etaxonomy.cdm.io.stream.IPartitionableConverter;
|
22
|
import eu.etaxonomy.cdm.io.stream.IReader;
|
23
|
import eu.etaxonomy.cdm.io.stream.ItemFilter;
|
24
|
import eu.etaxonomy.cdm.io.stream.ListReader;
|
25
|
import eu.etaxonomy.cdm.io.stream.MappedCdmBase;
|
26
|
import eu.etaxonomy.cdm.io.stream.PartitionableConverterBase;
|
27
|
import eu.etaxonomy.cdm.io.stream.StreamItem;
|
28
|
import eu.etaxonomy.cdm.io.stream.terms.TermUri;
|
29
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
30
|
import eu.etaxonomy.cdm.model.common.Language;
|
31
|
import eu.etaxonomy.cdm.model.common.LanguageString;
|
32
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
33
|
import eu.etaxonomy.cdm.model.taxon.Classification;
|
34
|
import eu.etaxonomy.cdm.model.taxon.Synonym;
|
35
|
import eu.etaxonomy.cdm.model.taxon.SynonymType;
|
36
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
37
|
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
38
|
|
39
|
/**
|
40
|
* @author a.mueller
|
41
|
* @since 23.11.2011
|
42
|
*/
|
43
|
public class DwcTaxonCsv2CdmTaxonRelationConverter
|
44
|
extends PartitionableConverterBase<DwcaDataImportConfiguratorBase, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase>>
|
45
|
implements IPartitionableConverter<StreamItem, IReader<CdmBase>, String> {
|
46
|
|
47
|
private static final String SINGLE_CLASSIFICATION_ID = "1";
|
48
|
|
49
|
private static final String SINGLE_CLASSIFICATION = "Single Classification";
|
50
|
|
51
|
private static Logger logger = Logger.getLogger(DwcTaxonCsv2CdmTaxonRelationConverter.class);
|
52
|
|
53
|
private static final String ID = "id";
|
54
|
|
55
|
/**
 * Creates a converter working on the given import state.
 *
 * @param state the import state; handed on unchanged to the superclass constructor
 */
public DwcTaxonCsv2CdmTaxonRelationConverter(DwcaDataImportStateBase state) {
    super(state);
}
|
58
|
|
59
|
@Override
|
60
|
public ItemFilter<StreamItem> getItemFilter() {
|
61
|
if (!config.isDoSplitRelationshipImport()){
|
62
|
return null;
|
63
|
}else{
|
64
|
return new DwcTaxonStreamItem2CdmTaxonConverter<>(state, true); //the converter also is implementing the ItemFilter interfacem, this way we guarantee that the evaluation if the item is a synonym, lower or higher taxon is the same during taxon creation and relationship creation
|
65
|
}
|
66
|
}
|
67
|
|
68
|
@Override
|
69
|
public IReader<MappedCdmBase<? extends CdmBase>> map(StreamItem item){
|
70
|
List<MappedCdmBase<? extends CdmBase>> resultList = new ArrayList<>();
|
71
|
|
72
|
Map<String, String> csvRecord = item.map;
|
73
|
Reference sourceReference = state.getTransactionalSourceReference();
|
74
|
String sourceReferecenDetail = null;
|
75
|
|
76
|
String id = csvRecord.get(ID);
|
77
|
TaxonBase<?> taxonBase = getTaxonBase(id, item, null, state);
|
78
|
if (taxonBase == null){
|
79
|
String warning = "Taxon not available for id '%s'.";
|
80
|
warning = String.format(warning, id);
|
81
|
fireWarningEvent(warning, item, 8);
|
82
|
}else{
|
83
|
|
84
|
MappedCdmBase<? extends CdmBase> mcb = new MappedCdmBase<>(taxonBase);
|
85
|
resultList.add(mcb);
|
86
|
|
87
|
handleAcceptedNameUsage(item, state, taxonBase, id);
|
88
|
|
89
|
handleParentNameUsage(item, state, taxonBase, id, resultList);
|
90
|
|
91
|
handleKingdom(item, state);
|
92
|
|
93
|
handlePhylum(item, state);
|
94
|
|
95
|
handleClass(item, state);
|
96
|
|
97
|
handleOrder(item, state);
|
98
|
|
99
|
handleFamily(item, state);
|
100
|
|
101
|
handleGenus(item, state);
|
102
|
|
103
|
handleSubGenus(item, state);
|
104
|
|
105
|
}
|
106
|
csvRecord.remove(ID);
|
107
|
|
108
|
|
109
|
// <!-- Top level group; listed as kingdom but may be interpreted as domain or superkingdom
|
110
|
// The following eight groups are recognized: Animalia, Archaea, Bacteria, Chromista,
|
111
|
// Fungi, Plantae, Protozoa, Viruses -->
|
112
|
// <field index='10' term='http://rs.tdwg.org/dwc/terms/kingdom'/>
|
113
|
|
114
|
// <!-- Specific epithet; for hybrids, the multiplication symbol is included in the epithet -->
|
115
|
// <field index='17' term='http://rs.tdwg.org/dwc/terms/specificEpithet'/>
|
116
|
|
117
|
// <!-- Infraspecific epithet -->
|
118
|
// <field index='18' term='http://rs.tdwg.org/dwc/terms/infraspecificEpithet'/>
|
119
|
|
120
|
// <!-- Acceptance status published in -->
|
121
|
// <field index='20' term='http://purl.org/dc/terms/source'/>
|
122
|
|
123
|
// <!-- Reference in which the scientific name was first published -->
|
124
|
// <field index='21' term='http://rs.tdwg.org/dwc/terms/namePublishedIn'/>
|
125
|
|
126
|
// <!-- Scrutiny date -->
|
127
|
// <field index='23' term='http://purl.org/dc/terms/modified'/>
|
128
|
// <!-- Additional data for the taxon -->
|
129
|
|
130
|
// <field index='24' term='http://purl.org/dc/terms/description'/>
|
131
|
// </core>
|
132
|
|
133
|
return new ListReader<>(resultList);
|
134
|
}
|
135
|
|
136
|
@Override
|
137
|
public String getSourceId(StreamItem item) {
|
138
|
String id = item.get(ID);
|
139
|
return id;
|
140
|
}
|
141
|
|
142
|
private void handleSubGenus(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
|
143
|
// TODO Auto-generated method stub
|
144
|
}
|
145
|
|
146
|
private void handleGenus(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
|
147
|
// TODO Auto-generated method stub
|
148
|
}
|
149
|
|
150
|
private void handleFamily(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
|
151
|
// TODO Auto-generated method stub
|
152
|
}
|
153
|
|
154
|
private void handleOrder(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
|
155
|
// TODO Auto-generated method stub
|
156
|
}
|
157
|
|
158
|
private void handleClass(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
|
159
|
// TODO Auto-generated method stub
|
160
|
}
|
161
|
|
162
|
private void handlePhylum(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
|
163
|
// TODO Auto-generated method stub
|
164
|
}
|
165
|
|
166
|
private void handleKingdom(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
|
167
|
// TODO Auto-generated method stub
|
168
|
}
|
169
|
|
170
|
private void handleParentNameUsage(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state,
|
171
|
TaxonBase<?> taxonBase, String id, List<MappedCdmBase<? extends CdmBase>> resultList) {
|
172
|
|
173
|
if (exists(TermUri.DWC_PARENT_NAME_USAGE_ID, item) || exists(TermUri.DWC_PARENT_NAME_USAGE, item)){
|
174
|
String parentId = item.get(TermUri.DWC_PARENT_NAME_USAGE_ID);
|
175
|
if (id.equals(parentId)){
|
176
|
//taxon can't be it's own child
|
177
|
//TODO log
|
178
|
return;
|
179
|
}else if (taxonBase.isInstanceOf(Taxon.class)){
|
180
|
Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class);
|
181
|
Taxon parentTaxon = getTaxonBase(parentId, item, Taxon.class, state);
|
182
|
if (parentTaxon == null){
|
183
|
String message = "Can't find parent taxon with id '%s' and NON-ID parent Name Usage not yet implemented.";
|
184
|
message = String.format(message, StringUtils.isBlank(parentId)?"-": parentId);
|
185
|
fireWarningEvent(message, item, 4);
|
186
|
}else{
|
187
|
Classification classification = getClassification(item, resultList);
|
188
|
Reference citationForParentChild = null;
|
189
|
if (classification == null){
|
190
|
String warning = "Classification not found. Can't create parent-child relationship";
|
191
|
fireWarningEvent(warning, item, 12);
|
192
|
}else{
|
193
|
try {
|
194
|
classification.addParentChild(parentTaxon, taxon, citationForParentChild, null);
|
195
|
} catch (IllegalStateException e) {
|
196
|
String message = "Exception occurred when trying to add a child to a parent in a classification: %s";
|
197
|
message = String.format(message, e.getMessage());
|
198
|
fireWarningEvent(message, item, 12);
|
199
|
}
|
200
|
}
|
201
|
}
|
202
|
}else if (taxonBase.isInstanceOf(Synonym.class)){
|
203
|
if (! acceptedNameUsageExists(item) && state.getConfig().isUseParentAsAcceptedIfAcceptedNotExists()){
|
204
|
handleAcceptedNameUsageParam(item, state, taxonBase, id, parentId);
|
205
|
}else{
|
206
|
String message = "PARENT_NAME_USAGE given for Synonym and ACCEPTED_NAME_USAGE also exists or configuration does not allow" +
|
207
|
"to use ACCEPTED_NAME_USAGE as parent. This is not allowed in CDM.";
|
208
|
//TODO check "is this Taxon"
|
209
|
fireWarningEvent(message, item, 4);
|
210
|
}
|
211
|
}else{
|
212
|
String message = "Unhandled case";
|
213
|
fireWarningEvent(message, item, 12);
|
214
|
}
|
215
|
}
|
216
|
}
|
217
|
|
218
|
private Classification getClassification(StreamItem item, List<MappedCdmBase<? extends CdmBase>> resultList) {
|
219
|
Set<Classification> resultSet = new HashSet<>();
|
220
|
//
|
221
|
if (config.isDatasetsAsClassifications()){
|
222
|
String datasetKey = item.get(TermUri.DWC_DATASET_ID);
|
223
|
if (CdmUtils.areBlank(datasetKey,item.get(TermUri.DWC_DATASET_NAME))){
|
224
|
datasetKey = DwcTaxonStreamItem2CdmTaxonConverter.NO_DATASET;
|
225
|
}
|
226
|
|
227
|
resultSet.addAll(state.get(TermUri.DWC_DATASET_ID.toString(), datasetKey, Classification.class));
|
228
|
resultSet.addAll(state.get(TermUri.DWC_DATASET_NAME.toString(), item.get(TermUri.DWC_DATASET_NAME), Classification.class));
|
229
|
//TODO accordingToAsClassification
|
230
|
//single classification
|
231
|
}else{
|
232
|
resultSet.addAll(state.get(SINGLE_CLASSIFICATION, SINGLE_CLASSIFICATION_ID, Classification.class));
|
233
|
|
234
|
//classification does not yet exist
|
235
|
if (resultSet.isEmpty()){
|
236
|
Classification newClassification = Classification.NewInstance("Darwin Core Classification");
|
237
|
if (config.getClassificationUuid() != null){
|
238
|
newClassification.setUuid(config.getClassificationUuid());
|
239
|
}
|
240
|
if (StringUtils.isNotBlank(config.getClassificationName())){
|
241
|
newClassification.setName(LanguageString.NewInstance(config.getClassificationName(), Language.DEFAULT()));
|
242
|
}
|
243
|
resultList.add(new MappedCdmBase<>(SINGLE_CLASSIFICATION, SINGLE_CLASSIFICATION_ID, newClassification));
|
244
|
resultSet.add(newClassification);
|
245
|
}
|
246
|
}
|
247
|
if (resultSet.isEmpty()){
|
248
|
return null;
|
249
|
}else if (resultSet.size() > 1){
|
250
|
fireWarningEvent("Dataset is ambigous. I take arbitrary one.", item, 8);
|
251
|
}
|
252
|
return resultSet.iterator().next();
|
253
|
}
|
254
|
|
255
|
private void handleAcceptedNameUsage(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state, TaxonBase<?> taxonBase, String id) {
|
256
|
if (acceptedNameUsageExists(item)){
|
257
|
String accId = item.get(TermUri.DWC_ACCEPTED_NAME_USAGE_ID);
|
258
|
handleAcceptedNameUsageParam(item, state, taxonBase, id, accId);
|
259
|
}else{
|
260
|
if (logger.isDebugEnabled()){logger.debug("No accepted name usage");}
|
261
|
}
|
262
|
}
|
263
|
|
264
|
private void handleAcceptedNameUsageParam(StreamItem item,
|
265
|
DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state, TaxonBase<?> taxonBase, String id, String accId) {
|
266
|
if (id.equals(accId)){
|
267
|
//mapping to itself needs no further handling
|
268
|
}else{
|
269
|
String taxStatus = item.get(TermUri.DWC_TAXONOMIC_STATUS);
|
270
|
|
271
|
Taxon accTaxon = getTaxonBase(accId, item, Taxon.class, state);
|
272
|
if (taxonBase.isInstanceOf(Synonym.class)){
|
273
|
Synonym synonym = CdmBase.deproxy(taxonBase, Synonym.class);
|
274
|
|
275
|
if (accTaxon == null){
|
276
|
fireWarningEvent("NON-ID accepted Name Usage not yet implemented or taxon for name usage id not available", item, 4);
|
277
|
} else{
|
278
|
accTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
|
279
|
}
|
280
|
|
281
|
// FIXME : no information regarding misapplied name available at this point,
|
282
|
// hence a regexp check for 'misapplied' is done to add them as a relationship
|
283
|
} else if(taxonBase.isInstanceOf(Taxon.class) && taxStatus != null && taxStatus.matches("misapplied.*")) {
|
284
|
if (accTaxon == null){
|
285
|
fireWarningEvent("NON-ID based accepted (misapplied) name usage not yet implemented or taxon for name usage id not available", item, 4);
|
286
|
} else{
|
287
|
Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class);
|
288
|
accTaxon.addMisappliedName(taxon,null,null);
|
289
|
}
|
290
|
} else {
|
291
|
String message = "Accepted name usage is not of type synonym. This is not allowed in CDM. Can't create realtionship";
|
292
|
//TODO check "is this Taxon"
|
293
|
fireWarningEvent(message, item, 4);
|
294
|
}
|
295
|
}
|
296
|
}
|
297
|
|
298
|
private boolean acceptedNameUsageExists(StreamItem item) {
|
299
|
return exists(TermUri.DWC_ACCEPTED_NAME_USAGE_ID, item) || exists(TermUri.DWC_ACCEPTED_NAME_USAGE, item);
|
300
|
}
|
301
|
|
302
|
//**************************** PARTITIONABLE ************************************************
|
303
|
|
304
|
@Override
|
305
|
protected void makeForeignKeysForItem(StreamItem item, Map<String, Set<String>> fkMap){
|
306
|
String value;
|
307
|
String key;
|
308
|
if ( hasValue(value = item.get(ID))){
|
309
|
key = TermUri.DWC_TAXON.toString();
|
310
|
Set<String> keySet = getKeySet(key, fkMap);
|
311
|
keySet.add(value);
|
312
|
}
|
313
|
if ( hasValue(value = item.get(TermUri.DWC_ACCEPTED_NAME_USAGE_ID.toString()))){
|
314
|
key = TermUri.DWC_TAXON.toString();
|
315
|
Set<String> keySet = getKeySet(key, fkMap);
|
316
|
keySet.add(value);
|
317
|
}
|
318
|
if ( hasValue(value = item.get(key = TermUri.DWC_PARENT_NAME_USAGE_ID.toString())) ){
|
319
|
key = TermUri.DWC_TAXON.toString();
|
320
|
Set<String> keySet = getKeySet(key, fkMap);
|
321
|
keySet.add(value);
|
322
|
}
|
323
|
if ( hasValue(value = item.get(key = TermUri.DWC_NAME_ACCORDING_TO_ID.toString()))){
|
324
|
//TODO
|
325
|
Set<String> keySet = getKeySet(key, fkMap);
|
326
|
keySet.add(value);
|
327
|
}
|
328
|
|
329
|
//classification
|
330
|
if (config.isDatasetsAsClassifications()){
|
331
|
boolean hasDefinedClassification = false;
|
332
|
if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_ID.toString()))){
|
333
|
Set<String> keySet = getKeySet(key, fkMap);
|
334
|
keySet.add(value);
|
335
|
hasDefinedClassification = true;
|
336
|
}
|
337
|
if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_NAME.toString()))){
|
338
|
Set<String> keySet = getKeySet(key, fkMap);
|
339
|
keySet.add(value);
|
340
|
hasDefinedClassification = true;
|
341
|
}
|
342
|
if (! hasDefinedClassification){
|
343
|
Set<String> keySet = getKeySet(TermUri.DWC_DATASET_ID.toString(), fkMap);
|
344
|
value = DwcTaxonStreamItem2CdmTaxonConverter.NO_DATASET;
|
345
|
keySet.add(value);
|
346
|
}
|
347
|
}else{
|
348
|
key = SINGLE_CLASSIFICATION;
|
349
|
value = SINGLE_CLASSIFICATION_ID;
|
350
|
Set<String> keySet = getKeySet(key, fkMap);
|
351
|
keySet.add(value);
|
352
|
}
|
353
|
|
354
|
//TODO cont.
|
355
|
}
|
356
|
|
357
|
@Override
|
358
|
public Set<String> requiredSourceNamespaces() {
|
359
|
Set<String> result = new HashSet<>();
|
360
|
|
361
|
result.add(TermUri.DWC_TAXON.toString());
|
362
|
|
363
|
result.add(TermUri.DWC_ACCEPTED_NAME_USAGE_ID.toString());
|
364
|
result.add(TermUri.DWC_PARENT_NAME_USAGE_ID.toString());
|
365
|
|
366
|
result.add(TermUri.DWC_NAME_ACCORDING_TO_ID.toString());
|
367
|
result.add(TermUri.DWC_NAME_ACCORDING_TO.toString());
|
368
|
if (config.isDatasetsAsClassifications()){
|
369
|
result.add(TermUri.DWC_DATASET_ID.toString());
|
370
|
result.add(TermUri.DWC_DATASET_NAME.toString());
|
371
|
}else{
|
372
|
result.add(SINGLE_CLASSIFICATION);
|
373
|
}
|
374
|
|
375
|
return result;
|
376
|
}
|
377
|
|
378
|
//************************************* TO STRING ********************************************
|
379
|
|
380
|
@Override
|
381
|
public String toString(){
|
382
|
return this.getClass().getName();
|
383
|
}
|
384
|
}
|