Project

General

Profile

Download (14.2 KB) Statistics
| Branch: | Tag: | Revision:
/**
* Copyright (C) 2009 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
9
package eu.etaxonomy.cdm.io.dwca.in;
10

    
11
import java.util.ArrayList;
12
import java.util.HashSet;
13
import java.util.List;
14
import java.util.Map;
15
import java.util.Set;
16

    
17
import org.apache.commons.lang.StringUtils;
18
import org.apache.log4j.Logger;
19

    
20
import eu.etaxonomy.cdm.common.CdmUtils;
21
import eu.etaxonomy.cdm.io.stream.IPartitionableConverter;
22
import eu.etaxonomy.cdm.io.stream.IReader;
23
import eu.etaxonomy.cdm.io.stream.ItemFilter;
24
import eu.etaxonomy.cdm.io.stream.ListReader;
25
import eu.etaxonomy.cdm.io.stream.MappedCdmBase;
26
import eu.etaxonomy.cdm.io.stream.PartitionableConverterBase;
27
import eu.etaxonomy.cdm.io.stream.StreamItem;
28
import eu.etaxonomy.cdm.io.stream.terms.TermUri;
29
import eu.etaxonomy.cdm.model.common.CdmBase;
30
import eu.etaxonomy.cdm.model.common.Language;
31
import eu.etaxonomy.cdm.model.common.LanguageString;
32
import eu.etaxonomy.cdm.model.reference.Reference;
33
import eu.etaxonomy.cdm.model.taxon.Classification;
34
import eu.etaxonomy.cdm.model.taxon.Synonym;
35
import eu.etaxonomy.cdm.model.taxon.SynonymType;
36
import eu.etaxonomy.cdm.model.taxon.Taxon;
37
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
38

    
39
/**
40
 * @author a.mueller
41
 * @since 23.11.2011
42
 */
43
public class DwcTaxonCsv2CdmTaxonRelationConverter
44
        extends PartitionableConverterBase<DwcaDataImportConfiguratorBase, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase>>
45
        implements IPartitionableConverter<StreamItem, IReader<CdmBase>, String> {
46

    
47
    private static final String SINGLE_CLASSIFICATION_ID = "1";
48

    
49
	private static final String SINGLE_CLASSIFICATION = "Single Classification";
50

    
51
	private static Logger logger = Logger.getLogger(DwcTaxonCsv2CdmTaxonRelationConverter.class);
52

    
53
	private static final String ID = "id";
54

    
55
	public DwcTaxonCsv2CdmTaxonRelationConverter(DwcaDataImportStateBase state) {
56
		super(state);
57
	}
58

    
59
    @Override
60
    public ItemFilter<StreamItem> getItemFilter() {
61
        if (!config.isDoSplitRelationshipImport()){
62
            return null;
63
        }else{
64
            return new DwcTaxonStreamItem2CdmTaxonConverter<>(state, true);  //the converter also is implementing the ItemFilter interfacem, this way we guarantee that the evaluation if the item is a synonym, lower or higher taxon is the same during taxon creation and relationship creation
65
        }
66
    }
67

    
68
    @Override
69
    public IReader<MappedCdmBase<? extends CdmBase>> map(StreamItem item){
70
		List<MappedCdmBase<? extends CdmBase>> resultList = new ArrayList<>();
71

    
72
		Map<String, String> csvRecord = item.map;
73
		Reference sourceReference = state.getTransactionalSourceReference();
74
		String sourceReferecenDetail = null;
75

    
76
		String id = csvRecord.get(ID);
77
		TaxonBase<?> taxonBase = getTaxonBase(id, item, null, state);
78
		if (taxonBase == null){
79
			String warning = "Taxon not available for id '%s'.";
80
			warning = String.format(warning, id);
81
			fireWarningEvent(warning, item, 8);
82
		}else{
83

    
84
			MappedCdmBase<? extends CdmBase> mcb = new MappedCdmBase<>(taxonBase);
85
			resultList.add(mcb);
86

    
87
			handleAcceptedNameUsage(item, state, taxonBase, id);
88

    
89
			handleParentNameUsage(item, state, taxonBase, id, resultList);
90

    
91
			handleKingdom(item, state);
92

    
93
			handlePhylum(item, state);
94

    
95
			handleClass(item, state);
96

    
97
			handleOrder(item, state);
98

    
99
			handleFamily(item, state);
100

    
101
			handleGenus(item, state);
102

    
103
			handleSubGenus(item, state);
104

    
105
		}
106
		csvRecord.remove(ID);
107

    
108

    
109
//		    <!-- Top level group; listed as kingdom but may be interpreted as domain or superkingdom
110
//		         The following eight groups are recognized: Animalia, Archaea, Bacteria, Chromista,
111
//		         Fungi, Plantae, Protozoa, Viruses -->
112
//		    <field index='10' term='http://rs.tdwg.org/dwc/terms/kingdom'/>
113

    
114
//		    <!-- Specific epithet; for hybrids, the multiplication symbol is included in the epithet -->
115
//		    <field index='17' term='http://rs.tdwg.org/dwc/terms/specificEpithet'/>
116

    
117
//		    <!-- Infraspecific epithet -->
118
//		    <field index='18' term='http://rs.tdwg.org/dwc/terms/infraspecificEpithet'/>
119

    
120
//			<!-- Acceptance status published in -->
121
//		    <field index='20' term='http://purl.org/dc/terms/source'/>
122

    
123
//		    <!-- Reference in which the scientific name was first published -->
124
//		    <field index='21' term='http://rs.tdwg.org/dwc/terms/namePublishedIn'/>
125

    
126
//		    <!-- Scrutiny date -->
127
//		    <field index='23' term='http://purl.org/dc/terms/modified'/>
128
//		    <!-- Additional data for the taxon -->
129

    
130
//		    <field index='24' term='http://purl.org/dc/terms/description'/>
131
//		    </core>
132

    
133
		return new ListReader<>(resultList);
134
	}
135

    
136
	@Override
137
	public String getSourceId(StreamItem item) {
138
		String id = item.get(ID);
139
		return id;
140
	}
141

    
142
	private void handleSubGenus(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
143
		// TODO Auto-generated method stub
144
	}
145

    
146
	private void handleGenus(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
147
		// TODO Auto-generated method stub
148
	}
149

    
150
	private void handleFamily(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
151
		// TODO Auto-generated method stub
152
	}
153

    
154
	private void handleOrder(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
155
		// TODO Auto-generated method stub
156
	}
157

    
158
	private void handleClass(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
159
		// TODO Auto-generated method stub
160
	}
161

    
162
	private void handlePhylum(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
163
		// TODO Auto-generated method stub
164
	}
165

    
166
	private void handleKingdom(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
167
		// TODO Auto-generated method stub
168
	}
169

    
170
	private void handleParentNameUsage(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state,
171
	        TaxonBase<?> taxonBase, String id, List<MappedCdmBase<? extends CdmBase>> resultList) {
172

    
173
	    if (exists(TermUri.DWC_PARENT_NAME_USAGE_ID, item) || exists(TermUri.DWC_PARENT_NAME_USAGE, item)){
174
			String parentId = item.get(TermUri.DWC_PARENT_NAME_USAGE_ID);
175
			if (id.equals(parentId)){
176
				//taxon can't be it's own child
177
				//TODO log
178
				return;
179
			}else if (taxonBase.isInstanceOf(Taxon.class)){
180
				Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class);
181
				Taxon parentTaxon = getTaxonBase(parentId, item, Taxon.class, state);
182
				if (parentTaxon == null){
183
					String message = "Can't find parent taxon with id '%s' and NON-ID parent Name Usage not yet implemented.";
184
					message = String.format(message, StringUtils.isBlank(parentId)?"-": parentId);
185
					fireWarningEvent(message, item, 4);
186
				}else{
187
					Classification classification = getClassification(item, resultList);
188
					Reference citationForParentChild = null;
189
					if (classification == null){
190
						String warning = "Classification not found. Can't create parent-child relationship";
191
						fireWarningEvent(warning, item, 12);
192
					}else{
193
					    try {
194
					        classification.addParentChild(parentTaxon, taxon, citationForParentChild, null);
195
					    } catch (IllegalStateException e) {
196
					        String message = "Exception occurred when trying to add a child to a parent in a classification: %s";
197
					        message = String.format(message, e.getMessage());
198
					        fireWarningEvent(message, item, 12);
199
					    }
200
					}
201
				}
202
			}else if (taxonBase.isInstanceOf(Synonym.class)){
203
				if (! acceptedNameUsageExists(item) && state.getConfig().isUseParentAsAcceptedIfAcceptedNotExists()){
204
					handleAcceptedNameUsageParam(item, state, taxonBase, id, parentId);
205
				}else{
206
					String message = "PARENT_NAME_USAGE given for Synonym and ACCEPTED_NAME_USAGE also exists or configuration does not allow" +
207
							"to use ACCEPTED_NAME_USAGE as parent. This is not allowed in CDM.";
208
					//TODO check "is this Taxon"
209
					fireWarningEvent(message, item, 4);
210
				}
211
			}else{
212
				String message = "Unhandled case";
213
				fireWarningEvent(message, item, 12);
214
			}
215
		}
216
	}
217

    
218
	private Classification getClassification(StreamItem item, List<MappedCdmBase<? extends CdmBase>> resultList) {
219
		Set<Classification> resultSet = new HashSet<>();
220
		//
221
		if (config.isDatasetsAsClassifications()){
222
			String datasetKey = item.get(TermUri.DWC_DATASET_ID);
223
			if (CdmUtils.areBlank(datasetKey,item.get(TermUri.DWC_DATASET_NAME))){
224
				datasetKey = DwcTaxonStreamItem2CdmTaxonConverter.NO_DATASET;
225
			}
226

    
227
			resultSet.addAll(state.get(TermUri.DWC_DATASET_ID.toString(), datasetKey, Classification.class));
228
			resultSet.addAll(state.get(TermUri.DWC_DATASET_NAME.toString(), item.get(TermUri.DWC_DATASET_NAME), Classification.class));
229
		//TODO accordingToAsClassification
230
		//single classification
231
		}else{
232
			resultSet.addAll(state.get(SINGLE_CLASSIFICATION, SINGLE_CLASSIFICATION_ID, Classification.class));
233

    
234
			//classification does not yet exist
235
			if (resultSet.isEmpty()){
236
				Classification newClassification = Classification.NewInstance("Darwin Core Classification");
237
				if (config.getClassificationUuid() != null){
238
					newClassification.setUuid(config.getClassificationUuid());
239
				}
240
				if (StringUtils.isNotBlank(config.getClassificationName())){
241
					newClassification.setName(LanguageString.NewInstance(config.getClassificationName(), Language.DEFAULT()));
242
				}
243
				resultList.add(new MappedCdmBase<>(SINGLE_CLASSIFICATION, SINGLE_CLASSIFICATION_ID, newClassification));
244
				resultSet.add(newClassification);
245
			}
246
		}
247
		if (resultSet.isEmpty()){
248
			return null;
249
		}else if (resultSet.size() > 1){
250
			fireWarningEvent("Dataset is ambigous. I take arbitrary one.", item, 8);
251
		}
252
		return resultSet.iterator().next();
253
	}
254

    
255
	private void handleAcceptedNameUsage(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state, TaxonBase<?> taxonBase, String id) {
256
		if (acceptedNameUsageExists(item)){
257
			String accId = item.get(TermUri.DWC_ACCEPTED_NAME_USAGE_ID);
258
			handleAcceptedNameUsageParam(item, state, taxonBase, id, accId);
259
		}else{
260
			if (logger.isDebugEnabled()){logger.debug("No accepted name usage");}
261
		}
262
	}
263

    
264
	private void handleAcceptedNameUsageParam(StreamItem item,
265
	        DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state, TaxonBase<?> taxonBase, String id, String accId) {
266
		if (id.equals(accId)){
267
			//mapping to itself needs no further handling
268
		}else{
269
			String taxStatus = item.get(TermUri.DWC_TAXONOMIC_STATUS);
270

    
271
			Taxon accTaxon = getTaxonBase(accId, item, Taxon.class, state);
272
			if (taxonBase.isInstanceOf(Synonym.class)){
273
				Synonym synonym = CdmBase.deproxy(taxonBase, Synonym.class);
274

    
275
				if (accTaxon == null){
276
						fireWarningEvent("NON-ID accepted Name Usage not yet implemented or taxon for name usage id not available", item, 4);
277
				} else{
278
					accTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
279
				}
280

    
281
				// FIXME : no information regarding misapplied name available at this point,
282
				//         hence a regexp check for 'misapplied' is done to add them as a relationship
283
			} else if(taxonBase.isInstanceOf(Taxon.class) && taxStatus != null && taxStatus.matches("misapplied.*")) {
284
				if (accTaxon == null){
285
					fireWarningEvent("NON-ID based accepted (misapplied) name usage not yet implemented or taxon for name usage id not available", item, 4);
286
				} else{
287
					Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class);
288
					accTaxon.addMisappliedName(taxon,null,null);
289
				}
290
			} else {
291
				String message = "Accepted name usage is not of type synonym. This is not allowed in CDM. Can't create realtionship";
292
				//TODO check "is this Taxon"
293
				fireWarningEvent(message, item, 4);
294
			}
295
		}
296
	}
297

    
298
	private boolean acceptedNameUsageExists(StreamItem item) {
299
		return exists(TermUri.DWC_ACCEPTED_NAME_USAGE_ID, item) || exists(TermUri.DWC_ACCEPTED_NAME_USAGE, item);
300
	}
301

    
302
//**************************** PARTITIONABLE ************************************************
303

    
304
	@Override
305
	protected void makeForeignKeysForItem(StreamItem item, Map<String, Set<String>> fkMap){
306
		String value;
307
		String key;
308
		if ( hasValue(value = item.get(ID))){
309
			key = TermUri.DWC_TAXON.toString();
310
			Set<String> keySet = getKeySet(key, fkMap);
311
			keySet.add(value);
312
		}
313
		if ( hasValue(value = item.get(TermUri.DWC_ACCEPTED_NAME_USAGE_ID.toString()))){
314
			key = TermUri.DWC_TAXON.toString();
315
			Set<String> keySet = getKeySet(key, fkMap);
316
			keySet.add(value);
317
		}
318
		if ( hasValue(value = item.get(key = TermUri.DWC_PARENT_NAME_USAGE_ID.toString())) ){
319
			key = TermUri.DWC_TAXON.toString();
320
			Set<String> keySet = getKeySet(key, fkMap);
321
			keySet.add(value);
322
		}
323
		if ( hasValue(value = item.get(key = TermUri.DWC_NAME_ACCORDING_TO_ID.toString()))){
324
			//TODO
325
			Set<String> keySet = getKeySet(key, fkMap);
326
			keySet.add(value);
327
		}
328

    
329
		//classification
330
		if (config.isDatasetsAsClassifications()){
331
			boolean hasDefinedClassification = false;
332
			if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_ID.toString()))){
333
				Set<String> keySet = getKeySet(key, fkMap);
334
				keySet.add(value);
335
				hasDefinedClassification = true;
336
			}
337
			if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_NAME.toString()))){
338
				Set<String> keySet = getKeySet(key, fkMap);
339
				keySet.add(value);
340
				hasDefinedClassification = true;
341
			}
342
			if (! hasDefinedClassification){
343
				Set<String> keySet = getKeySet(TermUri.DWC_DATASET_ID.toString(), fkMap);
344
				value = DwcTaxonStreamItem2CdmTaxonConverter.NO_DATASET;
345
				keySet.add(value);
346
			}
347
		}else{
348
			key = SINGLE_CLASSIFICATION;
349
			value = SINGLE_CLASSIFICATION_ID;
350
			Set<String> keySet = getKeySet(key, fkMap);
351
			keySet.add(value);
352
		}
353

    
354
		//TODO cont.
355
	}
356

    
357
	@Override
358
	public Set<String> requiredSourceNamespaces() {
359
		Set<String> result = new HashSet<>();
360

    
361
		result.add(TermUri.DWC_TAXON.toString());
362

    
363
		result.add(TermUri.DWC_ACCEPTED_NAME_USAGE_ID.toString());
364
 		result.add(TermUri.DWC_PARENT_NAME_USAGE_ID.toString());
365

    
366
 		result.add(TermUri.DWC_NAME_ACCORDING_TO_ID.toString());
367
 		result.add(TermUri.DWC_NAME_ACCORDING_TO.toString());
368
 		if (config.isDatasetsAsClassifications()){
369
 			result.add(TermUri.DWC_DATASET_ID.toString());
370
 			result.add(TermUri.DWC_DATASET_NAME.toString());
371
 		}else{
372
 			result.add(SINGLE_CLASSIFICATION);
373
 		}
374

    
375
 		return result;
376
	}
377

    
378
//************************************* TO STRING ********************************************
379

    
380
	@Override
381
	public String toString(){
382
		return this.getClass().getName();
383
	}
384
}
(1-1/17)