Project

General

Profile

Download (14 KB) Statistics
| Branch: | Tag: | Revision:
1
// $Id$
2
/**
3
* Copyright (C) 2009 EDIT
4
* European Distributed Institute of Taxonomy
5
* http://www.e-taxonomy.eu
6
*
7
* The contents of this file are subject to the Mozilla Public License Version 1.1
8
* See LICENSE.TXT at the top of this package for the full license terms.
9
*/
10
package eu.etaxonomy.cdm.io.dwca.in;
11

    
12
import java.util.ArrayList;
13
import java.util.HashSet;
14
import java.util.List;
15
import java.util.Map;
16
import java.util.Set;
17

    
18
import org.apache.commons.lang.StringUtils;
19
import org.apache.log4j.Logger;
20

    
21
import eu.etaxonomy.cdm.common.CdmUtils;
22
import eu.etaxonomy.cdm.io.dwca.TermUri;
23
import eu.etaxonomy.cdm.io.stream.StreamItem;
24
import eu.etaxonomy.cdm.model.common.CdmBase;
25
import eu.etaxonomy.cdm.model.common.Language;
26
import eu.etaxonomy.cdm.model.common.LanguageString;
27
import eu.etaxonomy.cdm.model.reference.Reference;
28
import eu.etaxonomy.cdm.model.taxon.Classification;
29
import eu.etaxonomy.cdm.model.taxon.Synonym;
30
import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
31
import eu.etaxonomy.cdm.model.taxon.Taxon;
32
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
33

    
34
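/**
 * Converter that takes Darwin Core taxon records (as {@link StreamItem}s) and
 * creates the relationships between already imported taxa: synonym and
 * misapplied name relations (accepted name usage) as well as parent-child
 * relations within a classification (parent name usage).
 *
 * @author a.mueller
 * @date 23.11.2011
 */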
38
public class DwcTaxonCsv2CdmTaxonRelationConverter
39
        extends PartitionableConverterBase<DwcaDataImportConfiguratorBase, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase>>
40
        implements IPartitionableConverter<StreamItem, IReader<CdmBase>, String> {
41

    
42
    private static final String SINGLE_CLASSIFICATION_ID = "1";
43

    
44
	private static final String SINGLE_CLASSIFICATION = "Single Classification";
45

    
46
	private static Logger logger = Logger.getLogger(DwcTaxonCsv2CdmTaxonRelationConverter.class);
47

    
48
	private static final String ID = "id";
49

    
50
	/**
	 * Creates a new converter and hands the import state on to the base class.
	 *
	 * @param state the import state this converter works with
	 */
	public DwcTaxonCsv2CdmTaxonRelationConverter(DwcaDataImportStateBase state) {
		super(state);
	}
56

    
57
    @Override
58
    public ItemFilter<StreamItem> getItemFilter() {
59
        if (!config.isDoSplitRelationshipImport()){
60
            return null;
61
        }else{
62
            return new DwcTaxonStreamItem2CdmTaxonConverter(state, true);  //the converter also is implementing the ItemFilter interfacem, this way we guarantee that the evaluation if the item is a synonym, lower or higher taxon is the same during taxon creation and relationship creation
63
        }
64
    }
65

    
66

    
67
    @Override
68
    public IReader<MappedCdmBase> map(StreamItem item){
69
		List<MappedCdmBase> resultList = new ArrayList<MappedCdmBase>();
70

    
71
		Map<String, String> csvRecord = item.map;
72
		Reference sourceReference = state.getTransactionalSourceReference();
73
		String sourceReferecenDetail = null;
74

    
75
		String id = csvRecord.get(ID);
76
		TaxonBase<?> taxonBase = getTaxonBase(id, item, null, state);
77
		if (taxonBase == null){
78
			String warning = "Taxon not available for id '%s'.";
79
			warning = String.format(warning, id);
80
			fireWarningEvent(warning, item, 8);
81
		}else{
82

    
83
			MappedCdmBase mcb = new MappedCdmBase(taxonBase);
84
			resultList.add(mcb);
85

    
86
			handleAcceptedNameUsage(item, state, taxonBase, id);
87

    
88
			handleParentNameUsage(item, state, taxonBase, id, resultList);
89

    
90
			handleKingdom(item, state);
91

    
92
			handlePhylum(item, state);
93

    
94
			handleClass(item, state);
95

    
96
			handleOrder(item, state);
97

    
98
			handleFamily(item, state);
99

    
100
			handleGenus(item, state);
101

    
102
			handleSubGenus(item, state);
103

    
104
		}
105
		csvRecord.remove(ID);
106

    
107

    
108
//		    <!-- Top level group; listed as kingdom but may be interpreted as domain or superkingdom
109
//		         The following eight groups are recognized: Animalia, Archaea, Bacteria, Chromista,
110
//		         Fungi, Plantae, Protozoa, Viruses -->
111
//		    <field index='10' term='http://rs.tdwg.org/dwc/terms/kingdom'/>
112

    
113
//		    <!-- Specific epithet; for hybrids, the multiplication symbol is included in the epithet -->
114
//		    <field index='17' term='http://rs.tdwg.org/dwc/terms/specificEpithet'/>
115

    
116
//		    <!-- Infraspecific epithet -->
117
//		    <field index='18' term='http://rs.tdwg.org/dwc/terms/infraspecificEpithet'/>
118

    
119
//			<!-- Acceptance status published in -->
120
//		    <field index='20' term='http://purl.org/dc/terms/source'/>
121

    
122
//		    <!-- Reference in which the scientific name was first published -->
123
//		    <field index='21' term='http://rs.tdwg.org/dwc/terms/namePublishedIn'/>
124

    
125
//		    <!-- Scrutiny date -->
126
//		    <field index='23' term='http://purl.org/dc/terms/modified'/>
127
//		    <!-- Additional data for the taxon -->
128

    
129
//		    <field index='24' term='http://purl.org/dc/terms/description'/>
130
//		    </core>
131

    
132
		return new ListReader<MappedCdmBase>(resultList);
133
	}
134

    
135

    
136
	@Override
137
	public String getSourceId(StreamItem item) {
138
		String id = item.get(ID);
139
		return id;
140
	}
141

    
142

    
143
	private void handleSubGenus(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
144
		// TODO Auto-generated method stub
145
	}
146

    
147
	private void handleGenus(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
148
		// TODO Auto-generated method stub
149
	}
150

    
151
	private void handleFamily(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
152
		// TODO Auto-generated method stub
153
	}
154

    
155
	private void handleOrder(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
156
		// TODO Auto-generated method stub
157
	}
158

    
159
	private void handleClass(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
160
		// TODO Auto-generated method stub
161
	}
162

    
163
	private void handlePhylum(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
164
		// TODO Auto-generated method stub
165
	}
166

    
167
	private void handleKingdom(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
168
		// TODO Auto-generated method stub
169
	}
170

    
171

    
172
	private void handleParentNameUsage(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state, TaxonBase<?> taxonBase, String id, List<MappedCdmBase> resultList) {
173
		if (exists(TermUri.DWC_PARENT_NAME_USAGE_ID, item) || exists(TermUri.DWC_PARENT_NAME_USAGE, item)){
174
			String parentId = item.get(TermUri.DWC_PARENT_NAME_USAGE_ID);
175
			if (id.equals(parentId)){
176
				//taxon can't be it's own child
177
				//TODO log
178
				return;
179
			}else if (taxonBase.isInstanceOf(Taxon.class)){
180
				Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class);
181
				Taxon parentTaxon = getTaxonBase(parentId, item, Taxon.class, state);
182
				if (parentTaxon == null){
183
					String message = "Can't find parent taxon with id '%s' and NON-ID parent Name Usage not yet implemented.";
184
					message = String.format(message, StringUtils.isBlank(parentId)?"-": parentId);
185
					fireWarningEvent(message, item, 4);
186
				}else{
187
					Classification classification = getClassification(item, resultList);
188
					Reference citationForParentChild = null;
189
					if (classification == null){
190
						String warning = "Classification not found. Can't create parent-child relationship";
191
						fireWarningEvent(warning, item, 12);
192
					}
193
					try {
194
						classification.addParentChild(parentTaxon, taxon, citationForParentChild, null);
195
					} catch (IllegalStateException e) {
196
						String message = "Exception occurred when trying to add a child to a parent in a classification: %s";
197
						message = String.format(message, e.getMessage());
198
						fireWarningEvent(message, item, 12);
199
					}
200
				}
201
			}else if (taxonBase.isInstanceOf(Synonym.class)){
202
				if (! acceptedNameUsageExists(item) && state.getConfig().isUseParentAsAcceptedIfAcceptedNotExists()){
203
					handleAcceptedNameUsageParam(item, state, taxonBase, id, parentId);
204
				}else{
205
					String message = "PARENT_NAME_USAGE given for Synonym and ACCEPTED_NAME_USAGE also exists or configuration does not allow" +
206
							"to use ACCEPTED_NAME_USAGE as parent. This is not allowed in CDM.";
207
					//TODO check "is this Taxon"
208
					fireWarningEvent(message, item, 4);
209
				}
210
			}else{
211
				String message = "Unhandled case";
212
				fireWarningEvent(message, item, 12);
213
			}
214
		}
215

    
216

    
217
	}
218

    
219

    
220
	private Classification getClassification(StreamItem item, List<MappedCdmBase> resultList) {
221
		Set<Classification> resultSet = new HashSet<Classification>();
222
		//
223
		if (config.isDatasetsAsClassifications()){
224
			String datasetKey = item.get(TermUri.DWC_DATASET_ID);
225
			if (CdmUtils.areBlank(datasetKey,item.get(TermUri.DWC_DATASET_NAME))){
226
				datasetKey = DwcTaxonStreamItem2CdmTaxonConverter.NO_DATASET;
227
			}
228

    
229
			resultSet.addAll(state.get(TermUri.DWC_DATASET_ID.toString(), datasetKey, Classification.class));
230
			resultSet.addAll(state.get(TermUri.DWC_DATASET_NAME.toString(), item.get(TermUri.DWC_DATASET_NAME), Classification.class));
231
		//TODO accordingToAsClassification
232
		//single classification
233
		}else{
234
			resultSet.addAll(state.get(SINGLE_CLASSIFICATION, SINGLE_CLASSIFICATION_ID, Classification.class));
235

    
236
			//classification does not yet exist
237
			if (resultSet.isEmpty()){
238
				Classification newClassification = Classification.NewInstance("Darwin Core Classification");
239
				if (config.getClassificationUuid() != null){
240
					newClassification.setUuid(config.getClassificationUuid());
241
				}
242
				if (StringUtils.isNotBlank(config.getClassificationName())){
243
					newClassification.setName(LanguageString.NewInstance(config.getClassificationName(), Language.DEFAULT()));
244
				}
245
				resultList.add(new MappedCdmBase(SINGLE_CLASSIFICATION, SINGLE_CLASSIFICATION_ID, newClassification));
246
				resultSet.add(newClassification);
247
			}
248
		}
249
		if (resultSet.isEmpty()){
250
			return null;
251
		}else if (resultSet.size() > 1){
252
			fireWarningEvent("Dataset is ambigous. I take arbitrary one.", item, 8);
253
		}
254
		return resultSet.iterator().next();
255
	}
256

    
257

    
258
	private void handleAcceptedNameUsage(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state, TaxonBase taxonBase, String id) {
259
		if (acceptedNameUsageExists(item)){
260
			String accId = item.get(TermUri.DWC_ACCEPTED_NAME_USAGE_ID);
261
			handleAcceptedNameUsageParam(item, state, taxonBase, id, accId);
262
		}else{
263
			if (logger.isDebugEnabled()){logger.debug("No accepted name usage");}
264
		}
265
	}
266

    
267

    
268
	/**
269
	 * @param item
270
	 * @param state
271
	 * @param taxonBase
272
	 * @param id
273
	 * @param accId
274
	 * @param taxStatus
275
	 */
276
	private void handleAcceptedNameUsageParam(StreamItem item,
277
	        DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state, TaxonBase<?> taxonBase, String id, String accId) {
278
		if (id.equals(accId)){
279
			//mapping to itself needs no further handling
280
		}else{
281
			String taxStatus = item.get(TermUri.DWC_TAXONOMIC_STATUS);
282

    
283
			Taxon accTaxon = getTaxonBase(accId, item, Taxon.class, state);
284
			if (taxonBase.isInstanceOf(Synonym.class)){
285
				Synonym synonym = CdmBase.deproxy(taxonBase, Synonym.class);
286

    
287
				if (accTaxon == null){
288
						fireWarningEvent("NON-ID accepted Name Usage not yet implemented or taxon for name usage id not available", item, 4);
289
				} else{
290
					accTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),null, null);
291
				}
292
				// FIXME : no information regarding misapplied name available at this point,
293
				//         hence a regexp check for 'misapplied' is done to add them as a relationship
294
			} else if(taxonBase.isInstanceOf(Taxon.class) && taxStatus != null && taxStatus.matches("misapplied.*")) {
295
				if (accTaxon == null){
296
					fireWarningEvent("NON-ID based accepted (misapplied) name usage not yet implemented or taxon for name usage id not available", item, 4);
297
				} else{
298
					Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class);
299
					accTaxon.addMisappliedName(taxon,null,null);
300
				}
301
			} else {
302
				String message = "Accepted name usage is not of type synonym. This is not allowed in CDM. Can't create realtionship";
303
				//TODO check "is this Taxon"
304
				fireWarningEvent(message, item, 4);
305
			}
306
		}
307
	}
308

    
309

    
310
	/**
311
	 * @param item
312
	 * @return
313
	 */
314
	private boolean acceptedNameUsageExists(StreamItem item) {
315
		return exists(TermUri.DWC_ACCEPTED_NAME_USAGE_ID, item) || exists(TermUri.DWC_ACCEPTED_NAME_USAGE, item);
316
	}
317

    
318

    
319

    
320
//**************************** PARTITIONABLE ************************************************
321

    
322
	@Override
323
	protected void makeForeignKeysForItem(StreamItem item, Map<String, Set<String>> fkMap){
324
		String value;
325
		String key;
326
		if ( hasValue(value = item.get(ID))){
327
			key = TermUri.DWC_TAXON.toString();
328
			Set<String> keySet = getKeySet(key, fkMap);
329
			keySet.add(value);
330
		}
331
		if ( hasValue(value = item.get(TermUri.DWC_ACCEPTED_NAME_USAGE_ID.toString()))){
332
			key = TermUri.DWC_TAXON.toString();
333
			Set<String> keySet = getKeySet(key, fkMap);
334
			keySet.add(value);
335
		}
336
		if ( hasValue(value = item.get(key = TermUri.DWC_PARENT_NAME_USAGE_ID.toString())) ){
337
			key = TermUri.DWC_TAXON.toString();
338
			Set<String> keySet = getKeySet(key, fkMap);
339
			keySet.add(value);
340
		}
341
		if ( hasValue(value = item.get(key = TermUri.DWC_NAME_ACCORDING_TO_ID.toString()))){
342
			//TODO
343
			Set<String> keySet = getKeySet(key, fkMap);
344
			keySet.add(value);
345
		}
346

    
347
		//classification
348
		if (config.isDatasetsAsClassifications()){
349
			boolean hasDefinedClassification = false;
350
			if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_ID.toString()))){
351
				Set<String> keySet = getKeySet(key, fkMap);
352
				keySet.add(value);
353
				hasDefinedClassification = true;
354
			}
355
			if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_NAME.toString()))){
356
				Set<String> keySet = getKeySet(key, fkMap);
357
				keySet.add(value);
358
				hasDefinedClassification = true;
359
			}
360
			if (! hasDefinedClassification){
361
				Set<String> keySet = getKeySet(TermUri.DWC_DATASET_ID.toString(), fkMap);
362
				value = DwcTaxonStreamItem2CdmTaxonConverter.NO_DATASET;
363
				keySet.add(value);
364
			}
365
		}else{
366
			key = SINGLE_CLASSIFICATION;
367
			value = SINGLE_CLASSIFICATION_ID;
368
			Set<String> keySet = getKeySet(key, fkMap);
369
			keySet.add(value);
370
		}
371

    
372
		//TODO cont.
373
	}
374

    
375
	@Override
376
	public Set<String> requiredSourceNamespaces() {
377
		Set<String> result = new HashSet<String>();
378

    
379
		result.add(TermUri.DWC_TAXON.toString());
380

    
381
		result.add(TermUri.DWC_ACCEPTED_NAME_USAGE_ID.toString());
382
 		result.add(TermUri.DWC_PARENT_NAME_USAGE_ID.toString());
383

    
384
 		result.add(TermUri.DWC_NAME_ACCORDING_TO_ID.toString());
385
 		result.add(TermUri.DWC_NAME_ACCORDING_TO.toString());
386
 		if (config.isDatasetsAsClassifications()){
387
 			result.add(TermUri.DWC_DATASET_ID.toString());
388
 			result.add(TermUri.DWC_DATASET_NAME.toString());
389
 		}else{
390
 			result.add(SINGLE_CLASSIFICATION);
391
 		}
392

    
393
 		return result;
394
	}
395

    
396

    
397
//************************************* TO STRING ********************************************
398

    
399
	@Override
400
	public String toString(){
401
		return this.getClass().getName();
402
	}
403

    
404

    
405

    
406
}
(4-4/37)