Project

General

Profile

Download (14 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2009 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.dwca.in;
10

    
11
import java.util.ArrayList;
12
import java.util.HashSet;
13
import java.util.List;
14
import java.util.Map;
15
import java.util.Set;
16

    
17
import org.apache.commons.lang.StringUtils;
18
import org.apache.log4j.Logger;
19

    
20
import eu.etaxonomy.cdm.common.CdmUtils;
21
import eu.etaxonomy.cdm.io.dwca.TermUri;
22
import eu.etaxonomy.cdm.io.stream.StreamItem;
23
import eu.etaxonomy.cdm.model.common.CdmBase;
24
import eu.etaxonomy.cdm.model.common.Language;
25
import eu.etaxonomy.cdm.model.common.LanguageString;
26
import eu.etaxonomy.cdm.model.reference.Reference;
27
import eu.etaxonomy.cdm.model.taxon.Classification;
28
import eu.etaxonomy.cdm.model.taxon.Synonym;
29
import eu.etaxonomy.cdm.model.taxon.SynonymType;
30
import eu.etaxonomy.cdm.model.taxon.Taxon;
31
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
32

    
33
/**
34
 * @author a.mueller
35
 * @date 23.11.2011
36
 */
37
public class DwcTaxonCsv2CdmTaxonRelationConverter
38
        extends PartitionableConverterBase<DwcaDataImportConfiguratorBase, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase>>
39
        implements IPartitionableConverter<StreamItem, IReader<CdmBase>, String> {
40

    
41
    private static final String SINGLE_CLASSIFICATION_ID = "1";
42

    
43
	private static final String SINGLE_CLASSIFICATION = "Single Classification";
44

    
45
	private static Logger logger = Logger.getLogger(DwcTaxonCsv2CdmTaxonRelationConverter.class);
46

    
47
	private static final String ID = "id";
48

    
49
	/**
	 * Creates a converter working on the given import state.
	 * The state is passed on unchanged to the base class constructor.
	 *
	 * @param state the import state; declared as raw type, so states with any
	 *              configurator type parameter are accepted
	 */
	public DwcTaxonCsv2CdmTaxonRelationConverter(DwcaDataImportStateBase state) {
		super(state);
	}
55

    
56
    @Override
57
    public ItemFilter<StreamItem> getItemFilter() {
58
        if (!config.isDoSplitRelationshipImport()){
59
            return null;
60
        }else{
61
            return new DwcTaxonStreamItem2CdmTaxonConverter(state, true);  //the converter also is implementing the ItemFilter interfacem, this way we guarantee that the evaluation if the item is a synonym, lower or higher taxon is the same during taxon creation and relationship creation
62
        }
63
    }
64

    
65

    
66
    @Override
67
    public IReader<MappedCdmBase> map(StreamItem item){
68
		List<MappedCdmBase> resultList = new ArrayList<MappedCdmBase>();
69

    
70
		Map<String, String> csvRecord = item.map;
71
		Reference sourceReference = state.getTransactionalSourceReference();
72
		String sourceReferecenDetail = null;
73

    
74
		String id = csvRecord.get(ID);
75
		TaxonBase<?> taxonBase = getTaxonBase(id, item, null, state);
76
		if (taxonBase == null){
77
			String warning = "Taxon not available for id '%s'.";
78
			warning = String.format(warning, id);
79
			fireWarningEvent(warning, item, 8);
80
		}else{
81

    
82
			MappedCdmBase mcb = new MappedCdmBase(taxonBase);
83
			resultList.add(mcb);
84

    
85
			handleAcceptedNameUsage(item, state, taxonBase, id);
86

    
87
			handleParentNameUsage(item, state, taxonBase, id, resultList);
88

    
89
			handleKingdom(item, state);
90

    
91
			handlePhylum(item, state);
92

    
93
			handleClass(item, state);
94

    
95
			handleOrder(item, state);
96

    
97
			handleFamily(item, state);
98

    
99
			handleGenus(item, state);
100

    
101
			handleSubGenus(item, state);
102

    
103
		}
104
		csvRecord.remove(ID);
105

    
106

    
107
//		    <!-- Top level group; listed as kingdom but may be interpreted as domain or superkingdom
108
//		         The following eight groups are recognized: Animalia, Archaea, Bacteria, Chromista,
109
//		         Fungi, Plantae, Protozoa, Viruses -->
110
//		    <field index='10' term='http://rs.tdwg.org/dwc/terms/kingdom'/>
111

    
112
//		    <!-- Specific epithet; for hybrids, the multiplication symbol is included in the epithet -->
113
//		    <field index='17' term='http://rs.tdwg.org/dwc/terms/specificEpithet'/>
114

    
115
//		    <!-- Infraspecific epithet -->
116
//		    <field index='18' term='http://rs.tdwg.org/dwc/terms/infraspecificEpithet'/>
117

    
118
//			<!-- Acceptance status published in -->
119
//		    <field index='20' term='http://purl.org/dc/terms/source'/>
120

    
121
//		    <!-- Reference in which the scientific name was first published -->
122
//		    <field index='21' term='http://rs.tdwg.org/dwc/terms/namePublishedIn'/>
123

    
124
//		    <!-- Scrutiny date -->
125
//		    <field index='23' term='http://purl.org/dc/terms/modified'/>
126
//		    <!-- Additional data for the taxon -->
127

    
128
//		    <field index='24' term='http://purl.org/dc/terms/description'/>
129
//		    </core>
130

    
131
		return new ListReader<MappedCdmBase>(resultList);
132
	}
133

    
134

    
135
	@Override
136
	public String getSourceId(StreamItem item) {
137
		String id = item.get(ID);
138
		return id;
139
	}
140

    
141

    
142
	private void handleSubGenus(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
143
		// TODO Auto-generated method stub
144
	}
145

    
146
	private void handleGenus(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
147
		// TODO Auto-generated method stub
148
	}
149

    
150
	private void handleFamily(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
151
		// TODO Auto-generated method stub
152
	}
153

    
154
	private void handleOrder(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
155
		// TODO Auto-generated method stub
156
	}
157

    
158
	private void handleClass(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
159
		// TODO Auto-generated method stub
160
	}
161

    
162
	private void handlePhylum(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
163
		// TODO Auto-generated method stub
164
	}
165

    
166
	private void handleKingdom(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state) {
167
		// TODO Auto-generated method stub
168
	}
169

    
170

    
171
	private void handleParentNameUsage(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state, TaxonBase<?> taxonBase, String id, List<MappedCdmBase> resultList) {
172
		if (exists(TermUri.DWC_PARENT_NAME_USAGE_ID, item) || exists(TermUri.DWC_PARENT_NAME_USAGE, item)){
173
			String parentId = item.get(TermUri.DWC_PARENT_NAME_USAGE_ID);
174
			if (id.equals(parentId)){
175
				//taxon can't be it's own child
176
				//TODO log
177
				return;
178
			}else if (taxonBase.isInstanceOf(Taxon.class)){
179
				Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class);
180
				Taxon parentTaxon = getTaxonBase(parentId, item, Taxon.class, state);
181
				if (parentTaxon == null){
182
					String message = "Can't find parent taxon with id '%s' and NON-ID parent Name Usage not yet implemented.";
183
					message = String.format(message, StringUtils.isBlank(parentId)?"-": parentId);
184
					fireWarningEvent(message, item, 4);
185
				}else{
186
					Classification classification = getClassification(item, resultList);
187
					Reference citationForParentChild = null;
188
					if (classification == null){
189
						String warning = "Classification not found. Can't create parent-child relationship";
190
						fireWarningEvent(warning, item, 12);
191
					}
192
					try {
193
						classification.addParentChild(parentTaxon, taxon, citationForParentChild, null);
194
					} catch (IllegalStateException e) {
195
						String message = "Exception occurred when trying to add a child to a parent in a classification: %s";
196
						message = String.format(message, e.getMessage());
197
						fireWarningEvent(message, item, 12);
198
					}
199
				}
200
			}else if (taxonBase.isInstanceOf(Synonym.class)){
201
				if (! acceptedNameUsageExists(item) && state.getConfig().isUseParentAsAcceptedIfAcceptedNotExists()){
202
					handleAcceptedNameUsageParam(item, state, taxonBase, id, parentId);
203
				}else{
204
					String message = "PARENT_NAME_USAGE given for Synonym and ACCEPTED_NAME_USAGE also exists or configuration does not allow" +
205
							"to use ACCEPTED_NAME_USAGE as parent. This is not allowed in CDM.";
206
					//TODO check "is this Taxon"
207
					fireWarningEvent(message, item, 4);
208
				}
209
			}else{
210
				String message = "Unhandled case";
211
				fireWarningEvent(message, item, 12);
212
			}
213
		}
214

    
215

    
216
	}
217

    
218

    
219
	private Classification getClassification(StreamItem item, List<MappedCdmBase> resultList) {
220
		Set<Classification> resultSet = new HashSet<Classification>();
221
		//
222
		if (config.isDatasetsAsClassifications()){
223
			String datasetKey = item.get(TermUri.DWC_DATASET_ID);
224
			if (CdmUtils.areBlank(datasetKey,item.get(TermUri.DWC_DATASET_NAME))){
225
				datasetKey = DwcTaxonStreamItem2CdmTaxonConverter.NO_DATASET;
226
			}
227

    
228
			resultSet.addAll(state.get(TermUri.DWC_DATASET_ID.toString(), datasetKey, Classification.class));
229
			resultSet.addAll(state.get(TermUri.DWC_DATASET_NAME.toString(), item.get(TermUri.DWC_DATASET_NAME), Classification.class));
230
		//TODO accordingToAsClassification
231
		//single classification
232
		}else{
233
			resultSet.addAll(state.get(SINGLE_CLASSIFICATION, SINGLE_CLASSIFICATION_ID, Classification.class));
234

    
235
			//classification does not yet exist
236
			if (resultSet.isEmpty()){
237
				Classification newClassification = Classification.NewInstance("Darwin Core Classification");
238
				if (config.getClassificationUuid() != null){
239
					newClassification.setUuid(config.getClassificationUuid());
240
				}
241
				if (StringUtils.isNotBlank(config.getClassificationName())){
242
					newClassification.setName(LanguageString.NewInstance(config.getClassificationName(), Language.DEFAULT()));
243
				}
244
				resultList.add(new MappedCdmBase(SINGLE_CLASSIFICATION, SINGLE_CLASSIFICATION_ID, newClassification));
245
				resultSet.add(newClassification);
246
			}
247
		}
248
		if (resultSet.isEmpty()){
249
			return null;
250
		}else if (resultSet.size() > 1){
251
			fireWarningEvent("Dataset is ambigous. I take arbitrary one.", item, 8);
252
		}
253
		return resultSet.iterator().next();
254
	}
255

    
256

    
257
	private void handleAcceptedNameUsage(StreamItem item, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state, TaxonBase taxonBase, String id) {
258
		if (acceptedNameUsageExists(item)){
259
			String accId = item.get(TermUri.DWC_ACCEPTED_NAME_USAGE_ID);
260
			handleAcceptedNameUsageParam(item, state, taxonBase, id, accId);
261
		}else{
262
			if (logger.isDebugEnabled()){logger.debug("No accepted name usage");}
263
		}
264
	}
265

    
266

    
267
	/**
268
	 * @param item
269
	 * @param state
270
	 * @param taxonBase
271
	 * @param id
272
	 * @param accId
273
	 * @param taxStatus
274
	 */
275
	private void handleAcceptedNameUsageParam(StreamItem item,
276
	        DwcaDataImportStateBase<DwcaDataImportConfiguratorBase> state, TaxonBase<?> taxonBase, String id, String accId) {
277
		if (id.equals(accId)){
278
			//mapping to itself needs no further handling
279
		}else{
280
			String taxStatus = item.get(TermUri.DWC_TAXONOMIC_STATUS);
281

    
282
			Taxon accTaxon = getTaxonBase(accId, item, Taxon.class, state);
283
			if (taxonBase.isInstanceOf(Synonym.class)){
284
				Synonym synonym = CdmBase.deproxy(taxonBase, Synonym.class);
285

    
286
				if (accTaxon == null){
287
						fireWarningEvent("NON-ID accepted Name Usage not yet implemented or taxon for name usage id not available", item, 4);
288
				} else{
289
					accTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
290
				}
291
				// FIXME : no information regarding misapplied name available at this point,
292
				//         hence a regexp check for 'misapplied' is done to add them as a relationship
293
			} else if(taxonBase.isInstanceOf(Taxon.class) && taxStatus != null && taxStatus.matches("misapplied.*")) {
294
				if (accTaxon == null){
295
					fireWarningEvent("NON-ID based accepted (misapplied) name usage not yet implemented or taxon for name usage id not available", item, 4);
296
				} else{
297
					Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class);
298
					accTaxon.addMisappliedName(taxon,null,null);
299
				}
300
			} else {
301
				String message = "Accepted name usage is not of type synonym. This is not allowed in CDM. Can't create realtionship";
302
				//TODO check "is this Taxon"
303
				fireWarningEvent(message, item, 4);
304
			}
305
		}
306
	}
307

    
308

    
309
	/**
310
	 * @param item
311
	 * @return
312
	 */
313
	private boolean acceptedNameUsageExists(StreamItem item) {
314
		return exists(TermUri.DWC_ACCEPTED_NAME_USAGE_ID, item) || exists(TermUri.DWC_ACCEPTED_NAME_USAGE, item);
315
	}
316

    
317

    
318

    
319
//**************************** PARTITIONABLE ************************************************
320

    
321
	@Override
322
	protected void makeForeignKeysForItem(StreamItem item, Map<String, Set<String>> fkMap){
323
		String value;
324
		String key;
325
		if ( hasValue(value = item.get(ID))){
326
			key = TermUri.DWC_TAXON.toString();
327
			Set<String> keySet = getKeySet(key, fkMap);
328
			keySet.add(value);
329
		}
330
		if ( hasValue(value = item.get(TermUri.DWC_ACCEPTED_NAME_USAGE_ID.toString()))){
331
			key = TermUri.DWC_TAXON.toString();
332
			Set<String> keySet = getKeySet(key, fkMap);
333
			keySet.add(value);
334
		}
335
		if ( hasValue(value = item.get(key = TermUri.DWC_PARENT_NAME_USAGE_ID.toString())) ){
336
			key = TermUri.DWC_TAXON.toString();
337
			Set<String> keySet = getKeySet(key, fkMap);
338
			keySet.add(value);
339
		}
340
		if ( hasValue(value = item.get(key = TermUri.DWC_NAME_ACCORDING_TO_ID.toString()))){
341
			//TODO
342
			Set<String> keySet = getKeySet(key, fkMap);
343
			keySet.add(value);
344
		}
345

    
346
		//classification
347
		if (config.isDatasetsAsClassifications()){
348
			boolean hasDefinedClassification = false;
349
			if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_ID.toString()))){
350
				Set<String> keySet = getKeySet(key, fkMap);
351
				keySet.add(value);
352
				hasDefinedClassification = true;
353
			}
354
			if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_NAME.toString()))){
355
				Set<String> keySet = getKeySet(key, fkMap);
356
				keySet.add(value);
357
				hasDefinedClassification = true;
358
			}
359
			if (! hasDefinedClassification){
360
				Set<String> keySet = getKeySet(TermUri.DWC_DATASET_ID.toString(), fkMap);
361
				value = DwcTaxonStreamItem2CdmTaxonConverter.NO_DATASET;
362
				keySet.add(value);
363
			}
364
		}else{
365
			key = SINGLE_CLASSIFICATION;
366
			value = SINGLE_CLASSIFICATION_ID;
367
			Set<String> keySet = getKeySet(key, fkMap);
368
			keySet.add(value);
369
		}
370

    
371
		//TODO cont.
372
	}
373

    
374
	@Override
375
	public Set<String> requiredSourceNamespaces() {
376
		Set<String> result = new HashSet<String>();
377

    
378
		result.add(TermUri.DWC_TAXON.toString());
379

    
380
		result.add(TermUri.DWC_ACCEPTED_NAME_USAGE_ID.toString());
381
 		result.add(TermUri.DWC_PARENT_NAME_USAGE_ID.toString());
382

    
383
 		result.add(TermUri.DWC_NAME_ACCORDING_TO_ID.toString());
384
 		result.add(TermUri.DWC_NAME_ACCORDING_TO.toString());
385
 		if (config.isDatasetsAsClassifications()){
386
 			result.add(TermUri.DWC_DATASET_ID.toString());
387
 			result.add(TermUri.DWC_DATASET_NAME.toString());
388
 		}else{
389
 			result.add(SINGLE_CLASSIFICATION);
390
 		}
391

    
392
 		return result;
393
	}
394

    
395

    
396
//************************************* TO STRING ********************************************
397

    
398
	@Override
399
	public String toString(){
400
		return this.getClass().getName();
401
	}
402

    
403

    
404

    
405
}
(4-4/37)