Project

General

Profile

Download (27.6 KB) Statistics
| Branch: | Tag: | Revision:
1
// $Id$
2
/**
3
* Copyright (C) 2009 EDIT
4
* European Distributed Institute of Taxonomy 
5
* http://www.e-taxonomy.eu
6
* 
7
* The contents of this file are subject to the Mozilla Public License Version 1.1
8
* See LICENSE.TXT at the top of this package for the full license terms.
9
*/
10
package eu.etaxonomy.cdm.io.dwca.in;
11

    
12
import java.net.URI;
13
import java.util.ArrayList;
14
import java.util.HashSet;
15
import java.util.List;
16
import java.util.Map;
17
import java.util.Set;
18

    
19
import org.apache.commons.lang.StringUtils;
20
import org.apache.log4j.Logger;
21

    
22
import com.ibm.lsid.MalformedLSIDException;
23

    
24
import eu.etaxonomy.cdm.common.CdmUtils;
25
import eu.etaxonomy.cdm.io.dwca.TermUri;
26
import eu.etaxonomy.cdm.io.stream.StreamImportBase;
27
import eu.etaxonomy.cdm.io.stream.StreamImportStateBase;
28
import eu.etaxonomy.cdm.io.stream.StreamItem;
29
import eu.etaxonomy.cdm.model.common.Annotation;
30
import eu.etaxonomy.cdm.model.common.CdmBase;
31
import eu.etaxonomy.cdm.model.common.Extension;
32
import eu.etaxonomy.cdm.model.common.ExtensionType;
33
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
34
import eu.etaxonomy.cdm.model.common.LSID;
35
import eu.etaxonomy.cdm.model.common.Language;
36
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
37
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
38
import eu.etaxonomy.cdm.model.description.Distribution;
39
import eu.etaxonomy.cdm.model.description.PresenceTerm;
40
import eu.etaxonomy.cdm.model.description.TaxonDescription;
41
import eu.etaxonomy.cdm.model.location.NamedArea;
42
import eu.etaxonomy.cdm.model.name.BotanicalName;
43
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
44
import eu.etaxonomy.cdm.model.name.NonViralName;
45
import eu.etaxonomy.cdm.model.name.Rank;
46
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
47
import eu.etaxonomy.cdm.model.name.ZoologicalName;
48
import eu.etaxonomy.cdm.model.reference.Reference;
49
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
50
import eu.etaxonomy.cdm.model.taxon.Classification;
51
import eu.etaxonomy.cdm.model.taxon.Synonym;
52
import eu.etaxonomy.cdm.model.taxon.Taxon;
53
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
54
import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
55
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
56
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
57

    
58
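/**
 * Converts the records of a Darwin Core Archive taxon stream into CDM objects
 * (taxa and synonyms together with their names, references, classifications and sources).
 *
 * @author a.mueller
 * @date 22.11.2011
 */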
63
public class  DwcTaxonStreamItem2CdmTaxonConverter<CONFIG extends DwcaDataImportConfiguratorBase, STATE extends StreamImportStateBase<CONFIG, StreamImportBase>>  extends PartitionableConverterBase<CONFIG, STATE> implements IPartitionableConverter<StreamItem, IReader<CdmBase>, String>{
64
	private static final Logger logger = Logger.getLogger(DwcTaxonStreamItem2CdmTaxonConverter.class);
65

    
66
	private static final String ID = "id";
67
	// temporary key for the case that no dataset information is supplied, TODO use something better
68
	public static final String NO_DATASET = "no_dataset_jli773oebhjklw";
69

    
70
	private NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
71
	
72
	/**
	 * Creates a converter working on the given import state.
	 *
	 * @param state the import state, handed on to the base class
	 */
	public DwcTaxonStreamItem2CdmTaxonConverter(STATE state) {
		super(state);
	}
78

    
79

    
80
	public IReader<MappedCdmBase> map(StreamItem csvTaxonRecord){
81
		List<MappedCdmBase> resultList = new ArrayList<MappedCdmBase>(); 
82
		
83
		//TODO what if not transactional? 
84
		Reference<?> sourceReference = state.getTransactionalSourceReference();
85
		String sourceReferenceDetail = null;
86
		
87
		//taxon
88
		TaxonBase<?> taxonBase = getTaxonBase(csvTaxonRecord);
89
		MappedCdmBase  mcb = new MappedCdmBase(csvTaxonRecord.term, csvTaxonRecord.get(ID), taxonBase);
90
		resultList.add(mcb);
91
		
92
		//original source
93
		String id = csvTaxonRecord.get(ID);
94
		IdentifiableSource source = taxonBase.addSource(OriginalSourceType.Import, id, "Taxon", sourceReference, sourceReferenceDetail);
95
		MappedCdmBase mappedSource = new MappedCdmBase(csvTaxonRecord.get(ID), source);
96
		resultList.add(mappedSource);
97
		csvTaxonRecord.remove(ID);
98
		
99
		//rank
100
		NomenclaturalCode nomCode = getNomCode(csvTaxonRecord);
101
		Rank rank = getRank(csvTaxonRecord, nomCode);
102

    
103
		//name && name published in
104
		TaxonNameBase<?,?> name = getScientificName(csvTaxonRecord, nomCode, rank, resultList, sourceReference);
105
		taxonBase.setName(name);
106
		
107
		//nameAccordingTo
108
		MappedCdmBase<Reference> sec = getNameAccordingTo(csvTaxonRecord, resultList);
109
		
110
		if (sec == null && state.getConfig().isUseSourceReferenceAsSec()){
111
			sec = new MappedCdmBase<Reference>(state.getTransactionalSourceReference());
112
		}
113
		if (sec != null){
114
			taxonBase.setSec(sec.getCdmBase());
115
		}
116

    
117
		//classification
118
		handleDataset(csvTaxonRecord, taxonBase, resultList, sourceReference, sourceReferenceDetail);
119
		
120
		//NON core
121
	    //term="http://purl.org/dc/terms/identifier"
122
		//currently only LSIDs
123
		handleIdentifier(csvTaxonRecord, taxonBase); 
124
		
125
		//TaxonRemarks
126
		handleTaxonRemarks(csvTaxonRecord, taxonBase);
127
		
128
		//TDWG_1
129
		handleTdwgArea(csvTaxonRecord, taxonBase);
130
		
131
		//VernecularName
132
		handleCommonNames(csvTaxonRecord, taxonBase);
133

    
134
		//External Sources, ID's and References
135
		handleIdentifiableObjects(csvTaxonRecord, taxonBase);
136
		
137
		
138
		//		    <!-- Top level group; listed as kingdom but may be interpreted as domain or superkingdom
139
//		         The following eight groups are recognized: Animalia, Archaea, Bacteria, Chromista, 
140
//		         Fungi, Plantae, Protozoa, Viruses -->
141
//		    <field index='10' term='http://rs.tdwg.org/dwc/terms/kingdom'/>
142

    
143
//		    <!-- Phylum in which the taxon has been classified -->
144
//		    <field index='11' term='http://rs.tdwg.org/dwc/terms/phylum'/>
145

    
146
		//		    <!-- Class in which the taxon has been classified -->
147
//		    <field index='12' term='http://rs.tdwg.org/dwc/terms/class'/>
148

    
149
		//		    <!-- Order in which the taxon has been classified -->
150
//		    <field index='13' term='http://rs.tdwg.org/dwc/terms/order'/>
151

    
152
		//		    <!-- Family in which the taxon has been classified -->
153
//		    <field index='14' term='http://rs.tdwg.org/dwc/terms/family'/>
154

    
155
		//		    <!-- Genus in which the taxon has been classified -->
156
//		    <field index='15' term='http://rs.tdwg.org/dwc/terms/genus'/>
157

    
158
		//		    <!-- Subgenus in which the taxon has been classified -->
159
//		    <field index='16' term='http://rs.tdwg.org/dwc/terms/subgenus'/>
160
//		    <!-- Specific epithet; for hybrids, the multiplication symbol is included in the epithet -->
161

    
162
//		    <field index='17' term='http://rs.tdwg.org/dwc/terms/specificEpithet'/>
163
//		    <!-- Infraspecific epithet -->
164

    
165
//		    <field index='18' term='http://rs.tdwg.org/dwc/terms/infraspecificEpithet'/>
166
//		    <!-- Authorship -->
167

    
168
//		    <field index='19' term='http://rs.tdwg.org/dwc/terms/scientificNameAuthorship'/>
169
//		==> see scientific name
170
//		    
171
//		<!-- Acceptance status published in -->
172
//		    <field index='20' term='http://purl.org/dc/terms/source'/>
173
//		    <!-- Reference in which the scientific name was first published -->
174
//		    <field index='21' term='http://rs.tdwg.org/dwc/terms/namePublishedIn'/>
175
//		    <!-- Taxon scrutinized by -->
176
//		    <field index='22' term='http://rs.tdwg.org/dwc/terms/nameAccordingTo'/> 
177
//		    <!-- Scrutiny date -->
178
//		    <field index='23' term='http://purl.org/dc/terms/modified'/>
179
//		    <!-- Additional data for the taxon -->
180
//		    <field index='24' term='http://purl.org/dc/terms/description'/>
181
//		    </core>
182

    
183
		return new ListReader<MappedCdmBase>(resultList);
184
	}
185

    
186

    
187
	
188
	/**
189
	 * @param item
190
	 * @param taxonBase
191
	 */
192
	private void handleIdentifiableObjects(StreamItem item,TaxonBase<?> taxonBase) {
193
		
194
		
195
		String references = item.get(TermUri.DC_REFERENCES);
196
		
197
		if (references == null || references == "") {
198
			references = item.get(TermUri.DWC_NAME_PUBLISHED_IN_ID);//lorna temporary until Scratchpads move the reference to the correct place.
199
		}
200
		
201
		if (StringUtils.isNotBlank(references)){
202
			URI uri = makeUriIfIs(references);
203
			if (uri != null){
204
				Extension.NewInstance(taxonBase, references, ExtensionType.URL());
205
			}else{
206
				String message = "Non-URI Dublin Core References not yet handled for taxa. References is: %s";
207
				fireWarningEvent(String.format(message, references), item, 6);
208
			}
209
		}
210
		
211
		
212
		//TODO: Finish properly
213
		String id = item.get(TermUri.CDM_SOURCE_IDINSOURCE);
214
		String idNamespace = item.get(TermUri.CDM_SOURCE_IDNAMESPACE);
215
		String reference = item.get(TermUri.CDM_SOURCE_REFERENCE);
216
		if(StringUtils.isNotBlank(id) && StringUtils.isNotBlank(idNamespace) && StringUtils.isNotBlank(reference)){
217
			Reference<?> ref = ReferenceFactory.newGeneric();
218
			ref.setTitle(reference);
219
			Taxon taxon = (Taxon) taxonBase;
220
			taxon.addSource(OriginalSourceType.Import, id, idNamespace, ref, null);
221
		}
222
		
223
		
224
		
225
	}
226

    
227

    
228
	/**
229
	 * If str is an uri it returns is as an {@link URI}. If not it returns <code>null</code>. 
230
	 * @param str
231
	 * @return the URI.
232
	 */
233
	private URI makeUriIfIs(String str) {
234
		if (! str.startsWith("http:")){
235
			return null;
236
		}else{
237
			try {
238
				URI uri = URI.create(str);
239
				return uri;
240
			} catch (Exception e) {
241
				return null;
242
			}
243
		}
244

    
245
	}
246

    
247

    
248
	/**
249
	 * @param item
250
	 * @param taxonBase
251
	 */
252
	private void handleCommonNames(StreamItem item,TaxonBase<?> taxonBase) {
253
		//TODO: handle comma separated values
254
		String commonName = item.get(TermUri.DWC_VERNACULAR_NAME);
255
		Language language = getLanguage(item);	
256
		CommonTaxonName commonTaxonName = CommonTaxonName.NewInstance(commonName, language);
257
		if(taxonBase instanceof Taxon){
258
			Taxon taxon = (Taxon) taxonBase;
259
			TaxonDescription taxonDescription = getTaxonDescription(taxon, false);
260
			taxonDescription.addElement(commonTaxonName);
261
			logger.info("Common name " + commonName + " added to " + taxon.getTitleCache());
262
		}
263
	}
264

    
265

    
266

    
267
	/**
268
	 * @param csvTaxonRecord
269
	 * @param taxonBase
270
	 */
271
	private void handleTdwgArea(StreamItem item, TaxonBase<?> taxonBase) {
272
		// TODO Auto-generated method stub
273
		String tdwg_area = item.get(TermUri.DWC_COUNTRY_CODE);
274
		if (tdwg_area != null){
275
		if(taxonBase instanceof Synonym){
276
			Synonym synonym = CdmBase.deproxy(taxonBase, Synonym.class);
277
			Set<Taxon> acceptedTaxaList = synonym.getAcceptedTaxa();
278
			if(acceptedTaxaList.size()>1){
279
				String message = "Synonym is related to more than one accepted Taxa";
280
				fireWarningEvent(message, item, 4);
281
			}else{
282
				for(Taxon taxon : acceptedTaxaList){
283
					TaxonDescription td = getTaxonDescription(taxon, false);
284
					NamedArea area = NamedArea.getAreaByTdwgAbbreviation(tdwg_area);
285
	
286
					if (area == null){
287
						area = NamedArea.getAreaByTdwgLabel(tdwg_area);
288
					}
289
					if (area != null){
290
						Distribution distribution = Distribution.NewInstance(area, PresenceTerm.PRESENT());
291
						td.addElement(distribution);
292
					}
293
				}
294
			}
295
		}
296
		if(!(taxonBase instanceof Synonym)){
297
			Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class);
298
			TaxonDescription td = getTaxonDescription(taxon, false);
299
			NamedArea area = NamedArea.getAreaByTdwgAbbreviation(tdwg_area);
300
	
301
			if (area == null){
302
				area = NamedArea.getAreaByTdwgLabel(tdwg_area);
303
			}
304
			if (area != null){
305
				Distribution distribution = Distribution.NewInstance(area, PresenceTerm.PRESENT());
306
				td.addElement(distribution);
307
			}
308
		}
309
	}
310
	}
311

    
312

    
313
	/**
314
	 * @param item
315
	 * @param taxonBase
316
	 */
317
	private void handleTaxonRemarks(StreamItem item,TaxonBase<?> taxonBase) {
318
		String comment = item.get(TermUri.DWC_TAXON_REMARKS);
319
		Language language = getLanguage(item);	
320
		if(StringUtils.isNotBlank(comment)){
321
				Annotation annotation = Annotation.NewInstance(comment, language);
322
				taxonBase.addAnnotation(annotation);
323
		}else{
324
			String message = "Comment is empty or some error appeared while saving: %s";
325
//			message = String.format(message);
326
			fireWarningEvent(message, item, 1);
327
		}
328
	}
329

    
330

    
331
	//TODO handle non LSIDs
332
	//TODO handle LSIDs for names
333
	private void handleIdentifier(StreamItem csvTaxonRecord, TaxonBase<?> taxonBase) {
334
		String identifier = csvTaxonRecord.get(TermUri.DC_IDENTIFIER);
335
		if (StringUtils.isNotBlank(identifier)){
336
			if (identifier.trim().startsWith("urn:lsid")){
337
				try {
338
					LSID lsid = new LSID(identifier);
339
					taxonBase.setLsid(lsid);
340
				} catch (MalformedLSIDException e) {
341
					String message = "LSID is malformed and can't be handled as LSID: %s";
342
					message = String.format(message, identifier);
343
					fireWarningEvent(message, csvTaxonRecord, 4);
344
				} 
345
			}else{
346
				String message = "Identifier type not supported: %s";
347
				message = String.format(message, identifier);
348
				fireWarningEvent(message, csvTaxonRecord, 4);
349
			}
350
		}
351
		
352
	}
353

    
354

    
355
	private void handleDataset(StreamItem item, TaxonBase<?> taxonBase, List<MappedCdmBase> resultList, Reference<?> sourceReference, String sourceReferecenDetail) {
356
		TermUri idTerm = TermUri.DWC_DATASET_ID;
357
		TermUri strTerm = TermUri.DWC_DATASET_NAME;
358
		
359
		if (config.isDatasetsAsClassifications()){
360
			String datasetId = CdmUtils.Nz(item.get(idTerm)).trim();
361
			String datasetName = CdmUtils.Nz(item.get(strTerm)).trim();
362
				if (CdmUtils.areBlank(datasetId, datasetName) ){
363
				datasetId = NO_DATASET;
364
			}
365
			
366
			//check id
367
			boolean classificationExists = state.exists(idTerm.toString() , datasetId, Classification.class);
368
			
369
			//check name
370
			if (!classificationExists){
371
				classificationExists = state.exists(strTerm.toString() , datasetName, Classification.class);
372
			}
373
			
374
			//if not exists, create new
375
			if (! classificationExists){
376
				String classificationName = StringUtils.isBlank(datasetName)? datasetId : datasetName;
377
				if (classificationName.equals(NO_DATASET)){					
378
					classificationName = config.getClassificationName();
379
					//classificationName = "Classification (no name)";  //TODO define by config or zipfile or metadata
380
				}
381
				
382
				String classificationId = StringUtils.isBlank(datasetId)? datasetName : datasetId;
383
				Classification classification = Classification.NewInstance(classificationName);
384
				//source
385
				IdentifiableSource source = classification.addSource(OriginalSourceType.Import, classificationId, "Dataset", sourceReference, sourceReferecenDetail);
386
				//add to result
387
				resultList.add(new MappedCdmBase(idTerm, datasetId, classification));
388
				resultList.add(new MappedCdmBase(strTerm, datasetName, classification));
389
				resultList.add(new MappedCdmBase(source));
390
				//TODO this is not so nice but currently necessary as classifications are requested in the same partition
391
				state.putMapping(idTerm.toString(), classificationId, classification);
392
				state.putMapping(strTerm.toString(), classificationName, classification);
393
			}
394
		}else if (config.isDatasetsAsSecundumReference() || config.isDatasetsAsOriginalSource()){
395
			MappedCdmBase<Reference> mappedCitation = getReference(item, resultList, idTerm, strTerm, true);
396
			if (mappedCitation != null){
397
				Reference<?> ref = mappedCitation.getCdmBase();
398
				if (config.isDatasetsAsSecundumReference()){
399
					//dataset as secundum reference
400
					taxonBase.setSec(ref);
401
				}else{
402
					//dataset as original source
403
					taxonBase.addSource(OriginalSourceType.Import, null, null, ref, null);
404
				}
405
			}
406
		}else{
407
			String message = "DatasetUse type not yet implemented. Can't import dataset information.";
408
			fireWarningEvent(message, item, 4);
409
		}
410
		
411
		//remove to later check if all attributes were used
412
		item.remove(idTerm);
413
		item.remove(strTerm);
414
		
415
	}
416

    
417
	
418
	@Override
419
	public String getSourceId(StreamItem item) {
420
		String id = item.get(ID);
421
		return id;
422
	}
423

    
424
	private MappedCdmBase<Reference> getNameAccordingTo(StreamItem item, List<MappedCdmBase> resultList) {
425
		if (config.isDatasetsAsSecundumReference()){
426
			//TODO store nameAccordingTo info some where else or let the user define where to store it.
427
			return null;
428
		}else{
429
			TermUri idTerm = TermUri.DWC_NAME_ACCORDING_TO_ID;
430
			TermUri strTerm = TermUri.DWC_NAME_ACCORDING_TO;
431
			MappedCdmBase<Reference> secRef = getReference(item, resultList, idTerm, strTerm, false);
432
			return secRef;
433
		}
434
	}
435

    
436
	private NomenclaturalCode getNomCode(StreamItem item) {
437
		String strNomCode = getValue(item, TermUri.DWC_NOMENCLATURAL_CODE);
438
		NomenclaturalCode nomCode = null;
439
		// by Nomcenclatural Code
440
		if (strNomCode != null){
441
			nomCode = NomenclaturalCode.fromString(strNomCode);
442
			if (nomCode == null){
443
				String message = "NomCode '%s' not recognized";
444
				message = String.format(message, strNomCode);
445
				fireWarningEvent(message, item, 4);
446
			}else{
447
				return nomCode;
448
			}
449
		}
450
		// by Kingdom
451
		String strKingdom = getValue(item, TermUri.DWC_KINGDOM);
452
		if (strKingdom != null){
453
			if (strKingdom.equalsIgnoreCase("Plantae")){
454
				nomCode = NomenclaturalCode.ICNAFP;
455
			}else if (strKingdom.equalsIgnoreCase("Fungi")){
456
				nomCode = NomenclaturalCode.ICNAFP;
457
			}else if (strKingdom.equalsIgnoreCase("Animalia")){
458
				nomCode = NomenclaturalCode.ICZN;
459
			}else if (strKingdom.equalsIgnoreCase("Protozoa")){
460
				nomCode = NomenclaturalCode.ICZN;
461
			}
462
		}
463
		
464
		//TODO further kingdoms
465
		if (nomCode == null){
466
			//TODO warning
467
			if (config.getNomenclaturalCode() != null){
468
				nomCode = config.getNomenclaturalCode();
469
			}
470
		}
471
		return nomCode;
472
	}
473

    
474

    
475
	private TaxonNameBase<?,?> getScientificName(StreamItem item, NomenclaturalCode nomCode, Rank rank, List<MappedCdmBase> resultList, Reference sourceReference) {
476
		TaxonNameBase<?,?> name = null;
477
		String strScientificName = getValue(item, TermUri.DWC_SCIENTIFIC_NAME);
478
		//Name
479
		if (strScientificName != null){
480
			name = parser.parseFullName(strScientificName, nomCode, rank);
481
			if ( rank != null && name != null && name.getRank() != null &&  ! rank.equals(name.getRank())){
482
				if (config.isValidateRankConsistency()){
483
					String message = "Parsed rank %s (%s) differs from rank %s given by fields 'taxonRank' or 'verbatimTaxonRank'";
484
					message = String.format(message, name.getRank().getTitleCache(), strScientificName, rank.getTitleCache());
485
					fireWarningEvent(message, item, 4);
486
				}
487
			}
488
			checkAuthorship(name, item);
489
			resultList.add(new MappedCdmBase(TermUri.DWC_SCIENTIFIC_NAME, strScientificName, name));
490
		}
491
		//By ID
492
		String strScientificNameId = getValue(item, TermUri.DWC_SCIENTIFIC_NAME_ID);
493
		if (strScientificNameId != null){
494
			if (config.isScientificNameIdAsOriginalSourceId()){
495
				if (name != null){
496
					IdentifiableSource source = IdentifiableSource.NewInstance(OriginalSourceType.Import, strScientificNameId, TermUri.DWC_SCIENTIFIC_NAME_ID.toString(), sourceReference, null);
497
					name.addSource(source);
498
				}
499
			}else{
500
				String message = "ScientificNameId not yet implemented: '%s'";
501
				message = String.format(message, strScientificNameId);
502
				fireWarningEvent(message, item, 4);
503
			}
504
		}
505
		
506
		//namePublishedIn
507
		TermUri idTerm = TermUri.DWC_NAME_PUBLISHED_IN_ID;
508
		TermUri strTerm = TermUri.DWC_NAME_PUBLISHED_IN;
509
		MappedCdmBase<Reference> nomRef = getReference(item, resultList, idTerm, strTerm, false);
510
		
511
		if (name != null){
512
			if (nomRef != null){
513
				name.setNomenclaturalReference(nomRef.getCdmBase());  //check if name already has a nomRef, shouldn't be the case usually
514
			}
515
		}else{
516
			if (nomRef != null){
517
				String message = "NamePublishedIn information available but no name exists";
518
				fireWarningEvent(message, item, 4);
519
			}
520
		}
521
		return name;
522
	}
523

    
524

    
525
	/**
526
	 * General method to handle references used for multiple attributes.
527
	 * @param item
528
	 * @param resultList
529
	 * @param idTerm
530
	 * @param strTerm
531
	 * @param idIsInternal
532
	 * @return
533
	 */
534
	private MappedCdmBase<Reference> getReference(StreamItem item, List<MappedCdmBase> resultList, TermUri idTerm, TermUri strTerm, boolean idIsInternal) {
535
		Reference<?> newRef = null;
536
		Reference<?> sourceCitation = null;
537
		
538
		MappedCdmBase<Reference> result = null;
539
		if (exists(idTerm, item) || exists(strTerm, item)){
540
			String refId = CdmUtils.Nz(item.get(idTerm)).trim();
541
			String refStr = CdmUtils.Nz(item.get(strTerm)).trim();
542
			if (StringUtils.isNotBlank(refId)){
543
				List<Reference> references = state.get(idTerm.toString(), refId, Reference.class);
544
				if (references.size() == 0){
545
					if (! idIsInternal){
546
						//references should already exist in store if not linking to external links like URLs
547
						String message = "External namePublishedInIDs are not yet supported";
548
						fireWarningEvent(message, item, 4);//set to DEBUG
549
					}else{
550
						newRef = ReferenceFactory.newGeneric();  //TODO handle other types if possible
551
						newRef.addSource(OriginalSourceType.Import, refId, idTerm.toString(), sourceCitation, null);
552
						MappedCdmBase<Reference> idResult = new MappedCdmBase<Reference>(idTerm, refId, newRef);
553
						resultList.add(idResult);
554
					}
555
				}else{
556
					//TODO handle list.size > 1 , do we need a list here ?
557
					result = new MappedCdmBase<Reference>(idTerm, refId , references.get(0));
558
				}
559
			}
560
			if (result == null){
561
				List<Reference> nomRefs = state.get(strTerm.toString(), refStr, Reference.class);
562
				if (nomRefs.size() > 0){
563
					//TODO handle list.size > 1 , do we need a list here ?
564
					result = new MappedCdmBase<Reference>(strTerm, refStr , nomRefs.get(0));
565
				}else{
566
					// new Reference
567
					if (newRef == null){
568
						newRef = ReferenceFactory.newGeneric();  //TODO handle other types if possible
569
					}
570
					newRef.setTitleCache(refStr, true);
571
					//TODO distinguish available year, authorship, etc. if
572
					result = new MappedCdmBase<Reference>(strTerm, refStr, newRef);
573
					resultList.add(result);
574
				}
575
			}
576
		}
577
		return result;
578
	}
579

    
580

    
581
	//TODO we may configure in configuration that scientific name never includes Authorship
582
	private void checkAuthorship(TaxonNameBase nameBase, StreamItem item) {
583
		if (!nameBase.isInstanceOf(NonViralName.class)){
584
			return;
585
		}
586
		NonViralName<?> nvName = CdmBase.deproxy(nameBase, NonViralName.class); 
587
		String strAuthors = getValue(item, TermUri.DWC_SCIENTIFIC_NAME_AUTHORS);
588
		
589
		if (! nvName.isProtectedTitleCache()){
590
			if (StringUtils.isBlank(nvName.getAuthorshipCache())){
591
				if (nvName.isInstanceOf(BotanicalName.class) || nvName.isInstanceOf(ZoologicalName.class)){
592
					//TODO can't we also parse NonViralNames correctly ?
593
					try {
594
						parser.parseAuthors(nvName, strAuthors);
595
					} catch (StringNotParsableException e) {
596
						nvName.setAuthorshipCache(strAuthors);
597
					}		
598
				}else{
599
					nvName.setAuthorshipCache(strAuthors);
600
				}
601
				//TODO throw warning (scientific name should always include authorship) by DwC definition
602
			}
603
		}
604
		
605
	}
606

    
607

    
608
	private Rank getRank(StreamItem csvTaxonRecord, NomenclaturalCode nomCode) {
609
		boolean USE_UNKNOWN = true;
610
		Rank rank = null;
611
		String strRank = getValue(csvTaxonRecord,TermUri.DWC_TAXON_RANK);
612
		String strVerbatimRank = getValue(csvTaxonRecord,TermUri.DWC_VERBATIM_TAXON_RANK);
613
		if (strRank != null){
614
			try {
615
				rank = Rank.getRankByEnglishName(strRank, nomCode, USE_UNKNOWN);
616
				if (rank.equals(Rank.UNKNOWN_RANK())){
617
					rank = Rank.getRankByNameOrAbbreviation(strRank, USE_UNKNOWN);
618
					if (rank.equals(Rank.UNKNOWN_RANK())){
619
						String message = "Rank can not be defined for '%s'";
620
						message = String.format(message, strRank);
621
						fireWarningEvent(message, csvTaxonRecord, 4);
622
					}
623
				}
624
			} catch (UnknownCdmTypeException e) {
625
				//should not happen as USE_UNKNOWN is used
626
				rank = Rank.UNKNOWN_RANK();
627
			}
628
		}
629
		if ( (rank == null || rank.equals(Rank.UNKNOWN_RANK())) && strVerbatimRank != null){
630
			try {
631
				rank = Rank.getRankByNameOrAbbreviation(strVerbatimRank, USE_UNKNOWN);
632
				if (rank.equals(Rank.UNKNOWN_RANK())){
633
					String message = "Rank can not be defined for '%s'";
634
					message = String.format(message, strVerbatimRank);
635
					fireWarningEvent(message, csvTaxonRecord, 4);
636
				}
637
			} catch (UnknownCdmTypeException e) {
638
				//should not happen as USE_UNKNOWN is used
639
				rank = Rank.UNKNOWN_RANK();
640
			}
641
		}
642
		return rank;
643
	}
644

    
645

    
646
	/**
647
	 * Creates an empty taxon object with a given status.
648
	 * @param item
649
	 * @return
650
	 */
651
	private TaxonBase<?> getTaxonBase(StreamItem item) {
652
		TaxonNameBase<?,?> name = null;
653
		Reference<?> sec = null;
654
		TaxonBase<?> result;
655
		String taxStatus = item.get(TermUri.DWC_TAXONOMIC_STATUS);
656
		String status = "";
657
		
658
		if (taxStatus != null){
659
			if (taxStatus.matches("accepted.*|valid")){
660
				status += "A";
661
			} else if (taxStatus.matches(".*synonym|invalid|not accepted")){   //not accepted comes from scratchpads
662
				status += "S";
663
			} else if (taxStatus.matches("misapplied.*")){
664
				status += "M";
665
			} else{
666
				status += "?";
667
			}
668
			item.remove(TermUri.DWC_TAXONOMIC_STATUS);
669
		}
670
		if (! CdmUtils.isBlank(item.get(TermUri.DWC_ACCEPTED_NAME_USAGE_ID))){
671
			// acceptedNameUsageId = id
672
			if (getSourceId(item).equals(item.get(TermUri.DWC_ACCEPTED_NAME_USAGE_ID))){
673
				status += "A";
674
			}else{
675
				status += "S";
676
			}
677
		}
678
		if (status.contains("A") || status.contains("M")){
679
			result = Taxon.NewInstance(name, sec);
680
			if (status.contains("S") && ! status.contains("M") ){
681
				String message = "Ambigous taxon status (%s)";
682
				message = String.format(message, status);
683
				fireWarningEvent(message, item, 6);
684
			}
685
		} else if (status.contains("S")){
686
			result = Synonym.NewInstance(name, sec);
687
		} else{
688
			result = Taxon.NewUnknownStatusInstance(name, sec);
689
		}
690
			
691
		return result;
692

    
693
	}
694
	
695

    
696
	/**
697
	 * @param item
698
	 * @return
699
	 */
700
	private Language getLanguage(StreamItem item) {
701
		String langItem = item.get(TermUri.DC_LANGUAGE);
702
		Language language = null;
703

    
704
		if(StringUtils.equalsIgnoreCase(langItem, "de")){
705
			language = Language.GERMAN();
706
		}else if(StringUtils.equalsIgnoreCase(langItem, "en")){
707
			language = Language.ENGLISH();
708
		}else{
709
			language = Language.DEFAULT();
710
		}
711
		return language;
712
	}
713

    
714
// ********************** PARTITIONABLE ****************************************/
715

    
716

    
717
	@Override
718
	protected void makeForeignKeysForItem(StreamItem item, Map<String, Set<String>> fkMap) {
719
		String value;
720
		String key;
721
		
722
		//namePublishedIn
723
		if ( hasValue(value = item.get(key = TermUri.DWC_NAME_PUBLISHED_IN_ID.toString()))){
724
			Set<String> keySet = getKeySet(key, fkMap);
725
			keySet.add(value);
726
		}
727
		if (config.isDeduplicateNamePublishedIn()){
728
			if ( hasValue(value = item.get(key = TermUri.DWC_NAME_PUBLISHED_IN.toString()))){
729
				Set<String> keySet = getKeySet(key, fkMap);
730
				keySet.add(value);
731
			}
732
		}
733
		
734
		//nameAccordingTo
735
		if (! config.isDatasetsAsSecundumReference()){
736
			if ( hasValue(value = item.get(key = TermUri.DWC_NAME_ACCORDING_TO_ID.toString()))){
737
				Set<String> keySet = getKeySet(key, fkMap);
738
				keySet.add(value);
739
			}
740
			if ( hasValue(value = item.get(key = TermUri.DWC_NAME_ACCORDING_TO.toString()))){
741
				Set<String> keySet = getKeySet(key, fkMap);
742
				keySet.add(value);
743
			}
744
		}
745
		
746
		//dataset
747
		if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_ID.toString()))){
748
			Set<String> keySet = getKeySet(key, fkMap);
749
			keySet.add(value);
750
		}
751
		if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_NAME.toString()))){
752
			Set<String> keySet = getKeySet(key, fkMap);
753
			keySet.add(value);
754
		}
755
		
756
	}
757
	
758
	
759
	@Override
760
	public Set<String> requiredSourceNamespaces() {
761
		Set<String> result = new HashSet<String>();
762
 		result.add(TermUri.DWC_NAME_PUBLISHED_IN_ID.toString());
763
 		result.add(TermUri.DWC_NAME_PUBLISHED_IN.toString());
764
 		if (!config.isDatasetsAsSecundumReference()){
765
	 		result.add(TermUri.DWC_NAME_ACCORDING_TO_ID.toString());
766
	 		result.add(TermUri.DWC_NAME_ACCORDING_TO.toString());
767
 		}
768
	 	result.add(TermUri.DWC_DATASET_ID.toString());
769
	 	result.add(TermUri.DWC_DATASET_NAME.toString());
770
	 	return result;
771
	}
772
	
773
	
774
	
775
	
776
//** ***************************** TO STRING *********************************************/
777
	
778
	@Override
779
	public String toString(){
780
		return this.getClass().getName();
781
	}
782

    
783

    
784
	
785
}
(5-5/35)