Project

General

Profile

Download (28.2 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2009 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.dwca.in;
10

    
11
import java.net.URI;
12
import java.util.ArrayList;
13
import java.util.HashSet;
14
import java.util.List;
15
import java.util.Map;
16
import java.util.Set;
17

    
18
import org.apache.commons.lang.StringUtils;
19
import org.apache.log4j.Logger;
20

    
21
import com.ibm.lsid.MalformedLSIDException;
22

    
23
import eu.etaxonomy.cdm.common.CdmUtils;
24
import eu.etaxonomy.cdm.io.dwca.TermUri;
25
import eu.etaxonomy.cdm.io.stream.StreamImportBase;
26
import eu.etaxonomy.cdm.io.stream.StreamImportStateBase;
27
import eu.etaxonomy.cdm.io.stream.StreamItem;
28
import eu.etaxonomy.cdm.model.common.Annotation;
29
import eu.etaxonomy.cdm.model.common.CdmBase;
30
import eu.etaxonomy.cdm.model.common.Extension;
31
import eu.etaxonomy.cdm.model.common.ExtensionType;
32
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
33
import eu.etaxonomy.cdm.model.common.LSID;
34
import eu.etaxonomy.cdm.model.common.Language;
35
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
36
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
37
import eu.etaxonomy.cdm.model.description.Distribution;
38
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
39
import eu.etaxonomy.cdm.model.description.TaxonDescription;
40
import eu.etaxonomy.cdm.model.location.NamedArea;
41
import eu.etaxonomy.cdm.model.name.BotanicalName;
42
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
43
import eu.etaxonomy.cdm.model.name.NonViralName;
44
import eu.etaxonomy.cdm.model.name.Rank;
45
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
46
import eu.etaxonomy.cdm.model.name.ZoologicalName;
47
import eu.etaxonomy.cdm.model.reference.Reference;
48
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
49
import eu.etaxonomy.cdm.model.taxon.Classification;
50
import eu.etaxonomy.cdm.model.taxon.Synonym;
51
import eu.etaxonomy.cdm.model.taxon.Taxon;
52
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
53
import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
54
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
55
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
56

    
57
/**
58
 * @author a.mueller
59
 * @date 22.11.2011
60
 *
61
 */
62
public class  DwcTaxonStreamItem2CdmTaxonConverter<CONFIG extends DwcaDataImportConfiguratorBase, STATE extends StreamImportStateBase<CONFIG, StreamImportBase>>
63
        extends PartitionableConverterBase<CONFIG, STATE>
64
        implements IPartitionableConverter<StreamItem, IReader<CdmBase>, String>, ItemFilter<StreamItem> {
65

    
66
    private static final Logger logger = Logger.getLogger(DwcTaxonStreamItem2CdmTaxonConverter.class);
67

    
68
    //if this converter is used as filter we may not want to delete item parts during evaluation
69
    boolean isFilterOnly = false;
70

    
71
    private static final String ID = "id";
72
	// temporary key for the case that no dataset information is supplied, TODO use something better
73
	public static final String NO_DATASET = "no_dataset_jli773oebhjklw";
74

    
75
	private final NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
76

    
77
	/**
	 * Creates a converter that is not restricted to filtering
	 * (<code>isFilterOnly</code> is <code>false</code>).
	 * @param state the import state handed to the super class
	 */
	public DwcTaxonStreamItem2CdmTaxonConverter(STATE state) {
		this(state, false);
	}
83

    
84
    /**
     * Creates a converter and defines whether it is used as filter only.
     * @param state the import state handed to the super class
     * @param isFilter if <code>true</code>, <code>removeItemInfo</code> does not
     *  remove evaluated terms from the stream item
     */
    public DwcTaxonStreamItem2CdmTaxonConverter(STATE state, boolean isFilter) {
        super(state);
        isFilterOnly = isFilter;
    }
88

    
89
    @Override
90
    public boolean toBeRemovedFromStream(StreamItem item) {
91
        if (!config.isDoSplitRelationshipImport()){
92
            return false;
93
        }else{
94
            if (isSynonym(item)){
95
                return ! this.config.isDoSynonymRelationships();
96
            }else{
97
                NomenclaturalCode nomCode = getNomCode(item);
98
                Rank rank = getRank(item, nomCode);
99
                boolean isHigherRank = rank == null || rank.isHigher(Rank.SPECIES());
100
                if (isHigherRank){
101
                    return ! config.isDoHigherRankRelationships();
102
                }else{
103
                    return ! config.isDoLowerRankRelationships();
104
                }
105
            }
106
        }
107
    }
108

    
109
    private boolean isSynonym(StreamItem item) {
110
        TaxonBase<?> taxonBase = getTaxonBase(item);
111
        return taxonBase instanceof Synonym;
112
    }
113

    
114
	@Override
115
    public IReader<MappedCdmBase> map(StreamItem csvTaxonRecord){
116
		List<MappedCdmBase> resultList = new ArrayList<MappedCdmBase>();
117

    
118
		//TODO what if not transactional?
119
		Reference sourceReference = state.getTransactionalSourceReference();
120
		String sourceReferenceDetail = null;
121

    
122
		//taxon
123
		TaxonBase<?> taxonBase = getTaxonBase(csvTaxonRecord);
124
		MappedCdmBase  mcb = new MappedCdmBase(csvTaxonRecord.term, csvTaxonRecord.get(ID), taxonBase);
125
		resultList.add(mcb);
126

    
127
		//original source
128
		String id = csvTaxonRecord.get(ID);
129
		IdentifiableSource source = taxonBase.addSource(OriginalSourceType.Import, id, "Taxon", sourceReference, sourceReferenceDetail);
130
		MappedCdmBase mappedSource = new MappedCdmBase(csvTaxonRecord.get(ID), source);
131
		resultList.add(mappedSource);
132
		csvTaxonRecord.remove(ID);
133

    
134
		//rank
135
		NomenclaturalCode nomCode = getNomCode(csvTaxonRecord);
136
		Rank rank = getRank(csvTaxonRecord, nomCode);
137

    
138
		//name && name published in
139
		TaxonNameBase<?,?> name = getScientificName(csvTaxonRecord, nomCode, rank, resultList, sourceReference);
140
		taxonBase.setName(name);
141

    
142
		//nameAccordingTo
143
		MappedCdmBase<Reference> sec = getNameAccordingTo(csvTaxonRecord, resultList);
144

    
145
		if (sec == null && state.getConfig().isUseSourceReferenceAsSec()){
146
			sec = new MappedCdmBase<Reference>(state.getTransactionalSourceReference());
147
		}
148
		if (sec != null){
149
			taxonBase.setSec(sec.getCdmBase());
150
		}
151

    
152
		//classification
153
		handleDataset(csvTaxonRecord, taxonBase, resultList, sourceReference, sourceReferenceDetail);
154

    
155
		//NON core
156
	    //term="http://purl.org/dc/terms/identifier"
157
		//currently only LSIDs
158
		handleIdentifier(csvTaxonRecord, taxonBase);
159

    
160
		//TaxonRemarks
161
		handleTaxonRemarks(csvTaxonRecord, taxonBase);
162

    
163
		//TDWG_1
164
		handleTdwgArea(csvTaxonRecord, taxonBase);
165

    
166
		//VernecularName
167
		handleCommonNames(csvTaxonRecord, taxonBase);
168

    
169
		//External Sources, ID's and References
170
		handleIdentifiableObjects(csvTaxonRecord, taxonBase);
171

    
172

    
173
		//		    <!-- Top level group; listed as kingdom but may be interpreted as domain or superkingdom
174
//		         The following eight groups are recognized: Animalia, Archaea, Bacteria, Chromista,
175
//		         Fungi, Plantae, Protozoa, Viruses -->
176
//		    <field index='10' term='http://rs.tdwg.org/dwc/terms/kingdom'/>
177

    
178
//		    <!-- Phylum in which the taxon has been classified -->
179
//		    <field index='11' term='http://rs.tdwg.org/dwc/terms/phylum'/>
180

    
181
		//		    <!-- Class in which the taxon has been classified -->
182
//		    <field index='12' term='http://rs.tdwg.org/dwc/terms/class'/>
183

    
184
		//		    <!-- Order in which the taxon has been classified -->
185
//		    <field index='13' term='http://rs.tdwg.org/dwc/terms/order'/>
186

    
187
		//		    <!-- Family in which the taxon has been classified -->
188
//		    <field index='14' term='http://rs.tdwg.org/dwc/terms/family'/>
189

    
190
		//		    <!-- Genus in which the taxon has been classified -->
191
//		    <field index='15' term='http://rs.tdwg.org/dwc/terms/genus'/>
192

    
193
		//		    <!-- Subgenus in which the taxon has been classified -->
194
//		    <field index='16' term='http://rs.tdwg.org/dwc/terms/subgenus'/>
195
//		    <!-- Specific epithet; for hybrids, the multiplication symbol is included in the epithet -->
196

    
197
//		    <field index='17' term='http://rs.tdwg.org/dwc/terms/specificEpithet'/>
198
//		    <!-- Infraspecific epithet -->
199

    
200
//		    <field index='18' term='http://rs.tdwg.org/dwc/terms/infraspecificEpithet'/>
201
//		    <!-- Authorship -->
202

    
203
//		    <field index='19' term='http://rs.tdwg.org/dwc/terms/scientificNameAuthorship'/>
204
//		==> see scientific name
205
//
206
//		<!-- Acceptance status published in -->
207
//		    <field index='20' term='http://purl.org/dc/terms/source'/>
208
//		    <!-- Reference in which the scientific name was first published -->
209
//		    <field index='21' term='http://rs.tdwg.org/dwc/terms/namePublishedIn'/>
210
//		    <!-- Taxon scrutinized by -->
211
//		    <field index='22' term='http://rs.tdwg.org/dwc/terms/nameAccordingTo'/> 
212
//		    <!-- Scrutiny date -->
213
//		    <field index='23' term='http://purl.org/dc/terms/modified'/>
214
//		    <!-- Additional data for the taxon -->
215
//		    <field index='24' term='http://purl.org/dc/terms/description'/>
216
//		    </core>
217

    
218
		return new ListReader<MappedCdmBase>(resultList);
219
	}
220

    
221

    
222

    
223
	/**
224
	 * @param item
225
	 * @param taxonBase
226
	 */
227
	private void handleIdentifiableObjects(StreamItem item,TaxonBase<?> taxonBase) {
228

    
229

    
230
		String references = item.get(TermUri.DC_REFERENCES);
231

    
232
		if (references == null || references == "") {
233
			references = item.get(TermUri.DWC_NAME_PUBLISHED_IN_ID);//lorna temporary until Scratchpads move the reference to the correct place.
234
		}
235

    
236
		if (StringUtils.isNotBlank(references)){
237
			URI uri = makeUriIfIs(references);
238
			if (uri != null){
239
				Extension.NewInstance(taxonBase, references, ExtensionType.URL());
240
			}else{
241
				String message = "Non-URI Dublin Core References not yet handled for taxa. References is: %s";
242
				fireWarningEvent(String.format(message, references), item, 6);
243
			}
244
		}
245

    
246

    
247
		//TODO: Finish properly
248
		String id = item.get(TermUri.CDM_SOURCE_IDINSOURCE);
249
		String idNamespace = item.get(TermUri.CDM_SOURCE_IDNAMESPACE);
250
		String reference = item.get(TermUri.CDM_SOURCE_REFERENCE);
251
		if(StringUtils.isNotBlank(id) && StringUtils.isNotBlank(idNamespace) && StringUtils.isNotBlank(reference)){
252
			Reference ref = ReferenceFactory.newGeneric();
253
			ref.setTitle(reference);
254
			Taxon taxon = (Taxon) taxonBase;
255
			taxon.addSource(OriginalSourceType.Import, id, idNamespace, ref, null);
256
		}
257

    
258

    
259

    
260
	}
261

    
262

    
263
	/**
264
	 * If str is an uri it returns is as an {@link URI}. If not it returns <code>null</code>.
265
	 * @param str
266
	 * @return the URI.
267
	 */
268
	private URI makeUriIfIs(String str) {
269
		if (! str.startsWith("http:")){
270
			return null;
271
		}else{
272
			try {
273
				URI uri = URI.create(str);
274
				return uri;
275
			} catch (Exception e) {
276
				return null;
277
			}
278
		}
279

    
280
	}
281

    
282

    
283
	/**
284
	 * @param item
285
	 * @param taxonBase
286
	 */
287
	private void handleCommonNames(StreamItem item,TaxonBase<?> taxonBase) {
288
		//TODO: handle comma separated values
289
		String commonName = item.get(TermUri.DWC_VERNACULAR_NAME);
290
		if (StringUtils.isNotBlank(commonName)){
291

    
292
			Language language = getLanguage(item);
293
			CommonTaxonName commonTaxonName = CommonTaxonName.NewInstance(commonName, language);
294
			if(taxonBase instanceof Taxon){
295
				Taxon taxon = (Taxon) taxonBase;
296
				TaxonDescription taxonDescription = getTaxonDescription(taxon, false);
297
				taxonDescription.addElement(commonTaxonName);
298
				logger.info("Common name " + commonName + " added to " + taxon.getTitleCache());
299
			}
300
		}
301
	}
302

    
303

    
304

    
305
	/**
306
	 * @param csvTaxonRecord
307
	 * @param taxonBase
308
	 */
309
	private void handleTdwgArea(StreamItem item, TaxonBase<?> taxonBase) {
310
		String tdwg_area = item.get(TermUri.DWC_COUNTRY_CODE);
311
		if (tdwg_area != null){
312
    		if(taxonBase instanceof Synonym){
313
    			Synonym synonym = CdmBase.deproxy(taxonBase, Synonym.class);
314
    			Taxon acceptedTaxon = synonym.getAcceptedTaxon();
315
    			if (acceptedTaxon != null){
316
    			    TaxonDescription td = getTaxonDescription(acceptedTaxon, false);
317
    			    NamedArea area = NamedArea.getAreaByTdwgAbbreviation(tdwg_area);
318

    
319
    			    if (area == null){
320
    			        area = NamedArea.getAreaByTdwgLabel(tdwg_area);
321
    			    }
322
    			    if (area != null){
323
    			        Distribution distribution = Distribution.NewInstance(area, PresenceAbsenceTerm.PRESENT());
324
    			        td.addElement(distribution);
325
    			    }
326
    			}
327
    		}
328
    		if(!(taxonBase instanceof Synonym)){
329
    			Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class);
330
    			TaxonDescription td = getTaxonDescription(taxon, false);
331
    			NamedArea area = NamedArea.getAreaByTdwgAbbreviation(tdwg_area);
332

    
333
    			if (area == null){
334
    				area = NamedArea.getAreaByTdwgLabel(tdwg_area);
335
    			}
336
    			if (area != null){
337
    				Distribution distribution = Distribution.NewInstance(area, PresenceAbsenceTerm.PRESENT());
338
    				td.addElement(distribution);
339
    			}
340
    		}
341
    	}
342
	}
343

    
344

    
345
	/**
346
	 * @param item
347
	 * @param taxonBase
348
	 */
349
	private void handleTaxonRemarks(StreamItem item,TaxonBase<?> taxonBase) {
350
		String comment = item.get(TermUri.DWC_TAXON_REMARKS);
351
		Language language = getLanguage(item);
352
		if(StringUtils.isNotBlank(comment)){
353
				Annotation annotation = Annotation.NewInstance(comment, language);
354
				taxonBase.addAnnotation(annotation);
355
		}else{
356
//			String message = "Comment is empty or some error appeared while saving: %s";
357
////			message = String.format(message);
358
//			fireWarningEvent(message, item, 1);
359
		}
360
	}
361

    
362

    
363
	//TODO handle non LSIDs
364
	//TODO handle LSIDs for names
365
	private void handleIdentifier(StreamItem csvTaxonRecord, TaxonBase<?> taxonBase) {
366
		String identifier = csvTaxonRecord.get(TermUri.DC_IDENTIFIER);
367
		if (StringUtils.isNotBlank(identifier)){
368
			if (identifier.trim().startsWith("urn:lsid")){
369
				try {
370
					LSID lsid = new LSID(identifier);
371
					taxonBase.setLsid(lsid);
372
				} catch (MalformedLSIDException e) {
373
					String message = "LSID is malformed and can't be handled as LSID: %s";
374
					message = String.format(message, identifier);
375
					fireWarningEvent(message, csvTaxonRecord, 4);
376
				}
377
			}else{
378
				String message = "Identifier type not supported: %s";
379
				message = String.format(message, identifier);
380
				fireWarningEvent(message, csvTaxonRecord, 4);
381
			}
382
		}
383

    
384
	}
385

    
386

    
387
	private void handleDataset(StreamItem item, TaxonBase<?> taxonBase, List<MappedCdmBase> resultList, Reference sourceReference, String sourceReferecenDetail) {
388
		TermUri idTerm = TermUri.DWC_DATASET_ID;
389
		TermUri strTerm = TermUri.DWC_DATASET_NAME;
390

    
391
		if (config.isDatasetsAsClassifications()){
392
			String datasetId = CdmUtils.Nz(item.get(idTerm)).trim();
393
			String datasetName = CdmUtils.Nz(item.get(strTerm)).trim();
394
				if (CdmUtils.areBlank(datasetId, datasetName) ){
395
				datasetId = NO_DATASET;
396
			}
397

    
398
			//check id
399
			boolean classificationExists = state.exists(idTerm.toString() , datasetId, Classification.class);
400

    
401
			//check name
402
			if (!classificationExists){
403
				classificationExists = state.exists(strTerm.toString() , datasetName, Classification.class);
404
			}
405

    
406
			//if not exists, create new
407
			if (! classificationExists){
408
				String classificationName = StringUtils.isBlank(datasetName)? datasetId : datasetName;
409
				if (classificationName.equals(NO_DATASET)){
410
					classificationName = config.getClassificationName();
411
					//classificationName = "Classification (no name)";  //TODO define by config or zipfile or metadata
412
				}
413

    
414
				String classificationId = StringUtils.isBlank(datasetId)? datasetName : datasetId;
415
				Classification classification = Classification.NewInstance(classificationName);
416
				//source
417
				IdentifiableSource source = classification.addSource(OriginalSourceType.Import, classificationId, "Dataset", sourceReference, sourceReferecenDetail);
418
				//add to result
419
				resultList.add(new MappedCdmBase(idTerm, datasetId, classification));
420
				resultList.add(new MappedCdmBase(strTerm, datasetName, classification));
421
				resultList.add(new MappedCdmBase(source));
422
				//TODO this is not so nice but currently necessary as classifications are requested in the same partition
423
				state.putMapping(idTerm.toString(), classificationId, classification);
424
				state.putMapping(strTerm.toString(), classificationName, classification);
425
			}
426
		}else if (config.isDatasetsAsSecundumReference() || config.isDatasetsAsOriginalSource()){
427
			MappedCdmBase<Reference> mappedCitation = getReference(item, resultList, idTerm, strTerm, true);
428
			if (mappedCitation != null){
429
				Reference ref = mappedCitation.getCdmBase();
430
				if (config.isDatasetsAsSecundumReference()){
431
					//dataset as secundum reference
432
					taxonBase.setSec(ref);
433
				}else{
434
					//dataset as original source
435
					taxonBase.addSource(OriginalSourceType.Import, null, null, ref, null);
436
				}
437
			}
438
		}else{
439
			String message = "DatasetUse type not yet implemented. Can't import dataset information.";
440
			fireWarningEvent(message, item, 4);
441
		}
442

    
443
		//remove to later check if all attributes were used
444
		removeItemInfo(item, idTerm);
445
		removeItemInfo(item, strTerm);
446
	}
447

    
448

    
449
	@Override
450
	public String getSourceId(StreamItem item) {
451
		String id = item.get(ID);
452
		return id;
453
	}
454

    
455
	private MappedCdmBase<Reference> getNameAccordingTo(StreamItem item, List<MappedCdmBase> resultList) {
456
		if (config.isDatasetsAsSecundumReference()){
457
			//TODO store nameAccordingTo info some where else or let the user define where to store it.
458
			return null;
459
		}else{
460
			TermUri idTerm = TermUri.DWC_NAME_ACCORDING_TO_ID;
461
			TermUri strTerm = TermUri.DWC_NAME_ACCORDING_TO;
462
			MappedCdmBase<Reference> secRef = getReference(item, resultList, idTerm, strTerm, false);
463
			return secRef;
464
		}
465
	}
466

    
467
	private NomenclaturalCode getNomCode(StreamItem item) {
468
		String strNomCode = getValue(item, TermUri.DWC_NOMENCLATURAL_CODE);
469
		NomenclaturalCode nomCode = null;
470
		// by Nomcenclatural Code
471
		if (strNomCode != null){
472
			nomCode = NomenclaturalCode.fromString(strNomCode);
473
			if (nomCode == null){
474
				String message = "NomCode '%s' not recognized";
475
				message = String.format(message, strNomCode);
476
				fireWarningEvent(message, item, 4);
477
			}else{
478
				return nomCode;
479
			}
480
		}
481
		// by Kingdom
482
		String strKingdom = getValue(item, TermUri.DWC_KINGDOM);
483
		if (strKingdom != null){
484
			if (strKingdom.equalsIgnoreCase("Plantae")){
485
				nomCode = NomenclaturalCode.ICNAFP;
486
			}else if (strKingdom.equalsIgnoreCase("Fungi")){
487
				nomCode = NomenclaturalCode.ICNAFP;
488
			}else if (strKingdom.equalsIgnoreCase("Animalia")){
489
				nomCode = NomenclaturalCode.ICZN;
490
			}else if (strKingdom.equalsIgnoreCase("Protozoa")){
491
				nomCode = NomenclaturalCode.ICZN;
492
			}
493
		}
494

    
495
		//TODO further kingdoms
496
		if (nomCode == null){
497
			//TODO warning
498
			if (config.getNomenclaturalCode() != null){
499
				nomCode = config.getNomenclaturalCode();
500
			}
501
		}
502
		return nomCode;
503
	}
504

    
505

    
506
	private TaxonNameBase<?,?> getScientificName(StreamItem item, NomenclaturalCode nomCode, Rank rank, List<MappedCdmBase> resultList, Reference sourceReference) {
507
		TaxonNameBase<?,?> name = null;
508
		String strScientificName = getValue(item, TermUri.DWC_SCIENTIFIC_NAME);
509
		//Name
510
		if (strScientificName != null){
511
			name = (TaxonNameBase<?,?>)parser.parseFullName(strScientificName, nomCode, rank);
512
			if ( rank != null && name != null && name.getRank() != null &&  ! rank.equals(name.getRank())){
513
				if (config.isValidateRankConsistency()){
514
					String message = "Parsed rank %s (%s) differs from rank %s given by fields 'taxonRank' or 'verbatimTaxonRank'";
515
					message = String.format(message, name.getRank().getTitleCache(), strScientificName, rank.getTitleCache());
516
					fireWarningEvent(message, item, 4);
517
				}
518
			}
519
			checkAuthorship(name, item);
520
			resultList.add(new MappedCdmBase(TermUri.DWC_SCIENTIFIC_NAME, strScientificName, name));
521
		}
522
		//By ID
523
		String strScientificNameId = getValue(item, TermUri.DWC_SCIENTIFIC_NAME_ID);
524
		if (strScientificNameId != null){
525
			if (config.isScientificNameIdAsOriginalSourceId()){
526
				if (name != null){
527
					IdentifiableSource source = IdentifiableSource.NewInstance(OriginalSourceType.Import, strScientificNameId, TermUri.DWC_SCIENTIFIC_NAME_ID.toString(), sourceReference, null);
528
					name.addSource(source);
529
				}
530
			}else{
531
				String message = "ScientificNameId not yet implemented: '%s'";
532
				message = String.format(message, strScientificNameId);
533
				fireWarningEvent(message, item, 4);
534
			}
535
		}
536

    
537
		//namePublishedIn
538
		TermUri idTerm = TermUri.DWC_NAME_PUBLISHED_IN_ID;
539
		TermUri strTerm = TermUri.DWC_NAME_PUBLISHED_IN;
540
		MappedCdmBase<Reference> nomRef = getReference(item, resultList, idTerm, strTerm, false);
541

    
542
		if (name != null){
543
			if (nomRef != null){
544
				name.setNomenclaturalReference(nomRef.getCdmBase());  //check if name already has a nomRef, shouldn't be the case usually
545
			}
546
		}else{
547
			if (nomRef != null){
548
				String message = "NamePublishedIn information available but no name exists";
549
				fireWarningEvent(message, item, 4);
550
			}
551
		}
552
		return name;
553
	}
554

    
555

    
556
	/**
557
	 * General method to handle references used for multiple attributes.
558
	 * @param item
559
	 * @param resultList
560
	 * @param idTerm
561
	 * @param strTerm
562
	 * @param idIsInternal
563
	 * @return
564
	 */
565
	private MappedCdmBase<Reference> getReference(StreamItem item, List<MappedCdmBase> resultList, TermUri idTerm, TermUri strTerm, boolean idIsInternal) {
566
		Reference newRef = null;
567
		Reference sourceCitation = null;
568

    
569
		MappedCdmBase<Reference> result = null;
570
		if (exists(idTerm, item) || exists(strTerm, item)){
571
			String refId = CdmUtils.Nz(item.get(idTerm)).trim();
572
			String refStr = CdmUtils.Nz(item.get(strTerm)).trim();
573
			if (StringUtils.isNotBlank(refId)){
574
				List<Reference> references = state.get(idTerm.toString(), refId, Reference.class);
575
				if (references.size() == 0){
576
					if (! idIsInternal){
577
						//references should already exist in store if not linking to external links like URLs
578
						String message = "External namePublishedInIDs are not yet supported";
579
						fireWarningEvent(message, item, 4);//set to DEBUG
580
					}else{
581
						newRef = ReferenceFactory.newGeneric();  //TODO handle other types if possible
582
						newRef.addSource(OriginalSourceType.Import, refId, idTerm.toString(), sourceCitation, null);
583
						MappedCdmBase<Reference> idResult = new MappedCdmBase<Reference>(idTerm, refId, newRef);
584
						resultList.add(idResult);
585
					}
586
				}else{
587
					//TODO handle list.size > 1 , do we need a list here ?
588
					result = new MappedCdmBase<Reference>(idTerm, refId , references.get(0));
589
				}
590
			}
591
			if (result == null){
592
				List<Reference> nomRefs = state.get(strTerm.toString(), refStr, Reference.class);
593
				if (nomRefs.size() > 0){
594
					//TODO handle list.size > 1 , do we need a list here ?
595
					result = new MappedCdmBase<Reference>(strTerm, refStr , nomRefs.get(0));
596
				}else{
597
					// new Reference
598
					if (newRef == null){
599
						newRef = ReferenceFactory.newGeneric();  //TODO handle other types if possible
600
					}
601
					newRef.setTitleCache(refStr, true);
602
					//TODO distinguish available year, authorship, etc. if
603
					result = new MappedCdmBase<Reference>(strTerm, refStr, newRef);
604
					resultList.add(result);
605
				}
606
			}
607
		}
608
		return result;
609
	}
610

    
611

    
612
	//TODO we may configure in configuration that scientific name never includes Authorship
613
	private void checkAuthorship(TaxonNameBase<?,?> nameBase, StreamItem item) {
614
		if (!nameBase.isInstanceOf(NonViralName.class)){
615
			return;
616
		}
617
		String strAuthors = getValue(item, TermUri.DWC_SCIENTIFIC_NAME_AUTHORS);
618

    
619
		if (! nameBase.isProtectedTitleCache()){
620
			if (StringUtils.isBlank(nameBase.getAuthorshipCache())){
621
				if (nameBase.isInstanceOf(BotanicalName.class) || nameBase.isInstanceOf(ZoologicalName.class)){
622
					//TODO can't we also parse NonViralNames correctly ?
623
					try {
624
						parser.parseAuthors(nameBase, strAuthors);
625
					} catch (StringNotParsableException e) {
626
					    nameBase.setAuthorshipCache(strAuthors);
627
					}
628
				}else{
629
				    nameBase.setAuthorshipCache(strAuthors);
630
				}
631
				//TODO throw warning (scientific name should always include authorship) by DwC definition
632
			}
633
		}
634

    
635
	}
636

    
637

    
638
	private Rank getRank(StreamItem csvTaxonRecord, NomenclaturalCode nomCode) {
639
		boolean USE_UNKNOWN = true;
640
		Rank rank = null;
641
		String strRank = getValue(csvTaxonRecord,TermUri.DWC_TAXON_RANK);
642
		String strVerbatimRank = getValue(csvTaxonRecord,TermUri.DWC_VERBATIM_TAXON_RANK);
643
		if (strRank != null){
644
			try {
645
				rank = Rank.getRankByEnglishName(strRank, nomCode, USE_UNKNOWN);
646
				if (rank.equals(Rank.UNKNOWN_RANK())){
647
					rank = Rank.getRankByNameOrIdInVoc(strRank, USE_UNKNOWN);
648
					if (rank.equals(Rank.UNKNOWN_RANK())){
649
						String message = "Rank can not be defined for '%s'";
650
						message = String.format(message, strRank);
651
						fireWarningEvent(message, csvTaxonRecord, 4);
652
					}
653
				}
654
			} catch (UnknownCdmTypeException e) {
655
				//should not happen as USE_UNKNOWN is used
656
				rank = Rank.UNKNOWN_RANK();
657
			}
658
		}
659
		if ( (rank == null || rank.equals(Rank.UNKNOWN_RANK())) && strVerbatimRank != null){
660
			try {
661
				rank = Rank.getRankByNameOrIdInVoc(strVerbatimRank, USE_UNKNOWN);
662
				if (rank.equals(Rank.UNKNOWN_RANK())){
663
					String message = "Rank can not be defined for '%s'";
664
					message = String.format(message, strVerbatimRank);
665
					fireWarningEvent(message, csvTaxonRecord, 4);
666
				}
667
			} catch (UnknownCdmTypeException e) {
668
				//should not happen as USE_UNKNOWN is used
669
				rank = Rank.UNKNOWN_RANK();
670
			}
671
		}
672
		return rank;
673
	}
674

    
675

    
676
	/**
677
	 * Creates an empty taxon object with a given status.
678
	 * <i>Empty</i> taxon means, without a defined name or sec.
679
	 * @param item
680
	 * @return
681
	 */
682
	private TaxonBase<?> getTaxonBase(StreamItem item) {
683
		TaxonNameBase<?,?> name = null;
684
		Reference sec = null;
685
		TaxonBase<?> result;
686
		String taxStatus = item.get(TermUri.DWC_TAXONOMIC_STATUS);
687
		String status = "";
688

    
689
		if (taxStatus != null){
690
			if (taxStatus.matches("accepted.*|valid")){
691
				status += "A";
692
			} else if (taxStatus.matches(".*synonym|invalid|not accepted")){   //not accepted comes from scratchpads
693
				status += "S";
694
			} else if (taxStatus.matches("misapplied.*")){
695
				status += "M";
696
			} else{
697
				status += "?";
698
			}
699
			removeItemInfo(item, TermUri.DWC_TAXONOMIC_STATUS);
700
		}
701
		if (! CdmUtils.isBlank(item.get(TermUri.DWC_ACCEPTED_NAME_USAGE_ID))){
702
			// acceptedNameUsageId = id
703
			if (getSourceId(item).equals(item.get(TermUri.DWC_ACCEPTED_NAME_USAGE_ID))){
704
				status += "A";
705
			}else{
706
				status += "S";
707
			}
708
		}
709
		if (status.contains("A") || status.contains("M")){
710
			result = Taxon.NewInstance(name, sec);
711
			if (status.contains("S") && ! status.contains("M") ){
712
				String message = "Ambigous taxon status (%s)";
713
				message = String.format(message, status);
714
				fireWarningEvent(message, item, 6);
715
			}
716
		} else if (status.contains("S")){
717
			result = Synonym.NewInstance(name, sec);
718
		} else{
719
			result = Taxon.NewUnknownStatusInstance(name, sec);
720
		}
721

    
722
		return result;
723

    
724
	}
725

    
726

    
727

    
728
    /**
729
	 * @param item
730
	 * @return
731
	 */
732
	private Language getLanguage(StreamItem item) {
733
		String langItem = item.get(TermUri.DC_LANGUAGE);
734
		Language language = null;
735

    
736
		if(StringUtils.equalsIgnoreCase(langItem, "de")){
737
			language = Language.GERMAN();
738
		}else if(StringUtils.equalsIgnoreCase(langItem, "en")){
739
			language = Language.ENGLISH();
740
		}else{
741
			language = Language.DEFAULT();
742
		}
743
		return language;
744
	}
745

    
746
// ********************** PARTITIONABLE ****************************************/
747

    
748

    
749
	@Override
750
	protected void makeForeignKeysForItem(StreamItem item, Map<String, Set<String>> fkMap) {
751
		String value;
752
		String key;
753

    
754
		//namePublishedIn
755
		if ( hasValue(value = item.get(key = TermUri.DWC_NAME_PUBLISHED_IN_ID.toString()))){
756
			Set<String> keySet = getKeySet(key, fkMap);
757
			keySet.add(value);
758
		}
759
		if (config.isDeduplicateNamePublishedIn()){
760
			if ( hasValue(value = item.get(key = TermUri.DWC_NAME_PUBLISHED_IN.toString()))){
761
				Set<String> keySet = getKeySet(key, fkMap);
762
				keySet.add(value);
763
			}
764
		}
765

    
766
		//nameAccordingTo
767
		if (! config.isDatasetsAsSecundumReference()){
768
			if ( hasValue(value = item.get(key = TermUri.DWC_NAME_ACCORDING_TO_ID.toString()))){
769
				Set<String> keySet = getKeySet(key, fkMap);
770
				keySet.add(value);
771
			}
772
			if ( hasValue(value = item.get(key = TermUri.DWC_NAME_ACCORDING_TO.toString()))){
773
				Set<String> keySet = getKeySet(key, fkMap);
774
				keySet.add(value);
775
			}
776
		}
777

    
778
		//dataset
779
		if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_ID.toString()))){
780
			Set<String> keySet = getKeySet(key, fkMap);
781
			keySet.add(value);
782
		}
783
		if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_NAME.toString()))){
784
			Set<String> keySet = getKeySet(key, fkMap);
785
			keySet.add(value);
786
		}
787

    
788
	}
789

    
790

    
791
	@Override
792
	public Set<String> requiredSourceNamespaces() {
793
		Set<String> result = new HashSet<String>();
794
 		result.add(TermUri.DWC_NAME_PUBLISHED_IN_ID.toString());
795
 		result.add(TermUri.DWC_NAME_PUBLISHED_IN.toString());
796
 		if (!config.isDatasetsAsSecundumReference()){
797
	 		result.add(TermUri.DWC_NAME_ACCORDING_TO_ID.toString());
798
	 		result.add(TermUri.DWC_NAME_ACCORDING_TO.toString());
799
 		}
800
	 	result.add(TermUri.DWC_DATASET_ID.toString());
801
	 	result.add(TermUri.DWC_DATASET_NAME.toString());
802
	 	return result;
803
	}
804

    
805

    
806
    /**
807
     * @param item
808
     * @param dwcTaxonomicStatus
809
     */
810
    private void removeItemInfo(StreamItem item, TermUri dwcTaxonomicStatus) {
811
        if (!isFilterOnly){
812
            item.remove(dwcTaxonomicStatus);
813
        }
814
    }
815

    
816

    
817
//** ***************************** TO STRING *********************************************/
818

    
819
	@Override
820
	public String toString(){
821
		return this.getClass().getName();
822
	}
823

    
824

    
825

    
826

    
827
}
(5-5/37)