Project

General

Profile

Download (31.2 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2009 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.dwca.in;
10

    
11
import java.util.ArrayList;
12
import java.util.HashSet;
13
import java.util.List;
14
import java.util.Map;
15
import java.util.Set;
16
import java.util.UUID;
17

    
18
import org.apache.commons.lang.StringUtils;
19
import org.apache.logging.log4j.LogManager;import org.apache.logging.log4j.Logger;
20

    
21
import com.ibm.lsid.MalformedLSIDException;
22

    
23
import eu.etaxonomy.cdm.common.CdmUtils;
24
import eu.etaxonomy.cdm.common.URI;
25
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
26
import eu.etaxonomy.cdm.io.stream.IPartitionableConverter;
27
import eu.etaxonomy.cdm.io.stream.IReader;
28
import eu.etaxonomy.cdm.io.stream.ItemFilter;
29
import eu.etaxonomy.cdm.io.stream.ListReader;
30
import eu.etaxonomy.cdm.io.stream.MappedCdmBase;
31
import eu.etaxonomy.cdm.io.stream.PartitionableConverterBase;
32
import eu.etaxonomy.cdm.io.stream.StreamImportBase;
33
import eu.etaxonomy.cdm.io.stream.StreamImportStateBase;
34
import eu.etaxonomy.cdm.io.stream.StreamItem;
35
import eu.etaxonomy.cdm.io.stream.terms.TermUri;
36
import eu.etaxonomy.cdm.model.common.Annotation;
37
import eu.etaxonomy.cdm.model.common.CdmBase;
38
import eu.etaxonomy.cdm.model.common.Extension;
39
import eu.etaxonomy.cdm.model.common.ExtensionType;
40
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
41
import eu.etaxonomy.cdm.model.common.Identifier;
42
import eu.etaxonomy.cdm.model.common.LSID;
43
import eu.etaxonomy.cdm.model.common.Language;
44
import eu.etaxonomy.cdm.model.common.Marker;
45
import eu.etaxonomy.cdm.model.common.MarkerType;
46
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
47
import eu.etaxonomy.cdm.model.description.Distribution;
48
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
49
import eu.etaxonomy.cdm.model.description.TaxonDescription;
50
import eu.etaxonomy.cdm.model.location.NamedArea;
51
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
52
import eu.etaxonomy.cdm.model.name.Rank;
53
import eu.etaxonomy.cdm.model.name.TaxonName;
54
import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
55
import eu.etaxonomy.cdm.model.reference.Reference;
56
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
57
import eu.etaxonomy.cdm.model.taxon.Classification;
58
import eu.etaxonomy.cdm.model.taxon.Synonym;
59
import eu.etaxonomy.cdm.model.taxon.Taxon;
60
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
61
import eu.etaxonomy.cdm.model.term.DefinedTerm;
62
import eu.etaxonomy.cdm.model.term.DefinedTermBase;
63
import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
64
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
65
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
66

    
67
/**
68
 * @author a.mueller
69
 * @since 22.11.2011
70
 */
71
public class  DwcTaxonStreamItem2CdmTaxonConverter<CONFIG extends DwcaDataImportConfiguratorBase, STATE extends StreamImportStateBase<CONFIG, StreamImportBase>>
72
        extends PartitionableConverterBase<CONFIG, STATE>
73
        implements IPartitionableConverter<StreamItem, IReader<CdmBase>, String>, ItemFilter<StreamItem> {
74

    
75
    private static final Logger logger = LogManager.getLogger(DwcTaxonStreamItem2CdmTaxonConverter.class);
76

    
77
    //if this converter is used as filter we may not want to delete item parts during evaluation
78
    boolean isFilterOnly = false;
79

    
80
    private static final String ID = "id";
81
	// temporary key for the case that no dataset information is supplied, TODO use something better
82
	public static final String NO_DATASET = "no_dataset_jli773oebhjklw";
83

    
84
	private final NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
85

    
86
	/**
	 * Creates a converter with {@link #isFilterOnly} set to <code>false</code>.
	 *
	 * @param state the import state handed to the base converter
	 */
	public DwcTaxonStreamItem2CdmTaxonConverter(STATE state) {
		this(state, false);
	}

	/**
	 * @param state the import state handed to the base converter
	 * @param isFilter value stored in {@link #isFilterOnly}
	 */
	public DwcTaxonStreamItem2CdmTaxonConverter(STATE state, boolean isFilter) {
		super(state);
		this.isFilterOnly = isFilter;
	}
94

    
95
    @Override
96
    public boolean toBeRemovedFromStream(StreamItem item) {
97
        if (!config.isDoSplitRelationshipImport()){
98
            return false;
99
        }else{
100
            if (isSynonym(item)){
101
                return ! this.config.isDoSynonymRelationships();
102
            }else{
103
                NomenclaturalCode nomCode = getNomCode(item);
104
                Rank rank = getRank(item, nomCode);
105
                boolean isHigherRank = rank == null || rank.isHigher(Rank.SPECIES());
106
                if (isHigherRank){
107
                    return ! config.isDoHigherRankRelationships();
108
                }else{
109
                    return ! config.isDoLowerRankRelationships();
110
                }
111
            }
112
        }
113
    }
114

    
115
    private boolean isSynonym(StreamItem item) {
116
        TaxonBase<?> taxonBase = getTaxonBase(item);
117
        return taxonBase instanceof Synonym;
118
    }
119

    
120
	@Override
121
    public IReader<MappedCdmBase<? extends CdmBase>> map(StreamItem csvTaxonRecord){
122
		List<MappedCdmBase<? extends CdmBase>> resultList = new ArrayList<>();
123

    
124
		//TODO what if not transactional?
125
		Reference sourceReference = state.getTransactionalSourceReference();
126
		String sourceReferenceDetail = null;
127

    
128
		//taxon
129
		TaxonBase<?> taxonBase = getTaxonBase(csvTaxonRecord);
130
		MappedCdmBase<TaxonBase<?>>  mcb = new MappedCdmBase<>(csvTaxonRecord.term, csvTaxonRecord.get(ID), taxonBase);
131
		resultList.add(mcb);
132

    
133
		//original source
134
		String id = csvTaxonRecord.get(ID);
135
		IdentifiableSource source = taxonBase.addSource(OriginalSourceType.Import, id, "Taxon", sourceReference, sourceReferenceDetail);
136
		MappedCdmBase<IdentifiableSource> mappedSource = new MappedCdmBase<>(csvTaxonRecord.get(ID), source);
137
		resultList.add(mappedSource);
138
		csvTaxonRecord.remove(ID);
139

    
140
		//rank
141
		NomenclaturalCode nomCode = getNomCode(csvTaxonRecord);
142
		Rank rank = getRank(csvTaxonRecord, nomCode);
143

    
144
		//name && name published in
145
		TaxonName name = getScientificName(csvTaxonRecord, nomCode, rank, resultList, sourceReference);
146
		taxonBase.setName(name);
147

    
148
		//nameAccordingTo
149
		MappedCdmBase<Reference> sec = getNameAccordingTo(csvTaxonRecord, resultList);
150

    
151
		if (sec == null && state.getConfig().isUseSourceReferenceAsSec()){
152
			sec = new MappedCdmBase<>(state.getTransactionalSourceReference());
153
		}
154
		if (sec != null){
155
			taxonBase.setSec(sec.getCdmBase());
156
		}
157

    
158
		//classification
159
		handleDataset(csvTaxonRecord, taxonBase, resultList, sourceReference, sourceReferenceDetail);
160

    
161
		//NON core
162
	    //term="http://purl.org/dc/terms/identifier"
163
		//currently only LSIDs or generic
164
		handleIdentifier(csvTaxonRecord, taxonBase);
165

    
166
		//TaxonRemarks
167
		handleTaxonRemarks(csvTaxonRecord, taxonBase);
168

    
169
		//TDWG_1
170
		handleTdwgArea(csvTaxonRecord, taxonBase);
171

    
172
		//VernecularName
173
		handleCommonNames(csvTaxonRecord, taxonBase);
174

    
175
		//External Sources, ID's and References
176
		handleIdentifiableObjects(csvTaxonRecord, taxonBase);
177

    
178

    
179
		//		    <!-- Top level group; listed as kingdom but may be interpreted as domain or superkingdom
180
//		         The following eight groups are recognized: Animalia, Archaea, Bacteria, Chromista,
181
//		         Fungi, Plantae, Protozoa, Viruses -->
182
//		    <field index='10' term='http://rs.tdwg.org/dwc/terms/kingdom'/>
183

    
184
//		    <!-- Phylum in which the taxon has been classified -->
185
//		    <field index='11' term='http://rs.tdwg.org/dwc/terms/phylum'/>
186

    
187
		//		    <!-- Class in which the taxon has been classified -->
188
//		    <field index='12' term='http://rs.tdwg.org/dwc/terms/class'/>
189

    
190
		//		    <!-- Order in which the taxon has been classified -->
191
//		    <field index='13' term='http://rs.tdwg.org/dwc/terms/order'/>
192

    
193
		//		    <!-- Family in which the taxon has been classified -->
194
//		    <field index='14' term='http://rs.tdwg.org/dwc/terms/family'/>
195

    
196
		//		    <!-- Genus in which the taxon has been classified -->
197
//		    <field index='15' term='http://rs.tdwg.org/dwc/terms/genus'/>
198

    
199
		//		    <!-- Subgenus in which the taxon has been classified -->
200
//		    <field index='16' term='http://rs.tdwg.org/dwc/terms/subgenus'/>
201
//		    <!-- Specific epithet; for hybrids, the multiplication symbol is included in the epithet -->
202

    
203
//		    <field index='17' term='http://rs.tdwg.org/dwc/terms/specificEpithet'/>
204
//		    <!-- Infraspecific epithet -->
205

    
206
//		    <field index='18' term='http://rs.tdwg.org/dwc/terms/infraspecificEpithet'/>
207
//		    <!-- Authorship -->
208

    
209
//		    <field index='19' term='http://rs.tdwg.org/dwc/terms/scientificNameAuthorship'/>
210
//		==> see scientific name
211
//
212
//		<!-- Acceptance status published in -->
213
//		    <field index='20' term='http://purl.org/dc/terms/source'/>
214
//		    <!-- Reference in which the scientific name was first published -->
215
//		    <field index='21' term='http://rs.tdwg.org/dwc/terms/namePublishedIn'/>
216
//		    <!-- Taxon scrutinized by -->
217
//		    <field index='22' term='http://rs.tdwg.org/dwc/terms/nameAccordingTo'/> 
218
//		    <!-- Scrutiny date -->
219
//		    <field index='23' term='http://purl.org/dc/terms/modified'/>
220
//		    <!-- Additional data for the taxon -->
221
//		    <field index='24' term='http://purl.org/dc/terms/description'/>
222
//		    </core>
223

    
224
		handleModified(csvTaxonRecord, taxonBase);
225

    
226
		handleIsExtinct(csvTaxonRecord, taxonBase);
227

    
228

    
229

    
230
		return new ListReader<>(resultList);
231
	}
232

    
233

    
234

    
235
    /**
236
     * @param csvTaxonRecord
237
     * @param taxonBase
238
     */
239
    private void handleIsExtinct(StreamItem item, TaxonBase<?> taxonBase) {
240
        String isExtinctStr = item.get(TermUri.GBIF_IS_EXTINCT);
241
        if (isBlank(isExtinctStr)){
242
            return;
243
        }
244
        Boolean isExtinct = getBoolean(isExtinctStr, item);
245
        if (isExtinct != null){
246
            try {
247
                UUID isExtinctUuid = state.getTransformer().getMarkerTypeUuid("isExtinct");
248
                MarkerType markerType = state.getCurrentIO().getMarkerType(state, isExtinctUuid, "extinct", "extinct", "extinct");
249
                Marker.NewInstance(taxonBase, isExtinct, markerType);
250

    
251
            } catch (UndefinedTransformerMethodException e) {
252
                String message = "GetMarkerType not available for import. This should not happen. Please conntact developer";
253
                fireWarningEvent(message, item.getLocation(), 8);
254
            }
255
        }
256

    
257
    }
258

    
259
    /**
260
     * @param item
261
     * @param isExtinctStr
262
     * @return
263
     */
264
    private Boolean getBoolean(String booleanStr, StreamItem item) {
265
        try {
266
            return Boolean.valueOf(booleanStr);
267
        } catch (Exception e) {
268
            String message = "Boolean value could not be parsed";
269
            fireWarningEvent(message, item, 4);
270
            return null;
271
        }
272
    }
273

    
274

    
275

    
276
    /**
277
     * @param csvTaxonRecord
278
     * @param taxonBase
279
     */
280
    private void handleModified(StreamItem item, TaxonBase<?> taxonBase) {
281
        String modifiedStr = item.get(TermUri.DC_MODIFIED);
282
        if (isBlank(modifiedStr)){
283
            return;
284
        }
285

    
286
        try {
287
            UUID modifiedUuid = state.getTransformer().getExtensionTypeUuid("modified");
288
            ExtensionType extensionType = state.getCurrentIO().getExtensionType(state, modifiedUuid, "modified", "modified", "modified");
289
            Extension.NewInstance(taxonBase, modifiedStr, extensionType);
290

    
291
        } catch (UndefinedTransformerMethodException e) {
292
            String message = "GetMarkerType not available for import. This should not happen. Please conntact developer";
293
            fireWarningEvent(message, item.getLocation(), 8);
294
        }
295

    
296

    
297
    }
298

    
299
    /**
300
	 * @param item
301
	 * @param taxonBase
302
	 */
303
	private void handleIdentifiableObjects(StreamItem item,TaxonBase<?> taxonBase) {
304

    
305
		String references = item.get(TermUri.DC_REFERENCES);
306

    
307
		if (references == null || references == "") {
308
			references = item.get(TermUri.DWC_NAME_PUBLISHED_IN_ID);//lorna temporary until Scratchpads move the reference to the correct place.
309
		}
310

    
311
		if (StringUtils.isNotBlank(references)){
312
			URI uri = makeUriIfIs(references);
313
			if (uri != null){
314
				Extension.NewInstance(taxonBase, references, ExtensionType.URL());
315
			}else{
316
				String message = "Non-URI Dublin Core References not yet handled for taxa. References is: %s";
317
				fireWarningEvent(String.format(message, references), item, 6);
318
			}
319
		}
320

    
321

    
322
		//TODO: Finish properly
323
		String id = item.get(TermUri.CDM_SOURCE_IDINSOURCE);
324
		String idNamespace = item.get(TermUri.CDM_SOURCE_IDNAMESPACE);
325
		String reference = item.get(TermUri.CDM_SOURCE_REFERENCE);
326
		if(StringUtils.isNotBlank(id) && StringUtils.isNotBlank(idNamespace) && StringUtils.isNotBlank(reference)){
327
			Reference ref = ReferenceFactory.newGeneric();
328
			ref.setTitle(reference);
329
			Taxon taxon = (Taxon) taxonBase;
330
			taxon.addSource(OriginalSourceType.Import, id, idNamespace, ref, null);
331
		}
332

    
333
	}
334

    
335

    
336
	/**
337
	 * If str is an uri it returns is as an {@link URI}. If not it returns <code>null</code>.
338
	 * @param str
339
	 * @return the URI.
340
	 */
341
	private URI makeUriIfIs(String str) {
342
		if (! str.startsWith("http:")){
343
			return null;
344
		}else{
345
			try {
346
				URI uri = URI.create(str);
347
				return uri;
348
			} catch (Exception e) {
349
				return null;
350
			}
351
		}
352

    
353
	}
354

    
355

    
356
	/**
357
	 * @param item
358
	 * @param taxonBase
359
	 */
360
	private void handleCommonNames(StreamItem item,TaxonBase<?> taxonBase) {
361
		//TODO: handle comma separated values
362
		String commonName = item.get(TermUri.DWC_VERNACULAR_NAME);
363
		if (StringUtils.isNotBlank(commonName)){
364

    
365
			Language language = getLanguage(item);
366
			CommonTaxonName commonTaxonName = CommonTaxonName.NewInstance(commonName, language);
367
			if(taxonBase instanceof Taxon){
368
				Taxon taxon = (Taxon) taxonBase;
369
				TaxonDescription taxonDescription = getTaxonDescription(taxon, false);
370
				taxonDescription.addElement(commonTaxonName);
371
				logger.info("Common name " + commonName + " added to " + taxon.getTitleCache());
372
			}
373
		}
374
	}
375

    
376

    
377

    
378
	/**
379
	 * @param csvTaxonRecord
380
	 * @param taxonBase
381
	 */
382
	private void handleTdwgArea(StreamItem item, TaxonBase<?> taxonBase) {
383
		String tdwg_area = item.get(TermUri.DWC_COUNTRY_CODE);
384
		if (tdwg_area != null){
385
    		if(taxonBase instanceof Synonym){
386
    			Synonym synonym = CdmBase.deproxy(taxonBase, Synonym.class);
387
    			Taxon acceptedTaxon = synonym.getAcceptedTaxon();
388
    			if (acceptedTaxon != null){
389
    			    TaxonDescription td = getTaxonDescription(acceptedTaxon, false);
390
    			    NamedArea area = NamedArea.getAreaByTdwgAbbreviation(tdwg_area);
391

    
392
    			    if (area == null){
393
    			        area = NamedArea.getAreaByTdwgLabel(tdwg_area);
394
    			    }
395
    			    if (area != null){
396
    			        Distribution distribution = Distribution.NewInstance(area, PresenceAbsenceTerm.PRESENT());
397
    			        td.addElement(distribution);
398
    			    }
399
    			}
400
    		}
401
    		if(!(taxonBase instanceof Synonym)){
402
    			Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class);
403
    			TaxonDescription td = getTaxonDescription(taxon, false);
404
    			NamedArea area = NamedArea.getAreaByTdwgAbbreviation(tdwg_area);
405

    
406
    			if (area == null){
407
    				area = NamedArea.getAreaByTdwgLabel(tdwg_area);
408
    			}
409
    			if (area != null){
410
    				Distribution distribution = Distribution.NewInstance(area, PresenceAbsenceTerm.PRESENT());
411
    				td.addElement(distribution);
412
    			}
413
    		}
414
    	}
415
	}
416

    
417

    
418
	/**
419
	 * @param item
420
	 * @param taxonBase
421
	 */
422
	private void handleTaxonRemarks(StreamItem item,TaxonBase<?> taxonBase) {
423
		String comment = item.get(TermUri.DWC_TAXON_REMARKS);
424
		Language language = getLanguage(item);
425
		if(StringUtils.isNotBlank(comment)){
426
				Annotation annotation = Annotation.NewInstance(comment, language);
427
				taxonBase.addAnnotation(annotation);
428
		}else{
429
//			String message = "Comment is empty or some error appeared while saving: %s";
430
////			message = String.format(message);
431
//			fireWarningEvent(message, item, 1);
432
		}
433
	}
434

    
435

    
436
	//TODO handle non LSIDs
437
	//TODO handle LSIDs for names
438
	private void handleIdentifier(StreamItem csvTaxonRecord, TaxonBase<?> taxonBase) {
439
		String identifier = csvTaxonRecord.get(TermUri.DC_IDENTIFIER);
440
		if (StringUtils.isNotBlank(identifier)){
441
			if (identifier.trim().startsWith("urn:lsid")){
442
				try {
443
					LSID lsid = new LSID(identifier);
444
					taxonBase.setLsid(lsid);
445
				} catch (MalformedLSIDException e) {
446
					String message = "LSID is malformed and can't be handled as LSID: %s";
447
					message = String.format(message, identifier);
448
					fireWarningEvent(message, csvTaxonRecord, 4);
449
					Identifier.NewInstance(taxonBase, identifier, DefinedTermBase.getTermByClassAndUUID(DefinedTerm.class, DefinedTerm.uuidLsid));
450
				}
451
			}else{
452
				Identifier.NewInstance(taxonBase, identifier, null);
453
			    String message = "Identifier type not recognized. Create generic identifier: %s";
454
				message = String.format(message, identifier);
455
				fireWarningEvent(message, csvTaxonRecord, 1);
456
			}
457
		}
458

    
459
	}
460

    
461

    
462
	private void handleDataset(StreamItem item, TaxonBase<?> taxonBase,
463
	        List<MappedCdmBase<? extends CdmBase>> resultList,
464
	        Reference sourceReference,
465
	        String sourceReferecenDetail) {
466

    
467
		TermUri idTerm = TermUri.DWC_DATASET_ID;
468
		TermUri strTerm = TermUri.DWC_DATASET_NAME;
469

    
470
		if (config.isDatasetsAsClassifications()){
471
			String datasetId = CdmUtils.Nz(item.get(idTerm)).trim();
472
			String datasetName = CdmUtils.Nz(item.get(strTerm)).trim();
473
				if (CdmUtils.areBlank(datasetId, datasetName) ){
474
				datasetId = NO_DATASET;
475
			}
476

    
477
			//check id
478
			boolean classificationExists = state.exists(idTerm.toString() , datasetId, Classification.class);
479

    
480
			//check name
481
			if (!classificationExists){
482
				classificationExists = state.exists(strTerm.toString() , datasetName, Classification.class);
483
			}
484

    
485
			//if not exists, create new
486
			if (! classificationExists){
487
				String classificationName = StringUtils.isBlank(datasetName)? datasetId : datasetName;
488
				if (classificationName.equals(NO_DATASET)){
489
					classificationName = config.getClassificationName();
490
					//classificationName = "Classification (no name)";  //TODO define by config or zipfile or metadata
491
				}
492

    
493
				String classificationId = StringUtils.isBlank(datasetId)? datasetName : datasetId;
494
				Classification classification = Classification.NewInstance(classificationName);
495
				//source
496
				IdentifiableSource source = classification.addSource(OriginalSourceType.Import, classificationId, "Dataset", sourceReference, sourceReferecenDetail);
497
				//add to result
498
				resultList.add(new MappedCdmBase<>(idTerm, datasetId, classification));
499
				resultList.add(new MappedCdmBase<>(strTerm, datasetName, classification));
500
				resultList.add(new MappedCdmBase<>(source));
501
				//TODO this is not so nice but currently necessary as classifications are requested in the same partition
502
				state.putMapping(idTerm.toString(), classificationId, classification);
503
				state.putMapping(strTerm.toString(), classificationName, classification);
504
			}
505
		}else if (config.isDatasetsAsSecundumReference() || config.isDatasetsAsOriginalSource()){
506
			MappedCdmBase<Reference> mappedCitation = getReference(item, resultList, idTerm, strTerm, true);
507
			if (mappedCitation != null){
508
				Reference ref = mappedCitation.getCdmBase();
509
				if (config.isDatasetsAsSecundumReference()){
510
					//dataset as secundum reference
511
					taxonBase.setSec(ref);
512
				}else{
513
					//dataset as original source
514
					taxonBase.addSource(OriginalSourceType.Import, null, null, ref, null);
515
				}
516
			}
517
		}else{
518
			String message = "DatasetUse type not yet implemented. Can't import dataset information.";
519
			fireWarningEvent(message, item, 4);
520
		}
521

    
522
		//remove to later check if all attributes were used
523
		removeItemInfo(item, idTerm);
524
		removeItemInfo(item, strTerm);
525
	}
526

    
527

    
528
	@Override
529
	public String getSourceId(StreamItem item) {
530
		String id = item.get(ID);
531
		return id;
532
	}
533

    
534
	private MappedCdmBase<Reference> getNameAccordingTo(StreamItem item, List<MappedCdmBase<? extends CdmBase>> resultList) {
535
		if (config.isDatasetsAsSecundumReference()){
536
			//TODO store nameAccordingTo info some where else or let the user define where to store it.
537
			return null;
538
		}else{
539
			TermUri idTerm = TermUri.DWC_NAME_ACCORDING_TO_ID;
540
			TermUri strTerm = TermUri.DWC_NAME_ACCORDING_TO;
541
			MappedCdmBase<Reference> secRef = getReference(item, resultList, idTerm, strTerm, false);
542
			return secRef;
543
		}
544
	}
545

    
546
	private NomenclaturalCode getNomCode(StreamItem item) {
547
		String strNomCode = getValue(item, TermUri.DWC_NOMENCLATURAL_CODE);
548
		NomenclaturalCode nomCode = null;
549
		// by Nomcenclatural Code
550
		if (strNomCode != null){
551
			nomCode = NomenclaturalCode.fromString(strNomCode);
552
			if (nomCode == null){
553
				String message = "NomCode '%s' not recognized";
554
				message = String.format(message, strNomCode);
555
				fireWarningEvent(message, item, 4);
556
			}else{
557
				return nomCode;
558
			}
559
		}
560
		// by Kingdom
561
		String strKingdom = getValue(item, TermUri.DWC_KINGDOM);
562
		if (strKingdom != null){
563
			if (strKingdom.equalsIgnoreCase("Plantae")){
564
				nomCode = NomenclaturalCode.ICNAFP;
565
			}else if (strKingdom.equalsIgnoreCase("Fungi")){
566
				nomCode = NomenclaturalCode.ICNAFP;
567
			}else if (strKingdom.equalsIgnoreCase("Animalia")){
568
				nomCode = NomenclaturalCode.ICZN;
569
			}else if (strKingdom.equalsIgnoreCase("Protozoa")){
570
				nomCode = NomenclaturalCode.ICZN;
571
			}
572
		}
573

    
574
		//TODO further kingdoms
575
		if (nomCode == null){
576
			//TODO warning
577
			if (config.getNomenclaturalCode() != null){
578
				nomCode = config.getNomenclaturalCode();
579
			}
580
		}
581
		return nomCode;
582
	}
583

    
584

    
585
	private TaxonName getScientificName(StreamItem item, NomenclaturalCode nomCode, Rank rank, List<MappedCdmBase<? extends CdmBase>> resultList, Reference sourceReference) {
586
		TaxonName name = null;
587
		String strScientificName = getValue(item, TermUri.DWC_SCIENTIFIC_NAME);
588
		//Name
589
		if (strScientificName != null){
590
			name = (TaxonName)parser.parseFullName(strScientificName, nomCode, rank);
591
			if ( rank != null && name != null && name.getRank() != null &&  ! rank.equals(name.getRank())){
592
				if (config.isValidateRankConsistency()){
593
					String message = "Parsed rank %s (%s) differs from rank %s given by fields 'taxonRank' or 'verbatimTaxonRank'";
594
					message = String.format(message, name.getRank().getTitleCache(), strScientificName, rank.getTitleCache());
595
					fireWarningEvent(message, item, 4);
596
				}
597
			}
598
			checkAuthorship(name, item);
599
			resultList.add(new MappedCdmBase(TermUri.DWC_SCIENTIFIC_NAME, strScientificName, name));
600
		}
601
		//By ID
602
		String strScientificNameId = getValue(item, TermUri.DWC_SCIENTIFIC_NAME_ID);
603
		if (strScientificNameId != null){
604
			if (config.isScientificNameIdAsOriginalSourceId()){
605
				if (name != null){
606
					IdentifiableSource source = IdentifiableSource.NewInstance(OriginalSourceType.Import, strScientificNameId, TermUri.DWC_SCIENTIFIC_NAME_ID.toString(), sourceReference, null);
607
					name.addSource(source);
608
				}
609
			}else{
610
				String message = "ScientificNameId not yet implemented: '%s'";
611
				message = String.format(message, strScientificNameId);
612
				fireWarningEvent(message, item, 4);
613
			}
614
		}
615

    
616
		//namePublishedIn
617
		TermUri idTerm = TermUri.DWC_NAME_PUBLISHED_IN_ID;
618
		TermUri strTerm = TermUri.DWC_NAME_PUBLISHED_IN;
619
		MappedCdmBase<Reference> nomRef = getReference(item, resultList, idTerm, strTerm, false);
620

    
621
		if (name != null){
622
			if (nomRef != null){
623
				name.setNomenclaturalReference(nomRef.getCdmBase());  //check if name already has a nomRef, shouldn't be the case usually
624
			}
625
		}else{
626
			if (nomRef != null){
627
				String message = "NamePublishedIn information available but no name exists";
628
				fireWarningEvent(message, item, 4);
629
			}
630
		}
631
		return name;
632
	}
633

    
634

    
635
	/**
636
	 * General method to handle references used for multiple attributes.
637
	 * @param item
638
	 * @param resultList
639
	 * @param idTerm
640
	 * @param strTerm
641
	 * @param idIsInternal
642
	 * @return
643
	 */
644
	private MappedCdmBase<Reference> getReference(StreamItem item,
645
	        List<MappedCdmBase<? extends CdmBase>> resultList, TermUri idTerm,
646
	        TermUri strTerm, boolean idIsInternal) {
647
		Reference newRef = null;
648
		Reference sourceCitation = null;
649

    
650
		MappedCdmBase<Reference> result = null;
651
		if (exists(idTerm, item) || exists(strTerm, item)){
652
			String refId = CdmUtils.Nz(item.get(idTerm)).trim();
653
			String refStr = CdmUtils.Nz(item.get(strTerm)).trim();
654
			if (StringUtils.isNotBlank(refId)){
655
				List<Reference> references = state.get(idTerm.toString(), refId, Reference.class);
656
				if (references.size() == 0){
657
					if (! idIsInternal){
658
						//references should already exist in store if not linking to external links like URLs
659
						String message = "External namePublishedInIDs are not yet supported";
660
						fireWarningEvent(message, item, 4);//set to DEBUG
661
					}else{
662
						newRef = ReferenceFactory.newGeneric();  //TODO handle other types if possible
663
						newRef.addSource(OriginalSourceType.Import, refId, idTerm.toString(), sourceCitation, null);
664
						MappedCdmBase<Reference> idResult = new MappedCdmBase<>(idTerm, refId, newRef);
665
						resultList.add(idResult);
666
					}
667
				}else{
668
					//TODO handle list.size > 1 , do we need a list here ?
669
					result = new MappedCdmBase<Reference>(idTerm, refId , references.get(0));
670
				}
671
			}
672
			if (result == null){
673
				List<Reference> nomRefs = state.get(strTerm.toString(), refStr, Reference.class);
674
				if (nomRefs.size() > 0){
675
					//TODO handle list.size > 1 , do we need a list here ?
676
					result = new MappedCdmBase<>(strTerm, refStr , nomRefs.get(0));
677
				}else{
678
					// new Reference
679
					if (newRef == null){
680
						newRef = ReferenceFactory.newGeneric();  //TODO handle other types if possible
681
					}
682
					newRef.setTitleCache(refStr, true);
683
					//TODO distinguish available year, authorship, etc. if
684
					result = new MappedCdmBase<>(strTerm, refStr, newRef);
685
					resultList.add(result);
686
				}
687
			}
688
		}
689
		return result;
690
	}
691

    
692

    
693
	//TODO we may configure in configuration that scientific name never includes Authorship
694
	private void checkAuthorship(TaxonName nameBase, StreamItem item) {
695
		if (nameBase.isViral()){
696
			return;
697
		}
698
		String strAuthors = getValue(item, TermUri.DWC_SCIENTIFIC_NAME_AUTHORS);
699

    
700
		if (! nameBase.isProtectedTitleCache()){
701
			if (isBlank(nameBase.getAuthorshipCache())){
702
				if (nameBase.isBotanical() || nameBase.isZoological()){
703
					//TODO can't we also parse NonViralNames correctly ?
704
					try {
705
						parser.parseAuthors(nameBase, strAuthors);
706
					} catch (StringNotParsableException e) {
707
					    nameBase.setAuthorshipCache(strAuthors);
708
					}
709
				}else{
710
				    nameBase.setAuthorshipCache(strAuthors);
711
				}
712
				//TODO throw warning (scientific name should always include authorship) by DwC definition
713
			}
714
		}
715

    
716
	}
717

    
718

    
719
	private Rank getRank(StreamItem csvTaxonRecord, NomenclaturalCode nomCode) {
720
		boolean USE_UNKNOWN = true;
721
		Rank rank = null;
722
		String strRank = getValue(csvTaxonRecord,TermUri.DWC_TAXON_RANK);
723
		String strVerbatimRank = getValue(csvTaxonRecord,TermUri.DWC_VERBATIM_TAXON_RANK);
724
		if (strRank != null){
725
			try {
726
				rank = Rank.getRankByEnglishName(strRank, nomCode, USE_UNKNOWN);
727
				if (rank.equals(Rank.UNKNOWN_RANK())){
728
					rank = Rank.getRankByLatinNameOrIdInVoc(strRank, USE_UNKNOWN);
729
					if (rank.equals(Rank.UNKNOWN_RANK())){
730
						String message = "Rank can not be defined for '%s'";
731
						message = String.format(message, strRank);
732
						fireWarningEvent(message, csvTaxonRecord, 4);
733
					}
734
				}
735
			} catch (UnknownCdmTypeException e) {
736
				//should not happen as USE_UNKNOWN is used
737
				rank = Rank.UNKNOWN_RANK();
738
			}
739
		}
740
		if ( (rank == null || rank.equals(Rank.UNKNOWN_RANK())) && strVerbatimRank != null){
741
			try {
742
				rank = Rank.getRankByLatinNameOrIdInVoc(strVerbatimRank, USE_UNKNOWN);
743
				if (rank.equals(Rank.UNKNOWN_RANK())){
744
					String message = "Rank can not be defined for '%s'";
745
					message = String.format(message, strVerbatimRank);
746
					fireWarningEvent(message, csvTaxonRecord, 4);
747
				}
748
			} catch (UnknownCdmTypeException e) {
749
				//should not happen as USE_UNKNOWN is used
750
				rank = Rank.UNKNOWN_RANK();
751
			}
752
		}
753
		return rank;
754
	}
755

    
756

    
757
	/**
758
	 * Creates an empty taxon object with a given status.
759
	 * <i>Empty</i> taxon means, without a defined name or sec.
760
	 * @param item
761
	 * @return
762
	 */
763
	private TaxonBase<?> getTaxonBase(StreamItem item) {
764
		TaxonName name = null;
765
		Reference sec = null;
766
		TaxonBase<?> result;
767
		String taxStatus = item.get(TermUri.DWC_TAXONOMIC_STATUS);
768
		String status = "";
769

    
770
		if (taxStatus != null){
771
			if (taxStatus.matches("accepted.*|valid")){
772
				status += "A";
773
			} else if (taxStatus.matches(".*synonym|invalid|not accepted")){   //not accepted comes from scratchpads
774
				status += "S";
775
			} else if (taxStatus.matches("misapplied.*")){
776
				status += "M";
777
			} else{
778
				status += "?";
779
			}
780
			removeItemInfo(item, TermUri.DWC_TAXONOMIC_STATUS);
781
		}
782
		if (! CdmUtils.isBlank(item.get(TermUri.DWC_ACCEPTED_NAME_USAGE_ID))){
783
			// acceptedNameUsageId = id
784
			if (getSourceId(item).equals(item.get(TermUri.DWC_ACCEPTED_NAME_USAGE_ID))){
785
				status += "A";
786
			}else{
787
				status += "S";
788
			}
789
		}
790
		if (status.contains("A") || status.contains("M")){
791
			result = Taxon.NewInstance(name, sec);
792
			if (status.contains("S") && ! status.contains("M") ){
793
				String message = "Ambigous taxon status (%s)";
794
				message = String.format(message, status);
795
				fireWarningEvent(message, item, 6);
796
			}
797
		} else if (status.contains("S")){
798
			result = Synonym.NewInstance(name, sec);
799
		} else{
800
			result = Taxon.NewUnknownStatusInstance(name, sec);
801
		}
802

    
803
		return result;
804

    
805
	}
806

    
807

    
808

    
809
    /**
810
	 * @param item
811
	 * @return
812
	 */
813
	private Language getLanguage(StreamItem item) {
814
		String langItem = item.get(TermUri.DC_LANGUAGE);
815
		Language language = null;
816

    
817
		if(StringUtils.equalsIgnoreCase(langItem, "de")){
818
			language = Language.GERMAN();
819
		}else if(StringUtils.equalsIgnoreCase(langItem, "en")){
820
			language = Language.ENGLISH();
821
		}else{
822
			language = Language.DEFAULT();
823
		}
824
		return language;
825
	}
826

    
827
// ********************** PARTITIONABLE ****************************************/
828

    
829

    
830
	@Override
831
	protected void makeForeignKeysForItem(StreamItem item, Map<String, Set<String>> fkMap) {
832
		String value;
833
		String key;
834

    
835
		//namePublishedIn
836
		if ( hasValue(value = item.get(key = TermUri.DWC_NAME_PUBLISHED_IN_ID.toString()))){
837
			Set<String> keySet = getKeySet(key, fkMap);
838
			keySet.add(value);
839
		}
840
		if (config.isDeduplicateNamePublishedIn()){
841
			if ( hasValue(value = item.get(key = TermUri.DWC_NAME_PUBLISHED_IN.toString()))){
842
				Set<String> keySet = getKeySet(key, fkMap);
843
				keySet.add(value);
844
			}
845
		}
846

    
847
		//nameAccordingTo
848
		if (! config.isDatasetsAsSecundumReference()){
849
			if ( hasValue(value = item.get(key = TermUri.DWC_NAME_ACCORDING_TO_ID.toString()))){
850
				Set<String> keySet = getKeySet(key, fkMap);
851
				keySet.add(value);
852
			}
853
			if ( hasValue(value = item.get(key = TermUri.DWC_NAME_ACCORDING_TO.toString()))){
854
				Set<String> keySet = getKeySet(key, fkMap);
855
				keySet.add(value);
856
			}
857
		}
858

    
859
		//dataset
860
		if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_ID.toString()))){
861
			Set<String> keySet = getKeySet(key, fkMap);
862
			keySet.add(value);
863
		}
864
		if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_NAME.toString()))){
865
			Set<String> keySet = getKeySet(key, fkMap);
866
			keySet.add(value);
867
		}
868

    
869
	}
870

    
871

    
872
	@Override
873
	public Set<String> requiredSourceNamespaces() {
874
		Set<String> result = new HashSet<>();
875
 		result.add(TermUri.DWC_NAME_PUBLISHED_IN_ID.toString());
876
 		result.add(TermUri.DWC_NAME_PUBLISHED_IN.toString());
877
 		if (!config.isDatasetsAsSecundumReference()){
878
	 		result.add(TermUri.DWC_NAME_ACCORDING_TO_ID.toString());
879
	 		result.add(TermUri.DWC_NAME_ACCORDING_TO.toString());
880
 		}
881
	 	result.add(TermUri.DWC_DATASET_ID.toString());
882
	 	result.add(TermUri.DWC_DATASET_NAME.toString());
883
	 	return result;
884
	}
885

    
886

    
887
    /**
888
     * @param item
889
     * @param dwcTaxonomicStatus
890
     */
891
    private void removeItemInfo(StreamItem item, TermUri dwcTaxonomicStatus) {
892
        if (!isFilterOnly){
893
            item.remove(dwcTaxonomicStatus);
894
        }
895
    }
896

    
897

    
898
//** ***************************** TO STRING *********************************************/
899

    
900
	@Override
901
	public String toString(){
902
		return this.getClass().getName();
903
	}
904
}
(2-2/17)