Project

General

Profile

Download (31.1 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2009 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.dwca.in;
10

    
11
import java.net.URI;
12
import java.util.ArrayList;
13
import java.util.HashSet;
14
import java.util.List;
15
import java.util.Map;
16
import java.util.Set;
17
import java.util.UUID;
18

    
19
import org.apache.commons.lang.StringUtils;
20
import org.apache.log4j.Logger;
21

    
22
import com.ibm.lsid.MalformedLSIDException;
23

    
24
import eu.etaxonomy.cdm.common.CdmUtils;
25
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
26
import eu.etaxonomy.cdm.io.stream.IPartitionableConverter;
27
import eu.etaxonomy.cdm.io.stream.IReader;
28
import eu.etaxonomy.cdm.io.stream.ItemFilter;
29
import eu.etaxonomy.cdm.io.stream.ListReader;
30
import eu.etaxonomy.cdm.io.stream.MappedCdmBase;
31
import eu.etaxonomy.cdm.io.stream.PartitionableConverterBase;
32
import eu.etaxonomy.cdm.io.stream.StreamImportBase;
33
import eu.etaxonomy.cdm.io.stream.StreamImportStateBase;
34
import eu.etaxonomy.cdm.io.stream.StreamItem;
35
import eu.etaxonomy.cdm.io.stream.terms.TermUri;
36
import eu.etaxonomy.cdm.model.common.Annotation;
37
import eu.etaxonomy.cdm.model.common.CdmBase;
38
import eu.etaxonomy.cdm.model.common.Extension;
39
import eu.etaxonomy.cdm.model.common.ExtensionType;
40
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
41
import eu.etaxonomy.cdm.model.common.Identifier;
42
import eu.etaxonomy.cdm.model.common.LSID;
43
import eu.etaxonomy.cdm.model.common.Language;
44
import eu.etaxonomy.cdm.model.common.Marker;
45
import eu.etaxonomy.cdm.model.common.MarkerType;
46
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
47
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
48
import eu.etaxonomy.cdm.model.description.Distribution;
49
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
50
import eu.etaxonomy.cdm.model.description.TaxonDescription;
51
import eu.etaxonomy.cdm.model.location.NamedArea;
52
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
53
import eu.etaxonomy.cdm.model.name.Rank;
54
import eu.etaxonomy.cdm.model.name.TaxonName;
55
import eu.etaxonomy.cdm.model.reference.Reference;
56
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
57
import eu.etaxonomy.cdm.model.taxon.Classification;
58
import eu.etaxonomy.cdm.model.taxon.Synonym;
59
import eu.etaxonomy.cdm.model.taxon.Taxon;
60
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
61
import eu.etaxonomy.cdm.model.term.DefinedTerm;
62
import eu.etaxonomy.cdm.model.term.DefinedTermBase;
63
import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
64
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
65
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
66

    
67
/**
68
 * @author a.mueller
69
 * @since 22.11.2011
70
 *
71
 */
72
public class  DwcTaxonStreamItem2CdmTaxonConverter<CONFIG extends DwcaDataImportConfiguratorBase, STATE extends StreamImportStateBase<CONFIG, StreamImportBase>>
73
        extends PartitionableConverterBase<CONFIG, STATE>
74
        implements IPartitionableConverter<StreamItem, IReader<CdmBase>, String>, ItemFilter<StreamItem> {
75

    
76
    private static final Logger logger = Logger.getLogger(DwcTaxonStreamItem2CdmTaxonConverter.class);
77

    
78
    //if this converter is used as filter we may not want to delete item parts during evaluation
79
    boolean isFilterOnly = false;
80

    
81
    private static final String ID = "id";
82
	// temporary key for the case that no dataset information is supplied, TODO use something better
83
	public static final String NO_DATASET = "no_dataset_jli773oebhjklw";
84

    
85
	private final NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
86

    
87
	/**
	 * Creates a converter with {@link #isFilterOnly} set to <code>false</code>.
	 *
	 * @param state the import state passed on to the super class
	 */
	public DwcTaxonStreamItem2CdmTaxonConverter(STATE state) {
		this(state, false);
	}
93

    
94
    /**
     * Creates a converter and records whether it is used as a filter only.
     *
     * @param state the import state passed on to the super class
     * @param isFilter value stored in {@link #isFilterOnly}
     */
    public DwcTaxonStreamItem2CdmTaxonConverter(STATE state, boolean isFilter) {
        super(state);
        this.isFilterOnly = isFilter;
    }
98

    
99
    @Override
100
    public boolean toBeRemovedFromStream(StreamItem item) {
101
        if (!config.isDoSplitRelationshipImport()){
102
            return false;
103
        }else{
104
            if (isSynonym(item)){
105
                return ! this.config.isDoSynonymRelationships();
106
            }else{
107
                NomenclaturalCode nomCode = getNomCode(item);
108
                Rank rank = getRank(item, nomCode);
109
                boolean isHigherRank = rank == null || rank.isHigher(Rank.SPECIES());
110
                if (isHigherRank){
111
                    return ! config.isDoHigherRankRelationships();
112
                }else{
113
                    return ! config.isDoLowerRankRelationships();
114
                }
115
            }
116
        }
117
    }
118

    
119
    private boolean isSynonym(StreamItem item) {
120
        TaxonBase<?> taxonBase = getTaxonBase(item);
121
        return taxonBase instanceof Synonym;
122
    }
123

    
124
	@Override
125
    public IReader<MappedCdmBase<? extends CdmBase>> map(StreamItem csvTaxonRecord){
126
		List<MappedCdmBase<? extends CdmBase>> resultList = new ArrayList<>();
127

    
128
		//TODO what if not transactional?
129
		Reference sourceReference = state.getTransactionalSourceReference();
130
		String sourceReferenceDetail = null;
131

    
132
		//taxon
133
		TaxonBase<?> taxonBase = getTaxonBase(csvTaxonRecord);
134
		MappedCdmBase<TaxonBase<?>>  mcb = new MappedCdmBase<>(csvTaxonRecord.term, csvTaxonRecord.get(ID), taxonBase);
135
		resultList.add(mcb);
136

    
137
		//original source
138
		String id = csvTaxonRecord.get(ID);
139
		IdentifiableSource source = taxonBase.addSource(OriginalSourceType.Import, id, "Taxon", sourceReference, sourceReferenceDetail);
140
		MappedCdmBase<IdentifiableSource> mappedSource = new MappedCdmBase<>(csvTaxonRecord.get(ID), source);
141
		resultList.add(mappedSource);
142
		csvTaxonRecord.remove(ID);
143

    
144
		//rank
145
		NomenclaturalCode nomCode = getNomCode(csvTaxonRecord);
146
		Rank rank = getRank(csvTaxonRecord, nomCode);
147

    
148
		//name && name published in
149
		TaxonName name = getScientificName(csvTaxonRecord, nomCode, rank, resultList, sourceReference);
150
		taxonBase.setName(name);
151

    
152
		//nameAccordingTo
153
		MappedCdmBase<Reference> sec = getNameAccordingTo(csvTaxonRecord, resultList);
154

    
155
		if (sec == null && state.getConfig().isUseSourceReferenceAsSec()){
156
			sec = new MappedCdmBase<>(state.getTransactionalSourceReference());
157
		}
158
		if (sec != null){
159
			taxonBase.setSec(sec.getCdmBase());
160
		}
161

    
162
		//classification
163
		handleDataset(csvTaxonRecord, taxonBase, resultList, sourceReference, sourceReferenceDetail);
164

    
165
		//NON core
166
	    //term="http://purl.org/dc/terms/identifier"
167
		//currently only LSIDs or generic
168
		handleIdentifier(csvTaxonRecord, taxonBase);
169

    
170
		//TaxonRemarks
171
		handleTaxonRemarks(csvTaxonRecord, taxonBase);
172

    
173
		//TDWG_1
174
		handleTdwgArea(csvTaxonRecord, taxonBase);
175

    
176
		//VernecularName
177
		handleCommonNames(csvTaxonRecord, taxonBase);
178

    
179
		//External Sources, ID's and References
180
		handleIdentifiableObjects(csvTaxonRecord, taxonBase);
181

    
182

    
183
		//		    <!-- Top level group; listed as kingdom but may be interpreted as domain or superkingdom
184
//		         The following eight groups are recognized: Animalia, Archaea, Bacteria, Chromista,
185
//		         Fungi, Plantae, Protozoa, Viruses -->
186
//		    <field index='10' term='http://rs.tdwg.org/dwc/terms/kingdom'/>
187

    
188
//		    <!-- Phylum in which the taxon has been classified -->
189
//		    <field index='11' term='http://rs.tdwg.org/dwc/terms/phylum'/>
190

    
191
		//		    <!-- Class in which the taxon has been classified -->
192
//		    <field index='12' term='http://rs.tdwg.org/dwc/terms/class'/>
193

    
194
		//		    <!-- Order in which the taxon has been classified -->
195
//		    <field index='13' term='http://rs.tdwg.org/dwc/terms/order'/>
196

    
197
		//		    <!-- Family in which the taxon has been classified -->
198
//		    <field index='14' term='http://rs.tdwg.org/dwc/terms/family'/>
199

    
200
		//		    <!-- Genus in which the taxon has been classified -->
201
//		    <field index='15' term='http://rs.tdwg.org/dwc/terms/genus'/>
202

    
203
		//		    <!-- Subgenus in which the taxon has been classified -->
204
//		    <field index='16' term='http://rs.tdwg.org/dwc/terms/subgenus'/>
205
//		    <!-- Specific epithet; for hybrids, the multiplication symbol is included in the epithet -->
206

    
207
//		    <field index='17' term='http://rs.tdwg.org/dwc/terms/specificEpithet'/>
208
//		    <!-- Infraspecific epithet -->
209

    
210
//		    <field index='18' term='http://rs.tdwg.org/dwc/terms/infraspecificEpithet'/>
211
//		    <!-- Authorship -->
212

    
213
//		    <field index='19' term='http://rs.tdwg.org/dwc/terms/scientificNameAuthorship'/>
214
//		==> see scientific name
215
//
216
//		<!-- Acceptance status published in -->
217
//		    <field index='20' term='http://purl.org/dc/terms/source'/>
218
//		    <!-- Reference in which the scientific name was first published -->
219
//		    <field index='21' term='http://rs.tdwg.org/dwc/terms/namePublishedIn'/>
220
//		    <!-- Taxon scrutinized by -->
221
//		    <field index='22' term='http://rs.tdwg.org/dwc/terms/nameAccordingTo'/> 
222
//		    <!-- Scrutiny date -->
223
//		    <field index='23' term='http://purl.org/dc/terms/modified'/>
224
//		    <!-- Additional data for the taxon -->
225
//		    <field index='24' term='http://purl.org/dc/terms/description'/>
226
//		    </core>
227

    
228
		handleModified(csvTaxonRecord, taxonBase);
229

    
230
		handleIsExtinct(csvTaxonRecord, taxonBase);
231

    
232

    
233

    
234
		return new ListReader<>(resultList);
235
	}
236

    
237

    
238

    
239
    /**
240
     * @param csvTaxonRecord
241
     * @param taxonBase
242
     */
243
    private void handleIsExtinct(StreamItem item, TaxonBase<?> taxonBase) {
244
        String isExtinctStr = item.get(TermUri.GBIF_IS_EXTINCT);
245
        if (isBlank(isExtinctStr)){
246
            return;
247
        }
248
        Boolean isExtinct = getBoolean(isExtinctStr, item);
249
        if (isExtinct != null){
250
            try {
251
                UUID isExtinctUuid = state.getTransformer().getMarkerTypeUuid("isExtinct");
252
                MarkerType markerType = state.getCurrentIO().getMarkerType(state, isExtinctUuid, "extinct", "extinct", "extinct");
253
                Marker.NewInstance(taxonBase, isExtinct, markerType);
254

    
255
            } catch (UndefinedTransformerMethodException e) {
256
                String message = "GetMarkerType not available for import. This should not happen. Please conntact developer";
257
                fireWarningEvent(message, item.getLocation(), 8);
258
            }
259
        }
260

    
261
    }
262

    
263
    /**
264
     * @param item
265
     * @param isExtinctStr
266
     * @return
267
     */
268
    private Boolean getBoolean(String booleanStr, StreamItem item) {
269
        try {
270
            return Boolean.valueOf(booleanStr);
271
        } catch (Exception e) {
272
            String message = "Boolean value could not be parsed";
273
            fireWarningEvent(message, item, 4);
274
            return null;
275
        }
276
    }
277

    
278

    
279

    
280
    /**
281
     * @param csvTaxonRecord
282
     * @param taxonBase
283
     */
284
    private void handleModified(StreamItem item, TaxonBase<?> taxonBase) {
285
        String modifiedStr = item.get(TermUri.DC_MODIFIED);
286
        if (isBlank(modifiedStr)){
287
            return;
288
        }
289

    
290
        try {
291
            UUID modifiedUuid = state.getTransformer().getExtensionTypeUuid("modified");
292
            ExtensionType extensionType = state.getCurrentIO().getExtensionType(state, modifiedUuid, "modified", "modified", "modified");
293
            Extension.NewInstance(taxonBase, modifiedStr, extensionType);
294

    
295
        } catch (UndefinedTransformerMethodException e) {
296
            String message = "GetMarkerType not available for import. This should not happen. Please conntact developer";
297
            fireWarningEvent(message, item.getLocation(), 8);
298
        }
299

    
300

    
301
    }
302

    
303
    /**
304
	 * @param item
305
	 * @param taxonBase
306
	 */
307
	private void handleIdentifiableObjects(StreamItem item,TaxonBase<?> taxonBase) {
308

    
309
		String references = item.get(TermUri.DC_REFERENCES);
310

    
311
		if (references == null || references == "") {
312
			references = item.get(TermUri.DWC_NAME_PUBLISHED_IN_ID);//lorna temporary until Scratchpads move the reference to the correct place.
313
		}
314

    
315
		if (StringUtils.isNotBlank(references)){
316
			URI uri = makeUriIfIs(references);
317
			if (uri != null){
318
				Extension.NewInstance(taxonBase, references, ExtensionType.URL());
319
			}else{
320
				String message = "Non-URI Dublin Core References not yet handled for taxa. References is: %s";
321
				fireWarningEvent(String.format(message, references), item, 6);
322
			}
323
		}
324

    
325

    
326
		//TODO: Finish properly
327
		String id = item.get(TermUri.CDM_SOURCE_IDINSOURCE);
328
		String idNamespace = item.get(TermUri.CDM_SOURCE_IDNAMESPACE);
329
		String reference = item.get(TermUri.CDM_SOURCE_REFERENCE);
330
		if(StringUtils.isNotBlank(id) && StringUtils.isNotBlank(idNamespace) && StringUtils.isNotBlank(reference)){
331
			Reference ref = ReferenceFactory.newGeneric();
332
			ref.setTitle(reference);
333
			Taxon taxon = (Taxon) taxonBase;
334
			taxon.addSource(OriginalSourceType.Import, id, idNamespace, ref, null);
335
		}
336

    
337
	}
338

    
339

    
340
	/**
341
	 * If str is an uri it returns is as an {@link URI}. If not it returns <code>null</code>.
342
	 * @param str
343
	 * @return the URI.
344
	 */
345
	private URI makeUriIfIs(String str) {
346
		if (! str.startsWith("http:")){
347
			return null;
348
		}else{
349
			try {
350
				URI uri = URI.create(str);
351
				return uri;
352
			} catch (Exception e) {
353
				return null;
354
			}
355
		}
356

    
357
	}
358

    
359

    
360
	/**
361
	 * @param item
362
	 * @param taxonBase
363
	 */
364
	private void handleCommonNames(StreamItem item,TaxonBase<?> taxonBase) {
365
		//TODO: handle comma separated values
366
		String commonName = item.get(TermUri.DWC_VERNACULAR_NAME);
367
		if (StringUtils.isNotBlank(commonName)){
368

    
369
			Language language = getLanguage(item);
370
			CommonTaxonName commonTaxonName = CommonTaxonName.NewInstance(commonName, language);
371
			if(taxonBase instanceof Taxon){
372
				Taxon taxon = (Taxon) taxonBase;
373
				TaxonDescription taxonDescription = getTaxonDescription(taxon, false);
374
				taxonDescription.addElement(commonTaxonName);
375
				logger.info("Common name " + commonName + " added to " + taxon.getTitleCache());
376
			}
377
		}
378
	}
379

    
380

    
381

    
382
	/**
383
	 * @param csvTaxonRecord
384
	 * @param taxonBase
385
	 */
386
	private void handleTdwgArea(StreamItem item, TaxonBase<?> taxonBase) {
387
		String tdwg_area = item.get(TermUri.DWC_COUNTRY_CODE);
388
		if (tdwg_area != null){
389
    		if(taxonBase instanceof Synonym){
390
    			Synonym synonym = CdmBase.deproxy(taxonBase, Synonym.class);
391
    			Taxon acceptedTaxon = synonym.getAcceptedTaxon();
392
    			if (acceptedTaxon != null){
393
    			    TaxonDescription td = getTaxonDescription(acceptedTaxon, false);
394
    			    NamedArea area = NamedArea.getAreaByTdwgAbbreviation(tdwg_area);
395

    
396
    			    if (area == null){
397
    			        area = NamedArea.getAreaByTdwgLabel(tdwg_area);
398
    			    }
399
    			    if (area != null){
400
    			        Distribution distribution = Distribution.NewInstance(area, PresenceAbsenceTerm.PRESENT());
401
    			        td.addElement(distribution);
402
    			    }
403
    			}
404
    		}
405
    		if(!(taxonBase instanceof Synonym)){
406
    			Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class);
407
    			TaxonDescription td = getTaxonDescription(taxon, false);
408
    			NamedArea area = NamedArea.getAreaByTdwgAbbreviation(tdwg_area);
409

    
410
    			if (area == null){
411
    				area = NamedArea.getAreaByTdwgLabel(tdwg_area);
412
    			}
413
    			if (area != null){
414
    				Distribution distribution = Distribution.NewInstance(area, PresenceAbsenceTerm.PRESENT());
415
    				td.addElement(distribution);
416
    			}
417
    		}
418
    	}
419
	}
420

    
421

    
422
	/**
423
	 * @param item
424
	 * @param taxonBase
425
	 */
426
	private void handleTaxonRemarks(StreamItem item,TaxonBase<?> taxonBase) {
427
		String comment = item.get(TermUri.DWC_TAXON_REMARKS);
428
		Language language = getLanguage(item);
429
		if(StringUtils.isNotBlank(comment)){
430
				Annotation annotation = Annotation.NewInstance(comment, language);
431
				taxonBase.addAnnotation(annotation);
432
		}else{
433
//			String message = "Comment is empty or some error appeared while saving: %s";
434
////			message = String.format(message);
435
//			fireWarningEvent(message, item, 1);
436
		}
437
	}
438

    
439

    
440
	//TODO handle non LSIDs
441
	//TODO handle LSIDs for names
442
	private void handleIdentifier(StreamItem csvTaxonRecord, TaxonBase<?> taxonBase) {
443
		String identifier = csvTaxonRecord.get(TermUri.DC_IDENTIFIER);
444
		if (StringUtils.isNotBlank(identifier)){
445
			if (identifier.trim().startsWith("urn:lsid")){
446
				try {
447
					LSID lsid = new LSID(identifier);
448
					taxonBase.setLsid(lsid);
449
				} catch (MalformedLSIDException e) {
450
					String message = "LSID is malformed and can't be handled as LSID: %s";
451
					message = String.format(message, identifier);
452
					fireWarningEvent(message, csvTaxonRecord, 4);
453
					Identifier.NewInstance(taxonBase, identifier, DefinedTermBase.getTermByClassAndUUID(DefinedTerm.class, DefinedTerm.uuidLsid));
454
				}
455
			}else{
456
				Identifier.NewInstance(taxonBase, identifier, null);
457
			    String message = "Identifier type not recognized. Create generic identifier: %s";
458
				message = String.format(message, identifier);
459
				fireWarningEvent(message, csvTaxonRecord, 1);
460
			}
461
		}
462

    
463
	}
464

    
465

    
466
	private void handleDataset(StreamItem item, TaxonBase<?> taxonBase,
467
	        List<MappedCdmBase<? extends CdmBase>> resultList,
468
	        Reference sourceReference,
469
	        String sourceReferecenDetail) {
470

    
471
		TermUri idTerm = TermUri.DWC_DATASET_ID;
472
		TermUri strTerm = TermUri.DWC_DATASET_NAME;
473

    
474
		if (config.isDatasetsAsClassifications()){
475
			String datasetId = CdmUtils.Nz(item.get(idTerm)).trim();
476
			String datasetName = CdmUtils.Nz(item.get(strTerm)).trim();
477
				if (CdmUtils.areBlank(datasetId, datasetName) ){
478
				datasetId = NO_DATASET;
479
			}
480

    
481
			//check id
482
			boolean classificationExists = state.exists(idTerm.toString() , datasetId, Classification.class);
483

    
484
			//check name
485
			if (!classificationExists){
486
				classificationExists = state.exists(strTerm.toString() , datasetName, Classification.class);
487
			}
488

    
489
			//if not exists, create new
490
			if (! classificationExists){
491
				String classificationName = StringUtils.isBlank(datasetName)? datasetId : datasetName;
492
				if (classificationName.equals(NO_DATASET)){
493
					classificationName = config.getClassificationName();
494
					//classificationName = "Classification (no name)";  //TODO define by config or zipfile or metadata
495
				}
496

    
497
				String classificationId = StringUtils.isBlank(datasetId)? datasetName : datasetId;
498
				Classification classification = Classification.NewInstance(classificationName);
499
				//source
500
				IdentifiableSource source = classification.addSource(OriginalSourceType.Import, classificationId, "Dataset", sourceReference, sourceReferecenDetail);
501
				//add to result
502
				resultList.add(new MappedCdmBase<>(idTerm, datasetId, classification));
503
				resultList.add(new MappedCdmBase<>(strTerm, datasetName, classification));
504
				resultList.add(new MappedCdmBase<>(source));
505
				//TODO this is not so nice but currently necessary as classifications are requested in the same partition
506
				state.putMapping(idTerm.toString(), classificationId, classification);
507
				state.putMapping(strTerm.toString(), classificationName, classification);
508
			}
509
		}else if (config.isDatasetsAsSecundumReference() || config.isDatasetsAsOriginalSource()){
510
			MappedCdmBase<Reference> mappedCitation = getReference(item, resultList, idTerm, strTerm, true);
511
			if (mappedCitation != null){
512
				Reference ref = mappedCitation.getCdmBase();
513
				if (config.isDatasetsAsSecundumReference()){
514
					//dataset as secundum reference
515
					taxonBase.setSec(ref);
516
				}else{
517
					//dataset as original source
518
					taxonBase.addSource(OriginalSourceType.Import, null, null, ref, null);
519
				}
520
			}
521
		}else{
522
			String message = "DatasetUse type not yet implemented. Can't import dataset information.";
523
			fireWarningEvent(message, item, 4);
524
		}
525

    
526
		//remove to later check if all attributes were used
527
		removeItemInfo(item, idTerm);
528
		removeItemInfo(item, strTerm);
529
	}
530

    
531

    
532
	@Override
533
	public String getSourceId(StreamItem item) {
534
		String id = item.get(ID);
535
		return id;
536
	}
537

    
538
	private MappedCdmBase<Reference> getNameAccordingTo(StreamItem item, List<MappedCdmBase<? extends CdmBase>> resultList) {
539
		if (config.isDatasetsAsSecundumReference()){
540
			//TODO store nameAccordingTo info some where else or let the user define where to store it.
541
			return null;
542
		}else{
543
			TermUri idTerm = TermUri.DWC_NAME_ACCORDING_TO_ID;
544
			TermUri strTerm = TermUri.DWC_NAME_ACCORDING_TO;
545
			MappedCdmBase<Reference> secRef = getReference(item, resultList, idTerm, strTerm, false);
546
			return secRef;
547
		}
548
	}
549

    
550
	private NomenclaturalCode getNomCode(StreamItem item) {
551
		String strNomCode = getValue(item, TermUri.DWC_NOMENCLATURAL_CODE);
552
		NomenclaturalCode nomCode = null;
553
		// by Nomcenclatural Code
554
		if (strNomCode != null){
555
			nomCode = NomenclaturalCode.fromString(strNomCode);
556
			if (nomCode == null){
557
				String message = "NomCode '%s' not recognized";
558
				message = String.format(message, strNomCode);
559
				fireWarningEvent(message, item, 4);
560
			}else{
561
				return nomCode;
562
			}
563
		}
564
		// by Kingdom
565
		String strKingdom = getValue(item, TermUri.DWC_KINGDOM);
566
		if (strKingdom != null){
567
			if (strKingdom.equalsIgnoreCase("Plantae")){
568
				nomCode = NomenclaturalCode.ICNAFP;
569
			}else if (strKingdom.equalsIgnoreCase("Fungi")){
570
				nomCode = NomenclaturalCode.ICNAFP;
571
			}else if (strKingdom.equalsIgnoreCase("Animalia")){
572
				nomCode = NomenclaturalCode.ICZN;
573
			}else if (strKingdom.equalsIgnoreCase("Protozoa")){
574
				nomCode = NomenclaturalCode.ICZN;
575
			}
576
		}
577

    
578
		//TODO further kingdoms
579
		if (nomCode == null){
580
			//TODO warning
581
			if (config.getNomenclaturalCode() != null){
582
				nomCode = config.getNomenclaturalCode();
583
			}
584
		}
585
		return nomCode;
586
	}
587

    
588

    
589
	private TaxonName getScientificName(StreamItem item, NomenclaturalCode nomCode, Rank rank, List<MappedCdmBase<? extends CdmBase>> resultList, Reference sourceReference) {
590
		TaxonName name = null;
591
		String strScientificName = getValue(item, TermUri.DWC_SCIENTIFIC_NAME);
592
		//Name
593
		if (strScientificName != null){
594
			name = (TaxonName)parser.parseFullName(strScientificName, nomCode, rank);
595
			if ( rank != null && name != null && name.getRank() != null &&  ! rank.equals(name.getRank())){
596
				if (config.isValidateRankConsistency()){
597
					String message = "Parsed rank %s (%s) differs from rank %s given by fields 'taxonRank' or 'verbatimTaxonRank'";
598
					message = String.format(message, name.getRank().getTitleCache(), strScientificName, rank.getTitleCache());
599
					fireWarningEvent(message, item, 4);
600
				}
601
			}
602
			checkAuthorship(name, item);
603
			resultList.add(new MappedCdmBase(TermUri.DWC_SCIENTIFIC_NAME, strScientificName, name));
604
		}
605
		//By ID
606
		String strScientificNameId = getValue(item, TermUri.DWC_SCIENTIFIC_NAME_ID);
607
		if (strScientificNameId != null){
608
			if (config.isScientificNameIdAsOriginalSourceId()){
609
				if (name != null){
610
					IdentifiableSource source = IdentifiableSource.NewInstance(OriginalSourceType.Import, strScientificNameId, TermUri.DWC_SCIENTIFIC_NAME_ID.toString(), sourceReference, null);
611
					name.addSource(source);
612
				}
613
			}else{
614
				String message = "ScientificNameId not yet implemented: '%s'";
615
				message = String.format(message, strScientificNameId);
616
				fireWarningEvent(message, item, 4);
617
			}
618
		}
619

    
620
		//namePublishedIn
621
		TermUri idTerm = TermUri.DWC_NAME_PUBLISHED_IN_ID;
622
		TermUri strTerm = TermUri.DWC_NAME_PUBLISHED_IN;
623
		MappedCdmBase<Reference> nomRef = getReference(item, resultList, idTerm, strTerm, false);
624

    
625
		if (name != null){
626
			if (nomRef != null){
627
				name.setNomenclaturalReference(nomRef.getCdmBase());  //check if name already has a nomRef, shouldn't be the case usually
628
			}
629
		}else{
630
			if (nomRef != null){
631
				String message = "NamePublishedIn information available but no name exists";
632
				fireWarningEvent(message, item, 4);
633
			}
634
		}
635
		return name;
636
	}
637

    
638

    
639
	/**
640
	 * General method to handle references used for multiple attributes.
641
	 * @param item
642
	 * @param resultList
643
	 * @param idTerm
644
	 * @param strTerm
645
	 * @param idIsInternal
646
	 * @return
647
	 */
648
	private MappedCdmBase<Reference> getReference(StreamItem item,
649
	        List<MappedCdmBase<? extends CdmBase>> resultList, TermUri idTerm,
650
	        TermUri strTerm, boolean idIsInternal) {
651
		Reference newRef = null;
652
		Reference sourceCitation = null;
653

    
654
		MappedCdmBase<Reference> result = null;
655
		if (exists(idTerm, item) || exists(strTerm, item)){
656
			String refId = CdmUtils.Nz(item.get(idTerm)).trim();
657
			String refStr = CdmUtils.Nz(item.get(strTerm)).trim();
658
			if (StringUtils.isNotBlank(refId)){
659
				List<Reference> references = state.get(idTerm.toString(), refId, Reference.class);
660
				if (references.size() == 0){
661
					if (! idIsInternal){
662
						//references should already exist in store if not linking to external links like URLs
663
						String message = "External namePublishedInIDs are not yet supported";
664
						fireWarningEvent(message, item, 4);//set to DEBUG
665
					}else{
666
						newRef = ReferenceFactory.newGeneric();  //TODO handle other types if possible
667
						newRef.addSource(OriginalSourceType.Import, refId, idTerm.toString(), sourceCitation, null);
668
						MappedCdmBase<Reference> idResult = new MappedCdmBase<>(idTerm, refId, newRef);
669
						resultList.add(idResult);
670
					}
671
				}else{
672
					//TODO handle list.size > 1 , do we need a list here ?
673
					result = new MappedCdmBase<Reference>(idTerm, refId , references.get(0));
674
				}
675
			}
676
			if (result == null){
677
				List<Reference> nomRefs = state.get(strTerm.toString(), refStr, Reference.class);
678
				if (nomRefs.size() > 0){
679
					//TODO handle list.size > 1 , do we need a list here ?
680
					result = new MappedCdmBase<>(strTerm, refStr , nomRefs.get(0));
681
				}else{
682
					// new Reference
683
					if (newRef == null){
684
						newRef = ReferenceFactory.newGeneric();  //TODO handle other types if possible
685
					}
686
					newRef.setTitleCache(refStr, true);
687
					//TODO distinguish available year, authorship, etc. if
688
					result = new MappedCdmBase<>(strTerm, refStr, newRef);
689
					resultList.add(result);
690
				}
691
			}
692
		}
693
		return result;
694
	}
695

    
696

    
697
	//TODO we may configure in configuration that scientific name never includes Authorship
698
	private void checkAuthorship(TaxonName nameBase, StreamItem item) {
699
		if (nameBase.isViral()){
700
			return;
701
		}
702
		String strAuthors = getValue(item, TermUri.DWC_SCIENTIFIC_NAME_AUTHORS);
703

    
704
		if (! nameBase.isProtectedTitleCache()){
705
			if (isBlank(nameBase.getAuthorshipCache())){
706
				if (nameBase.isBotanical() || nameBase.isZoological()){
707
					//TODO can't we also parse NonViralNames correctly ?
708
					try {
709
						parser.parseAuthors(nameBase, strAuthors);
710
					} catch (StringNotParsableException e) {
711
					    nameBase.setAuthorshipCache(strAuthors);
712
					}
713
				}else{
714
				    nameBase.setAuthorshipCache(strAuthors);
715
				}
716
				//TODO throw warning (scientific name should always include authorship) by DwC definition
717
			}
718
		}
719

    
720
	}
721

    
722

    
723
	private Rank getRank(StreamItem csvTaxonRecord, NomenclaturalCode nomCode) {
724
		boolean USE_UNKNOWN = true;
725
		Rank rank = null;
726
		String strRank = getValue(csvTaxonRecord,TermUri.DWC_TAXON_RANK);
727
		String strVerbatimRank = getValue(csvTaxonRecord,TermUri.DWC_VERBATIM_TAXON_RANK);
728
		if (strRank != null){
729
			try {
730
				rank = Rank.getRankByEnglishName(strRank, nomCode, USE_UNKNOWN);
731
				if (rank.equals(Rank.UNKNOWN_RANK())){
732
					rank = Rank.getRankByNameOrIdInVoc(strRank, USE_UNKNOWN);
733
					if (rank.equals(Rank.UNKNOWN_RANK())){
734
						String message = "Rank can not be defined for '%s'";
735
						message = String.format(message, strRank);
736
						fireWarningEvent(message, csvTaxonRecord, 4);
737
					}
738
				}
739
			} catch (UnknownCdmTypeException e) {
740
				//should not happen as USE_UNKNOWN is used
741
				rank = Rank.UNKNOWN_RANK();
742
			}
743
		}
744
		if ( (rank == null || rank.equals(Rank.UNKNOWN_RANK())) && strVerbatimRank != null){
745
			try {
746
				rank = Rank.getRankByNameOrIdInVoc(strVerbatimRank, USE_UNKNOWN);
747
				if (rank.equals(Rank.UNKNOWN_RANK())){
748
					String message = "Rank can not be defined for '%s'";
749
					message = String.format(message, strVerbatimRank);
750
					fireWarningEvent(message, csvTaxonRecord, 4);
751
				}
752
			} catch (UnknownCdmTypeException e) {
753
				//should not happen as USE_UNKNOWN is used
754
				rank = Rank.UNKNOWN_RANK();
755
			}
756
		}
757
		return rank;
758
	}
759

    
760

    
761
	/**
762
	 * Creates an empty taxon object with a given status.
763
	 * <i>Empty</i> taxon means, without a defined name or sec.
764
	 * @param item
765
	 * @return
766
	 */
767
	private TaxonBase<?> getTaxonBase(StreamItem item) {
768
		TaxonName name = null;
769
		Reference sec = null;
770
		TaxonBase<?> result;
771
		String taxStatus = item.get(TermUri.DWC_TAXONOMIC_STATUS);
772
		String status = "";
773

    
774
		if (taxStatus != null){
775
			if (taxStatus.matches("accepted.*|valid")){
776
				status += "A";
777
			} else if (taxStatus.matches(".*synonym|invalid|not accepted")){   //not accepted comes from scratchpads
778
				status += "S";
779
			} else if (taxStatus.matches("misapplied.*")){
780
				status += "M";
781
			} else{
782
				status += "?";
783
			}
784
			removeItemInfo(item, TermUri.DWC_TAXONOMIC_STATUS);
785
		}
786
		if (! CdmUtils.isBlank(item.get(TermUri.DWC_ACCEPTED_NAME_USAGE_ID))){
787
			// acceptedNameUsageId = id
788
			if (getSourceId(item).equals(item.get(TermUri.DWC_ACCEPTED_NAME_USAGE_ID))){
789
				status += "A";
790
			}else{
791
				status += "S";
792
			}
793
		}
794
		if (status.contains("A") || status.contains("M")){
795
			result = Taxon.NewInstance(name, sec);
796
			if (status.contains("S") && ! status.contains("M") ){
797
				String message = "Ambigous taxon status (%s)";
798
				message = String.format(message, status);
799
				fireWarningEvent(message, item, 6);
800
			}
801
		} else if (status.contains("S")){
802
			result = Synonym.NewInstance(name, sec);
803
		} else{
804
			result = Taxon.NewUnknownStatusInstance(name, sec);
805
		}
806

    
807
		return result;
808

    
809
	}
810

    
811

    
812

    
813
    /**
814
	 * @param item
815
	 * @return
816
	 */
817
	private Language getLanguage(StreamItem item) {
818
		String langItem = item.get(TermUri.DC_LANGUAGE);
819
		Language language = null;
820

    
821
		if(StringUtils.equalsIgnoreCase(langItem, "de")){
822
			language = Language.GERMAN();
823
		}else if(StringUtils.equalsIgnoreCase(langItem, "en")){
824
			language = Language.ENGLISH();
825
		}else{
826
			language = Language.DEFAULT();
827
		}
828
		return language;
829
	}
830

    
831
// ********************** PARTITIONABLE ****************************************/
832

    
833

    
834
	@Override
835
	protected void makeForeignKeysForItem(StreamItem item, Map<String, Set<String>> fkMap) {
836
		String value;
837
		String key;
838

    
839
		//namePublishedIn
840
		if ( hasValue(value = item.get(key = TermUri.DWC_NAME_PUBLISHED_IN_ID.toString()))){
841
			Set<String> keySet = getKeySet(key, fkMap);
842
			keySet.add(value);
843
		}
844
		if (config.isDeduplicateNamePublishedIn()){
845
			if ( hasValue(value = item.get(key = TermUri.DWC_NAME_PUBLISHED_IN.toString()))){
846
				Set<String> keySet = getKeySet(key, fkMap);
847
				keySet.add(value);
848
			}
849
		}
850

    
851
		//nameAccordingTo
852
		if (! config.isDatasetsAsSecundumReference()){
853
			if ( hasValue(value = item.get(key = TermUri.DWC_NAME_ACCORDING_TO_ID.toString()))){
854
				Set<String> keySet = getKeySet(key, fkMap);
855
				keySet.add(value);
856
			}
857
			if ( hasValue(value = item.get(key = TermUri.DWC_NAME_ACCORDING_TO.toString()))){
858
				Set<String> keySet = getKeySet(key, fkMap);
859
				keySet.add(value);
860
			}
861
		}
862

    
863
		//dataset
864
		if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_ID.toString()))){
865
			Set<String> keySet = getKeySet(key, fkMap);
866
			keySet.add(value);
867
		}
868
		if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_NAME.toString()))){
869
			Set<String> keySet = getKeySet(key, fkMap);
870
			keySet.add(value);
871
		}
872

    
873
	}
874

    
875

    
876
	@Override
877
	public Set<String> requiredSourceNamespaces() {
878
		Set<String> result = new HashSet<>();
879
 		result.add(TermUri.DWC_NAME_PUBLISHED_IN_ID.toString());
880
 		result.add(TermUri.DWC_NAME_PUBLISHED_IN.toString());
881
 		if (!config.isDatasetsAsSecundumReference()){
882
	 		result.add(TermUri.DWC_NAME_ACCORDING_TO_ID.toString());
883
	 		result.add(TermUri.DWC_NAME_ACCORDING_TO.toString());
884
 		}
885
	 	result.add(TermUri.DWC_DATASET_ID.toString());
886
	 	result.add(TermUri.DWC_DATASET_NAME.toString());
887
	 	return result;
888
	}
889

    
890

    
891
    /**
892
     * @param item
893
     * @param dwcTaxonomicStatus
894
     */
895
    private void removeItemInfo(StreamItem item, TermUri dwcTaxonomicStatus) {
896
        if (!isFilterOnly){
897
            item.remove(dwcTaxonomicStatus);
898
        }
899
    }
900

    
901

    
902
//** ***************************** TO STRING *********************************************/
903

    
904
	@Override
905
	public String toString(){
906
		return this.getClass().getName();
907
	}
908
}
(2-2/17)