Project

General

Profile

Download (28.1 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2009 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.dwca.in;
10

    
11
import java.net.URI;
12
import java.util.ArrayList;
13
import java.util.HashSet;
14
import java.util.List;
15
import java.util.Map;
16
import java.util.Set;
17

    
18
import org.apache.commons.lang.StringUtils;
19
import org.apache.log4j.Logger;
20

    
21
import com.ibm.lsid.MalformedLSIDException;
22

    
23
import eu.etaxonomy.cdm.common.CdmUtils;
24
import eu.etaxonomy.cdm.io.dwca.TermUri;
25
import eu.etaxonomy.cdm.io.stream.StreamImportBase;
26
import eu.etaxonomy.cdm.io.stream.StreamImportStateBase;
27
import eu.etaxonomy.cdm.io.stream.StreamItem;
28
import eu.etaxonomy.cdm.model.common.Annotation;
29
import eu.etaxonomy.cdm.model.common.CdmBase;
30
import eu.etaxonomy.cdm.model.common.Extension;
31
import eu.etaxonomy.cdm.model.common.ExtensionType;
32
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
33
import eu.etaxonomy.cdm.model.common.LSID;
34
import eu.etaxonomy.cdm.model.common.Language;
35
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
36
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
37
import eu.etaxonomy.cdm.model.description.Distribution;
38
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
39
import eu.etaxonomy.cdm.model.description.TaxonDescription;
40
import eu.etaxonomy.cdm.model.location.NamedArea;
41
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
42
import eu.etaxonomy.cdm.model.name.Rank;
43
import eu.etaxonomy.cdm.model.name.TaxonName;
44
import eu.etaxonomy.cdm.model.reference.Reference;
45
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
46
import eu.etaxonomy.cdm.model.taxon.Classification;
47
import eu.etaxonomy.cdm.model.taxon.Synonym;
48
import eu.etaxonomy.cdm.model.taxon.Taxon;
49
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
50
import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
51
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
52
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
53

    
54
/**
55
 * @author a.mueller
56
 * @date 22.11.2011
57
 *
58
 */
59
public class  DwcTaxonStreamItem2CdmTaxonConverter<CONFIG extends DwcaDataImportConfiguratorBase, STATE extends StreamImportStateBase<CONFIG, StreamImportBase>>
60
        extends PartitionableConverterBase<CONFIG, STATE>
61
        implements IPartitionableConverter<StreamItem, IReader<CdmBase>, String>, ItemFilter<StreamItem> {
62

    
63
    private static final Logger logger = Logger.getLogger(DwcTaxonStreamItem2CdmTaxonConverter.class);
64

    
65
    //if this converter is used as filter we may not want to delete item parts during evaluation
66
    boolean isFilterOnly = false;
67

    
68
    private static final String ID = "id";
69
	// temporary key for the case that no dataset information is supplied, TODO use something better
70
	public static final String NO_DATASET = "no_dataset_jli773oebhjklw";
71

    
72
	private final NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
73

    
74
	/**
	 * Creates a converter that is not restricted to filtering, i.e.
	 * {@link #isFilterOnly} stays <code>false</code>.
	 *
	 * @param state the import state passed on to the base converter
	 */
	public DwcTaxonStreamItem2CdmTaxonConverter(STATE state) {
		this(state, false);
	}
80

    
81
    /**
     * Creates a converter and defines whether it is used as filter only.
     *
     * @param state the import state passed on to the base converter
     * @param isFilter if <code>true</code>, item parts are not removed
     *        from the evaluated items
     */
    public DwcTaxonStreamItem2CdmTaxonConverter(STATE state, boolean isFilter) {
        super(state);
        isFilterOnly = isFilter;
    }
85

    
86
    @Override
87
    public boolean toBeRemovedFromStream(StreamItem item) {
88
        if (!config.isDoSplitRelationshipImport()){
89
            return false;
90
        }else{
91
            if (isSynonym(item)){
92
                return ! this.config.isDoSynonymRelationships();
93
            }else{
94
                NomenclaturalCode nomCode = getNomCode(item);
95
                Rank rank = getRank(item, nomCode);
96
                boolean isHigherRank = rank == null || rank.isHigher(Rank.SPECIES());
97
                if (isHigherRank){
98
                    return ! config.isDoHigherRankRelationships();
99
                }else{
100
                    return ! config.isDoLowerRankRelationships();
101
                }
102
            }
103
        }
104
    }
105

    
106
    private boolean isSynonym(StreamItem item) {
107
        TaxonBase<?> taxonBase = getTaxonBase(item);
108
        return taxonBase instanceof Synonym;
109
    }
110

    
111
	@Override
112
    public IReader<MappedCdmBase<? extends CdmBase>> map(StreamItem csvTaxonRecord){
113
		List<MappedCdmBase<? extends CdmBase>> resultList = new ArrayList<>();
114

    
115
		//TODO what if not transactional?
116
		Reference sourceReference = state.getTransactionalSourceReference();
117
		String sourceReferenceDetail = null;
118

    
119
		//taxon
120
		TaxonBase<?> taxonBase = getTaxonBase(csvTaxonRecord);
121
		MappedCdmBase<TaxonBase<?>>  mcb = new MappedCdmBase<>(csvTaxonRecord.term, csvTaxonRecord.get(ID), taxonBase);
122
		resultList.add(mcb);
123

    
124
		//original source
125
		String id = csvTaxonRecord.get(ID);
126
		IdentifiableSource source = taxonBase.addSource(OriginalSourceType.Import, id, "Taxon", sourceReference, sourceReferenceDetail);
127
		MappedCdmBase<IdentifiableSource> mappedSource = new MappedCdmBase<>(csvTaxonRecord.get(ID), source);
128
		resultList.add(mappedSource);
129
		csvTaxonRecord.remove(ID);
130

    
131
		//rank
132
		NomenclaturalCode nomCode = getNomCode(csvTaxonRecord);
133
		Rank rank = getRank(csvTaxonRecord, nomCode);
134

    
135
		//name && name published in
136
		TaxonName name = getScientificName(csvTaxonRecord, nomCode, rank, resultList, sourceReference);
137
		taxonBase.setName(name);
138

    
139
		//nameAccordingTo
140
		MappedCdmBase<Reference> sec = getNameAccordingTo(csvTaxonRecord, resultList);
141

    
142
		if (sec == null && state.getConfig().isUseSourceReferenceAsSec()){
143
			sec = new MappedCdmBase<>(state.getTransactionalSourceReference());
144
		}
145
		if (sec != null){
146
			taxonBase.setSec(sec.getCdmBase());
147
		}
148

    
149
		//classification
150
		handleDataset(csvTaxonRecord, taxonBase, resultList, sourceReference, sourceReferenceDetail);
151

    
152
		//NON core
153
	    //term="http://purl.org/dc/terms/identifier"
154
		//currently only LSIDs
155
		handleIdentifier(csvTaxonRecord, taxonBase);
156

    
157
		//TaxonRemarks
158
		handleTaxonRemarks(csvTaxonRecord, taxonBase);
159

    
160
		//TDWG_1
161
		handleTdwgArea(csvTaxonRecord, taxonBase);
162

    
163
		//VernecularName
164
		handleCommonNames(csvTaxonRecord, taxonBase);
165

    
166
		//External Sources, ID's and References
167
		handleIdentifiableObjects(csvTaxonRecord, taxonBase);
168

    
169

    
170
		//		    <!-- Top level group; listed as kingdom but may be interpreted as domain or superkingdom
171
//		         The following eight groups are recognized: Animalia, Archaea, Bacteria, Chromista,
172
//		         Fungi, Plantae, Protozoa, Viruses -->
173
//		    <field index='10' term='http://rs.tdwg.org/dwc/terms/kingdom'/>
174

    
175
//		    <!-- Phylum in which the taxon has been classified -->
176
//		    <field index='11' term='http://rs.tdwg.org/dwc/terms/phylum'/>
177

    
178
		//		    <!-- Class in which the taxon has been classified -->
179
//		    <field index='12' term='http://rs.tdwg.org/dwc/terms/class'/>
180

    
181
		//		    <!-- Order in which the taxon has been classified -->
182
//		    <field index='13' term='http://rs.tdwg.org/dwc/terms/order'/>
183

    
184
		//		    <!-- Family in which the taxon has been classified -->
185
//		    <field index='14' term='http://rs.tdwg.org/dwc/terms/family'/>
186

    
187
		//		    <!-- Genus in which the taxon has been classified -->
188
//		    <field index='15' term='http://rs.tdwg.org/dwc/terms/genus'/>
189

    
190
		//		    <!-- Subgenus in which the taxon has been classified -->
191
//		    <field index='16' term='http://rs.tdwg.org/dwc/terms/subgenus'/>
192
//		    <!-- Specific epithet; for hybrids, the multiplication symbol is included in the epithet -->
193

    
194
//		    <field index='17' term='http://rs.tdwg.org/dwc/terms/specificEpithet'/>
195
//		    <!-- Infraspecific epithet -->
196

    
197
//		    <field index='18' term='http://rs.tdwg.org/dwc/terms/infraspecificEpithet'/>
198
//		    <!-- Authorship -->
199

    
200
//		    <field index='19' term='http://rs.tdwg.org/dwc/terms/scientificNameAuthorship'/>
201
//		==> see scientific name
202
//
203
//		<!-- Acceptance status published in -->
204
//		    <field index='20' term='http://purl.org/dc/terms/source'/>
205
//		    <!-- Reference in which the scientific name was first published -->
206
//		    <field index='21' term='http://rs.tdwg.org/dwc/terms/namePublishedIn'/>
207
//		    <!-- Taxon scrutinized by -->
208
//		    <field index='22' term='http://rs.tdwg.org/dwc/terms/nameAccordingTo'/> 
209
//		    <!-- Scrutiny date -->
210
//		    <field index='23' term='http://purl.org/dc/terms/modified'/>
211
//		    <!-- Additional data for the taxon -->
212
//		    <field index='24' term='http://purl.org/dc/terms/description'/>
213
//		    </core>
214

    
215
		return new ListReader<MappedCdmBase<? extends CdmBase>>(resultList);
216
	}
217

    
218

    
219

    
220
	/**
221
	 * @param item
222
	 * @param taxonBase
223
	 */
224
	private void handleIdentifiableObjects(StreamItem item,TaxonBase<?> taxonBase) {
225

    
226

    
227
		String references = item.get(TermUri.DC_REFERENCES);
228

    
229
		if (references == null || references == "") {
230
			references = item.get(TermUri.DWC_NAME_PUBLISHED_IN_ID);//lorna temporary until Scratchpads move the reference to the correct place.
231
		}
232

    
233
		if (StringUtils.isNotBlank(references)){
234
			URI uri = makeUriIfIs(references);
235
			if (uri != null){
236
				Extension.NewInstance(taxonBase, references, ExtensionType.URL());
237
			}else{
238
				String message = "Non-URI Dublin Core References not yet handled for taxa. References is: %s";
239
				fireWarningEvent(String.format(message, references), item, 6);
240
			}
241
		}
242

    
243

    
244
		//TODO: Finish properly
245
		String id = item.get(TermUri.CDM_SOURCE_IDINSOURCE);
246
		String idNamespace = item.get(TermUri.CDM_SOURCE_IDNAMESPACE);
247
		String reference = item.get(TermUri.CDM_SOURCE_REFERENCE);
248
		if(StringUtils.isNotBlank(id) && StringUtils.isNotBlank(idNamespace) && StringUtils.isNotBlank(reference)){
249
			Reference ref = ReferenceFactory.newGeneric();
250
			ref.setTitle(reference);
251
			Taxon taxon = (Taxon) taxonBase;
252
			taxon.addSource(OriginalSourceType.Import, id, idNamespace, ref, null);
253
		}
254

    
255
	}
256

    
257

    
258
	/**
259
	 * If str is an uri it returns is as an {@link URI}. If not it returns <code>null</code>.
260
	 * @param str
261
	 * @return the URI.
262
	 */
263
	private URI makeUriIfIs(String str) {
264
		if (! str.startsWith("http:")){
265
			return null;
266
		}else{
267
			try {
268
				URI uri = URI.create(str);
269
				return uri;
270
			} catch (Exception e) {
271
				return null;
272
			}
273
		}
274

    
275
	}
276

    
277

    
278
	/**
279
	 * @param item
280
	 * @param taxonBase
281
	 */
282
	private void handleCommonNames(StreamItem item,TaxonBase<?> taxonBase) {
283
		//TODO: handle comma separated values
284
		String commonName = item.get(TermUri.DWC_VERNACULAR_NAME);
285
		if (StringUtils.isNotBlank(commonName)){
286

    
287
			Language language = getLanguage(item);
288
			CommonTaxonName commonTaxonName = CommonTaxonName.NewInstance(commonName, language);
289
			if(taxonBase instanceof Taxon){
290
				Taxon taxon = (Taxon) taxonBase;
291
				TaxonDescription taxonDescription = getTaxonDescription(taxon, false);
292
				taxonDescription.addElement(commonTaxonName);
293
				logger.info("Common name " + commonName + " added to " + taxon.getTitleCache());
294
			}
295
		}
296
	}
297

    
298

    
299

    
300
	/**
301
	 * @param csvTaxonRecord
302
	 * @param taxonBase
303
	 */
304
	private void handleTdwgArea(StreamItem item, TaxonBase<?> taxonBase) {
305
		String tdwg_area = item.get(TermUri.DWC_COUNTRY_CODE);
306
		if (tdwg_area != null){
307
    		if(taxonBase instanceof Synonym){
308
    			Synonym synonym = CdmBase.deproxy(taxonBase, Synonym.class);
309
    			Taxon acceptedTaxon = synonym.getAcceptedTaxon();
310
    			if (acceptedTaxon != null){
311
    			    TaxonDescription td = getTaxonDescription(acceptedTaxon, false);
312
    			    NamedArea area = NamedArea.getAreaByTdwgAbbreviation(tdwg_area);
313

    
314
    			    if (area == null){
315
    			        area = NamedArea.getAreaByTdwgLabel(tdwg_area);
316
    			    }
317
    			    if (area != null){
318
    			        Distribution distribution = Distribution.NewInstance(area, PresenceAbsenceTerm.PRESENT());
319
    			        td.addElement(distribution);
320
    			    }
321
    			}
322
    		}
323
    		if(!(taxonBase instanceof Synonym)){
324
    			Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class);
325
    			TaxonDescription td = getTaxonDescription(taxon, false);
326
    			NamedArea area = NamedArea.getAreaByTdwgAbbreviation(tdwg_area);
327

    
328
    			if (area == null){
329
    				area = NamedArea.getAreaByTdwgLabel(tdwg_area);
330
    			}
331
    			if (area != null){
332
    				Distribution distribution = Distribution.NewInstance(area, PresenceAbsenceTerm.PRESENT());
333
    				td.addElement(distribution);
334
    			}
335
    		}
336
    	}
337
	}
338

    
339

    
340
	/**
341
	 * @param item
342
	 * @param taxonBase
343
	 */
344
	private void handleTaxonRemarks(StreamItem item,TaxonBase<?> taxonBase) {
345
		String comment = item.get(TermUri.DWC_TAXON_REMARKS);
346
		Language language = getLanguage(item);
347
		if(StringUtils.isNotBlank(comment)){
348
				Annotation annotation = Annotation.NewInstance(comment, language);
349
				taxonBase.addAnnotation(annotation);
350
		}else{
351
//			String message = "Comment is empty or some error appeared while saving: %s";
352
////			message = String.format(message);
353
//			fireWarningEvent(message, item, 1);
354
		}
355
	}
356

    
357

    
358
	//TODO handle non LSIDs
359
	//TODO handle LSIDs for names
360
	private void handleIdentifier(StreamItem csvTaxonRecord, TaxonBase<?> taxonBase) {
361
		String identifier = csvTaxonRecord.get(TermUri.DC_IDENTIFIER);
362
		if (StringUtils.isNotBlank(identifier)){
363
			if (identifier.trim().startsWith("urn:lsid")){
364
				try {
365
					LSID lsid = new LSID(identifier);
366
					taxonBase.setLsid(lsid);
367
				} catch (MalformedLSIDException e) {
368
					String message = "LSID is malformed and can't be handled as LSID: %s";
369
					message = String.format(message, identifier);
370
					fireWarningEvent(message, csvTaxonRecord, 4);
371
				}
372
			}else{
373
				String message = "Identifier type not supported: %s";
374
				message = String.format(message, identifier);
375
				fireWarningEvent(message, csvTaxonRecord, 4);
376
			}
377
		}
378

    
379
	}
380

    
381

    
382
	private void handleDataset(StreamItem item, TaxonBase<?> taxonBase, List<MappedCdmBase<? extends CdmBase>> resultList, Reference sourceReference, String sourceReferecenDetail) {
383
		TermUri idTerm = TermUri.DWC_DATASET_ID;
384
		TermUri strTerm = TermUri.DWC_DATASET_NAME;
385

    
386
		if (config.isDatasetsAsClassifications()){
387
			String datasetId = CdmUtils.Nz(item.get(idTerm)).trim();
388
			String datasetName = CdmUtils.Nz(item.get(strTerm)).trim();
389
				if (CdmUtils.areBlank(datasetId, datasetName) ){
390
				datasetId = NO_DATASET;
391
			}
392

    
393
			//check id
394
			boolean classificationExists = state.exists(idTerm.toString() , datasetId, Classification.class);
395

    
396
			//check name
397
			if (!classificationExists){
398
				classificationExists = state.exists(strTerm.toString() , datasetName, Classification.class);
399
			}
400

    
401
			//if not exists, create new
402
			if (! classificationExists){
403
				String classificationName = StringUtils.isBlank(datasetName)? datasetId : datasetName;
404
				if (classificationName.equals(NO_DATASET)){
405
					classificationName = config.getClassificationName();
406
					//classificationName = "Classification (no name)";  //TODO define by config or zipfile or metadata
407
				}
408

    
409
				String classificationId = StringUtils.isBlank(datasetId)? datasetName : datasetId;
410
				Classification classification = Classification.NewInstance(classificationName);
411
				//source
412
				IdentifiableSource source = classification.addSource(OriginalSourceType.Import, classificationId, "Dataset", sourceReference, sourceReferecenDetail);
413
				//add to result
414
				resultList.add(new MappedCdmBase<>(idTerm, datasetId, classification));
415
				resultList.add(new MappedCdmBase<>(strTerm, datasetName, classification));
416
				resultList.add(new MappedCdmBase<>(source));
417
				//TODO this is not so nice but currently necessary as classifications are requested in the same partition
418
				state.putMapping(idTerm.toString(), classificationId, classification);
419
				state.putMapping(strTerm.toString(), classificationName, classification);
420
			}
421
		}else if (config.isDatasetsAsSecundumReference() || config.isDatasetsAsOriginalSource()){
422
			MappedCdmBase<Reference> mappedCitation = getReference(item, resultList, idTerm, strTerm, true);
423
			if (mappedCitation != null){
424
				Reference ref = mappedCitation.getCdmBase();
425
				if (config.isDatasetsAsSecundumReference()){
426
					//dataset as secundum reference
427
					taxonBase.setSec(ref);
428
				}else{
429
					//dataset as original source
430
					taxonBase.addSource(OriginalSourceType.Import, null, null, ref, null);
431
				}
432
			}
433
		}else{
434
			String message = "DatasetUse type not yet implemented. Can't import dataset information.";
435
			fireWarningEvent(message, item, 4);
436
		}
437

    
438
		//remove to later check if all attributes were used
439
		removeItemInfo(item, idTerm);
440
		removeItemInfo(item, strTerm);
441
	}
442

    
443

    
444
	@Override
445
	public String getSourceId(StreamItem item) {
446
		String id = item.get(ID);
447
		return id;
448
	}
449

    
450
	private MappedCdmBase<Reference> getNameAccordingTo(StreamItem item, List<MappedCdmBase<? extends CdmBase>> resultList) {
451
		if (config.isDatasetsAsSecundumReference()){
452
			//TODO store nameAccordingTo info some where else or let the user define where to store it.
453
			return null;
454
		}else{
455
			TermUri idTerm = TermUri.DWC_NAME_ACCORDING_TO_ID;
456
			TermUri strTerm = TermUri.DWC_NAME_ACCORDING_TO;
457
			MappedCdmBase<Reference> secRef = getReference(item, resultList, idTerm, strTerm, false);
458
			return secRef;
459
		}
460
	}
461

    
462
	private NomenclaturalCode getNomCode(StreamItem item) {
463
		String strNomCode = getValue(item, TermUri.DWC_NOMENCLATURAL_CODE);
464
		NomenclaturalCode nomCode = null;
465
		// by Nomcenclatural Code
466
		if (strNomCode != null){
467
			nomCode = NomenclaturalCode.fromString(strNomCode);
468
			if (nomCode == null){
469
				String message = "NomCode '%s' not recognized";
470
				message = String.format(message, strNomCode);
471
				fireWarningEvent(message, item, 4);
472
			}else{
473
				return nomCode;
474
			}
475
		}
476
		// by Kingdom
477
		String strKingdom = getValue(item, TermUri.DWC_KINGDOM);
478
		if (strKingdom != null){
479
			if (strKingdom.equalsIgnoreCase("Plantae")){
480
				nomCode = NomenclaturalCode.ICNAFP;
481
			}else if (strKingdom.equalsIgnoreCase("Fungi")){
482
				nomCode = NomenclaturalCode.ICNAFP;
483
			}else if (strKingdom.equalsIgnoreCase("Animalia")){
484
				nomCode = NomenclaturalCode.ICZN;
485
			}else if (strKingdom.equalsIgnoreCase("Protozoa")){
486
				nomCode = NomenclaturalCode.ICZN;
487
			}
488
		}
489

    
490
		//TODO further kingdoms
491
		if (nomCode == null){
492
			//TODO warning
493
			if (config.getNomenclaturalCode() != null){
494
				nomCode = config.getNomenclaturalCode();
495
			}
496
		}
497
		return nomCode;
498
	}
499

    
500

    
501
	private TaxonName getScientificName(StreamItem item, NomenclaturalCode nomCode, Rank rank, List<MappedCdmBase<? extends CdmBase>> resultList, Reference sourceReference) {
502
		TaxonName name = null;
503
		String strScientificName = getValue(item, TermUri.DWC_SCIENTIFIC_NAME);
504
		//Name
505
		if (strScientificName != null){
506
			name = (TaxonName)parser.parseFullName(strScientificName, nomCode, rank);
507
			if ( rank != null && name != null && name.getRank() != null &&  ! rank.equals(name.getRank())){
508
				if (config.isValidateRankConsistency()){
509
					String message = "Parsed rank %s (%s) differs from rank %s given by fields 'taxonRank' or 'verbatimTaxonRank'";
510
					message = String.format(message, name.getRank().getTitleCache(), strScientificName, rank.getTitleCache());
511
					fireWarningEvent(message, item, 4);
512
				}
513
			}
514
			checkAuthorship(name, item);
515
			resultList.add(new MappedCdmBase(TermUri.DWC_SCIENTIFIC_NAME, strScientificName, name));
516
		}
517
		//By ID
518
		String strScientificNameId = getValue(item, TermUri.DWC_SCIENTIFIC_NAME_ID);
519
		if (strScientificNameId != null){
520
			if (config.isScientificNameIdAsOriginalSourceId()){
521
				if (name != null){
522
					IdentifiableSource source = IdentifiableSource.NewInstance(OriginalSourceType.Import, strScientificNameId, TermUri.DWC_SCIENTIFIC_NAME_ID.toString(), sourceReference, null);
523
					name.addSource(source);
524
				}
525
			}else{
526
				String message = "ScientificNameId not yet implemented: '%s'";
527
				message = String.format(message, strScientificNameId);
528
				fireWarningEvent(message, item, 4);
529
			}
530
		}
531

    
532
		//namePublishedIn
533
		TermUri idTerm = TermUri.DWC_NAME_PUBLISHED_IN_ID;
534
		TermUri strTerm = TermUri.DWC_NAME_PUBLISHED_IN;
535
		MappedCdmBase<Reference> nomRef = getReference(item, resultList, idTerm, strTerm, false);
536

    
537
		if (name != null){
538
			if (nomRef != null){
539
				name.setNomenclaturalReference(nomRef.getCdmBase());  //check if name already has a nomRef, shouldn't be the case usually
540
			}
541
		}else{
542
			if (nomRef != null){
543
				String message = "NamePublishedIn information available but no name exists";
544
				fireWarningEvent(message, item, 4);
545
			}
546
		}
547
		return name;
548
	}
549

    
550

    
551
	/**
552
	 * General method to handle references used for multiple attributes.
553
	 * @param item
554
	 * @param resultList
555
	 * @param idTerm
556
	 * @param strTerm
557
	 * @param idIsInternal
558
	 * @return
559
	 */
560
	private MappedCdmBase<Reference> getReference(StreamItem item,
561
	        List<MappedCdmBase<? extends CdmBase>> resultList, TermUri idTerm,
562
	        TermUri strTerm, boolean idIsInternal) {
563
		Reference newRef = null;
564
		Reference sourceCitation = null;
565

    
566
		MappedCdmBase<Reference> result = null;
567
		if (exists(idTerm, item) || exists(strTerm, item)){
568
			String refId = CdmUtils.Nz(item.get(idTerm)).trim();
569
			String refStr = CdmUtils.Nz(item.get(strTerm)).trim();
570
			if (StringUtils.isNotBlank(refId)){
571
				List<Reference> references = state.get(idTerm.toString(), refId, Reference.class);
572
				if (references.size() == 0){
573
					if (! idIsInternal){
574
						//references should already exist in store if not linking to external links like URLs
575
						String message = "External namePublishedInIDs are not yet supported";
576
						fireWarningEvent(message, item, 4);//set to DEBUG
577
					}else{
578
						newRef = ReferenceFactory.newGeneric();  //TODO handle other types if possible
579
						newRef.addSource(OriginalSourceType.Import, refId, idTerm.toString(), sourceCitation, null);
580
						MappedCdmBase<Reference> idResult = new MappedCdmBase<Reference>(idTerm, refId, newRef);
581
						resultList.add(idResult);
582
					}
583
				}else{
584
					//TODO handle list.size > 1 , do we need a list here ?
585
					result = new MappedCdmBase<Reference>(idTerm, refId , references.get(0));
586
				}
587
			}
588
			if (result == null){
589
				List<Reference> nomRefs = state.get(strTerm.toString(), refStr, Reference.class);
590
				if (nomRefs.size() > 0){
591
					//TODO handle list.size > 1 , do we need a list here ?
592
					result = new MappedCdmBase<Reference>(strTerm, refStr , nomRefs.get(0));
593
				}else{
594
					// new Reference
595
					if (newRef == null){
596
						newRef = ReferenceFactory.newGeneric();  //TODO handle other types if possible
597
					}
598
					newRef.setTitleCache(refStr, true);
599
					//TODO distinguish available year, authorship, etc. if
600
					result = new MappedCdmBase<Reference>(strTerm, refStr, newRef);
601
					resultList.add(result);
602
				}
603
			}
604
		}
605
		return result;
606
	}
607

    
608

    
609
	//TODO we may configure in configuration that scientific name never includes Authorship
610
	private void checkAuthorship(TaxonName nameBase, StreamItem item) {
611
		if (!nameBase.isNonViral()){
612
			return;
613
		}
614
		String strAuthors = getValue(item, TermUri.DWC_SCIENTIFIC_NAME_AUTHORS);
615

    
616
		if (! nameBase.isProtectedTitleCache()){
617
			if (StringUtils.isBlank(nameBase.getAuthorshipCache())){
618
				if (nameBase.isBotanical() || nameBase.isZoological()){
619
					//TODO can't we also parse NonViralNames correctly ?
620
					try {
621
						parser.parseAuthors(nameBase, strAuthors);
622
					} catch (StringNotParsableException e) {
623
					    nameBase.setAuthorshipCache(strAuthors);
624
					}
625
				}else{
626
				    nameBase.setAuthorshipCache(strAuthors);
627
				}
628
				//TODO throw warning (scientific name should always include authorship) by DwC definition
629
			}
630
		}
631

    
632
	}
633

    
634

    
635
	private Rank getRank(StreamItem csvTaxonRecord, NomenclaturalCode nomCode) {
636
		boolean USE_UNKNOWN = true;
637
		Rank rank = null;
638
		String strRank = getValue(csvTaxonRecord,TermUri.DWC_TAXON_RANK);
639
		String strVerbatimRank = getValue(csvTaxonRecord,TermUri.DWC_VERBATIM_TAXON_RANK);
640
		if (strRank != null){
641
			try {
642
				rank = Rank.getRankByEnglishName(strRank, nomCode, USE_UNKNOWN);
643
				if (rank.equals(Rank.UNKNOWN_RANK())){
644
					rank = Rank.getRankByNameOrIdInVoc(strRank, USE_UNKNOWN);
645
					if (rank.equals(Rank.UNKNOWN_RANK())){
646
						String message = "Rank can not be defined for '%s'";
647
						message = String.format(message, strRank);
648
						fireWarningEvent(message, csvTaxonRecord, 4);
649
					}
650
				}
651
			} catch (UnknownCdmTypeException e) {
652
				//should not happen as USE_UNKNOWN is used
653
				rank = Rank.UNKNOWN_RANK();
654
			}
655
		}
656
		if ( (rank == null || rank.equals(Rank.UNKNOWN_RANK())) && strVerbatimRank != null){
657
			try {
658
				rank = Rank.getRankByNameOrIdInVoc(strVerbatimRank, USE_UNKNOWN);
659
				if (rank.equals(Rank.UNKNOWN_RANK())){
660
					String message = "Rank can not be defined for '%s'";
661
					message = String.format(message, strVerbatimRank);
662
					fireWarningEvent(message, csvTaxonRecord, 4);
663
				}
664
			} catch (UnknownCdmTypeException e) {
665
				//should not happen as USE_UNKNOWN is used
666
				rank = Rank.UNKNOWN_RANK();
667
			}
668
		}
669
		return rank;
670
	}
671

    
672

    
673
	/**
674
	 * Creates an empty taxon object with a given status.
675
	 * <i>Empty</i> taxon means, without a defined name or sec.
676
	 * @param item
677
	 * @return
678
	 */
679
	private TaxonBase<?> getTaxonBase(StreamItem item) {
680
		TaxonName name = null;
681
		Reference sec = null;
682
		TaxonBase<?> result;
683
		String taxStatus = item.get(TermUri.DWC_TAXONOMIC_STATUS);
684
		String status = "";
685

    
686
		if (taxStatus != null){
687
			if (taxStatus.matches("accepted.*|valid")){
688
				status += "A";
689
			} else if (taxStatus.matches(".*synonym|invalid|not accepted")){   //not accepted comes from scratchpads
690
				status += "S";
691
			} else if (taxStatus.matches("misapplied.*")){
692
				status += "M";
693
			} else{
694
				status += "?";
695
			}
696
			removeItemInfo(item, TermUri.DWC_TAXONOMIC_STATUS);
697
		}
698
		if (! CdmUtils.isBlank(item.get(TermUri.DWC_ACCEPTED_NAME_USAGE_ID))){
699
			// acceptedNameUsageId = id
700
			if (getSourceId(item).equals(item.get(TermUri.DWC_ACCEPTED_NAME_USAGE_ID))){
701
				status += "A";
702
			}else{
703
				status += "S";
704
			}
705
		}
706
		if (status.contains("A") || status.contains("M")){
707
			result = Taxon.NewInstance(name, sec);
708
			if (status.contains("S") && ! status.contains("M") ){
709
				String message = "Ambigous taxon status (%s)";
710
				message = String.format(message, status);
711
				fireWarningEvent(message, item, 6);
712
			}
713
		} else if (status.contains("S")){
714
			result = Synonym.NewInstance(name, sec);
715
		} else{
716
			result = Taxon.NewUnknownStatusInstance(name, sec);
717
		}
718

    
719
		return result;
720

    
721
	}
722

    
723

    
724

    
725
    /**
726
	 * @param item
727
	 * @return
728
	 */
729
	private Language getLanguage(StreamItem item) {
730
		String langItem = item.get(TermUri.DC_LANGUAGE);
731
		Language language = null;
732

    
733
		if(StringUtils.equalsIgnoreCase(langItem, "de")){
734
			language = Language.GERMAN();
735
		}else if(StringUtils.equalsIgnoreCase(langItem, "en")){
736
			language = Language.ENGLISH();
737
		}else{
738
			language = Language.DEFAULT();
739
		}
740
		return language;
741
	}
742

    
743
// ********************** PARTITIONABLE ****************************************/
744

    
745

    
746
	@Override
747
	protected void makeForeignKeysForItem(StreamItem item, Map<String, Set<String>> fkMap) {
748
		String value;
749
		String key;
750

    
751
		//namePublishedIn
752
		if ( hasValue(value = item.get(key = TermUri.DWC_NAME_PUBLISHED_IN_ID.toString()))){
753
			Set<String> keySet = getKeySet(key, fkMap);
754
			keySet.add(value);
755
		}
756
		if (config.isDeduplicateNamePublishedIn()){
757
			if ( hasValue(value = item.get(key = TermUri.DWC_NAME_PUBLISHED_IN.toString()))){
758
				Set<String> keySet = getKeySet(key, fkMap);
759
				keySet.add(value);
760
			}
761
		}
762

    
763
		//nameAccordingTo
764
		if (! config.isDatasetsAsSecundumReference()){
765
			if ( hasValue(value = item.get(key = TermUri.DWC_NAME_ACCORDING_TO_ID.toString()))){
766
				Set<String> keySet = getKeySet(key, fkMap);
767
				keySet.add(value);
768
			}
769
			if ( hasValue(value = item.get(key = TermUri.DWC_NAME_ACCORDING_TO.toString()))){
770
				Set<String> keySet = getKeySet(key, fkMap);
771
				keySet.add(value);
772
			}
773
		}
774

    
775
		//dataset
776
		if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_ID.toString()))){
777
			Set<String> keySet = getKeySet(key, fkMap);
778
			keySet.add(value);
779
		}
780
		if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_NAME.toString()))){
781
			Set<String> keySet = getKeySet(key, fkMap);
782
			keySet.add(value);
783
		}
784

    
785
	}
786

    
787

    
788
	@Override
789
	public Set<String> requiredSourceNamespaces() {
790
		Set<String> result = new HashSet<String>();
791
 		result.add(TermUri.DWC_NAME_PUBLISHED_IN_ID.toString());
792
 		result.add(TermUri.DWC_NAME_PUBLISHED_IN.toString());
793
 		if (!config.isDatasetsAsSecundumReference()){
794
	 		result.add(TermUri.DWC_NAME_ACCORDING_TO_ID.toString());
795
	 		result.add(TermUri.DWC_NAME_ACCORDING_TO.toString());
796
 		}
797
	 	result.add(TermUri.DWC_DATASET_ID.toString());
798
	 	result.add(TermUri.DWC_DATASET_NAME.toString());
799
	 	return result;
800
	}
801

    
802

    
803
    /**
804
     * @param item
805
     * @param dwcTaxonomicStatus
806
     */
807
    private void removeItemInfo(StreamItem item, TermUri dwcTaxonomicStatus) {
808
        if (!isFilterOnly){
809
            item.remove(dwcTaxonomicStatus);
810
        }
811
    }
812

    
813

    
814
//** ***************************** TO STRING *********************************************/
815

    
816
	@Override
817
	public String toString(){
818
		return this.getClass().getName();
819
	}
820

    
821

    
822

    
823

    
824
}
(5-5/37)