Project

General

Profile

Download (8.96 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2009 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.dwca.in;
10

    
11
import java.net.URI;
12
import java.util.ArrayList;
13
import java.util.HashSet;
14
import java.util.List;
15
import java.util.Map;
16
import java.util.Set;
17

    
18
import org.apache.commons.lang.StringUtils;
19
import org.apache.log4j.Logger;
20

    
21
import com.ibm.lsid.MalformedLSIDException;
22

    
23
import eu.etaxonomy.cdm.io.stream.IPartitionableConverter;
24
import eu.etaxonomy.cdm.io.stream.IReader;
25
import eu.etaxonomy.cdm.io.stream.ListReader;
26
import eu.etaxonomy.cdm.io.stream.MappedCdmBase;
27
import eu.etaxonomy.cdm.io.stream.PartitionableConverterBase;
28
import eu.etaxonomy.cdm.io.stream.StreamItem;
29
import eu.etaxonomy.cdm.io.stream.terms.TermUri;
30
import eu.etaxonomy.cdm.model.agent.Team;
31
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
32
import eu.etaxonomy.cdm.model.common.CdmBase;
33
import eu.etaxonomy.cdm.model.common.LSID;
34
import eu.etaxonomy.cdm.model.common.VerbatimTimePeriod;
35
import eu.etaxonomy.cdm.model.description.DescriptionElementSource;
36
import eu.etaxonomy.cdm.model.description.Feature;
37
import eu.etaxonomy.cdm.model.description.TaxonDescription;
38
import eu.etaxonomy.cdm.model.description.TextData;
39
import eu.etaxonomy.cdm.model.name.INonViralName;
40
import eu.etaxonomy.cdm.model.name.IZoologicalName;
41
import eu.etaxonomy.cdm.model.name.TaxonName;
42
import eu.etaxonomy.cdm.model.reference.Reference;
43
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
44
import eu.etaxonomy.cdm.model.taxon.Synonym;
45
import eu.etaxonomy.cdm.model.taxon.Taxon;
46
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
47
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
48

    
49
/**
50
 * @author a.mueller
51
 * @since 22.11.2011
52
 *
53
 */
54
public class GbifReferenceCsv2CdmConverter extends PartitionableConverterBase<DwcaDataImportConfiguratorBase, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase>>
55
						implements IPartitionableConverter<StreamItem, IReader<CdmBase>, String>{
56

    
57
	/** Log4j logger used by this converter. */
	private static final Logger logger = Logger.getLogger(GbifReferenceCsv2CdmConverter.class);
58

    
59
	/** Key passed to {@code StreamItem.get(String)} to read the source id of an item. */
	private static final String CORE_ID = "coreId";
60

    
61
	/**
	 * Creates a converter bound to the given import state. The state is only
	 * handed over to the super class constructor.
	 *
	 * @param state the import state this converter works on
	 */
	public GbifReferenceCsv2CdmConverter(DwcaDataImportStateBase state) {
		super(state);
	}
67

    
68
	@Override
69
    public IReader<MappedCdmBase<? extends CdmBase>> map(StreamItem item ){
70
		List<MappedCdmBase<? extends CdmBase>> resultList = new ArrayList<>();
71

    
72
		Map<String, String> csv = item.map;
73
		Reference sourceReference = state.getTransactionalSourceReference();
74
		String sourceReferecenDetail = null;
75

    
76
		String id = getSourceId(item);
77
		TaxonBase<?> taxon = getTaxonBase(id, item, TaxonBase.class, state);
78
		if (isNotBlank(id) && taxon == null){
79
			String message = "Taxon for id %s could not be found";
80
			message = String.format(message, id);
81
			fireWarningEvent(message, item, 8);
82
		}
83

    
84
		String strCreator = getValue(item, TermUri.DC_CREATOR);
85
		String strDate = getValue(item, TermUri.DC_DATE);
86
		String strTitle = getValue(item, TermUri.DC_TITLE);
87
		String strSource = getValue(item, TermUri.DC_SOURCE);
88
		String strIdentifier = getValue(item, TermUri.DC_IDENTIFIER);
89
		String strType = getValue(item, TermUri.DC_TYPE);
90

    
91
		Reference reference = ReferenceFactory.newGeneric();
92
		resultList.add(new MappedCdmBase<CdmBase>(reference));
93

    
94
		//author
95
		TeamOrPersonBase<?> author = handleCreator(strCreator);
96
		reference.setAuthorship(author);
97
		//date
98
		VerbatimTimePeriod publicationDate = handleDate(strDate);
99
		reference.setDatePublished(publicationDate);
100
		//title
101
		reference.setTitle(strTitle);
102
		//inreference
103
		Reference inRef = handleInRef(strSource);
104
		if (inRef != null){
105
			reference.setInReference(inRef);
106
			resultList.add(new MappedCdmBase<>(inRef));
107
		}
108

    
109
		//URI
110
		handleIdentifier(strIdentifier, reference);
111

    
112
		//type
113
		handleType(reference, strType, taxon, resultList, item);
114

    
115

    
116
		return new ListReader<>(resultList);
117
	}
118

    
119

    
120
	private void handleType(Reference reference, String strType, TaxonBase<?> taxon,
121
	        List<MappedCdmBase<? extends CdmBase>> resultList, StreamItem item) {
122
		// TODO handleType not yet implemented
123

    
124
		if (taxon == null){
125
			String message = "Taxon is null. Reference not imported.";
126
			fireWarningEvent(message,item, 4);
127
			//do nothing
128
		}else{
129
			boolean isNomRef = false;
130
			if (isNotBlank(strType)){
131
				if (strType.matches("Botanical Protologue")){
132
					if (taxon.getName() != null && reference != null && taxon.getName().isNonViral()){
133
						INonViralName nvn = taxon.getName();
134
						nvn.setNomenclaturalReference(reference);
135
						isNomRef = true;
136
					}else{
137
						//TODO
138
					}
139
				}
140
			}
141

    
142
			//guess a nom ref
143
			if (isNomRef == false && config.isGuessNomenclaturalReferences()){
144
				//if reference equals in author and year we assume that it is the nom ref
145
				//this information is usually only available for ICZN names
146
				if (taxon.getName() != null && reference != null && taxon.getName().isNonViral()){
147
					INonViralName nvn = taxon.getName();
148
					String taxonAuthor = nvn.getAuthorshipCache();
149
					String refAuthor = reference.getAuthorship().getNomenclaturalTitle();
150
					Integer combYear = null;
151
					Integer origYear = null;
152
					if (nvn.isZoological()){
153
						IZoologicalName zooName = (IZoologicalName)CdmBase.deproxy(nvn);
154
						combYear = zooName.getPublicationYear();
155
						origYear = zooName.getOriginalPublicationYear();
156
					}
157
					String refYear = reference.getYear();
158

    
159
					//combination compare
160
					if (taxonAuthor != null && taxonAuthor.equals(refAuthor)){
161
						if (combYear != null && String.valueOf(combYear).equals(refYear)){
162
							//is nom Ref
163
							isNomRef = true;
164
							nvn.setNomenclaturalReference(reference);
165
						}else if (origYear != null && String.valueOf(origYear).equals(refYear)){
166
							//TODO not yet handled by CDM
167
						}
168
					}
169

    
170
				}
171
			}
172
			if (config.isHandleAllRefsAsCitation()){
173
				if (taxon.isInstanceOf(Taxon.class)){
174
					TaxonDescription desc = getTaxonDescription(CdmBase.deproxy(taxon, Taxon.class), false);
175
					createCitation(desc, reference, taxon.getName());
176
					resultList.add(new MappedCdmBase<CdmBase>(desc));
177
				}else if (taxon.isInstanceOf(Synonym.class)){
178
					Synonym syn = CdmBase.deproxy(taxon, Synonym.class);
179
					Taxon tax = syn.getAcceptedTaxon();
180
					if (tax != null){
181
    					TaxonDescription desc = getTaxonDescription(tax, false);
182
    					createCitation(desc, reference, syn.getName());
183
    					resultList.add(new MappedCdmBase<CdmBase>(desc));
184
					}
185
				}
186

    
187
			}
188

    
189
		}
190

    
191

    
192
	}
193

    
194
	private void createCitation(TaxonDescription desc, Reference ref, TaxonName nameUsedInSource) {
195
		Feature feature = Feature.CITATION();
196
		TextData textData = TextData.NewInstance(feature);
197
		DescriptionElementSource source = DescriptionElementSource.NewPrimarySourceInstance(ref, null, nameUsedInSource, null);
198
		textData.addSource(source);
199
		desc.addElement(textData);
200
	}
201

    
202
	private void handleIdentifier(String strIdentifier, Reference reference) {
203
		if (StringUtils.isBlank(strIdentifier)){
204
			return;
205
		}else if (LSID.isLsid(strIdentifier)){
206
			LSID lsid;
207
			try {
208
				lsid = new LSID(strIdentifier);
209
				reference.setLsid(lsid);
210
			} catch (MalformedLSIDException e) {
211
				//TODO should not happen as we have checked before
212
				throw new RuntimeException(e);
213
			}
214
		}
215
		try {
216
			URI uri = URI.create(strIdentifier);
217
			reference.setUri(uri);
218
		} catch (Exception e) {
219
			logger.debug("Reference is not an URI");
220
		}
221
		//TODO further identifier types
222

    
223
	}
224

    
225
	private Reference handleInRef(String strSource) {
226
		if (StringUtils.isBlank(strSource)){
227
			return null;
228
		}else{
229
			Reference inRef = ReferenceFactory.newGeneric();
230
			return inRef;
231
		}
232
	}
233

    
234

    
235
	private VerbatimTimePeriod handleDate(String strDate) {
236
	    VerbatimTimePeriod tp = TimePeriodParser.parseStringVerbatim(strDate);
237
		return tp;
238
	}
239

    
240
	private TeamOrPersonBase<?> handleCreator(String strCreator) {
241
		Team team = Team.NewTitledInstance(strCreator, strCreator);
242
		return team;
243
	}
244

    
245
	@Override
246
	public String getSourceId(StreamItem item) {
247
		String id = item.get(CORE_ID);
248
		return id;
249
	}
250

    
251

    
252
//********************** PARTITIONABLE **************************************/
253

    
254
	@Override
255
	protected void makeForeignKeysForItem(StreamItem item, Map<String, Set<String>> fkMap) {
256
		String value;
257
		String key;
258
		if ( hasValue(value = item.get(CORE_ID))){
259
			key = TermUri.DWC_TAXON.toString();
260
			Set<String> keySet = getKeySet(key, fkMap);
261
			keySet.add(value);
262
		}
263
	}
264

    
265

    
266
	@Override
267
	public Set<String> requiredSourceNamespaces() {
268
		Set<String> result = new HashSet<String>();
269
 		result.add(TermUri.DWC_TAXON.toString());
270
 		return result;
271
	}
272

    
273
//******************* TO STRING ******************************************/
274

    
275
	@Override
276
	public String toString(){
277
		return this.getClass().getName();
278
	}
279

    
280

    
281
}
(15-15/17)