Project

General

Profile

Download (8.8 KB) Statistics
| Branch: | Tag: | Revision:
// $Id$
/**
* Copyright (C) 2009 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
10
package eu.etaxonomy.cdm.io.dwca.in;
11

    
12
import java.net.URI;
13
import java.util.ArrayList;
14
import java.util.HashSet;
15
import java.util.List;
16
import java.util.Map;
17
import java.util.Set;
18

    
19
import org.apache.commons.lang.StringUtils;
20
import org.apache.log4j.Logger;
21

    
22
import com.ibm.lsid.MalformedLSIDException;
23

    
24
import eu.etaxonomy.cdm.io.dwca.TermUri;
25
import eu.etaxonomy.cdm.io.stream.StreamItem;
26
import eu.etaxonomy.cdm.model.agent.Team;
27
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
28
import eu.etaxonomy.cdm.model.common.CdmBase;
29
import eu.etaxonomy.cdm.model.common.LSID;
30
import eu.etaxonomy.cdm.model.common.TimePeriod;
31
import eu.etaxonomy.cdm.model.description.DescriptionElementSource;
32
import eu.etaxonomy.cdm.model.description.Feature;
33
import eu.etaxonomy.cdm.model.description.TaxonDescription;
34
import eu.etaxonomy.cdm.model.description.TextData;
35
import eu.etaxonomy.cdm.model.name.NonViralName;
36
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
37
import eu.etaxonomy.cdm.model.name.ZoologicalName;
38
import eu.etaxonomy.cdm.model.reference.Reference;
39
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
40
import eu.etaxonomy.cdm.model.taxon.Synonym;
41
import eu.etaxonomy.cdm.model.taxon.Taxon;
42
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
43
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
44

    
45
/**
46
 * @author a.mueller
47
 * @date 22.11.2011
48
 *
49
 */
50
public class GbifReferenceCsv2CdmConverter extends PartitionableConverterBase<DwcaDataImportConfiguratorBase, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase>>  
51
						implements IPartitionableConverter<StreamItem, IReader<CdmBase>, String>{
52
	
53
	private static final Logger logger = Logger.getLogger(GbifReferenceCsv2CdmConverter.class);
54

    
55
	private static final String CORE_ID = "coreId";
56
	
57
	/**
58
	 * @param state
59
	 */
60
	public GbifReferenceCsv2CdmConverter(DwcaDataImportStateBase state) {
61
		super(state);
62
	}
63

    
64
	public IReader<MappedCdmBase> map(StreamItem item ){
65
		List<MappedCdmBase> resultList = new ArrayList<MappedCdmBase>(); 
66
		
67
		Map<String, String> csv = item.map;
68
		Reference<?> sourceReference = state.getTransactionalSourceReference();
69
		String sourceReferecenDetail = null;
70
		
71
		String id = getSourceId(item);
72
		TaxonBase<?> taxon = getTaxonBase(id, item, TaxonBase.class, state);
73
		if (isNotBlank(id) && taxon == null){
74
			String message = "Taxon for id %s could not be found";
75
			message = String.format(message, id);
76
			fireWarningEvent(message, item, 8);
77
		}
78
		
79
		String strCreator = getValue(item, TermUri.DC_CREATOR);
80
		String strDate = getValue(item, TermUri.DC_DATE);
81
		String strTitle = getValue(item, TermUri.DC_TITLE);
82
		String strSource = getValue(item, TermUri.DC_SOURCE);
83
		String strIdentifier = getValue(item, TermUri.DC_IDENTIFIER);
84
		String strType = getValue(item, TermUri.DC_TYPE);
85
		
86
		Reference<?> reference = ReferenceFactory.newGeneric();
87
		resultList.add(new MappedCdmBase<CdmBase>(reference));
88

    
89
		//author
90
		TeamOrPersonBase<?> author = handleCreator(strCreator);
91
		reference.setAuthorship(author);
92
		//date
93
		TimePeriod publicationDate = handleDate(strDate);
94
		reference.setDatePublished(publicationDate);
95
		//title
96
		reference.setTitle(strTitle);
97
		//inreference
98
		Reference<?> inRef = handleInRef(strSource);
99
		if (inRef != null){
100
			reference.setInReference(inRef);
101
			resultList.add(new MappedCdmBase<CdmBase>(inRef));
102
		}
103

    
104
		//URI
105
		handleIdentifier(strIdentifier, reference);
106
		
107
		//type
108
		handleType(reference, strType, taxon, resultList, item);
109
		
110
		
111
		return new ListReader<MappedCdmBase>(resultList);
112
	}
113

    
114
	
115
	private void handleType(Reference<?> reference, String strType, TaxonBase<?> taxon, List<MappedCdmBase> resultList, StreamItem item) {
116
		// TODO handleType not yet implemented
117
		
118
		if (taxon == null){
119
			String message = "Taxon is null. Reference not imported.";
120
			fireWarningEvent(message,item, 4);
121
			//do nothing
122
		}else{
123
			boolean isNomRef = false;
124
			if (isNotBlank(strType)){
125
				if (strType.matches("Botanical Protologue")){
126
					if (taxon.getName() != null && reference != null && taxon.getName().isInstanceOf(NonViralName.class)){
127
						NonViralName<?> nvn = CdmBase.deproxy(taxon.getName(), NonViralName.class);
128
						nvn.setNomenclaturalReference(reference);
129
						isNomRef = true;
130
					}else{
131
						//TODO
132
					}
133
				}
134
			}
135
			
136
			//guess a nom ref
137
			if (isNomRef == false && config.isGuessNomenclaturalReferences()){
138
				//if reference equals in author and year we assume that it is the nom ref
139
				//this information is usually only available for ICZN names
140
				if (taxon.getName() != null && reference != null && taxon.getName().isInstanceOf(NonViralName.class)){
141
					NonViralName<?> nvn = CdmBase.deproxy(taxon.getName(), NonViralName.class);
142
					String taxonAuthor = nvn.getAuthorshipCache();
143
					String refAuthor = reference.getAuthorship().getNomenclaturalTitle();
144
					Integer combYear = null;
145
					Integer origYear = null;
146
					if (nvn.isInstanceOf(ZoologicalName.class)){
147
						ZoologicalName zooName = CdmBase.deproxy(nvn, ZoologicalName.class);
148
						combYear = zooName.getPublicationYear();
149
						origYear = zooName.getOriginalPublicationYear();
150
					}
151
					String refYear = reference.getYear();
152
					
153
					//combination compare
154
					if (taxonAuthor != null && taxonAuthor.equals(refAuthor)){
155
						if (combYear != null && String.valueOf(combYear).equals(refYear)){
156
							//is nom Ref
157
							isNomRef = true;
158
							nvn.setNomenclaturalReference(reference);
159
						}else if (origYear != null && String.valueOf(origYear).equals(refYear)){
160
							//TODO not yet handled by CDM
161
						}
162
					}
163
	
164
				}
165
			}
166
			if (config.isHandleAllRefsAsCitation()){
167
				if (taxon.isInstanceOf(Taxon.class)){
168
					TaxonDescription desc = getTaxonDescription(CdmBase.deproxy(taxon, Taxon.class), false);
169
					createCitation(desc, reference, taxon.getName());
170
					resultList.add(new MappedCdmBase<CdmBase>(desc));
171
				}else if (taxon.isInstanceOf(Synonym.class)){
172
					Synonym syn = CdmBase.deproxy(taxon, Synonym.class);
173
					for (Taxon tax: syn.getAcceptedTaxa()){
174
						TaxonDescription desc = getTaxonDescription(tax, false);
175
						createCitation(desc, reference, syn.getName());
176
						resultList.add(new MappedCdmBase<CdmBase>(desc));
177
					}
178
				}
179
				
180
			}
181
		
182
		}		
183
		
184
		
185
	}
186

    
187
	private void createCitation(TaxonDescription desc, Reference ref, TaxonNameBase nameUsedInSource) {
188
		Feature feature = Feature.CITATION();
189
		TextData textData = TextData.NewInstance(feature);
190
		DescriptionElementSource source = DescriptionElementSource.NewPrimarySourceInstance(ref, null, nameUsedInSource, null);
191
		textData.addSource(source);
192
		desc.addElement(textData);
193
	}
194

    
195
	private void handleIdentifier(String strIdentifier, Reference reference) {
196
		if (StringUtils.isBlank(strIdentifier)){
197
			return;
198
		}else if (LSID.isLsid(strIdentifier)){
199
			LSID lsid;
200
			try {
201
				lsid = new LSID(strIdentifier);
202
				reference.setLsid(lsid);
203
			} catch (MalformedLSIDException e) {
204
				//TODO should not happen as we have checked before
205
				throw new RuntimeException(e);
206
			}
207
		}
208
		try {
209
			URI uri = URI.create(strIdentifier);
210
			reference.setUri(uri);
211
		} catch (Exception e) {
212
			logger.debug("Reference is not an URI");
213
		}
214
		//TODO further identifier types
215
		
216
	}
217

    
218
	private Reference<?> handleInRef(String strSource) {
219
		if (StringUtils.isBlank(strSource)){
220
			return null;
221
		}else{
222
			Reference<?> inRef = ReferenceFactory.newGeneric();
223
			return inRef;
224
		}
225
	}
226
	
227

    
228
	private TimePeriod handleDate(String strDate) {
229
		TimePeriod tp = TimePeriodParser.parseString(strDate);
230
		return tp;
231
	}
232

    
233
	private TeamOrPersonBase handleCreator(String strCreator) {
234
		Team team = Team.NewTitledInstance(strCreator, strCreator);
235
		return team;
236
	}
237

    
238
	@Override
239
	public String getSourceId(StreamItem item) {
240
		String id = item.get(CORE_ID);
241
		return id;
242
	}
243

    
244
	
245
//********************** PARTITIONABLE **************************************/
246

    
247
	@Override
248
	protected void makeForeignKeysForItem(StreamItem item, Map<String, Set<String>> fkMap) {
249
		String value;
250
		String key;
251
		if ( hasValue(value = item.get(CORE_ID))){
252
			key = TermUri.DWC_TAXON.toString();
253
			Set<String> keySet = getKeySet(key, fkMap);
254
			keySet.add(value);
255
		}
256
	}
257
	
258
	
259
	@Override
260
	public Set<String> requiredSourceNamespaces() {
261
		Set<String> result = new HashSet<String>();
262
 		result.add(TermUri.DWC_TAXON.toString());
263
 		return result;
264
	}
265
	
266
//******************* TO STRING ******************************************/
267
	
268
	@Override
269
	public String toString(){
270
		return this.getClass().getName();
271
	}
272

    
273

    
274
}
(19-19/37)