Project

General

Profile

Download (8.04 KB) Statistics
| Branch: | Tag: | Revision:
1 c88bcdaa Andreas Müller
// $Id$
2
/**
3
* Copyright (C) 2009 EDIT
4
* European Distributed Institute of Taxonomy 
5
* http://www.e-taxonomy.eu
6
* 
7
* The contents of this file are subject to the Mozilla Public License Version 1.1
8
* See LICENSE.TXT at the top of this package for the full license terms.
9
*/
10
package eu.etaxonomy.cdm.io.dwca.in;
11
12 8298b25b Andreas Müller
import java.net.URI;
13 c88bcdaa Andreas Müller
import java.util.ArrayList;
14
import java.util.HashSet;
15
import java.util.List;
16
import java.util.Map;
17
import java.util.Set;
18
19 8298b25b Andreas Müller
import org.apache.commons.lang.StringUtils;
20 c88bcdaa Andreas Müller
import org.apache.log4j.Logger;
21
22 8298b25b Andreas Müller
import com.ibm.lsid.MalformedLSIDException;
23
24 c88bcdaa Andreas Müller
import eu.etaxonomy.cdm.io.dwca.TermUri;
25 8298b25b Andreas Müller
import eu.etaxonomy.cdm.model.agent.Team;
26
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
27 c88bcdaa Andreas Müller
import eu.etaxonomy.cdm.model.common.CdmBase;
28 8298b25b Andreas Müller
import eu.etaxonomy.cdm.model.common.DescriptionElementSource;
29
import eu.etaxonomy.cdm.model.common.LSID;
30
import eu.etaxonomy.cdm.model.common.TimePeriod;
31
import eu.etaxonomy.cdm.model.description.Feature;
32
import eu.etaxonomy.cdm.model.description.TaxonDescription;
33
import eu.etaxonomy.cdm.model.description.TextData;
34
import eu.etaxonomy.cdm.model.name.NonViralName;
35
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
36
import eu.etaxonomy.cdm.model.name.ZoologicalName;
37 c88bcdaa Andreas Müller
import eu.etaxonomy.cdm.model.reference.Reference;
38 8298b25b Andreas Müller
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
39
import eu.etaxonomy.cdm.model.taxon.Synonym;
40 c88bcdaa Andreas Müller
import eu.etaxonomy.cdm.model.taxon.Taxon;
41 8298b25b Andreas Müller
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
42 c88bcdaa Andreas Müller
43
/**
44
 * @author a.mueller
45
 * @date 22.11.2011
46
 *
47
 */
48 02b0debc Andreas Müller
public class GbifReferenceCsv2CdmConverter extends PartitionableConverterBase<DwcaImportState>  
49 c88bcdaa Andreas Müller
						implements IPartitionableConverter<CsvStreamItem, IReader<CdmBase>, String>{
50
	
51 02b0debc Andreas Müller
	private static final Logger logger = Logger.getLogger(GbifReferenceCsv2CdmConverter.class);
52 c88bcdaa Andreas Müller
53
	private static final String CORE_ID = "coreId";
54
	
55
	/**
	 * Creates a converter that works on the given import state.
	 *
	 * @param state the import state; it is passed on unchanged to the superclass constructor
	 */
	public GbifReferenceCsv2CdmConverter(DwcaImportState state) {
		super(state);
	}
61
62
	public IReader<MappedCdmBase> map(CsvStreamItem item ){
63
		List<MappedCdmBase> resultList = new ArrayList<MappedCdmBase>(); 
64
		
65
		Map<String, String> csv = item.map;
66 8298b25b Andreas Müller
		Reference<?> sourceReference = state.getTransactionalSourceReference();
67 c88bcdaa Andreas Müller
		String sourceReferecenDetail = null;
68
		
69
		String id = getSourceId(item);
70 8298b25b Andreas Müller
		TaxonBase<?> taxon = getTaxonBase(id, item, TaxonBase.class, state);
71
		
72
		String strCreator = getValue(item, TermUri.DC_CREATOR);
73
		String strDate = getValue(item, TermUri.DC_DATE);
74
		String strTitle = getValue(item, TermUri.DC_TITLE);
75
		String strSource = getValue(item, TermUri.DC_SOURCE);
76
		String strIdentifier = getValue(item, TermUri.DC_IDENTIFIER);
77
		String strType = getValue(item, TermUri.DC_TYPE);
78
		
79 0684e761 Andreas Müller
		Reference<?> reference = ReferenceFactory.newGeneric();
80 8298b25b Andreas Müller
		resultList.add(new MappedCdmBase<CdmBase>(reference));
81
82
		//author
83 0684e761 Andreas Müller
		TeamOrPersonBase<?> author = handleCreator(strCreator);
84 8298b25b Andreas Müller
		reference.setAuthorTeam(author);
85
		//date
86
		TimePeriod publicationDate = handleDate(strDate);
87
		reference.setDatePublished(publicationDate);
88
		//title
89
		reference.setTitle(strTitle);
90
		//inreference
91 0684e761 Andreas Müller
		Reference<?> inRef = handleInRef(strSource);
92 8298b25b Andreas Müller
		if (inRef != null){
93
			reference.setInReference(inRef);
94
			resultList.add(new MappedCdmBase<CdmBase>(inRef));
95 c88bcdaa Andreas Müller
		}
96 8298b25b Andreas Müller
97
		//URI
98
		handleIdentifier(strIdentifier, reference);
99
		
100
		//type
101 0684e761 Andreas Müller
		handleType(reference, strType, taxon, resultList, item);
102 8298b25b Andreas Müller
		
103 c88bcdaa Andreas Müller
		
104
		return new ListReader<MappedCdmBase>(resultList);
105
	}
106
107
	
108 0684e761 Andreas Müller
	private void handleType(Reference<?> reference, String strType, TaxonBase<?> taxon, List<MappedCdmBase> resultList, CsvStreamItem item) {
109 8298b25b Andreas Müller
		// TODO handleType not yet implemented
110
		
111
		//guess a nom ref
112
		if (config.isGuessNomenclaturalReferences()){
113
			//if reference equals in author and year we assume that it is the nom ref
114
			//this information is usually only available for ICZN names
115
			if (taxon != null && taxon.getName() != null && reference != null && taxon.getName().isInstanceOf(NonViralName.class)){
116
				boolean isNomRef = false;
117 0684e761 Andreas Müller
				NonViralName<?> nvn = CdmBase.deproxy(taxon.getName(), NonViralName.class);
118 8298b25b Andreas Müller
				String taxonAuthor = nvn.getAuthorshipCache();
119
				String refAuthor = reference.getAuthorTeam().getNomenclaturalTitle();
120
				Integer combYear = null;
121
				Integer origYear = null;
122
				if (nvn.isInstanceOf(ZoologicalName.class)){
123
					ZoologicalName zooName = CdmBase.deproxy(nvn, ZoologicalName.class);
124
					combYear = zooName.getPublicationYear();
125
					origYear = zooName.getOriginalPublicationYear();
126
				}
127
				String refYear = reference.getYear();
128
				
129
				//combination compare
130
				if (taxonAuthor != null && taxonAuthor.equals(refAuthor)){
131
					if (combYear != null && String.valueOf(combYear).equals(refYear)){
132
						//is nom Ref
133
						isNomRef = true;
134
						nvn.setNomenclaturalReference(reference);
135
					}else if (origYear != null && String.valueOf(origYear).equals(refYear)){
136
						//TODO not yet handled by CDM
137
					}
138
				}
139
140
			}
141
		}
142
		if (config.isHandleAllRefsAsCitation()){
143 0684e761 Andreas Müller
			if (taxon == null){
144
				String message = "Reference entry does not belong to an existing taxon. Reference type can not be determined!";
145
				fireWarningEvent(message,item, 4);
146
				//do nothing
147
			}else if (taxon.isInstanceOf(Taxon.class)){
148 8298b25b Andreas Müller
				TaxonDescription desc = getTaxonDescription(CdmBase.deproxy(taxon, Taxon.class), false);
149
				createCitation(desc, reference, taxon.getName());
150
				resultList.add(new MappedCdmBase<CdmBase>(desc));
151
			}else if (taxon.isInstanceOf(Synonym.class)){
152
				Synonym syn = CdmBase.deproxy(taxon, Synonym.class);
153
				for (Taxon tax: syn.getAcceptedTaxa()){
154
					TaxonDescription desc = getTaxonDescription(tax, false);
155
					createCitation(desc, reference, syn.getName());
156
					resultList.add(new MappedCdmBase<CdmBase>(desc));
157
				}
158
			}
159
			
160
		}
161
		
162
		
163
		
164
		
165
	}
166
167
	private void createCitation(TaxonDescription desc, Reference ref, TaxonNameBase nameUsedInSource) {
168
		Feature feature = Feature.CITATION();
169
		TextData textData = TextData.NewInstance(feature);
170
		DescriptionElementSource source = DescriptionElementSource.NewInstance(ref, null, nameUsedInSource, null);
171
		textData.addSource(source);
172
		desc.addElement(textData);
173
	}
174
175
	private void handleIdentifier(String strIdentifier, Reference reference) {
176
		if (StringUtils.isBlank(strIdentifier)){
177
			return;
178
		}else if (LSID.isLsid(strIdentifier)){
179
			LSID lsid;
180
			try {
181
				lsid = new LSID(strIdentifier);
182
				reference.setLsid(lsid);
183
			} catch (MalformedLSIDException e) {
184
				//TODO should not happen as we have checked before
185
				throw new RuntimeException(e);
186
			}
187
		}
188
		try {
189
			URI uri = URI.create(strIdentifier);
190
			reference.setUri(uri);
191
		} catch (Exception e) {
192
			logger.debug("Reference is not an URI");
193
		}
194
		//TODO further identifier types
195
		
196
	}
197
198
	private Reference<?> handleInRef(String strSource) {
199
		if (StringUtils.isBlank(strSource)){
200
			return null;
201
		}else{
202
			Reference<?> inRef = ReferenceFactory.newGeneric();
203
			return inRef;
204
		}
205
	}
206
	
207
208
	private TimePeriod handleDate(String strDate) {
209
		TimePeriod tp = TimePeriod.parseString(strDate);
210
		return tp;
211
	}
212
213
	private TeamOrPersonBase handleCreator(String strCreator) {
214
		Team team = Team.NewTitledInstance(strCreator, strCreator);
215
		return team;
216
	}
217
218 c88bcdaa Andreas Müller
	@Override
219
	public String getSourceId(CsvStreamItem item) {
220
		String id = item.get(CORE_ID);
221
		return id;
222
	}
223
224
	
225
//********************** PARTITIONABLE **************************************/
226
227
	@Override
228
	protected void makeForeignKeysForItem(CsvStreamItem item, Map<String, Set<String>> fkMap) {
229
		String value;
230
		String key;
231
		if ( hasValue(value = item.get(CORE_ID))){
232
			key = TermUri.DWC_TAXON.toString();
233
			Set<String> keySet = getKeySet(key, fkMap);
234
			keySet.add(value);
235
		}
236
	}
237
	
238
	
239
	@Override
240
	public Set<String> requiredSourceNamespaces() {
241
		Set<String> result = new HashSet<String>();
242
 		result.add(TermUri.DWC_TAXON.toString());
243
 		return result;
244
	}
245
	
246
//******************* TO STRING ******************************************/
247
	
248
	@Override
249
	public String toString(){
250
		return this.getClass().getName();
251
	}
252
253
254
}