update factory methods for original sources #1549
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / dwca / in / GbifReferenceCsv2CdmConverter.java
1 // $Id$
2 /**
3 * Copyright (C) 2009 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10 package eu.etaxonomy.cdm.io.dwca.in;
11
12 import java.net.URI;
13 import java.util.ArrayList;
14 import java.util.HashSet;
15 import java.util.List;
16 import java.util.Map;
17 import java.util.Set;
18
19 import org.apache.commons.lang.StringUtils;
20 import org.apache.log4j.Logger;
21
22 import com.ibm.lsid.MalformedLSIDException;
23
24 import eu.etaxonomy.cdm.io.dwca.TermUri;
25 import eu.etaxonomy.cdm.model.agent.Team;
26 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
27 import eu.etaxonomy.cdm.model.common.CdmBase;
28 import eu.etaxonomy.cdm.model.common.DescriptionElementSource;
29 import eu.etaxonomy.cdm.model.common.LSID;
30 import eu.etaxonomy.cdm.model.common.OriginalSourceType;
31 import eu.etaxonomy.cdm.model.common.TimePeriod;
32 import eu.etaxonomy.cdm.model.description.Feature;
33 import eu.etaxonomy.cdm.model.description.TaxonDescription;
34 import eu.etaxonomy.cdm.model.description.TextData;
35 import eu.etaxonomy.cdm.model.name.NonViralName;
36 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
37 import eu.etaxonomy.cdm.model.name.ZoologicalName;
38 import eu.etaxonomy.cdm.model.reference.Reference;
39 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
40 import eu.etaxonomy.cdm.model.taxon.Synonym;
41 import eu.etaxonomy.cdm.model.taxon.Taxon;
42 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
43
44 /**
45 * @author a.mueller
46 * @date 22.11.2011
47 *
48 */
49 public class GbifReferenceCsv2CdmConverter extends PartitionableConverterBase<DwcaImportState>
50 implements IPartitionableConverter<CsvStreamItem, IReader<CdmBase>, String>{
51
52 private static final Logger logger = Logger.getLogger(GbifReferenceCsv2CdmConverter.class);
53
54 private static final String CORE_ID = "coreId";
55
56 /**
57 * @param state
58 */
59 public GbifReferenceCsv2CdmConverter(DwcaImportState state) {
60 super(state);
61 }
62
63 public IReader<MappedCdmBase> map(CsvStreamItem item ){
64 List<MappedCdmBase> resultList = new ArrayList<MappedCdmBase>();
65
66 Map<String, String> csv = item.map;
67 Reference<?> sourceReference = state.getTransactionalSourceReference();
68 String sourceReferecenDetail = null;
69
70 String id = getSourceId(item);
71 TaxonBase<?> taxon = getTaxonBase(id, item, TaxonBase.class, state);
72 if (isNotBlank(id) && taxon == null){
73 String message = "Taxon for id %s could not be found";
74 message = String.format(message, id);
75 fireWarningEvent(message, item, 8);
76 }
77
78 String strCreator = getValue(item, TermUri.DC_CREATOR);
79 String strDate = getValue(item, TermUri.DC_DATE);
80 String strTitle = getValue(item, TermUri.DC_TITLE);
81 String strSource = getValue(item, TermUri.DC_SOURCE);
82 String strIdentifier = getValue(item, TermUri.DC_IDENTIFIER);
83 String strType = getValue(item, TermUri.DC_TYPE);
84
85 Reference<?> reference = ReferenceFactory.newGeneric();
86 resultList.add(new MappedCdmBase<CdmBase>(reference));
87
88 //author
89 TeamOrPersonBase<?> author = handleCreator(strCreator);
90 reference.setAuthorTeam(author);
91 //date
92 TimePeriod publicationDate = handleDate(strDate);
93 reference.setDatePublished(publicationDate);
94 //title
95 reference.setTitle(strTitle);
96 //inreference
97 Reference<?> inRef = handleInRef(strSource);
98 if (inRef != null){
99 reference.setInReference(inRef);
100 resultList.add(new MappedCdmBase<CdmBase>(inRef));
101 }
102
103 //URI
104 handleIdentifier(strIdentifier, reference);
105
106 //type
107 handleType(reference, strType, taxon, resultList, item);
108
109
110 return new ListReader<MappedCdmBase>(resultList);
111 }
112
113
114 private void handleType(Reference<?> reference, String strType, TaxonBase<?> taxon, List<MappedCdmBase> resultList, CsvStreamItem item) {
115 // TODO handleType not yet implemented
116
117 if (taxon == null){
118 String message = "Taxon is null. Reference not imported.";
119 fireWarningEvent(message,item, 4);
120 //do nothing
121 }else{
122 boolean isNomRef = false;
123 if (isNotBlank(strType)){
124 if (strType.matches("Botanical Protologue")){
125 if (taxon.getName() != null && reference != null && taxon.getName().isInstanceOf(NonViralName.class)){
126 NonViralName<?> nvn = CdmBase.deproxy(taxon.getName(), NonViralName.class);
127 nvn.setNomenclaturalReference(reference);
128 isNomRef = true;
129 }else{
130 //TODO
131 }
132 }
133 }
134
135 //guess a nom ref
136 if (isNomRef == false && config.isGuessNomenclaturalReferences()){
137 //if reference equals in author and year we assume that it is the nom ref
138 //this information is usually only available for ICZN names
139 if (taxon.getName() != null && reference != null && taxon.getName().isInstanceOf(NonViralName.class)){
140 NonViralName<?> nvn = CdmBase.deproxy(taxon.getName(), NonViralName.class);
141 String taxonAuthor = nvn.getAuthorshipCache();
142 String refAuthor = reference.getAuthorTeam().getNomenclaturalTitle();
143 Integer combYear = null;
144 Integer origYear = null;
145 if (nvn.isInstanceOf(ZoologicalName.class)){
146 ZoologicalName zooName = CdmBase.deproxy(nvn, ZoologicalName.class);
147 combYear = zooName.getPublicationYear();
148 origYear = zooName.getOriginalPublicationYear();
149 }
150 String refYear = reference.getYear();
151
152 //combination compare
153 if (taxonAuthor != null && taxonAuthor.equals(refAuthor)){
154 if (combYear != null && String.valueOf(combYear).equals(refYear)){
155 //is nom Ref
156 isNomRef = true;
157 nvn.setNomenclaturalReference(reference);
158 }else if (origYear != null && String.valueOf(origYear).equals(refYear)){
159 //TODO not yet handled by CDM
160 }
161 }
162
163 }
164 }
165 if (config.isHandleAllRefsAsCitation()){
166 if (taxon.isInstanceOf(Taxon.class)){
167 TaxonDescription desc = getTaxonDescription(CdmBase.deproxy(taxon, Taxon.class), false);
168 createCitation(desc, reference, taxon.getName());
169 resultList.add(new MappedCdmBase<CdmBase>(desc));
170 }else if (taxon.isInstanceOf(Synonym.class)){
171 Synonym syn = CdmBase.deproxy(taxon, Synonym.class);
172 for (Taxon tax: syn.getAcceptedTaxa()){
173 TaxonDescription desc = getTaxonDescription(tax, false);
174 createCitation(desc, reference, syn.getName());
175 resultList.add(new MappedCdmBase<CdmBase>(desc));
176 }
177 }
178
179 }
180
181 }
182
183
184 }
185
186 private void createCitation(TaxonDescription desc, Reference ref, TaxonNameBase nameUsedInSource) {
187 Feature feature = Feature.CITATION();
188 TextData textData = TextData.NewInstance(feature);
189 DescriptionElementSource source = DescriptionElementSource.NewPrimarySourceInstance(ref, null, nameUsedInSource, null);
190 textData.addSource(source);
191 desc.addElement(textData);
192 }
193
194 private void handleIdentifier(String strIdentifier, Reference reference) {
195 if (StringUtils.isBlank(strIdentifier)){
196 return;
197 }else if (LSID.isLsid(strIdentifier)){
198 LSID lsid;
199 try {
200 lsid = new LSID(strIdentifier);
201 reference.setLsid(lsid);
202 } catch (MalformedLSIDException e) {
203 //TODO should not happen as we have checked before
204 throw new RuntimeException(e);
205 }
206 }
207 try {
208 URI uri = URI.create(strIdentifier);
209 reference.setUri(uri);
210 } catch (Exception e) {
211 logger.debug("Reference is not an URI");
212 }
213 //TODO further identifier types
214
215 }
216
217 private Reference<?> handleInRef(String strSource) {
218 if (StringUtils.isBlank(strSource)){
219 return null;
220 }else{
221 Reference<?> inRef = ReferenceFactory.newGeneric();
222 return inRef;
223 }
224 }
225
226
227 private TimePeriod handleDate(String strDate) {
228 TimePeriod tp = TimePeriod.parseString(strDate);
229 return tp;
230 }
231
232 private TeamOrPersonBase handleCreator(String strCreator) {
233 Team team = Team.NewTitledInstance(strCreator, strCreator);
234 return team;
235 }
236
237 @Override
238 public String getSourceId(CsvStreamItem item) {
239 String id = item.get(CORE_ID);
240 return id;
241 }
242
243
244 //********************** PARTITIONABLE **************************************/
245
246 @Override
247 protected void makeForeignKeysForItem(CsvStreamItem item, Map<String, Set<String>> fkMap) {
248 String value;
249 String key;
250 if ( hasValue(value = item.get(CORE_ID))){
251 key = TermUri.DWC_TAXON.toString();
252 Set<String> keySet = getKeySet(key, fkMap);
253 keySet.add(value);
254 }
255 }
256
257
258 @Override
259 public Set<String> requiredSourceNamespaces() {
260 Set<String> result = new HashSet<String>();
261 result.add(TermUri.DWC_TAXON.toString());
262 return result;
263 }
264
265 //******************* TO STRING ******************************************/
266
267 @Override
268 public String toString(){
269 return this.getClass().getName();
270 }
271
272
273 }