latest developments in DwcA import
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / dwca / in / DwcTaxonCsv2CdmTaxonRelationConverter.java
1 // $Id$
2 /**
3 * Copyright (C) 2009 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10 package eu.etaxonomy.cdm.io.dwca.in;
11
12 import java.util.ArrayList;
13 import java.util.HashMap;
14 import java.util.HashSet;
15 import java.util.List;
16 import java.util.Map;
17 import java.util.Set;
18
19 import org.apache.log4j.Logger;
20
21 import eu.etaxonomy.cdm.common.CdmUtils;
22 import eu.etaxonomy.cdm.io.dwca.TermUri;
23 import eu.etaxonomy.cdm.model.common.CdmBase;
24 import eu.etaxonomy.cdm.model.reference.Reference;
25 import eu.etaxonomy.cdm.model.taxon.Classification;
26 import eu.etaxonomy.cdm.model.taxon.Synonym;
27 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
28 import eu.etaxonomy.cdm.model.taxon.Taxon;
29 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
30
31 /**
32 * @author a.mueller
33 * @date 23.11.2011
34 *
35 */
36 public class DwcTaxonCsv2CdmTaxonRelationConverter<STATE extends DwcaImportState> extends PartitionableConverterBase<DwcaImportState>
37 implements IPartitionableConverter<CsvStreamItem, INamespaceReader<CdmBase>, String>{
38 private static Logger logger = Logger.getLogger(DwcTaxonCsv2CdmTaxonRelationConverter.class);
39
40 private static final String ID = "id";
41
42 /**
43 * @param state
44 */
45 public DwcTaxonCsv2CdmTaxonRelationConverter(DwcaImportState state) {
46 super();
47 this.state = state;
48 }
49
50
51 public IReader<MappedCdmBase> map(CsvStreamItem item){
52 List<MappedCdmBase> resultList = new ArrayList<MappedCdmBase>();
53
54 Map<String, String> csvRecord = item.map;
55 Reference<?> sourceReference = null;
56 String sourceReferecenDetail = null;
57
58 String id = csvRecord.get(ID);
59 TaxonBase<?> taxonBase = getTaxonBase(id, item, null);
60 if (taxonBase == null){
61 String warning = "Taxon not available for id %s.";
62 warning = String.format(warning, id);
63 fireWarningEvent(warning, item, 8);
64 }else{
65
66 MappedCdmBase mcb = new MappedCdmBase(taxonBase);
67 resultList.add(mcb);
68
69 handleAcceptedNameUsage(item, state, taxonBase, id);
70
71 handleParentNameUsage(item, state, taxonBase, resultList);
72
73 handleKingdom(item, state);
74
75 handlePhylum(item, state);
76
77 handleClass(item, state);
78
79 handleOrder(item, state);
80
81 handleFamily(item, state);
82
83 handleGenus(item, state);
84
85 handleSubGenus(item, state);
86
87 }
88 csvRecord.remove(ID);
89
90
91 // <!-- Top level group; listed as kingdom but may be interpreted as domain or superkingdom
92 // The following eight groups are recognized: Animalia, Archaea, Bacteria, Chromista,
93 // Fungi, Plantae, Protozoa, Viruses -->
94 // <field index='10' term='http://rs.tdwg.org/dwc/terms/kingdom'/>
95
96 // <!-- Specific epithet; for hybrids, the multiplication symbol is included in the epithet -->
97 // <field index='17' term='http://rs.tdwg.org/dwc/terms/specificEpithet'/>
98
99 // <!-- Infraspecific epithet -->
100 // <field index='18' term='http://rs.tdwg.org/dwc/terms/infraspecificEpithet'/>
101
102 // <!-- Acceptance status published in -->
103 // <field index='20' term='http://purl.org/dc/terms/source'/>
104
105 // <!-- Reference in which the scientific name was first published -->
106 // <field index='21' term='http://rs.tdwg.org/dwc/terms/namePublishedIn'/>
107
108 // <!-- Scrutiny date -->
109 // <field index='23' term='http://purl.org/dc/terms/modified'/>
110 // <!-- Additional data for the taxon -->
111
112 // <field index='24' term='http://purl.org/dc/terms/description'/>
113 // </core>
114
115 return new ListReader<MappedCdmBase>(resultList);
116 }
117
118
119 @Override
120 public String getSourceId(CsvStreamItem item) {
121 String id = item.get(ID);
122 return id;
123 }
124
125
126 private void handleSubGenus(CsvStreamItem item, DwcaImportState state) {
127 // TODO Auto-generated method stub
128
129 }
130
131
132 private void handleGenus(CsvStreamItem item, DwcaImportState state) {
133 // TODO Auto-generated method stub
134
135 }
136
137
138 private void handleFamily(CsvStreamItem item, DwcaImportState state) {
139 // TODO Auto-generated method stub
140
141 }
142
143
144 private void handleOrder(CsvStreamItem item, DwcaImportState state) {
145 // TODO Auto-generated method stub
146
147 }
148
149
150 private void handleClass(CsvStreamItem item, DwcaImportState state) {
151 // TODO Auto-generated method stub
152
153 }
154
155
156 private void handlePhylum(CsvStreamItem item, DwcaImportState state) {
157 // TODO Auto-generated method stub
158
159 }
160
161
162 private void handleKingdom(CsvStreamItem item, DwcaImportState state) {
163 // TODO Auto-generated method stub
164
165 }
166
167
168 private void handleParentNameUsage(CsvStreamItem item, DwcaImportState state, TaxonBase<?> taxonBase, List<MappedCdmBase> resultList) {
169 if (exists(TermUri.DWC_PARENT_NAME_USAGE_ID, item) || exists(TermUri.DWC_PARENT_NAME_USAGE, item)){
170 if (taxonBase.isInstanceOf(Taxon.class)){
171 Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class);
172 String accId = item.get(TermUri.DWC_PARENT_NAME_USAGE_ID);
173 Taxon parentTaxon = getTaxonBase(accId, item, Taxon.class);
174 if (parentTaxon == null){
175 fireWarningEvent("NON-ID parent Name Usage not yet implemented or parent name usage id not available", item, 4);
176 }else{
177 Classification classification = getClassification(item);
178 Reference<?> citation = null;
179 classification.addParentChild(parentTaxon, taxon, citation, null);
180 resultList.add(new MappedCdmBase(classification));
181 }
182 }else{
183 String message = "PARENT_NAME_USAGE given for Synonym. This is not allowed in CDM.";
184 //TODO check "is this Taxon"
185 fireWarningEvent(message, item, 4);
186 }
187 }
188
189
190 }
191
192
193 private Classification getClassification(CsvStreamItem item) {
194 Set<Classification> result = new HashSet<Classification>();
195 String datasetKey = item.get(TermUri.DWC_DATASET_ID);
196 if (CdmUtils.areBlank(datasetKey,item.get(TermUri.DWC_DATASET_NAME))){
197 datasetKey = DwcTaxonCsv2CdmTaxonConverter.NO_DATASET;
198 }
199
200 result.addAll(state.get(TermUri.DWC_DATASET_ID.toString(), datasetKey, Classification.class));
201 result.addAll(state.get(TermUri.DWC_DATASET_NAME.toString(), item.get(TermUri.DWC_DATASET_NAME), Classification.class));
202 if (result.isEmpty()){
203 return null;
204 }else if (result.size() > 1){
205 fireWarningEvent("Dataset is ambigous. I take arbitrary one.", item, 8);
206 }
207 return result.iterator().next();
208 }
209
210
211 private void handleAcceptedNameUsage(CsvStreamItem item, DwcaImportState state, TaxonBase taxonBase, String id) {
212 if (exists(TermUri.DWC_ACCEPTED_NAME_USAGE_ID, item) || exists(TermUri.DWC_ACCEPTED_NAME_USAGE, item)){
213 String accId = item.get(TermUri.DWC_ACCEPTED_NAME_USAGE_ID);
214 if (id.equals(accId)){
215 return; //mapping to itself needs no further handling
216 }
217 if (taxonBase.isInstanceOf(Synonym.class)){
218 Synonym synonym = CdmBase.deproxy(taxonBase, Synonym.class);
219 Taxon accTaxon = getTaxonBase(accId, item, Taxon.class);
220 if (accTaxon == null){
221 fireWarningEvent("NON-ID accepted Name Usage not yet implemented or taxon for name usage id not available", item, 4);
222 }else{
223 accTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF(),null, null);
224 }
225 } else{
226 String message = "Accepted name usage is not of type synonym. This is not allowed in CDM. Can't create realtionship";
227 //TODO check "is this Taxon"
228 fireWarningEvent(message, item, 4);
229 }
230 }else{
231 if (logger.isDebugEnabled()){logger.debug("");}
232 }
233 }
234
235
236 private <T extends TaxonBase> T getTaxonBase(String id, CsvStreamItem item, Class<T> clazz) {
237 if (clazz == null){
238 clazz = (Class)TaxonBase.class;
239 }
240 List<T> taxonList = state.get(TermUri.DWC_TAXON.toString(), id, clazz);
241 if (taxonList.size() > 1){
242 String message = "Undefined taxon mapping for id %s.";
243 message = String.format(message, id);
244 fireWarningEvent(message, item, 8);
245 logger.warn(message); //TODO remove when events are handled correctly
246 return null;
247 }else if (taxonList.isEmpty()){
248 return null;
249 }else{
250 return taxonList.get(0);
251 }
252 }
253
254
255 //**************************** PARTITIONABLE ************************************************
256
257
258
259
260 protected void makeForeignKeysForItem(CsvStreamItem item, Map<String, Set<String>> fkMap){
261 //do nothing, their are no foreign keys yet to handle
262 }
263
264 //************************************* TO STRING ********************************************
265
266 @Override
267 public String toString(){
268 return this.getClass().getName();
269 }
270
271
272 }