minor (logging)
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / specimen / excel / in / SpecimenCdmExcelImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.specimen.excel.in;
11
12 import java.text.ParseException;
13 import java.util.HashMap;
14 import java.util.List;
15 import java.util.Set;
16 import java.util.UUID;
17
18 import org.apache.commons.lang.StringUtils;
19 import org.apache.log4j.Logger;
20 import org.springframework.stereotype.Component;
21
22 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
23 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade.DerivedUnitType;
24 import eu.etaxonomy.cdm.common.CdmUtils;
25 import eu.etaxonomy.cdm.io.common.ICdmIO;
26 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
27 import eu.etaxonomy.cdm.io.excel.common.ExcelImporterBase;
28 import eu.etaxonomy.cdm.model.agent.AgentBase;
29 import eu.etaxonomy.cdm.model.agent.Person;
30 import eu.etaxonomy.cdm.model.agent.Team;
31 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
32 import eu.etaxonomy.cdm.model.location.NamedArea;
33 import eu.etaxonomy.cdm.model.location.NamedAreaLevel;
34 import eu.etaxonomy.cdm.model.location.NamedAreaType;
35 import eu.etaxonomy.cdm.model.location.ReferenceSystem;
36 import eu.etaxonomy.cdm.model.location.WaterbodyOrCountry;
37 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
38 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
39 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
40 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
41 import eu.etaxonomy.cdm.model.occurrence.Collection;
42 import eu.etaxonomy.cdm.model.reference.Reference;
43 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
44 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
45
46 /**
47 * @author a.mueller
48 * @created 10.05.2011
49 * @version 1.0
50 */
51 @Component
52 public class SpecimenCdmExcelImport extends ExcelImporterBase<SpecimenCdmExcelImportState> implements ICdmIO<SpecimenCdmExcelImportState> {
53 private static final Logger logger = Logger.getLogger(SpecimenCdmExcelImport.class);
54
55 private static final String WORKSHEET_NAME = "Specimen";
56
57 private static final String UUID_COLUMN = "UUID";
58 private static final String BASIS_OF_RECORD_COLUMN = "BasisOfRecord";
59 private static final String COUNTRY_COLUMN = "Country";
60 private static final String ISO_COUNTRY_COLUMN = "ISOCountry";
61 private static final String LOCALITY_COLUMN = "Locality";
62 private static final String ABSOLUTE_ELEVATION_COLUMN = "AbsoluteElevation";
63 private static final String COLLECTION_DATE_COLUMN = "CollectionDate";
64 private static final String COLLECTION_DATE_END_COLUMN = "CollectionDateEnd";
65 private static final String COLLECTOR_COLUMN = "Collector";
66 private static final String LONGITUDE_COLUMN = "Longitude";
67 private static final String LATITUDE_COLUMN = "Latitude";
68 private static final String REFERENCE_SYSTEM_COLUMN = "ReferenceSystem";
69 private static final String ERROR_RADIUS_COLUMN = "ErrorRadius";
70
71
72 private static final String COLLECTORS_NUMBER_COLUMN = "CollectorsNumber";
73 private static final String ECOLOGY_COLUMN = "Ecology";
74 private static final String PLANT_DESCRIPTION_COLUMN = "PlantDescription";
75 private static final String FIELD_NOTES_COLUMN = "FieldNotes";
76 private static final String SEX_COLUMN = "Sex";
77
78
79 private static final String ACCESSION_NUMBER_COLUMN = "AccessionNumber";
80 private static final String BARCODE_COLUMN = "Barcode";
81 private static final String COLLECTION_CODE_COLUMN = "CollectionCode";
82 private static final String COLLECTION_COLUMN = "Collection";
83
84 private static final String TYPE_CATEGORY_COLUMN = "TypeCategory";
85 private static final String TYPIFIED_NAME_COLUMN = "TypifiedName";
86
87
88 private static final String SOURCE_COLUMN = "Source";
89 private static final String ID_IN_SOURCE_COLUMN = "IdInSource";
90
91
92 private static final String SPECIFIC_EPITHET_COLUMN = "SpecificEpithet";
93 private static final String FAMILY_COLUMN = "Family";
94 private static final String GENUS_COLUMN = "Genus";
95 private static final String AUTHOR_COLUMN = "Author";
96
97
98
99 public SpecimenCdmExcelImport() {
100 super();
101 }
102
103 @Override
104 protected boolean analyzeRecord(HashMap<String, String> record, SpecimenCdmExcelImportState state) {
105 boolean success = true;
106 Set<String> keys = record.keySet();
107
108 SpecimenRow row = new SpecimenRow();
109 state.setSpecimenRow(row);
110
111 for (String originalKey: keys) {
112 Integer index = 0;
113 String indexedKey = CdmUtils.removeDuplicateWhitespace(originalKey.trim()).toString();
114 String[] split = indexedKey.split("_");
115 String key = split[0];
116 if (split.length > 1){
117 String indexString = split[split.length - 1];
118 try {
119 index = Integer.valueOf(indexString);
120 } catch (NumberFormatException e) {
121 String message = "Index must be integer";
122 logger.error(message);
123 continue;
124 }
125 }
126
127 String value = (String) record.get(indexedKey);
128 if (! StringUtils.isBlank(value)) {
129 if (logger.isDebugEnabled()) { logger.debug(key + ": " + value); }
130 value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
131 }else{
132 continue;
133 }
134
135
136 if (key.equalsIgnoreCase(UUID_COLUMN)) {
137 row.setUuid(UUID.fromString(value)); //VALIDATE UUID
138 } else if(key.equalsIgnoreCase(BASIS_OF_RECORD_COLUMN)) {
139 row.setBasisOfRecord(value);
140 } else if(key.equalsIgnoreCase(COUNTRY_COLUMN)) {
141 row.setCountry(value);
142 } else if(key.equalsIgnoreCase(ISO_COUNTRY_COLUMN)) {
143 row.setIsoCountry(value);
144 } else if(key.equalsIgnoreCase(LOCALITY_COLUMN)) {
145 row.setLocality(value);
146 } else if(key.equalsIgnoreCase(FIELD_NOTES_COLUMN)) {
147 row.setLocality(value);
148 } else if(key.equalsIgnoreCase(ABSOLUTE_ELEVATION_COLUMN)) {
149 row.setAbsoluteElevation(value);
150 } else if(key.equalsIgnoreCase(COLLECTOR_COLUMN)) {
151 row.putCollector(index, value);
152 } else if(key.equalsIgnoreCase(ECOLOGY_COLUMN)) {
153 row.setEcology(value);
154 } else if(key.equalsIgnoreCase(PLANT_DESCRIPTION_COLUMN)) {
155 row.setPlantDescription(value);
156 } else if(key.equalsIgnoreCase(SEX_COLUMN)) {
157 row.setSex(value);
158 } else if(key.equalsIgnoreCase(COLLECTION_DATE_COLUMN)) {
159 row.setCollectingDate(value);
160 } else if(key.equalsIgnoreCase(COLLECTION_DATE_END_COLUMN)) {
161 row.setCollectingDateEnd(value);
162 } else if(key.equalsIgnoreCase(COLLECTOR_COLUMN)) {
163 row.putCollector(index, value);
164 } else if(key.equalsIgnoreCase(COLLECTORS_NUMBER_COLUMN)) {
165 row.setCollectorsNumber(value);
166 } else if(key.equalsIgnoreCase(LONGITUDE_COLUMN)) {
167 row.setLongitude(value);
168 } else if(key.equalsIgnoreCase(LATITUDE_COLUMN)) {
169 row.setLatitude(value);
170 } else if(key.equalsIgnoreCase(REFERENCE_SYSTEM_COLUMN)) {
171 row.setReferenceSystem(value);
172 } else if(key.equalsIgnoreCase(ERROR_RADIUS_COLUMN)) {
173 row.setErrorRadius(value);
174
175 } else if(key.equalsIgnoreCase(ACCESSION_NUMBER_COLUMN)) {
176 row.setLocality(value);
177 } else if(key.equalsIgnoreCase(BARCODE_COLUMN)) {
178 row.setBarcode(value);
179 } else if(key.equalsIgnoreCase(AUTHOR_COLUMN)) {
180 row.setAuthor(value);
181 } else if(key.equalsIgnoreCase(FAMILY_COLUMN)) {
182 row.setFamily(value);
183 } else if(key.equalsIgnoreCase(GENUS_COLUMN)) {
184 row.setGenus(value);
185 } else if(key.equalsIgnoreCase(SPECIFIC_EPITHET_COLUMN)) {
186 row.setSpecificEpithet(value);
187 } else if(key.equalsIgnoreCase(COLLECTION_CODE_COLUMN)) {
188 row.setCollectionCode(value);
189 } else if(key.equalsIgnoreCase(COLLECTION_COLUMN)) {
190 row.setCollection(value);
191
192 } else if(key.equalsIgnoreCase(TYPE_CATEGORY_COLUMN)) {
193 row.putTypeCategory(index, getSpecimenTypeStatus(state, value));
194 } else if(key.equalsIgnoreCase(TYPIFIED_NAME_COLUMN)) {
195 row.putTypifiedName(index, getTaxonName(state, value));
196
197
198 } else if(key.equalsIgnoreCase(SOURCE_COLUMN)) {
199 row.putSourceReference(index, getOrMakeReference(state, value));
200 } else if(key.equalsIgnoreCase(ID_IN_SOURCE_COLUMN)) {
201 row.putIdInSource(index, value);
202 }else {
203 success = false;
204 logger.error("Unexpected column header " + key);
205 }
206 }
207 return success;
208 }
209
210 @Override
211 protected boolean firstPass(SpecimenCdmExcelImportState state) {
212 SpecimenRow row = state.getSpecimenRow();
213
214 //basis of record
215 DerivedUnitType type = DerivedUnitType.valueOf2(row.getBasisOfRecord());
216 if (type == null){
217 String message = "%s is not a valid BasisOfRecord. 'Unknown' is used instead.";
218 message = String.format(message, row.getBasisOfRecord());
219 logger.warn(message);
220 type = DerivedUnitType.DerivedUnit;
221 }
222 DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(type);
223
224 //country
225 handleCountry(facade, row, state);
226
227 facade.setGatheringPeriod(getTimePeriod(row.getCollectingDate(), row.getCollectingDateEnd()));
228 facade.setLocality(row.getLocality());
229 facade.setFieldNotes(row.getFieldNotes());
230 facade.setFieldNumber(row.getCollectorsNumber());
231 facade.setEcology(row.getEcology());
232 facade.setPlantDescription(row.getPlantDescription());
233 // facade.setSex(row.get)
234 handleExactLocation(facade, row, state);
235 facade.setCollector(getOrMakeAgent(state, row.getCollectors()));
236
237
238 //derivedUnit
239 facade.setBarcode(row.getBarcode());
240 facade.setAccessionNumber(row.getAccessionNumber());
241 facade.setCollection(getOrMakeCollection(state, row.getCollectionCode(), row.getCollection()));
242 for (IdentifiableSource source : row.getSources()){
243 facade.addSource(source);
244 }
245 for (SpecimenTypeDesignation designation : row.getTypeDesignations()){
246 facade.innerDerivedUnit().addSpecimenTypeDesignation(designation);
247 }
248
249
250
251 //save
252 getOccurrenceService().save(facade.innerDerivedUnit());
253 return true;
254 }
255
256 private AgentBase<?> getOrMakeAgent(SpecimenCdmExcelImportState state, List<String> agents) {
257 if (agents.size() == 0){
258 return null;
259 }else if (agents.size() == 1){
260 return getOrMakePerson(state, agents.get(0));
261 }else{
262 return getOrMakeTeam(state, agents);
263 }
264 }
265
266 private Team getOrMakeTeam(SpecimenCdmExcelImportState state, List<String> agents) {
267 String key = CdmUtils.concat("_", agents.toArray(new String[0]));
268
269 Team result = state.getTeam(key);
270 if (result == null){
271 result = Team.NewInstance();
272 for (String member : agents){
273 Person person = getOrMakePerson(state, member);
274 result.addTeamMember(person);
275 }
276 state.putTeam(key, result);
277 }
278 return result;
279 }
280
281 private Person getOrMakePerson(SpecimenCdmExcelImportState state, String value) {
282 Person result = state.getPerson(value);
283 if (result == null){
284 result = Person.NewInstance();
285 result.setTitleCache(value, true);
286 state.putPerson(value, result);
287 }
288 return result;
289 }
290
291 private Reference<?> getOrMakeReference(SpecimenCdmExcelImportState state, String value) {
292 Reference<?> result = state.getReference(value);
293 if (result == null){
294 result = ReferenceFactory.newGeneric();
295 result.setTitleCache(value, true);
296 state.putReference(value, result);
297 }
298 return result;
299 }
300
301
302
303 private Collection getOrMakeCollection(SpecimenCdmExcelImportState state, String collectionCode, String collectionString) {
304 Collection result = state.getCollection(collectionCode);
305 if (result == null){
306 result = Collection.NewInstance();
307 result.setCode(collectionCode);
308 result.setName(collectionString);
309 state.putCollection(collectionCode, result);
310 }
311 return result;
312 }
313
314
315 private TaxonNameBase<?, ?> getTaxonName(SpecimenCdmExcelImportState state, String name) {
316 TaxonNameBase result = null;
317 result = state.getName(name);
318 if (result != null){
319 return result;
320 }
321 List<TaxonNameBase> list = getNameService().findNamesByTitle(name);
322 //TODO better strategy to find best name, e.g. depending on the classification it is used in
323 if (! list.isEmpty()){
324 result = list.get(0);
325 }
326 if (result == null){
327 NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
328 NomenclaturalCode code = state.getConfig().getNomenclaturalCode();
329 result = parser.parseFullName(name, code, null);
330
331 }
332 if (result != null){
333 state.putName(name, result);
334 }
335 return result;
336 }
337
338 private SpecimenTypeDesignationStatus getSpecimenTypeStatus(SpecimenCdmExcelImportState state, String key) {
339 SpecimenTypeDesignationStatus result = null;
340 try {
341 result = state.getTransformer().getSpecimenTypeDesignationStatusByKey(key);
342 if (result == null){
343 String message = "Type status not recognized for %s in line %d";
344 message = String.format(message, key, state.getCurrentLine());
345 logger.warn(message);
346 }
347 return result;
348 } catch (UndefinedTransformerMethodException e) {
349 throw new RuntimeException("getSpecimenTypeDesignationStatusByKey not yet implemented");
350 }
351
352
353 }
354
355
356 private void handleExactLocation(DerivedUnitFacade facade, SpecimenRow row, SpecimenCdmExcelImportState state) {
357 try {
358 String longitude = row.getLongitude();
359 String latitude = row.getLatitude();
360 ReferenceSystem refSys = null;
361 if (StringUtils.isNotBlank(row.getReferenceSystem())){
362 String strRefSys = row.getReferenceSystem().trim().replaceAll("\\s", "").toLowerCase();
363 //TODO move to reference system class ??
364 if (strRefSys.equals("wgs84")){
365 refSys = ReferenceSystem.WGS84();
366 }else if (strRefSys.equals("gazetteer")){
367 refSys = ReferenceSystem.GAZETTEER();
368 }else if (strRefSys.equals("googleearth")){
369 refSys = ReferenceSystem.GOOGLE_EARTH();
370 }else{
371 String message = "Reference system %s not recognized in line %d";
372 message = String.format(message, strRefSys, state.getCurrentLine());
373 logger.warn(message);
374 }
375
376 }
377 Integer errorRadius = null;
378 if (StringUtils.isNotBlank(row.getErrorRadius())){
379 try {
380 errorRadius = Integer.valueOf(row.getErrorRadius());
381 } catch (NumberFormatException e) {
382 String message = "Error radius %s could not be transformed to Integer in line %d";
383 message = String.format(message, row.getErrorRadius(), state.getCurrentLine());
384 logger.warn(message);
385 }
386 }
387 facade.setExactLocationByParsing(longitude, latitude, refSys, errorRadius);
388 } catch (ParseException e) {
389 String message = "Problems when parsing exact location for line %d";
390 message = String.format(message, state.getCurrentLine());
391 logger.warn(message);
392
393 }
394
395
396 }
397
398
399 /*
400 * Set the current Country
401 * Search in the DB if the isoCode is known
402 * If not, search if the country name is in the DB
403 * If not, create a new Label with the Level Country
404 * @param iso: the country iso code
405 * @param fullName: the country's full name
406 * @param app: the CDM application controller
407 */
408 private void handleCountry(DerivedUnitFacade facade, SpecimenRow row, SpecimenCdmExcelImportState state) {
409
410 if (StringUtils.isNotBlank(row.getIsoCountry())){
411 NamedArea country = getOccurrenceService().getCountryByIso(row.getIsoCountry());
412 if (country != null){
413 facade.setCountry(country);
414 return;
415 }
416 }
417 if (StringUtils.isNotBlank(row.getCountry())){
418 List<WaterbodyOrCountry> countries = getOccurrenceService().getWaterbodyOrCountryByName(row.getCountry());
419 if (countries.size() >0){
420 facade.setCountry(countries.get(0));
421 }else{
422 UUID uuid = UUID.randomUUID();
423 String label = row.getCountry();
424 String text = row.getCountry();
425 String labelAbbrev = null;
426 NamedAreaType areaType = NamedAreaType.ADMINISTRATION_AREA();
427 NamedAreaLevel level = NamedAreaLevel.COUNTRY();
428 NamedArea newCountry = this.getNamedArea(state, uuid, label, text, labelAbbrev, areaType, level);
429 facade.setCountry(newCountry);
430 }
431 }
432 }
433
434
435 private DerivedUnitType getDerivedUnitType(String basisOfRecord) {
436 return null;
437 }
438
439 @Override
440 protected boolean secondPass(SpecimenCdmExcelImportState state) {
441 //no second path defined yet
442 return true;
443 }
444
445
446 @Override
447 protected String getWorksheetName() {
448 return WORKSHEET_NAME;
449 }
450
451 @Override
452 protected boolean needsNomenclaturalCode() {
453 return false;
454 }
455
456
457 /* (non-Javadoc)
458 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
459 */
460 @Override
461 protected boolean doCheck(SpecimenCdmExcelImportState state) {
462 logger.warn("Validation not yet implemented for " + this.getClass().getSimpleName());
463 return true;
464 }
465
466
467
468 @Override
469 protected boolean isIgnore(SpecimenCdmExcelImportState state) {
470 return !state.getConfig().isDoSpecimen();
471 }
472
473
474 }