/**
 * Copyright (C) 2007 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
10 package eu
.etaxonomy
.cdm
.io
.specimen
.excel
.in
;
12 import java
.text
.ParseException
;
13 import java
.util
.HashMap
;
14 import java
.util
.List
;
16 import java
.util
.UUID
;
18 import org
.apache
.commons
.lang
.StringUtils
;
19 import org
.apache
.log4j
.Logger
;
20 import org
.springframework
.stereotype
.Component
;
22 import eu
.etaxonomy
.cdm
.api
.facade
.DerivedUnitFacade
;
23 import eu
.etaxonomy
.cdm
.api
.facade
.DerivedUnitFacade
.DerivedUnitType
;
24 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
25 import eu
.etaxonomy
.cdm
.io
.common
.ICdmIO
;
26 import eu
.etaxonomy
.cdm
.io
.common
.mapping
.UndefinedTransformerMethodException
;
27 import eu
.etaxonomy
.cdm
.io
.excel
.common
.ExcelImporterBase
;
28 import eu
.etaxonomy
.cdm
.model
.agent
.AgentBase
;
29 import eu
.etaxonomy
.cdm
.model
.agent
.Person
;
30 import eu
.etaxonomy
.cdm
.model
.agent
.Team
;
31 import eu
.etaxonomy
.cdm
.model
.common
.IdentifiableSource
;
32 import eu
.etaxonomy
.cdm
.model
.location
.NamedArea
;
33 import eu
.etaxonomy
.cdm
.model
.location
.NamedAreaLevel
;
34 import eu
.etaxonomy
.cdm
.model
.location
.NamedAreaType
;
35 import eu
.etaxonomy
.cdm
.model
.location
.ReferenceSystem
;
36 import eu
.etaxonomy
.cdm
.model
.location
.WaterbodyOrCountry
;
37 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
38 import eu
.etaxonomy
.cdm
.model
.name
.SpecimenTypeDesignation
;
39 import eu
.etaxonomy
.cdm
.model
.name
.SpecimenTypeDesignationStatus
;
40 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
41 import eu
.etaxonomy
.cdm
.model
.occurrence
.Collection
;
42 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
43 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
44 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImpl
;
52 public class SpecimenCdmExcelImport
extends ExcelImporterBase
<SpecimenCdmExcelImportState
> implements ICdmIO
<SpecimenCdmExcelImportState
> {
53 private static final Logger logger
= Logger
.getLogger(SpecimenCdmExcelImport
.class);
55 private static final String WORKSHEET_NAME
= "Specimen";
57 private static final String UUID_COLUMN
= "UUID";
58 private static final String BASIS_OF_RECORD_COLUMN
= "BasisOfRecord";
59 private static final String COUNTRY_COLUMN
= "Country";
60 private static final String ISO_COUNTRY_COLUMN
= "ISOCountry";
61 private static final String LOCALITY_COLUMN
= "Locality";
62 private static final String ABSOLUTE_ELEVATION_COLUMN
= "AbsoluteElevation";
63 private static final String COLLECTION_DATE_COLUMN
= "CollectionDate";
64 private static final String COLLECTION_DATE_END_COLUMN
= "CollectionDateEnd";
65 private static final String COLLECTOR_COLUMN
= "Collector";
66 private static final String LONGITUDE_COLUMN
= "Longitude";
67 private static final String LATITUDE_COLUMN
= "Latitude";
68 private static final String REFERENCE_SYSTEM_COLUMN
= "ReferenceSystem";
69 private static final String ERROR_RADIUS_COLUMN
= "ErrorRadius";
72 private static final String COLLECTORS_NUMBER_COLUMN
= "CollectorsNumber";
73 private static final String ECOLOGY_COLUMN
= "Ecology";
74 private static final String PLANT_DESCRIPTION_COLUMN
= "PlantDescription";
75 private static final String FIELD_NOTES_COLUMN
= "FieldNotes";
76 private static final String SEX_COLUMN
= "Sex";
79 private static final String ACCESSION_NUMBER_COLUMN
= "AccessionNumber";
80 private static final String BARCODE_COLUMN
= "Barcode";
81 private static final String COLLECTION_CODE_COLUMN
= "CollectionCode";
82 private static final String COLLECTION_COLUMN
= "Collection";
84 private static final String TYPE_CATEGORY_COLUMN
= "TypeCategory";
85 private static final String TYPIFIED_NAME_COLUMN
= "TypifiedName";
88 private static final String SOURCE_COLUMN
= "Source";
89 private static final String ID_IN_SOURCE_COLUMN
= "IdInSource";
92 private static final String SPECIFIC_EPITHET_COLUMN
= "SpecificEpithet";
93 private static final String FAMILY_COLUMN
= "Family";
94 private static final String GENUS_COLUMN
= "Genus";
95 private static final String AUTHOR_COLUMN
= "Author";
// Public no-argument constructor.
// NOTE(review): the constructor body and its closing brace are not visible in this
// listing - confirm against the full file before relying on what it initializes.
99 public SpecimenCdmExcelImport() {
104 protected boolean analyzeRecord(HashMap
<String
, String
> record
, SpecimenCdmExcelImportState state
) {
105 boolean success
= true;
106 Set
<String
> keys
= record
.keySet();
108 SpecimenRow row
= new SpecimenRow();
109 state
.setSpecimenRow(row
);
111 for (String originalKey
: keys
) {
113 String indexedKey
= CdmUtils
.removeDuplicateWhitespace(originalKey
.trim()).toString();
114 String
[] split
= indexedKey
.split("_");
115 String key
= split
[0];
116 if (split
.length
> 1){
117 String indexString
= split
[split
.length
- 1];
119 index
= Integer
.valueOf(indexString
);
120 } catch (NumberFormatException e
) {
121 String message
= "Index must be integer";
122 logger
.error(message
);
127 String value
= (String
) record
.get(indexedKey
);
128 if (! StringUtils
.isBlank(value
)) {
129 if (logger
.isDebugEnabled()) { logger
.debug(key
+ ": " + value
); }
130 value
= CdmUtils
.removeDuplicateWhitespace(value
.trim()).toString();
136 if (key
.equalsIgnoreCase(UUID_COLUMN
)) {
137 row
.setUuid(UUID
.fromString(value
)); //VALIDATE UUID
138 } else if(key
.equalsIgnoreCase(BASIS_OF_RECORD_COLUMN
)) {
139 row
.setBasisOfRecord(value
);
140 } else if(key
.equalsIgnoreCase(COUNTRY_COLUMN
)) {
141 row
.setCountry(value
);
142 } else if(key
.equalsIgnoreCase(ISO_COUNTRY_COLUMN
)) {
143 row
.setIsoCountry(value
);
144 } else if(key
.equalsIgnoreCase(LOCALITY_COLUMN
)) {
145 row
.setLocality(value
);
146 } else if(key
.equalsIgnoreCase(FIELD_NOTES_COLUMN
)) {
147 row
.setLocality(value
);
148 } else if(key
.equalsIgnoreCase(ABSOLUTE_ELEVATION_COLUMN
)) {
149 row
.setAbsoluteElevation(value
);
150 } else if(key
.equalsIgnoreCase(COLLECTOR_COLUMN
)) {
151 row
.putCollector(index
, value
);
152 } else if(key
.equalsIgnoreCase(ECOLOGY_COLUMN
)) {
153 row
.setEcology(value
);
154 } else if(key
.equalsIgnoreCase(PLANT_DESCRIPTION_COLUMN
)) {
155 row
.setPlantDescription(value
);
156 } else if(key
.equalsIgnoreCase(SEX_COLUMN
)) {
158 } else if(key
.equalsIgnoreCase(COLLECTION_DATE_COLUMN
)) {
159 row
.setCollectingDate(value
);
160 } else if(key
.equalsIgnoreCase(COLLECTION_DATE_END_COLUMN
)) {
161 row
.setCollectingDateEnd(value
);
162 } else if(key
.equalsIgnoreCase(COLLECTOR_COLUMN
)) {
163 row
.putCollector(index
, value
);
164 } else if(key
.equalsIgnoreCase(COLLECTORS_NUMBER_COLUMN
)) {
165 row
.setCollectorsNumber(value
);
166 } else if(key
.equalsIgnoreCase(LONGITUDE_COLUMN
)) {
167 row
.setLongitude(value
);
168 } else if(key
.equalsIgnoreCase(LATITUDE_COLUMN
)) {
169 row
.setLatitude(value
);
170 } else if(key
.equalsIgnoreCase(REFERENCE_SYSTEM_COLUMN
)) {
171 row
.setReferenceSystem(value
);
172 } else if(key
.equalsIgnoreCase(ERROR_RADIUS_COLUMN
)) {
173 row
.setErrorRadius(value
);
175 } else if(key
.equalsIgnoreCase(ACCESSION_NUMBER_COLUMN
)) {
176 row
.setLocality(value
);
177 } else if(key
.equalsIgnoreCase(BARCODE_COLUMN
)) {
178 row
.setBarcode(value
);
179 } else if(key
.equalsIgnoreCase(AUTHOR_COLUMN
)) {
180 row
.setAuthor(value
);
181 } else if(key
.equalsIgnoreCase(FAMILY_COLUMN
)) {
182 row
.setFamily(value
);
183 } else if(key
.equalsIgnoreCase(GENUS_COLUMN
)) {
185 } else if(key
.equalsIgnoreCase(SPECIFIC_EPITHET_COLUMN
)) {
186 row
.setSpecificEpithet(value
);
187 } else if(key
.equalsIgnoreCase(COLLECTION_CODE_COLUMN
)) {
188 row
.setCollectionCode(value
);
189 } else if(key
.equalsIgnoreCase(COLLECTION_COLUMN
)) {
190 row
.setCollection(value
);
192 } else if(key
.equalsIgnoreCase(TYPE_CATEGORY_COLUMN
)) {
193 row
.putTypeCategory(index
, getSpecimenTypeStatus(state
, value
));
194 } else if(key
.equalsIgnoreCase(TYPIFIED_NAME_COLUMN
)) {
195 row
.putTypifiedName(index
, getTaxonName(state
, value
));
198 } else if(key
.equalsIgnoreCase(SOURCE_COLUMN
)) {
199 row
.putSourceReference(index
, getOrMakeReference(state
, value
));
200 } else if(key
.equalsIgnoreCase(ID_IN_SOURCE_COLUMN
)) {
201 row
.putIdInSource(index
, value
);
204 logger
.error("Unexpected column header " + key
);
211 protected boolean firstPass(SpecimenCdmExcelImportState state
) {
212 SpecimenRow row
= state
.getSpecimenRow();
215 DerivedUnitType type
= DerivedUnitType
.valueOf2(row
.getBasisOfRecord());
217 String message
= "%s is not a valid BasisOfRecord. 'Unknown' is used instead.";
218 message
= String
.format(message
, row
.getBasisOfRecord());
219 logger
.warn(message
);
220 type
= DerivedUnitType
.DerivedUnit
;
222 DerivedUnitFacade facade
= DerivedUnitFacade
.NewInstance(type
);
225 handleCountry(facade
, row
, state
);
227 facade
.setGatheringPeriod(getTimePeriod(row
.getCollectingDate(), row
.getCollectingDateEnd()));
228 facade
.setLocality(row
.getLocality());
229 facade
.setFieldNotes(row
.getFieldNotes());
230 facade
.setFieldNumber(row
.getCollectorsNumber());
231 facade
.setEcology(row
.getEcology());
232 facade
.setPlantDescription(row
.getPlantDescription());
233 // facade.setSex(row.get)
234 handleExactLocation(facade
, row
, state
);
235 facade
.setCollector(getOrMakeAgent(state
, row
.getCollectors()));
239 facade
.setBarcode(row
.getBarcode());
240 facade
.setAccessionNumber(row
.getAccessionNumber());
241 facade
.setCollection(getOrMakeCollection(state
, row
.getCollectionCode(), row
.getCollection()));
242 for (IdentifiableSource source
: row
.getSources()){
243 facade
.addSource(source
);
245 for (SpecimenTypeDesignation designation
: row
.getTypeDesignations()){
246 facade
.innerDerivedUnit().addSpecimenTypeDesignation(designation
);
252 getOccurrenceService().save(facade
.innerDerivedUnit());
256 private AgentBase
<?
> getOrMakeAgent(SpecimenCdmExcelImportState state
, List
<String
> agents
) {
257 if (agents
.size() == 0){
259 }else if (agents
.size() == 1){
260 return getOrMakePerson(state
, agents
.get(0));
262 return getOrMakeTeam(state
, agents
);
266 private Team
getOrMakeTeam(SpecimenCdmExcelImportState state
, List
<String
> agents
) {
267 String key
= CdmUtils
.concat("_", agents
.toArray(new String
[0]));
269 Team result
= state
.getTeam(key
);
271 result
= Team
.NewInstance();
272 for (String member
: agents
){
273 Person person
= getOrMakePerson(state
, member
);
274 result
.addTeamMember(person
);
276 state
.putTeam(key
, result
);
281 private Person
getOrMakePerson(SpecimenCdmExcelImportState state
, String value
) {
282 Person result
= state
.getPerson(value
);
284 result
= Person
.NewInstance();
285 result
.setTitleCache(value
, true);
286 state
.putPerson(value
, result
);
291 private Reference
<?
> getOrMakeReference(SpecimenCdmExcelImportState state
, String value
) {
292 Reference
<?
> result
= state
.getReference(value
);
294 result
= ReferenceFactory
.newGeneric();
295 result
.setTitleCache(value
, true);
296 state
.putReference(value
, result
);
303 private Collection
getOrMakeCollection(SpecimenCdmExcelImportState state
, String collectionCode
, String collectionString
) {
304 Collection result
= state
.getCollection(collectionCode
);
306 result
= Collection
.NewInstance();
307 result
.setCode(collectionCode
);
308 result
.setName(collectionString
);
309 state
.putCollection(collectionCode
, result
);
315 private TaxonNameBase
<?
, ?
> getTaxonName(SpecimenCdmExcelImportState state
, String name
) {
316 TaxonNameBase result
= null;
317 result
= state
.getName(name
);
321 List
<TaxonNameBase
> list
= getNameService().findNamesByTitle(name
);
322 //TODO better strategy to find best name, e.g. depending on the classification it is used in
323 if (! list
.isEmpty()){
324 result
= list
.get(0);
327 NonViralNameParserImpl parser
= NonViralNameParserImpl
.NewInstance();
328 NomenclaturalCode code
= state
.getConfig().getNomenclaturalCode();
329 result
= parser
.parseFullName(name
, code
, null);
333 state
.putName(name
, result
);
338 private SpecimenTypeDesignationStatus
getSpecimenTypeStatus(SpecimenCdmExcelImportState state
, String key
) {
339 SpecimenTypeDesignationStatus result
= null;
341 result
= state
.getTransformer().getSpecimenTypeDesignationStatusByKey(key
);
343 String message
= "Type status not recognized for %s in line %d";
344 message
= String
.format(message
, key
, state
.getCurrentLine());
345 logger
.warn(message
);
348 } catch (UndefinedTransformerMethodException e
) {
349 throw new RuntimeException("getSpecimenTypeDesignationStatusByKey not yet implemented");
356 private void handleExactLocation(DerivedUnitFacade facade
, SpecimenRow row
, SpecimenCdmExcelImportState state
) {
358 String longitude
= row
.getLongitude();
359 String latitude
= row
.getLatitude();
360 ReferenceSystem refSys
= null;
361 if (StringUtils
.isNotBlank(row
.getReferenceSystem())){
362 String strRefSys
= row
.getReferenceSystem().trim().replaceAll("\\s", "").toLowerCase();
363 //TODO move to reference system class ??
364 if (strRefSys
.equals("wgs84")){
365 refSys
= ReferenceSystem
.WGS84();
366 }else if (strRefSys
.equals("gazetteer")){
367 refSys
= ReferenceSystem
.GAZETTEER();
368 }else if (strRefSys
.equals("googleearth")){
369 refSys
= ReferenceSystem
.GOOGLE_EARTH();
371 String message
= "Reference system %s not recognized in line %d";
372 message
= String
.format(message
, strRefSys
, state
.getCurrentLine());
373 logger
.warn(message
);
377 Integer errorRadius
= null;
378 if (StringUtils
.isNotBlank(row
.getErrorRadius())){
380 errorRadius
= Integer
.valueOf(row
.getErrorRadius());
381 } catch (NumberFormatException e
) {
382 String message
= "Error radius %s could not be transformed to Integer in line %d";
383 message
= String
.format(message
, row
.getErrorRadius(), state
.getCurrentLine());
384 logger
.warn(message
);
387 facade
.setExactLocationByParsing(longitude
, latitude
, refSys
, errorRadius
);
388 } catch (ParseException e
) {
389 String message
= "Problems when parsing exact location for line %d";
390 message
= String
.format(message
, state
.getCurrentLine());
391 logger
.warn(message
);
400 * Set the current Country
401 * Search in the DB if the isoCode is known
402 * If not, search if the country name is in the DB
403 * If not, create a new Label with the Level Country
404 * @param iso: the country iso code
405 * @param fullName: the country's full name
406 * @param app: the CDM application controller
408 private void handleCountry(DerivedUnitFacade facade
, SpecimenRow row
, SpecimenCdmExcelImportState state
) {
410 if (StringUtils
.isNotBlank(row
.getIsoCountry())){
411 NamedArea country
= getOccurrenceService().getCountryByIso(row
.getIsoCountry());
412 if (country
!= null){
413 facade
.setCountry(country
);
417 if (StringUtils
.isNotBlank(row
.getCountry())){
418 List
<WaterbodyOrCountry
> countries
= getOccurrenceService().getWaterbodyOrCountryByName(row
.getCountry());
419 if (countries
.size() >0){
420 facade
.setCountry(countries
.get(0));
422 UUID uuid
= UUID
.randomUUID();
423 String label
= row
.getCountry();
424 String text
= row
.getCountry();
425 String labelAbbrev
= null;
426 NamedAreaType areaType
= NamedAreaType
.ADMINISTRATION_AREA();
427 NamedAreaLevel level
= NamedAreaLevel
.COUNTRY();
428 NamedArea newCountry
= this.getNamedArea(state
, uuid
, label
, text
, labelAbbrev
, areaType
, level
);
429 facade
.setCountry(newCountry
);
// Private helper taking the basis-of-record string and declared to return a DerivedUnitType.
// NOTE(review): the whole method body is missing from this listing, and no call to this
// method is visible here (firstPass uses DerivedUnitType.valueOf2 directly) - confirm
// against the full file whether it is still needed.
435 private DerivedUnitType
getDerivedUnitType(String basisOfRecord
) {
// Second import pass for the current record; per the comment below no second pass
// is defined yet.
// NOTE(review): the return statement and closing brace are not visible in this
// listing - confirm the returned value against the full file.
440 protected boolean secondPass(SpecimenCdmExcelImportState state
) {
441 //no second path defined yet
447 protected String
getWorksheetName() {
448 return WORKSHEET_NAME
;
// Presumably reports whether this import requires a nomenclatural code to be configured.
// NOTE(review): the method body (return value and closing brace) is not visible in
// this listing - confirm against the full file.
452 protected boolean needsNomenclaturalCode() {
/**
 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
 */
// Validation hook for this import. No validation is performed; the method only logs
// a warning that validation is not yet implemented for this class.
// NOTE(review): the return statement and closing brace are not visible in this
// listing - confirm the returned value against the full file.
461 protected boolean doCheck(SpecimenCdmExcelImportState state
) {
462 logger
.warn("Validation not yet implemented for " + this.getClass().getSimpleName());
// Returns true when the configuration's isDoSpecimen() flag is false, i.e. the
// result is the negation of that flag.
// NOTE(review): the closing brace of this method lies beyond the end of this listing.
469 protected boolean isIgnore(SpecimenCdmExcelImportState state
) {
470 return !state
.getConfig().isDoSpecimen();