move base column handling to base class (Excel import)
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / specimen / excel / in / SpecimenSythesysExcelImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.specimen.excel.in;
11
12 import java.io.FileNotFoundException;
13 import java.net.URI;
14 import java.net.URL;
15 import java.util.ArrayList;
16 import java.util.HashMap;
17 import java.util.List;
18
19 import org.apache.log4j.Logger;
20 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
21 import org.springframework.stereotype.Component;
22 import org.springframework.transaction.TransactionStatus;
23
24 import eu.etaxonomy.cdm.common.ExcelUtils;
25 import eu.etaxonomy.cdm.common.media.ImageInfo;
26 import eu.etaxonomy.cdm.common.media.MediaInfo;
27 import eu.etaxonomy.cdm.database.DbSchemaValidation;
28 import eu.etaxonomy.cdm.io.common.ICdmIO;
29 import eu.etaxonomy.cdm.io.specimen.SpecimenImportBase;
30 import eu.etaxonomy.cdm.io.specimen.UnitsGatheringArea;
31 import eu.etaxonomy.cdm.io.specimen.UnitsGatheringEvent;
32 import eu.etaxonomy.cdm.model.agent.Institution;
33 import eu.etaxonomy.cdm.model.location.NamedArea;
34 import eu.etaxonomy.cdm.model.media.ImageFile;
35 import eu.etaxonomy.cdm.model.media.Media;
36 import eu.etaxonomy.cdm.model.media.MediaRepresentation;
37 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
38 import eu.etaxonomy.cdm.model.name.NonViralName;
39 import eu.etaxonomy.cdm.model.occurrence.Collection;
40 import eu.etaxonomy.cdm.model.occurrence.DerivationEvent;
41 import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
42 import eu.etaxonomy.cdm.model.occurrence.DerivedUnitBase;
43 import eu.etaxonomy.cdm.model.occurrence.DeterminationEvent;
44 import eu.etaxonomy.cdm.model.occurrence.FieldObservation;
45 import eu.etaxonomy.cdm.model.occurrence.LivingBeing;
46 import eu.etaxonomy.cdm.model.occurrence.Observation;
47 import eu.etaxonomy.cdm.model.occurrence.Specimen;
48 import eu.etaxonomy.cdm.model.reference.Reference;
49 import eu.etaxonomy.cdm.model.taxon.Taxon;
50 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
51 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
52
53 /**
54 * @author p.kelbert
55 * @created 29.10.2008
56 * @version 1.0
57 */
58 @Component
59 public class SpecimenSythesysExcelImport extends SpecimenImportBase<SpecimenSynthesysExcelImportConfigurator, SpecimenSynthesysExcelImportState> implements ICdmIO<SpecimenSynthesysExcelImportState> {
60
61 private static final Logger logger = Logger.getLogger(SpecimenSythesysExcelImport.class);
62
63 protected String fullScientificNameString;
64 protected String nomenclatureCode;
65 protected String institutionCode;
66 protected String collectionCode;
67 protected String unitID;
68 protected String recordBasis;
69 protected String accessionNumber;
70 protected String fieldNumber;
71 protected Double longitude;
72 protected Double latitude;
73 protected String locality;
74 protected String languageIso = null;
75 protected String country;
76 protected String isocountry;
77 protected int depth;
78 protected int altitude;
79 protected ArrayList<String> gatheringAgentList;
80 protected ArrayList<String> identificationList;
81 protected ArrayList<String> namedAreaList;
82 protected ArrayList<String> multimediaObjects;
83
84 protected HSSFWorkbook hssfworkbook = null;
85
86
87 public SpecimenSythesysExcelImport() {
88 super();
89 }
90
91
92 /*
93 * Store the unit's properties into variables
94 * @param unit: the hashmap containing the splitted Excel line (Key=column name, value=value)
95 */
96 private void setUnitPropertiesExcel(HashMap<String,String> unit){
97 String author = unit.get("author");
98 author=author.replaceAll("None","");
99 String taxonName = unit.get("taxonName");
100 taxonName = taxonName.replaceAll("None", "");
101
102 try {
103 this.institutionCode = unit.get("institution").replaceAll("None", null);
104 } catch (Exception e) {this.institutionCode = "";}
105
106 try {this.collectionCode = unit.get("collection").replaceAll("None", null);
107 } catch (Exception e) {this.collectionCode = "";}
108
109 try {this.unitID = unit.get("unitID").replaceAll("None", null);
110 } catch (Exception e) {this.unitID = "";}
111
112 try {this.recordBasis = unit.get("recordBasis").replaceAll("None", null);
113 } catch (Exception e) {this.recordBasis = "";}
114
115 try {this.accessionNumber = null;
116 } catch (Exception e) {this.accessionNumber = "";}
117
118 try {this.locality = unit.get("locality").replaceAll("None", null);
119 } catch (Exception e) {this.locality = "";}
120
121 try {this.longitude = Double.valueOf(unit.get("longitude"));
122 } catch (Exception e) {this.longitude = 0.0;}
123
124 try {this.latitude = Double.valueOf(unit.get("latitude"));
125 } catch (Exception e) {this.latitude = 0.0;}
126
127 try {this.country = unit.get("country").replaceAll("None", null);
128 } catch (Exception e) {this.country = "";}
129
130 try {this.isocountry = unit.get("isoCountry").replaceAll("None", null);
131 } catch (Exception e) {this.isocountry = "";}
132
133 try {this.fieldNumber = unit.get("field number").replaceAll("None", null);
134 } catch (Exception e) {this.fieldNumber = "";}
135
136 try {
137 String url =unit.get("url");
138 url=url.replaceAll("None", null);
139 this.multimediaObjects.add(url);
140 } catch (Exception e) {this.multimediaObjects = new ArrayList<String>();}
141
142 try {
143 String coll =unit.get("collector");
144 coll=coll.replaceAll("None", null);
145 this.gatheringAgentList.add(coll);
146 } catch (Exception e) {this.gatheringAgentList = new ArrayList<String>();}
147
148 try {this.identificationList.add(taxonName+" "+author);
149 } catch (Exception e) {this.identificationList = new ArrayList<String>();}
150
151 }
152
153 private Institution getInstitution(String institutionCode, SpecimenSynthesysExcelImportConfigurator config){
154 Institution institution;
155 List<Institution> institutions;
156 try{
157 institutions= getAgentService().searchInstitutionByCode(this.institutionCode);
158 }catch(Exception e){
159 institutions=new ArrayList<Institution>();
160 }
161 if (institutions.size() ==0 || !config.getReUseExistingMetadata()){
162 System.out.println("Institution (agent) unknown or not allowed to reuse existing metadata");
163 //create institution
164 institution = Institution.NewInstance();
165 institution.setCode(this.institutionCode);
166 }
167 else{
168 System.out.println("Institution (agent) already in the db");
169 institution = institutions.get(0);
170 }
171 return institution;
172 }
173
174 /*
175 * Look if the Collection does already exists
176 * @param collectionCode: a string
177 * @param institution: the current Institution
178 * @param app
179 * @return the Collection (existing or new)
180 */
181 private Collection getCollection(String collectionCode, Institution institution, SpecimenSynthesysExcelImportConfigurator config){
182 Collection collection = Collection.NewInstance();
183 List<Collection> collections;
184 try{
185 collections = getCollectionService().searchByCode(this.collectionCode);
186 }catch(Exception e){
187 collections=new ArrayList<Collection>();
188 }
189 if (collections.size() ==0 || !config.getReUseExistingMetadata()){
190 System.out.println("Collection not found or do not reuse existing metadata "+this.collectionCode);
191 //create new collection
192 collection.setCode(this.collectionCode);
193 collection.setCodeStandard("GBIF");
194 collection.setInstitute(institution);
195 }
196 else{
197 boolean collectionFound=false;
198 for (int i=0; i<collections.size(); i++){
199 collection = collections.get(i);
200 try {
201 if (collection.getInstitute().getCode().equalsIgnoreCase(institution.getCode())){
202 //found a collection with the same code and the same institution
203 collectionFound=true;
204 }
205 } catch (NullPointerException e) {}
206 }
207 if (!collectionFound){
208 collection.setCode(this.collectionCode);
209 collection.setCodeStandard("GBIF");
210 collection.setInstitute(institution);
211 }
212
213 }
214 return collection;
215 }
216
217 /*
218 *
219 * @param app
220 * @param derivedThing
221 * @param sec
222 */
223 private void setTaxonNameBase(SpecimenSynthesysExcelImportConfigurator config, DerivedUnitBase derivedThing, Reference sec){
224 NonViralName<?> taxonName = null;
225 String fullScientificNameString;
226 Taxon taxon = null;
227 DeterminationEvent determinationEvent = null;
228 List<TaxonBase> names = null;
229
230 String scientificName="";
231 boolean preferredFlag=false;
232
233 for (int i = 0; i < this.identificationList.size(); i++) {
234 fullScientificNameString = this.identificationList.get(i);
235 fullScientificNameString = fullScientificNameString.replaceAll(" et ", " & ");
236 if (fullScientificNameString.indexOf("_preferred_") != -1){
237 scientificName = fullScientificNameString.split("_preferred_")[0];
238 String pTmp = fullScientificNameString.split("_preferred_")[1].split("_code_")[0];
239 if (pTmp == "1" || pTmp.toLowerCase().indexOf("true") != -1)
240 preferredFlag=true;
241 else
242 preferredFlag=false;
243 }
244 else scientificName = fullScientificNameString;
245
246 if (fullScientificNameString.indexOf("_code_") != -1)
247 this.nomenclatureCode = fullScientificNameString.split("_code_")[1];
248
249 if (config.getDoAutomaticParsing()){
250 taxonName = this.parseScientificName(scientificName);
251 } else {
252 taxonName.setTitleCache(scientificName, true);
253 }
254
255 if (config.getDoReUseTaxon()){
256 try{
257 names = getTaxonService().searchTaxaByName(scientificName, sec);
258 taxon = (Taxon)names.get(0);
259 }
260 catch(Exception e){taxon=null;}
261 }
262 if (!config.getDoReUseTaxon() || taxon == null){
263 getNameService().save(taxonName);
264 taxon = Taxon.NewInstance(taxonName, sec); //sec set null
265 }
266
267 determinationEvent = DeterminationEvent.NewInstance();
268 determinationEvent.setTaxon(taxon);
269 determinationEvent.setPreferredFlag(preferredFlag);
270 // no reference in the GBIF INDEX
271 // for (int l=0;l<this.referenceList.size();l++){
272 // Reference reference = new Generic();
273 // reference.setTitleCache(this.referenceList.get(l));
274 // determinationEvent.addReference(reference);
275 // }
276 derivedThing.addDetermination(determinationEvent);
277 }
278
279 }
280
281 private NonViralName<?> parseScientificName(String scientificName){
282 System.out.println("parseScientificName");
283 NonViralNameParserImpl nvnpi = NonViralNameParserImpl.NewInstance();
284 NonViralName<?>taxonName = null;
285 boolean problem=false;
286
287 System.out.println("nomenclature: "+this.nomenclatureCode);
288
289 if(this.nomenclatureCode == null){
290 taxonName = NonViralName.NewInstance(null);
291 taxonName.setTitleCache(scientificName, true);
292 return taxonName;
293 }
294
295 if (this.nomenclatureCode.toString().equals("Zoological")){
296 taxonName = nvnpi.parseFullName(scientificName,NomenclaturalCode.ICZN,null);
297 if (taxonName.hasProblem())
298 problem=true;
299 }
300 if (this.nomenclatureCode.toString().equals("Botanical")){
301 taxonName = nvnpi.parseFullName(scientificName,NomenclaturalCode.ICBN,null);
302 if (taxonName.hasProblem())
303 problem=true;;}
304 if (this.nomenclatureCode.toString().equals("Bacterial")){
305 taxonName = nvnpi.parseFullName(scientificName,NomenclaturalCode.ICNB, null);
306 if (taxonName.hasProblem())
307 problem=true;
308 }
309 if (this.nomenclatureCode.toString().equals("Cultivar")){
310 taxonName = nvnpi.parseFullName(scientificName,NomenclaturalCode.ICNCP, null);
311 if (taxonName.hasProblem())
312 problem=true;;
313 }
314 // if (this.nomenclatureCode.toString().equals("Viral")){
315 // ViralName taxonName = (ViralName)nvnpi.parseFullName(scientificName,NomenclaturalCode.ICVCN(), null);
316 // if (taxonName.hasProblem())
317 // System.out.println("pb ICVCN");
318 // }
319 //TODO: parsing of ViralNames?
320 if(problem){
321 taxonName = NonViralName.NewInstance(null);
322 taxonName.setTitleCache(scientificName, true);
323 }
324 return taxonName;
325
326 }
327
328
329 /*
330 * Store the unit with its Gathering informations in the CDM
331 */
332 public boolean start(SpecimenSynthesysExcelImportConfigurator config){
333 boolean result = true;
334 TransactionStatus tx = null;
335
336 tx = startTransaction();
337 try {
338 Reference sec = config.getTaxonReference();
339
340 /**
341 * SPECIMEN OR OBSERVATION OR LIVING
342 */
343 DerivedUnitBase derivedThing = null;
344 //create specimen
345 boolean rbFound=false;
346 if (this.recordBasis != null){
347 if (this.recordBasis.toLowerCase().startsWith("s")) {//specimen
348 derivedThing = Specimen.NewInstance();
349 rbFound = true;
350 }
351 else if (this.recordBasis.toLowerCase().startsWith("o")) {//observation
352 derivedThing = Observation.NewInstance();
353 rbFound = true;
354 }
355 else if (this.recordBasis.toLowerCase().startsWith("l")) {//living -> fossil, herbarium sheet....???
356 derivedThing = LivingBeing.NewInstance();
357 rbFound = true;
358 }
359 if (! rbFound){
360 logger.info("The basis of record does not seem to be known: "+this.recordBasis);
361 derivedThing = DerivedUnit.NewInstance();
362 }
363 }
364 else{
365 logger.info("The basis of record is null");
366 derivedThing = DerivedUnit.NewInstance();
367 }
368
369 this.setTaxonNameBase(config, derivedThing, sec);
370
371
372 //set catalogue number (unitID)
373 derivedThing.setCatalogNumber(this.unitID);
374 derivedThing.setAccessionNumber(this.accessionNumber);
375
376
377 /**
378 * INSTITUTION & COLLECTION
379 */
380 //manage institution
381 Institution institution = this.getInstitution(this.institutionCode,config);
382 //manage collection
383 Collection collection = this.getCollection(this.collectionCode, institution, config);
384 //link specimen & collection
385 derivedThing.setCollection(collection);
386
387 /**
388 * GATHERING EVENT
389 */
390
391 UnitsGatheringEvent unitsGatheringEvent = new UnitsGatheringEvent(getTermService(), this.locality, this.languageIso, this.longitude,
392 this.latitude, this.gatheringAgentList);
393 UnitsGatheringArea unitsGatheringArea = new UnitsGatheringArea(this.isocountry, this.country, getOccurrenceService());
394 NamedArea areaCountry = unitsGatheringArea.getArea();
395 unitsGatheringEvent.addArea(areaCountry);
396 //Only for ABCD XML data
397 // unitsGatheringArea = new UnitsGatheringArea(this.namedAreaList);
398 // ArrayList<NamedArea> nas = unitsGatheringArea.getAreas();
399 // for (int i=0; i<nas.size();i++)
400 // unitsGatheringEvent.addArea(nas.get(i));
401
402
403 //create field/observation
404 FieldObservation fieldObservation = FieldObservation.NewInstance();
405 //add fieldNumber
406 fieldObservation.setFieldNumber(this.fieldNumber);
407 //join gatheringEvent to fieldObservation
408 fieldObservation.setGatheringEvent(unitsGatheringEvent.getGatheringEvent());
409 //add Multimedia URLs
410 if(this.multimediaObjects.size()>0){
411 MediaRepresentation representation;
412 Media media;
413 MediaInfo mmd ;
414 ImageInfo imd ;
415 URL url ;
416 ImageFile imf;
417 for (int i=0;i<this.multimediaObjects.size();i++){
418 if(this.multimediaObjects.get(i) != null){
419 url = new URL(this.multimediaObjects.get(i));
420 imd = ImageInfo.NewInstance(url.toURI(), 0);
421 if (imd != null){
422 System.out.println("image not null");
423 representation = MediaRepresentation.NewInstance();
424 URI uri = new URI(this.multimediaObjects.get(i));
425 imf = ImageFile.NewInstance(uri, null, imd);
426 representation.addRepresentationPart(imf);
427 media = Media.NewInstance();
428 media.addRepresentation(representation);
429 fieldObservation.addMedia(media);
430 }
431 }
432 }
433 }
434 // //link fieldObservation and specimen
435 DerivationEvent derivationEvent = DerivationEvent.NewInstance();
436 derivationEvent.addOriginal(fieldObservation);
437 derivedThing.addDerivationEvent(derivationEvent);
438
439 /**
440 * SAVE AND STORE DATA
441 */
442
443 getTermService().save(areaCountry);//save it sooner
444 //ONLY FOR ABCD XML DATA
445 // for (int i=0; i<nas.size();i++)
446 // app.getTermService().saveTerm(nas.get(i));//save it sooner (foreach area)
447 getTermService().saveLanguageData(unitsGatheringEvent.getLocality());//save it sooner
448 getOccurrenceService().save(derivedThing);
449
450 logger.info("saved new specimen ...");
451
452
453 } catch (Exception e) {
454 logger.warn("Error when reading record!!");
455 e.printStackTrace();
456 result = false;
457 }
458 commitTransaction(tx);
459 System.out.println("commit done");
460 //app.close();
461 return result;
462 }
463
464
465 // /* (non-Javadoc)
466 // * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doInvoke(eu.etaxonomy.cdm.io.common.IImportConfigurator, eu.etaxonomy.cdm.api.application.CdmApplicationController, java.util.Map)
467 // */
468 // @Override
469 // protected boolean doInvoke(IImportConfigurator config,
470 // Map<String, MapWrapper<? extends CdmBase>> stores){
471 // SpecimenImportState state = ((SpecimenImportConfigurator)config).getState();
472 // state.setConfig((SpecimenImportConfigurator)config);
473 // return doInvoke(state);
474 // }
475
476 // public boolean doInvoke(SpecimenImportState state){
477 // invoke(state.getConfig());
478 // return false;
479 // }
480
481
482 /* (non-Javadoc)
483 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IoStateBase)
484 */
485 @Override
486 protected boolean isIgnore(SpecimenSynthesysExcelImportState state) {
487 return false;
488 }
489
490
491 /* (non-Javadoc)
492 * @see eu.etaxonomy.cdm.io.specimen.SpecimenIoBase#doInvoke(eu.etaxonomy.cdm.io.specimen.abcd206.SpecimenImportState)
493 */
494 @Override
495 protected boolean doInvoke(SpecimenSynthesysExcelImportState state) {
496 System.out.println("INVOKE Specimen Import From Excel File (Synthesys Cache format");
497 SpecimenSythesysExcelImport test = new SpecimenSythesysExcelImport();
498 URI source = state.getConfig().getSource();
499 ArrayList<HashMap<String,String>> unitsList = null;
500 try{
501 unitsList = ExcelUtils.parseXLS(source);
502 } catch(FileNotFoundException e){
503 String message = "File not found: " + source;
504 warnProgress(state, message, e);
505 logger.error(message);
506 }
507 System.out.println("unitsList"+unitsList);
508 if (unitsList != null){
509 HashMap<String,String> unit=null;
510 for (int i=0; i<unitsList.size();i++){
511 unit = unitsList.get(i);
512 test.setUnitPropertiesExcel(unit);//and then invoke
513 test.start(state.getConfig());
514 state.getConfig().setDbSchemaValidation(DbSchemaValidation.UPDATE);
515 }
516 }
517
518 return false;
519 }
520
521
522 /* (non-Javadoc)
523 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
524 */
525 @Override
526 protected boolean doCheck(SpecimenSynthesysExcelImportState state) {
527 logger.warn("Validation not yet implemented for " + this.getClass().getSimpleName());
528 return true;
529 }
530
531
532 }