2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.specimen
.excel
.in
;
12 import java
.io
.FileNotFoundException
;
15 import java
.util
.ArrayList
;
16 import java
.util
.HashMap
;
17 import java
.util
.List
;
19 import org
.apache
.log4j
.Logger
;
20 import org
.apache
.poi
.hssf
.usermodel
.HSSFWorkbook
;
21 import org
.springframework
.stereotype
.Component
;
22 import org
.springframework
.transaction
.TransactionStatus
;
24 import eu
.etaxonomy
.cdm
.common
.ExcelUtils
;
25 import eu
.etaxonomy
.cdm
.common
.media
.ImageInfo
;
26 import eu
.etaxonomy
.cdm
.common
.media
.MediaInfo
;
27 import eu
.etaxonomy
.cdm
.database
.DbSchemaValidation
;
28 import eu
.etaxonomy
.cdm
.io
.common
.ICdmIO
;
29 import eu
.etaxonomy
.cdm
.io
.specimen
.SpecimenImportBase
;
30 import eu
.etaxonomy
.cdm
.io
.specimen
.UnitsGatheringArea
;
31 import eu
.etaxonomy
.cdm
.io
.specimen
.UnitsGatheringEvent
;
32 import eu
.etaxonomy
.cdm
.model
.agent
.Institution
;
33 import eu
.etaxonomy
.cdm
.model
.location
.NamedArea
;
34 import eu
.etaxonomy
.cdm
.model
.media
.ImageFile
;
35 import eu
.etaxonomy
.cdm
.model
.media
.Media
;
36 import eu
.etaxonomy
.cdm
.model
.media
.MediaRepresentation
;
37 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
38 import eu
.etaxonomy
.cdm
.model
.name
.NonViralName
;
39 import eu
.etaxonomy
.cdm
.model
.occurrence
.Collection
;
40 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivationEvent
;
41 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivedUnit
;
42 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivedUnitBase
;
43 import eu
.etaxonomy
.cdm
.model
.occurrence
.DeterminationEvent
;
44 import eu
.etaxonomy
.cdm
.model
.occurrence
.FieldObservation
;
45 import eu
.etaxonomy
.cdm
.model
.occurrence
.LivingBeing
;
46 import eu
.etaxonomy
.cdm
.model
.occurrence
.Observation
;
47 import eu
.etaxonomy
.cdm
.model
.occurrence
.Specimen
;
48 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
49 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
50 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
51 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImpl
;
59 public class SpecimenSythesysExcelImport
extends SpecimenImportBase
<SpecimenSynthesysExcelImportConfigurator
, SpecimenSynthesysExcelImportState
> implements ICdmIO
<SpecimenSynthesysExcelImportState
> {
61 private static final Logger logger
= Logger
.getLogger(SpecimenSythesysExcelImport
.class);
63 protected String fullScientificNameString
;
64 protected String nomenclatureCode
;
65 protected String institutionCode
;
66 protected String collectionCode
;
67 protected String unitID
;
68 protected String recordBasis
;
69 protected String accessionNumber
;
70 protected String fieldNumber
;
71 protected Double longitude
;
72 protected Double latitude
;
73 protected String locality
;
74 protected String languageIso
= null;
75 protected String country
;
76 protected String isocountry
;
78 protected int altitude
;
79 protected ArrayList
<String
> gatheringAgentList
;
80 protected ArrayList
<String
> identificationList
;
81 protected ArrayList
<String
> namedAreaList
;
82 protected ArrayList
<String
> multimediaObjects
;
84 protected HSSFWorkbook hssfworkbook
= null;
// No-argument constructor; doInvoke() calls it to create the worker instance that handles the Excel rows.
87 public SpecimenSythesysExcelImport() {
93 * Store the unit's properties into variables
94 * @param unit: the hashmap containing the splitted Excel line (Key=column name, value=value)
96 private void setUnitPropertiesExcel(HashMap
<String
,String
> unit
){
97 String author
= unit
.get("author");
98 author
=author
.replaceAll("None","");
99 String taxonName
= unit
.get("taxonName");
100 taxonName
= taxonName
.replaceAll("None", "");
103 this.institutionCode
= unit
.get("institution").replaceAll("None", null);
104 } catch (Exception e
) {this.institutionCode
= "";}
106 try {this.collectionCode
= unit
.get("collection").replaceAll("None", null);
107 } catch (Exception e
) {this.collectionCode
= "";}
109 try {this.unitID
= unit
.get("unitID").replaceAll("None", null);
110 } catch (Exception e
) {this.unitID
= "";}
112 try {this.recordBasis
= unit
.get("recordBasis").replaceAll("None", null);
113 } catch (Exception e
) {this.recordBasis
= "";}
115 try {this.accessionNumber
= null;
116 } catch (Exception e
) {this.accessionNumber
= "";}
118 try {this.locality
= unit
.get("locality").replaceAll("None", null);
119 } catch (Exception e
) {this.locality
= "";}
121 try {this.longitude
= Double
.valueOf(unit
.get("longitude"));
122 } catch (Exception e
) {this.longitude
= 0.0;}
124 try {this.latitude
= Double
.valueOf(unit
.get("latitude"));
125 } catch (Exception e
) {this.latitude
= 0.0;}
127 try {this.country
= unit
.get("country").replaceAll("None", null);
128 } catch (Exception e
) {this.country
= "";}
130 try {this.isocountry
= unit
.get("isoCountry").replaceAll("None", null);
131 } catch (Exception e
) {this.isocountry
= "";}
133 try {this.fieldNumber
= unit
.get("field number").replaceAll("None", null);
134 } catch (Exception e
) {this.fieldNumber
= "";}
137 String url
=unit
.get("url");
138 url
=url
.replaceAll("None", null);
139 this.multimediaObjects
.add(url
);
140 } catch (Exception e
) {this.multimediaObjects
= new ArrayList
<String
>();}
143 String coll
=unit
.get("collector");
144 coll
=coll
.replaceAll("None", null);
145 this.gatheringAgentList
.add(coll
);
146 } catch (Exception e
) {this.gatheringAgentList
= new ArrayList
<String
>();}
148 try {this.identificationList
.add(taxonName
+" "+author
);
149 } catch (Exception e
) {this.identificationList
= new ArrayList
<String
>();}
153 private Institution
getInstitution(String institutionCode
, SpecimenSynthesysExcelImportConfigurator config
){
154 Institution institution
;
155 List
<Institution
> institutions
;
157 institutions
= getAgentService().searchInstitutionByCode(this.institutionCode
);
159 institutions
=new ArrayList
<Institution
>();
161 if (institutions
.size() ==0 || !config
.getReUseExistingMetadata()){
162 System
.out
.println("Institution (agent) unknown or not allowed to reuse existing metadata");
164 institution
= Institution
.NewInstance();
165 institution
.setCode(this.institutionCode
);
168 System
.out
.println("Institution (agent) already in the db");
169 institution
= institutions
.get(0);
175 * Look if the Collection does already exists
176 * @param collectionCode: a string
177 * @param institution: the current Institution
179 * @return the Collection (existing or new)
181 private Collection
getCollection(String collectionCode
, Institution institution
, SpecimenSynthesysExcelImportConfigurator config
){
182 Collection collection
= Collection
.NewInstance();
183 List
<Collection
> collections
;
185 collections
= getCollectionService().searchByCode(this.collectionCode
);
187 collections
=new ArrayList
<Collection
>();
189 if (collections
.size() ==0 || !config
.getReUseExistingMetadata()){
190 System
.out
.println("Collection not found or do not reuse existing metadata "+this.collectionCode
);
191 //create new collection
192 collection
.setCode(this.collectionCode
);
193 collection
.setCodeStandard("GBIF");
194 collection
.setInstitute(institution
);
197 boolean collectionFound
=false;
198 for (int i
=0; i
<collections
.size(); i
++){
199 collection
= collections
.get(i
);
201 if (collection
.getInstitute().getCode().equalsIgnoreCase(institution
.getCode())){
202 //found a collection with the same code and the same institution
203 collectionFound
=true;
205 } catch (NullPointerException e
) {}
207 if (!collectionFound
){
208 collection
.setCode(this.collectionCode
);
209 collection
.setCodeStandard("GBIF");
210 collection
.setInstitute(institution
);
220 * @param derivedThing
223 private void setTaxonNameBase(SpecimenSynthesysExcelImportConfigurator config
, DerivedUnitBase derivedThing
, Reference sec
){
224 NonViralName
<?
> taxonName
= null;
225 String fullScientificNameString
;
227 DeterminationEvent determinationEvent
= null;
228 List
<TaxonBase
> names
= null;
230 String scientificName
="";
231 boolean preferredFlag
=false;
233 for (int i
= 0; i
< this.identificationList
.size(); i
++) {
234 fullScientificNameString
= this.identificationList
.get(i
);
235 fullScientificNameString
= fullScientificNameString
.replaceAll(" et ", " & ");
236 if (fullScientificNameString
.indexOf("_preferred_") != -1){
237 scientificName
= fullScientificNameString
.split("_preferred_")[0];
238 String pTmp
= fullScientificNameString
.split("_preferred_")[1].split("_code_")[0];
239 if (pTmp
== "1" || pTmp
.toLowerCase().indexOf("true") != -1)
244 else scientificName
= fullScientificNameString
;
246 if (fullScientificNameString
.indexOf("_code_") != -1)
247 this.nomenclatureCode
= fullScientificNameString
.split("_code_")[1];
249 if (config
.getDoAutomaticParsing()){
250 taxonName
= this.parseScientificName(scientificName
);
252 taxonName
.setTitleCache(scientificName
, true);
255 if (config
.getDoReUseTaxon()){
257 names
= getTaxonService().searchTaxaByName(scientificName
, sec
);
258 taxon
= (Taxon
)names
.get(0);
260 catch(Exception e
){taxon
=null;}
262 if (!config
.getDoReUseTaxon() || taxon
== null){
263 getNameService().save(taxonName
);
264 taxon
= Taxon
.NewInstance(taxonName
, sec
); //sec set null
267 determinationEvent
= DeterminationEvent
.NewInstance();
268 determinationEvent
.setTaxon(taxon
);
269 determinationEvent
.setPreferredFlag(preferredFlag
);
270 // no reference in the GBIF INDEX
271 // for (int l=0;l<this.referenceList.size();l++){
272 // Reference reference = new Generic();
273 // reference.setTitleCache(this.referenceList.get(l));
274 // determinationEvent.addReference(reference);
276 derivedThing
.addDetermination(determinationEvent
);
281 private NonViralName
<?
> parseScientificName(String scientificName
){
282 System
.out
.println("parseScientificName");
283 NonViralNameParserImpl nvnpi
= NonViralNameParserImpl
.NewInstance();
284 NonViralName
<?
>taxonName
= null;
285 boolean problem
=false;
287 System
.out
.println("nomenclature: "+this.nomenclatureCode
);
289 if(this.nomenclatureCode
== null){
290 taxonName
= NonViralName
.NewInstance(null);
291 taxonName
.setTitleCache(scientificName
, true);
295 if (this.nomenclatureCode
.toString().equals("Zoological")){
296 taxonName
= nvnpi
.parseFullName(scientificName
,NomenclaturalCode
.ICZN
,null);
297 if (taxonName
.hasProblem())
300 if (this.nomenclatureCode
.toString().equals("Botanical")){
301 taxonName
= nvnpi
.parseFullName(scientificName
,NomenclaturalCode
.ICBN
,null);
302 if (taxonName
.hasProblem())
304 if (this.nomenclatureCode
.toString().equals("Bacterial")){
305 taxonName
= nvnpi
.parseFullName(scientificName
,NomenclaturalCode
.ICNB
, null);
306 if (taxonName
.hasProblem())
309 if (this.nomenclatureCode
.toString().equals("Cultivar")){
310 taxonName
= nvnpi
.parseFullName(scientificName
,NomenclaturalCode
.ICNCP
, null);
311 if (taxonName
.hasProblem())
314 // if (this.nomenclatureCode.toString().equals("Viral")){
315 // ViralName taxonName = (ViralName)nvnpi.parseFullName(scientificName,NomenclaturalCode.ICVCN(), null);
316 // if (taxonName.hasProblem())
317 // System.out.println("pb ICVCN");
319 //TODO: parsing of ViralNames?
321 taxonName
= NonViralName
.NewInstance(null);
322 taxonName
.setTitleCache(scientificName
, true);
330 * Store the unit with its gathering information in the CDM
// Builds one derived unit from the row values currently held in the fields of this instance
// and stores it: a transaction is started, the unit is created according to recordBasis,
// determinations, collection, gathering event, field observation and media are attached,
// and the country area, the locality text and the unit are saved through the term and
// occurrence services.
// @param config the import configurator (taxon reference, parsing and reuse options)
332 public boolean start(SpecimenSynthesysExcelImportConfigurator config
){
333 boolean result
= true;
334 TransactionStatus tx
= null;
336 tx
= startTransaction();
// Reference used as "sec" for the taxon search and for newly created taxa.
338 Reference sec
= config
.getTaxonReference();
341 * SPECIMEN OR OBSERVATION OR LIVING
// The first letter of recordBasis (case-insensitive) picks the unit class: "s" -> Specimen,
// "o" -> Observation, "l" -> LivingBeing. An unrecognised or null basis is logged and
// yields a plain DerivedUnit.
343 DerivedUnitBase derivedThing
= null;
345 boolean rbFound
=false;
346 if (this.recordBasis
!= null){
347 if (this.recordBasis
.toLowerCase().startsWith("s")) {//specimen
348 derivedThing
= Specimen
.NewInstance();
351 else if (this.recordBasis
.toLowerCase().startsWith("o")) {//observation
352 derivedThing
= Observation
.NewInstance();
355 else if (this.recordBasis
.toLowerCase().startsWith("l")) {//living -> fossil, herbarium sheet....???
356 derivedThing
= LivingBeing
.NewInstance();
360 logger
.info("The basis of record does not seem to be known: "+this.recordBasis
);
361 derivedThing
= DerivedUnit
.NewInstance();
365 logger
.info("The basis of record is null");
366 derivedThing
= DerivedUnit
.NewInstance();
// Adds the determination events built from identificationList to the unit.
369 this.setTaxonNameBase(config
, derivedThing
, sec
);
372 //set catalogue number (unitID)
373 derivedThing
.setCatalogNumber(this.unitID
);
374 derivedThing
.setAccessionNumber(this.accessionNumber
);
378 * INSTITUTION & COLLECTION
// Institution and collection are looked up or created by the helper methods of this class.
381 Institution institution
= this.getInstitution(this.institutionCode
,config
);
383 Collection collection
= this.getCollection(this.collectionCode
, institution
, config
);
384 //link specimen & collection
385 derivedThing
.setCollection(collection
);
// Gathering event from locality, language, coordinates and collectors; the area resolved
// from the ISO code / country name is added to it.
391 UnitsGatheringEvent unitsGatheringEvent
= new UnitsGatheringEvent(getTermService(), this.locality
, this.languageIso
, this.longitude
,
392 this.latitude
, this.gatheringAgentList
);
393 UnitsGatheringArea unitsGatheringArea
= new UnitsGatheringArea(this.isocountry
, this.country
, getOccurrenceService());
394 NamedArea areaCountry
= unitsGatheringArea
.getArea();
395 unitsGatheringEvent
.addArea(areaCountry
);
396 //Only for ABCD XML data
397 // unitsGatheringArea = new UnitsGatheringArea(this.namedAreaList);
398 // ArrayList<NamedArea> nas = unitsGatheringArea.getAreas();
399 // for (int i=0; i<nas.size();i++)
400 // unitsGatheringEvent.addArea(nas.get(i));
403 //create field/observation
404 FieldObservation fieldObservation
= FieldObservation
.NewInstance();
406 fieldObservation
.setFieldNumber(this.fieldNumber
);
407 //join gatheringEvent to fieldObservation
408 fieldObservation
.setGatheringEvent(unitsGatheringEvent
.getGatheringEvent());
409 //add Multimedia URLs
// Non-null entries of multimediaObjects are used to build an ImageFile, wrapped in a
// MediaRepresentation and a Media, which is added to the field observation.
// NOTE(review): new URL(...) / new URI(...) throw on malformed entries — presumably handled
// by the catch further down, which would abort the whole record; confirm.
410 if(this.multimediaObjects
.size()>0){
411 MediaRepresentation representation
;
417 for (int i
=0;i
<this.multimediaObjects
.size();i
++){
418 if(this.multimediaObjects
.get(i
) != null){
419 url
= new URL(this.multimediaObjects
.get(i
));
420 imd
= ImageInfo
.NewInstance(url
.toURI(), 0);
422 System
.out
.println("image not null");
423 representation
= MediaRepresentation
.NewInstance();
424 URI uri
= new URI(this.multimediaObjects
.get(i
));
425 imf
= ImageFile
.NewInstance(uri
, null, imd
);
426 representation
.addRepresentationPart(imf
);
427 media
= Media
.NewInstance();
428 media
.addRepresentation(representation
);
429 fieldObservation
.addMedia(media
);
434 // //link fieldObservation and specimen
// The field observation is registered as the original of a derivation event attached to the unit.
435 DerivationEvent derivationEvent
= DerivationEvent
.NewInstance();
436 derivationEvent
.addOriginal(fieldObservation
);
437 derivedThing
.addDerivationEvent(derivationEvent
);
440 * SAVE AND STORE DATA
// The area and the locality text are saved before the unit itself.
443 getTermService().save(areaCountry
);//save it sooner
444 //ONLY FOR ABCD XML DATA
445 // for (int i=0; i<nas.size();i++)
446 // app.getTermService().saveTerm(nas.get(i));//save it sooner (foreach area)
447 getTermService().saveLanguageData(unitsGatheringEvent
.getLocality());//save it sooner
448 getOccurrenceService().save(derivedThing
);
450 logger
.info("saved new specimen ...");
// Any exception raised while building or saving the record is reported as a warning only.
453 } catch (Exception e
) {
454 logger
.warn("Error when reading record!!");
// NOTE(review): the commit appears to run after the handler as well, i.e. also for a record
// that failed — confirm that committing in that case is intended.
458 commitTransaction(tx
);
459 System
.out
.println("commit done");
466 // * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doInvoke(eu.etaxonomy.cdm.io.common.IImportConfigurator, eu.etaxonomy.cdm.api.application.CdmApplicationController, java.util.Map)
469 // protected boolean doInvoke(IImportConfigurator config,
470 // Map<String, MapWrapper<? extends CdmBase>> stores){
471 // SpecimenImportState state = ((SpecimenImportConfigurator)config).getState();
472 // state.setConfig((SpecimenImportConfigurator)config);
473 // return doInvoke(state);
476 // public boolean doInvoke(SpecimenImportState state){
477 // invoke(state.getConfig());
483 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IoStateBase)
// Hook receiving the import state; the @see note preceding it points to CdmIoBase#isIgnore.
486 protected boolean isIgnore(SpecimenSynthesysExcelImportState state
) {
492 * @see eu.etaxonomy.cdm.io.specimen.SpecimenIoBase#doInvoke(eu.etaxonomy.cdm.io.specimen.abcd206.SpecimenImportState)
// Entry point of the import: reads the Excel source named by the configurator into a list of
// rows (column name -> cell value) and imports the rows one after the other.
// @param state the import state giving access to the configurator
495 protected boolean doInvoke(SpecimenSynthesysExcelImportState state
) {
496 System
.out
.println("INVOKE Specimen Import From Excel File (Synthesys Cache format");
// Worker instance that receives the row values and stores the units.
// NOTE(review): it is created with `new` — confirm that the service getters used by start()
// work on an instance that was not obtained from the application context.
497 SpecimenSythesysExcelImport test
= new SpecimenSythesysExcelImport();
498 URI source
= state
.getConfig().getSource();
499 ArrayList
<HashMap
<String
,String
>> unitsList
= null;
501 unitsList
= ExcelUtils
.parseXLS(source
);
// A missing file is reported through warnProgress and the logger; unitsList then stays null
// and no row is processed.
502 } catch(FileNotFoundException e
){
503 String message
= "File not found: " + source
;
504 warnProgress(state
, message
, e
);
505 logger
.error(message
);
507 System
.out
.println("unitsList"+unitsList
);
508 if (unitsList
!= null){
509 HashMap
<String
,String
> unit
=null;
// Per row: copy the cell values into the worker, store the unit, then set the configurator's
// schema validation to UPDATE.
510 for (int i
=0; i
<unitsList
.size();i
++){
511 unit
= unitsList
.get(i
);
512 test
.setUnitPropertiesExcel(unit
);//and then invoke
513 test
.start(state
.getConfig());
514 state
.getConfig().setDbSchemaValidation(DbSchemaValidation
.UPDATE
);
523 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
526 protected boolean doCheck(SpecimenSynthesysExcelImportState state
) {
527 logger
.warn("Validation not yet implemented for " + this.getClass().getSimpleName());