1 package eu
.etaxonomy
.cdm
.io
.synthesys
;
4 import java
.io
.FileInputStream
;
5 import java
.util
.ArrayList
;
6 import java
.util
.Hashtable
;
10 import javax
.xml
.parsers
.DocumentBuilder
;
11 import javax
.xml
.parsers
.DocumentBuilderFactory
;
13 import org
.apache
.log4j
.Logger
;
14 import org
.apache
.poi
.hssf
.usermodel
.HSSFCell
;
15 import org
.apache
.poi
.hssf
.usermodel
.HSSFRow
;
16 import org
.apache
.poi
.hssf
.usermodel
.HSSFSheet
;
17 import org
.apache
.poi
.hssf
.usermodel
.HSSFWorkbook
;
18 import org
.apache
.poi
.poifs
.filesystem
.POIFSFileSystem
;
19 import org
.springframework
.transaction
.TransactionStatus
;
20 import org
.w3c
.dom
.Document
;
21 import org
.w3c
.dom
.Element
;
22 import org
.w3c
.dom
.NodeList
;
24 import eu
.etaxonomy
.cdm
.api
.application
.CdmApplicationController
;
25 import eu
.etaxonomy
.cdm
.database
.DataSourceNotFoundException
;
26 import eu
.etaxonomy
.cdm
.database
.DbSchemaValidation
;
27 import eu
.etaxonomy
.cdm
.io
.common
.ICdmIO
;
28 import eu
.etaxonomy
.cdm
.io
.common
.IImportConfigurator
;
29 import eu
.etaxonomy
.cdm
.model
.agent
.Institution
;
30 import eu
.etaxonomy
.cdm
.model
.common
.init
.TermNotFoundException
;
31 import eu
.etaxonomy
.cdm
.model
.location
.NamedArea
;
32 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
33 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
34 import eu
.etaxonomy
.cdm
.model
.occurrence
.Collection
;
35 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivationEvent
;
36 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivedUnitBase
;
37 import eu
.etaxonomy
.cdm
.model
.occurrence
.DeterminationEvent
;
38 import eu
.etaxonomy
.cdm
.model
.occurrence
.FieldObservation
;
39 import eu
.etaxonomy
.cdm
.model
.occurrence
.LivingBeing
;
40 import eu
.etaxonomy
.cdm
.model
.occurrence
.Observation
;
41 import eu
.etaxonomy
.cdm
.model
.occurrence
.Specimen
;
42 import eu
.etaxonomy
.cdm
.model
.reference
.Database
;
43 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceBase
;
44 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
45 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImpl
;
47 public class SynthesysIO
extends SpecimenIoBase
implements ICdmIO
{
50 private static final Logger logger
= Logger
.getLogger(SynthesysIO
.class);
52 protected String fullScientificNameString
;
53 protected String nomenclatureCode
;
54 protected String institutionCode
;
55 protected String collectionCode
;
56 protected String unitID
;
57 protected String recordBasis
;
58 protected String accessionNumber
;
59 protected String collectorsNumber
;
60 protected String fieldNumber
;
61 protected Double longitude
;
62 protected Double latitude
;
63 protected String locality
;
64 protected String languageIso
= null;
65 protected String country
;
66 protected String isocountry
;
68 protected int altitude
;
69 protected ArrayList
<String
> gatheringAgentList
;
70 protected ArrayList
<String
> identificationList
;
71 protected ArrayList
<String
> namedAreaList
;
73 protected HSSFWorkbook hssfworkbook
= null;
/**
 * Creates an importer that holds no unit data yet.
 */
public SynthesysIO() {
    super();
}
82 * Store the Excel's data into variables
83 * @param fileName: the location of the Excel file
84 * @return the list of units data
86 private static ArrayList
<Hashtable
<String
, String
>> parseXLS(String fileName
) {
87 ArrayList
<Hashtable
<String
, String
>> units
= new ArrayList
<Hashtable
<String
,String
>>();
90 POIFSFileSystem fs
= new POIFSFileSystem(new FileInputStream(fileName
));
91 HSSFWorkbook wb
= new HSSFWorkbook(fs
);
92 HSSFSheet sheet
= wb
.getSheetAt(0);
96 int rows
; // No of rows
97 rows
= sheet
.getPhysicalNumberOfRows();
99 int cols
= 0; // No of columns
102 // This trick ensures that we get the data properly even if it doesn't start from first few rows
103 for(int i
= 0; i
< 10 || i
< rows
; i
++) {
104 row
= sheet
.getRow(i
);
106 tmp
= sheet
.getRow(i
).getPhysicalNumberOfCells();
107 if(tmp
> cols
) cols
= tmp
;
110 Hashtable
<String
, String
> headers
= null;
111 ArrayList
<String
> columns
= new ArrayList
<String
>();
112 row
= sheet
.getRow(0);
113 for (int c
=0; c
<cols
; c
++){
114 cell
= row
.getCell(c
);
115 columns
.add(cell
.toString());
117 for(int r
= 1; r
< rows
; r
++) {
118 row
= sheet
.getRow(r
);
119 headers
= new Hashtable
<String
, String
>();
121 for(int c
= 0; c
< cols
; c
++) {
122 cell
= row
.getCell((short)c
);
124 headers
.put(columns
.get(c
),cell
.toString());
131 } catch(Exception ioe
) {
132 ioe
.printStackTrace();
138 * Store the unit's properties into variables
139 * @param unit: the hashmap containing the splitted Excel line (Key=column name, value=value)
141 private void setUnitPropertiesExcel(Hashtable
<String
,String
> unit
){
142 String author
= unit
.get("author");
143 author
=author
.replaceAll("None","");
144 String taxonName
= unit
.get("taxonName");
145 taxonName
= taxonName
.replaceAll("None", "");
148 this.institutionCode
= unit
.get("institution").replaceAll("None", null);
149 } catch (Exception e
) {
152 try {this.collectionCode
= unit
.get("collection").replaceAll("None", null);
153 } catch (Exception e
) {
155 try {this.unitID
= unit
.get("unitID").replaceAll("None", null);
156 } catch (Exception e
) {
158 try {this.recordBasis
= unit
.get("recordBasis").replaceAll("None", null);
159 } catch (Exception e
) {
161 try {this.accessionNumber
= null;
162 } catch (Exception e
) {
164 try {this.locality
= unit
.get("locality").replaceAll("None", null);
165 } catch (Exception e
) {
167 try {this.longitude
= Double
.valueOf(unit
.get("longitude"));
168 } catch (Exception e
) {
170 try {this.latitude
= Double
.valueOf(unit
.get("latitude"));
171 } catch (Exception e
) {
173 try {this.country
= unit
.get("country").replaceAll("None", null);
174 } catch (Exception e
) {
176 try {this.isocountry
= unit
.get("isoCountry").replaceAll("None", null);
177 } catch (Exception e
) {
179 try {this.fieldNumber
= unit
.get("field number").replaceAll("None", null);
180 } catch (Exception e
) {
182 try {this.collectorsNumber
= unit
.get("collector number").replaceAll("None", null);
183 } catch (Exception e
) {
185 try {String coll
=unit
.get("collector");
186 coll
=coll
.replaceAll("None", null);
187 this.gatheringAgentList
.add(coll
);
188 } catch (Exception e
) {
190 try {this.identificationList
.add(taxonName
+" "+author
);
191 } catch (Exception e
) {System
.out
.println(e
);
195 private Institution
getInstitution(String institutionCode
, CdmApplicationController app
){
196 Institution institution
;
197 List
<Institution
> institutions
;
199 System
.out
.println(this.institutionCode
);
200 institutions
= app
.getAgentService().searchInstitutionByCode(this.institutionCode
);
202 System
.out
.println("BLI "+e
);
203 institutions
=new ArrayList
<Institution
>();
205 if (institutions
.size() ==0){
206 System
.out
.println("Institution (agent) unknown");
208 institution
= Institution
.NewInstance();
209 institution
.setCode(this.institutionCode
);
212 System
.out
.println("Institution (agent) already in the db");
213 institution
= institutions
.get(0);
219 * Look if the Collection does already exists
220 * @param collectionCode: a string
221 * @param institution: the current Institution
223 * @return the Collection (existing or new)
225 private Collection
getCollection(String collectionCode
, Institution institution
, CdmApplicationController app
){
226 Collection collection
= Collection
.NewInstance();
227 List
<Collection
> collections
;
229 collections
= app
.getOccurrenceService().searchCollectionByCode(this.collectionCode
);
231 System
.out
.println("BLA"+e
);
232 collections
=new ArrayList
<Collection
>();
234 if (collections
.size() ==0){
235 System
.out
.println("Collection not found "+this.collectionCode
);
236 //create new collection
237 collection
.setCode(this.collectionCode
);
238 collection
.setCodeStandard("GBIF");
239 collection
.setInstitute(institution
);
242 boolean collectionFound
=false;
243 for (int i
=0; i
<collections
.size(); i
++){
244 collection
= collections
.get(i
);
246 if (collection
.getInstitute().getCode().equalsIgnoreCase(institution
.getCode())){
247 //found a collection with the same code and the same institution
248 collectionFound
=true;
250 } catch (NullPointerException e
) {}
252 if (!collectionFound
){
253 collection
.setCode(this.collectionCode
);
254 collection
.setCodeStandard("GBIF");
255 collection
.setInstitute(institution
);
265 * @param derivedThing
268 private void setTaxonNameBase(CdmApplicationController app
, DerivedUnitBase derivedThing
, ReferenceBase sec
){
269 TaxonNameBase taxonName
= null;
270 String fullScientificNameString
;
272 DeterminationEvent determinationEvent
= null;
273 List
<TaxonNameBase
> names
= null;
274 NonViralNameParserImpl nvnpi
= NonViralNameParserImpl
.NewInstance();
275 String scientificName
="";
276 boolean preferredFlag
=false;
278 for (int i
= 0; i
< this.identificationList
.size(); i
++) {
279 fullScientificNameString
= this.identificationList
.get(i
);
280 fullScientificNameString
= fullScientificNameString
.replaceAll(" et ", " & ");
281 if (fullScientificNameString
.indexOf("_preferred_") != -1){
282 scientificName
= fullScientificNameString
.split("_preferred_")[0];
283 String pTmp
= fullScientificNameString
.split("_preferred_")[1].split("_code_")[0];
284 if (pTmp
== "1" || pTmp
.toLowerCase().indexOf("true") != -1)
289 else scientificName
= fullScientificNameString
;
290 if (fullScientificNameString
.indexOf("_code_") != -1){
291 this.nomenclatureCode
= fullScientificNameString
.split("_code_")[1];
294 System
.out
.println("nomenclature: "+this.nomenclatureCode
);
295 if (this.nomenclatureCode
== "Zoological"){
296 taxonName
= nvnpi
.parseFullName(this.fullScientificNameString
,NomenclaturalCode
.ICZN(),null);
297 if (taxonName
.hasProblem())
298 System
.out
.println("pb ICZN");}
299 if (this.nomenclatureCode
== "Botanical"){
300 taxonName
= nvnpi
.parseFullName(this.fullScientificNameString
,NomenclaturalCode
.ICBN(),null);
301 if (taxonName
.hasProblem())
302 System
.out
.println("pb ICBN");}
303 if (this.nomenclatureCode
== "Bacterial"){
304 taxonName
= nvnpi
.parseFullName(this.fullScientificNameString
,NomenclaturalCode
.ICNB(), null);
305 if (taxonName
.hasProblem())
306 System
.out
.println("pb ICNB");
308 if (this.nomenclatureCode
== "Cultivar"){
309 taxonName
= nvnpi
.parseFullName(this.fullScientificNameString
,NomenclaturalCode
.ICNCP(), null);
310 if (taxonName
.hasProblem())
311 System
.out
.println("pb ICNCP");
313 if (this.nomenclatureCode
== "Viral"){
314 taxonName
= nvnpi
.parseFullName(this.fullScientificNameString
,NomenclaturalCode
.ICVCN(), null);
315 if (taxonName
.hasProblem())
316 System
.out
.println("pb ICVCN");
318 try{taxonName
.hasProblem();}
319 catch (Exception e
) {
320 taxonName
= nvnpi
.parseFullName(scientificName
);
322 if (taxonName
.hasProblem())
323 taxonName
= nvnpi
.parseFullName(scientificName
);
325 names
= app
.getNameService().getNamesByName(scientificName
);
326 if (names
.size() == 0){
327 System
.out
.println("Name not found: " + scientificName
);
329 if (names
.size() > 1){
330 System
.out
.println("More then 1 name found: " + scientificName
);
332 System
.out
.println("Name found");
333 taxonName
= names
.get(0);
337 app
.getNameService().saveTaxonName(taxonName
);
338 taxon
= Taxon
.NewInstance(taxonName
, sec
); //TODO use real reference for sec
340 determinationEvent
= DeterminationEvent
.NewInstance();
341 determinationEvent
.setTaxon(taxon
);
342 determinationEvent
.setPreferredFlag(preferredFlag
);
343 derivedThing
.addDetermination(determinationEvent
);
349 * Store the unit with its Gathering informations in the CDM
351 public boolean start(IImportConfigurator config
){
352 boolean result
= true;
353 boolean withCdm
= true;
354 CdmApplicationController app
= null;
355 TransactionStatus tx
= null;
358 app
= CdmApplicationController
.NewInstance(config
.getDestination(), config
.getDbSchemaValidation());
359 } catch (DataSourceNotFoundException e1
) {
360 e1
.printStackTrace();
361 System
.out
.println("DataSourceNotFoundException "+e1
);
362 } catch (TermNotFoundException e1
) {
363 e1
.printStackTrace();
364 System
.out
.println("TermNotFoundException " +e1
);
367 tx
= app
.startTransaction();
369 ReferenceBase sec
= Database
.NewInstance();
370 sec
.setTitleCache("XML DATA");
373 * SPECIMEN OR OBSERVATION OR LIVING
375 DerivedUnitBase derivedThing
= null;
377 if (this.recordBasis
!= null){
378 if (this.recordBasis
.toLowerCase().startsWith("s")) {//specimen
379 derivedThing
= Specimen
.NewInstance();
381 else if (this.recordBasis
.toLowerCase().startsWith("o")) {//observation
382 derivedThing
= Observation
.NewInstance();
384 else if (this.recordBasis
.toLowerCase().startsWith("l")) {//living -> fossil, herbarium sheet....???
385 derivedThing
= LivingBeing
.NewInstance();
388 if (derivedThing
== null)
389 derivedThing
= Observation
.NewInstance();
391 this.setTaxonNameBase(app
, derivedThing
, sec
);
394 //set catalogue number (unitID)
395 derivedThing
.setCatalogNumber(this.unitID
);
396 derivedThing
.setAccessionNumber(this.accessionNumber
);
397 derivedThing
.setCollectorsNumber(this.collectorsNumber
);
401 * INSTITUTION & COLLECTION
404 Institution institution
= this.getInstitution(this.institutionCode
,app
);
406 Collection collection
= this.getCollection(this.collectionCode
, institution
, app
);
407 //link specimen & collection
408 derivedThing
.setCollection(collection
);
414 UnitsGatheringEvent unitsGatheringEvent
= new UnitsGatheringEvent(app
, this.locality
, this.languageIso
, this.longitude
,
415 this.latitude
, this.gatheringAgentList
);
416 UnitsGatheringArea unitsGatheringArea
= new UnitsGatheringArea(this.isocountry
, this.country
,app
);
417 NamedArea areaCountry
= unitsGatheringArea
.getArea();
418 unitsGatheringEvent
.addArea(areaCountry
);
419 unitsGatheringArea
= new UnitsGatheringArea(this.namedAreaList
);
420 ArrayList
<NamedArea
> nas
= unitsGatheringArea
.getAreas();
421 for (int i
=0; i
<nas
.size();i
++)
422 unitsGatheringEvent
.addArea(nas
.get(i
));
425 //create field/observation
426 FieldObservation fieldObservation
= FieldObservation
.NewInstance();
428 fieldObservation
.setFieldNumber(this.fieldNumber
);
429 //join gatheringEvent to fieldObservation
430 fieldObservation
.setGatheringEvent(unitsGatheringEvent
.getGatheringEvent());
432 // //link fieldObservation and specimen
433 DerivationEvent derivationEvent
= DerivationEvent
.NewInstance();
434 derivationEvent
.addOriginal(fieldObservation
);
435 derivedThing
.addDerivationEvent(derivationEvent
);
438 * SAVE AND STORE DATA
441 app
.getTermService().saveTerm(areaCountry
);//save it sooner
442 for (int i
=0; i
<nas
.size();i
++)
443 app
.getTermService().saveTerm(nas
.get(i
));//save it sooner (foreach area)
444 app
.getTermService().saveLanguageData(unitsGatheringEvent
.getLocality());//save it sooner
445 app
.getOccurrenceService().saveSpecimenOrObservationBase(derivedThing
);
447 logger
.info("saved new specimen ...");
450 } catch (Exception e
) {
451 logger
.warn("Error when reading record!!");
455 app
.commitTransaction(tx
);
456 System
.out
.println("commit done");
462 public boolean invoke(IImportConfigurator config
){
463 System
.out
.println("INVOKE Specimen Import From Excel File (Synthesys Cache format");
464 SynthesysIO test
= new SynthesysIO();
465 String sourceName
= config
.getSourceNameString();
467 ArrayList
<Hashtable
<String
,String
>> unitsList
= parseXLS(sourceName
);
468 if (unitsList
!= null){
469 Hashtable
<String
,String
> unit
=null;
470 for (int i
=0; i
<unitsList
.size();i
++){
471 unit
= unitsList
.get(i
);
472 test
.setUnitPropertiesExcel(unit
);//and then invoke
474 config
.setDbSchemaValidation(DbSchemaValidation
.UPDATE
);
483 public boolean invoke(IImportConfigurator config
, Map stores
) {