Separation of ABCD specimen import and Synthesys specimen import – IO package
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / synthesys / SynthesysIO.java
1 package eu.etaxonomy.cdm.io.synthesys;
2
3 import java.io.File;
4 import java.io.FileInputStream;
5 import java.util.ArrayList;
6 import java.util.Hashtable;
7 import java.util.List;
8 import java.util.Map;
9
10 import javax.xml.parsers.DocumentBuilder;
11 import javax.xml.parsers.DocumentBuilderFactory;
12
13 import org.apache.log4j.Logger;
14 import org.apache.poi.hssf.usermodel.HSSFCell;
15 import org.apache.poi.hssf.usermodel.HSSFRow;
16 import org.apache.poi.hssf.usermodel.HSSFSheet;
17 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
18 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
19 import org.springframework.transaction.TransactionStatus;
20 import org.w3c.dom.Document;
21 import org.w3c.dom.Element;
22 import org.w3c.dom.NodeList;
23
24 import eu.etaxonomy.cdm.api.application.CdmApplicationController;
25 import eu.etaxonomy.cdm.database.DataSourceNotFoundException;
26 import eu.etaxonomy.cdm.database.DbSchemaValidation;
27 import eu.etaxonomy.cdm.io.common.ICdmIO;
28 import eu.etaxonomy.cdm.io.common.IImportConfigurator;
29 import eu.etaxonomy.cdm.model.agent.Institution;
30 import eu.etaxonomy.cdm.model.common.init.TermNotFoundException;
31 import eu.etaxonomy.cdm.model.location.NamedArea;
32 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
33 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
34 import eu.etaxonomy.cdm.model.occurrence.Collection;
35 import eu.etaxonomy.cdm.model.occurrence.DerivationEvent;
36 import eu.etaxonomy.cdm.model.occurrence.DerivedUnitBase;
37 import eu.etaxonomy.cdm.model.occurrence.DeterminationEvent;
38 import eu.etaxonomy.cdm.model.occurrence.FieldObservation;
39 import eu.etaxonomy.cdm.model.occurrence.LivingBeing;
40 import eu.etaxonomy.cdm.model.occurrence.Observation;
41 import eu.etaxonomy.cdm.model.occurrence.Specimen;
42 import eu.etaxonomy.cdm.model.reference.Database;
43 import eu.etaxonomy.cdm.model.reference.ReferenceBase;
44 import eu.etaxonomy.cdm.model.taxon.Taxon;
45 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
46
47 public class SynthesysIO extends SpecimenIoBase implements ICdmIO {
48
49
50 private static final Logger logger = Logger.getLogger(SynthesysIO.class);
51
52 protected String fullScientificNameString;
53 protected String nomenclatureCode;
54 protected String institutionCode;
55 protected String collectionCode;
56 protected String unitID;
57 protected String recordBasis;
58 protected String accessionNumber;
59 protected String collectorsNumber;
60 protected String fieldNumber;
61 protected Double longitude;
62 protected Double latitude;
63 protected String locality;
64 protected String languageIso = null;
65 protected String country;
66 protected String isocountry;
67 protected int depth;
68 protected int altitude;
69 protected ArrayList<String> gatheringAgentList;
70 protected ArrayList<String> identificationList;
71 protected ArrayList<String> namedAreaList;
72
73 protected HSSFWorkbook hssfworkbook = null;
74
75
76 public SynthesysIO() {
77 super();
78 }
79
80
81 /*
82 * Store the Excel's data into variables
83 * @param fileName: the location of the Excel file
84 * @return the list of units data
85 */
86 private static ArrayList<Hashtable<String, String>> parseXLS(String fileName) {
87 ArrayList<Hashtable<String, String>> units = new ArrayList<Hashtable<String,String>>();
88
89 try {
90 POIFSFileSystem fs = new POIFSFileSystem(new FileInputStream(fileName));
91 HSSFWorkbook wb = new HSSFWorkbook(fs);
92 HSSFSheet sheet = wb.getSheetAt(0);
93 HSSFRow row;
94 HSSFCell cell;
95
96 int rows; // No of rows
97 rows = sheet.getPhysicalNumberOfRows();
98
99 int cols = 0; // No of columns
100 int tmp = 0;
101
102 // This trick ensures that we get the data properly even if it doesn't start from first few rows
103 for(int i = 0; i < 10 || i < rows; i++) {
104 row = sheet.getRow(i);
105 if(row != null) {
106 tmp = sheet.getRow(i).getPhysicalNumberOfCells();
107 if(tmp > cols) cols = tmp;
108 }
109 }
110 Hashtable<String, String> headers = null;
111 ArrayList<String> columns = new ArrayList<String>();
112 row = sheet.getRow(0);
113 for (int c =0; c<cols; c++){
114 cell = row.getCell(c);
115 columns.add(cell.toString());
116 }
117 for(int r = 1; r < rows; r++) {
118 row = sheet.getRow(r);
119 headers = new Hashtable<String, String>();
120 if(row != null) {
121 for(int c = 0; c < cols; c++) {
122 cell = row.getCell((short)c);
123 if(cell != null) {
124 headers.put(columns.get(c),cell.toString());
125 }
126 }
127 }
128 units.add(headers);
129 }
130
131 } catch(Exception ioe) {
132 ioe.printStackTrace();
133 }
134 return units;
135 }
136
137 /*
138 * Store the unit's properties into variables
139 * @param unit: the hashmap containing the splitted Excel line (Key=column name, value=value)
140 */
141 private void setUnitPropertiesExcel(Hashtable<String,String> unit){
142 String author = unit.get("author");
143 author=author.replaceAll("None","");
144 String taxonName = unit.get("taxonName");
145 taxonName = taxonName.replaceAll("None", "");
146
147 try {
148 this.institutionCode = unit.get("institution").replaceAll("None", null);
149 } catch (Exception e) {
150 }
151
152 try {this.collectionCode = unit.get("collection").replaceAll("None", null);
153 } catch (Exception e) {
154 }
155 try {this.unitID = unit.get("unitID").replaceAll("None", null);
156 } catch (Exception e) {
157 }
158 try {this.recordBasis = unit.get("recordBasis").replaceAll("None", null);
159 } catch (Exception e) {
160 }
161 try {this.accessionNumber = null;
162 } catch (Exception e) {
163 }
164 try {this.locality = unit.get("locality").replaceAll("None", null);
165 } catch (Exception e) {
166 }
167 try {this.longitude = Double.valueOf(unit.get("longitude"));
168 } catch (Exception e) {
169 }
170 try {this.latitude = Double.valueOf(unit.get("latitude"));
171 } catch (Exception e) {
172 }
173 try {this.country = unit.get("country").replaceAll("None", null);
174 } catch (Exception e) {
175 }
176 try {this.isocountry = unit.get("isoCountry").replaceAll("None", null);
177 } catch (Exception e) {
178 }
179 try {this.fieldNumber = unit.get("field number").replaceAll("None", null);
180 } catch (Exception e) {
181 }
182 try {this.collectorsNumber = unit.get("collector number").replaceAll("None", null);
183 } catch (Exception e) {
184 }
185 try {String coll =unit.get("collector");
186 coll=coll.replaceAll("None", null);
187 this.gatheringAgentList.add(coll);
188 } catch (Exception e) {
189 }
190 try {this.identificationList.add(taxonName+" "+author);
191 } catch (Exception e) {System.out.println(e);
192 }
193 }
194
195 private Institution getInstitution(String institutionCode, CdmApplicationController app){
196 Institution institution;
197 List<Institution> institutions;
198 try{
199 System.out.println(this.institutionCode);
200 institutions= app.getAgentService().searchInstitutionByCode(this.institutionCode);
201 }catch(Exception e){
202 System.out.println("BLI "+e);
203 institutions=new ArrayList<Institution>();
204 }
205 if (institutions.size() ==0){
206 System.out.println("Institution (agent) unknown");
207 //create institution
208 institution = Institution.NewInstance();
209 institution.setCode(this.institutionCode);
210 }
211 else{
212 System.out.println("Institution (agent) already in the db");
213 institution = institutions.get(0);
214 }
215 return institution;
216 }
217
218 /*
219 * Look if the Collection does already exists
220 * @param collectionCode: a string
221 * @param institution: the current Institution
222 * @param app
223 * @return the Collection (existing or new)
224 */
225 private Collection getCollection(String collectionCode, Institution institution, CdmApplicationController app){
226 Collection collection = Collection.NewInstance();
227 List<Collection> collections;
228 try{
229 collections = app.getOccurrenceService().searchCollectionByCode(this.collectionCode);
230 }catch(Exception e){
231 System.out.println("BLA"+e);
232 collections=new ArrayList<Collection>();
233 }
234 if (collections.size() ==0){
235 System.out.println("Collection not found "+this.collectionCode);
236 //create new collection
237 collection.setCode(this.collectionCode);
238 collection.setCodeStandard("GBIF");
239 collection.setInstitute(institution);
240 }
241 else{
242 boolean collectionFound=false;
243 for (int i=0; i<collections.size(); i++){
244 collection = collections.get(i);
245 try {
246 if (collection.getInstitute().getCode().equalsIgnoreCase(institution.getCode())){
247 //found a collection with the same code and the same institution
248 collectionFound=true;
249 }
250 } catch (NullPointerException e) {}
251 }
252 if (!collectionFound){
253 collection.setCode(this.collectionCode);
254 collection.setCodeStandard("GBIF");
255 collection.setInstitute(institution);
256 }
257
258 }
259 return collection;
260 }
261
262 /*
263 *
264 * @param app
265 * @param derivedThing
266 * @param sec
267 */
268 private void setTaxonNameBase(CdmApplicationController app, DerivedUnitBase derivedThing, ReferenceBase sec){
269 TaxonNameBase taxonName = null;
270 String fullScientificNameString;
271 Taxon taxon = null;
272 DeterminationEvent determinationEvent = null;
273 List<TaxonNameBase> names = null;
274 NonViralNameParserImpl nvnpi = NonViralNameParserImpl.NewInstance();
275 String scientificName="";
276 boolean preferredFlag=false;
277
278 for (int i = 0; i < this.identificationList.size(); i++) {
279 fullScientificNameString = this.identificationList.get(i);
280 fullScientificNameString = fullScientificNameString.replaceAll(" et ", " & ");
281 if (fullScientificNameString.indexOf("_preferred_") != -1){
282 scientificName = fullScientificNameString.split("_preferred_")[0];
283 String pTmp = fullScientificNameString.split("_preferred_")[1].split("_code_")[0];
284 if (pTmp == "1" || pTmp.toLowerCase().indexOf("true") != -1)
285 preferredFlag=true;
286 else
287 preferredFlag=false;
288 }
289 else scientificName = fullScientificNameString;
290 if (fullScientificNameString.indexOf("_code_") != -1){
291 this.nomenclatureCode = fullScientificNameString.split("_code_")[1];
292 }
293
294 System.out.println("nomenclature: "+this.nomenclatureCode);
295 if (this.nomenclatureCode == "Zoological"){
296 taxonName = nvnpi.parseFullName(this.fullScientificNameString,NomenclaturalCode.ICZN(),null);
297 if (taxonName.hasProblem())
298 System.out.println("pb ICZN");}
299 if (this.nomenclatureCode == "Botanical"){
300 taxonName = nvnpi.parseFullName(this.fullScientificNameString,NomenclaturalCode.ICBN(),null);
301 if (taxonName.hasProblem())
302 System.out.println("pb ICBN");}
303 if (this.nomenclatureCode == "Bacterial"){
304 taxonName = nvnpi.parseFullName(this.fullScientificNameString,NomenclaturalCode.ICNB(), null);
305 if (taxonName.hasProblem())
306 System.out.println("pb ICNB");
307 }
308 if (this.nomenclatureCode == "Cultivar"){
309 taxonName = nvnpi.parseFullName(this.fullScientificNameString,NomenclaturalCode.ICNCP(), null);
310 if (taxonName.hasProblem())
311 System.out.println("pb ICNCP");
312 }
313 if (this.nomenclatureCode == "Viral"){
314 taxonName = nvnpi.parseFullName(this.fullScientificNameString,NomenclaturalCode.ICVCN(), null);
315 if (taxonName.hasProblem())
316 System.out.println("pb ICVCN");
317 }
318 try{taxonName.hasProblem();}
319 catch (Exception e) {
320 taxonName = nvnpi.parseFullName(scientificName);
321 }
322 if (taxonName.hasProblem())
323 taxonName = nvnpi.parseFullName(scientificName);
324 if (true){
325 names = app.getNameService().getNamesByName(scientificName);
326 if (names.size() == 0){
327 System.out.println("Name not found: " + scientificName);
328 }else{
329 if (names.size() > 1){
330 System.out.println("More then 1 name found: " + scientificName);
331 }
332 System.out.println("Name found");
333 taxonName = names.get(0);
334 }
335 }
336
337 app.getNameService().saveTaxonName(taxonName);
338 taxon = Taxon.NewInstance(taxonName, sec); //TODO use real reference for sec
339
340 determinationEvent = DeterminationEvent.NewInstance();
341 determinationEvent.setTaxon(taxon);
342 determinationEvent.setPreferredFlag(preferredFlag);
343 derivedThing.addDetermination(determinationEvent);
344 }
345
346 }
347
348 /*
349 * Store the unit with its Gathering informations in the CDM
350 */
351 public boolean start(IImportConfigurator config){
352 boolean result = true;
353 boolean withCdm = true;
354 CdmApplicationController app = null;
355 TransactionStatus tx = null;
356
357 try {
358 app = CdmApplicationController.NewInstance(config.getDestination(), config.getDbSchemaValidation());
359 } catch (DataSourceNotFoundException e1) {
360 e1.printStackTrace();
361 System.out.println("DataSourceNotFoundException "+e1);
362 } catch (TermNotFoundException e1) {
363 e1.printStackTrace();
364 System.out.println("TermNotFoundException " +e1);
365 }
366
367 tx = app.startTransaction();
368 try {
369 ReferenceBase sec = Database.NewInstance();
370 sec.setTitleCache("XML DATA");
371
372 /**
373 * SPECIMEN OR OBSERVATION OR LIVING
374 */
375 DerivedUnitBase derivedThing = null;
376 //create specimen
377 if (this.recordBasis != null){
378 if (this.recordBasis.toLowerCase().startsWith("s")) {//specimen
379 derivedThing = Specimen.NewInstance();
380 }
381 else if (this.recordBasis.toLowerCase().startsWith("o")) {//observation
382 derivedThing = Observation.NewInstance();
383 }
384 else if (this.recordBasis.toLowerCase().startsWith("l")) {//living -> fossil, herbarium sheet....???
385 derivedThing = LivingBeing.NewInstance();
386 }
387 }
388 if (derivedThing == null)
389 derivedThing = Observation.NewInstance();
390
391 this.setTaxonNameBase(app, derivedThing, sec);
392
393
394 //set catalogue number (unitID)
395 derivedThing.setCatalogNumber(this.unitID);
396 derivedThing.setAccessionNumber(this.accessionNumber);
397 derivedThing.setCollectorsNumber(this.collectorsNumber);
398
399
400 /**
401 * INSTITUTION & COLLECTION
402 */
403 //manage institution
404 Institution institution = this.getInstitution(this.institutionCode,app);
405 //manage collection
406 Collection collection = this.getCollection(this.collectionCode, institution, app);
407 //link specimen & collection
408 derivedThing.setCollection(collection);
409
410 /**
411 * GATHERING EVENT
412 */
413
414 UnitsGatheringEvent unitsGatheringEvent = new UnitsGatheringEvent(app, this.locality, this.languageIso, this.longitude,
415 this.latitude, this.gatheringAgentList);
416 UnitsGatheringArea unitsGatheringArea = new UnitsGatheringArea(this.isocountry, this.country,app);
417 NamedArea areaCountry = unitsGatheringArea.getArea();
418 unitsGatheringEvent.addArea(areaCountry);
419 unitsGatheringArea = new UnitsGatheringArea(this.namedAreaList);
420 ArrayList<NamedArea> nas = unitsGatheringArea.getAreas();
421 for (int i=0; i<nas.size();i++)
422 unitsGatheringEvent.addArea(nas.get(i));
423
424
425 //create field/observation
426 FieldObservation fieldObservation = FieldObservation.NewInstance();
427 //add fieldNumber
428 fieldObservation.setFieldNumber(this.fieldNumber);
429 //join gatheringEvent to fieldObservation
430 fieldObservation.setGatheringEvent(unitsGatheringEvent.getGatheringEvent());
431
432 // //link fieldObservation and specimen
433 DerivationEvent derivationEvent = DerivationEvent.NewInstance();
434 derivationEvent.addOriginal(fieldObservation);
435 derivedThing.addDerivationEvent(derivationEvent);
436
437 /**
438 * SAVE AND STORE DATA
439 */
440
441 app.getTermService().saveTerm(areaCountry);//save it sooner
442 for (int i=0; i<nas.size();i++)
443 app.getTermService().saveTerm(nas.get(i));//save it sooner (foreach area)
444 app.getTermService().saveLanguageData(unitsGatheringEvent.getLocality());//save it sooner
445 app.getOccurrenceService().saveSpecimenOrObservationBase(derivedThing);
446
447 logger.info("saved new specimen ...");
448
449
450 } catch (Exception e) {
451 logger.warn("Error when reading record!!");
452 e.printStackTrace();
453 result = false;
454 }
455 app.commitTransaction(tx);
456 System.out.println("commit done");
457 app.close();
458 return result;
459 }
460
461
462 public boolean invoke(IImportConfigurator config){
463 System.out.println("INVOKE Specimen Import From Excel File (Synthesys Cache format");
464 SynthesysIO test = new SynthesysIO();
465 String sourceName = config.getSourceNameString();
466
467 ArrayList<Hashtable<String,String>> unitsList = parseXLS(sourceName);
468 if (unitsList != null){
469 Hashtable<String,String> unit=null;
470 for (int i=0; i<unitsList.size();i++){
471 unit = unitsList.get(i);
472 test.setUnitPropertiesExcel(unit);//and then invoke
473 test.start(config);
474 config.setDbSchemaValidation(DbSchemaValidation.UPDATE);
475 }
476 }
477
478 return false;
479
480 }
481
482
483 public boolean invoke(IImportConfigurator config, Map stores) {
484 invoke(config);
485 return false;
486 }
487
488
489
490
491 }