Use Factory Method for Determination Event
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / globis / GlobisImageImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.globis;
11
12 import java.io.IOException;
13 import java.net.MalformedURLException;
14 import java.net.URI;
15 import java.sql.ResultSet;
16 import java.sql.SQLException;
17 import java.util.HashMap;
18 import java.util.HashSet;
19 import java.util.Map;
20 import java.util.Set;
21 import java.util.UUID;
22 import java.util.regex.Matcher;
23 import java.util.regex.Pattern;
24
25 import org.apache.http.client.ClientProtocolException;
26 import org.apache.log4j.Logger;
27 import org.springframework.stereotype.Component;
28
29 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
30 import eu.etaxonomy.cdm.common.UriUtils;
31 import eu.etaxonomy.cdm.io.common.IOValidator;
32 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
33 import eu.etaxonomy.cdm.io.globis.validation.GlobisImageImportValidator;
34 import eu.etaxonomy.cdm.model.common.Annotation;
35 import eu.etaxonomy.cdm.model.common.CdmBase;
36 import eu.etaxonomy.cdm.model.common.Language;
37 import eu.etaxonomy.cdm.model.common.Marker;
38 import eu.etaxonomy.cdm.model.common.MarkerType;
39 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
40 import eu.etaxonomy.cdm.model.description.Feature;
41 import eu.etaxonomy.cdm.model.description.TaxonDescription;
42 import eu.etaxonomy.cdm.model.description.TextData;
43 import eu.etaxonomy.cdm.model.media.Media;
44 import eu.etaxonomy.cdm.model.name.ZoologicalName;
45 import eu.etaxonomy.cdm.model.occurrence.Collection;
46 import eu.etaxonomy.cdm.model.occurrence.DeterminationEvent;
47 import eu.etaxonomy.cdm.model.occurrence.Specimen;
48 import eu.etaxonomy.cdm.model.reference.Reference;
49 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
50 import eu.etaxonomy.cdm.model.taxon.Taxon;
51 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
52
53
54 /**
55 * @author a.mueller
56 * @created 20.02.2010
57 * @version 1.0
58 */
59 @Component
60 public class GlobisImageImport extends GlobisImportBase<Taxon> {
61 private static final Logger logger = Logger.getLogger(GlobisImageImport.class);
62
63 private int modCount = 10000;
64
65 private UUID uuidArtNonSpecTaxMarkerType = UUID.fromString("be362085-0f5b-4314-96d1-78b9b129ef6d") ;
66 private static final String pluralString = "images";
67 private static final String dbTableName = "Einzelbilder";
68 private static final Class cdmTargetClass = Media.class; //not needed
69
70 private static final String IMAGE_NAMESPACE = "Einzelbilder";
71 private static UUID uuidGartRef = UUID.fromString("af85470f-6e54-4304-9d29-fd117cd56161");
72
/**
 * Creates the image import and passes its plural label, source table name
 * and target class on to the base class constructor.
 */
public GlobisImageImport() {
    super(pluralString, dbTableName, cdmTargetClass);
}
76
77
78
79
80 /* (non-Javadoc)
81 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#getIdQuery()
82 */
83 @Override
84 protected String getIdQuery() {
85 String strRecordQuery =
86 " SELECT BildId " +
87 " FROM " + dbTableName;
88 return strRecordQuery;
89 }
90
91
92
93
94 /* (non-Javadoc)
95 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
96 */
97 @Override
98 protected String getRecordQuery(GlobisImportConfigurator config) {
99 String strRecordQuery =
100 " SELECT i.*, NULL as Created_When, NULL as Created_Who," +
101 " NULL as Updated_who, NULL as Updated_When, NULL as Notes, st.SpecCurrspecID " +
102 " FROM " + getTableName() + " i " +
103 " LEFT JOIN specTax st ON i.spectaxID = st.SpecTaxID " +
104 " WHERE ( i.BildId IN (" + ID_LIST_TOKEN + ") )";
105 return strRecordQuery;
106 }
107
108
109
110 /* (non-Javadoc)
111 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doPartition(eu.etaxonomy.cdm.io.common.ResultSetPartitioner, eu.etaxonomy.cdm.io.globis.GlobisImportState)
112 */
113 @Override
114 public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
115 boolean success = true;
116
117 Set<Media> objectsToSave = new HashSet<Media>();
118
119 Map<String, Specimen> typeMap = (Map<String, Specimen>) partitioner.getObjectMap(GlobisSpecTaxImport.TYPE_NAMESPACE);
120
121 Map<String, Taxon> taxonMap = (Map<String, Taxon>) partitioner.getObjectMap(TAXON_NAMESPACE);
122 Map<String, ZoologicalName> specTaxNameMap = (Map<String, ZoologicalName>) partitioner.getObjectMap(GlobisSpecTaxImport.SPEC_TAX_NAMESPACE);
123
124 ResultSet rs = partitioner.getResultSet();
125
126 Reference<?> refGart = getReferenceService().find(uuidGartRef);
127
128
129 try {
130
131 int i = 0;
132
133 //for each record
134 while (rs.next()){
135
136 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
137
138 Integer bildID = rs.getInt("BildID");
139 Integer spectaxID = nullSafeInt(rs, "spectaxID");
140 Integer taxonID = nullSafeInt(rs, "SpecCurrspecID");
141 String copyright = rs.getString("copyright");
142 String specimenId = rs.getString("specimenID");
143 String bemerkungen = rs.getString("Bemerkungen");
144 String artNotSpecTax = rs.getString("Art non spectax");
145 String motiv = rs.getString("Motiv");
146
147 //ignore:
148 // [file lab2], same as Dateiname04 but less data
149 // Dateipfad
150
151 Set<Media> recordMedia = new HashSet<Media>();
152
153 try {
154
155 makeAllMedia(state, rs, recordMedia, objectsToSave);
156
157 String title = null;
158
159 Specimen specimen = null;
160 if (spectaxID != null){
161 //try to find type specimen
162 if (isNotBlank(motiv) && (motiv.startsWith("type specimen"))){
163 String collectionCode = transformCopyright2CollectionCode(copyright);
164 String id = GlobisSpecTaxImport.getTypeId(spectaxID, collectionCode);
165 specimen = typeMap.get(id);
166 }
167
168 //try to find specTaxName
169 ZoologicalName specTaxTaxonName = specTaxNameMap.get(String.valueOf(spectaxID));
170 if (specTaxTaxonName != null){
171 title = " taxon name " + specTaxTaxonName.getTitleCache();
172 }else{
173 title = " spectaxID " + spectaxID;
174 }
175 }else{
176 title = " name " + getNameFromFileOs(rs) + (isBlank(specimenId)? "" : " (specimenId: " + specimenId + ")");
177 }
178
179 //not type specimen
180 if (specimen == null){
181 specimen = Specimen.NewInstance();
182 specimen.setTitleCache("Specimen for " + title );
183 String collectionCode = transformCopyright2CollectionCode(copyright);
184 //TODO
185 Collection collection = getCollection(collectionCode);
186 specimen.setCollection(collection);
187 }
188
189
190 //source
191 specimen.addSource(String.valueOf(bildID), IMAGE_NAMESPACE, state.getTransactionalSourceReference(), null);
192
193 //GART id (specimenID)
194 if (isNotBlank(specimenId)){
195 specimen.addSource(specimenId, "", refGart, null);
196 }
197 //bemerkungen
198 if (isNotBlank(bemerkungen)){
199 Annotation annotation = Annotation.NewInstance(bemerkungen, null, null);
200 specimen.addAnnotation(annotation);
201 }
202 //media
203 DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(specimen);
204 for (Media media: recordMedia){
205 facade.addDerivedUnitMedia(media);
206 }
207 //art non spectax
208 if (isNotBlank(artNotSpecTax)){
209 if (artNotSpecTax.equalsIgnoreCase("ja")){
210 MarkerType artNotSpecTaxMarker = getMarkerType(state, uuidArtNonSpecTaxMarkerType , "Art non spectax", "This marker is true if in the orginal data the 'Art non spectax' was 'ja'", null) ;
211 specimen.addMarker(Marker.NewInstance(artNotSpecTaxMarker, true));
212 }else{
213 logger.warn(artNotSpecTax + " is not a valid value for 'Art non spectax' (BildID: " + bildID + ")" );
214 }
215 }
216
217 if (spectaxID != null){
218
219 //add to image gallery (discuss if this is also needed if taxon is already added to type specimen
220 // Taxon taxon = taxonMap.get(String.valueOf(taxonID));
221 ZoologicalName specTaxTaxonName = specTaxNameMap.get(String.valueOf(spectaxID));
222
223 //
224 // if (taxon == null){
225 //// taxon = specTaxMap.get(String.valueOf(spectaxID));
226 //// specTaxName = specTaxMap.g
227 // }
228 // if (taxon == null){
229 // logger.warn("No taxon available for specTaxID: " + spectaxID);
230 // }else{
231 // name = CdmBase.deproxy(taxon.getName(), ZoologicalName.class);
232 // }
233
234 //TODO FIXME
235
236 if (specTaxTaxonName == null){
237 logger.warn("Name could not be found for spectaxID: " + spectaxID + " in BildID: " + bildID);
238 }else{
239 Taxon taxon = null;
240 for (Taxon specTaxTaxon: specTaxTaxonName.getTaxa()){
241 taxon = specTaxTaxon;
242 }
243 if (taxon == null){
244 //FIXME
245 Reference<?> undefinedSec = null;
246 taxon = Taxon.NewInstance(specTaxTaxonName, undefinedSec);
247 }
248
249 DeterminationEvent.NewInstance(taxon, specimen);
250
251 }
252
253
254
255
256 // if (taxon != null){
257 // TaxonDescription taxonDescription = getTaxonDescription(taxon, true, true);
258 // if (taxonDescription.getElements().size() == 0){
259 // TextData textData = TextData.NewInstance(Feature.IMAGE());
260 // taxonDescription.addElement(textData);
261 // }
262 // Set<DescriptionElementBase> elements = taxonDescription.getElements();
263 // TextData textData = CdmBase.deproxy(elements.iterator().next(), TextData.class);
264 // for (Media media: recordMedia){
265 // textData.addMedia(media);
266 // }
267 // }
268 }
269
270 } catch (Exception e) {
271 logger.warn("Exception in Einzelbilder: bildID " + bildID + ". " + e.getMessage());
272 e.printStackTrace();
273 }
274
275 }
276
277 logger.info(pluralString + " to save: " + objectsToSave.size());
278 getMediaService().save(objectsToSave);
279
280 return success;
281 } catch (SQLException e) {
282 logger.error("SQLException:" + e);
283 return false;
284 }
285 }
286
287 private Collection getCollection(String collectionCode) {
288 //TODO
289 return null;
290 }
291
292
293
294
295 private String getNameFromFileOs(ResultSet rs) throws SQLException {
296 String fileOS = rs.getString("file OS");
297 Pattern pattern = Pattern.compile("(.+)(_...._..\\.jpg)");
298 Matcher matcher = pattern.matcher(fileOS);
299 if (matcher.matches()){
300 String match = matcher.group(1);
301 return match;
302 }else{
303 logger.warn("FileOS does not match: " + fileOS);
304 return fileOS;
305 }
306 }
307
308
309
310
311 private void makeAllMedia(GlobisImportState state, ResultSet rs, Set<Media> recordMedia, Set<Media> objectsToSave) throws SQLException{
312 //make image path
313 String pathShort = rs.getString("Dateipfad_kurz");
314 String fileOS = rs.getString("file OS");
315 pathShort= pathShort.replace(fileOS, "");
316 String newPath = state.getConfig().getImageBaseUrl();
317 String path = pathShort.replace("image:Webversionen/", newPath);
318
319 Media singleMedia = makeMedia(state, rs, "file OS", "Legende 1", path, objectsToSave );
320 recordMedia.add(singleMedia);
321 singleMedia = makeMedia(state, rs, "Dateinamen02", "Legende 2", path, objectsToSave );
322 recordMedia.add(singleMedia);
323 singleMedia = makeMedia(state, rs, "Dateinamen03", "Legende 3", path, objectsToSave );
324 recordMedia.add(singleMedia);
325 singleMedia = makeMedia(state, rs, "Dateinamen04", "Legende 4", path, objectsToSave );
326 recordMedia.add(singleMedia);
327
328 }
329
330 private Media makeMedia(GlobisImportState state, ResultSet rs, String fileNameAttr, String legendAttr, String path, Set<Media> objectsToSave) throws SQLException {
331 Media media = null;
332 String fileName = rs.getString(fileNameAttr);
333 String legend = rs.getString(legendAttr);
334 Integer bildID = rs.getInt("BildID");
335
336 String uriStr = path+fileName;
337 uriStr = uriStr.replace(" ", "%20");
338
339 URI uri = URI.create(uriStr);
340
341 // Media media = ImageInfo.NewInstanceWithMetaData(uri, null);
342
343 try {
344 boolean readMediaData = state.getConfig().isDoReadMediaData();
345 if (isBlank(legend) && readMediaData){
346 if (UriUtils.isOk(UriUtils.getResponse(uri, null))){
347 logger.warn("Image exists but legend is null " + uri + ", bildID" + bildID );
348 }else{
349 return null;
350 }
351 }
352
353 media = this.getImageMedia(uri.toString(), readMediaData, false);
354 media.putTitle(Language.ENGLISH(), legend);
355 this.doIdCreatedUpdatedNotes(state, media, rs, bildID, IMAGE_NAMESPACE);
356
357 objectsToSave.add(media);
358
359
360 } catch (MalformedURLException e) {
361 e.printStackTrace();
362 } catch (ClientProtocolException e) {
363 e.printStackTrace();
364 } catch (IOException e) {
365 e.printStackTrace();
366 }
367
368 return media;
369 }
370
371 private String transformCopyright2CollectionCode(String copyright){
372
373 if (isBlank(copyright)){
374 return "";
375 }else if(copyright.matches("Museum f.?r Naturkunde der Humboldt-Universit.?t, Berlin")){
376 return "MFNB";
377 }else if(copyright.matches("Staatliches Museum f.?r Tierkunde Dresden")){
378 return "SMTD";
379 }else if(copyright.equals("Natural History Museum, London")){
380 return "BMNH";
381 }else if(copyright.matches("Zoologische Staatssammlung M.?nchen")){
382 return "ZSSM";
383 }else if(copyright.matches("Staatliches Museum f.?r Naturkunde Karlsruhe")){
384 return "SMNK";
385 }else if(copyright.matches("Deutsches Entomologisches Institut M.?ncheberg")){
386 return "DEIE";
387 }else if(copyright.equals("Forschungsinstitut und Naturmuseum Senckenberg")){
388 return "SMFM";
389 }else if(copyright.matches("Mus.?um National d.?Histoire Naturelle, Paris")){
390 return "MNHN";
391 }else if(copyright.equals("Naturhistorisches Museum Wien")){
392 return "NHMW";
393 }else if(copyright.equals("Naturhistoriska Riksmuseet Stockholm")){
394 return "NRMS";
395 }else if(copyright.matches("Staatliches Museum f.?r Naturkunde Stuttgart")){
396 return "SMNS";
397 }else if(copyright.equals("United States National Museum of Natural History, Washington")){
398 return "USNM";
399 }else if(copyright.matches("Zentrum f.?r Biodokumentation des Saarlandes")){
400 return "ZFBS";
401 }else if(copyright.equals("Zoological Museum, University of Copenhagen")){
402 return "ZMUC";
403 }else if(copyright.equals("Zoologisches Forschungsinstitut und Museum \"Alexander Koenig\", Bonn")){
404 return "ZFMK";
405 }else if(copyright.equals("Zoologisches Forschungsmuseum \"Alexander Koenig\", Bonn")){
406 return "ZFMK";
407 }else if(copyright.matches("Zoologisches Institut der Martin-Luther-Universit.?t Halle-Wittenberg")){
408 return "ZIUH";
409 }else if(copyright.matches("Zoologisches Institut Universit.?t T.?bingen")){
410 return "ZIUT";
411 }else{
412 logger.warn("Unknown copyright entry: " + copyright);
413 return "";
414 }
415
416
417 }
418
419
420
421 /* (non-Javadoc)
422 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
423 */
424 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
425 String nameSpace;
426 Class cdmClass;
427 Set<String> idSet;
428 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
429 try{
430 Set<String> currSpecIdSet = new HashSet<String>();
431 Set<String> specTaxIdSet = new HashSet<String>();
432 Set<String> typeIdSet = new HashSet<String>();
433
434 while (rs.next()){
435 handleForeignKey(rs, currSpecIdSet, "SpecCurrspecID");
436 handleTypeKey(rs, typeIdSet, "spectaxID", "copyright");
437 }
438
439 //specTax map
440 nameSpace = GlobisSpecTaxImport.SPEC_TAX_NAMESPACE;
441 cdmClass = TaxonBase.class;
442 idSet = specTaxIdSet;
443 Map<String, TaxonBase> specTaxMap = (Map<String, TaxonBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
444 result.put(nameSpace, specTaxMap);
445
446 //taxon map
447 nameSpace = TAXON_NAMESPACE;
448 cdmClass = Taxon.class;
449 idSet = currSpecIdSet;
450 Map<String, Taxon> taxonMap = (Map<String, Taxon>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
451 result.put(nameSpace, taxonMap);
452
453
454 //type map
455 nameSpace = GlobisSpecTaxImport.TYPE_NAMESPACE;
456 cdmClass = Specimen.class;
457 idSet = typeIdSet;
458 Map<String, Specimen> typeMap = (Map<String, Specimen>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
459 result.put(nameSpace, typeMap);
460
461
462 } catch (SQLException e) {
463 throw new RuntimeException(e);
464 }
465 return result;
466 }
467
468 private void handleTypeKey(ResultSet rs, Set<String> idSet, String specTaxIdAttr, String copyrightAttr)
469 throws SQLException {
470 Integer specTaxId = nullSafeInt(rs, specTaxIdAttr);
471 if (specTaxId != null){
472 String copyright = rs.getString(copyrightAttr);
473 if (isNotBlank(copyright)){
474 String id = GlobisSpecTaxImport.getTypeId(specTaxId, transformCopyright2CollectionCode(copyright));
475 idSet.add(id);
476 }
477 }
478 }
479
480 /* (non-Javadoc)
481 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
482 */
483 @Override
484 protected boolean doCheck(GlobisImportState state){
485 IOValidator<GlobisImportState> validator = new GlobisImageImportValidator();
486 return validator.validate(state);
487 }
488
489
490 /* (non-Javadoc)
491 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
492 */
493 protected boolean isIgnore(GlobisImportState state){
494 return ! state.getConfig().isDoImages();
495 }
496
497
498
499
500 /* (non-Javadoc)
501 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doInvoke(eu.etaxonomy.cdm.io.globis.GlobisImportState)
502 */
503 @Override
504 protected void doInvoke(GlobisImportState state) {
505 Reference refGart = ReferenceFactory.newGeneric();
506 refGart.setTitleCache("GART");
507 refGart.setUuid(uuidGartRef);
508 getReferenceService().saveOrUpdate(refGart);
509 super.doInvoke(state);
510 }
511
512
513
514
515
516 }