latest globis
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / globis / GlobisImageImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.globis;
11
12 import java.io.IOException;
13 import java.net.MalformedURLException;
14 import java.net.URI;
15 import java.sql.ResultSet;
16 import java.sql.SQLException;
17 import java.util.HashMap;
18 import java.util.HashSet;
19 import java.util.Map;
20 import java.util.Set;
21 import java.util.UUID;
22 import java.util.regex.Matcher;
23 import java.util.regex.Pattern;
24
25 import org.apache.http.client.ClientProtocolException;
26 import org.apache.log4j.Logger;
27 import org.springframework.stereotype.Component;
28
29 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
30 import eu.etaxonomy.cdm.common.UriUtils;
31 import eu.etaxonomy.cdm.io.common.IOValidator;
32 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
33 import eu.etaxonomy.cdm.io.globis.validation.GlobisImageImportValidator;
34 import eu.etaxonomy.cdm.model.common.Annotation;
35 import eu.etaxonomy.cdm.model.common.CdmBase;
36 import eu.etaxonomy.cdm.model.common.Language;
37 import eu.etaxonomy.cdm.model.common.Marker;
38 import eu.etaxonomy.cdm.model.common.MarkerType;
39 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
40 import eu.etaxonomy.cdm.model.description.Feature;
41 import eu.etaxonomy.cdm.model.description.TaxonDescription;
42 import eu.etaxonomy.cdm.model.description.TextData;
43 import eu.etaxonomy.cdm.model.media.Media;
44 import eu.etaxonomy.cdm.model.name.ZoologicalName;
45 import eu.etaxonomy.cdm.model.occurrence.Collection;
46 import eu.etaxonomy.cdm.model.occurrence.DeterminationEvent;
47 import eu.etaxonomy.cdm.model.occurrence.Specimen;
48 import eu.etaxonomy.cdm.model.reference.Reference;
49 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
50 import eu.etaxonomy.cdm.model.taxon.Taxon;
51 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
52
53
54 /**
55 * @author a.mueller
56 * @created 20.02.2010
57 * @version 1.0
58 */
59 @Component
60 public class GlobisImageImport extends GlobisImportBase<Taxon> {
61 private static final Logger logger = Logger.getLogger(GlobisImageImport.class);
62
63 private int modCount = 10000;
64
65 private UUID uuidArtNonSpecTaxMarkerType = UUID.fromString("be362085-0f5b-4314-96d1-78b9b129ef6d") ;
66 private static final String pluralString = "images";
67 private static final String dbTableName = "Einzelbilder";
68 private static final Class cdmTargetClass = Media.class; //not needed
69
70 private static final String IMAGE_NAMESPACE = "Einzelbilder";
71 private static UUID uuidGartRef = UUID.fromString("af85470f-6e54-4304-9d29-fd117cd56161");
72
/**
 * Creates the image import and hands the plural label, the source table name and the
 * target class to the {@code GlobisImportBase} constructor.
 */
public GlobisImageImport() {
    super(pluralString, dbTableName, cdmTargetClass);
}
76
77
78
79
80 /* (non-Javadoc)
81 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#getIdQuery()
82 */
83 @Override
84 protected String getIdQuery() {
85 String strRecordQuery =
86 " SELECT BildId " +
87 " FROM " + dbTableName;
88 return strRecordQuery;
89 }
90
91
92
93
94 /* (non-Javadoc)
95 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
96 */
97 @Override
98 protected String getRecordQuery(GlobisImportConfigurator config) {
99 String strRecordQuery =
100 " SELECT i.*, NULL as Created_When, NULL as Created_Who," +
101 " NULL as Updated_who, NULL as Updated_When, NULL as Notes, st.SpecCurrspecID " +
102 " FROM " + getTableName() + " i " +
103 " LEFT JOIN specTax st ON i.spectaxID = st.SpecTaxID " +
104 " WHERE ( i.BildId IN (" + ID_LIST_TOKEN + ") )";
105 return strRecordQuery;
106 }
107
108
109
110 /* (non-Javadoc)
111 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doPartition(eu.etaxonomy.cdm.io.common.ResultSetPartitioner, eu.etaxonomy.cdm.io.globis.GlobisImportState)
112 */
113 @Override
114 public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
115 boolean success = true;
116
117 Set<Media> objectsToSave = new HashSet<Media>();
118
119 Map<String, Specimen> typeMap = (Map<String, Specimen>) partitioner.getObjectMap(GlobisSpecTaxImport.TYPE_NAMESPACE);
120
121 Map<String, Taxon> taxonMap = (Map<String, Taxon>) partitioner.getObjectMap(TAXON_NAMESPACE);
122 Map<String, ZoologicalName> specTaxNameMap = (Map<String, ZoologicalName>) partitioner.getObjectMap(GlobisSpecTaxImport.SPEC_TAX_NAMESPACE);
123
124 ResultSet rs = partitioner.getResultSet();
125
126 Reference<?> refGart = getReferenceService().find(uuidGartRef);
127
128
129 try {
130
131 int i = 0;
132
133 //for each record
134 while (rs.next()){
135
136 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
137
138 Integer bildID = rs.getInt("BildID");
139 Integer spectaxID = nullSafeInt(rs, "spectaxID");
140 Integer taxonID = nullSafeInt(rs, "SpecCurrspecID");
141 String copyright = rs.getString("copyright");
142 String specimenId = rs.getString("specimenID");
143 String bemerkungen = rs.getString("Bemerkungen");
144 String artNotSpecTax = rs.getString("Art non spectax");
145 String motiv = rs.getString("Motiv");
146
147 //ignore:
148 // [file lab2], same as Dateiname04 but less data
149 // Dateipfad
150
151 Set<Media> recordMedia = new HashSet<Media>();
152
153 try {
154
155 makeAllMedia(state, rs, recordMedia, objectsToSave);
156
157 String title = null;
158
159 Specimen specimen = null;
160 if (spectaxID != null){
161 //try to find type specimen
162 if (isNotBlank(motiv) && (motiv.startsWith("type specimen"))){
163 String collectionCode = transformCopyright2CollectionCode(copyright);
164 String id = GlobisSpecTaxImport.getTypeId(spectaxID, collectionCode);
165 specimen = typeMap.get(id);
166 }
167
168 //try to find specTaxName
169 ZoologicalName specTaxTaxonName = specTaxNameMap.get(String.valueOf(spectaxID));
170 if (specTaxTaxonName != null){
171 title = " taxon name " + specTaxTaxonName.getTitleCache();
172 }else{
173 title = " spectaxID " + spectaxID;
174 }
175 }else{
176 title = " name " + getNameFromFileOs(rs) + (isBlank(specimenId)? "" : " (specimenId: " + specimenId + ")");
177 }
178
179 //not type specimen
180 if (specimen == null){
181 specimen = Specimen.NewInstance();
182 specimen.setTitleCache("Specimen for " + title );
183 String collectionCode = transformCopyright2CollectionCode(copyright);
184 //TODO
185 Collection collection = getCollection(collectionCode);
186 specimen.setCollection(collection);
187 }
188
189
190 //source
191 specimen.addSource(String.valueOf(bildID), IMAGE_NAMESPACE, state.getTransactionalSourceReference(), null);
192
193 //GART id (specimenID)
194 if (isNotBlank(specimenId)){
195 specimen.addSource(specimenId, "", refGart, null);
196 }
197 //bemerkungen
198 if (isNotBlank(bemerkungen)){
199 Annotation annotation = Annotation.NewInstance(bemerkungen, null, null);
200 specimen.addAnnotation(annotation);
201 }
202 //media
203 DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(specimen);
204 for (Media media: recordMedia){
205 facade.addDerivedUnitMedia(media);
206 }
207 //art non spectax
208 if (isNotBlank(artNotSpecTax)){
209 if (artNotSpecTax.equalsIgnoreCase("ja")){
210 MarkerType artNotSpecTaxMarker = getMarkerType(state, uuidArtNonSpecTaxMarkerType , "Art non spectax", "This marker is true if in the orginal data the 'Art non spectax' was 'ja'", null) ;
211 specimen.addMarker(Marker.NewInstance(artNotSpecTaxMarker, true));
212 }else{
213 logger.warn(artNotSpecTax + " is not a valid value for 'Art non spectax' (BildID: " + bildID + ")" );
214 }
215 }
216
217 if (spectaxID != null){
218
219 //add to image gallery (discuss if this is also needed if taxon is already added to type specimen
220 // Taxon taxon = taxonMap.get(String.valueOf(taxonID));
221 ZoologicalName specTaxTaxonName = specTaxNameMap.get(String.valueOf(spectaxID));
222
223 //
224 // if (taxon == null){
225 //// taxon = specTaxMap.get(String.valueOf(spectaxID));
226 //// specTaxName = specTaxMap.g
227 // }
228 // if (taxon == null){
229 // logger.warn("No taxon available for specTaxID: " + spectaxID);
230 // }else{
231 // name = CdmBase.deproxy(taxon.getName(), ZoologicalName.class);
232 // }
233
234 //TODO FIXME
235
236 if (specTaxTaxonName == null){
237 logger.warn("Name could not be found for spectaxID: " + spectaxID + " in BildID: " + bildID);
238 }else{
239 Taxon taxon = null;
240 for (Taxon specTaxTaxon: specTaxTaxonName.getTaxa()){
241 taxon = specTaxTaxon;
242 }
243 if (taxon == null){
244 //FIXME
245 Reference undefinedSec = null;
246 taxon = Taxon.NewInstance(specTaxTaxonName, undefinedSec);
247 }
248
249 DeterminationEvent determinationEvent = DeterminationEvent.NewInstance();
250 determinationEvent.setTaxon(taxon);
251 determinationEvent.setIdentifiedUnit(specimen);
252 }
253
254
255
256
257 // if (taxon != null){
258 // TaxonDescription taxonDescription = getTaxonDescription(taxon, true, true);
259 // if (taxonDescription.getElements().size() == 0){
260 // TextData textData = TextData.NewInstance(Feature.IMAGE());
261 // taxonDescription.addElement(textData);
262 // }
263 // Set<DescriptionElementBase> elements = taxonDescription.getElements();
264 // TextData textData = CdmBase.deproxy(elements.iterator().next(), TextData.class);
265 // for (Media media: recordMedia){
266 // textData.addMedia(media);
267 // }
268 // }
269 }
270
271 } catch (Exception e) {
272 logger.warn("Exception in Einzelbilder: bildID " + bildID + ". " + e.getMessage());
273 e.printStackTrace();
274 }
275
276 }
277
278 logger.info(pluralString + " to save: " + objectsToSave.size());
279 getMediaService().save(objectsToSave);
280
281 return success;
282 } catch (SQLException e) {
283 logger.error("SQLException:" + e);
284 return false;
285 }
286 }
287
288 private Collection getCollection(String collectionCode) {
289 //TODO
290 return null;
291 }
292
293
294
295
296 private String getNameFromFileOs(ResultSet rs) throws SQLException {
297 String fileOS = rs.getString("file OS");
298 Pattern pattern = Pattern.compile("(.+)(_...._..\\.jpg)");
299 Matcher matcher = pattern.matcher(fileOS);
300 if (matcher.matches()){
301 String match = matcher.group(1);
302 return match;
303 }else{
304 logger.warn("FileOS does not match: " + fileOS);
305 return fileOS;
306 }
307 }
308
309
310
311
312 private void makeAllMedia(GlobisImportState state, ResultSet rs, Set<Media> recordMedia, Set<Media> objectsToSave) throws SQLException{
313 //make image path
314 String pathShort = rs.getString("Dateipfad_kurz");
315 String fileOS = rs.getString("file OS");
316 pathShort= pathShort.replace(fileOS, "");
317 String newPath = state.getConfig().getImageBaseUrl();
318 String path = pathShort.replace("image:Webversionen/", newPath);
319
320 Media singleMedia = makeMedia(state, rs, "file OS", "Legende 1", path, objectsToSave );
321 recordMedia.add(singleMedia);
322 singleMedia = makeMedia(state, rs, "Dateinamen02", "Legende 2", path, objectsToSave );
323 recordMedia.add(singleMedia);
324 singleMedia = makeMedia(state, rs, "Dateinamen03", "Legende 3", path, objectsToSave );
325 recordMedia.add(singleMedia);
326 singleMedia = makeMedia(state, rs, "Dateinamen04", "Legende 4", path, objectsToSave );
327 recordMedia.add(singleMedia);
328
329 }
330
331 private Media makeMedia(GlobisImportState state, ResultSet rs, String fileNameAttr, String legendAttr, String path, Set<Media> objectsToSave) throws SQLException {
332 Media media = null;
333 String fileName = rs.getString(fileNameAttr);
334 String legend = rs.getString(legendAttr);
335 Integer bildID = rs.getInt("BildID");
336
337 String uriStr = path+fileName;
338 uriStr = uriStr.replace(" ", "%20");
339
340 URI uri = URI.create(uriStr);
341
342 // Media media = ImageInfo.NewInstanceWithMetaData(uri, null);
343
344 try {
345 boolean readMediaData = state.getConfig().isDoReadMediaData();
346 if (isBlank(legend) && readMediaData){
347 if (UriUtils.isOk(UriUtils.getResponse(uri, null))){
348 logger.warn("Image exists but legend is null " + uri + ", bildID" + bildID );
349 }else{
350 return null;
351 }
352 }
353
354 media = this.getImageMedia(uri.toString(), readMediaData, false);
355 media.putTitle(Language.ENGLISH(), legend);
356 this.doIdCreatedUpdatedNotes(state, media, rs, bildID, IMAGE_NAMESPACE);
357
358 objectsToSave.add(media);
359
360
361 } catch (MalformedURLException e) {
362 e.printStackTrace();
363 } catch (ClientProtocolException e) {
364 e.printStackTrace();
365 } catch (IOException e) {
366 e.printStackTrace();
367 }
368
369 return media;
370 }
371
372 private String transformCopyright2CollectionCode(String copyright){
373
374 if (isBlank(copyright)){
375 return "";
376 }else if(copyright.matches("Museum f.?r Naturkunde der Humboldt-Universit.?t, Berlin")){
377 return "MFNB";
378 }else if(copyright.matches("Staatliches Museum f.?r Tierkunde Dresden")){
379 return "SMTD";
380 }else if(copyright.equals("Natural History Museum, London")){
381 return "BMNH";
382 }else if(copyright.matches("Zoologische Staatssammlung M.?nchen")){
383 return "ZSSM";
384 }else if(copyright.matches("Staatliches Museum f.?r Naturkunde Karlsruhe")){
385 return "SMNK";
386 }else if(copyright.matches("Deutsches Entomologisches Institut M.?ncheberg")){
387 return "DEIE";
388 }else if(copyright.equals("Forschungsinstitut und Naturmuseum Senckenberg")){
389 return "SMFM";
390 }else if(copyright.matches("Mus.?um National d.?Histoire Naturelle, Paris")){
391 return "MNHN";
392 }else if(copyright.equals("Naturhistorisches Museum Wien")){
393 return "NHMW";
394 }else if(copyright.equals("Naturhistoriska Riksmuseet Stockholm")){
395 return "NRMS";
396 }else if(copyright.matches("Staatliches Museum f.?r Naturkunde Stuttgart")){
397 return "SMNS";
398 }else if(copyright.equals("United States National Museum of Natural History, Washington")){
399 return "USNM";
400 }else if(copyright.matches("Zentrum f.?r Biodokumentation des Saarlandes")){
401 return "ZFBS";
402 }else if(copyright.equals("Zoological Museum, University of Copenhagen")){
403 return "ZMUC";
404 }else if(copyright.equals("Zoologisches Forschungsinstitut und Museum \"Alexander Koenig\", Bonn")){
405 return "ZFMK";
406 }else if(copyright.equals("Zoologisches Forschungsmuseum \"Alexander Koenig\", Bonn")){
407 return "ZFMK";
408 }else if(copyright.matches("Zoologisches Institut der Martin-Luther-Universit.?t Halle-Wittenberg")){
409 return "ZIUH";
410 }else if(copyright.matches("Zoologisches Institut Universit.?t T.?bingen")){
411 return "ZIUT";
412 }else{
413 logger.warn("Unknown copyright entry: " + copyright);
414 return "";
415 }
416
417
418 }
419
420
421
422 /* (non-Javadoc)
423 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
424 */
425 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
426 String nameSpace;
427 Class cdmClass;
428 Set<String> idSet;
429 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
430 try{
431 Set<String> currSpecIdSet = new HashSet<String>();
432 Set<String> specTaxIdSet = new HashSet<String>();
433 Set<String> typeIdSet = new HashSet<String>();
434
435 while (rs.next()){
436 handleForeignKey(rs, currSpecIdSet, "SpecCurrspecID");
437 handleTypeKey(rs, typeIdSet, "spectaxID", "copyright");
438 }
439
440 //specTax map
441 nameSpace = GlobisSpecTaxImport.SPEC_TAX_NAMESPACE;
442 cdmClass = TaxonBase.class;
443 idSet = specTaxIdSet;
444 Map<String, TaxonBase> specTaxMap = (Map<String, TaxonBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
445 result.put(nameSpace, specTaxMap);
446
447 //taxon map
448 nameSpace = TAXON_NAMESPACE;
449 cdmClass = Taxon.class;
450 idSet = currSpecIdSet;
451 Map<String, Taxon> taxonMap = (Map<String, Taxon>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
452 result.put(nameSpace, taxonMap);
453
454
455 //type map
456 nameSpace = GlobisSpecTaxImport.TYPE_NAMESPACE;
457 cdmClass = Specimen.class;
458 idSet = typeIdSet;
459 Map<String, Specimen> typeMap = (Map<String, Specimen>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
460 result.put(nameSpace, typeMap);
461
462
463 } catch (SQLException e) {
464 throw new RuntimeException(e);
465 }
466 return result;
467 }
468
469 private void handleTypeKey(ResultSet rs, Set<String> idSet, String specTaxIdAttr, String copyrightAttr)
470 throws SQLException {
471 Integer specTaxId = nullSafeInt(rs, specTaxIdAttr);
472 if (specTaxId != null){
473 String copyright = rs.getString(copyrightAttr);
474 if (isNotBlank(copyright)){
475 String id = GlobisSpecTaxImport.getTypeId(specTaxId, transformCopyright2CollectionCode(copyright));
476 idSet.add(id);
477 }
478 }
479 }
480
481 /* (non-Javadoc)
482 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
483 */
484 @Override
485 protected boolean doCheck(GlobisImportState state){
486 IOValidator<GlobisImportState> validator = new GlobisImageImportValidator();
487 return validator.validate(state);
488 }
489
490
491 /* (non-Javadoc)
492 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
493 */
494 protected boolean isIgnore(GlobisImportState state){
495 return ! state.getConfig().isDoImages();
496 }
497
498
499
500
501 /* (non-Javadoc)
502 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doInvoke(eu.etaxonomy.cdm.io.globis.GlobisImportState)
503 */
504 @Override
505 protected void doInvoke(GlobisImportState state) {
506 Reference refGart = ReferenceFactory.newGeneric();
507 refGart.setTitleCache("GART");
508 refGart.setUuid(uuidGartRef);
509 getReferenceService().saveOrUpdate(refGart);
510 super.doInvoke(state);
511 }
512
513
514
515
516
517 }