ref #6241 replaced @created by @since in cdmlib-app
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / globis / GlobisImageImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.globis;
11
12 import java.io.IOException;
13 import java.net.MalformedURLException;
14 import java.net.URI;
15 import java.sql.ResultSet;
16 import java.sql.SQLException;
17 import java.util.HashMap;
18 import java.util.HashSet;
19 import java.util.Map;
20 import java.util.Set;
21 import java.util.UUID;
22 import java.util.regex.Matcher;
23 import java.util.regex.Pattern;
24
25 import org.apache.http.client.ClientProtocolException;
26 import org.apache.log4j.Logger;
27 import org.springframework.stereotype.Component;
28
29 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
30 import eu.etaxonomy.cdm.common.UriUtils;
31 import eu.etaxonomy.cdm.io.common.IOValidator;
32 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
33 import eu.etaxonomy.cdm.io.globis.validation.GlobisImageImportValidator;
34 import eu.etaxonomy.cdm.model.common.Annotation;
35 import eu.etaxonomy.cdm.model.common.CdmBase;
36 import eu.etaxonomy.cdm.model.common.Language;
37 import eu.etaxonomy.cdm.model.common.Marker;
38 import eu.etaxonomy.cdm.model.common.MarkerType;
39 import eu.etaxonomy.cdm.model.common.OriginalSourceType;
40 import eu.etaxonomy.cdm.model.media.Media;
41 import eu.etaxonomy.cdm.model.name.IZoologicalName;
42 import eu.etaxonomy.cdm.model.name.TaxonName;
43 import eu.etaxonomy.cdm.model.occurrence.Collection;
44 import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
45 import eu.etaxonomy.cdm.model.occurrence.DeterminationEvent;
46 import eu.etaxonomy.cdm.model.reference.Reference;
47 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
48 import eu.etaxonomy.cdm.model.taxon.Taxon;
49
50
51 /**
52 * @author a.mueller
53 * @since 20.02.2010
54 * @version 1.0
55 */
56 @Component
57 public class GlobisImageImport extends GlobisImportBase<Taxon> {
58 private static final Logger logger = Logger.getLogger(GlobisImageImport.class);
59
60 private int modCount = 1000;
61
62 private UUID uuidArtNonSpecTaxMarkerType = UUID.fromString("be362085-0f5b-4314-96d1-78b9b129ef6d") ;
63 private static final String pluralString = "images";
64 private static final String dbTableName = "Einzelbilder";
65 private static final Class<?> cdmTargetClass = Media.class; //not needed
66
67 private static UUID uuidGartRef = UUID.fromString("af85470f-6e54-4304-9d29-fd117cd56161");
68
/**
 * Creates the image import, passing the plural label, the source table name
 * and the target class on to the base class.
 */
public GlobisImageImport() {
    super(pluralString, dbTableName, cdmTargetClass);
}
72
73
74
75
76 /* (non-Javadoc)
77 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#getIdQuery()
78 */
79 @Override
80 protected String getIdQuery() {
81 String strRecordQuery =
82 " SELECT BildId " +
83 " FROM " + dbTableName;
84 return strRecordQuery;
85 }
86
87
88
89
90 /* (non-Javadoc)
91 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
92 */
93 @Override
94 protected String getRecordQuery(GlobisImportConfigurator config) {
95 String strRecordQuery =
96 " SELECT i.*, NULL as Created_When, NULL as Created_Who," +
97 " NULL as Updated_who, NULL as Updated_When, NULL as Notes, st.SpecCurrspecID " +
98 " FROM " + getTableName() + " i " +
99 " LEFT JOIN specTax st ON i.spectaxID = st.SpecTaxID " +
100 " WHERE ( i.BildId IN (" + ID_LIST_TOKEN + ") )";
101 return strRecordQuery;
102 }
103
104
105
106 /* (non-Javadoc)
107 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doPartition(eu.etaxonomy.cdm.io.common.ResultSetPartitioner, eu.etaxonomy.cdm.io.globis.GlobisImportState)
108 */
109 @Override
110 public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
111 boolean success = true;
112
113 Set<Media> objectsToSave = new HashSet<>();
114
115 Map<String, DerivedUnit> typeMap = partitioner.getObjectMap(TYPE_NAMESPACE);
116
117 Map<String, Taxon> taxonMap = partitioner.getObjectMap(TAXON_NAMESPACE);
118 Map<String, TaxonName> specTaxNameMap = partitioner.getObjectMap(SPEC_TAX_NAMESPACE);
119
120 ResultSet rs = partitioner.getResultSet();
121
122 Reference refGart = getReferenceService().find(uuidGartRef);
123
124
125 try {
126
127 int i = 0;
128
129 //for each record
130 while (rs.next()){
131
132 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
133
134 Integer bildID = rs.getInt("BildID");
135 Integer spectaxID = nullSafeInt(rs, "spectaxID");
136 Integer taxonID = nullSafeInt(rs, "SpecCurrspecID");
137 String copyright = rs.getString("copyright");
138 String specimenId = rs.getString("specimenID");
139 String bemerkungen = rs.getString("Bemerkungen");
140 String artNotSpecTax = rs.getString("Art non spectax");
141 String motiv = rs.getString("Motiv");
142
143 //ignore:
144 // [file lab2], same as Dateiname04 but less data
145 // Dateipfad
146
147 Set<Media> recordMedia = new HashSet<Media>();
148
149 try {
150
151 makeAllMedia(state, rs, recordMedia, objectsToSave);
152
153 String title = null;
154
155 DerivedUnit specimen = null;
156 if (spectaxID != null){
157 //try to find type specimen
158 if (isNotBlank(motiv) && (motiv.startsWith("type specimen"))){
159 String collectionCode = transformCopyright2CollectionCode(copyright);
160 String id = GlobisSpecTaxImport.getTypeId(spectaxID, collectionCode);
161 specimen = typeMap.get(id);
162 }
163
164 //try to find specTaxName
165 IZoologicalName specTaxTaxonName = specTaxNameMap.get(String.valueOf(spectaxID));
166 if (specTaxTaxonName != null){
167 title = " taxon name " + specTaxTaxonName.getTitleCache();
168 }else{
169 title = " spectaxID " + spectaxID;
170 }
171 }else{
172 title = " name " + getNameFromFileOs(rs) + (isBlank(specimenId)? "" : " (specimenId: " + specimenId + ")");
173 }
174
175 //not type specimen
176 if (specimen == null){
177 specimen = DerivedUnit.NewPreservedSpecimenInstance();
178 specimen.setTitleCache("Specimen for " + title );
179 String collectionCode = transformCopyright2CollectionCode(copyright);
180 //TODO
181 Collection collection = getCollection(collectionCode);
182 specimen.setCollection(collection);
183 }
184
185
186 //source
187 specimen.addSource(OriginalSourceType.Import, String.valueOf(bildID), IMAGE_NAMESPACE, state.getTransactionalSourceReference(), null);
188
189 //GART id (specimenID)
190 if (isNotBlank(specimenId)){
191 specimen.addSource(OriginalSourceType.Lineage, specimenId, "specimenId", refGart, null);
192 }
193 //bemerkungen
194 if (isNotBlank(bemerkungen)){
195 Annotation annotation = Annotation.NewInstance(bemerkungen, null, null);
196 specimen.addAnnotation(annotation);
197 }
198 //media
199 DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(specimen);
200 for (Media media: recordMedia){
201 facade.addDerivedUnitMedia(media);
202 }
203 //art non spectax
204 if (isNotBlank(artNotSpecTax)){
205 if (artNotSpecTax.equalsIgnoreCase("ja")){
206 MarkerType artNotSpecTaxMarker = getMarkerType(state, uuidArtNonSpecTaxMarkerType , "Art non spectax", "This marker is true if in the orginal data the 'Art non spectax' was 'ja'", null) ;
207 specimen.addMarker(Marker.NewInstance(artNotSpecTaxMarker, true));
208 }else{
209 logger.warn(artNotSpecTax + " is not a valid value for 'Art non spectax' (BildID: " + bildID + ")" );
210 }
211 }
212
213 if (spectaxID != null){
214
215 //add to image gallery (discuss if this is also needed if taxon is already added to type specimen
216 // Taxon taxon = taxonMap.get(String.valueOf(taxonID));
217 IZoologicalName specTaxTaxonName = specTaxNameMap.get(String.valueOf(spectaxID));
218
219 //
220 // if (taxon == null){
221 //// taxon = specTaxMap.get(String.valueOf(spectaxID));
222 //// specTaxName = specTaxMap.g
223 // }
224 // if (taxon == null){
225 // logger.warn("No taxon available for specTaxID: " + spectaxID);
226 // }else{
227 // name = CdmBase.deproxy(taxon.getName(), ZoologicalName.class);
228 // }
229
230 //TODO FIXME
231
232 if (specTaxTaxonName == null){
233 logger.warn("Name could not be found for spectaxID: " + spectaxID + " in BildID: " + bildID);
234 }else{
235 Taxon taxon = null;
236 for (Taxon specTaxTaxon: specTaxTaxonName.getTaxa()){
237 taxon = specTaxTaxon;
238 }
239 if (taxon == null){
240 //FIXME
241 Reference undefinedSec = null;
242 taxon = Taxon.NewInstance(specTaxTaxonName, undefinedSec);
243 }
244
245 DeterminationEvent.NewInstance(taxon, specimen);
246
247 }
248
249
250
251
252 // if (taxon != null){
253 // TaxonDescription taxonDescription = getTaxonDescription(taxon, true, true);
254 // if (taxonDescription.getElements().size() == 0){
255 // TextData textData = TextData.NewInstance(Feature.IMAGE());
256 // taxonDescription.addElement(textData);
257 // }
258 // Set<DescriptionElementBase> elements = taxonDescription.getElements();
259 // TextData textData = CdmBase.deproxy(elements.iterator().next(), TextData.class);
260 // for (Media media: recordMedia){
261 // textData.addMedia(media);
262 // }
263 // }
264 }
265
266 } catch (Exception e) {
267 logger.warn("Exception in Einzelbilder: bildID " + bildID + ". " + e.getMessage());
268 e.printStackTrace();
269 }
270
271 }
272
273 logger.info(pluralString + " to save: " + objectsToSave.size());
274 getMediaService().save(objectsToSave);
275
276 return success;
277 } catch (SQLException e) {
278 logger.error("SQLException:" + e);
279 return false;
280 }
281 }
282
283 private Collection getCollection(String collectionCode) {
284 //TODO
285 return null;
286 }
287
288
289
290
291 private String getNameFromFileOs(ResultSet rs) throws SQLException {
292 String fileOS = rs.getString("file OS");
293 Pattern pattern = Pattern.compile("(.+)(_.{4}(-.{1,3})?(_Nr\\d{3,4})?_.{2,3}\\.jpg)");
294 Matcher matcher = pattern.matcher(fileOS);
295 if (matcher.matches()){
296 String match = matcher.group(1);
297 return match;
298 }else{
299 logger.warn("FileOS does not match: " + fileOS);
300 return fileOS;
301 }
302 }
303
304
305
306
307 private void makeAllMedia(GlobisImportState state, ResultSet rs, Set<Media> recordMedia, Set<Media> objectsToSave) throws SQLException{
308 //make image path
309 String pathShort = rs.getString("Dateipfad_kurz");
310 String fileOS = rs.getString("file OS");
311 pathShort= pathShort.replace(fileOS, "");
312 String newPath = state.getConfig().getImageBaseUrl();
313 String path = pathShort.replace("image:Webversionen/", newPath);
314
315 Media singleMedia = makeMedia(state, rs, "file OS", "Legende 1", path, objectsToSave );
316 recordMedia.add(singleMedia);
317 singleMedia = makeMedia(state, rs, "Dateinamen02", "Legende 2", path, objectsToSave );
318 recordMedia.add(singleMedia);
319 singleMedia = makeMedia(state, rs, "Dateinamen03", "Legende 3", path, objectsToSave );
320 recordMedia.add(singleMedia);
321 singleMedia = makeMedia(state, rs, "Dateinamen04", "Legende 4", path, objectsToSave );
322 recordMedia.add(singleMedia);
323
324 }
325
326 private Media makeMedia(GlobisImportState state, ResultSet rs, String fileNameAttr, String legendAttr, String path, Set<Media> objectsToSave) throws SQLException {
327 Media media = null;
328 String fileName = rs.getString(fileNameAttr);
329 String legend = rs.getString(legendAttr);
330 Integer bildID = rs.getInt("BildID");
331
332 String uriStr = path+fileName;
333 uriStr = uriStr.replace(" ", "%20");
334
335 URI uri = URI.create(uriStr);
336
337 // Media media = ImageInfo.NewInstanceWithMetaData(uri, null);
338
339 try {
340 boolean readMediaData = state.getConfig().isDoReadMediaData();
341 if (isBlank(legend) && readMediaData){
342 if (UriUtils.isOk(UriUtils.getResponse(uri, null))){
343 logger.warn("Image exists but legend is null " + uri + ", bildID" + bildID );
344 }else{
345 return null;
346 }
347 }
348
349 media = this.getImageMedia(uri.toString(), readMediaData);
350 media.putTitle(Language.ENGLISH(), legend);
351 this.doIdCreatedUpdatedNotes(state, media, rs, bildID, IMAGE_NAMESPACE);
352
353 objectsToSave.add(media);
354
355
356 } catch (MalformedURLException e) {
357 e.printStackTrace();
358 } catch (ClientProtocolException e) {
359 e.printStackTrace();
360 } catch (IOException e) {
361 e.printStackTrace();
362 }
363
364 return media;
365 }
366
367 private String transformCopyright2CollectionCode(String copyright){
368
369 if (isBlank(copyright)){
370 return "";
371 }else if(copyright.matches("Museum f.?r Naturkunde der Humboldt-Universit.?t, Berlin")){
372 return "MFNB";
373 }else if(copyright.matches("Staatliches Museum f.?r Tierkunde Dresden")){
374 return "SMTD";
375 }else if(copyright.equals("Natural History Museum, London")){
376 return "BMNH";
377 }else if(copyright.matches("Zoologische Staatssammlung M.?nchen")){
378 return "ZSSM";
379 }else if(copyright.matches("Staatliches Museum f.?r Naturkunde Karlsruhe")){
380 return "SMNK";
381 }else if(copyright.matches("Deutsches Entomologisches Institut M.?ncheberg")){
382 return "DEIE";
383 }else if(copyright.equals("Forschungsinstitut und Naturmuseum Senckenberg")){
384 return "SMFM";
385 }else if(copyright.matches("Mus.?um National d.?Histoire Naturelle, Paris")){
386 return "MNHN";
387 }else if(copyright.equals("Naturhistorisches Museum Wien")){
388 return "NHMW";
389 }else if(copyright.equals("Naturhistoriska Riksmuseet Stockholm")){
390 return "NRMS";
391 }else if(copyright.matches("Staatliches Museum f.?r Naturkunde Stuttgart")){
392 return "SMNS";
393 }else if(copyright.equals("United States National Museum of Natural History, Washington")){
394 return "USNM";
395 }else if(copyright.matches("Zentrum f.?r Biodokumentation des Saarlandes")){
396 return "ZFBS";
397 }else if(copyright.equals("Zoological Museum, University of Copenhagen")){
398 return "ZMUC";
399 }else if(copyright.equals("Zoologisches Forschungsinstitut und Museum \"Alexander Koenig\", Bonn")){
400 return "ZFMK";
401 }else if(copyright.equals("Zoologisches Forschungsmuseum \"Alexander Koenig\", Bonn")){
402 return "ZFMK";
403 }else if(copyright.matches("Zoologisches Institut der Martin-Luther-Universit.?t Halle-Wittenberg")){
404 return "ZIUH";
405 }else if(copyright.matches("Zoologisches Institut Universit.?t T.?bingen")){
406 return "ZIUT";
407 }else{
408 logger.warn("Unknown copyright entry: " + copyright);
409 return "";
410 }
411 }
412
413
414 @Override
415 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, GlobisImportState state) {
416 String nameSpace;
417 Class<?> cdmClass;
418 Set<String> idSet;
419 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
420 try{
421 Set<String> currSpecIdSet = new HashSet<>();
422 Set<String> specTaxIdSet = new HashSet<>();
423 Set<String> typeIdSet = new HashSet<>();
424
425 while (rs.next()){
426 handleForeignKey(rs, currSpecIdSet, "SpecCurrspecID");
427 handleForeignKey(rs, specTaxIdSet, "spectaxID");
428 handleTypeKey(rs, typeIdSet, "spectaxID", "copyright");
429 }
430
431 //specTax map
432 nameSpace = SPEC_TAX_NAMESPACE;
433 cdmClass = TaxonName.class;
434 idSet = specTaxIdSet;
435 Map<String, TaxonName> specTaxNameMap = (Map<String, TaxonName>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
436 result.put(nameSpace, specTaxNameMap);
437
438 // //taxon map
439 // nameSpace = TAXON_NAMESPACE;
440 // cdmClass = Taxon.class;
441 // idSet = currSpecIdSet;
442 // Map<String, Taxon> taxonMap = (Map<String, Taxon>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
443 // result.put(nameSpace, taxonMap);
444
445
446 //type map
447 nameSpace = GlobisSpecTaxImport.TYPE_NAMESPACE;
448 cdmClass = DerivedUnit.class;
449 idSet = typeIdSet;
450 Map<String, DerivedUnit> typeMap = (Map<String, DerivedUnit>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
451 result.put(nameSpace, typeMap);
452
453
454 } catch (SQLException e) {
455 throw new RuntimeException(e);
456 }
457 return result;
458 }
459
460 private void handleTypeKey(ResultSet rs, Set<String> idSet, String specTaxIdAttr, String copyrightAttr) throws SQLException {
461 Integer specTaxId = nullSafeInt(rs, specTaxIdAttr);
462 if (specTaxId != null){
463 String copyright = rs.getString(copyrightAttr);
464 if (isNotBlank(copyright)){
465 String id = GlobisSpecTaxImport.getTypeId(specTaxId, transformCopyright2CollectionCode(copyright));
466 idSet.add(id);
467 }
468 }
469 }
470
471 /* (non-Javadoc)
472 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
473 */
474 @Override
475 protected boolean doCheck(GlobisImportState state){
476 IOValidator<GlobisImportState> validator = new GlobisImageImportValidator();
477 return validator.validate(state);
478 }
479
480
481 /* (non-Javadoc)
482 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
483 */
484 @Override
485 protected boolean isIgnore(GlobisImportState state){
486 return ! state.getConfig().isDoImages();
487 }
488
489
490
491
492 /* (non-Javadoc)
493 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doInvoke(eu.etaxonomy.cdm.io.globis.GlobisImportState)
494 */
495 @Override
496 protected void doInvoke(GlobisImportState state) {
497 Reference refGart = ReferenceFactory.newGeneric();
498 refGart.setTitleCache("GART");
499 refGart.setUuid(uuidGartRef);
500 getReferenceService().saveOrUpdate(refGart);
501 super.doInvoke(state);
502 }
503
504
505
506
507
508 }