ref #9114 adapt URI to wrapper in cdmlib-app
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / globis / GlobisImageImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.globis;
11
12 import java.io.IOException;
13 import java.net.MalformedURLException;
14 import eu.etaxonomy.cdm.common.URI;
15 import java.sql.ResultSet;
16 import java.sql.SQLException;
17 import java.util.HashMap;
18 import java.util.HashSet;
19 import java.util.Map;
20 import java.util.Set;
21 import java.util.UUID;
22 import java.util.regex.Matcher;
23 import java.util.regex.Pattern;
24
25 import org.apache.http.client.ClientProtocolException;
26 import org.apache.log4j.Logger;
27 import org.springframework.stereotype.Component;
28
29 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
30 import eu.etaxonomy.cdm.common.UriUtils;
31 import eu.etaxonomy.cdm.io.common.IOValidator;
32 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
33 import eu.etaxonomy.cdm.io.globis.validation.GlobisImageImportValidator;
34 import eu.etaxonomy.cdm.model.common.Annotation;
35 import eu.etaxonomy.cdm.model.common.CdmBase;
36 import eu.etaxonomy.cdm.model.common.Language;
37 import eu.etaxonomy.cdm.model.common.Marker;
38 import eu.etaxonomy.cdm.model.common.MarkerType;
39 import eu.etaxonomy.cdm.model.media.Media;
40 import eu.etaxonomy.cdm.model.name.IZoologicalName;
41 import eu.etaxonomy.cdm.model.name.TaxonName;
42 import eu.etaxonomy.cdm.model.occurrence.Collection;
43 import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
44 import eu.etaxonomy.cdm.model.occurrence.DeterminationEvent;
45 import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
46 import eu.etaxonomy.cdm.model.reference.Reference;
47 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
48 import eu.etaxonomy.cdm.model.taxon.Taxon;
49
50
51 /**
52 * @author a.mueller
53 * @since 20.02.2010
54 */
55 @Component
56 public class GlobisImageImport extends GlobisImportBase<Taxon> {
57
58 private static final long serialVersionUID = 5697033145326415146L;
59 private static final Logger logger = Logger.getLogger(GlobisImageImport.class);
60
61 private int modCount = 1000;
62
63 private UUID uuidArtNonSpecTaxMarkerType = UUID.fromString("be362085-0f5b-4314-96d1-78b9b129ef6d") ;
64 private static final String pluralString = "images";
65 private static final String dbTableName = "Einzelbilder";
66 private static final Class<?> cdmTargetClass = Media.class; //not needed
67
68 private static UUID uuidGartRef = UUID.fromString("af85470f-6e54-4304-9d29-fd117cd56161");
69
/**
 * Creates the import and hands the log label, the source table name and the
 * target class on to the base import class.
 */
public GlobisImageImport() {
    super(pluralString, dbTableName, cdmTargetClass);
}
73
74 @Override
75 protected String getIdQuery() {
76 String strRecordQuery =
77 " SELECT BildId " +
78 " FROM " + dbTableName;
79 return strRecordQuery;
80 }
81
82 @Override
83 protected String getRecordQuery(GlobisImportConfigurator config) {
84 String strRecordQuery =
85 " SELECT i.*, NULL as Created_When, NULL as Created_Who," +
86 " NULL as Updated_who, NULL as Updated_When, NULL as Notes, st.SpecCurrspecID " +
87 " FROM " + getTableName() + " i " +
88 " LEFT JOIN specTax st ON i.spectaxID = st.SpecTaxID " +
89 " WHERE ( i.BildId IN (" + ID_LIST_TOKEN + ") )";
90 return strRecordQuery;
91 }
92
93 @Override
94 public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, GlobisImportState state) {
95 boolean success = true;
96
97 Set<Media> objectsToSave = new HashSet<>();
98
99 @SuppressWarnings("unchecked")
100 Map<String, DerivedUnit> typeMap = partitioner.getObjectMap(TYPE_NAMESPACE);
101
102 @SuppressWarnings("unchecked")
103 Map<String, Taxon> taxonMap = partitioner.getObjectMap(TAXON_NAMESPACE);
104 @SuppressWarnings("unchecked")
105 Map<String, TaxonName> specTaxNameMap = partitioner.getObjectMap(SPEC_TAX_NAMESPACE);
106
107 ResultSet rs = partitioner.getResultSet();
108
109 Reference refGart = getReferenceService().find(uuidGartRef);
110
111
112 try {
113
114 int i = 0;
115
116 //for each record
117 while (rs.next()){
118
119 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
120
121 Integer bildID = rs.getInt("BildID");
122 Integer spectaxID = nullSafeInt(rs, "spectaxID");
123 Integer taxonID = nullSafeInt(rs, "SpecCurrspecID");
124 String copyright = rs.getString("copyright");
125 String specimenId = rs.getString("specimenID");
126 String bemerkungen = rs.getString("Bemerkungen");
127 String artNotSpecTax = rs.getString("Art non spectax");
128 String motiv = rs.getString("Motiv");
129
130 //ignore:
131 // [file lab2], same as Dateiname04 but less data
132 // Dateipfad
133
134 Set<Media> recordMedia = new HashSet<>();
135
136 try {
137
138 makeAllMedia(state, rs, recordMedia, objectsToSave);
139
140 String title = null;
141
142 DerivedUnit specimen = null;
143 if (spectaxID != null){
144 //try to find type specimen
145 if (isNotBlank(motiv) && (motiv.startsWith("type specimen"))){
146 String collectionCode = transformCopyright2CollectionCode(copyright);
147 String id = GlobisSpecTaxImport.getTypeId(spectaxID, collectionCode);
148 specimen = typeMap.get(id);
149 }
150
151 //try to find specTaxName
152 IZoologicalName specTaxTaxonName = specTaxNameMap.get(String.valueOf(spectaxID));
153 if (specTaxTaxonName != null){
154 title = " taxon name " + specTaxTaxonName.getTitleCache();
155 }else{
156 title = " spectaxID " + spectaxID;
157 }
158 }else{
159 title = " name " + getNameFromFileOs(rs) + (isBlank(specimenId)? "" : " (specimenId: " + specimenId + ")");
160 }
161
162 //not type specimen
163 if (specimen == null){
164 specimen = DerivedUnit.NewPreservedSpecimenInstance();
165 specimen.setTitleCache("Specimen for " + title, true);
166 String collectionCode = transformCopyright2CollectionCode(copyright);
167 //TODO
168 Collection collection = getCollection(collectionCode);
169 specimen.setCollection(collection);
170 }
171
172 //source
173 specimen.addSource(OriginalSourceType.Import, String.valueOf(bildID), IMAGE_NAMESPACE, state.getTransactionalSourceReference(), null);
174
175 //GART id (specimenID)
176 if (isNotBlank(specimenId)){
177 specimen.addSource(OriginalSourceType.Lineage, specimenId, "specimenId", refGart, null);
178 }
179 //bemerkungen
180 if (isNotBlank(bemerkungen)){
181 Annotation annotation = Annotation.NewInstance(bemerkungen, null, null);
182 specimen.addAnnotation(annotation);
183 }
184 //media
185 DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(specimen);
186 for (Media media: recordMedia){
187 facade.addDerivedUnitMedia(media);
188 }
189 //art non spectax
190 if (isNotBlank(artNotSpecTax)){
191 if (artNotSpecTax.equalsIgnoreCase("ja")){
192 MarkerType artNotSpecTaxMarker = getMarkerType(state, uuidArtNonSpecTaxMarkerType , "Art non spectax", "This marker is true if in the orginal data the 'Art non spectax' was 'ja'", null) ;
193 specimen.addMarker(Marker.NewInstance(artNotSpecTaxMarker, true));
194 }else{
195 logger.warn(artNotSpecTax + " is not a valid value for 'Art non spectax' (BildID: " + bildID + ")" );
196 }
197 }
198
199 if (spectaxID != null){
200
201 //add to image gallery (discuss if this is also needed if taxon is already added to type specimen
202 // Taxon taxon = taxonMap.get(String.valueOf(taxonID));
203 IZoologicalName specTaxTaxonName = specTaxNameMap.get(String.valueOf(spectaxID));
204
205 //
206 // if (taxon == null){
207 //// taxon = specTaxMap.get(String.valueOf(spectaxID));
208 //// specTaxName = specTaxMap.g
209 // }
210 // if (taxon == null){
211 // logger.warn("No taxon available for specTaxID: " + spectaxID);
212 // }else{
213 // name = CdmBase.deproxy(taxon.getName(), ZoologicalName.class);
214 // }
215
216 //TODO FIXME
217
218 if (specTaxTaxonName == null){
219 logger.warn("Name could not be found for spectaxID: " + spectaxID + " in BildID: " + bildID);
220 }else{
221 Taxon taxon = null;
222 for (Taxon specTaxTaxon: specTaxTaxonName.getTaxa()){
223 taxon = specTaxTaxon;
224 }
225 if (taxon == null){
226 //FIXME
227 Reference undefinedSec = null;
228 taxon = Taxon.NewInstance(specTaxTaxonName, undefinedSec);
229 }
230
231 DeterminationEvent.NewInstance(taxon, specimen);
232
233 }
234
235
236
237
238 // if (taxon != null){
239 // TaxonDescription taxonDescription = getTaxonDescription(taxon, true, true);
240 // if (taxonDescription.getElements().size() == 0){
241 // TextData textData = TextData.NewInstance(Feature.IMAGE());
242 // taxonDescription.addElement(textData);
243 // }
244 // Set<DescriptionElementBase> elements = taxonDescription.getElements();
245 // TextData textData = CdmBase.deproxy(elements.iterator().next(), TextData.class);
246 // for (Media media: recordMedia){
247 // textData.addMedia(media);
248 // }
249 // }
250 }
251
252 } catch (Exception e) {
253 logger.warn("Exception in Einzelbilder: bildID " + bildID + ". " + e.getMessage());
254 e.printStackTrace();
255 }
256
257 }
258
259 logger.info(pluralString + " to save: " + objectsToSave.size());
260 getMediaService().save(objectsToSave);
261
262 return success;
263 } catch (SQLException e) {
264 logger.error("SQLException:" + e);
265 return false;
266 }
267 }
268
269 private Collection getCollection(String collectionCode) {
270 //TODO
271 return null;
272 }
273
274 private String getNameFromFileOs(ResultSet rs) throws SQLException {
275 String fileOS = rs.getString("file OS");
276 Pattern pattern = Pattern.compile("(.+)(_.{4}(-.{1,3})?(_Nr\\d{3,4})?_.{2,3}\\.jpg)");
277 Matcher matcher = pattern.matcher(fileOS);
278 if (matcher.matches()){
279 String match = matcher.group(1);
280 return match;
281 }else{
282 logger.warn("FileOS does not match: " + fileOS);
283 return fileOS;
284 }
285 }
286
287 private void makeAllMedia(GlobisImportState state, ResultSet rs, Set<Media> recordMedia, Set<Media> objectsToSave) throws SQLException{
288 //make image path
289 String pathShort = rs.getString("Dateipfad_kurz");
290 String fileOS = rs.getString("file OS");
291 pathShort= pathShort.replace(fileOS, "");
292 String newPath = state.getConfig().getImageBaseUrl();
293 String path = pathShort.replace("image:Webversionen/", newPath);
294
295 Media singleMedia = makeMedia(state, rs, "file OS", "Legende 1", path, objectsToSave );
296 recordMedia.add(singleMedia);
297 singleMedia = makeMedia(state, rs, "Dateinamen02", "Legende 2", path, objectsToSave );
298 recordMedia.add(singleMedia);
299 singleMedia = makeMedia(state, rs, "Dateinamen03", "Legende 3", path, objectsToSave );
300 recordMedia.add(singleMedia);
301 singleMedia = makeMedia(state, rs, "Dateinamen04", "Legende 4", path, objectsToSave );
302 recordMedia.add(singleMedia);
303
304 }
305
306 private Media makeMedia(GlobisImportState state, ResultSet rs, String fileNameAttr, String legendAttr, String path, Set<Media> objectsToSave) throws SQLException {
307 Media media = null;
308 String fileName = rs.getString(fileNameAttr);
309 String legend = rs.getString(legendAttr);
310 Integer bildID = rs.getInt("BildID");
311
312 String uriStr = path+fileName;
313 uriStr = uriStr.replace(" ", "%20");
314
315 URI uri = URI.create(uriStr);
316
317 // Media media = ImageInfo.NewInstanceWithMetaData(uri, null);
318
319 try {
320 boolean readMediaData = state.getConfig().isDoReadMediaData();
321 if (isBlank(legend) && readMediaData){
322 if (UriUtils.isOk(UriUtils.getResponse(uri, null))){
323 logger.warn("Image exists but legend is null " + uri + ", bildID" + bildID );
324 }else{
325 return null;
326 }
327 }
328
329 media = this.getImageMedia(uri.toString(), readMediaData);
330 media.putTitle(Language.ENGLISH(), legend);
331 this.doIdCreatedUpdatedNotes(state, media, rs, bildID, IMAGE_NAMESPACE);
332
333 objectsToSave.add(media);
334
335
336 } catch (MalformedURLException e) {
337 e.printStackTrace();
338 } catch (ClientProtocolException e) {
339 e.printStackTrace();
340 } catch (IOException e) {
341 e.printStackTrace();
342 }
343
344 return media;
345 }
346
347 private String transformCopyright2CollectionCode(String copyright){
348
349 if (isBlank(copyright)){
350 return "";
351 }else if(copyright.matches("Museum f.?r Naturkunde der Humboldt-Universit.?t, Berlin")){
352 return "MFNB";
353 }else if(copyright.matches("Staatliches Museum f.?r Tierkunde Dresden")){
354 return "SMTD";
355 }else if(copyright.equals("Natural History Museum, London")){
356 return "BMNH";
357 }else if(copyright.matches("Zoologische Staatssammlung M.?nchen")){
358 return "ZSSM";
359 }else if(copyright.matches("Staatliches Museum f.?r Naturkunde Karlsruhe")){
360 return "SMNK";
361 }else if(copyright.matches("Deutsches Entomologisches Institut M.?ncheberg")){
362 return "DEIE";
363 }else if(copyright.equals("Forschungsinstitut und Naturmuseum Senckenberg")){
364 return "SMFM";
365 }else if(copyright.matches("Mus.?um National d.?Histoire Naturelle, Paris")){
366 return "MNHN";
367 }else if(copyright.equals("Naturhistorisches Museum Wien")){
368 return "NHMW";
369 }else if(copyright.equals("Naturhistoriska Riksmuseet Stockholm")){
370 return "NRMS";
371 }else if(copyright.matches("Staatliches Museum f.?r Naturkunde Stuttgart")){
372 return "SMNS";
373 }else if(copyright.equals("United States National Museum of Natural History, Washington")){
374 return "USNM";
375 }else if(copyright.matches("Zentrum f.?r Biodokumentation des Saarlandes")){
376 return "ZFBS";
377 }else if(copyright.equals("Zoological Museum, University of Copenhagen")){
378 return "ZMUC";
379 }else if(copyright.equals("Zoologisches Forschungsinstitut und Museum \"Alexander Koenig\", Bonn")){
380 return "ZFMK";
381 }else if(copyright.equals("Zoologisches Forschungsmuseum \"Alexander Koenig\", Bonn")){
382 return "ZFMK";
383 }else if(copyright.matches("Zoologisches Institut der Martin-Luther-Universit.?t Halle-Wittenberg")){
384 return "ZIUH";
385 }else if(copyright.matches("Zoologisches Institut Universit.?t T.?bingen")){
386 return "ZIUT";
387 }else{
388 logger.warn("Unknown copyright entry: " + copyright);
389 return "";
390 }
391 }
392
393
394 @Override
395 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, GlobisImportState state) {
396
397 String nameSpace;
398 Set<String> idSet;
399 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
400 try{
401 Set<String> currSpecIdSet = new HashSet<>();
402 Set<String> specTaxIdSet = new HashSet<>();
403 Set<String> typeIdSet = new HashSet<>();
404
405 while (rs.next()){
406 handleForeignKey(rs, currSpecIdSet, "SpecCurrspecID");
407 handleForeignKey(rs, specTaxIdSet, "spectaxID");
408 handleTypeKey(rs, typeIdSet, "spectaxID", "copyright");
409 }
410
411 //specTax map
412 nameSpace = SPEC_TAX_NAMESPACE;
413 idSet = specTaxIdSet;
414 Map<String, TaxonName> specTaxNameMap = getCommonService().getSourcedObjectsByIdInSourceC(TaxonName.class, idSet, nameSpace);
415 result.put(nameSpace, specTaxNameMap);
416
417 // //taxon map
418 // nameSpace = TAXON_NAMESPACE;
419 // idSet = currSpecIdSet;
420 // Map<String, Taxon> taxonMap = getCommonService().getSourcedObjectsByIdInSourceC(Taxon.class, idSet, nameSpace);
421 // result.put(nameSpace, taxonMap);
422
423
424 //type map
425 nameSpace = GlobisImportBase.TYPE_NAMESPACE;
426 idSet = typeIdSet;
427 Map<String, DerivedUnit> typeMap = getCommonService().getSourcedObjectsByIdInSourceC(DerivedUnit.class, idSet, nameSpace);
428 result.put(nameSpace, typeMap);
429
430
431 } catch (SQLException e) {
432 throw new RuntimeException(e);
433 }
434 return result;
435 }
436
437 private void handleTypeKey(ResultSet rs, Set<String> idSet, String specTaxIdAttr, String copyrightAttr) throws SQLException {
438 Integer specTaxId = nullSafeInt(rs, specTaxIdAttr);
439 if (specTaxId != null){
440 String copyright = rs.getString(copyrightAttr);
441 if (isNotBlank(copyright)){
442 String id = GlobisSpecTaxImport.getTypeId(specTaxId, transformCopyright2CollectionCode(copyright));
443 idSet.add(id);
444 }
445 }
446 }
447
448 @Override
449 protected boolean doCheck(GlobisImportState state){
450 IOValidator<GlobisImportState> validator = new GlobisImageImportValidator();
451 return validator.validate(state);
452 }
453
454 @Override
455 protected boolean isIgnore(GlobisImportState state){
456 return ! state.getConfig().isDoImages();
457 }
458
459 @Override
460 protected void doInvoke(GlobisImportState state) {
461 Reference refGart = ReferenceFactory.newGeneric();
462 refGart.setTitleCache("GART", true);
463 refGart.setUuid(uuidGartRef);
464 getReferenceService().saveOrUpdate(refGart);
465 super.doInvoke(state);
466 }
467 }