adapt app-import to v5.45
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / globis / GlobisImageImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.globis;
11
12 import java.io.IOException;
13 import java.net.MalformedURLException;
14 import java.sql.ResultSet;
15 import java.sql.SQLException;
16 import java.util.HashMap;
17 import java.util.HashSet;
18 import java.util.Map;
19 import java.util.Set;
20 import java.util.UUID;
21 import java.util.regex.Matcher;
22 import java.util.regex.Pattern;
23
24 import org.apache.http.client.ClientProtocolException;
25 import org.apache.logging.log4j.LogManager;
26 import org.apache.logging.log4j.Logger;
27 import org.springframework.stereotype.Component;
28
29 import eu.etaxonomy.cdm.common.URI;
30 import eu.etaxonomy.cdm.common.UriUtils;
31 import eu.etaxonomy.cdm.facade.DerivedUnitFacade;
32 import eu.etaxonomy.cdm.io.common.IOValidator;
33 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
34 import eu.etaxonomy.cdm.io.globis.validation.GlobisImageImportValidator;
35 import eu.etaxonomy.cdm.model.common.Annotation;
36 import eu.etaxonomy.cdm.model.common.CdmBase;
37 import eu.etaxonomy.cdm.model.common.Language;
38 import eu.etaxonomy.cdm.model.common.Marker;
39 import eu.etaxonomy.cdm.model.common.MarkerType;
40 import eu.etaxonomy.cdm.model.media.Media;
41 import eu.etaxonomy.cdm.model.name.IZoologicalName;
42 import eu.etaxonomy.cdm.model.name.TaxonName;
43 import eu.etaxonomy.cdm.model.occurrence.Collection;
44 import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
45 import eu.etaxonomy.cdm.model.occurrence.DeterminationEvent;
46 import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
47 import eu.etaxonomy.cdm.model.reference.Reference;
48 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
49 import eu.etaxonomy.cdm.model.taxon.Taxon;
50
51
52 /**
53 * @author a.mueller
54 * @since 20.02.2010
55 */
56 @Component
57 public class GlobisImageImport extends GlobisImportBase<Taxon> {
58
59 private static final long serialVersionUID = 5697033145326415146L;
60 private static final Logger logger = LogManager.getLogger();
61
62 private int modCount = 1000;
63
64 private UUID uuidArtNonSpecTaxMarkerType = UUID.fromString("be362085-0f5b-4314-96d1-78b9b129ef6d") ;
65 private static final String pluralString = "images";
66 private static final String dbTableName = "Einzelbilder";
67 private static final Class<?> cdmTargetClass = Media.class; //not needed
68
69 private static UUID uuidGartRef = UUID.fromString("af85470f-6e54-4304-9d29-fd117cd56161");
70
/**
 * Creates the image import, handing the plural label ("images"), the
 * source table name ("Einzelbilder") and the CDM target class to the
 * base class constructor.
 */
public GlobisImageImport() {
    super(pluralString, dbTableName, cdmTargetClass);
}
74
75 @Override
76 protected String getIdQuery() {
77 String strRecordQuery =
78 " SELECT BildId " +
79 " FROM " + dbTableName;
80 return strRecordQuery;
81 }
82
83 @Override
84 protected String getRecordQuery(GlobisImportConfigurator config) {
85 String strRecordQuery =
86 " SELECT i.*, NULL as Created_When, NULL as Created_Who," +
87 " NULL as Updated_who, NULL as Updated_When, NULL as Notes, st.SpecCurrspecID " +
88 " FROM " + getTableName() + " i " +
89 " LEFT JOIN specTax st ON i.spectaxID = st.SpecTaxID " +
90 " WHERE ( i.BildId IN (" + ID_LIST_TOKEN + ") )";
91 return strRecordQuery;
92 }
93
94 @Override
95 public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, GlobisImportState state) {
96 boolean success = true;
97
98 Set<Media> objectsToSave = new HashSet<>();
99
100 @SuppressWarnings("unchecked")
101 Map<String, DerivedUnit> typeMap = partitioner.getObjectMap(TYPE_NAMESPACE);
102
103 @SuppressWarnings("unchecked")
104 Map<String, Taxon> taxonMap = partitioner.getObjectMap(TAXON_NAMESPACE);
105 @SuppressWarnings("unchecked")
106 Map<String, TaxonName> specTaxNameMap = partitioner.getObjectMap(SPEC_TAX_NAMESPACE);
107
108 ResultSet rs = partitioner.getResultSet();
109
110 Reference refGart = getReferenceService().find(uuidGartRef);
111
112
113 try {
114
115 int i = 0;
116
117 //for each record
118 while (rs.next()){
119
120 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
121
122 Integer bildID = rs.getInt("BildID");
123 Integer spectaxID = nullSafeInt(rs, "spectaxID");
124 Integer taxonID = nullSafeInt(rs, "SpecCurrspecID");
125 String copyright = rs.getString("copyright");
126 String specimenId = rs.getString("specimenID");
127 String bemerkungen = rs.getString("Bemerkungen");
128 String artNotSpecTax = rs.getString("Art non spectax");
129 String motiv = rs.getString("Motiv");
130
131 //ignore:
132 // [file lab2], same as Dateiname04 but less data
133 // Dateipfad
134
135 Set<Media> recordMedia = new HashSet<>();
136
137 try {
138
139 makeAllMedia(state, rs, recordMedia, objectsToSave);
140
141 String title = null;
142
143 DerivedUnit specimen = null;
144 if (spectaxID != null){
145 //try to find type specimen
146 if (isNotBlank(motiv) && (motiv.startsWith("type specimen"))){
147 String collectionCode = transformCopyright2CollectionCode(copyright);
148 String id = GlobisSpecTaxImport.getTypeId(spectaxID, collectionCode);
149 specimen = typeMap.get(id);
150 }
151
152 //try to find specTaxName
153 IZoologicalName specTaxTaxonName = specTaxNameMap.get(String.valueOf(spectaxID));
154 if (specTaxTaxonName != null){
155 title = " taxon name " + specTaxTaxonName.getTitleCache();
156 }else{
157 title = " spectaxID " + spectaxID;
158 }
159 }else{
160 title = " name " + getNameFromFileOs(rs) + (isBlank(specimenId)? "" : " (specimenId: " + specimenId + ")");
161 }
162
163 //not type specimen
164 if (specimen == null){
165 specimen = DerivedUnit.NewPreservedSpecimenInstance();
166 specimen.setTitleCache("Specimen for " + title, true);
167 String collectionCode = transformCopyright2CollectionCode(copyright);
168 //TODO
169 Collection collection = getCollection(collectionCode);
170 specimen.setCollection(collection);
171 }
172
173 //source
174 specimen.addSource(OriginalSourceType.Import, String.valueOf(bildID), IMAGE_NAMESPACE, state.getTransactionalSourceReference(), null);
175
176 //GART id (specimenID)
177 if (isNotBlank(specimenId)){
178 specimen.addSource(OriginalSourceType.Lineage, specimenId, "specimenId", refGart, null);
179 }
180 //bemerkungen
181 if (isNotBlank(bemerkungen)){
182 Annotation annotation = Annotation.NewInstance(bemerkungen, null, null);
183 specimen.addAnnotation(annotation);
184 }
185 //media
186 DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(specimen);
187 for (Media media: recordMedia){
188 facade.addDerivedUnitMedia(media);
189 }
190 //art non spectax
191 if (isNotBlank(artNotSpecTax)){
192 if (artNotSpecTax.equalsIgnoreCase("ja")){
193 MarkerType artNotSpecTaxMarker = getMarkerType(state, uuidArtNonSpecTaxMarkerType , "Art non spectax", "This marker is true if in the orginal data the 'Art non spectax' was 'ja'", null) ;
194 specimen.addMarker(Marker.NewInstance(artNotSpecTaxMarker, true));
195 }else{
196 logger.warn(artNotSpecTax + " is not a valid value for 'Art non spectax' (BildID: " + bildID + ")" );
197 }
198 }
199
200 if (spectaxID != null){
201
202 //add to image gallery (discuss if this is also needed if taxon is already added to type specimen
203 // Taxon taxon = taxonMap.get(String.valueOf(taxonID));
204 IZoologicalName specTaxTaxonName = specTaxNameMap.get(String.valueOf(spectaxID));
205
206 //
207 // if (taxon == null){
208 //// taxon = specTaxMap.get(String.valueOf(spectaxID));
209 //// specTaxName = specTaxMap.g
210 // }
211 // if (taxon == null){
212 // logger.warn("No taxon available for specTaxID: " + spectaxID);
213 // }else{
214 // name = CdmBase.deproxy(taxon.getName(), ZoologicalName.class);
215 // }
216
217 //TODO FIXME
218
219 if (specTaxTaxonName == null){
220 logger.warn("Name could not be found for spectaxID: " + spectaxID + " in BildID: " + bildID);
221 }else{
222 Taxon taxon = null;
223 for (Taxon specTaxTaxon: specTaxTaxonName.getTaxa()){
224 taxon = specTaxTaxon;
225 }
226 if (taxon == null){
227 //FIXME
228 Reference undefinedSec = null;
229 taxon = Taxon.NewInstance(specTaxTaxonName, undefinedSec);
230 }
231
232 DeterminationEvent.NewInstance(taxon, specimen);
233
234 }
235
236
237
238
239 // if (taxon != null){
240 // TaxonDescription taxonDescription = getTaxonDescription(taxon, true, true);
241 // if (taxonDescription.getElements().size() == 0){
242 // TextData textData = TextData.NewInstance(Feature.IMAGE());
243 // taxonDescription.addElement(textData);
244 // }
245 // Set<DescriptionElementBase> elements = taxonDescription.getElements();
246 // TextData textData = CdmBase.deproxy(elements.iterator().next(), TextData.class);
247 // for (Media media: recordMedia){
248 // textData.addMedia(media);
249 // }
250 // }
251 }
252
253 } catch (Exception e) {
254 logger.warn("Exception in Einzelbilder: bildID " + bildID + ". " + e.getMessage());
255 e.printStackTrace();
256 }
257
258 }
259
260 logger.info(pluralString + " to save: " + objectsToSave.size());
261 getMediaService().save(objectsToSave);
262
263 return success;
264 } catch (SQLException e) {
265 logger.error("SQLException:" + e);
266 return false;
267 }
268 }
269
270 private Collection getCollection(String collectionCode) {
271 //TODO
272 return null;
273 }
274
275 private String getNameFromFileOs(ResultSet rs) throws SQLException {
276 String fileOS = rs.getString("file OS");
277 Pattern pattern = Pattern.compile("(.+)(_.{4}(-.{1,3})?(_Nr\\d{3,4})?_.{2,3}\\.jpg)");
278 Matcher matcher = pattern.matcher(fileOS);
279 if (matcher.matches()){
280 String match = matcher.group(1);
281 return match;
282 }else{
283 logger.warn("FileOS does not match: " + fileOS);
284 return fileOS;
285 }
286 }
287
288 private void makeAllMedia(GlobisImportState state, ResultSet rs, Set<Media> recordMedia, Set<Media> objectsToSave) throws SQLException{
289 //make image path
290 String pathShort = rs.getString("Dateipfad_kurz");
291 String fileOS = rs.getString("file OS");
292 pathShort= pathShort.replace(fileOS, "");
293 String newPath = state.getConfig().getImageBaseUrl();
294 String path = pathShort.replace("image:Webversionen/", newPath);
295
296 Media singleMedia = makeMedia(state, rs, "file OS", "Legende 1", path, objectsToSave );
297 recordMedia.add(singleMedia);
298 singleMedia = makeMedia(state, rs, "Dateinamen02", "Legende 2", path, objectsToSave );
299 recordMedia.add(singleMedia);
300 singleMedia = makeMedia(state, rs, "Dateinamen03", "Legende 3", path, objectsToSave );
301 recordMedia.add(singleMedia);
302 singleMedia = makeMedia(state, rs, "Dateinamen04", "Legende 4", path, objectsToSave );
303 recordMedia.add(singleMedia);
304
305 }
306
307 private Media makeMedia(GlobisImportState state, ResultSet rs, String fileNameAttr, String legendAttr, String path, Set<Media> objectsToSave) throws SQLException {
308 Media media = null;
309 String fileName = rs.getString(fileNameAttr);
310 String legend = rs.getString(legendAttr);
311 Integer bildID = rs.getInt("BildID");
312
313 String uriStr = path+fileName;
314 uriStr = uriStr.replace(" ", "%20");
315
316 URI uri = URI.create(uriStr);
317
318 // Media media = ImageInfo.NewInstanceWithMetaData(uri, null);
319
320 try {
321 boolean readMediaData = state.getConfig().isDoReadMediaData();
322 if (isBlank(legend) && readMediaData){
323 if (UriUtils.isOk(UriUtils.getResponse(uri, null))){
324 logger.warn("Image exists but legend is null " + uri + ", bildID" + bildID );
325 }else{
326 return null;
327 }
328 }
329
330 media = this.getImageMedia(uri.toString(), readMediaData);
331 media.putTitle(Language.ENGLISH(), legend);
332 this.doIdCreatedUpdatedNotes(state, media, rs, bildID, IMAGE_NAMESPACE);
333
334 objectsToSave.add(media);
335
336
337 } catch (MalformedURLException e) {
338 e.printStackTrace();
339 } catch (ClientProtocolException e) {
340 e.printStackTrace();
341 } catch (IOException e) {
342 e.printStackTrace();
343 }
344
345 return media;
346 }
347
348 private String transformCopyright2CollectionCode(String copyright){
349
350 if (isBlank(copyright)){
351 return "";
352 }else if(copyright.matches("Museum f.?r Naturkunde der Humboldt-Universit.?t, Berlin")){
353 return "MFNB";
354 }else if(copyright.matches("Staatliches Museum f.?r Tierkunde Dresden")){
355 return "SMTD";
356 }else if(copyright.equals("Natural History Museum, London")){
357 return "BMNH";
358 }else if(copyright.matches("Zoologische Staatssammlung M.?nchen")){
359 return "ZSSM";
360 }else if(copyright.matches("Staatliches Museum f.?r Naturkunde Karlsruhe")){
361 return "SMNK";
362 }else if(copyright.matches("Deutsches Entomologisches Institut M.?ncheberg")){
363 return "DEIE";
364 }else if(copyright.equals("Forschungsinstitut und Naturmuseum Senckenberg")){
365 return "SMFM";
366 }else if(copyright.matches("Mus.?um National d.?Histoire Naturelle, Paris")){
367 return "MNHN";
368 }else if(copyright.equals("Naturhistorisches Museum Wien")){
369 return "NHMW";
370 }else if(copyright.equals("Naturhistoriska Riksmuseet Stockholm")){
371 return "NRMS";
372 }else if(copyright.matches("Staatliches Museum f.?r Naturkunde Stuttgart")){
373 return "SMNS";
374 }else if(copyright.equals("United States National Museum of Natural History, Washington")){
375 return "USNM";
376 }else if(copyright.matches("Zentrum f.?r Biodokumentation des Saarlandes")){
377 return "ZFBS";
378 }else if(copyright.equals("Zoological Museum, University of Copenhagen")){
379 return "ZMUC";
380 }else if(copyright.equals("Zoologisches Forschungsinstitut und Museum \"Alexander Koenig\", Bonn")){
381 return "ZFMK";
382 }else if(copyright.equals("Zoologisches Forschungsmuseum \"Alexander Koenig\", Bonn")){
383 return "ZFMK";
384 }else if(copyright.matches("Zoologisches Institut der Martin-Luther-Universit.?t Halle-Wittenberg")){
385 return "ZIUH";
386 }else if(copyright.matches("Zoologisches Institut Universit.?t T.?bingen")){
387 return "ZIUT";
388 }else{
389 logger.warn("Unknown copyright entry: " + copyright);
390 return "";
391 }
392 }
393
394
395 @Override
396 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, GlobisImportState state) {
397
398 String nameSpace;
399 Set<String> idSet;
400 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
401 try{
402 Set<String> currSpecIdSet = new HashSet<>();
403 Set<String> specTaxIdSet = new HashSet<>();
404 Set<String> typeIdSet = new HashSet<>();
405
406 while (rs.next()){
407 handleForeignKey(rs, currSpecIdSet, "SpecCurrspecID");
408 handleForeignKey(rs, specTaxIdSet, "spectaxID");
409 handleTypeKey(rs, typeIdSet, "spectaxID", "copyright");
410 }
411
412 //specTax map
413 nameSpace = SPEC_TAX_NAMESPACE;
414 idSet = specTaxIdSet;
415 Map<String, TaxonName> specTaxNameMap = getCommonService().getSourcedObjectsByIdInSourceC(TaxonName.class, idSet, nameSpace);
416 result.put(nameSpace, specTaxNameMap);
417
418 // //taxon map
419 // nameSpace = TAXON_NAMESPACE;
420 // idSet = currSpecIdSet;
421 // Map<String, Taxon> taxonMap = getCommonService().getSourcedObjectsByIdInSourceC(Taxon.class, idSet, nameSpace);
422 // result.put(nameSpace, taxonMap);
423
424
425 //type map
426 nameSpace = GlobisImportBase.TYPE_NAMESPACE;
427 idSet = typeIdSet;
428 Map<String, DerivedUnit> typeMap = getCommonService().getSourcedObjectsByIdInSourceC(DerivedUnit.class, idSet, nameSpace);
429 result.put(nameSpace, typeMap);
430
431
432 } catch (SQLException e) {
433 throw new RuntimeException(e);
434 }
435 return result;
436 }
437
438 private void handleTypeKey(ResultSet rs, Set<String> idSet, String specTaxIdAttr, String copyrightAttr) throws SQLException {
439 Integer specTaxId = nullSafeInt(rs, specTaxIdAttr);
440 if (specTaxId != null){
441 String copyright = rs.getString(copyrightAttr);
442 if (isNotBlank(copyright)){
443 String id = GlobisSpecTaxImport.getTypeId(specTaxId, transformCopyright2CollectionCode(copyright));
444 idSet.add(id);
445 }
446 }
447 }
448
449 @Override
450 protected boolean doCheck(GlobisImportState state){
451 IOValidator<GlobisImportState> validator = new GlobisImageImportValidator();
452 return validator.validate(state);
453 }
454
455 @Override
456 protected boolean isIgnore(GlobisImportState state){
457 return ! state.getConfig().isDoImages();
458 }
459
460 @Override
461 protected void doInvoke(GlobisImportState state) {
462 Reference refGart = ReferenceFactory.newGeneric();
463 refGart.setTitleCache("GART", true);
464 refGart.setUuid(uuidGartRef);
465 getReferenceService().saveOrUpdate(refGart);
466 super.doInvoke(state);
467 }
468 }