Revert missing rank constant
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / globis / GlobisImageImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.globis;
11
12 import java.io.IOException;
13 import java.net.MalformedURLException;
14 import java.net.URI;
15 import java.sql.ResultSet;
16 import java.sql.SQLException;
17 import java.util.HashMap;
18 import java.util.HashSet;
19 import java.util.Map;
20 import java.util.Set;
21 import java.util.UUID;
22 import java.util.regex.Matcher;
23 import java.util.regex.Pattern;
24
25 import org.apache.http.client.ClientProtocolException;
26 import org.apache.log4j.Logger;
27 import org.springframework.stereotype.Component;
28
29 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
30 import eu.etaxonomy.cdm.common.UriUtils;
31 import eu.etaxonomy.cdm.io.common.IOValidator;
32 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
33 import eu.etaxonomy.cdm.io.globis.validation.GlobisImageImportValidator;
34 import eu.etaxonomy.cdm.model.common.Annotation;
35 import eu.etaxonomy.cdm.model.common.CdmBase;
36 import eu.etaxonomy.cdm.model.common.Language;
37 import eu.etaxonomy.cdm.model.common.Marker;
38 import eu.etaxonomy.cdm.model.common.MarkerType;
39 import eu.etaxonomy.cdm.model.common.OriginalSourceType;
40 import eu.etaxonomy.cdm.model.media.Media;
41 import eu.etaxonomy.cdm.model.name.ZoologicalName;
42 import eu.etaxonomy.cdm.model.occurrence.Collection;
43 import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
44 import eu.etaxonomy.cdm.model.occurrence.DeterminationEvent;
45 import eu.etaxonomy.cdm.model.reference.Reference;
46 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
47 import eu.etaxonomy.cdm.model.taxon.Taxon;
48
49
50 /**
51 * @author a.mueller
52 * @created 20.02.2010
53 * @version 1.0
54 */
55 @Component
56 public class GlobisImageImport extends GlobisImportBase<Taxon> {
private static final Logger logger = Logger.getLogger(GlobisImageImport.class);

/** Progress is logged once every {@code modCount} handled records. */
private static final int modCount = 1000;

/** UUID of the marker type set on specimens whose "Art non spectax" column is "ja". */
private static final UUID uuidArtNonSpecTaxMarkerType = UUID.fromString("be362085-0f5b-4314-96d1-78b9b129ef6d");

/** Plural label handed to the base class and used in this class' log messages. */
private static final String pluralString = "images";
/** Name of the source table the image records are selected from. */
private static final String dbTableName = "Einzelbilder";
private static final Class<?> cdmTargetClass = Media.class;  //not needed

/** Fixed UUID of the "GART" reference that is cited as source for specimen ids. */
private static final UUID uuidGartRef = UUID.fromString("af85470f-6e54-4304-9d29-fd117cd56161");

/**
 * Creates the import step for the image table and registers its label,
 * table name and target class with the base class.
 */
public GlobisImageImport() {
    super(pluralString, dbTableName, cdmTargetClass);
}
71
72
73
74
75 /* (non-Javadoc)
76 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#getIdQuery()
77 */
78 @Override
79 protected String getIdQuery() {
80 String strRecordQuery =
81 " SELECT BildId " +
82 " FROM " + dbTableName;
83 return strRecordQuery;
84 }
85
86
87
88
89 /* (non-Javadoc)
90 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
91 */
92 @Override
93 protected String getRecordQuery(GlobisImportConfigurator config) {
94 String strRecordQuery =
95 " SELECT i.*, NULL as Created_When, NULL as Created_Who," +
96 " NULL as Updated_who, NULL as Updated_When, NULL as Notes, st.SpecCurrspecID " +
97 " FROM " + getTableName() + " i " +
98 " LEFT JOIN specTax st ON i.spectaxID = st.SpecTaxID " +
99 " WHERE ( i.BildId IN (" + ID_LIST_TOKEN + ") )";
100 return strRecordQuery;
101 }
102
103
104
105 /* (non-Javadoc)
106 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doPartition(eu.etaxonomy.cdm.io.common.ResultSetPartitioner, eu.etaxonomy.cdm.io.globis.GlobisImportState)
107 */
108 @Override
109 public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
110 boolean success = true;
111
112 Set<Media> objectsToSave = new HashSet<Media>();
113
114 Map<String, DerivedUnit> typeMap = (Map<String, DerivedUnit>) partitioner.getObjectMap(TYPE_NAMESPACE);
115
116 Map<String, Taxon> taxonMap = (Map<String, Taxon>) partitioner.getObjectMap(TAXON_NAMESPACE);
117 Map<String, ZoologicalName> specTaxNameMap = (Map<String, ZoologicalName>) partitioner.getObjectMap(SPEC_TAX_NAMESPACE);
118
119 ResultSet rs = partitioner.getResultSet();
120
121 Reference<?> refGart = getReferenceService().find(uuidGartRef);
122
123
124 try {
125
126 int i = 0;
127
128 //for each record
129 while (rs.next()){
130
131 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
132
133 Integer bildID = rs.getInt("BildID");
134 Integer spectaxID = nullSafeInt(rs, "spectaxID");
135 Integer taxonID = nullSafeInt(rs, "SpecCurrspecID");
136 String copyright = rs.getString("copyright");
137 String specimenId = rs.getString("specimenID");
138 String bemerkungen = rs.getString("Bemerkungen");
139 String artNotSpecTax = rs.getString("Art non spectax");
140 String motiv = rs.getString("Motiv");
141
142 //ignore:
143 // [file lab2], same as Dateiname04 but less data
144 // Dateipfad
145
146 Set<Media> recordMedia = new HashSet<Media>();
147
148 try {
149
150 makeAllMedia(state, rs, recordMedia, objectsToSave);
151
152 String title = null;
153
154 DerivedUnit specimen = null;
155 if (spectaxID != null){
156 //try to find type specimen
157 if (isNotBlank(motiv) && (motiv.startsWith("type specimen"))){
158 String collectionCode = transformCopyright2CollectionCode(copyright);
159 String id = GlobisSpecTaxImport.getTypeId(spectaxID, collectionCode);
160 specimen = typeMap.get(id);
161 }
162
163 //try to find specTaxName
164 ZoologicalName specTaxTaxonName = specTaxNameMap.get(String.valueOf(spectaxID));
165 if (specTaxTaxonName != null){
166 title = " taxon name " + specTaxTaxonName.getTitleCache();
167 }else{
168 title = " spectaxID " + spectaxID;
169 }
170 }else{
171 title = " name " + getNameFromFileOs(rs) + (isBlank(specimenId)? "" : " (specimenId: " + specimenId + ")");
172 }
173
174 //not type specimen
175 if (specimen == null){
176 specimen = DerivedUnit.NewPreservedSpecimenInstance();
177 specimen.setTitleCache("Specimen for " + title );
178 String collectionCode = transformCopyright2CollectionCode(copyright);
179 //TODO
180 Collection collection = getCollection(collectionCode);
181 specimen.setCollection(collection);
182 }
183
184
185 //source
186 specimen.addSource(OriginalSourceType.Import, String.valueOf(bildID), IMAGE_NAMESPACE, state.getTransactionalSourceReference(), null);
187
188 //GART id (specimenID)
189 if (isNotBlank(specimenId)){
190 specimen.addSource(OriginalSourceType.Lineage, specimenId, "specimenId", refGart, null);
191 }
192 //bemerkungen
193 if (isNotBlank(bemerkungen)){
194 Annotation annotation = Annotation.NewInstance(bemerkungen, null, null);
195 specimen.addAnnotation(annotation);
196 }
197 //media
198 DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(specimen);
199 for (Media media: recordMedia){
200 facade.addDerivedUnitMedia(media);
201 }
202 //art non spectax
203 if (isNotBlank(artNotSpecTax)){
204 if (artNotSpecTax.equalsIgnoreCase("ja")){
205 MarkerType artNotSpecTaxMarker = getMarkerType(state, uuidArtNonSpecTaxMarkerType , "Art non spectax", "This marker is true if in the orginal data the 'Art non spectax' was 'ja'", null) ;
206 specimen.addMarker(Marker.NewInstance(artNotSpecTaxMarker, true));
207 }else{
208 logger.warn(artNotSpecTax + " is not a valid value for 'Art non spectax' (BildID: " + bildID + ")" );
209 }
210 }
211
212 if (spectaxID != null){
213
214 //add to image gallery (discuss if this is also needed if taxon is already added to type specimen
215 // Taxon taxon = taxonMap.get(String.valueOf(taxonID));
216 ZoologicalName specTaxTaxonName = specTaxNameMap.get(String.valueOf(spectaxID));
217
218 //
219 // if (taxon == null){
220 //// taxon = specTaxMap.get(String.valueOf(spectaxID));
221 //// specTaxName = specTaxMap.g
222 // }
223 // if (taxon == null){
224 // logger.warn("No taxon available for specTaxID: " + spectaxID);
225 // }else{
226 // name = CdmBase.deproxy(taxon.getName(), ZoologicalName.class);
227 // }
228
229 //TODO FIXME
230
231 if (specTaxTaxonName == null){
232 logger.warn("Name could not be found for spectaxID: " + spectaxID + " in BildID: " + bildID);
233 }else{
234 Taxon taxon = null;
235 for (Taxon specTaxTaxon: specTaxTaxonName.getTaxa()){
236 taxon = specTaxTaxon;
237 }
238 if (taxon == null){
239 //FIXME
240 Reference<?> undefinedSec = null;
241 taxon = Taxon.NewInstance(specTaxTaxonName, undefinedSec);
242 }
243
244 DeterminationEvent.NewInstance(taxon, specimen);
245
246 }
247
248
249
250
251 // if (taxon != null){
252 // TaxonDescription taxonDescription = getTaxonDescription(taxon, true, true);
253 // if (taxonDescription.getElements().size() == 0){
254 // TextData textData = TextData.NewInstance(Feature.IMAGE());
255 // taxonDescription.addElement(textData);
256 // }
257 // Set<DescriptionElementBase> elements = taxonDescription.getElements();
258 // TextData textData = CdmBase.deproxy(elements.iterator().next(), TextData.class);
259 // for (Media media: recordMedia){
260 // textData.addMedia(media);
261 // }
262 // }
263 }
264
265 } catch (Exception e) {
266 logger.warn("Exception in Einzelbilder: bildID " + bildID + ". " + e.getMessage());
267 e.printStackTrace();
268 }
269
270 }
271
272 logger.info(pluralString + " to save: " + objectsToSave.size());
273 getMediaService().save(objectsToSave);
274
275 return success;
276 } catch (SQLException e) {
277 logger.error("SQLException:" + e);
278 return false;
279 }
280 }
281
282 private Collection getCollection(String collectionCode) {
283 //TODO
284 return null;
285 }
286
287
288
289
290 private String getNameFromFileOs(ResultSet rs) throws SQLException {
291 String fileOS = rs.getString("file OS");
292 Pattern pattern = Pattern.compile("(.+)(_.{4}(-.{1,3})?(_Nr\\d{3,4})?_.{2,3}\\.jpg)");
293 Matcher matcher = pattern.matcher(fileOS);
294 if (matcher.matches()){
295 String match = matcher.group(1);
296 return match;
297 }else{
298 logger.warn("FileOS does not match: " + fileOS);
299 return fileOS;
300 }
301 }
302
303
304
305
306 private void makeAllMedia(GlobisImportState state, ResultSet rs, Set<Media> recordMedia, Set<Media> objectsToSave) throws SQLException{
307 //make image path
308 String pathShort = rs.getString("Dateipfad_kurz");
309 String fileOS = rs.getString("file OS");
310 pathShort= pathShort.replace(fileOS, "");
311 String newPath = state.getConfig().getImageBaseUrl();
312 String path = pathShort.replace("image:Webversionen/", newPath);
313
314 Media singleMedia = makeMedia(state, rs, "file OS", "Legende 1", path, objectsToSave );
315 recordMedia.add(singleMedia);
316 singleMedia = makeMedia(state, rs, "Dateinamen02", "Legende 2", path, objectsToSave );
317 recordMedia.add(singleMedia);
318 singleMedia = makeMedia(state, rs, "Dateinamen03", "Legende 3", path, objectsToSave );
319 recordMedia.add(singleMedia);
320 singleMedia = makeMedia(state, rs, "Dateinamen04", "Legende 4", path, objectsToSave );
321 recordMedia.add(singleMedia);
322
323 }
324
325 private Media makeMedia(GlobisImportState state, ResultSet rs, String fileNameAttr, String legendAttr, String path, Set<Media> objectsToSave) throws SQLException {
326 Media media = null;
327 String fileName = rs.getString(fileNameAttr);
328 String legend = rs.getString(legendAttr);
329 Integer bildID = rs.getInt("BildID");
330
331 String uriStr = path+fileName;
332 uriStr = uriStr.replace(" ", "%20");
333
334 URI uri = URI.create(uriStr);
335
336 // Media media = ImageInfo.NewInstanceWithMetaData(uri, null);
337
338 try {
339 boolean readMediaData = state.getConfig().isDoReadMediaData();
340 if (isBlank(legend) && readMediaData){
341 if (UriUtils.isOk(UriUtils.getResponse(uri, null))){
342 logger.warn("Image exists but legend is null " + uri + ", bildID" + bildID );
343 }else{
344 return null;
345 }
346 }
347
348 media = this.getImageMedia(uri.toString(), readMediaData);
349 media.putTitle(Language.ENGLISH(), legend);
350 this.doIdCreatedUpdatedNotes(state, media, rs, bildID, IMAGE_NAMESPACE);
351
352 objectsToSave.add(media);
353
354
355 } catch (MalformedURLException e) {
356 e.printStackTrace();
357 } catch (ClientProtocolException e) {
358 e.printStackTrace();
359 } catch (IOException e) {
360 e.printStackTrace();
361 }
362
363 return media;
364 }
365
366 private String transformCopyright2CollectionCode(String copyright){
367
368 if (isBlank(copyright)){
369 return "";
370 }else if(copyright.matches("Museum f.?r Naturkunde der Humboldt-Universit.?t, Berlin")){
371 return "MFNB";
372 }else if(copyright.matches("Staatliches Museum f.?r Tierkunde Dresden")){
373 return "SMTD";
374 }else if(copyright.equals("Natural History Museum, London")){
375 return "BMNH";
376 }else if(copyright.matches("Zoologische Staatssammlung M.?nchen")){
377 return "ZSSM";
378 }else if(copyright.matches("Staatliches Museum f.?r Naturkunde Karlsruhe")){
379 return "SMNK";
380 }else if(copyright.matches("Deutsches Entomologisches Institut M.?ncheberg")){
381 return "DEIE";
382 }else if(copyright.equals("Forschungsinstitut und Naturmuseum Senckenberg")){
383 return "SMFM";
384 }else if(copyright.matches("Mus.?um National d.?Histoire Naturelle, Paris")){
385 return "MNHN";
386 }else if(copyright.equals("Naturhistorisches Museum Wien")){
387 return "NHMW";
388 }else if(copyright.equals("Naturhistoriska Riksmuseet Stockholm")){
389 return "NRMS";
390 }else if(copyright.matches("Staatliches Museum f.?r Naturkunde Stuttgart")){
391 return "SMNS";
392 }else if(copyright.equals("United States National Museum of Natural History, Washington")){
393 return "USNM";
394 }else if(copyright.matches("Zentrum f.?r Biodokumentation des Saarlandes")){
395 return "ZFBS";
396 }else if(copyright.equals("Zoological Museum, University of Copenhagen")){
397 return "ZMUC";
398 }else if(copyright.equals("Zoologisches Forschungsinstitut und Museum \"Alexander Koenig\", Bonn")){
399 return "ZFMK";
400 }else if(copyright.equals("Zoologisches Forschungsmuseum \"Alexander Koenig\", Bonn")){
401 return "ZFMK";
402 }else if(copyright.matches("Zoologisches Institut der Martin-Luther-Universit.?t Halle-Wittenberg")){
403 return "ZIUH";
404 }else if(copyright.matches("Zoologisches Institut Universit.?t T.?bingen")){
405 return "ZIUT";
406 }else{
407 logger.warn("Unknown copyright entry: " + copyright);
408 return "";
409 }
410 }
411
412
413 @Override
414 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, GlobisImportState state) {
415 String nameSpace;
416 Class<?> cdmClass;
417 Set<String> idSet;
418 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
419 try{
420 Set<String> currSpecIdSet = new HashSet<String>();
421 Set<String> specTaxIdSet = new HashSet<String>();
422 Set<String> typeIdSet = new HashSet<String>();
423
424 while (rs.next()){
425 handleForeignKey(rs, currSpecIdSet, "SpecCurrspecID");
426 handleForeignKey(rs, specTaxIdSet, "spectaxID");
427 handleTypeKey(rs, typeIdSet, "spectaxID", "copyright");
428 }
429
430 //specTax map
431 nameSpace = SPEC_TAX_NAMESPACE;
432 cdmClass = ZoologicalName.class;
433 idSet = specTaxIdSet;
434 Map<String, ZoologicalName> specTaxNameMap = (Map<String, ZoologicalName>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
435 result.put(nameSpace, specTaxNameMap);
436
437 // //taxon map
438 // nameSpace = TAXON_NAMESPACE;
439 // cdmClass = Taxon.class;
440 // idSet = currSpecIdSet;
441 // Map<String, Taxon> taxonMap = (Map<String, Taxon>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
442 // result.put(nameSpace, taxonMap);
443
444
445 //type map
446 nameSpace = GlobisSpecTaxImport.TYPE_NAMESPACE;
447 cdmClass = DerivedUnit.class;
448 idSet = typeIdSet;
449 Map<String, DerivedUnit> typeMap = (Map<String, DerivedUnit>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
450 result.put(nameSpace, typeMap);
451
452
453 } catch (SQLException e) {
454 throw new RuntimeException(e);
455 }
456 return result;
457 }
458
459 private void handleTypeKey(ResultSet rs, Set<String> idSet, String specTaxIdAttr, String copyrightAttr) throws SQLException {
460 Integer specTaxId = nullSafeInt(rs, specTaxIdAttr);
461 if (specTaxId != null){
462 String copyright = rs.getString(copyrightAttr);
463 if (isNotBlank(copyright)){
464 String id = GlobisSpecTaxImport.getTypeId(specTaxId, transformCopyright2CollectionCode(copyright));
465 idSet.add(id);
466 }
467 }
468 }
469
470 /* (non-Javadoc)
471 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
472 */
473 @Override
474 protected boolean doCheck(GlobisImportState state){
475 IOValidator<GlobisImportState> validator = new GlobisImageImportValidator();
476 return validator.validate(state);
477 }
478
479
480 /* (non-Javadoc)
481 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
482 */
483 protected boolean isIgnore(GlobisImportState state){
484 return ! state.getConfig().isDoImages();
485 }
486
487
488
489
490 /* (non-Javadoc)
491 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doInvoke(eu.etaxonomy.cdm.io.globis.GlobisImportState)
492 */
493 @Override
494 protected void doInvoke(GlobisImportState state) {
495 Reference refGart = ReferenceFactory.newGeneric();
496 refGart.setTitleCache("GART");
497 refGart.setUuid(uuidGartRef);
498 getReferenceService().saveOrUpdate(refGart);
499 super.doInvoke(state);
500 }
501
502
503
504
505
506 }