Project

General

Profile

Download (16.5 KB) Statistics
| Branch: | Revision:
/**
* Copyright (C) 2007 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
9

    
10
package eu.etaxonomy.cdm.io.globis;
11

    
12
import java.io.IOException;
13
import java.net.MalformedURLException;
14
import java.sql.ResultSet;
15
import java.sql.SQLException;
16
import java.util.HashMap;
17
import java.util.HashSet;
18
import java.util.Map;
19
import java.util.Set;
20
import java.util.UUID;
21
import java.util.regex.Matcher;
22
import java.util.regex.Pattern;
23

    
24
import org.apache.http.client.ClientProtocolException;
25
import org.apache.logging.log4j.LogManager;
26
import org.apache.logging.log4j.Logger;
27
import org.springframework.stereotype.Component;
28

    
29
import eu.etaxonomy.cdm.common.URI;
30
import eu.etaxonomy.cdm.common.UriUtils;
31
import eu.etaxonomy.cdm.facade.DerivedUnitFacade;
32
import eu.etaxonomy.cdm.io.common.IOValidator;
33
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
34
import eu.etaxonomy.cdm.io.globis.validation.GlobisImageImportValidator;
35
import eu.etaxonomy.cdm.model.common.Annotation;
36
import eu.etaxonomy.cdm.model.common.CdmBase;
37
import eu.etaxonomy.cdm.model.common.Language;
38
import eu.etaxonomy.cdm.model.common.Marker;
39
import eu.etaxonomy.cdm.model.common.MarkerType;
40
import eu.etaxonomy.cdm.model.media.Media;
41
import eu.etaxonomy.cdm.model.name.IZoologicalName;
42
import eu.etaxonomy.cdm.model.name.TaxonName;
43
import eu.etaxonomy.cdm.model.occurrence.Collection;
44
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
45
import eu.etaxonomy.cdm.model.occurrence.DeterminationEvent;
46
import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
47
import eu.etaxonomy.cdm.model.reference.Reference;
48
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
49
import eu.etaxonomy.cdm.model.taxon.Taxon;
50

    
51

    
/**
 * Imports the image records of the Globis table "Einzelbilder" as media
 * that are attached to specimens.
 *
 * @author a.mueller
 * @since 20.02.2010
 */
56
@Component
57
public class GlobisImageImport  extends GlobisImportBase<Taxon> {
58

    
59
    private static final long serialVersionUID = 5697033145326415146L;
60
    private static final Logger logger = LogManager.getLogger();
61

    
62
	private int modCount = 1000;
63

    
64
	private UUID uuidArtNonSpecTaxMarkerType = UUID.fromString("be362085-0f5b-4314-96d1-78b9b129ef6d") ;
65
	private static final String pluralString = "images";
66
	private static final String dbTableName = "Einzelbilder";
67
	private static final Class<?> cdmTargetClass = Media.class;  //not needed
68

    
69
	private static UUID uuidGartRef = UUID.fromString("af85470f-6e54-4304-9d29-fd117cd56161");
70

    
71
	/**
	 * Default constructor. Hands the plural label, the source table name and
	 * the target class of this import to the base-class constructor.
	 */
	public GlobisImageImport() {
		super(pluralString, dbTableName, cdmTargetClass);
	}
74

    
75
	@Override
76
	protected String getIdQuery() {
77
		String strRecordQuery =
78
			" SELECT BildId " +
79
			" FROM " + dbTableName;
80
		return strRecordQuery;
81
	}
82

    
83
	@Override
84
	protected String getRecordQuery(GlobisImportConfigurator config) {
85
		String strRecordQuery =
86
			" SELECT i.*, NULL as Created_When, NULL as Created_Who," +
87
				"  NULL as Updated_who, NULL as Updated_When, NULL as Notes, st.SpecCurrspecID " +
88
			" FROM " + getTableName() + " i " +
89
				" LEFT JOIN specTax st ON i.spectaxID = st.SpecTaxID " +
90
			" WHERE ( i.BildId IN (" + ID_LIST_TOKEN + ") )";
91
		return strRecordQuery;
92
	}
93

    
94
	@Override
95
	public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, GlobisImportState state) {
96
		boolean success = true;
97

    
98
		Set<Media> objectsToSave = new HashSet<>();
99

    
100
		@SuppressWarnings("unchecked")
101
        Map<String, DerivedUnit> typeMap = partitioner.getObjectMap(TYPE_NAMESPACE);
102

    
103
		@SuppressWarnings("unchecked")
104
        Map<String, Taxon> taxonMap = partitioner.getObjectMap(TAXON_NAMESPACE);
105
		@SuppressWarnings("unchecked")
106
        Map<String, TaxonName> specTaxNameMap = partitioner.getObjectMap(SPEC_TAX_NAMESPACE);
107

    
108
		ResultSet rs = partitioner.getResultSet();
109

    
110
		Reference refGart = getReferenceService().find(uuidGartRef);
111

    
112

    
113
		try {
114

    
115
			int i = 0;
116

    
117
			//for each record
118
            while (rs.next()){
119

    
120
        		if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
121

    
122
        		Integer bildID = rs.getInt("BildID");
123
        		Integer spectaxID = nullSafeInt(rs, "spectaxID");
124
        		Integer taxonID = nullSafeInt(rs, "SpecCurrspecID");
125
        		String copyright = rs.getString("copyright");
126
        		String specimenId = rs.getString("specimenID");
127
        		String bemerkungen = rs.getString("Bemerkungen");
128
        		String artNotSpecTax = rs.getString("Art non spectax");
129
        		String motiv = rs.getString("Motiv");
130

    
131
        		//ignore:
132
        		//	[file lab2], same as Dateiname04 but less data
133
        		//	Dateipfad
134

    
135
        		Set<Media> recordMedia = new HashSet<>();
136

    
137
        		try {
138

    
139
        			makeAllMedia(state, rs, recordMedia, objectsToSave);
140

    
141
        			String title = null;
142

    
143
        			DerivedUnit specimen = null;
144
        			if (spectaxID != null){
145
        				//try to find type specimen
146
        				if (isNotBlank(motiv) && (motiv.startsWith("type specimen"))){
147
	        				String collectionCode = transformCopyright2CollectionCode(copyright);
148
	    					String id = GlobisSpecTaxImport.getTypeId(spectaxID, collectionCode);
149
	    					specimen = typeMap.get(id);
150
        				}
151

    
152
    					//try to find specTaxName
153
        				IZoologicalName specTaxTaxonName = specTaxNameMap.get(String.valueOf(spectaxID));
154
            			if (specTaxTaxonName != null){
155
            				title = " taxon name " + specTaxTaxonName.getTitleCache();
156
            			}else{
157
            				title = " spectaxID " + spectaxID;
158
            			}
159
    				}else{
160
    					title = " name " + getNameFromFileOs(rs) + (isBlank(specimenId)? "" : " (specimenId: " + specimenId + ")");
161
    				}
162

    
163
        			//not type specimen
164
        			if (specimen == null){
165
						specimen = DerivedUnit.NewPreservedSpecimenInstance();
166
						specimen.setTitleCache("Specimen for " + title, true);
167
						String collectionCode = transformCopyright2CollectionCode(copyright);
168
						//TODO
169
						Collection collection = getCollection(collectionCode);
170
						specimen.setCollection(collection);
171
					}
172

    
173
					//source
174
					specimen.addSource(OriginalSourceType.Import, String.valueOf(bildID), IMAGE_NAMESPACE, state.getTransactionalSourceReference(), null);
175

    
176
					//GART id (specimenID)
177
					if (isNotBlank(specimenId)){
178
						specimen.addSource(OriginalSourceType.Lineage, specimenId, "specimenId", refGart, null);
179
					}
180
					//bemerkungen
181
					if (isNotBlank(bemerkungen)){
182
						Annotation annotation = Annotation.NewInstance(bemerkungen, null, null);
183
						specimen.addAnnotation(annotation);
184
					}
185
					//media
186
					DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(specimen);
187
					for (Media media: recordMedia){
188
						facade.addDerivedUnitMedia(media);
189
					}
190
					//art non spectax
191
					if (isNotBlank(artNotSpecTax)){
192
						if (artNotSpecTax.equalsIgnoreCase("ja")){
193
							MarkerType artNotSpecTaxMarker = getMarkerType(state, uuidArtNonSpecTaxMarkerType  , "Art non spectax", "This marker is true if in the orginal data the 'Art non spectax' was 'ja'", null) ;
194
							specimen.addMarker(Marker.NewInstance(artNotSpecTaxMarker, true));
195
						}else{
196
							logger.warn(artNotSpecTax + " is not a valid value for 'Art non spectax' (BildID: " + bildID + ")" );
197
						}
198
					}
199

    
200
					if (spectaxID != null){
201

    
202
						//add to image gallery (discuss if this is also needed if taxon is already added to type specimen
203
//						Taxon taxon = taxonMap.get(String.valueOf(taxonID));
204
						IZoologicalName specTaxTaxonName = specTaxNameMap.get(String.valueOf(spectaxID));
205

    
206
//
207
//						if (taxon == null){
208
////							taxon = specTaxMap.get(String.valueOf(spectaxID));
209
////							specTaxName = specTaxMap.g
210
//						}
211
//						if (taxon == null){
212
//							logger.warn("No taxon available for specTaxID: " +  spectaxID);
213
//						}else{
214
//							name = CdmBase.deproxy(taxon.getName(), ZoologicalName.class);
215
//						}
216

    
217
						//TODO FIXME
218

    
219
						if (specTaxTaxonName == null){
220
							logger.warn("Name could not be found for spectaxID: " + spectaxID +  " in BildID: " + bildID);
221
						}else{
222
							Taxon taxon = null;
223
							for (Taxon specTaxTaxon: specTaxTaxonName.getTaxa()){
224
								taxon = specTaxTaxon;
225
							}
226
							if (taxon == null){
227
								//FIXME
228
								Reference undefinedSec = null;
229
								taxon = Taxon.NewInstance(specTaxTaxonName, undefinedSec);
230
							}
231

    
232
							DeterminationEvent.NewInstance(taxon, specimen);
233

    
234
						}
235

    
236

    
237

    
238

    
239
//						if (taxon != null){
240
//							TaxonDescription taxonDescription = getTaxonDescription(taxon, true, true);
241
//							if (taxonDescription.getElements().size() == 0){
242
//								TextData textData = TextData.NewInstance(Feature.IMAGE());
243
//								taxonDescription.addElement(textData);
244
//							}
245
//							Set<DescriptionElementBase> elements = taxonDescription.getElements();
246
//							TextData textData = CdmBase.deproxy(elements.iterator().next(), TextData.class);
247
//							for (Media media: recordMedia){
248
//								textData.addMedia(media);
249
//							}
250
//						}
251
					}
252

    
253
				} catch (Exception e) {
254
					logger.warn("Exception in Einzelbilder: bildID " + bildID + ". " + e.getMessage());
255
					e.printStackTrace();
256
				}
257

    
258
            }
259

    
260
			logger.info(pluralString + " to save: " + objectsToSave.size());
261
			getMediaService().save(objectsToSave);
262

    
263
			return success;
264
		} catch (SQLException e) {
265
			logger.error("SQLException:" +  e);
266
			return false;
267
		}
268
	}
269

    
270
	private Collection getCollection(String collectionCode) {
271
		//TODO
272
		return null;
273
	}
274

    
275
	private String getNameFromFileOs(ResultSet rs) throws SQLException {
276
		String fileOS = rs.getString("file OS");
277
		Pattern pattern = Pattern.compile("(.+)(_.{4}(-.{1,3})?(_Nr\\d{3,4})?_.{2,3}\\.jpg)");
278
		Matcher matcher = pattern.matcher(fileOS);
279
		if (matcher.matches()){
280
			String match = matcher.group(1);
281
			return match;
282
		}else{
283
			logger.warn("FileOS does not match: " +  fileOS);
284
			return fileOS;
285
		}
286
	}
287

    
288
	private void makeAllMedia(GlobisImportState state, ResultSet rs, Set<Media> recordMedia, Set<Media> objectsToSave) throws SQLException{
289
			//make image path
290
		String pathShort = rs.getString("Dateipfad_kurz");
291
		String fileOS = rs.getString("file OS");
292
		pathShort= pathShort.replace(fileOS, "");
293
		String newPath = state.getConfig().getImageBaseUrl();
294
		String path = pathShort.replace("image:Webversionen/", newPath);
295

    
296
		Media singleMedia = makeMedia(state, rs, "file OS", "Legende 1", path, objectsToSave );
297
		recordMedia.add(singleMedia);
298
		singleMedia = makeMedia(state, rs, "Dateinamen02", "Legende 2", path, objectsToSave );
299
		recordMedia.add(singleMedia);
300
		singleMedia = makeMedia(state, rs, "Dateinamen03", "Legende 3", path, objectsToSave );
301
		recordMedia.add(singleMedia);
302
		singleMedia = makeMedia(state, rs, "Dateinamen04", "Legende 4", path, objectsToSave );
303
		recordMedia.add(singleMedia);
304

    
305
	}
306

    
307
	private Media makeMedia(GlobisImportState state, ResultSet rs, String fileNameAttr, String legendAttr, String path, Set<Media> objectsToSave) throws SQLException {
308
		Media media = null;
309
		String fileName = rs.getString(fileNameAttr);
310
		String legend = rs.getString(legendAttr);
311
		Integer bildID = rs.getInt("BildID");
312

    
313
		String uriStr = path+fileName;
314
		uriStr = uriStr.replace(" ", "%20");
315

    
316
		URI uri = URI.create(uriStr);
317

    
318
//		Media media = ImageInfo.NewInstanceWithMetaData(uri, null);
319

    
320
		try {
321
			boolean readMediaData = state.getConfig().isDoReadMediaData();
322
			if (isBlank(legend) && readMediaData){
323
				if (UriUtils.isOk(UriUtils.getResponse(uri, null))){
324
					logger.warn("Image exists but legend is null " + uri + ", bildID" + bildID );
325
				}else{
326
					return null;
327
				}
328
			}
329

    
330
			media = this.getImageMedia(uri.toString(), readMediaData);
331
			media.putTitle(Language.ENGLISH(), legend);
332
			this.doIdCreatedUpdatedNotes(state, media, rs, bildID, IMAGE_NAMESPACE);
333

    
334
			objectsToSave.add(media);
335

    
336

    
337
		} catch (MalformedURLException e) {
338
			e.printStackTrace();
339
		} catch (ClientProtocolException e) {
340
			e.printStackTrace();
341
		} catch (IOException e) {
342
			e.printStackTrace();
343
		}
344

    
345
		return media;
346
	}
347

    
348
	private String transformCopyright2CollectionCode(String copyright){
349

    
350
		if (isBlank(copyright)){
351
			return "";
352
		}else if(copyright.matches("Museum f.?r Naturkunde der Humboldt-Universit.?t, Berlin")){
353
			return "MFNB";
354
		}else if(copyright.matches("Staatliches Museum f.?r Tierkunde Dresden")){
355
			return "SMTD";
356
		}else if(copyright.equals("Natural History Museum, London")){
357
			return "BMNH";
358
		}else if(copyright.matches("Zoologische Staatssammlung M.?nchen")){
359
			return "ZSSM";
360
		}else if(copyright.matches("Staatliches Museum f.?r Naturkunde Karlsruhe")){
361
			return "SMNK";
362
		}else if(copyright.matches("Deutsches Entomologisches Institut M.?ncheberg")){
363
			return "DEIE";
364
		}else if(copyright.equals("Forschungsinstitut und Naturmuseum Senckenberg")){
365
			return "SMFM";
366
		}else if(copyright.matches("Mus.?um National d.?Histoire Naturelle, Paris")){
367
			return "MNHN";
368
		}else if(copyright.equals("Naturhistorisches Museum Wien")){
369
			return "NHMW";
370
		}else if(copyright.equals("Naturhistoriska Riksmuseet Stockholm")){
371
			return "NRMS";
372
		}else if(copyright.matches("Staatliches Museum f.?r Naturkunde Stuttgart")){
373
			return "SMNS";
374
		}else if(copyright.equals("United States National Museum of Natural History, Washington")){
375
			return "USNM";
376
		}else if(copyright.matches("Zentrum f.?r Biodokumentation des Saarlandes")){
377
			return "ZFBS";
378
		}else if(copyright.equals("Zoological Museum, University of Copenhagen")){
379
			return "ZMUC";
380
		}else if(copyright.equals("Zoologisches Forschungsinstitut und Museum \"Alexander Koenig\", Bonn")){
381
			return "ZFMK";
382
		}else if(copyright.equals("Zoologisches Forschungsmuseum \"Alexander Koenig\", Bonn")){
383
			return "ZFMK";
384
		}else if(copyright.matches("Zoologisches Institut der Martin-Luther-Universit.?t Halle-Wittenberg")){
385
			return "ZIUH";
386
		}else if(copyright.matches("Zoologisches Institut Universit.?t T.?bingen")){
387
			return "ZIUT";
388
		}else{
389
			logger.warn("Unknown copyright entry: " + copyright);
390
			return "";
391
		}
392
	}
393

    
394

    
395
	@Override
396
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, GlobisImportState state) {
397

    
398
	    String nameSpace;
399
		Set<String> idSet;
400
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
401
		try{
402
			Set<String> currSpecIdSet = new HashSet<>();
403
			Set<String> specTaxIdSet = new HashSet<>();
404
			Set<String> typeIdSet = new HashSet<>();
405

    
406
			while (rs.next()){
407
				handleForeignKey(rs, currSpecIdSet, "SpecCurrspecID");
408
				handleForeignKey(rs, specTaxIdSet, "spectaxID");
409
				handleTypeKey(rs, typeIdSet, "spectaxID", "copyright");
410
			}
411

    
412
			//specTax map
413
			nameSpace = SPEC_TAX_NAMESPACE;
414
			idSet = specTaxIdSet;
415
			Map<String, TaxonName> specTaxNameMap = getCommonService().getSourcedObjectsByIdInSourceC(TaxonName.class, idSet, nameSpace);
416
			result.put(nameSpace, specTaxNameMap);
417

    
418
//			//taxon map
419
//			nameSpace = TAXON_NAMESPACE;
420
//			idSet = currSpecIdSet;
421
//			Map<String, Taxon> taxonMap = getCommonService().getSourcedObjectsByIdInSourceC(Taxon.class, idSet, nameSpace);
422
//			result.put(nameSpace, taxonMap);
423

    
424

    
425
			//type map
426
			nameSpace = GlobisImportBase.TYPE_NAMESPACE;
427
			idSet = typeIdSet;
428
			Map<String, DerivedUnit> typeMap = getCommonService().getSourcedObjectsByIdInSourceC(DerivedUnit.class, idSet, nameSpace);
429
			result.put(nameSpace, typeMap);
430

    
431

    
432
		} catch (SQLException e) {
433
			throw new RuntimeException(e);
434
		}
435
		return result;
436
	}
437

    
438
	private void handleTypeKey(ResultSet rs, Set<String> idSet, String specTaxIdAttr, String copyrightAttr) throws SQLException {
439
		Integer specTaxId = nullSafeInt(rs, specTaxIdAttr);
440
		if (specTaxId != null){
441
			String copyright = rs.getString(copyrightAttr);
442
			if (isNotBlank(copyright)){
443
				String id  = GlobisSpecTaxImport.getTypeId(specTaxId, transformCopyright2CollectionCode(copyright));
444
				idSet.add(id);
445
			}
446
		}
447
	}
448

    
449
	@Override
450
	protected boolean doCheck(GlobisImportState state){
451
		IOValidator<GlobisImportState> validator = new GlobisImageImportValidator();
452
		return validator.validate(state);
453
	}
454

    
455
	@Override
456
    protected boolean isIgnore(GlobisImportState state){
457
		return ! state.getConfig().isDoImages();
458
	}
459

    
460
	@Override
461
	protected void doInvoke(GlobisImportState state) {
462
		Reference refGart = ReferenceFactory.newGeneric();
463
		refGart.setTitleCache("GART", true);
464
		refGart.setUuid(uuidGartRef);
465
		getReferenceService().saveOrUpdate(refGart);
466
		super.doInvoke(state);
467
	}
468
}
(4-4/10)