Project

General

Profile

Download (17.7 KB) Statistics
| Branch: | Revision:
/**
* Copyright (C) 2007 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
9

    
10
package eu.etaxonomy.cdm.io.globis;
11

    
12
import java.io.IOException;
13
import java.net.MalformedURLException;
14
import java.net.URI;
15
import java.sql.ResultSet;
16
import java.sql.SQLException;
17
import java.util.HashMap;
18
import java.util.HashSet;
19
import java.util.Map;
20
import java.util.Set;
21
import java.util.UUID;
22
import java.util.regex.Matcher;
23
import java.util.regex.Pattern;
24

    
25
import org.apache.http.client.ClientProtocolException;
26
import org.apache.log4j.Logger;
27
import org.springframework.stereotype.Component;
28

    
29
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
30
import eu.etaxonomy.cdm.common.UriUtils;
31
import eu.etaxonomy.cdm.io.common.IOValidator;
32
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
33
import eu.etaxonomy.cdm.io.globis.validation.GlobisImageImportValidator;
34
import eu.etaxonomy.cdm.model.common.Annotation;
35
import eu.etaxonomy.cdm.model.common.CdmBase;
36
import eu.etaxonomy.cdm.model.common.Language;
37
import eu.etaxonomy.cdm.model.common.Marker;
38
import eu.etaxonomy.cdm.model.common.MarkerType;
39
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
40
import eu.etaxonomy.cdm.model.media.Media;
41
import eu.etaxonomy.cdm.model.name.ZoologicalName;
42
import eu.etaxonomy.cdm.model.occurrence.Collection;
43
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
44
import eu.etaxonomy.cdm.model.occurrence.DeterminationEvent;
45
import eu.etaxonomy.cdm.model.reference.Reference;
46
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
47
import eu.etaxonomy.cdm.model.taxon.Taxon;
48

    
49

    
/**
 * Import step for the records of the Globis table "Einzelbilder" (single images).
 *
 * @author a.mueller
 * @created 20.02.2010
 * @version 1.0
 */
55
@Component
56
public class GlobisImageImport  extends GlobisImportBase<Taxon> {
57
	private static final Logger logger = Logger.getLogger(GlobisImageImport.class);
58
	
59
	private int modCount = 1000;
60

    
61
	private UUID uuidArtNonSpecTaxMarkerType = UUID.fromString("be362085-0f5b-4314-96d1-78b9b129ef6d") ;
62
	private static final String pluralString = "images";
63
	private static final String dbTableName = "Einzelbilder";
64
	private static final Class<?> cdmTargetClass = Media.class;  //not needed
65
	
66
	private static UUID uuidGartRef = UUID.fromString("af85470f-6e54-4304-9d29-fd117cd56161"); 
67
	
68
	/**
	 * Creates the image import step. The plural label, the source table name and
	 * the target class are handed over to the base class constructor.
	 */
	public GlobisImageImport(){
		super(pluralString, dbTableName, cdmTargetClass);
	}
71

    
72

    
73
	
74
	
75
	/* (non-Javadoc)
76
	 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#getIdQuery()
77
	 */
78
	@Override
79
	protected String getIdQuery() {
80
		String strRecordQuery = 
81
			" SELECT BildId " + 
82
			" FROM " + dbTableName; 
83
		return strRecordQuery;	
84
	}
85

    
86

    
87

    
88

    
89
	/* (non-Javadoc)
90
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
91
	 */
92
	@Override
93
	protected String getRecordQuery(GlobisImportConfigurator config) {
94
		String strRecordQuery = 
95
			" SELECT i.*, NULL as Created_When, NULL as Created_Who," +
96
				"  NULL as Updated_who, NULL as Updated_When, NULL as Notes, st.SpecCurrspecID " + 
97
			" FROM " + getTableName() + " i " +
98
				" LEFT JOIN specTax st ON i.spectaxID = st.SpecTaxID " +
99
			" WHERE ( i.BildId IN (" + ID_LIST_TOKEN + ") )";
100
		return strRecordQuery;
101
	}
102
	
103

    
104

    
105
	/* (non-Javadoc)
106
	 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doPartition(eu.etaxonomy.cdm.io.common.ResultSetPartitioner, eu.etaxonomy.cdm.io.globis.GlobisImportState)
107
	 */
108
	@Override
109
	public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
110
		boolean success = true;
111
		
112
		Set<Media> objectsToSave = new HashSet<Media>();
113
		
114
		Map<String, DerivedUnit> typeMap = (Map<String, DerivedUnit>) partitioner.getObjectMap(TYPE_NAMESPACE);
115
		
116
		Map<String, Taxon> taxonMap = (Map<String, Taxon>) partitioner.getObjectMap(TAXON_NAMESPACE);
117
		Map<String, ZoologicalName> specTaxNameMap = (Map<String, ZoologicalName>) partitioner.getObjectMap(SPEC_TAX_NAMESPACE);
118
		
119
		ResultSet rs = partitioner.getResultSet();
120
		
121
		Reference<?> refGart = getReferenceService().find(uuidGartRef);
122
		
123
		
124
		try {
125
			
126
			int i = 0;
127

    
128
			//for each record
129
            while (rs.next()){
130
                
131
        		if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
132
				
133
        		Integer bildID = rs.getInt("BildID");
134
        		Integer spectaxID = nullSafeInt(rs, "spectaxID");
135
        		Integer taxonID = nullSafeInt(rs, "SpecCurrspecID");
136
        		String copyright = rs.getString("copyright");
137
        		String specimenId = rs.getString("specimenID");
138
        		String bemerkungen = rs.getString("Bemerkungen");
139
        		String artNotSpecTax = rs.getString("Art non spectax");
140
        		String motiv = rs.getString("Motiv");
141
        		
142
        		//ignore: 
143
        		//	[file lab2], same as Dateiname04 but less data
144
        		//	Dateipfad
145

    
146
        		Set<Media> recordMedia = new HashSet<Media>();
147
        		
148
        		try {
149
					
150
        			makeAllMedia(state, rs, recordMedia, objectsToSave);
151
        			
152
        			String title = null;
153
        			
154
        			DerivedUnit specimen = null;
155
        			if (spectaxID != null){
156
        				//try to find type specimen
157
        				if (isNotBlank(motiv) && (motiv.startsWith("type specimen"))){
158
	        				String collectionCode = transformCopyright2CollectionCode(copyright);
159
	    					String id = GlobisSpecTaxImport.getTypeId(spectaxID, collectionCode);
160
	    					specimen = typeMap.get(id);
161
        				}
162
        				
163
    					//try to find specTaxName
164
        				ZoologicalName specTaxTaxonName = specTaxNameMap.get(String.valueOf(spectaxID));
165
            			if (specTaxTaxonName != null){
166
            				title = " taxon name " + specTaxTaxonName.getTitleCache();
167
            			}else{
168
            				title = " spectaxID " + spectaxID;
169
            			}
170
    				}else{
171
    					title = " name " + getNameFromFileOs(rs) + (isBlank(specimenId)? "" : " (specimenId: " + specimenId + ")");
172
    				}
173
        			
174
        			//not type specimen
175
        			if (specimen == null){
176
						specimen = DerivedUnit.NewPreservedSpecimenInstance();
177
						specimen.setTitleCache("Specimen for " + title );
178
						String collectionCode = transformCopyright2CollectionCode(copyright);
179
						//TODO
180
						Collection collection = getCollection(collectionCode);
181
						specimen.setCollection(collection);
182
					}
183
					
184
					
185
					//source
186
					specimen.addSource(OriginalSourceType.Import, String.valueOf(bildID), IMAGE_NAMESPACE, state.getTransactionalSourceReference(), null);
187
					
188
					//GART id (specimenID)
189
					if (isNotBlank(specimenId)){
190
						specimen.addSource(OriginalSourceType.Lineage, specimenId, "specimenId", refGart, null);
191
					}
192
					//bemerkungen
193
					if (isNotBlank(bemerkungen)){
194
						Annotation annotation = Annotation.NewInstance(bemerkungen, null, null);
195
						specimen.addAnnotation(annotation);
196
					}
197
					//media
198
					DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(specimen);
199
					for (Media media: recordMedia){
200
						facade.addDerivedUnitMedia(media);
201
					}
202
					//art non spectax
203
					if (isNotBlank(artNotSpecTax)){
204
						if (artNotSpecTax.equalsIgnoreCase("ja")){
205
							MarkerType artNotSpecTaxMarker = getMarkerType(state, uuidArtNonSpecTaxMarkerType  , "Art non spectax", "This marker is true if in the orginal data the 'Art non spectax' was 'ja'", null) ;
206
							specimen.addMarker(Marker.NewInstance(artNotSpecTaxMarker, true));
207
						}else{
208
							logger.warn(artNotSpecTax + " is not a valid value for 'Art non spectax' (BildID: " + bildID + ")" );
209
						}
210
					}
211
        			
212
					if (spectaxID != null){
213
						
214
						//add to image gallery (discuss if this is also needed if taxon is already added to type specimen
215
//						Taxon taxon = taxonMap.get(String.valueOf(taxonID));
216
						ZoologicalName specTaxTaxonName = specTaxNameMap.get(String.valueOf(spectaxID));
217
						
218
//						
219
//						if (taxon == null){
220
////							taxon = specTaxMap.get(String.valueOf(spectaxID));
221
////							specTaxName = specTaxMap.g
222
//						}
223
//						if (taxon == null){
224
//							logger.warn("No taxon available for specTaxID: " +  spectaxID);
225
//						}else{
226
//							name = CdmBase.deproxy(taxon.getName(), ZoologicalName.class);
227
//						}
228
						
229
						//TODO FIXME
230
						
231
						if (specTaxTaxonName == null){
232
							logger.warn("Name could not be found for spectaxID: " + spectaxID +  " in BildID: " + bildID);
233
						}else{
234
							Taxon taxon = null;
235
							for (Taxon specTaxTaxon: specTaxTaxonName.getTaxa()){
236
								taxon = specTaxTaxon;
237
							}
238
							if (taxon == null){
239
								//FIXME
240
								Reference<?> undefinedSec = null;
241
								taxon = Taxon.NewInstance(specTaxTaxonName, undefinedSec);
242
							}
243
							
244
							DeterminationEvent.NewInstance(taxon, specimen);
245

    
246
						}
247
						
248
						
249

    
250
						
251
//						if (taxon != null){
252
//							TaxonDescription taxonDescription = getTaxonDescription(taxon, true, true);
253
//							if (taxonDescription.getElements().size() == 0){
254
//								TextData textData = TextData.NewInstance(Feature.IMAGE());
255
//								taxonDescription.addElement(textData);
256
//							}
257
//							Set<DescriptionElementBase> elements = taxonDescription.getElements();
258
//							TextData textData = CdmBase.deproxy(elements.iterator().next(), TextData.class);
259
//							for (Media media: recordMedia){
260
//								textData.addMedia(media);
261
//							}
262
//						}
263
					}
264
					
265
				} catch (Exception e) {
266
					logger.warn("Exception in Einzelbilder: bildID " + bildID + ". " + e.getMessage());
267
					e.printStackTrace();
268
				} 
269
                
270
            }
271
           
272
			logger.info(pluralString + " to save: " + objectsToSave.size());
273
			getMediaService().save(objectsToSave);	
274
			
275
			return success;
276
		} catch (SQLException e) {
277
			logger.error("SQLException:" +  e);
278
			return false;
279
		}
280
	}
281
	
282
	private Collection getCollection(String collectionCode) {
283
		//TODO
284
		return null;
285
	}
286

    
287

    
288

    
289

    
290
	private String getNameFromFileOs(ResultSet rs) throws SQLException {
291
		String fileOS = rs.getString("file OS");
292
		Pattern pattern = Pattern.compile("(.+)(_.{4}(-.{1,3})?(_Nr\\d{3,4})?_.{2,3}\\.jpg)");
293
		Matcher matcher = pattern.matcher(fileOS);
294
		if (matcher.matches()){
295
			String match = matcher.group(1);
296
			return match;
297
		}else{
298
			logger.warn("FileOS does not match: " +  fileOS);
299
			return fileOS;
300
		}
301
	}
302

    
303

    
304

    
305

    
306
	private void makeAllMedia(GlobisImportState state, ResultSet rs, Set<Media> recordMedia, Set<Media> objectsToSave) throws SQLException{
307
			//make image path
308
		String pathShort = rs.getString("Dateipfad_kurz");
309
		String fileOS = rs.getString("file OS");
310
		pathShort= pathShort.replace(fileOS, "");
311
		String newPath = state.getConfig().getImageBaseUrl();
312
		String path = pathShort.replace("image:Webversionen/", newPath);
313
		
314
		Media singleMedia = makeMedia(state, rs, "file OS", "Legende 1", path, objectsToSave );
315
		recordMedia.add(singleMedia);
316
		singleMedia = makeMedia(state, rs, "Dateinamen02", "Legende 2", path, objectsToSave );
317
		recordMedia.add(singleMedia);
318
		singleMedia = makeMedia(state, rs, "Dateinamen03", "Legende 3", path, objectsToSave );
319
		recordMedia.add(singleMedia);
320
		singleMedia = makeMedia(state, rs, "Dateinamen04", "Legende 4", path, objectsToSave );
321
		recordMedia.add(singleMedia);
322

    
323
	}
324

    
325
	private Media makeMedia(GlobisImportState state, ResultSet rs, String fileNameAttr, String legendAttr, String path, Set<Media> objectsToSave) throws SQLException {
326
		Media media = null;
327
		String fileName = rs.getString(fileNameAttr);
328
		String legend = rs.getString(legendAttr);
329
		Integer bildID = rs.getInt("BildID");
330
		
331
		String uriStr = path+fileName;
332
		uriStr = uriStr.replace(" ", "%20");
333
		
334
		URI uri = URI.create(uriStr); 
335
		
336
//		Media media = ImageInfo.NewInstanceWithMetaData(uri, null);
337
		
338
		try {
339
			boolean readMediaData = state.getConfig().isDoReadMediaData();
340
			if (isBlank(legend) && readMediaData){
341
				if (UriUtils.isOk(UriUtils.getResponse(uri, null))){
342
					logger.warn("Image exists but legend is null " + uri + ", bildID" + bildID );
343
				}else{
344
					return null;
345
				}
346
			}
347
			
348
			media = this.getImageMedia(uri.toString(), readMediaData, false);
349
			media.putTitle(Language.ENGLISH(), legend);
350
			this.doIdCreatedUpdatedNotes(state, media, rs, bildID, IMAGE_NAMESPACE);
351
			
352
			objectsToSave.add(media);
353
			
354
			
355
		} catch (MalformedURLException e) {
356
			e.printStackTrace();
357
		} catch (ClientProtocolException e) {
358
			e.printStackTrace();
359
		} catch (IOException e) {
360
			e.printStackTrace();
361
		}
362
		
363
		return media;
364
	}
365
	
366
	private String transformCopyright2CollectionCode(String copyright){
367
		
368
		if (isBlank(copyright)){
369
			return "";
370
		}else if(copyright.matches("Museum f.?r Naturkunde der Humboldt-Universit.?t, Berlin")){
371
			return "MFNB";
372
		}else if(copyright.matches("Staatliches Museum f.?r Tierkunde Dresden")){
373
			return "SMTD";
374
		}else if(copyright.equals("Natural History Museum, London")){
375
			return "BMNH";
376
		}else if(copyright.matches("Zoologische Staatssammlung M.?nchen")){
377
			return "ZSSM";
378
		}else if(copyright.matches("Staatliches Museum f.?r Naturkunde Karlsruhe")){
379
			return "SMNK";
380
		}else if(copyright.matches("Deutsches Entomologisches Institut M.?ncheberg")){
381
			return "DEIE";
382
		}else if(copyright.equals("Forschungsinstitut und Naturmuseum Senckenberg")){
383
			return "SMFM";
384
		}else if(copyright.matches("Mus.?um National d.?Histoire Naturelle, Paris")){
385
			return "MNHN";
386
		}else if(copyright.equals("Naturhistorisches Museum Wien")){
387
			return "NHMW";
388
		}else if(copyright.equals("Naturhistoriska Riksmuseet Stockholm")){
389
			return "NRMS";
390
		}else if(copyright.matches("Staatliches Museum f.?r Naturkunde Stuttgart")){
391
			return "SMNS";
392
		}else if(copyright.equals("United States National Museum of Natural History, Washington")){
393
			return "USNM";
394
		}else if(copyright.matches("Zentrum f.?r Biodokumentation des Saarlandes")){
395
			return "ZFBS";
396
		}else if(copyright.equals("Zoological Museum, University of Copenhagen")){
397
			return "ZMUC";
398
		}else if(copyright.equals("Zoologisches Forschungsinstitut und Museum \"Alexander Koenig\", Bonn")){
399
			return "ZFMK";
400
		}else if(copyright.equals("Zoologisches Forschungsmuseum \"Alexander Koenig\", Bonn")){
401
			return "ZFMK";
402
		}else if(copyright.matches("Zoologisches Institut der Martin-Luther-Universit.?t Halle-Wittenberg")){
403
			return "ZIUH";
404
		}else if(copyright.matches("Zoologisches Institut Universit.?t T.?bingen")){
405
			return "ZIUT";
406
		}else{
407
			logger.warn("Unknown copyright entry: " + copyright);
408
			return "";
409
		}
410

    
411
	
412
	}
413

    
414

    
415

    
416
	/* (non-Javadoc)
417
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
418
	 */
419
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
420
		String nameSpace;
421
		Class cdmClass;
422
		Set<String> idSet;
423
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
424
		try{
425
			Set<String> currSpecIdSet = new HashSet<String>();
426
			Set<String> specTaxIdSet = new HashSet<String>();
427
			Set<String> typeIdSet = new HashSet<String>();
428
			
429
			while (rs.next()){
430
				handleForeignKey(rs, currSpecIdSet, "SpecCurrspecID");
431
				handleForeignKey(rs, specTaxIdSet, "spectaxID");
432
				handleTypeKey(rs, typeIdSet, "spectaxID", "copyright");
433
			}
434
			
435
			//specTax map
436
			nameSpace = SPEC_TAX_NAMESPACE;
437
			cdmClass = ZoologicalName.class;
438
			idSet = specTaxIdSet;
439
			Map<String, ZoologicalName> specTaxNameMap = (Map<String, ZoologicalName>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
440
			result.put(nameSpace, specTaxNameMap);
441

    
442
//			//taxon map
443
//			nameSpace = TAXON_NAMESPACE;
444
//			cdmClass = Taxon.class;
445
//			idSet = currSpecIdSet;
446
//			Map<String, Taxon> taxonMap = (Map<String, Taxon>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
447
//			result.put(nameSpace, taxonMap);
448

    
449
			
450
			//type map
451
			nameSpace = GlobisSpecTaxImport.TYPE_NAMESPACE;
452
			cdmClass = DerivedUnit.class;
453
			idSet = typeIdSet;
454
			Map<String, DerivedUnit> typeMap = (Map<String, DerivedUnit>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
455
			result.put(nameSpace, typeMap);
456
			
457
			
458
		} catch (SQLException e) {
459
			throw new RuntimeException(e);
460
		}
461
		return result;
462
	}
463
	
464
	private void handleTypeKey(ResultSet rs, Set<String> idSet, String specTaxIdAttr, String copyrightAttr) throws SQLException {
465
		Integer specTaxId = nullSafeInt(rs, specTaxIdAttr);
466
		if (specTaxId != null){
467
			String copyright = rs.getString(copyrightAttr);
468
			if (isNotBlank(copyright)){
469
				String id  = GlobisSpecTaxImport.getTypeId(specTaxId, transformCopyright2CollectionCode(copyright));
470
				idSet.add(id);
471
			}
472
		}
473
	}
474
	
475
	/* (non-Javadoc)
476
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
477
	 */
478
	@Override
479
	protected boolean doCheck(GlobisImportState state){
480
		IOValidator<GlobisImportState> validator = new GlobisImageImportValidator();
481
		return validator.validate(state);
482
	}
483
	
484
	
485
	/* (non-Javadoc)
486
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
487
	 */
488
	protected boolean isIgnore(GlobisImportState state){
489
		return ! state.getConfig().isDoImages();
490
	}
491

    
492

    
493

    
494

    
495
	/* (non-Javadoc)
496
	 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doInvoke(eu.etaxonomy.cdm.io.globis.GlobisImportState)
497
	 */
498
	@Override
499
	protected void doInvoke(GlobisImportState state) {
500
		Reference refGart = ReferenceFactory.newGeneric();
501
		refGart.setTitleCache("GART");
502
		refGart.setUuid(uuidGartRef);
503
		getReferenceService().saveOrUpdate(refGart);
504
		super.doInvoke(state);
505
	}
506

    
507

    
508

    
509

    
510

    
511
}
(3-3/9)