Project

General

Profile

Download (19.7 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy 
4
* http://www.e-taxonomy.eu
5
* 
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.io.berlinModel.in;
11

    
12
import java.io.IOException;
13
import java.net.MalformedURLException;
14
import java.net.URISyntaxException;
15
import java.net.URL;
16
import java.sql.ResultSet;
17
import java.sql.SQLException;
18
import java.util.Collection;
19
import java.util.HashMap;
20
import java.util.HashSet;
21
import java.util.Map;
22
import java.util.Set;
23

    
24
import org.apache.http.HttpException;
25
import org.apache.log4j.Logger;
26
import org.springframework.stereotype.Component;
27

    
28
import eu.etaxonomy.cdm.common.CdmUtils;
29
import eu.etaxonomy.cdm.common.mediaMetaData.ImageMetaData;
30
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
31
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelFactsImportValidator;
32
import eu.etaxonomy.cdm.io.common.IOValidator;
33
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
34
import eu.etaxonomy.cdm.io.common.Source;
35
import eu.etaxonomy.cdm.model.common.Annotation;
36
import eu.etaxonomy.cdm.model.common.CdmBase;
37
import eu.etaxonomy.cdm.model.common.DescriptionElementSource;
38
import eu.etaxonomy.cdm.model.common.Language;
39
import eu.etaxonomy.cdm.model.common.Marker;
40
import eu.etaxonomy.cdm.model.common.MarkerType;
41
import eu.etaxonomy.cdm.model.common.TermVocabulary;
42
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
43
import eu.etaxonomy.cdm.model.description.Feature;
44
import eu.etaxonomy.cdm.model.description.TaxonDescription;
45
import eu.etaxonomy.cdm.model.description.TextData;
46
import eu.etaxonomy.cdm.model.media.ImageFile;
47
import eu.etaxonomy.cdm.model.media.Media;
48
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
49
import eu.etaxonomy.cdm.model.reference.ReferenceBase;
50
import eu.etaxonomy.cdm.model.taxon.Taxon;
51
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
52
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
53

    
54
/**
55
 * @author a.mueller
56
 * @created 20.03.2008
57
 * @version 1.0
58
 */
59
@Component
60
public class BerlinModelFactsImport  extends BerlinModelImportBase {
61
	private static final Logger logger = Logger.getLogger(BerlinModelFactsImport.class);
62

    
63
	public static final String NAMESPACE = "Fact";
64
	
65
	public static final String SEQUENCE_PREFIX = "ORDER: ";
66
	
67
	private int modCount = 10000;
68
	private static final String pluralString = "facts";
69
	private static final String dbTableName = "Fact";
70

    
71
	//FIXME don't use as class variable
72
	private Map<Integer, Feature> featureMap;
73
	
74
	public BerlinModelFactsImport(){
75
		super();
76
	}
77

    
78

    
79
	private TermVocabulary<Feature> getFeatureVocabulary(){
80
		try {
81
			//TODO work around until service method works
82
			TermVocabulary<Feature> featureVocabulary =  BerlinModelTransformer.factCategory2Feature(1).getVocabulary();
83
			//TermVocabulary<Feature> vocabulary = getTermService().getVocabulary(vocabularyUuid);
84
			return featureVocabulary;
85
		} catch (UnknownCdmTypeException e) {
86
			logger.error("Feature vocabulary not available. New vocabulary created");
87
			return new TermVocabulary<Feature>() ;
88
		}
89
	}
90
	
91
	private Map<Integer, Feature>  invokeFactCategories(BerlinModelImportConfigurator bmiConfig){
92
		
93
		Map<Integer, Feature>  result = bmiConfig.getFeatureMap();
94
		Source source = bmiConfig.getSource();
95
		
96
		try {
97
			//get data from database
98
			String strQuery = 
99
					" SELECT FactCategory.* " + 
100
					" FROM FactCategory "+
101
                    " WHERE (1=1)";
102
			ResultSet rs = source.getResultSet(strQuery) ;
103

    
104
			
105
			TermVocabulary<Feature> featureVocabulary = getFeatureVocabulary();
106
			int i = 0;
107
			//for each reference
108
			while (rs.next()){
109
				
110
				if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("FactCategories handled: " + (i-1));}
111
				
112
				int factCategoryId = rs.getInt("factCategoryId");
113
				String factCategory = rs.getString("factCategory");
114
				
115
					
116
				Feature feature;
117
				try {
118
					feature = BerlinModelTransformer.factCategory2Feature(factCategoryId);
119
				} catch (UnknownCdmTypeException e) {
120
					logger.warn("New Feature (FactCategoryId: " + factCategoryId + ")");
121
					feature = Feature.NewInstance(factCategory, factCategory, null);
122
					featureVocabulary.addTerm(feature);
123
					feature.setSupportsTextData(true);
124
					//TODO
125
//					MaxFactNumber	int	Checked
126
//					ExtensionTableName	varchar(100)	Checked
127
//					Description	nvarchar(1000)	Checked
128
//					locExtensionFormName	nvarchar(80)	Checked
129
//					RankRestrictionFk	int	Checked
130
				}
131
								
132
				result.put(factCategoryId, feature);
133
			}
134
			Collection<Feature> col = result.values();
135
			getTermService().save((Collection)col);
136
			return result;
137
		} catch (SQLException e) {
138
			logger.error("SQLException:" +  e);
139
			return null;
140
		}
141

    
142
	}
143

    
144
	/* (non-Javadoc)
145
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#doInvoke(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
146
	 */
147
	@Override
148
	protected boolean doInvoke(BerlinModelImportState state) {
149
		featureMap = invokeFactCategories(state.getConfig());
150
		return super.doInvoke(state);
151
	}
152
		
153

    
154
	/* (non-Javadoc)
155
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
156
	 */
157
	@Override
158
	protected String getRecordQuery(BerlinModelImportConfigurator config) {
159
			String strQuery = 
160
					" SELECT Fact.*, PTaxon.RIdentifier as taxonId, RefDetail.Details " + 
161
					" FROM Fact " +
162
                      	" INNER JOIN PTaxon ON Fact.PTNameFk = PTaxon.PTNameFk AND Fact.PTRefFk = PTaxon.PTRefFk " +
163
                      	" LEFT OUTER JOIN RefDetail ON Fact.FactRefDetailFk = RefDetail.RefDetailId AND Fact.FactRefFk = RefDetail.RefFk " +
164
              	" WHERE (FactId IN (" + ID_LIST_TOKEN + "))" + 
165
                        " ORDER By Sequence";
166
		return strQuery;
167
	}
168
	
169

    
170
	/* (non-Javadoc)
171
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#doPartition(eu.etaxonomy.cdm.io.berlinModel.in.ResultSetPartitioner, eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
172
	 */
173
	public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) {
174
		boolean success = true ;
175
		BerlinModelImportConfigurator config = state.getConfig();
176
		Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
177
		Map<String, TaxonBase> taxonMap = (Map<String, TaxonBase>) partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE);
178
		Map<String, ReferenceBase> biblioRefMap = (Map<String, ReferenceBase>) partitioner.getObjectMap(BerlinModelReferenceImport.BIBLIO_REFERENCE_NAMESPACE);
179
		Map<String, ReferenceBase> nomRefMap = (Map<String, ReferenceBase>) partitioner.getObjectMap(BerlinModelReferenceImport.NOM_REFERENCE_NAMESPACE);
180

    
181
		ResultSet rs = partitioner.getResultSet();
182
		
183
			ReferenceBase<?> sourceRef = state.getConfig().getSourceReference();
184
			
185
		try{
186
			int i = 0;
187
			//for each fact
188
			while (rs.next()){
189
				try{
190
					if ((i++ % modCount) == 0){ logger.info("Facts handled: " + (i-1));}
191
					
192
					int factId = rs.getInt("factId");
193
					Object taxonIdObj = rs.getObject("taxonId");
194
					int taxonId = rs.getInt("taxonId");
195
					Object factRefFkObj = rs.getObject("factRefFk");
196
					Object categoryFkObj = rs.getObject("factCategoryFk");
197
					Integer categoryFk = rs.getInt("factCategoryFk");
198
					String details = rs.getString("Details");
199
					String fact = CdmUtils.Nz(rs.getString("Fact"));
200
					String notes = CdmUtils.Nz(rs.getString("notes"));
201
					Boolean doubtfulFlag = rs.getBoolean("DoubtfulFlag");
202
					Boolean publishFlag = rs.getBoolean("publishFlag");
203
					
204
					TaxonBase taxonBase = getTaxon(taxonMap, taxonIdObj, taxonId);
205
					Feature feature = getFeature(featureMap, categoryFkObj, categoryFk) ;
206
					
207
					if (taxonBase == null){
208
						logger.warn("Taxon for Fact " + factId + " does not exist in store");
209
						success = false;
210
					}else{
211
						Taxon taxon;
212
						if ( taxonBase instanceof Taxon ) {
213
							taxon = (Taxon) taxonBase;
214
						}else{
215
							logger.warn("TaxonBase " + (taxonIdObj==null?"(null)":taxonIdObj) + " for Fact " + factId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName());
216
							success = false;
217
							continue;
218
						}
219
						
220
						TaxonDescription taxonDescription = null;
221
						Set<TaxonDescription> descriptionSet= taxon.getDescriptions();
222
						
223
						boolean isImage = false;
224
						Media media = null;
225
						//for diptera images
226
						if (categoryFk == 51){  //TODO check also FactCategory string
227
							isImage = true;
228
							media = Media.NewInstance();
229
							taxonDescription = makeImage(state, fact, media, descriptionSet, taxon);
230
							if (taxonDescription == null){
231
								continue;
232
							}
233
						}
234
						//all others (no image)
235
						else{ 
236
							for (TaxonDescription desc: descriptionSet){
237
								if (! desc.isImageGallery()){
238
									taxonDescription = desc;
239
								}
240
							}
241
							if (taxonDescription == null){
242
								taxonDescription = TaxonDescription.NewInstance();
243
								taxonDescription.setTitleCache(sourceRef == null ? null : sourceRef.getTitleCache(), true);
244
								taxon.addDescription(taxonDescription);
245
							}
246
						}
247
					
248
						//textData
249
						TextData textData = null;
250
						boolean newTextData = true;
251
	
252
						// For Cichorieae DB: If fact category is 31 (Systematics) and there is already a Systematics TextData 
253
						// description element append the fact text to the existing TextData
254
						if(categoryFk == 31) {
255
							Set<DescriptionElementBase> descriptionElements = taxonDescription.getElements();
256
							for (DescriptionElementBase descriptionElement : descriptionElements) {
257
								String featureString = descriptionElement.getFeature().getRepresentation(Language.DEFAULT()).getLabel();
258
								if (descriptionElement instanceof TextData && featureString.equals("Systematics")) { // TODO: test
259
									textData = (TextData)descriptionElement;
260
									String factTextStr = textData.getText(Language.DEFAULT());
261
									// FIXME: Removing newlines doesn't work
262
									if (factTextStr.contains("\\r\\n")) {
263
										factTextStr = factTextStr.replaceAll("\\r\\n","");
264
									}
265
									StringBuilder factText = new StringBuilder(factTextStr);
266
									factText.append(fact);
267
									fact = factText.toString();
268
									newTextData = false;
269
									break;
270
								}
271
							}
272
						}
273
						
274
						if(newTextData == true)	{ 
275
							textData = TextData.NewInstance(); 
276
						}
277
						
278
						//for diptera database
279
						if (categoryFk == 99 && notes.contains("<OriginalName>")){
280
//							notes = notes.replaceAll("<OriginalName>", "");
281
//							notes = notes.replaceAll("</OriginalName>", "");
282
							fact = notes + ": " +  fact ;
283
						}
284
						//TODO textData.putText(fact, bmiConfig.getFactLanguage());  //doesn't work because  bmiConfig.getFactLanguage() is not not a persistent Language Object
285
						//throws  in thread "main" org.springframework.dao.InvalidDataAccessApiUsageException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language; nested exception is org.hibernate.TransientObjectException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language
286
						if (isImage){
287
							textData.addMedia(media);
288
							textData.setType(Feature.IMAGE());
289
						}else{
290
							textData.putText(fact, Language.DEFAULT());
291
							textData.setType(feature);
292
						}
293
						
294
						//reference
295
						ReferenceBase citation = null;
296
						String factRefFk = String.valueOf(factRefFkObj);
297
						if (factRefFkObj != null){
298
							citation = getReferenceOnlyFromMaps(
299
									biblioRefMap, nomRefMap, factRefFk);	
300
							}
301
						if (citation == null && (factRefFkObj != null)){
302
								logger.warn("Citation not found in referenceMap: " + factRefFk);
303
							success = false;
304
							}
305
						if (citation != null || CdmUtils.isNotEmpty(details)){
306
							DescriptionElementSource originalSource = DescriptionElementSource.NewInstance();
307
							originalSource.setCitation(citation);
308
							originalSource.setCitationMicroReference(details);
309
							textData.addSource(originalSource);
310
						}
311
						taxonDescription.addElement(textData);
312
						//doubtfulFlag
313
						if (doubtfulFlag){
314
							textData.addMarker(Marker.NewInstance(MarkerType.IS_DOUBTFUL(), true));
315
						}
316
						//publisheFlag
317
						textData.addMarker(Marker.NewInstance(MarkerType.PUBLISH(), publishFlag));
318
						//Sequence
319
						Integer sequence = rs.getInt("Sequence");
320
						if (sequence != null && sequence != 999){
321
							String strSequence = String.valueOf(sequence);
322
							strSequence = SEQUENCE_PREFIX + strSequence;
323
							//TODO make it an Extension when possible
324
							//Extension datesExtension = Extension.NewInstance(textData, strSequence, ExtensionType.ORDER());
325
							Annotation annotation = Annotation.NewInstance(strSequence, Language.DEFAULT());
326
							textData.addAnnotation(annotation);
327
						}
328
						
329
						//						if (categoryFkObj == FACT_DESCRIPTION){
330
	//						//;
331
	//					}else if (categoryFkObj == FACT_OBSERVATION){
332
	//						//;
333
	//					}else if (categoryFkObj == FACT_DISTRIBUTION_EM){
334
	//						//
335
	//					}else {
336
	//						//TODO
337
	//						//logger.warn("FactCategory " + categoryFk + " not yet implemented");
338
	//					}
339
						
340
						//notes
341
						doCreatedUpdatedNotes(state, textData, rs);
342
						
343
						//TODO
344
						//Designation References -> unclear how to map to CDM
345
						//factId -> OriginalSource for descriptionElements not yet implemented
346
						
347
						//sequence -> textData is not an identifiable entity therefore extensions are not possible
348
						//fact category better
349
						
350
						taxaToSave.add(taxon);
351
					}
352
				} catch (Exception re){
353
					logger.error("An exception occurred during the facts import");
354
					re.printStackTrace();
355
					success = false;
356
				}
357
				//put
358
			}
359
			logger.info("Facts handled: " + (i-1));
360
			logger.info("Taxa to save: " + taxaToSave.size());
361
			getTaxonService().save(taxaToSave);	
362
		}catch(SQLException e){
363
			throw new RuntimeException(e);
364
		}
365
		return success;
366
	}
367

    
368
	/* (non-Javadoc)
369
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
370
	 */
371
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
372
		String nameSpace;
373
		Class cdmClass;
374
		Set<String> idSet;
375
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
376
			
377
		try{
378
			Set<String> taxonIdSet = new HashSet<String>();
379
			Set<String> referenceIdSet = new HashSet<String>();
380
			Set<String> refDetailIdSet = new HashSet<String>();
381
			while (rs.next()){
382
				handleForeignKey(rs, taxonIdSet, "taxonId");
383
				handleForeignKey(rs, referenceIdSet, "FactRefFk");
384
				handleForeignKey(rs, referenceIdSet, "PTDesignationRefFk");
385
				handleForeignKey(rs, refDetailIdSet, "FactRefDetailFk");
386
				handleForeignKey(rs, refDetailIdSet, "PTDesignationRefDetailFk");
387
		}
388
			
389
			//taxon map
390
			nameSpace = BerlinModelTaxonImport.NAMESPACE;
391
			cdmClass = TaxonBase.class;
392
			idSet = taxonIdSet;
393
			Map<String, TaxonBase> taxonMap = (Map<String, TaxonBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
394
			result.put(nameSpace, taxonMap);
395

    
396

    
397
			//nom reference map
398
			nameSpace = BerlinModelReferenceImport.NOM_REFERENCE_NAMESPACE;
399
			cdmClass = ReferenceBase.class;
400
			idSet = referenceIdSet;
401
			Map<String, ReferenceBase> nomReferenceMap = (Map<String, ReferenceBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
402
			result.put(nameSpace, nomReferenceMap);
403

    
404
			//biblio reference map
405
			nameSpace = BerlinModelReferenceImport.BIBLIO_REFERENCE_NAMESPACE;
406
			cdmClass = ReferenceBase.class;
407
			idSet = referenceIdSet;
408
			Map<String, ReferenceBase> biblioReferenceMap = (Map<String, ReferenceBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
409
			result.put(nameSpace, biblioReferenceMap);
410
			
411
			//nom refDetail map
412
			nameSpace = BerlinModelRefDetailImport.NOM_REFDETAIL_NAMESPACE;
413
			cdmClass = ReferenceBase.class;
414
			idSet = refDetailIdSet;
415
			Map<String, ReferenceBase> nomRefDetailMap= (Map<String, ReferenceBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
416
			result.put(nameSpace, nomRefDetailMap);
417
			
418
			//biblio refDetail map
419
			nameSpace = BerlinModelRefDetailImport.BIBLIO_REFDETAIL_NAMESPACE;
420
			cdmClass = ReferenceBase.class;
421
			idSet = refDetailIdSet;
422
			Map<String, ReferenceBase> biblioRefDetailMap= (Map<String, ReferenceBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
423
			result.put(nameSpace, biblioRefDetailMap);
424
	
425
		} catch (SQLException e) {
426
			throw new RuntimeException(e);
427
	}
428
		return result;
429
	}
430
	
431
	
432
	/**
433
	 * @param state 
434
	 * @param media 
435
	 * @param media 
436
	 * @param descriptionSet 
437
	 * 
438
	 */
439
	private TaxonDescription makeImage(BerlinModelImportState state, String fact, Media media, Set<TaxonDescription> descriptionSet, Taxon taxon) {
440
		TaxonDescription taxonDescription = null;
441
		ReferenceBase sourceRef = state.getConfig().getSourceReference();
442
		String uri = fact;
443
		Integer size = null; 
444
		ImageMetaData imageMetaData = ImageMetaData.newInstance();
445
		URL url;
446
		try {
447
			url = new URL(fact.trim());
448
		} catch (MalformedURLException e) {
449
			logger.warn("Malformed URL. Image could not be imported: " + CdmUtils.Nz(uri));
450
			return null;
451
		}
452
		try {
453
			imageMetaData.readMetaData(url.toURI(), 0);
454
		}
455
		catch(URISyntaxException e){
456
			logger.error("URISyntaxException reading image metadata." , e);
457
		} catch (IOException e) {
458
			logger.error("IOError reading image metadata." , e);
459
		} catch (HttpException e) {
460
			logger.error("HttpException reading image metadata." , e);
461
		}
462
		MediaRepresentation mediaRepresentation = MediaRepresentation.NewInstance(imageMetaData.getMimeType(), null);
463
		media.addRepresentation(mediaRepresentation);
464
		ImageFile image = ImageFile.NewInstance(uri, size, imageMetaData);
465
		mediaRepresentation.addRepresentationPart(image);
466
		
467
		taxonDescription = taxon.getOrCreateImageGallery(sourceRef == null ? null :sourceRef.getTitleCache());
468
		
469
		return taxonDescription;
470
	}
471

    
472
	private TaxonBase getTaxon(Map<String, TaxonBase> taxonMap, Object taxonIdObj, Integer taxonId){
473
		if (taxonIdObj != null){
474
			return taxonMap.get(String.valueOf(taxonId));
475
		}else{
476
			return null;
477
		}
478
		
479
	}
480
	
481
	private Feature getFeature(Map<Integer, Feature>  featureMap, Object categoryFkObj, Integer categoryFk){
482
		if (categoryFkObj != null){
483
			return featureMap.get(categoryFk); 
484
		}else{
485
			return null;
486
		}
487
		
488
	}
489
	
490

    
491
	/* (non-Javadoc)
492
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
493
	 */
494
	@Override
495
	protected boolean doCheck(BerlinModelImportState state){
496
		IOValidator<BerlinModelImportState> validator = new BerlinModelFactsImportValidator();
497
		return validator.validate(state);
498
	}
499
				
500
	/* (non-Javadoc)
501
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getTableName()
502
	 */
503
	@Override
504
	protected String getTableName() {
505
		return dbTableName;
506
			}
507
	
508
	/* (non-Javadoc)
509
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getPluralString()
510
	 */
511
	@Override
512
	public String getPluralString() {
513
		return pluralString;
514
		}
515
	
516
	/* (non-Javadoc)
517
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
518
	 */
519
	protected boolean isIgnore(BerlinModelImportState state){
520
		return ! state.getConfig().isDoFacts();
521
	}
522

    
523

    
524
}
(4-4/21)