Project

General

Profile

Download (19.4 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy 
4
* http://www.e-taxonomy.eu
5
* 
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.io.berlinModel.in;
11

    
12
import java.io.IOException;
13
import java.net.URI;
14
import java.net.URISyntaxException;
15
import java.sql.ResultSet;
16
import java.sql.SQLException;
17
import java.util.Collection;
18
import java.util.HashMap;
19
import java.util.HashSet;
20
import java.util.Map;
21
import java.util.Set;
22

    
23
import org.apache.http.HttpException;
24
import org.apache.log4j.Logger;
25
import org.springframework.stereotype.Component;
26

    
27
import eu.etaxonomy.cdm.common.CdmUtils;
28
import eu.etaxonomy.cdm.common.media.ImageInfo;
29
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
30
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelFactsImportValidator;
31
import eu.etaxonomy.cdm.io.common.IOValidator;
32
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
33
import eu.etaxonomy.cdm.io.common.Source;
34
import eu.etaxonomy.cdm.model.common.Annotation;
35
import eu.etaxonomy.cdm.model.common.CdmBase;
36
import eu.etaxonomy.cdm.model.common.DescriptionElementSource;
37
import eu.etaxonomy.cdm.model.common.Language;
38
import eu.etaxonomy.cdm.model.common.Marker;
39
import eu.etaxonomy.cdm.model.common.MarkerType;
40
import eu.etaxonomy.cdm.model.common.TermVocabulary;
41
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
42
import eu.etaxonomy.cdm.model.description.Feature;
43
import eu.etaxonomy.cdm.model.description.TaxonDescription;
44
import eu.etaxonomy.cdm.model.description.TextData;
45
import eu.etaxonomy.cdm.model.media.ImageFile;
46
import eu.etaxonomy.cdm.model.media.Media;
47
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
48
import eu.etaxonomy.cdm.model.reference.Reference;
49
import eu.etaxonomy.cdm.model.taxon.Taxon;
50
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
51
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
52

    
53
/**
54
 * @author a.mueller
55
 * @created 20.03.2008
56
 * @version 1.0
57
 */
58
@Component
59
public class BerlinModelFactsImport  extends BerlinModelImportBase {
60
	private static final Logger logger = Logger.getLogger(BerlinModelFactsImport.class);
61

    
62
	public static final String NAMESPACE = "Fact";
63
	
64
	public static final String SEQUENCE_PREFIX = "ORDER: ";
65
	
66
	private int modCount = 10000;
67
	private static final String pluralString = "facts";
68
	private static final String dbTableName = "Fact";
69

    
70
	//FIXME don't use as class variable
71
	private Map<Integer, Feature> featureMap;
72
	
73
	/**
	 * Default constructor; only delegates to the superclass constructor.
	 */
	public BerlinModelFactsImport() {
		super();
	}
76

    
77

    
78
	private TermVocabulary<Feature> getFeatureVocabulary(){
79
		try {
80
			//TODO work around until service method works
81
			TermVocabulary<Feature> featureVocabulary =  BerlinModelTransformer.factCategory2Feature(1).getVocabulary();
82
			//TermVocabulary<Feature> vocabulary = getTermService().getVocabulary(vocabularyUuid);
83
			return featureVocabulary;
84
		} catch (UnknownCdmTypeException e) {
85
			logger.error("Feature vocabulary not available. New vocabulary created");
86
			return TermVocabulary.NewInstance("User Defined Feature Vocabulary", "User Defined Feature Vocabulary", null, null); 
87
		}
88
	}
89
	
90
	private Map<Integer, Feature>  invokeFactCategories(BerlinModelImportConfigurator bmiConfig){
91
		
92
		Map<Integer, Feature>  result = bmiConfig.getFeatureMap();
93
		Source source = bmiConfig.getSource();
94
		
95
		try {
96
			//get data from database
97
			String strQuery = 
98
					" SELECT FactCategory.* " + 
99
					" FROM FactCategory "+
100
                    " WHERE (1=1)";
101
			ResultSet rs = source.getResultSet(strQuery) ;
102

    
103
			
104
			TermVocabulary<Feature> featureVocabulary = getFeatureVocabulary();
105
			int i = 0;
106
			//for each reference
107
			while (rs.next()){
108
				
109
				if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("FactCategories handled: " + (i-1));}
110
				
111
				int factCategoryId = rs.getInt("factCategoryId");
112
				String factCategory = rs.getString("factCategory");
113
				
114
					
115
				Feature feature;
116
				try {
117
					feature = BerlinModelTransformer.factCategory2Feature(factCategoryId);
118
				} catch (UnknownCdmTypeException e) {
119
					logger.warn("New Feature (FactCategoryId: " + factCategoryId + ")");
120
					feature = Feature.NewInstance(factCategory, factCategory, null);
121
					featureVocabulary.addTerm(feature);
122
					feature.setSupportsTextData(true);
123
					//TODO
124
//					MaxFactNumber	int	Checked
125
//					ExtensionTableName	varchar(100)	Checked
126
//					Description	nvarchar(1000)	Checked
127
//					locExtensionFormName	nvarchar(80)	Checked
128
//					RankRestrictionFk	int	Checked
129
				}
130
								
131
				result.put(factCategoryId, feature);
132
			}
133
			Collection<Feature> col = result.values();
134
			getTermService().save((Collection)col);
135
			return result;
136
		} catch (SQLException e) {
137
			logger.error("SQLException:" +  e);
138
			return null;
139
		}
140

    
141
	}
142

    
143
	/* (non-Javadoc)
144
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#doInvoke(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
145
	 */
146
	@Override
147
	protected void doInvoke(BerlinModelImportState state) {
148
		featureMap = invokeFactCategories(state.getConfig());
149
		super.doInvoke(state);
150
		return;
151
	}
152
		
153

    
154
	/* (non-Javadoc)
155
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
156
	 */
157
	@Override
158
	protected String getRecordQuery(BerlinModelImportConfigurator config) {
159
			String strQuery = 
160
					" SELECT Fact.*, PTaxon.RIdentifier as taxonId, RefDetail.Details " + 
161
					" FROM Fact " +
162
                      	" INNER JOIN PTaxon ON Fact.PTNameFk = PTaxon.PTNameFk AND Fact.PTRefFk = PTaxon.PTRefFk " +
163
                      	" LEFT OUTER JOIN RefDetail ON Fact.FactRefDetailFk = RefDetail.RefDetailId AND Fact.FactRefFk = RefDetail.RefFk " +
164
              	" WHERE (FactId IN (" + ID_LIST_TOKEN + "))" + 
165
                        " ORDER By Sequence";
166
		return strQuery;
167
	}
168
	
169

    
170
	/* (non-Javadoc)
171
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#doPartition(eu.etaxonomy.cdm.io.berlinModel.in.ResultSetPartitioner, eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
172
	 */
173
	public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) {
174
		boolean success = true ;
175
		BerlinModelImportConfigurator config = state.getConfig();
176
		Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
177
		Map<String, TaxonBase> taxonMap = (Map<String, TaxonBase>) partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE);
178
		Map<String, Reference> biblioRefMap = (Map<String, Reference>) partitioner.getObjectMap(BerlinModelReferenceImport.BIBLIO_REFERENCE_NAMESPACE);
179
		Map<String, Reference> nomRefMap = (Map<String, Reference>) partitioner.getObjectMap(BerlinModelReferenceImport.NOM_REFERENCE_NAMESPACE);
180

    
181
		ResultSet rs = partitioner.getResultSet();
182
		
183
			Reference<?> sourceRef = state.getConfig().getSourceReference();
184
			
185
		try{
186
			int i = 0;
187
			//for each fact
188
			while (rs.next()){
189
				try{
190
					if ((i++ % modCount) == 0){ logger.info("Facts handled: " + (i-1));}
191
					
192
					int factId = rs.getInt("factId");
193
					Object taxonIdObj = rs.getObject("taxonId");
194
					long taxonId = rs.getLong("taxonId");
195
					Object factRefFkObj = rs.getObject("factRefFk");
196
					Object categoryFkObj = rs.getObject("factCategoryFk");
197
					Integer categoryFk = rs.getInt("factCategoryFk");
198
					String details = rs.getString("Details");
199
					String fact = CdmUtils.Nz(rs.getString("Fact"));
200
					String notes = CdmUtils.Nz(rs.getString("notes"));
201
					Boolean doubtfulFlag = rs.getBoolean("DoubtfulFlag");
202
					Boolean publishFlag = rs.getBoolean("publishFlag");
203
					
204
					TaxonBase taxonBase = getTaxon(taxonMap, taxonIdObj, taxonId);
205
					Feature feature = getFeature(featureMap, categoryFkObj, categoryFk) ;
206
					
207
					if (taxonBase == null){
208
						logger.warn("Taxon for Fact " + factId + " does not exist in store");
209
						success = false;
210
					}else{
211
						Taxon taxon;
212
						if ( taxonBase instanceof Taxon ) {
213
							taxon = (Taxon) taxonBase;
214
						}else{
215
							logger.warn("TaxonBase " + (taxonIdObj==null?"(null)":taxonIdObj) + " for Fact " + factId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName());
216
							success = false;
217
							continue;
218
						}
219
						
220
						TaxonDescription taxonDescription = null;
221
						Set<TaxonDescription> descriptionSet= taxon.getDescriptions();
222
						
223
						boolean isImage = false;
224
						Media media = null;
225
						//for diptera images
226
						if (categoryFk == 51){  //TODO check also FactCategory string
227
							isImage = true;
228
							media = Media.NewInstance();
229
							taxonDescription = makeImage(state, fact, media, descriptionSet, taxon);
230
							if (taxonDescription == null){
231
								continue;
232
							}
233
						}
234
						//all others (no image)
235
						else{ 
236
							for (TaxonDescription desc: descriptionSet){
237
								if (! desc.isImageGallery()){
238
									taxonDescription = desc;
239
								}
240
							}
241
							if (taxonDescription == null){
242
								taxonDescription = TaxonDescription.NewInstance();
243
								taxonDescription.setTitleCache(sourceRef == null ? null : sourceRef.getTitleCache(), true);
244
								taxon.addDescription(taxonDescription);
245
							}
246
						}
247
					
248
						//textData
249
						TextData textData = null;
250
						boolean newTextData = true;
251
	
252
						// For Cichorieae DB: If fact category is 31 (Systematics) and there is already a Systematics TextData 
253
						// description element append the fact text to the existing TextData
254
						if(categoryFk == 31) {
255
							Set<DescriptionElementBase> descriptionElements = taxonDescription.getElements();
256
							for (DescriptionElementBase descriptionElement : descriptionElements) {
257
								String featureString = descriptionElement.getFeature().getRepresentation(Language.DEFAULT()).getLabel();
258
								if (descriptionElement instanceof TextData && featureString.equals("Systematics")) { // TODO: test
259
									textData = (TextData)descriptionElement;
260
									String factTextStr = textData.getText(Language.DEFAULT());
261
									// FIXME: Removing newlines doesn't work
262
									if (factTextStr.contains("\\r\\n")) {
263
										factTextStr = factTextStr.replaceAll("\\r\\n","");
264
									}
265
									StringBuilder factText = new StringBuilder(factTextStr);
266
									factText.append(fact);
267
									fact = factText.toString();
268
									newTextData = false;
269
									break;
270
								}
271
							}
272
						}
273
						
274
						if(newTextData == true)	{ 
275
							textData = TextData.NewInstance(); 
276
						}
277
						
278
						//for diptera database
279
						if (categoryFk == 99 && notes.contains("<OriginalName>")){
280
//							notes = notes.replaceAll("<OriginalName>", "");
281
//							notes = notes.replaceAll("</OriginalName>", "");
282
							fact = notes + ": " +  fact ;
283
						}
284
						//TODO textData.putText(fact, bmiConfig.getFactLanguage());  //doesn't work because  bmiConfig.getFactLanguage() is not not a persistent Language Object
285
						//throws  in thread "main" org.springframework.dao.InvalidDataAccessApiUsageException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language; nested exception is org.hibernate.TransientObjectException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language
286
						if (isImage){
287
							textData.addMedia(media);
288
							textData.setFeature(Feature.IMAGE());
289
						}else{
290
							textData.putText(Language.DEFAULT(), fact);
291
							textData.setFeature(feature);
292
						}
293
						
294
						//reference
295
						Reference citation = null;
296
						String factRefFk = String.valueOf(factRefFkObj);
297
						if (factRefFkObj != null){
298
							citation = getReferenceOnlyFromMaps(
299
									biblioRefMap, nomRefMap, factRefFk);	
300
							}
301
						if (citation == null && (factRefFkObj != null)){
302
								logger.warn("Citation not found in referenceMap: " + factRefFk);
303
							success = false;
304
							}
305
						if (citation != null || CdmUtils.isNotEmpty(details)){
306
							DescriptionElementSource originalSource = DescriptionElementSource.NewInstance();
307
							originalSource.setCitation(citation);
308
							originalSource.setCitationMicroReference(details);
309
							textData.addSource(originalSource);
310
						}
311
						taxonDescription.addElement(textData);
312
						//doubtfulFlag
313
						if (doubtfulFlag){
314
							textData.addMarker(Marker.NewInstance(MarkerType.IS_DOUBTFUL(), true));
315
						}
316
						//publisheFlag
317
						textData.addMarker(Marker.NewInstance(MarkerType.PUBLISH(), publishFlag));
318
						//Sequence
319
						Integer sequence = rs.getInt("Sequence");
320
						if (sequence != null && sequence != 999){
321
							String strSequence = String.valueOf(sequence);
322
							strSequence = SEQUENCE_PREFIX + strSequence;
323
							//TODO make it an Extension when possible
324
							//Extension datesExtension = Extension.NewInstance(textData, strSequence, ExtensionType.ORDER());
325
							Annotation annotation = Annotation.NewInstance(strSequence, Language.DEFAULT());
326
							textData.addAnnotation(annotation);
327
						}
328
						
329
						//						if (categoryFkObj == FACT_DESCRIPTION){
330
	//						//;
331
	//					}else if (categoryFkObj == FACT_OBSERVATION){
332
	//						//;
333
	//					}else if (categoryFkObj == FACT_DISTRIBUTION_EM){
334
	//						//
335
	//					}else {
336
	//						//TODO
337
	//						//logger.warn("FactCategory " + categoryFk + " not yet implemented");
338
	//					}
339
						
340
						//notes
341
						doCreatedUpdatedNotes(state, textData, rs);
342
						
343
						//TODO
344
						//Designation References -> unclear how to map to CDM
345
						//factId -> OriginalSource for descriptionElements not yet implemented
346
						
347
						//sequence -> textData is not an identifiable entity therefore extensions are not possible
348
						//fact category better
349
						
350
						taxaToSave.add(taxon);
351
					}
352
				} catch (Exception re){
353
					logger.error("An exception occurred during the facts import");
354
					re.printStackTrace();
355
					success = false;
356
				}
357
				//put
358
			}
359
			logger.info("Facts handled: " + (i-1));
360
			logger.info("Taxa to save: " + taxaToSave.size());
361
			getTaxonService().save(taxaToSave);	
362
		}catch(SQLException e){
363
			throw new RuntimeException(e);
364
		}
365
		return success;
366
	}
367

    
368
	/* (non-Javadoc)
369
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
370
	 */
371
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
372
		String nameSpace;
373
		Class cdmClass;
374
		Set<String> idSet;
375
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
376
			
377
		try{
378
			Set<String> taxonIdSet = new HashSet<String>();
379
			Set<String> referenceIdSet = new HashSet<String>();
380
			Set<String> refDetailIdSet = new HashSet<String>();
381
			while (rs.next()){
382
				handleForeignKey(rs, taxonIdSet, "taxonId");
383
				handleForeignKey(rs, referenceIdSet, "FactRefFk");
384
				handleForeignKey(rs, referenceIdSet, "PTDesignationRefFk");
385
				handleForeignKey(rs, refDetailIdSet, "FactRefDetailFk");
386
				handleForeignKey(rs, refDetailIdSet, "PTDesignationRefDetailFk");
387
		}
388
			
389
			//taxon map
390
			nameSpace = BerlinModelTaxonImport.NAMESPACE;
391
			cdmClass = TaxonBase.class;
392
			idSet = taxonIdSet;
393
			Map<String, TaxonBase> taxonMap = (Map<String, TaxonBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
394
			result.put(nameSpace, taxonMap);
395

    
396

    
397
			//nom reference map
398
			nameSpace = BerlinModelReferenceImport.NOM_REFERENCE_NAMESPACE;
399
			cdmClass = Reference.class;
400
			idSet = referenceIdSet;
401
			Map<String, Reference> nomReferenceMap = (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
402
			result.put(nameSpace, nomReferenceMap);
403

    
404
			//biblio reference map
405
			nameSpace = BerlinModelReferenceImport.BIBLIO_REFERENCE_NAMESPACE;
406
			cdmClass = Reference.class;
407
			idSet = referenceIdSet;
408
			Map<String, Reference> biblioReferenceMap = (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
409
			result.put(nameSpace, biblioReferenceMap);
410
			
411
			//nom refDetail map
412
			nameSpace = BerlinModelRefDetailImport.NOM_REFDETAIL_NAMESPACE;
413
			cdmClass = Reference.class;
414
			idSet = refDetailIdSet;
415
			Map<String, Reference> nomRefDetailMap= (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
416
			result.put(nameSpace, nomRefDetailMap);
417
			
418
			//biblio refDetail map
419
			nameSpace = BerlinModelRefDetailImport.BIBLIO_REFDETAIL_NAMESPACE;
420
			cdmClass = Reference.class;
421
			idSet = refDetailIdSet;
422
			Map<String, Reference> biblioRefDetailMap= (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
423
			result.put(nameSpace, biblioRefDetailMap);
424
	
425
		} catch (SQLException e) {
426
			throw new RuntimeException(e);
427
	}
428
		return result;
429
	}
430
	
431
	
432
	/**
433
	 * @param state 
434
	 * @param media 
435
	 * @param media 
436
	 * @param descriptionSet 
437
	 * 
438
	 */
439
	private TaxonDescription makeImage(BerlinModelImportState state, String fact, Media media, Set<TaxonDescription> descriptionSet, Taxon taxon) {
440
		TaxonDescription taxonDescription = null;
441
		Reference sourceRef = state.getConfig().getSourceReference();
442
		Integer size = null; 
443
		ImageInfo imageInfo = null;
444
		URI uri;
445
		try {
446
			uri = new URI(fact.trim());
447
		} catch (URISyntaxException e) {
448
			logger.warn("URISyntaxException. Image could not be imported: " + fact);
449
			return null;
450
		}
451
		try {
452
			imageInfo = ImageInfo.NewInstance(uri, 0);
453
		} catch (IOException e) {
454
			logger.error("IOError reading image metadata." , e);
455
		} catch (HttpException e) {
456
			logger.error("HttpException reading image metadata." , e);
457
		}
458
		MediaRepresentation mediaRepresentation = MediaRepresentation.NewInstance(imageInfo.getMimeType(), null);
459
		media.addRepresentation(mediaRepresentation);
460
		ImageFile image = ImageFile.NewInstance(uri, size, imageInfo);
461
		mediaRepresentation.addRepresentationPart(image);
462
		
463
		taxonDescription = taxon.getOrCreateImageGallery(sourceRef == null ? null :sourceRef.getTitleCache());
464
		
465
		return taxonDescription;
466
	}
467

    
468
	private TaxonBase getTaxon(Map<String, TaxonBase> taxonMap, Object taxonIdObj, Long taxonId){
469
		if (taxonIdObj != null){
470
			return taxonMap.get(String.valueOf(taxonId));
471
		}else{
472
			return null;
473
		}
474
		
475
	}
476
	
477
	private Feature getFeature(Map<Integer, Feature>  featureMap, Object categoryFkObj, Integer categoryFk){
478
		if (categoryFkObj != null){
479
			return featureMap.get(categoryFk); 
480
		}else{
481
			return null;
482
		}
483
		
484
	}
485
	
486

    
487
	/* (non-Javadoc)
488
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
489
	 */
490
	@Override
491
	protected boolean doCheck(BerlinModelImportState state){
492
		IOValidator<BerlinModelImportState> validator = new BerlinModelFactsImportValidator();
493
		return validator.validate(state);
494
	}
495
				
496
	/* (non-Javadoc)
497
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getTableName()
498
	 */
499
	@Override
500
	protected String getTableName() {
501
		return dbTableName;
502
			}
503
	
504
	/* (non-Javadoc)
505
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getPluralString()
506
	 */
507
	@Override
508
	public String getPluralString() {
509
		return pluralString;
510
		}
511
	
512
	/* (non-Javadoc)
513
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
514
	 */
515
	protected boolean isIgnore(BerlinModelImportState state){
516
		return ! state.getConfig().isDoFacts();
517
	}
518

    
519

    
520
}
(4-4/21)