Project

General

Profile

Download (19.5 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy 
4
* http://www.e-taxonomy.eu
5
* 
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.io.berlinModel.in;
11

    
12
import java.io.IOException;
13
import java.net.MalformedURLException;
14
import java.net.URI;
15
import java.net.URISyntaxException;
16
import java.net.URL;
17
import java.sql.ResultSet;
18
import java.sql.SQLException;
19
import java.util.Collection;
20
import java.util.HashMap;
21
import java.util.HashSet;
22
import java.util.Map;
23
import java.util.Set;
24

    
25
import org.apache.http.HttpException;
26
import org.apache.log4j.Logger;
27
import org.springframework.stereotype.Component;
28

    
29
import eu.etaxonomy.cdm.common.CdmUtils;
30
import eu.etaxonomy.cdm.common.mediaMetaData.ImageMetaData;
31
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
32
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelFactsImportValidator;
33
import eu.etaxonomy.cdm.io.common.IOValidator;
34
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
35
import eu.etaxonomy.cdm.io.common.Source;
36
import eu.etaxonomy.cdm.model.common.Annotation;
37
import eu.etaxonomy.cdm.model.common.CdmBase;
38
import eu.etaxonomy.cdm.model.common.DescriptionElementSource;
39
import eu.etaxonomy.cdm.model.common.Language;
40
import eu.etaxonomy.cdm.model.common.Marker;
41
import eu.etaxonomy.cdm.model.common.MarkerType;
42
import eu.etaxonomy.cdm.model.common.TermVocabulary;
43
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
44
import eu.etaxonomy.cdm.model.description.Feature;
45
import eu.etaxonomy.cdm.model.description.TaxonDescription;
46
import eu.etaxonomy.cdm.model.description.TextData;
47
import eu.etaxonomy.cdm.model.media.ImageFile;
48
import eu.etaxonomy.cdm.model.media.Media;
49
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
50
import eu.etaxonomy.cdm.model.reference.Reference;
51
import eu.etaxonomy.cdm.model.taxon.Taxon;
52
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
53
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
54

    
55
/**
56
 * @author a.mueller
57
 * @created 20.03.2008
58
 * @version 1.0
59
 */
60
@Component
61
public class BerlinModelFactsImport  extends BerlinModelImportBase {
62
	private static final Logger logger = Logger.getLogger(BerlinModelFactsImport.class);
63

    
64
	public static final String NAMESPACE = "Fact";
65
	
66
	public static final String SEQUENCE_PREFIX = "ORDER: ";
67
	
68
	private int modCount = 10000;
69
	private static final String pluralString = "facts";
70
	private static final String dbTableName = "Fact";
71

    
72
	//FIXME don't use as class variable
73
	private Map<Integer, Feature> featureMap;
74
	
75
	public BerlinModelFactsImport(){
76
		super();
77
	}
78

    
79

    
80
	private TermVocabulary<Feature> getFeatureVocabulary(){
81
		try {
82
			//TODO work around until service method works
83
			TermVocabulary<Feature> featureVocabulary =  BerlinModelTransformer.factCategory2Feature(1).getVocabulary();
84
			//TermVocabulary<Feature> vocabulary = getTermService().getVocabulary(vocabularyUuid);
85
			return featureVocabulary;
86
		} catch (UnknownCdmTypeException e) {
87
			logger.error("Feature vocabulary not available. New vocabulary created");
88
			return new TermVocabulary<Feature>() ;
89
		}
90
	}
91
	
92
	private Map<Integer, Feature>  invokeFactCategories(BerlinModelImportConfigurator bmiConfig){
93
		
94
		Map<Integer, Feature>  result = bmiConfig.getFeatureMap();
95
		Source source = bmiConfig.getSource();
96
		
97
		try {
98
			//get data from database
99
			String strQuery = 
100
					" SELECT FactCategory.* " + 
101
					" FROM FactCategory "+
102
                    " WHERE (1=1)";
103
			ResultSet rs = source.getResultSet(strQuery) ;
104

    
105
			
106
			TermVocabulary<Feature> featureVocabulary = getFeatureVocabulary();
107
			int i = 0;
108
			//for each reference
109
			while (rs.next()){
110
				
111
				if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("FactCategories handled: " + (i-1));}
112
				
113
				int factCategoryId = rs.getInt("factCategoryId");
114
				String factCategory = rs.getString("factCategory");
115
				
116
					
117
				Feature feature;
118
				try {
119
					feature = BerlinModelTransformer.factCategory2Feature(factCategoryId);
120
				} catch (UnknownCdmTypeException e) {
121
					logger.warn("New Feature (FactCategoryId: " + factCategoryId + ")");
122
					feature = Feature.NewInstance(factCategory, factCategory, null);
123
					featureVocabulary.addTerm(feature);
124
					feature.setSupportsTextData(true);
125
					//TODO
126
//					MaxFactNumber	int	Checked
127
//					ExtensionTableName	varchar(100)	Checked
128
//					Description	nvarchar(1000)	Checked
129
//					locExtensionFormName	nvarchar(80)	Checked
130
//					RankRestrictionFk	int	Checked
131
				}
132
								
133
				result.put(factCategoryId, feature);
134
			}
135
			Collection<Feature> col = result.values();
136
			getTermService().save((Collection)col);
137
			return result;
138
		} catch (SQLException e) {
139
			logger.error("SQLException:" +  e);
140
			return null;
141
		}
142

    
143
	}
144

    
145
	/* (non-Javadoc)
146
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#doInvoke(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
147
	 */
148
	@Override
149
	protected boolean doInvoke(BerlinModelImportState state) {
150
		featureMap = invokeFactCategories(state.getConfig());
151
		return super.doInvoke(state);
152
	}
153
		
154

    
155
	/* (non-Javadoc)
156
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
157
	 */
158
	@Override
159
	protected String getRecordQuery(BerlinModelImportConfigurator config) {
160
			String strQuery = 
161
					" SELECT Fact.*, PTaxon.RIdentifier as taxonId, RefDetail.Details " + 
162
					" FROM Fact " +
163
                      	" INNER JOIN PTaxon ON Fact.PTNameFk = PTaxon.PTNameFk AND Fact.PTRefFk = PTaxon.PTRefFk " +
164
                      	" LEFT OUTER JOIN RefDetail ON Fact.FactRefDetailFk = RefDetail.RefDetailId AND Fact.FactRefFk = RefDetail.RefFk " +
165
              	" WHERE (FactId IN (" + ID_LIST_TOKEN + "))" + 
166
                        " ORDER By Sequence";
167
		return strQuery;
168
	}
169
	
170

    
171
	/* (non-Javadoc)
172
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#doPartition(eu.etaxonomy.cdm.io.berlinModel.in.ResultSetPartitioner, eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
173
	 */
174
	public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) {
175
		boolean success = true ;
176
		BerlinModelImportConfigurator config = state.getConfig();
177
		Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
178
		Map<String, TaxonBase> taxonMap = (Map<String, TaxonBase>) partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE);
179
		Map<String, Reference> biblioRefMap = (Map<String, Reference>) partitioner.getObjectMap(BerlinModelReferenceImport.BIBLIO_REFERENCE_NAMESPACE);
180
		Map<String, Reference> nomRefMap = (Map<String, Reference>) partitioner.getObjectMap(BerlinModelReferenceImport.NOM_REFERENCE_NAMESPACE);
181

    
182
		ResultSet rs = partitioner.getResultSet();
183
		
184
			Reference<?> sourceRef = state.getConfig().getSourceReference();
185
			
186
		try{
187
			int i = 0;
188
			//for each fact
189
			while (rs.next()){
190
				try{
191
					if ((i++ % modCount) == 0){ logger.info("Facts handled: " + (i-1));}
192
					
193
					int factId = rs.getInt("factId");
194
					Object taxonIdObj = rs.getObject("taxonId");
195
					long taxonId = rs.getLong("taxonId");
196
					Object factRefFkObj = rs.getObject("factRefFk");
197
					Object categoryFkObj = rs.getObject("factCategoryFk");
198
					Integer categoryFk = rs.getInt("factCategoryFk");
199
					String details = rs.getString("Details");
200
					String fact = CdmUtils.Nz(rs.getString("Fact"));
201
					String notes = CdmUtils.Nz(rs.getString("notes"));
202
					Boolean doubtfulFlag = rs.getBoolean("DoubtfulFlag");
203
					Boolean publishFlag = rs.getBoolean("publishFlag");
204
					
205
					TaxonBase taxonBase = getTaxon(taxonMap, taxonIdObj, taxonId);
206
					Feature feature = getFeature(featureMap, categoryFkObj, categoryFk) ;
207
					
208
					if (taxonBase == null){
209
						logger.warn("Taxon for Fact " + factId + " does not exist in store");
210
						success = false;
211
					}else{
212
						Taxon taxon;
213
						if ( taxonBase instanceof Taxon ) {
214
							taxon = (Taxon) taxonBase;
215
						}else{
216
							logger.warn("TaxonBase " + (taxonIdObj==null?"(null)":taxonIdObj) + " for Fact " + factId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName());
217
							success = false;
218
							continue;
219
						}
220
						
221
						TaxonDescription taxonDescription = null;
222
						Set<TaxonDescription> descriptionSet= taxon.getDescriptions();
223
						
224
						boolean isImage = false;
225
						Media media = null;
226
						//for diptera images
227
						if (categoryFk == 51){  //TODO check also FactCategory string
228
							isImage = true;
229
							media = Media.NewInstance();
230
							taxonDescription = makeImage(state, fact, media, descriptionSet, taxon);
231
							if (taxonDescription == null){
232
								continue;
233
							}
234
						}
235
						//all others (no image)
236
						else{ 
237
							for (TaxonDescription desc: descriptionSet){
238
								if (! desc.isImageGallery()){
239
									taxonDescription = desc;
240
								}
241
							}
242
							if (taxonDescription == null){
243
								taxonDescription = TaxonDescription.NewInstance();
244
								taxonDescription.setTitleCache(sourceRef == null ? null : sourceRef.getTitleCache(), true);
245
								taxon.addDescription(taxonDescription);
246
							}
247
						}
248
					
249
						//textData
250
						TextData textData = null;
251
						boolean newTextData = true;
252
	
253
						// For Cichorieae DB: If fact category is 31 (Systematics) and there is already a Systematics TextData 
254
						// description element append the fact text to the existing TextData
255
						if(categoryFk == 31) {
256
							Set<DescriptionElementBase> descriptionElements = taxonDescription.getElements();
257
							for (DescriptionElementBase descriptionElement : descriptionElements) {
258
								String featureString = descriptionElement.getFeature().getRepresentation(Language.DEFAULT()).getLabel();
259
								if (descriptionElement instanceof TextData && featureString.equals("Systematics")) { // TODO: test
260
									textData = (TextData)descriptionElement;
261
									String factTextStr = textData.getText(Language.DEFAULT());
262
									// FIXME: Removing newlines doesn't work
263
									if (factTextStr.contains("\\r\\n")) {
264
										factTextStr = factTextStr.replaceAll("\\r\\n","");
265
									}
266
									StringBuilder factText = new StringBuilder(factTextStr);
267
									factText.append(fact);
268
									fact = factText.toString();
269
									newTextData = false;
270
									break;
271
								}
272
							}
273
						}
274
						
275
						if(newTextData == true)	{ 
276
							textData = TextData.NewInstance(); 
277
						}
278
						
279
						//for diptera database
280
						if (categoryFk == 99 && notes.contains("<OriginalName>")){
281
//							notes = notes.replaceAll("<OriginalName>", "");
282
//							notes = notes.replaceAll("</OriginalName>", "");
283
							fact = notes + ": " +  fact ;
284
						}
285
						//TODO textData.putText(fact, bmiConfig.getFactLanguage());  //doesn't work because  bmiConfig.getFactLanguage() is not not a persistent Language Object
286
						//throws  in thread "main" org.springframework.dao.InvalidDataAccessApiUsageException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language; nested exception is org.hibernate.TransientObjectException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language
287
						if (isImage){
288
							textData.addMedia(media);
289
							textData.setFeature(Feature.IMAGE());
290
						}else{
291
							textData.putText(Language.DEFAULT(), fact);
292
							textData.setFeature(feature);
293
						}
294
						
295
						//reference
296
						Reference citation = null;
297
						String factRefFk = String.valueOf(factRefFkObj);
298
						if (factRefFkObj != null){
299
							citation = getReferenceOnlyFromMaps(
300
									biblioRefMap, nomRefMap, factRefFk);	
301
							}
302
						if (citation == null && (factRefFkObj != null)){
303
								logger.warn("Citation not found in referenceMap: " + factRefFk);
304
							success = false;
305
							}
306
						if (citation != null || CdmUtils.isNotEmpty(details)){
307
							DescriptionElementSource originalSource = DescriptionElementSource.NewInstance();
308
							originalSource.setCitation(citation);
309
							originalSource.setCitationMicroReference(details);
310
							textData.addSource(originalSource);
311
						}
312
						taxonDescription.addElement(textData);
313
						//doubtfulFlag
314
						if (doubtfulFlag){
315
							textData.addMarker(Marker.NewInstance(MarkerType.IS_DOUBTFUL(), true));
316
						}
317
						//publisheFlag
318
						textData.addMarker(Marker.NewInstance(MarkerType.PUBLISH(), publishFlag));
319
						//Sequence
320
						Integer sequence = rs.getInt("Sequence");
321
						if (sequence != null && sequence != 999){
322
							String strSequence = String.valueOf(sequence);
323
							strSequence = SEQUENCE_PREFIX + strSequence;
324
							//TODO make it an Extension when possible
325
							//Extension datesExtension = Extension.NewInstance(textData, strSequence, ExtensionType.ORDER());
326
							Annotation annotation = Annotation.NewInstance(strSequence, Language.DEFAULT());
327
							textData.addAnnotation(annotation);
328
						}
329
						
330
						//						if (categoryFkObj == FACT_DESCRIPTION){
331
	//						//;
332
	//					}else if (categoryFkObj == FACT_OBSERVATION){
333
	//						//;
334
	//					}else if (categoryFkObj == FACT_DISTRIBUTION_EM){
335
	//						//
336
	//					}else {
337
	//						//TODO
338
	//						//logger.warn("FactCategory " + categoryFk + " not yet implemented");
339
	//					}
340
						
341
						//notes
342
						doCreatedUpdatedNotes(state, textData, rs);
343
						
344
						//TODO
345
						//Designation References -> unclear how to map to CDM
346
						//factId -> OriginalSource for descriptionElements not yet implemented
347
						
348
						//sequence -> textData is not an identifiable entity therefore extensions are not possible
349
						//fact category better
350
						
351
						taxaToSave.add(taxon);
352
					}
353
				} catch (Exception re){
354
					logger.error("An exception occurred during the facts import");
355
					re.printStackTrace();
356
					success = false;
357
				}
358
				//put
359
			}
360
			logger.info("Facts handled: " + (i-1));
361
			logger.info("Taxa to save: " + taxaToSave.size());
362
			getTaxonService().save(taxaToSave);	
363
		}catch(SQLException e){
364
			throw new RuntimeException(e);
365
		}
366
		return success;
367
	}
368

    
369
	/* (non-Javadoc)
370
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
371
	 */
372
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
373
		String nameSpace;
374
		Class cdmClass;
375
		Set<String> idSet;
376
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
377
			
378
		try{
379
			Set<String> taxonIdSet = new HashSet<String>();
380
			Set<String> referenceIdSet = new HashSet<String>();
381
			Set<String> refDetailIdSet = new HashSet<String>();
382
			while (rs.next()){
383
				handleForeignKey(rs, taxonIdSet, "taxonId");
384
				handleForeignKey(rs, referenceIdSet, "FactRefFk");
385
				handleForeignKey(rs, referenceIdSet, "PTDesignationRefFk");
386
				handleForeignKey(rs, refDetailIdSet, "FactRefDetailFk");
387
				handleForeignKey(rs, refDetailIdSet, "PTDesignationRefDetailFk");
388
		}
389
			
390
			//taxon map
391
			nameSpace = BerlinModelTaxonImport.NAMESPACE;
392
			cdmClass = TaxonBase.class;
393
			idSet = taxonIdSet;
394
			Map<String, TaxonBase> taxonMap = (Map<String, TaxonBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
395
			result.put(nameSpace, taxonMap);
396

    
397

    
398
			//nom reference map
399
			nameSpace = BerlinModelReferenceImport.NOM_REFERENCE_NAMESPACE;
400
			cdmClass = Reference.class;
401
			idSet = referenceIdSet;
402
			Map<String, Reference> nomReferenceMap = (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
403
			result.put(nameSpace, nomReferenceMap);
404

    
405
			//biblio reference map
406
			nameSpace = BerlinModelReferenceImport.BIBLIO_REFERENCE_NAMESPACE;
407
			cdmClass = Reference.class;
408
			idSet = referenceIdSet;
409
			Map<String, Reference> biblioReferenceMap = (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
410
			result.put(nameSpace, biblioReferenceMap);
411
			
412
			//nom refDetail map
413
			nameSpace = BerlinModelRefDetailImport.NOM_REFDETAIL_NAMESPACE;
414
			cdmClass = Reference.class;
415
			idSet = refDetailIdSet;
416
			Map<String, Reference> nomRefDetailMap= (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
417
			result.put(nameSpace, nomRefDetailMap);
418
			
419
			//biblio refDetail map
420
			nameSpace = BerlinModelRefDetailImport.BIBLIO_REFDETAIL_NAMESPACE;
421
			cdmClass = Reference.class;
422
			idSet = refDetailIdSet;
423
			Map<String, Reference> biblioRefDetailMap= (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
424
			result.put(nameSpace, biblioRefDetailMap);
425
	
426
		} catch (SQLException e) {
427
			throw new RuntimeException(e);
428
	}
429
		return result;
430
	}
431
	
432
	
433
	/**
434
	 * @param state 
435
	 * @param media 
436
	 * @param media 
437
	 * @param descriptionSet 
438
	 * 
439
	 */
440
	private TaxonDescription makeImage(BerlinModelImportState state, String fact, Media media, Set<TaxonDescription> descriptionSet, Taxon taxon) {
441
		TaxonDescription taxonDescription = null;
442
		Reference sourceRef = state.getConfig().getSourceReference();
443
		Integer size = null; 
444
		ImageMetaData imageMetaData = ImageMetaData.newInstance();
445
		URI uri;
446
		try {
447
			uri = new URI(fact.trim());
448
		} catch (URISyntaxException e) {
449
			logger.warn("URISyntaxException. Image could not be imported: " + fact);
450
			return null;
451
		}
452
		try {
453
			imageMetaData.readMetaData(uri, 0);
454
		} catch (IOException e) {
455
			logger.error("IOError reading image metadata." , e);
456
		} catch (HttpException e) {
457
			logger.error("HttpException reading image metadata." , e);
458
		}
459
		MediaRepresentation mediaRepresentation = MediaRepresentation.NewInstance(imageMetaData.getMimeType(), null);
460
		media.addRepresentation(mediaRepresentation);
461
		ImageFile image = ImageFile.NewInstance(uri, size, imageMetaData);
462
		mediaRepresentation.addRepresentationPart(image);
463
		
464
		taxonDescription = taxon.getOrCreateImageGallery(sourceRef == null ? null :sourceRef.getTitleCache());
465
		
466
		return taxonDescription;
467
	}
468

    
469
	private TaxonBase getTaxon(Map<String, TaxonBase> taxonMap, Object taxonIdObj, Long taxonId){
470
		if (taxonIdObj != null){
471
			return taxonMap.get(String.valueOf(taxonId));
472
		}else{
473
			return null;
474
		}
475
		
476
	}
477
	
478
	private Feature getFeature(Map<Integer, Feature>  featureMap, Object categoryFkObj, Integer categoryFk){
479
		if (categoryFkObj != null){
480
			return featureMap.get(categoryFk); 
481
		}else{
482
			return null;
483
		}
484
		
485
	}
486
	
487

    
488
	/* (non-Javadoc)
489
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
490
	 */
491
	@Override
492
	protected boolean doCheck(BerlinModelImportState state){
493
		IOValidator<BerlinModelImportState> validator = new BerlinModelFactsImportValidator();
494
		return validator.validate(state);
495
	}
496
				
497
	/* (non-Javadoc)
498
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getTableName()
499
	 */
500
	@Override
501
	protected String getTableName() {
502
		return dbTableName;
503
			}
504
	
505
	/* (non-Javadoc)
506
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getPluralString()
507
	 */
508
	@Override
509
	public String getPluralString() {
510
		return pluralString;
511
		}
512
	
513
	/* (non-Javadoc)
514
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
515
	 */
516
	protected boolean isIgnore(BerlinModelImportState state){
517
		return ! state.getConfig().isDoFacts();
518
	}
519

    
520

    
521
}
(4-4/21)