Project

General

Profile

Download (19.5 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy 
4
* http://www.e-taxonomy.eu
5
* 
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.io.berlinModel.in;
11

    
12
import java.io.IOException;
13
import java.net.MalformedURLException;
14
import java.net.URISyntaxException;
15
import java.net.URL;
16
import java.sql.ResultSet;
17
import java.sql.SQLException;
18
import java.util.Collection;
19
import java.util.HashMap;
20
import java.util.HashSet;
21
import java.util.Map;
22
import java.util.Set;
23

    
24
import org.apache.log4j.Logger;
25
import org.springframework.stereotype.Component;
26

    
27
import eu.etaxonomy.cdm.common.CdmUtils;
28
import eu.etaxonomy.cdm.common.mediaMetaData.ImageMetaData;
29
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
30
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelFactsImportValidator;
31
import eu.etaxonomy.cdm.io.common.IOValidator;
32
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
33
import eu.etaxonomy.cdm.io.common.Source;
34
import eu.etaxonomy.cdm.model.common.Annotation;
35
import eu.etaxonomy.cdm.model.common.CdmBase;
36
import eu.etaxonomy.cdm.model.common.DescriptionElementSource;
37
import eu.etaxonomy.cdm.model.common.Language;
38
import eu.etaxonomy.cdm.model.common.Marker;
39
import eu.etaxonomy.cdm.model.common.MarkerType;
40
import eu.etaxonomy.cdm.model.common.TermVocabulary;
41
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
42
import eu.etaxonomy.cdm.model.description.Feature;
43
import eu.etaxonomy.cdm.model.description.TaxonDescription;
44
import eu.etaxonomy.cdm.model.description.TextData;
45
import eu.etaxonomy.cdm.model.media.ImageFile;
46
import eu.etaxonomy.cdm.model.media.Media;
47
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
48
import eu.etaxonomy.cdm.model.reference.ReferenceBase;
49
import eu.etaxonomy.cdm.model.taxon.Taxon;
50
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
51
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
52

    
53
/**
54
 * @author a.mueller
55
 * @created 20.03.2008
56
 * @version 1.0
57
 */
58
@Component
59
public class BerlinModelFactsImport  extends BerlinModelImportBase {
60
	private static final Logger logger = Logger.getLogger(BerlinModelFactsImport.class);
61

    
62
	public static final String NAMESPACE = "Fact";
63
	
64
	public static final String SEQUENCE_PREFIX = "ORDER: ";
65
	
66
	private int modCount = 10000;
67
	private static final String pluralString = "facts";
68
	private static final String dbTableName = "Fact";
69

    
70
	//FIXME don't use as class variable
71
	private Map<Integer, Feature> featureMap;
72
	
73
	public BerlinModelFactsImport(){
74
		super();
75
	}
76

    
77

    
78
	private TermVocabulary<Feature> getFeatureVocabulary(){
79
		try {
80
			//TODO work around until service method works
81
			TermVocabulary<Feature> featureVocabulary =  BerlinModelTransformer.factCategory2Feature(1).getVocabulary();
82
			//TermVocabulary<Feature> vocabulary = getTermService().getVocabulary(vocabularyUuid);
83
			return featureVocabulary;
84
		} catch (UnknownCdmTypeException e) {
85
			logger.error("Feature vocabulary not available. New vocabulary created");
86
			return new TermVocabulary<Feature>() ;
87
		}
88
	}
89
	
90
	private Map<Integer, Feature>  invokeFactCategories(BerlinModelImportConfigurator bmiConfig){
91
		
92
		Map<Integer, Feature>  result = bmiConfig.getFeatureMap();
93
		Source source = bmiConfig.getSource();
94
		
95
		try {
96
			//get data from database
97
			String strQuery = 
98
					" SELECT FactCategory.* " + 
99
					" FROM FactCategory "+
100
                    " WHERE (1=1)";
101
			ResultSet rs = source.getResultSet(strQuery) ;
102

    
103
			
104
			TermVocabulary<Feature> featureVocabulary = getFeatureVocabulary();
105
			int i = 0;
106
			//for each reference
107
			while (rs.next()){
108
				
109
				if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("FactCategories handled: " + (i-1));}
110
				
111
				int factCategoryId = rs.getInt("factCategoryId");
112
				String factCategory = rs.getString("factCategory");
113
				
114
					
115
				Feature feature;
116
				try {
117
					feature = BerlinModelTransformer.factCategory2Feature(factCategoryId);
118
				} catch (UnknownCdmTypeException e) {
119
					logger.warn("New Feature (FactCategoryId: " + factCategoryId + ")");
120
					feature = Feature.NewInstance(factCategory, factCategory, null);
121
					feature.setVocabulary(featureVocabulary);
122
					feature.setSupportsTextData(true);
123
					//TODO
124
//					MaxFactNumber	int	Checked
125
//					ExtensionTableName	varchar(100)	Checked
126
//					Description	nvarchar(1000)	Checked
127
//					locExtensionFormName	nvarchar(80)	Checked
128
//					RankRestrictionFk	int	Checked
129
				}
130
								
131
				result.put(factCategoryId, feature);
132
			}
133
			Collection<Feature> col = result.values();
134
			getTermService().save((Collection)col);
135
			return result;
136
		} catch (SQLException e) {
137
			logger.error("SQLException:" +  e);
138
			return null;
139
		}
140

    
141
	}
142

    
143
	/* (non-Javadoc)
144
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#doInvoke(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
145
	 */
146
	@Override
147
	protected boolean doInvoke(BerlinModelImportState state) {
148
		featureMap = invokeFactCategories(state.getConfig());
149
		return super.doInvoke(state);
150
	}
151
		
152

    
153
	/* (non-Javadoc)
154
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
155
	 */
156
	@Override
157
	protected String getRecordQuery(BerlinModelImportConfigurator config) {
158
			String strQuery = 
159
					" SELECT Fact.*, PTaxon.RIdentifier as taxonId, RefDetail.Details " + 
160
					" FROM Fact " +
161
                      	" INNER JOIN PTaxon ON Fact.PTNameFk = PTaxon.PTNameFk AND Fact.PTRefFk = PTaxon.PTRefFk " +
162
                      	" LEFT OUTER JOIN RefDetail ON Fact.FactRefDetailFk = RefDetail.RefDetailId AND Fact.FactRefFk = RefDetail.RefFk " +
163
              	" WHERE (FactId IN (" + ID_LIST_TOKEN + "))" + 
164
                        " ORDER By Sequence";
165
		return strQuery;
166
	}
167
	
168

    
169
	/* (non-Javadoc)
170
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#doPartition(eu.etaxonomy.cdm.io.berlinModel.in.ResultSetPartitioner, eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
171
	 */
172
	public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) {
173
		boolean success = true ;
174
		BerlinModelImportConfigurator config = state.getConfig();
175
		Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
176
		Map<String, TaxonBase> taxonMap = (Map<String, TaxonBase>) partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE);
177
		Map<String, ReferenceBase> biblioRefMap = (Map<String, ReferenceBase>) partitioner.getObjectMap(BerlinModelReferenceImport.BIBLIO_REFERENCE_NAMESPACE);
178
		Map<String, ReferenceBase> nomRefMap = (Map<String, ReferenceBase>) partitioner.getObjectMap(BerlinModelReferenceImport.NOM_REFERENCE_NAMESPACE);
179

    
180
		ResultSet rs = partitioner.getResultSet();
181
		
182
			ReferenceBase<?> sourceRef = state.getConfig().getSourceReference();
183
			
184
		try{
185
			int i = 0;
186
			//for each fact
187
			while (rs.next()){
188
				try{
189
					if ((i++ % modCount) == 0){ logger.info("Facts handled: " + (i-1));}
190
					
191
					int factId = rs.getInt("factId");
192
					Object taxonIdObj = rs.getObject("taxonId");
193
					int taxonId = rs.getInt("taxonId");
194
					Object factRefFkObj = rs.getObject("factRefFk");
195
					Object categoryFkObj = rs.getObject("factCategoryFk");
196
					Integer categoryFk = rs.getInt("factCategoryFk");
197
					String details = rs.getString("Details");
198
					String fact = CdmUtils.Nz(rs.getString("Fact"));
199
					String notes = CdmUtils.Nz(rs.getString("notes"));
200
					Boolean doubtfulFlag = rs.getBoolean("DoubtfulFlag");
201
					Boolean publishFlag = rs.getBoolean("publishFlag");
202
					
203
					TaxonBase taxonBase = getTaxon(taxonMap, taxonIdObj, taxonId);
204
					Feature feature = getFeature(featureMap, categoryFkObj, categoryFk) ;
205
					
206
					if (taxonBase == null){
207
						logger.warn("Taxon for Fact " + factId + " does not exist in store");
208
						success = false;
209
					}else{
210
						Taxon taxon;
211
						if ( taxonBase instanceof Taxon ) {
212
							taxon = (Taxon) taxonBase;
213
						}else{
214
							logger.warn("TaxonBase " + (taxonIdObj==null?"(null)":taxonIdObj) + " for Fact " + factId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName());
215
							success = false;
216
							continue;
217
						}
218
						
219
						TaxonDescription taxonDescription = null;
220
						Set<TaxonDescription> descriptionSet= taxon.getDescriptions();
221
						
222
						boolean isImage = false;
223
						Media media = null;
224
						//for diptera images
225
						if (categoryFk == 51){  //TODO check also FactCategory string
226
							isImage = true;
227
							media = Media.NewInstance();
228
							taxonDescription = makeImage(state, fact, media, descriptionSet, taxon);
229
							if (taxonDescription == null){
230
								continue;
231
							}
232
						}
233
						//all others (no image)
234
						else{ 
235
							for (TaxonDescription desc: descriptionSet){
236
								if (! desc.isImageGallery()){
237
									taxonDescription = desc;
238
								}
239
							}
240
							if (taxonDescription == null){
241
								taxonDescription = TaxonDescription.NewInstance();
242
								taxonDescription.setTitleCache(sourceRef == null ? null : sourceRef.getTitleCache(), true);
243
								taxon.addDescription(taxonDescription);
244
							}
245
						}
246
					
247
						//textData
248
						TextData textData = null;
249
						boolean newTextData = true;
250
	
251
						// For Cichorieae DB: If fact category is 31 (Systematics) and there is already a Systematics TextData 
252
						// description element append the fact text to the existing TextData
253
						if(categoryFk == 31) {
254
							Set<DescriptionElementBase> descriptionElements = taxonDescription.getElements();
255
							for (DescriptionElementBase descriptionElement : descriptionElements) {
256
								String featureString = descriptionElement.getFeature().getRepresentation(Language.DEFAULT()).getLabel();
257
								if (descriptionElement instanceof TextData && featureString.equals("Systematics")) { // TODO: test
258
									textData = (TextData)descriptionElement;
259
									String factTextStr = textData.getText(Language.DEFAULT());
260
									// FIXME: Removing newlines doesn't work
261
									if (factTextStr.contains("\\r\\n")) {
262
										factTextStr = factTextStr.replaceAll("\\r\\n","");
263
									}
264
									StringBuilder factText = new StringBuilder(factTextStr);
265
									factText.append(fact);
266
									fact = factText.toString();
267
									newTextData = false;
268
									break;
269
								}
270
							}
271
						}
272
						
273
						if(newTextData == true)	{ 
274
							textData = TextData.NewInstance(); 
275
						}
276
						
277
						//for diptera database
278
						if (categoryFk == 99 && notes.contains("<OriginalName>")){
279
//							notes = notes.replaceAll("<OriginalName>", "");
280
//							notes = notes.replaceAll("</OriginalName>", "");
281
							fact = notes + ": " +  fact ;
282
						}
283
						//TODO textData.putText(fact, bmiConfig.getFactLanguage());  //doesn't work because  bmiConfig.getFactLanguage() is not not a persistent Language Object
284
						//throws  in thread "main" org.springframework.dao.InvalidDataAccessApiUsageException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language; nested exception is org.hibernate.TransientObjectException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language
285
						if (isImage){
286
							textData.addMedia(media);
287
							textData.setType(Feature.IMAGE());
288
						}else{
289
							textData.putText(fact, Language.DEFAULT());
290
							textData.setType(feature);
291
						}
292
						
293
						//reference
294
						ReferenceBase citation = null;
295
						String factRefFk = String.valueOf(factRefFkObj);
296
						if (factRefFkObj != null){
297
							citation = getReferenceOnlyFromMaps(
298
									biblioRefMap, nomRefMap, factRefFk);	
299
							}
300
						if (citation == null && (factRefFkObj != null)){
301
								logger.warn("Citation not found in referenceMap: " + factRefFk);
302
							success = false;
303
							}
304
						if (citation != null || CdmUtils.isNotEmpty(details)){
305
							DescriptionElementSource originalSource = DescriptionElementSource.NewInstance();
306
							originalSource.setCitation(citation);
307
							originalSource.setCitationMicroReference(details);
308
							textData.addSource(originalSource);
309
						}
310
						taxonDescription.addElement(textData);
311
						//doubtfulFlag
312
						if (doubtfulFlag){
313
							textData.addMarker(Marker.NewInstance(MarkerType.IS_DOUBTFUL(), true));
314
						}
315
						//publisheFlag
316
						textData.addMarker(Marker.NewInstance(MarkerType.PUBLISH(), publishFlag));
317
						//Sequence
318
						Integer sequence = rs.getInt("Sequence");
319
						if (sequence != null && sequence != 999){
320
							String strSequence = String.valueOf(sequence);
321
							strSequence = SEQUENCE_PREFIX + strSequence;
322
							//TODO make it an Extension when possible
323
							//Extension datesExtension = Extension.NewInstance(textData, strSequence, ExtensionType.ORDER());
324
							Annotation annotation = Annotation.NewInstance(strSequence, Language.DEFAULT());
325
							textData.addAnnotation(annotation);
326
						}
327
						
328
						//						if (categoryFkObj == FACT_DESCRIPTION){
329
	//						//;
330
	//					}else if (categoryFkObj == FACT_OBSERVATION){
331
	//						//;
332
	//					}else if (categoryFkObj == FACT_DISTRIBUTION_EM){
333
	//						//
334
	//					}else {
335
	//						//TODO
336
	//						//logger.warn("FactCategory " + categoryFk + " not yet implemented");
337
	//					}
338
						
339
						//notes
340
						doCreatedUpdatedNotes(state, textData, rs);
341
						
342
						//TODO
343
						//Designation References -> unclear how to map to CDM
344
						//factId -> OriginalSource for descriptionElements not yet implemented
345
						
346
						//sequence -> textData is not an identifiable entity therefore extensions are not possible
347
						//fact category better
348
						
349
						taxaToSave.add(taxon);
350
					}
351
				} catch (Exception re){
352
					logger.error("An exception occurred during the facts import");
353
					re.printStackTrace();
354
					success = false;
355
				}
356
				//put
357
			}
358
			logger.info("Facts handled: " + (i-1));
359
			logger.info("Taxa to save: " + taxaToSave.size());
360
			getTaxonService().save(taxaToSave);	
361
		}catch(SQLException e){
362
			throw new RuntimeException(e);
363
		}
364
		return success;
365
	}
366

    
367
	/* (non-Javadoc)
368
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
369
	 */
370
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
371
		String nameSpace;
372
		Class cdmClass;
373
		Set<String> idSet;
374
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
375
			
376
		try{
377
			Set<String> taxonIdSet = new HashSet<String>();
378
			Set<String> referenceIdSet = new HashSet<String>();
379
			Set<String> refDetailIdSet = new HashSet<String>();
380
			while (rs.next()){
381
				handleForeignKey(rs, taxonIdSet, "taxonId");
382
				handleForeignKey(rs, referenceIdSet, "FactRefFk");
383
				handleForeignKey(rs, referenceIdSet, "PTDesignationRefFk");
384
				handleForeignKey(rs, refDetailIdSet, "FactRefDetailFk");
385
				handleForeignKey(rs, refDetailIdSet, "PTDesignationRefDetailFk");
386
		}
387
			
388
			//taxon map
389
			nameSpace = BerlinModelTaxonImport.NAMESPACE;
390
			cdmClass = TaxonBase.class;
391
			idSet = taxonIdSet;
392
			Map<String, TaxonBase> taxonMap = (Map<String, TaxonBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
393
			result.put(nameSpace, taxonMap);
394

    
395

    
396
			//nom reference map
397
			nameSpace = BerlinModelReferenceImport.NOM_REFERENCE_NAMESPACE;
398
			cdmClass = ReferenceBase.class;
399
			idSet = referenceIdSet;
400
			Map<String, ReferenceBase> nomReferenceMap = (Map<String, ReferenceBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
401
			result.put(nameSpace, nomReferenceMap);
402

    
403
			//biblio reference map
404
			nameSpace = BerlinModelReferenceImport.BIBLIO_REFERENCE_NAMESPACE;
405
			cdmClass = ReferenceBase.class;
406
			idSet = referenceIdSet;
407
			Map<String, ReferenceBase> biblioReferenceMap = (Map<String, ReferenceBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
408
			result.put(nameSpace, biblioReferenceMap);
409
			
410
			//nom refDetail map
411
			nameSpace = BerlinModelRefDetailImport.NOM_REFDETAIL_NAMESPACE;
412
			cdmClass = ReferenceBase.class;
413
			idSet = refDetailIdSet;
414
			Map<String, ReferenceBase> nomRefDetailMap= (Map<String, ReferenceBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
415
			result.put(nameSpace, nomRefDetailMap);
416
			
417
			//biblio refDetail map
418
			nameSpace = BerlinModelRefDetailImport.BIBLIO_REFDETAIL_NAMESPACE;
419
			cdmClass = ReferenceBase.class;
420
			idSet = refDetailIdSet;
421
			Map<String, ReferenceBase> biblioRefDetailMap= (Map<String, ReferenceBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
422
			result.put(nameSpace, biblioRefDetailMap);
423
	
424
		} catch (SQLException e) {
425
			throw new RuntimeException(e);
426
	}
427
		return result;
428
	}
429
	
430
	
431
	/**
432
	 * @param state 
433
	 * @param media 
434
	 * @param media 
435
	 * @param descriptionSet 
436
	 * 
437
	 */
438
	private TaxonDescription makeImage(BerlinModelImportState state, String fact, Media media, Set<TaxonDescription> descriptionSet, Taxon taxon) {
439
		TaxonDescription taxonDescription = null;
440
		ReferenceBase sourceRef = state.getConfig().getSourceReference();
441
		String uri = fact;
442
		Integer size = null; 
443
		ImageMetaData imageMetaData = ImageMetaData.newInstance();
444
		URL url;
445
		try {
446
			url = new URL(fact.trim());
447
		} catch (MalformedURLException e) {
448
			logger.warn("Malformed URL. Image could not be imported: " + CdmUtils.Nz(uri));
449
			return null;
450
		}
451
		try {
452
			imageMetaData.readMetaData(url.toURI(), 0);
453
		}
454
		catch(URISyntaxException e){
455
			logger.error("URISyntaxException reading image metadata." , e);
456
		} catch (IOException e) {
457
			logger.error("IOError reading image metadata." , e);
458
		}
459
		MediaRepresentation mediaRepresentation = MediaRepresentation.NewInstance(imageMetaData.getMimeType(), null);
460
		media.addRepresentation(mediaRepresentation);
461
		ImageFile image = ImageFile.NewInstance(uri, size, imageMetaData);
462
		mediaRepresentation.addRepresentationPart(image);
463
		
464
		taxonDescription = taxon.getOrCreateImageGallery(sourceRef == null ? null :sourceRef.getTitleCache());
465
		
466
		return taxonDescription;
467
	}
468

    
469
	private TaxonBase getTaxon(Map<String, TaxonBase> taxonMap, Object taxonIdObj, Integer taxonId){
470
		if (taxonIdObj != null){
471
			return taxonMap.get(String.valueOf(taxonId));
472
		}else{
473
			return null;
474
		}
475
		
476
	}
477
	
478
	private Feature getFeature(Map<Integer, Feature>  featureMap, Object categoryFkObj, Integer categoryFk){
479
		if (categoryFkObj != null){
480
			return featureMap.get(categoryFk); 
481
		}else{
482
			return null;
483
		}
484
		
485
	}
486
	
487

    
488
	/* (non-Javadoc)
489
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
490
	 */
491
	@Override
492
	protected boolean doCheck(BerlinModelImportState state){
493
		IOValidator<BerlinModelImportState> validator = new BerlinModelFactsImportValidator();
494
		return validator.validate(state);
495
	}
496
				
497
	/* (non-Javadoc)
498
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getTableName()
499
	 */
500
	@Override
501
	protected String getTableName() {
502
		return dbTableName;
503
			}
504
	
505
	/* (non-Javadoc)
506
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getPluralString()
507
	 */
508
	@Override
509
	public String getPluralString() {
510
		return pluralString;
511
		}
512
	
513
	/* (non-Javadoc)
514
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
515
	 */
516
	protected boolean isIgnore(BerlinModelImportState state){
517
		return ! state.getConfig().isDoFacts();
518
	}
519

    
520

    
521
}
(4-4/21)