Project

General

Profile

Download (17.7 KB) Statistics
| Branch: | Revision:
1
/**
2
 * Copyright (C) 2007 EDIT
3
 * European Distributed Institute of Taxonomy
4
 * http://www.e-taxonomy.eu
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version 1.1
7
 * See LICENSE.TXT at the top of this package for the full license terms.
8
 */
9

    
10
package eu.etaxonomy.cdm.io.pesi.faunaEuropaea;
11

    
12
import java.sql.ResultSet;
13
import java.sql.SQLException;
14
import java.util.Collection;
15
import java.util.HashMap;
16
import java.util.HashSet;
17
import java.util.Iterator;
18
import java.util.List;
19
import java.util.Map;
20
import java.util.Map.Entry;
21
import java.util.Set;
22
import java.util.UUID;
23

    
24
import org.apache.commons.lang.StringUtils;
25
import org.apache.log4j.Logger;
26
import org.springframework.stereotype.Component;
27
import org.springframework.transaction.TransactionStatus;
28

    
29
import eu.etaxonomy.cdm.io.common.IImportConfigurator;
30
import eu.etaxonomy.cdm.io.common.IImportConfigurator.DO_REFERENCES;
31
import eu.etaxonomy.cdm.io.common.ImportHelper;
32
import eu.etaxonomy.cdm.io.common.Source;
33
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
34
import eu.etaxonomy.cdm.model.common.CdmBase;
35
import eu.etaxonomy.cdm.model.common.OriginalSourceBase;
36
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
37
import eu.etaxonomy.cdm.model.description.DescriptionElementSource;
38
import eu.etaxonomy.cdm.model.description.Feature;
39
import eu.etaxonomy.cdm.model.description.TaxonDescription;
40
import eu.etaxonomy.cdm.model.description.TextData;
41
import eu.etaxonomy.cdm.model.reference.Reference;
42
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
43
import eu.etaxonomy.cdm.model.taxon.Synonym;
44
import eu.etaxonomy.cdm.model.taxon.Taxon;
45
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
46
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
47

    
48

    
49
/**
50
 * @author a.babadshanjan
51
 * @created 12.05.2009
52
 * @version 1.0
53
 */
54
@Component
55
public class FaunaEuropaeaRefImport extends FaunaEuropaeaImportBase {
56
    private static final long serialVersionUID = -586555645981648177L;
57

    
58
    private static final Logger logger = Logger.getLogger(FaunaEuropaeaRefImport.class);
59

    
60
	@Override
61
	protected boolean doCheck(FaunaEuropaeaImportState state) {
62
		boolean result = true;
63
		FaunaEuropaeaImportConfigurator fauEuConfig = state.getConfig();
64
		logger.warn("Checking for References not yet fully implemented");
65
//		result &= checkReferenceStatus(fauEuConfig);
66

    
67
		return result;
68
	}
69

    
70
	@Override
71
	protected void doInvoke(FaunaEuropaeaImportState state) {
72
		/*
73
		logger.warn("Start RefImport doInvoke");
74
		ProfilerController.memorySnapshot();
75
		*/
76
		if (state.getConfig().getDoReferences().equals(DO_REFERENCES.NONE)){
77
			return;
78
		}
79
		if (state.getConfig().getSourceReference().getId() == 0){
80
		    Reference sourceRef = getReferenceService().find(state.getConfig().getSourceRefUuid());
81
		    state.getConfig().setSourceReference(sourceRef);
82
		}
83
		Set<UUID> taxonUuids = null;
84
		Map<Integer, Reference> references = null;
85
		Map<String,TeamOrPersonBase<?>> authors = null;
86
		Map<UUID, FaunaEuropaeaReferenceTaxon> fauEuTaxonMap = null;
87
		Map<Integer, UUID> referenceUuids = new HashMap<Integer, UUID>();
88
		Set<Integer> referenceIDs = null;
89
		int limit = state.getConfig().getLimitSave();
90

    
91
		FaunaEuropaeaImportConfigurator fauEuConfig = state.getConfig();
92
		Source source = fauEuConfig.getSource();
93

    
94
		String namespace = "Reference";
95
		int i = 0;
96

    
97
		String selectCountTaxRefs =
98
			" SELECT count(*) ";
99

    
100
		String selectColumnsTaxRefs =
101
			" SELECT Reference.*, TaxRefs.*, Taxon.UUID  ";
102

    
103
		String fromClauseTaxRefs =
104
			" FROM TaxRefs " +
105
			" INNER JOIN Reference ON Reference.ref_id = TaxRefs.trf_ref_id " +
106
			" INNER JOIN Taxon ON TaxRefs.trf_tax_id = Taxon.TAX_ID ";
107

    
108
		String orderClauseTaxRefs =
109
			" ORDER BY TaxRefs.trf_tax_id";
110

    
111
		String selectCountRefs =
112
			" SELECT count(*) FROM Reference";
113

    
114
		String selectColumnsRefs =
115
			" SELECT * FROM Reference order by ref_author";
116

    
117

    
118
		String countQueryTaxRefs =
119
			selectCountTaxRefs + fromClauseTaxRefs;
120

    
121
		String selectQueryTaxRefs =
122
			selectColumnsTaxRefs + fromClauseTaxRefs + orderClauseTaxRefs;
123

    
124
		String countQueryRefs =
125
			selectCountRefs;
126

    
127
		String selectQueryRefs =
128
			selectColumnsRefs;
129

    
130

    
131
		if(logger.isInfoEnabled()) { logger.info("Start making References..."); }
132
		//first add all References to CDM
133
		processReferences(state, references, authors,
134
				referenceUuids, limit, fauEuConfig, source, namespace, i,
135
				countQueryRefs, selectQueryRefs);
136

    
137
	    /*
138
		logger.warn("Start ref taxon relationships");
139
		ProfilerController.memorySnapshot();
140
	 	*/
141
	 //create the relationships between references and taxa
142

    
143
        createTaxonReferenceRel(state, taxonUuids, fauEuTaxonMap,
144
				referenceUuids, referenceIDs, limit, source,
145
				countQueryTaxRefs, selectQueryTaxRefs);
146

    
147
        /*
148
		logger.warn("End RefImport doInvoke");
149
		ProfilerController.memorySnapshot();
150
		*/
151
		if(logger.isInfoEnabled()) { logger.info("End making references ..."); }
152

    
153
		return;
154
	}
155

    
156
	private void processReferences(FaunaEuropaeaImportState state,
157
			Map<Integer, Reference> references,
158
			Map<String, TeamOrPersonBase<?>> authors,
159
			Map<Integer, UUID> referenceUuids, int limit,
160
			FaunaEuropaeaImportConfigurator fauEuConfig, Source source,
161
			String namespace, int i, String countQueryRefs,
162
			String selectQueryRefs) {
163
		TransactionStatus txStatus = null;
164
		int count;
165
		Map<String, Reference> inReferences = new HashMap<String, Reference>();
166
		try {
167
			ResultSet rsRefs = source.getResultSet(countQueryRefs);
168
			rsRefs.next();
169
			count = rsRefs.getInt(1);
170

    
171
			rsRefs = source.getResultSet(selectQueryRefs);
172

    
173
	        if (logger.isInfoEnabled()) {
174
	        	logger.info("Get all References...");
175
				logger.info("Number of rows: " + count);
176
				logger.info("Count Query: " + countQueryRefs);
177
				logger.info("Select Query: " + selectQueryRefs);
178
			}
179

    
180
	        while (rsRefs.next()){
181
	        	int refId = rsRefs.getInt("ref_id");
182
	        	String var = "\u00A7";
183
				String refAuthor = deleteSymbol(var,rsRefs.getString("ref_author"));
184

    
185
				String year = deleteSymbol(var, rsRefs.getString("ref_year"));
186
				String title = deleteSymbol(var, rsRefs.getString("ref_title"));
187

    
188
				if (year == null){
189
					try{
190
						year = String.valueOf((Integer.parseInt(title)));
191
					}
192
					catch(Exception ex)
193
					{
194
						logger.info("year is empty and " +title + " contains no integer");
195
				    }
196
				}
197
				String refSource = deleteSymbol(var, rsRefs.getString("ref_source"));
198

    
199
				if ((i++ % limit) == 0) {
200

    
201
					txStatus = startTransaction();
202
					references = new HashMap<Integer,Reference>(limit);
203
					authors = new HashMap<String,TeamOrPersonBase<?>>(limit);
204
					//inReferences = new HashMap<String, Reference>(limit);
205
					if(logger.isInfoEnabled()) {
206
						logger.info("i = " + i + " - Reference import transaction started");
207
					}
208
				}
209

    
210
				Reference reference = null;
211
				TeamOrPersonBase<?> author = null;
212
				//ReferenceFactory refFactory = ReferenceFactory.newInstance();
213
				reference = ReferenceFactory.newGeneric();
214

    
215
//				reference.setTitleCache(title);
216
				reference.setTitle(title);
217
				reference.setDatePublished(ImportHelper.getDatePublished(year));
218
				Reference inReference;
219
				Reference tempInReference;
220
				if (!StringUtils.isBlank(refSource)) {
221
				    tempInReference = (Reference)NonViralNameParserImpl.NewInstance().parseReferenceTitle(refSource, null, false);
222
				    if (inReferences.containsKey(tempInReference.getTitleCache())){
223
				        inReference = inReferences.get(tempInReference.getTitleCache());
224

    
225
				    }else{
226
				        inReference = (Reference) tempInReference.clone();
227
				        inReference.setPages(null);
228
				        inReference.setEdition(null);
229
				        inReferences.put(inReference.getTitleCache(), inReference);
230

    
231
				    }
232
				    reference.setPages(tempInReference.getPages());
233
                    reference.setEdition(tempInReference.getEdition());
234
                    tempInReference = null;
235
                    reference.setInReference(inReference);
236
				}
237

    
238
				if (!authors.containsKey(refAuthor)) {
239
					if (refAuthor == null) {
240
						logger.warn("Reference author is null");
241
					}
242
					author = FaunaEuropaeaAuthorImport.parseNomAuthorString(refAuthor);
243

    
244
					authors.put(refAuthor,author);
245
					if (logger.isTraceEnabled()) {
246
						logger.trace("Stored author (" + refAuthor + ")");
247
					}
248
				//}
249

    
250
				} else {
251
					author = authors.get(refAuthor);
252
					if (logger.isDebugEnabled()) {
253
						logger.debug("Not imported author with duplicated aut_id (" + refId +
254
							") " + refAuthor);
255
					}
256
				}
257

    
258
				reference.setAuthorship(author);
259

    
260
				ImportHelper.setOriginalSource(reference, fauEuConfig.getSourceReference(), refId, namespace);
261
				ImportHelper.setOriginalSource(author, fauEuConfig.getSourceReference(), refId, namespace);
262

    
263
				// Store reference
264

    
265

    
266
				if (!references.containsKey(refId) && !(reference.getId() ==fauEuConfig.getSourceReference().getId())) {
267

    
268
					if (reference == null) {
269
						logger.warn("Reference is null");
270
					}
271
					references.put(refId, reference);
272
					if (logger.isTraceEnabled()) {
273
						logger.trace("Stored reference (" + refAuthor + ")");
274
					}
275
				} else {
276
					if (logger.isDebugEnabled()) {
277
						logger.debug("Duplicated reference (" + refId + ", " + refAuthor + ")");
278
					}
279
					//continue;
280
				}
281

    
282
				if (((i % limit) == 0 && i > 1 ) || i == count ) {
283

    
284
					commitReferences(references, authors, referenceUuids, i,
285
							txStatus);
286
					references= null;
287
					authors = null;
288
				}
289

    
290

    
291

    
292
	        }
293
	        if (references != null){
294
	        	commitReferences(references, authors, referenceUuids, i, txStatus);
295
	        	references= null;
296
				authors = null;
297
	        }
298
		}catch(SQLException e) {
299
			logger.error("SQLException:" +  e);
300
			state.setUnsuccessfull();
301
		}
302
		inReferences = null;
303

    
304
	}
305

    
306
	private void commitReferences(Map<Integer, Reference> references,
307
			Map<String, TeamOrPersonBase<?>> authors,
308
			Map<Integer, UUID> referenceUuids, int i, TransactionStatus txStatus) {
309

    
310
		Map <UUID, Reference> referenceMap =getReferenceService().saveOrUpdate(references.values());
311
		logger.info("i = " + i + " - references saved");
312

    
313
		Iterator<Entry<UUID, Reference>> it = referenceMap.entrySet().iterator();
314
		while (it.hasNext()){
315
			Reference ref = it.next().getValue();
316
			int refID = Integer.valueOf((ref.getSources().iterator().next()).getIdInSource());
317
			UUID uuid = ref.getUuid();
318
			referenceUuids.put(refID, uuid);
319
		}
320

    
321
		getAgentService().save((Collection)authors.values());
322
		commitTransaction(txStatus);
323
	}
324

    
325
	private void createTaxonReferenceRel(FaunaEuropaeaImportState state,
326
			Set<UUID> taxonUuids,
327
			Map<UUID, FaunaEuropaeaReferenceTaxon> fauEuTaxonMap,
328
			Map<Integer, UUID> referenceUuids, Set<Integer> referenceIDs,
329
			int limit, Source source, String countQueryTaxRefs,
330
			String selectQueryTaxRefs) {
331

    
332
		TransactionStatus txStatus = null;
333
		int i;
334
		int count;
335
		Taxon taxon = null;
336
		i = 0;
337
		try{
338
			ResultSet rsTaxRefs = source.getResultSet(countQueryTaxRefs);
339
			rsTaxRefs.next();
340
			count = rsTaxRefs.getInt(1);
341

    
342
			rsTaxRefs = source.getResultSet(selectQueryTaxRefs);
343

    
344
			logger.info("Start taxon reference-relationships");
345
			FaunaEuropaeaReference fauEuReference;
346
			FaunaEuropaeaReferenceTaxon fauEuReferenceTaxon;
347
			while (rsTaxRefs.next()) {
348

    
349

    
350
				if ((i++ % limit) == 0) {
351

    
352
					txStatus = startTransaction();
353
					taxonUuids = new HashSet<UUID>(limit);
354
					referenceIDs = new HashSet<Integer>(limit);
355
					fauEuTaxonMap = new HashMap<UUID, FaunaEuropaeaReferenceTaxon>(limit);
356

    
357
					if(logger.isInfoEnabled()) {
358
						logger.info("i = " + i + " - Reference import transaction started");
359
					}
360
				}
361

    
362

    
363
				int taxonId = rsTaxRefs.getInt("trf_tax_id");
364
				int refId = rsTaxRefs.getInt("ref_id");
365
				String refAuthor = rsTaxRefs.getString("ref_author");
366
				String year = rsTaxRefs.getString("ref_year");
367
				String title = rsTaxRefs.getString("ref_title");
368

    
369
				if (year == null){
370
					try{
371
						year = String.valueOf((Integer.parseInt(title)));
372
					}
373
					catch(Exception ex)
374
					{
375
						logger.info("year is empty and " +title + " contains no integer");
376
				    }
377
				}
378
				String refSource = rsTaxRefs.getString("ref_source");
379
				String page = rsTaxRefs.getString("trf_page");
380
				UUID currentTaxonUuid = null;
381
				if (resultSetHasColumn(rsTaxRefs, "UUID")){
382
					currentTaxonUuid = UUID.fromString(rsTaxRefs.getString("UUID"));
383
				} else {
384
					logger.error("Taxon (" + taxonId + ") without UUID ignored");
385
					continue;
386
				}
387

    
388
				fauEuReference = new FaunaEuropaeaReference();
389
				fauEuReference.setTaxonUuid(currentTaxonUuid);
390
				fauEuReference.setReferenceId(refId);
391
				fauEuReference.setReferenceAuthor(refAuthor);
392
				fauEuReference.setReferenceYear(year);
393
				fauEuReference.setReferenceTitle(title);
394
				fauEuReference.setReferenceSource(refSource);
395
				fauEuReference.setPage(page);
396

    
397
				if (!taxonUuids.contains(currentTaxonUuid)) {
398
					taxonUuids.add(currentTaxonUuid);
399
					fauEuReferenceTaxon =
400
						new FaunaEuropaeaReferenceTaxon(currentTaxonUuid);
401
					fauEuTaxonMap.put(currentTaxonUuid, fauEuReferenceTaxon);
402
				} else {
403
					if (logger.isTraceEnabled()) {
404
						logger.trace("Taxon (" + currentTaxonUuid + ") already stored.");
405
						//continue; ein Taxon kann mehr als eine Referenz haben
406
					}
407
				}
408

    
409
				if (!referenceIDs.contains(refId)) {
410

    
411

    
412
					referenceIDs.add(refId);
413
					if (logger.isTraceEnabled()) {
414
						logger.trace("Stored reference (" + refAuthor + ")");
415
					}
416
				} else {
417
					if (logger.isDebugEnabled()) {
418
						logger.debug("Duplicated reference (" + refId + ", " + refAuthor + ")");
419
					}
420
					//continue;
421
				}
422

    
423
				fauEuTaxonMap.get(currentTaxonUuid).addReference(fauEuReference);
424

    
425
				if (((i % limit) == 0 && i > 1 ) || i == count) {
426

    
427
					try {
428
						commitTaxaReferenceRel(taxonUuids, fauEuTaxonMap,
429
								referenceUuids, referenceIDs, limit, txStatus, i,
430
								taxon);
431

    
432
						taxonUuids = null;
433

    
434
						fauEuTaxonMap = null;
435
						referenceIDs = null;
436

    
437
					} catch (Exception e) {
438
						logger.warn("An exception occurred when creating reference, reference could not be saved.");
439
					}
440
				}
441
			}
442
			if (taxonUuids != null){
443
				commitTaxaReferenceRel(taxonUuids, fauEuTaxonMap,
444
						referenceUuids, referenceIDs, limit, txStatus, i,
445
						taxon);
446
			}
447
			rsTaxRefs.close();
448
		} catch (SQLException e) {
449
				logger.error("SQLException:" +  e);
450
				state.setUnsuccessfull();
451
		}
452
		taxonUuids = null;
453

    
454
		fauEuTaxonMap = null;
455
		referenceIDs = null;
456

    
457
	}
458

    
459
	private void commitTaxaReferenceRel(Set<UUID> taxonUuids,
460
			Map<UUID, FaunaEuropaeaReferenceTaxon> fauEuTaxonMap,
461
			Map<Integer, UUID> referenceUuids, Set<Integer> referenceIDs,
462
			int limit, TransactionStatus txStatus, int i, Taxon taxon) {
463
		List<TaxonBase> taxonList;
464
		List<Reference> referenceList;
465
		Map<Integer, Reference> references;
466
		taxonList = getTaxonService().find(taxonUuids);
467
		//get UUIDs of used references
468
		Iterator<?> itRefs = referenceIDs.iterator();
469
		Set<UUID> uuidSet = new HashSet<UUID>(referenceIDs.size());
470
		UUID uuid;
471
		while (itRefs.hasNext()){
472
			uuid = referenceUuids.get(itRefs.next());
473
			uuidSet.add(uuid);
474
		}
475

    
476
		referenceList = getReferenceService().find(uuidSet);
477

    
478
		references = new HashMap<Integer, Reference>(limit);
479
		for (Reference ref : referenceList){
480
			references.put(Integer.valueOf(((OriginalSourceBase)ref.getSources().iterator().next()).getIdInSource()), ref);
481
		}
482
		for (TaxonBase<?> taxonBase : taxonList) {
483

    
484
			// Create descriptions
485

    
486
			if (taxonBase == null) {
487
				if (logger.isDebugEnabled()) {
488
					logger.debug("TaxonBase is null ");
489
				}
490
				continue;
491
			}
492
			boolean isSynonym = taxonBase.isInstanceOf(Synonym.class);
493
			if (isSynonym) {
494
				Synonym syn = CdmBase.deproxy(taxonBase, Synonym.class);
495
				Set<Taxon> acceptedTaxa = syn.getAcceptedTaxa();
496
				if (acceptedTaxa.size() > 0) {
497
					taxon = syn.getAcceptedTaxa().iterator().next();
498
					//logger.warn("Synonym (" + taxonBase.getUuid() + ") has accepted taxon" + taxon.getUuid());
499
				} else {
500
//								if (logger.isDebugEnabled()) {
501
					logger.warn("Synonym (" + taxonBase.getUuid() + ") does not have accepted taxa");
502
//								}
503
				}
504
			} else {
505
				taxon = CdmBase.deproxy(taxonBase, Taxon.class);
506
			}
507

    
508
			if (taxon != null) {
509
				TaxonDescription taxonDescription;
510
				Set<TaxonDescription> descriptions = taxon.getDescriptions();
511
				if (descriptions.size() > 0) {
512
					taxonDescription = descriptions.iterator().next();
513
				} else {
514
					taxonDescription = TaxonDescription.NewInstance();
515
					taxon.addDescription(taxonDescription);
516
				}
517

    
518

    
519
				UUID taxonUuid = taxonBase.getUuid();
520
				FaunaEuropaeaReferenceTaxon fauEuHelperTaxon = fauEuTaxonMap.get(taxonUuid);
521
				Reference citation;
522
				String microCitation;
523
				DescriptionElementSource originalSource;
524
				Synonym syn;
525
				for (FaunaEuropaeaReference storedReference : fauEuHelperTaxon.getReferences()) {
526

    
527
					TextData textData = TextData.NewInstance(Feature.CITATION());
528

    
529
					citation = references.get(storedReference.getReferenceId());
530
					microCitation = null;
531
					originalSource = DescriptionElementSource.NewInstance(OriginalSourceType.PrimaryTaxonomicSource, null, null, citation, microCitation, null, null);
532
					if (isSynonym){
533
						syn = CdmBase.deproxy(taxonBase, Synonym.class);
534
						originalSource.setNameUsedInSource(syn.getName());
535
					}
536
					textData.addSource(originalSource);
537
					taxonDescription.addElement(textData);
538
				}
539
			}
540
		}
541
		if(logger.isInfoEnabled()) {
542
			logger.info("i = " + i + " - Transaction committed");
543
		}
544

    
545
		// save taxa
546
		getTaxonService().save(taxonList);
547
		commitTransaction(txStatus);
548

    
549
	}
550

    
551
	@Override
552
	protected boolean isIgnore(FaunaEuropaeaImportState state){
553
		return (state.getConfig().getDoReferences() == IImportConfigurator.DO_REFERENCES.NONE);
554
	}
555

    
556
	private String deleteSymbol(String symbol, String stringVar){
557
		if (stringVar.startsWith(symbol)){
558
			if (stringVar.endsWith(symbol)){
559
				stringVar = stringVar.substring(1,stringVar.length()-1);
560
			}else{
561
				stringVar = stringVar.substring(1);
562
			}
563
		} else if (stringVar.endsWith(symbol)){
564
			stringVar = stringVar.substring(0, stringVar.length()-1);
565
		}
566
		return stringVar;
567
	}
568

    
569
}
(10-10/20)