latest FE import features
[cdmlib-apps.git] / cdm-pesi / src / main / java / eu / etaxonomy / cdm / io / pesi / faunaEuropaea / FaunaEuropaeaRefImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.pesi.faunaEuropaea;
11
12 import java.sql.ResultSet;
13 import java.sql.SQLException;
14 import java.util.Collection;
15 import java.util.HashMap;
16 import java.util.HashSet;
17 import java.util.Iterator;
18 import java.util.List;
19 import java.util.Map;
20 import java.util.Map.Entry;
21 import java.util.Set;
22 import java.util.UUID;
23
24 import org.apache.log4j.Logger;
25 import org.springframework.stereotype.Component;
26 import org.springframework.transaction.TransactionStatus;
27
28 import eu.etaxonomy.cdm.io.common.IImportConfigurator;
29 import eu.etaxonomy.cdm.io.common.ImportHelper;
30 import eu.etaxonomy.cdm.io.common.Source;
31 import eu.etaxonomy.cdm.io.profiler.ProfilerController;
32 import eu.etaxonomy.cdm.model.agent.Team;
33 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
34 import eu.etaxonomy.cdm.model.common.CdmBase;
35 import eu.etaxonomy.cdm.model.common.DescriptionElementSource;
36 import eu.etaxonomy.cdm.model.common.OriginalSourceBase;
37 import eu.etaxonomy.cdm.model.description.Feature;
38 import eu.etaxonomy.cdm.model.description.TaxonDescription;
39 import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
40 import eu.etaxonomy.cdm.model.description.TextData;
41 import eu.etaxonomy.cdm.model.reference.Reference;
42 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
43 import eu.etaxonomy.cdm.model.taxon.Synonym;
44 import eu.etaxonomy.cdm.model.taxon.Taxon;
45 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
46
47
48 /**
49 * @author a.babadshanjan
50 * @created 12.05.2009
51 * @version 1.0
52 */
53 @Component
54 public class FaunaEuropaeaRefImport extends FaunaEuropaeaImportBase {
55 private static final Logger logger = Logger.getLogger(FaunaEuropaeaRefImport.class);
56
57 /* Interval for progress info message when retrieving taxa */
58 private int modCount = 10000;
59
60 /* (non-Javadoc)
61 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
62 */
63 @Override
64 protected boolean doCheck(FaunaEuropaeaImportState state) {
65 boolean result = true;
66 FaunaEuropaeaImportConfigurator fauEuConfig = state.getConfig();
67 logger.warn("Checking for References not yet fully implemented");
68 result &= checkReferenceStatus(fauEuConfig);
69
70 return result;
71 }
72
73 private boolean checkReferenceStatus(FaunaEuropaeaImportConfigurator fauEuConfig) {
74 boolean result = true;
75 // try {
76 Source source = fauEuConfig.getSource();
77 String sqlStr = "";
78 // ResultSet rs = source.getResultSet(sqlStr);
79 return result;
80 // } catch (SQLException e) {
81 // e.printStackTrace();
82 // return false;
83 // }
84 }
85
86 /* (non-Javadoc)
87 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doInvoke(eu.etaxonomy.cdm.io.common.IImportConfigurator, eu.etaxonomy.cdm.api.application.CdmApplicationController, java.util.Map)
88 */
89 @Override
90 protected void doInvoke(FaunaEuropaeaImportState state) {
91 /*
92 logger.warn("Start RefImport doInvoke");
93 ProfilerController.memorySnapshot();
94 */
95 List<TaxonBase> taxonList = null;
96 List<Reference> referenceList = null;
97 Set<UUID> taxonUuids = null;
98 Map<Integer, Reference> references = null;
99 Map<String,TeamOrPersonBase> authors = null;
100 Map<UUID, FaunaEuropaeaReferenceTaxon> fauEuTaxonMap = null;
101 Map<Integer, UUID> referenceUuids = new HashMap<Integer, UUID>();
102 Set<Integer> referenceIDs = null;
103 int limit = state.getConfig().getLimitSave();
104
105 FaunaEuropaeaImportConfigurator fauEuConfig = state.getConfig();
106 Source source = fauEuConfig.getSource();
107
108 String namespace = "Reference";
109 int i = 0;
110
111 String selectCountTaxRefs =
112 " SELECT count(*) ";
113
114 String selectColumnsTaxRefs =
115 " SELECT Reference.*, TaxRefs.*, Taxon.UUID ";
116
117 String fromClauseTaxRefs =
118 " FROM TaxRefs " +
119 " INNER JOIN Reference ON Reference.ref_id = TaxRefs.trf_ref_id " +
120 " INNER JOIN Taxon ON TaxRefs.trf_tax_id = Taxon.TAX_ID ";
121
122 String orderClauseTaxRefs =
123 " ORDER BY TaxRefs.trf_tax_id";
124
125 String selectCountRefs =
126 " SELECT count(*) FROM Reference";
127
128 String selectColumnsRefs =
129 " SELECT * FROM Reference order by ref_author";
130
131
132 String countQueryTaxRefs =
133 selectCountTaxRefs + fromClauseTaxRefs;
134
135 String selectQueryTaxRefs =
136 selectColumnsTaxRefs + fromClauseTaxRefs + orderClauseTaxRefs;
137
138 String countQueryRefs =
139 selectCountRefs;
140
141 String selectQueryRefs =
142 selectColumnsRefs;
143
144
145 if(logger.isInfoEnabled()) { logger.info("Start making References..."); }
146 //first add all References to CDM
147 processReferences(state, references, authors,
148 referenceUuids, limit, fauEuConfig, source, namespace, i,
149 countQueryRefs, selectQueryRefs);
150
151 /*
152 logger.warn("Start ref taxon relationships");
153 ProfilerController.memorySnapshot();
154 */
155 //create the relationships between references and taxa
156
157 createTaxonReferenceRel(state, taxonUuids, fauEuTaxonMap,
158 referenceUuids, referenceIDs, limit, source,
159 countQueryTaxRefs, selectQueryTaxRefs);
160
161 /*
162 logger.warn("End RefImport doInvoke");
163 ProfilerController.memorySnapshot();
164 */
165 if(logger.isInfoEnabled()) { logger.info("End making references ..."); }
166
167 return;
168 }
169
170 private void processReferences(FaunaEuropaeaImportState state,
171 Map<Integer, Reference> references,
172 Map<String, TeamOrPersonBase> authors,
173 Map<Integer, UUID> referenceUuids, int limit,
174 FaunaEuropaeaImportConfigurator fauEuConfig, Source source,
175 String namespace, int i, String countQueryRefs,
176 String selectQueryRefs) {
177 TransactionStatus txStatus = null;
178 int count;
179 try {
180 ResultSet rsRefs = source.getResultSet(countQueryRefs);
181 rsRefs.next();
182 count = rsRefs.getInt(1);
183
184 rsRefs = source.getResultSet(selectQueryRefs);
185
186 if (logger.isInfoEnabled()) {
187 logger.info("Get all References...");
188 logger.info("Number of rows: " + count);
189 logger.info("Count Query: " + countQueryRefs);
190 logger.info("Select Query: " + selectQueryRefs);
191 }
192
193 while (rsRefs.next()){
194 int refId = rsRefs.getInt("ref_id");
195 String refAuthor = rsRefs.getString("ref_author");
196 String year = rsRefs.getString("ref_year");
197 String title = rsRefs.getString("ref_title");
198
199 if (year == null){
200 try{
201 year = String.valueOf((Integer.parseInt(title)));
202 }
203 catch(Exception ex)
204 {
205 logger.info("year is empty and " +title + " contains no integer");
206 }
207 }
208 String refSource = rsRefs.getString("ref_source");
209
210 if ((i++ % limit) == 0) {
211
212 txStatus = startTransaction();
213 references = new HashMap<Integer,Reference>(limit);
214 authors = new HashMap<String,TeamOrPersonBase>(limit);
215
216 if(logger.isInfoEnabled()) {
217 logger.info("i = " + i + " - Reference import transaction started");
218 }
219 }
220
221 Reference<?> reference = null;
222 TeamOrPersonBase<Team> author = null;
223 //ReferenceFactory refFactory = ReferenceFactory.newInstance();
224 reference = ReferenceFactory.newGeneric();
225
226 // reference.setTitleCache(title);
227 reference.setTitle(title);
228 reference.setDatePublished(ImportHelper.getDatePublished(year));
229
230 if (!authors.containsKey(refAuthor)) {
231 if (refAuthor == null) {
232 logger.warn("Reference author is null");
233 }
234 author = Team.NewInstance();
235 author.setTitleCache(refAuthor, true);
236 authors.put(refAuthor,author);
237 if (logger.isTraceEnabled()) {
238 logger.trace("Stored author (" + refAuthor + ")");
239 }
240 //}
241
242 } else {
243 author = authors.get(refAuthor);
244 if (logger.isDebugEnabled()) {
245 logger.debug("Not imported author with duplicated aut_id (" + refId +
246 ") " + refAuthor);
247 }
248 }
249
250 reference.setAuthorTeam(author);
251
252 ImportHelper.setOriginalSource(reference, fauEuConfig.getSourceReference(), refId, namespace);
253 ImportHelper.setOriginalSource(author, fauEuConfig.getSourceReference(), refId, namespace);
254
255 // Store reference
256
257
258 if (!references.containsKey(refId)) {
259
260 if (reference == null) {
261 logger.warn("Reference is null");
262 }
263 references.put(refId, reference);
264 if (logger.isTraceEnabled()) {
265 logger.trace("Stored reference (" + refAuthor + ")");
266 }
267 } else {
268 if (logger.isDebugEnabled()) {
269 logger.debug("Duplicated reference (" + refId + ", " + refAuthor + ")");
270 }
271 //continue;
272 }
273
274 if (((i % limit) == 0 && i > 1 ) || i == count ) {
275
276 commitReferences(references, authors, referenceUuids, i,
277 txStatus);
278 references= null;
279 authors = null;
280 }
281
282
283
284 }
285 if (references != null){
286 commitReferences(references, authors, referenceUuids, i, txStatus);
287 references= null;
288 authors = null;
289 }
290 }catch(SQLException e) {
291 logger.error("SQLException:" + e);
292 state.setUnsuccessfull();
293 }
294
295 }
296
297 private void commitReferences(Map<Integer, Reference> references,
298 Map<String, TeamOrPersonBase> authors,
299 Map<Integer, UUID> referenceUuids, int i, TransactionStatus txStatus) {
300 Map <UUID, Reference> referenceMap =getReferenceService().save(references.values());
301 logger.info("i = " + i + " - references saved");
302
303 Iterator<Entry<UUID, Reference>> it = referenceMap.entrySet().iterator();
304 while (it.hasNext()){
305 Reference ref = it.next().getValue();
306 int refID = Integer.valueOf(((OriginalSourceBase)ref.getSources().iterator().next()).getIdInSource());
307 UUID uuid = ref.getUuid();
308 referenceUuids.put(refID, uuid);
309 }
310
311 getAgentService().save((Collection)authors.values());
312 commitTransaction(txStatus);
313 }
314
315 private void createTaxonReferenceRel(FaunaEuropaeaImportState state,
316 Set<UUID> taxonUuids,
317 Map<UUID, FaunaEuropaeaReferenceTaxon> fauEuTaxonMap,
318 Map<Integer, UUID> referenceUuids, Set<Integer> referenceIDs,
319 int limit, Source source, String countQueryTaxRefs,
320 String selectQueryTaxRefs) {
321
322 TransactionStatus txStatus = null;
323 List<TaxonBase> taxonList;
324 List<Reference> referenceList;
325 Map<Integer, Reference> references;
326 Map<String, TeamOrPersonBase> authors;
327 int i;
328 int count;
329 Taxon taxon = null;
330 i = 0;
331 try{
332 ResultSet rsTaxRefs = source.getResultSet(countQueryTaxRefs);
333 rsTaxRefs.next();
334 count = rsTaxRefs.getInt(1);
335
336 rsTaxRefs = source.getResultSet(selectQueryTaxRefs);
337
338 logger.info("Start taxon reference-relationships");
339 FaunaEuropaeaReference fauEuReference;
340 FaunaEuropaeaReferenceTaxon fauEuReferenceTaxon;
341 while (rsTaxRefs.next()) {
342
343
344 if ((i++ % limit) == 0) {
345
346 txStatus = startTransaction();
347 taxonUuids = new HashSet<UUID>(limit);
348 referenceIDs = new HashSet<Integer>(limit);
349 authors = new HashMap<String,TeamOrPersonBase>(limit);
350 fauEuTaxonMap = new HashMap<UUID, FaunaEuropaeaReferenceTaxon>(limit);
351
352 if(logger.isInfoEnabled()) {
353 logger.info("i = " + i + " - Reference import transaction started");
354 }
355 }
356
357
358 int taxonId = rsTaxRefs.getInt("trf_tax_id");
359 int refId = rsTaxRefs.getInt("ref_id");
360 String refAuthor = rsTaxRefs.getString("ref_author");
361 String year = rsTaxRefs.getString("ref_year");
362 String title = rsTaxRefs.getString("ref_title");
363
364 if (year == null){
365 try{
366 year = String.valueOf((Integer.parseInt(title)));
367 }
368 catch(Exception ex)
369 {
370 logger.info("year is empty and " +title + " contains no integer");
371 }
372 }
373 String refSource = rsTaxRefs.getString("ref_source");
374 String page = rsTaxRefs.getString("trf_page");
375 UUID currentTaxonUuid = null;
376 if (resultSetHasColumn(rsTaxRefs, "UUID")){
377 currentTaxonUuid = UUID.fromString(rsTaxRefs.getString("UUID"));
378 } else {
379 logger.error("Taxon (" + taxonId + ") without UUID ignored");
380 continue;
381 }
382
383 fauEuReference = new FaunaEuropaeaReference();
384 fauEuReference.setTaxonUuid(currentTaxonUuid);
385 fauEuReference.setReferenceId(refId);
386 fauEuReference.setReferenceAuthor(refAuthor);
387 fauEuReference.setReferenceYear(year);
388 fauEuReference.setReferenceTitle(title);
389 fauEuReference.setReferenceSource(refSource);
390 fauEuReference.setPage(page);
391
392 if (!taxonUuids.contains(currentTaxonUuid)) {
393 taxonUuids.add(currentTaxonUuid);
394 fauEuReferenceTaxon =
395 new FaunaEuropaeaReferenceTaxon(currentTaxonUuid);
396 fauEuTaxonMap.put(currentTaxonUuid, fauEuReferenceTaxon);
397 } else {
398 if (logger.isTraceEnabled()) {
399 logger.trace("Taxon (" + currentTaxonUuid + ") already stored.");
400 //continue; ein Taxon kann mehr als eine Referenz haben
401 }
402 }
403
404 if (!referenceIDs.contains(refId)) {
405
406
407 referenceIDs.add(refId);
408 if (logger.isTraceEnabled()) {
409 logger.trace("Stored reference (" + refAuthor + ")");
410 }
411 } else {
412 if (logger.isDebugEnabled()) {
413 logger.debug("Duplicated reference (" + refId + ", " + refAuthor + ")");
414 }
415 //continue;
416 }
417
418
419 fauEuTaxonMap.get(currentTaxonUuid).addReference(fauEuReference);
420
421
422
423
424 if (((i % limit) == 0 && i > 1 ) || i == count) {
425
426 try {
427
428 Set<UUID> uuidSet;
429 commitTaxaReferenceRel(taxonUuids, fauEuTaxonMap,
430 referenceUuids, referenceIDs, limit, txStatus, i,
431 taxon);
432
433 taxonUuids = null;
434 references = null;
435 taxonList = null;
436 fauEuTaxonMap = null;
437 referenceIDs = null;
438 referenceList = null;
439 uuidSet = null;
440
441
442 } catch (Exception e) {
443 logger.warn("An exception occurred when creating reference, reference could not be saved.");
444 }
445 }
446 }
447 if (taxonUuids != null){
448 commitTaxaReferenceRel(taxonUuids, fauEuTaxonMap,
449 referenceUuids, referenceIDs, limit, txStatus, i,
450 taxon);
451 }
452 rsTaxRefs.close();
453 } catch (SQLException e) {
454 logger.error("SQLException:" + e);
455 state.setUnsuccessfull();
456 }
457 }
458
459 private void commitTaxaReferenceRel(Set<UUID> taxonUuids,
460 Map<UUID, FaunaEuropaeaReferenceTaxon> fauEuTaxonMap,
461 Map<Integer, UUID> referenceUuids, Set<Integer> referenceIDs,
462 int limit, TransactionStatus txStatus, int i, Taxon taxon) {
463 List<TaxonBase> taxonList;
464 List<Reference> referenceList;
465 Map<Integer, Reference> references;
466 taxonList = getTaxonService().find(taxonUuids);
467 //get UUIDs of used references
468 Iterator itRefs = referenceIDs.iterator();
469 Set<UUID> uuidSet = new HashSet<UUID>(referenceIDs.size());
470 UUID uuid;
471 while (itRefs.hasNext()){
472 uuid = referenceUuids.get(itRefs.next());
473 uuidSet.add(uuid);
474 }
475 referenceList = getReferenceService().find(uuidSet);
476 references = new HashMap<Integer, Reference>(limit);
477 for (Reference ref : referenceList){
478 references.put(Integer.valueOf(((OriginalSourceBase)ref.getSources().iterator().next()).getIdInSource()), ref);
479 }
480 for (TaxonBase taxonBase : taxonList) {
481
482 // Create descriptions
483
484 if (taxonBase == null) {
485 if (logger.isDebugEnabled()) {
486 logger.debug("TaxonBase is null ");
487 }
488 continue;
489 }
490 boolean isSynonym = taxonBase.isInstanceOf(Synonym.class);
491 if (isSynonym) {
492 Synonym syn = CdmBase.deproxy(taxonBase, Synonym.class);
493 Set<Taxon> acceptedTaxa = syn.getAcceptedTaxa();
494 if (acceptedTaxa.size() > 0) {
495 taxon = syn.getAcceptedTaxa().iterator().next();
496 } else {
497 // if (logger.isDebugEnabled()) {
498 logger.warn("Synonym (" + taxonBase.getUuid() + ") does not have accepted taxa");
499 // }
500 }
501 } else {
502 taxon = CdmBase.deproxy(taxonBase, Taxon.class);
503 }
504 //TODO: statt TaxonDescription TaxonNameDescription und an den Namen anstatt ans Taxon hängen!!!
505 if (taxon != null) {
506 TaxonNameDescription taxonNameDescription = null;
507 Set<TaxonNameDescription> descriptions = taxon.getName().getDescriptions();
508 if (descriptions.size() > 0) {
509 taxonNameDescription = descriptions.iterator().next();
510 } else {
511 taxonNameDescription = TaxonNameDescription.NewInstance();
512 taxon.getName().addDescription(taxonNameDescription);
513 }
514
515
516 UUID taxonUuid = taxonBase.getUuid();
517 FaunaEuropaeaReferenceTaxon fauEuHelperTaxon = fauEuTaxonMap.get(taxonUuid);
518 Reference citation;
519 String microCitation;
520 DescriptionElementSource originalSource;
521 Synonym syn;
522 for (FaunaEuropaeaReference storedReference : fauEuHelperTaxon.getReferences()) {
523
524 TextData textData = TextData.NewInstance(Feature.CITATION());
525
526 citation = references.get(storedReference.getReferenceId());
527 microCitation = storedReference.getPage();
528 originalSource = DescriptionElementSource.NewInstance(null, null, citation, microCitation, null, null);
529 if (isSynonym){
530 syn = CdmBase.deproxy(taxonBase, Synonym.class);
531 originalSource.setNameUsedInSource(syn.getName());
532 }
533 textData.addSource(originalSource);
534 taxonNameDescription.addElement(textData);
535 }
536 }
537 }
538 if(logger.isInfoEnabled()) {
539 logger.info("i = " + i + " - Transaction committed");
540 }
541
542 // save taxa
543 getTaxonService().save(taxonList);
544 commitTransaction(txStatus);
545
546 }
547
548
549
550
551
552
553 /* (non-Javadoc)
554 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
555 */
556 protected boolean isIgnore(FaunaEuropaeaImportState state){
557 return (state.getConfig().getDoReferences() == IImportConfigurator.DO_REFERENCES.NONE);
558 }
559
560 }