b3befe46fe3a207c847d0f0061519283285664b5
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / faunaEuropaea / FaunaEuropaeaRelTaxonIncludeImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.faunaEuropaea;
11
12 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.A_AUCT;
13 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.P_PARENTHESIS;
14 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.Q_NO_RESTRICTION;
15 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.R_GENUS;
16 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.R_SUBGENUS;
17 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.R_SPECIES;
18 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.R_SUBSPECIES;
19 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.T_STATUS_ACCEPTED;
20 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.T_STATUS_NOT_ACCEPTED;
21
22 import java.sql.ResultSet;
23 import java.sql.SQLException;
24 import java.util.Collection;
25 import java.util.HashMap;
26 import java.util.HashSet;
27 import java.util.Iterator;
28 import java.util.List;
29 import java.util.Map;
30 import java.util.Set;
31 import java.util.UUID;
32
33 import org.apache.log4j.Logger;
34 import org.hibernate.Session;
35 import org.springframework.stereotype.Component;
36 import org.springframework.transaction.TransactionStatus;
37
38 import eu.etaxonomy.cdm.api.service.IService;
39 import eu.etaxonomy.cdm.common.CdmUtils;
40 import eu.etaxonomy.cdm.io.berlinModel.CdmOneToManyMapper;
41 import eu.etaxonomy.cdm.io.berlinModel.CdmStringMapper;
42 import eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState;
43 import eu.etaxonomy.cdm.io.common.CdmAttributeMapperBase;
44 import eu.etaxonomy.cdm.io.common.CdmSingleAttributeMapperBase;
45 import eu.etaxonomy.cdm.io.common.ICdmIO;
46 import eu.etaxonomy.cdm.io.common.IImportConfigurator;
47 import eu.etaxonomy.cdm.io.common.ImportHelper;
48 import eu.etaxonomy.cdm.io.common.MapWrapper;
49 import eu.etaxonomy.cdm.io.common.Source;
50 import eu.etaxonomy.cdm.io.profiler.ProfilerController;
51 import eu.etaxonomy.cdm.io.tcsxml.in.TcsXmlImportState;
52 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
53 import eu.etaxonomy.cdm.model.common.CdmBase;
54 import eu.etaxonomy.cdm.model.common.ISourceable;
55 import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
56 import eu.etaxonomy.cdm.model.common.OriginalSource;
57 import eu.etaxonomy.cdm.model.name.Rank;
58 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
59 import eu.etaxonomy.cdm.model.name.ZoologicalName;
60 import eu.etaxonomy.cdm.model.reference.Database;
61 import eu.etaxonomy.cdm.model.reference.Generic;
62 import eu.etaxonomy.cdm.model.reference.PublicationBase;
63 import eu.etaxonomy.cdm.model.reference.Publisher;
64 import eu.etaxonomy.cdm.model.reference.ReferenceBase;
65 import eu.etaxonomy.cdm.model.taxon.Synonym;
66 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
67 import eu.etaxonomy.cdm.model.taxon.Taxon;
68 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
69 import eu.etaxonomy.cdm.model.taxon.TaxonomicTree;
70 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
71
72 import com.yourkit.api.Controller;
73
74
75
76 /**
77 * @author a.babadshanjan
78 * @created 12.05.2009
79 * @version 1.0
80 */
81 @Component
82 public class FaunaEuropaeaRelTaxonIncludeImport extends FaunaEuropaeaImportBase {
83
84 public static final String OS_NAMESPACE_TAXON = "Taxon";
85 private static final Logger logger = Logger.getLogger(FaunaEuropaeaRelTaxonIncludeImport.class);
86
87 /* Max number of taxa to retrieve (for test purposes) */
88 private int maxTaxa = 0;
89 /* Max number of taxa to be saved in CDM DB with one service call */
90 private int limit = 5000; // TODO: Make configurable
91 /* Max number of taxa to be retrieved from CDM DB with one service call */
92 private int limitRetrieve = 10000; // TODO: Make configurable
93 /* Interval for progress info message when retrieving taxa */
94 private int modCount = 10000;
95 /* Highest taxon index in the FauEu database */
96 private int highestTaxonIndex = 0;
97 /* Number of times method buildParentName() has been called for one taxon */
98 private int callCount = 0;
99 private Map<Integer, FaunaEuropaeaTaxon> fauEuTaxonMap = new HashMap();
100
101
102
103 /* (non-Javadoc)
104 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
105 */
106 @Override
107 protected boolean doCheck(FaunaEuropaeaImportState state) {
108 boolean result = true;
109 FaunaEuropaeaImportConfigurator fauEuConfig = state.getConfig();
110 logger.warn("Checking for Taxa not yet fully implemented");
111 result &= checkTaxonStatus(fauEuConfig);
112
113 return result;
114 }
115
116 /* (non-Javadoc)
117 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
118 */
119 protected boolean isIgnore(FaunaEuropaeaImportState state) {
120 return ! state.getConfig().isDoTaxa();
121 }
122
123 private boolean checkTaxonStatus(FaunaEuropaeaImportConfigurator fauEuConfig) {
124 boolean result = true;
125 // try {
126 Source source = fauEuConfig.getSource();
127 String sqlStr = "";
128 ResultSet rs = source.getResultSet(sqlStr);
129 return result;
130 // } catch (SQLException e) {
131 // e.printStackTrace();
132 // return false;
133 // }
134 }
135
136 /* (non-Javadoc)
137 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doInvoke(eu.etaxonomy.cdm.io.common.IImportConfigurator, eu.etaxonomy.cdm.api.application.CdmApplicationController, java.util.Map)
138 */
139 protected boolean doInvokeAlter(FaunaEuropaeaImportState state) {
140
141 boolean success = true;
142
143 if(logger.isInfoEnabled()) { logger.info("Start making taxa..."); }
144
145 // TransactionStatus txStatus = startTransaction();
146
147 success = retrieveTaxa(state, fauEuTaxonMap, Q_NO_RESTRICTION);
148 success = processTaxaFromDatabase(state, fauEuTaxonMap);
149
150 // commitTransaction(txStatus);
151
152 logger.info("End making taxa...");
153 return success;
154 }
155
156
157 protected boolean doInvoke(FaunaEuropaeaImportState state) {
158
159 boolean success = true;
160
161
162 ProfilerController.memorySnapshot();
163
164 Map<String, MapWrapper<? extends CdmBase>> stores = state.getStores();
165 MapWrapper<TaxonBase> taxonStore = (MapWrapper<TaxonBase>)stores.get(ICdmIO.TAXON_STORE);
166 taxonStore.makeEmpty();
167 MapWrapper<TeamOrPersonBase> authorStore = (MapWrapper<TeamOrPersonBase>)stores.get(ICdmIO.TEAM_STORE);
168 authorStore.makeEmpty();
169
170 if(logger.isInfoEnabled()) { logger.info("Start making taxonomically included relationships..."); }
171
172 // TransactionStatus txStatus = startTransaction();
173
174 success = retrieveChildParentUuidMap(state);
175 ProfilerController.memorySnapshot();
176 success = createRelationships(state);
177
178 // commitTransaction(txStatus);
179
180 logger.info("End making taxa...");
181 ProfilerController.memorySnapshot();
182
183 return success;
184 }
185
186 /** Retrieve child-parent uuid map from CDM DB */
187 private boolean retrieveChildParentUuidMap(FaunaEuropaeaImportState state) {
188
189 Map<UUID, UUID> childParentMap = state.getChildParentMap();
190 FaunaEuropaeaImportConfigurator fauEuConfig = state.getConfig();
191 Source source = fauEuConfig.getSource();
192 int i = 0;
193 boolean success = true;
194
195 try {
196
197 String strQuery =
198 " SELECT dbo.Taxon.UUID AS ChildUuid, Parent.UUID AS ParentUuid " +
199 " FROM dbo.Taxon INNER JOIN dbo.Taxon AS Parent " +
200 " ON dbo.Taxon.TAX_TAX_IDPARENT = Parent.TAX_ID " +
201 " WHERE (dbo.Taxon.TAX_VALID <> 0) AND (dbo.Taxon.TAX_AUT_ID <> " + A_AUCT + ")";
202
203 if (logger.isInfoEnabled()) {
204 logger.info("Query: " + strQuery);
205 }
206
207 ResultSet rs = source.getResultSet(strQuery);
208
209 while (rs.next()) {
210
211 if ((i++ % modCount) == 0 && i != 1 ) {
212 if(logger.isInfoEnabled()) {
213 logger.info("Parent-child mappings retrieved: " + (i-1));
214 }
215 }
216
217 String childUuidStr = rs.getString("ChildUuid");
218 String parentUuidStr = rs.getString("ParentUuid");
219 UUID childUuid = UUID.fromString(childUuidStr);
220 UUID parentUuid = UUID.fromString(parentUuidStr);
221
222 if (!childParentMap.containsKey(childUuid)) {
223
224 childParentMap.put(childUuid, parentUuid);
225
226 } else {
227 if(logger.isDebugEnabled()) {
228 logger.debug("Duplicated child UUID (" + childUuid + ")");
229 }
230 }
231 }
232
233 } catch (SQLException e) {
234 logger.error("SQLException:" + e);
235 success = false;
236 }
237 return success;
238 }
239
240
241 /** Retrieve taxa from FauEu DB and build FauEuTaxonMap only */
242 private boolean retrieveTaxa(FaunaEuropaeaImportState state,
243 Map<Integer, FaunaEuropaeaTaxon> fauEuTaxonMap, int valid) {
244
245 Map<String, MapWrapper<? extends CdmBase>> stores = state.getStores();
246 MapWrapper<TaxonBase> taxonStore = (MapWrapper<TaxonBase>)stores.get(ICdmIO.TAXON_STORE);
247 FaunaEuropaeaImportConfigurator fauEuConfig = state.getConfig();
248 ReferenceBase<?> sourceRef = fauEuConfig.getSourceReference();
249 MapWrapper<TeamOrPersonBase> authorStore = (MapWrapper<TeamOrPersonBase>)stores.get(ICdmIO.TEAM_STORE);
250
251 Source source = fauEuConfig.getSource();
252 // String namespace = "Taxon";
253 int i = 0;
254 boolean success = true;
255
256 try {
257
258 String strQuery =
259 " SELECT MAX(TAX_ID) AS TAX_ID FROM dbo.Taxon ";
260
261 ResultSet rs = source.getResultSet(strQuery);
262 while (rs.next()) {
263 int maxTaxonId = rs.getInt("TAX_ID");
264 highestTaxonIndex = maxTaxonId;
265 }
266
267 String top = "";
268 if (maxTaxa > 0) {
269 top = "TOP " + maxTaxa;
270 }
271
272 String validClause = "";
273 if (valid == T_STATUS_ACCEPTED || valid == T_STATUS_NOT_ACCEPTED) {
274 validClause = " AND " + " TAX_VALID = " + valid;
275 }
276
277 strQuery =
278 " SELECT " + top + " Taxon.*, rank.*, author.* " +
279 " FROM dbo.Taxon " +
280 " LEFT OUTER JOIN dbo.author ON dbo.Taxon.TAX_AUT_ID = dbo.author.aut_id " +
281 " LEFT OUTER JOIN dbo.rank ON dbo.Taxon.TAX_RNK_ID = dbo.rank.rnk_id " +
282 " WHERE (1=1)" +
283 validClause;
284
285 if (logger.isDebugEnabled()) {
286 logger.debug("Query: " + strQuery);
287 }
288 rs = source.getResultSet(strQuery);
289
290 while (rs.next()) {
291
292 if ((i++ % modCount) == 0 && i != 1 ) {
293 if(logger.isInfoEnabled()) {
294 logger.info("Taxa retrieved: " + (i-1));
295 }
296 }
297
298 int taxonId = rs.getInt("TAX_ID");
299 String localName = rs.getString("TAX_NAME");
300 int rankId = rs.getInt("TAX_RNK_ID");
301 int parentId = rs.getInt("TAX_TAX_IDPARENT");
302 int familyId = rs.getInt("TAX_TAX_IDFAMILY");
303 int originalGenusId = rs.getInt("TAX_TAX_IDGENUS");
304 int autId = rs.getInt("TAX_AUT_ID");
305 int status = rs.getInt("TAX_VALID");
306 int year = rs.getInt("TAX_YEAR");
307 int parenthesis = rs.getInt("TAX_PARENTHESIS");
308 String autName = rs.getString("aut_name");
309 Rank rank = null;
310 // UUID taxonBaseUuid = UUID.randomUUID();
311
312 FaunaEuropaeaTaxon fauEuTaxon = new FaunaEuropaeaTaxon();
313 // fauEuTaxon.setUuid(taxonBaseUuid);
314 fauEuTaxon.setLocalName(localName);
315 fauEuTaxon.setParentId(parentId);
316 fauEuTaxon.setOriginalGenusId(originalGenusId);
317 fauEuTaxon.setId(taxonId);
318 fauEuTaxon.setRankId(rankId);
319 fauEuTaxon.setYear(year);
320 fauEuTaxon.setAuthor(autName);
321 if (parenthesis == P_PARENTHESIS) {
322 fauEuTaxon.setParenthesis(true);
323 } else {
324 fauEuTaxon.setParenthesis(false);
325 }
326 if (status == T_STATUS_ACCEPTED) {
327 fauEuTaxon.setValid(true);
328 } else {
329 fauEuTaxon.setValid(false);
330 }
331
332 try {
333 rank = FaunaEuropaeaTransformer.rankId2Rank(rs, false);
334 } catch (UnknownCdmTypeException e) {
335 logger.warn("Taxon (" + taxonId + ") has unknown rank (" + rankId + ") and could not be saved.");
336 continue;
337 } catch (NullPointerException e) {
338 logger.warn("Taxon (" + taxonId + ") has rank null and can not be saved.");
339 continue;
340 }
341
342 try {
343
344
345 if (!fauEuTaxonMap.containsKey(taxonId)) {
346 if (fauEuTaxon == null) {
347 if (logger.isDebugEnabled()) {
348 logger.debug("Taxon base is null. Taxon (" + taxonId + ") ignored.");
349 }
350 continue;
351 }
352
353
354 // taxonStore.put(taxonId, taxonBase);
355
356 fauEuTaxonMap.put(taxonId, fauEuTaxon);
357
358 // if (logger.isDebugEnabled()) {
359 // logger.debug("Stored taxon base (" + taxonId + ") " + localName);
360 // }
361 } else {
362 logger.warn("Not imported taxon base with duplicated TAX_ID (" + taxonId +
363 ") " + localName);
364 }
365 } catch (Exception e) {
366 logger.warn("An exception occurred when creating taxon base with id " + taxonId +
367 ". Taxon base could not be saved.");
368 }
369 }
370 } catch (SQLException e) {
371 logger.error("SQLException:" + e);
372 success = false;
373 }
374
375 return success;
376 }
377
378
379 public Map<UUID, UUID> partMap(int border, Map<UUID, UUID> map) {
380
381 if (logger.isInfoEnabled()) {
382 logger.info("Map size: " + map.size());
383 }
384 Set<Map.Entry<UUID, UUID>> entries = map.entrySet();
385 Iterator<Map.Entry<UUID, UUID>> entryIter = entries.iterator();
386 Map<UUID, UUID> partMap = new HashMap<UUID, UUID>();
387
388 for (int i = 0; i < border; i++) {
389 //while (entryIter.hasNext()) {
390
391 Map.Entry<UUID, UUID> mapEntry = (Map.Entry<UUID, UUID>)entryIter.next();
392 partMap.put(mapEntry.getKey(), mapEntry.getValue());
393 entryIter.remove();
394 }
395
396 if (logger.isDebugEnabled()) {
397 logger.debug("Map size: " + map.size());
398 }
399 return partMap;
400 }
401
402 // public Map<UUID, UUID> childParentMap partMap(int start, int limit, Map<UUID, UUID> childParentMap) {
403 //
404 // int index = 0;
405 //
406 // for (int i = 0; i < limit; i++) {
407 //
408 // int j = start + i;
409 //
410 // Object object = childParentMap.get(j);
411 // if(object != null) {
412 // childParentMap.put(index, childParentMap.get(j));
413 // index++;
414 // } else {
415 // if (logger.isDebugEnabled()) { logger.debug("Object (" + j + ") is null"); }
416 // }
417 // }
418 // return (Map<UUID, UUID> childParentMap)internalPartMap.values();
419 // }
420
421
422 /** Creates parent-child relationships.
423 * Single Parent-child pairs are retrieved via findByUUID(UUID) from CDM DB
424 * This takes inacceptable long time. */
425 private boolean createRelationships_(FaunaEuropaeaImportState state) {
426
427 Map<String, MapWrapper<? extends CdmBase>> stores = state.getStores();
428 MapWrapper<TaxonBase> taxonStore = (MapWrapper<TaxonBase>)stores.get(ICdmIO.TAXON_STORE);
429 taxonStore.makeEmpty();
430 Map<UUID, UUID> childParentMap = state.getChildParentMap();
431 ReferenceBase<?> sourceRef = state.getConfig().getSourceReference();
432
433 int upperBorder = childParentMap.size();
434 int nbrOfBlocks = 0;
435
436 boolean success = true;
437
438 if (upperBorder < limit) { // TODO: test with critical values
439 limit = upperBorder;
440 } else {
441 nbrOfBlocks = upperBorder / limit;
442 }
443
444 if(logger.isInfoEnabled()) {
445 logger.info("number of child-parent pairs = " + upperBorder
446 + ", limit = " + limit
447 + ", number of blocks = " + nbrOfBlocks);
448 }
449
450 for (int j = 1; j <= nbrOfBlocks + 1; j++) {
451 int offset = j - 1;
452 int start = offset * limit;
453
454 if(logger.isInfoEnabled()) { logger.info("Processing child-parent pairs: " + start + " - " + (start + limit - 1)); }
455
456 if(logger.isInfoEnabled()) {
457 logger.info("index = " + j
458 + ", offset = " + offset
459 + ", start = " + start);
460 }
461
462 if (j == nbrOfBlocks + 1) {
463 limit = upperBorder - nbrOfBlocks * limit;
464 if(logger.isInfoEnabled()) { logger.info("number of blocks = " + nbrOfBlocks + " limit = " + limit); }
465 }
466
467 TransactionStatus txStatus = startTransaction();
468
469
470 // for (int k = 1; k <= start + offset; k++) { // TODO: test borders
471 // int k = 0;
472
473 Map<UUID, UUID> childParentPartMap = partMap(limit, childParentMap);
474 Set<TaxonBase> childSet = new HashSet<TaxonBase>(limit);
475
476 if (logger.isInfoEnabled()) {
477 logger.info("Partmap size: " + childParentPartMap.size());
478 }
479
480 for (UUID childUuid : childParentPartMap.keySet()) {
481 // for (UUID childUuid : childParentMap.keySet()) {
482
483 UUID parentUuid = childParentPartMap.get(childUuid);
484
485 try {
486 TaxonBase<?> parent = getTaxonService().findByUuid(parentUuid);
487 if (logger.isTraceEnabled()) {
488 logger.trace("Parent find called (" + parentUuid + ")");
489 }
490 TaxonBase<?> child = getTaxonService().findByUuid(childUuid);
491 if (logger.isTraceEnabled()) {
492 logger.trace("Child find called (" + childUuid + ")");
493 }
494 Taxon parentTaxon = parent.deproxy(parent, Taxon.class);
495 Taxon childTaxon = child.deproxy(child, Taxon.class);
496
497 if (childTaxon != null && parentTaxon != null) {
498
499 // makeTaxonomicallyIncluded(state, parentTaxon, childTaxon, sourceRef, null);
500
501 if (logger.isDebugEnabled()) {
502 logger.debug("Parent-child (" + parentUuid + "-" + childUuid +
503 ") relationship created");
504 }
505 if (!childSet.contains(childTaxon)) {
506
507 childSet.add(childTaxon);
508
509 if (logger.isTraceEnabled()) {
510 logger.trace("Child taxon (" + childUuid + ") added to Set");
511 }
512
513 } else {
514 if (logger.isDebugEnabled()) {
515 logger.debug("Duplicated child taxon (" + childUuid + ")");
516 }
517 }
518 } else {
519 if (logger.isDebugEnabled()) {
520 logger.debug("Parent(" + parentUuid + ") or child (" + childUuid + " is null");
521 }
522 }
523
524 // if (childTaxon != null && !childSet.contains(childTaxon)) {
525 // childSet.add(childTaxon);
526 // if (logger.isDebugEnabled()) {
527 // logger.debug("Child taxon (" + childUuid + ") added to Set");
528 // }
529 // } else {
530 // if (logger.isDebugEnabled()) {
531 // logger.debug("Duplicated child taxon (" + childUuid + ")");
532 // }
533 // }
534
535 } catch (Exception e) {
536 logger.error("Error creating taxonomically included relationship parent-child (" +
537 parentUuid + "-" + childUuid + ")");
538 }
539
540 }
541 getTaxonService().saveTaxonAll(childSet);
542 commitTransaction(txStatus);
543 }
544 return success;
545 }
546
547
548 /* Creates parent-child relationships.
549 * Parent-child pairs are retrieved in blocks via findByUUID(Set<UUID>) from CDM DB.
550 * It takes about 5min to save a block of 5000 taxa.*/
551 private boolean createRelationships(FaunaEuropaeaImportState state) {
552
553 Map<UUID, UUID> childParentUuidMap = state.getChildParentMap();
554 ReferenceBase<?> sourceRef = state.getConfig().getSourceReference();
555 // UUID treeUuid = state.getTree(sourceRef).getUuid();
556 // TaxonomicTree tree = getTaxonService().getTaxonomicTreeByUuid(treeUuid);
557 // TaxonomicTree tree = state.getTree(sourceRef);
558
559 int upperBorder = childParentUuidMap.size();
560 int nbrOfBlocks = 0;
561
562 boolean success = true;
563
564 if (upperBorder < limit) { // TODO: test with critical values
565 limit = upperBorder;
566 } else {
567 nbrOfBlocks = upperBorder / limit;
568 }
569
570 if(logger.isInfoEnabled()) {
571 logger.info("number of child-parent pairs = " + upperBorder
572 + ", limit = " + limit
573 + ", number of blocks = " + nbrOfBlocks);
574 }
575
576 for (int j = 1; j <= nbrOfBlocks + 1; j++) {
577 int offset = j - 1;
578 int start = offset * limit;
579
580 if(logger.isInfoEnabled()) { logger.info("Processing child-parent pairs: " + start + " - " + (start + limit - 1)); }
581
582 if(logger.isInfoEnabled()) {
583 logger.info("index = " + j
584 + ", offset = " + offset
585 + ", start = " + start);
586 }
587
588 if (j == nbrOfBlocks + 1) {
589 limit = upperBorder - nbrOfBlocks * limit;
590 if(logger.isInfoEnabled()) { logger.info("number of blocks = " + nbrOfBlocks + " limit = " + limit); }
591 }
592
593 TransactionStatus txStatus = startTransaction();
594 //add tree to new session
595 TaxonomicTree tree = state.getTree(sourceRef);
596 if (tree == null){
597 tree = makeTree(state, sourceRef);
598 }
599 getTaxonService().saveTaxonomicTree(tree);
600
601 Map<UUID, UUID> childParentPartUuidMap = partMap(limit, childParentUuidMap);
602 Set<TaxonBase> childSet = new HashSet<TaxonBase>(limit);
603
604 Set<UUID> childKeysSet = childParentPartUuidMap.keySet();
605 Set<UUID> parentValuesSet = new HashSet<UUID>(childParentPartUuidMap.values());
606
607 if (logger.isInfoEnabled()) {
608 logger.info("Start reading children and parents");
609 }
610 List<TaxonBase> children = getTaxonService().findByUuid(childKeysSet);
611 List<TaxonBase> parents = getTaxonService().findByUuid(parentValuesSet);
612 Map<UUID, TaxonBase> parentsMap = new HashMap<UUID, TaxonBase>();
613 for (TaxonBase taxonBase : parents){
614 parentsMap.put(taxonBase.getUuid(), taxonBase);
615 }
616
617
618 if (logger.isInfoEnabled()) {
619 logger.info("End reading children and parents");
620 }
621
622
623 if (logger.isTraceEnabled()) {
624 for (UUID uuid : childKeysSet) {
625 logger.trace("child uuid query: " + uuid);
626 }
627 }
628 if (logger.isTraceEnabled()) {
629 for (UUID uuid : parentValuesSet) {
630 logger.trace("parent uuid query: " + uuid);
631 }
632 }
633 if (logger.isTraceEnabled()) {
634 for (TaxonBase tb : children) {
635 logger.trace("child uuid result: " + tb.getUuid());
636 }
637 }
638 if (logger.isTraceEnabled()) {
639 for (TaxonBase tb : parents) {
640 logger.trace("parent uuid result: " + tb.getUuid());
641 }
642 }
643
644 UUID mappedParentUuid = null;
645 UUID parentUuid = null;
646 UUID childUuid = null;
647
648 for (TaxonBase child : children) {
649
650 try {
651 Taxon childTaxon = child.deproxy(child, Taxon.class);
652 childUuid = childTaxon.getUuid();
653 mappedParentUuid = childParentPartUuidMap.get(childUuid);
654 TaxonBase parent = null;
655
656 TaxonBase potentialParent = parentsMap.get(mappedParentUuid);
657 // for (TaxonBase potentialParent : parents ) {
658 // parentUuid = potentialParent.getUuid();
659 // if(parentUuid.equals(mappedParentUuid)) {
660 parent = potentialParent;
661 if (logger.isDebugEnabled()) {
662 logger.debug("Parent (" + parentUuid + ") found for child (" + childUuid + ")");
663 }
664 // break;
665 // }
666 // }
667
668 Taxon parentTaxon = parent.deproxy(parent, Taxon.class);
669
670 if (childTaxon != null && parentTaxon != null) {
671
672 // makeTaxonomicallyIncluded(state, parentTaxon, childTaxon, sourceRef, null, tree);
673 makeTaxonomicallyIncluded(state, parentTaxon, childTaxon, sourceRef, null);
674
675 if (logger.isDebugEnabled()) {
676 logger.debug("Parent-child (" + parentUuid + "-" + childUuid +
677 ") relationship created");
678 }
679 if (!childSet.contains(childTaxon)) {
680
681 childSet.add(childTaxon);
682
683 if (logger.isTraceEnabled()) {
684 logger.trace("Child taxon (" + childUuid + ") added to Set");
685 }
686
687 } else {
688 if (logger.isDebugEnabled()) {
689 logger.debug("Duplicated child taxon (" + childUuid + ")");
690 }
691 }
692 } else {
693 if (logger.isDebugEnabled()) {
694 logger.debug("Parent(" + parentUuid + ") or child (" + childUuid + " is null");
695 }
696 }
697
698 if (childTaxon != null && !childSet.contains(childTaxon)) {
699 childSet.add(childTaxon);
700 if (logger.isDebugEnabled()) {
701 logger.debug("Child taxon (" + childUuid + ") added to Set");
702 }
703 } else {
704 if (logger.isDebugEnabled()) {
705 logger.debug("Duplicated child taxon (" + childUuid + ")");
706 }
707 }
708
709 } catch (Exception e) {
710 logger.error("Error creating taxonomically included relationship parent-child (" +
711 parentUuid + "-" + childUuid + ")");
712 }
713
714 }
715 if (logger.isInfoEnabled()) {
716 logger.info("Start saving childSet");
717 }
718 getTaxonService().saveTaxonAll(childSet);
719 if (logger.isInfoEnabled()) {
720 logger.info("End saving childSet");
721 }
722 // getTaxonService().clear();
723 // if (logger.isInfoEnabled()) {
724 // logger.info("End clearing session");
725 // }
726 commitTransaction(txStatus);
727 if (logger.isInfoEnabled()) {
728 logger.info("End commit transaction");
729 }
730 parentValuesSet = null;
731 childSet = null;
732 childParentPartUuidMap = null;
733 children = null;
734 parents = null;
735 }
736 return success;
737 }
738
739
740 /* Creates parent-child relationships.
741 * Taxon bases are retrieved in blocks from CDM DB.
742 * Parent is retrieved from CDM DB via original source id if not found in current block.
743 * In case of blocksize = 20.000 this takes ca. 1-2 hours per block.
744 * */
745 private boolean createRelationships_old(FaunaEuropaeaTaxon fauEuTaxon,
746 TaxonBase<?> taxonBase, TaxonNameBase<?,?> taxonName, List<Taxon> taxa,
747 Map<Integer, FaunaEuropaeaTaxon> fauEuTaxonMap, FaunaEuropaeaImportState state) {
748
749 int parentId = fauEuTaxon.getParentId();
750 int taxonId = fauEuTaxon.getId();
751 FaunaEuropaeaTaxon parentFauEuTaxon = fauEuTaxonMap.get(parentId);
752 if (parentFauEuTaxon == null) {
753 if (logger.isInfoEnabled()) {
754 logger.info("Parent taxon is null (" + parentId + ")");
755 }
756 return false;
757 }
758 // UUID parentUuid = parentFauEuTaxon.getUuid();
759 Map<String, MapWrapper<? extends CdmBase>> stores = state.getStores();
760 MapWrapper<TaxonBase> parentTaxonStore = (MapWrapper<TaxonBase>)stores.get(ICdmIO.TAXON_STORE);
761 ReferenceBase<?> sourceRef = state.getConfig().getSourceReference();
762
763 TaxonBase<?> parentTaxonBase = null;
764
765 // for (TaxonBase<?> potentialParentTaxon : taxonBases) {
766 // if(potentialParentTaxon.getUuid().equals(parentUuid)) {
767 // parentTaxonBase = potentialParentTaxon;
768 // break;
769 // }
770 // }
771 // if (parentTaxonBase == null) {
772 // parentTaxonBase = getTaxonService().getTaxonByUuid(parentUuid);
773 // }
774
775 // TODO: Copy parents from taxonBases to parentTaxonStore
776
777 if (parentTaxonStore.containsId(parentId)) {
778 parentTaxonBase = parentTaxonStore.get(parentId);
779 if (logger.isDebugEnabled()) {
780 logger.debug("Parent (" + parentId + ") found in parent taxon store");
781 }
782 // } else {
783 // for (TaxonBase<?> potentialParentTaxon : taxonBases) {
784 // if(potentialParentTaxon.getId() == parentId) {
785 // parentTaxonBase = potentialParentTaxon;
786 // if (logger.isInfoEnabled()) {
787 // logger.info("Parent (" + parentId + ") found in taxon base list");
788 // }
789 // break;
790 // }
791 // }
792 }
793 if (parentTaxonBase == null) {
794 ISourceable sourceable =
795 getCommonService().getSourcedObjectByIdInSource(TaxonBase.class, Integer.toString(parentId), OS_NAMESPACE_TAXON);
796 parentTaxonBase = ((IdentifiableEntity)sourceable).deproxy(sourceable, TaxonBase.class);
797 if (logger.isDebugEnabled()) {
798 logger.debug("Parent (" + parentId + ") retrieved from DB via original source id");
799 }
800 }
801
802 if (!parentTaxonStore.containsId(parentId)) {
803 parentTaxonStore.put(parentId, parentTaxonBase);
804 }
805
806
807
808 Taxon parentTaxon = parentTaxonBase.deproxy(parentTaxonBase, Taxon.class);
809
810 boolean success = true;
811
812 // if (!fauEuTaxon.isValid()) { // FauEu Synonym
813
814 // } else if (fauEuTaxon.isValid()) { // FauEu Taxon
815
816 Taxon taxon = taxonBase.deproxy(taxonBase, Taxon.class);
817
818 try {
819 // add this taxon as child to parent
820 if (parentTaxon != null) {
821 // makeTaxonomicallyIncluded(state, parentTaxon, taxon, sourceRef, null);
822 if (logger.isDebugEnabled()) {
823 logger.debug("Parent-child (" + parentId + "-" + taxonId +
824 ") relationship created");
825 }
826 }
827
828 } catch (Exception e) {
829 logger.error("Error creating taxonomically included relationship Parent-child (" +
830 parentId + "-" + taxonId + ")");
831 }
832
833
834 // }
835
836 return success;
837 }
838
839
840 private boolean makeTaxonomicallyIncluded(FaunaEuropaeaImportState state, Taxon toTaxon, Taxon fromTaxon,
841 ReferenceBase citation, String microCitation){
842 boolean success = true;
843 ReferenceBase sec = toTaxon.getSec();
844 sec = CdmBase.deproxy(sec, ReferenceBase.class);
845 sec = citation;
846 TaxonomicTree tree = state.getTree(sec);
847
848
849
850 // Session session = getTaxonService().getSession();
851
852 // if (session.contains(sec)) {
853 // logger.debug("Sec contained in session. Id = " + sec.getId());
854 // } else {
855 // logger.info("Sec not contained in session. Id = " + sec.getId());
856 // getReferenceService().merge(sec);
857 // }
858
859 if (tree == null){
860 tree = makeTree(state, sec);
861 }
862
863 // if (session.contains(tree)) {
864 // logger.debug("Taxonomic tree contained in session. Id = " + tree.getId());
865 // } else {
866 // logger.info("Taxonomic tree not contained in session. Id = " + tree.getId());
867 // UUID treeUuid = state.getTree(sec).getUuid();
868 // tree = getTaxonService().getTaxonomicTreeByUuid(treeUuid);
869 // logger.info("Tree retrieved");
870 // }
871
872 success = tree.addParentChild(toTaxon, fromTaxon, citation, microCitation);
873 return success;
874 }
875
876
877 // public int calculateBlockSize(int limit, int upperBorder) {
878 //
879 // int blockSize = 0;
880 //
881 // if (upperBorder < limit) {
882 // limit = upperBorder;
883 // } else {
884 // blockSize = upperBorder / limit;
885 // }
886 // }
887
888
889 private boolean processTaxaFromDatabase(FaunaEuropaeaImportState state,
890 Map<Integer, FaunaEuropaeaTaxon> fauEuTaxonMap) {
891
892 if(logger.isInfoEnabled()) { logger.info("Processing taxa second pass..."); }
893
894 MapWrapper<TaxonBase> taxonBaseMap = new MapWrapper<TaxonBase>(null);
895
896 int nbrOfTaxa = getTaxonService().count(Taxon.class);
897 int n = 0;
898
899 boolean success = true;
900
901 if (nbrOfTaxa < limit) { // TODO: test with critical values
902 limit = nbrOfTaxa;
903 } else {
904 n = nbrOfTaxa / limit;
905 }
906
907 if(logger.isInfoEnabled()) {
908 logger.info("number of taxa = " + nbrOfTaxa
909 + ", limit = " + limit
910 + ", n = " + n);
911 }
912
913 // process taxa in chunks of <=limit
914
915 for (int j = 1; j <= n + 1; j++)
916 {
917 int offset = j - 1;
918 int start = offset * limit;
919
920 if(logger.isInfoEnabled()) { logger.info("Processing taxa: " + start + " - " + (start + limit - 1)); }
921
922 if(logger.isInfoEnabled()) {
923 logger.info("index = " + j
924 + ", offset = " + offset
925 + ", start = " + start);
926 }
927
928 if (j == n + 1) {
929 limit = nbrOfTaxa - n * limit;
930 if(logger.isInfoEnabled()) { logger.info("n = " + n + " limit = " + limit); }
931 }
932
933 TransactionStatus txStatus = startTransaction();
934
935 List<Taxon> taxa = getTaxonService().getAllTaxa(limit, start);
936 if(logger.isInfoEnabled()) {
937 logger.info(taxa.size() + " taxa retrieved from CDM DB");
938 }
939
940 for (TaxonBase taxonBase : taxa) {
941
942 TaxonNameBase<?,?> taxonName = taxonBase.getName();
943
944 FaunaEuropaeaTaxon fauEuTaxon = findFauEuTaxonByOriginalSourceId(taxonBase, fauEuTaxonMap);
945
946
947 if (logger.isDebugEnabled()) {
948 logger.debug("Taxon # " + fauEuTaxon.getId());
949 }
950 //createRelationships(fauEuTaxon, taxonBase, taxonName, taxa, fauEuTaxonMap, state);
951 }
952
953 getTaxonService().saveTaxonAll(taxa);
954 taxa = null;
955
956 commitTransaction(txStatus);
957
958 // empty parent taxon store
959 // Map<String, MapWrapper<? extends CdmBase>> stores = state.getStores();
960 // MapWrapper<TaxonBase> parentTaxonStore = (MapWrapper<TaxonBase>)stores.get(ICdmIO.TAXON_STORE);
961 // parentTaxonStore.makeEmpty();
962 }
963 return success;
964 }
965
966
967 private FaunaEuropaeaTaxon findFauEuTaxonByOriginalSourceId(TaxonBase<?> taxonBase,
968 Map<Integer, FaunaEuropaeaTaxon> fauEuTaxonMap) {
969
970 Set set = taxonBase.getSources();
971 Object[] array = set.toArray();
972 if (array.length == 0) { return null; }
973 OriginalSource os = (OriginalSource) taxonBase.getSources().toArray()[0];
974 String taxonBaseIdStr = os.getIdInSource();
975 int taxonBaseId = Integer.parseInt(taxonBaseIdStr);
976 FaunaEuropaeaTaxon fauEuTaxon = fauEuTaxonMap.get(taxonBaseId);
977
978 return fauEuTaxon;
979 }
980
981
982 }