Fauna Europaea Import
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / faunaEuropaea / FaunaEuropaeaRelTaxonIncludeImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.faunaEuropaea;
11
12 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.A_AUCT;
13 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.P_PARENTHESIS;
14 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.Q_NO_RESTRICTION;
15 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.R_GENUS;
16 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.R_SUBGENUS;
17 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.R_SPECIES;
18 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.R_SUBSPECIES;
19 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.T_STATUS_ACCEPTED;
20 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.T_STATUS_NOT_ACCEPTED;
21
22 import java.sql.ResultSet;
23 import java.sql.SQLException;
24 import java.util.Collection;
25 import java.util.HashMap;
26 import java.util.HashSet;
27 import java.util.Iterator;
28 import java.util.List;
29 import java.util.Map;
30 import java.util.Set;
31 import java.util.UUID;
32
33 import org.apache.log4j.Logger;
34 import org.springframework.stereotype.Component;
35 import org.springframework.transaction.TransactionStatus;
36
37 import eu.etaxonomy.cdm.common.CdmUtils;
38 import eu.etaxonomy.cdm.io.berlinModel.CdmOneToManyMapper;
39 import eu.etaxonomy.cdm.io.berlinModel.CdmStringMapper;
40 import eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState;
41 import eu.etaxonomy.cdm.io.common.CdmAttributeMapperBase;
42 import eu.etaxonomy.cdm.io.common.CdmSingleAttributeMapperBase;
43 import eu.etaxonomy.cdm.io.common.ICdmIO;
44 import eu.etaxonomy.cdm.io.common.IImportConfigurator;
45 import eu.etaxonomy.cdm.io.common.ImportHelper;
46 import eu.etaxonomy.cdm.io.common.MapWrapper;
47 import eu.etaxonomy.cdm.io.common.Source;
48 import eu.etaxonomy.cdm.io.tcsxml.in.TcsXmlImportState;
49 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
50 import eu.etaxonomy.cdm.model.common.CdmBase;
51 import eu.etaxonomy.cdm.model.common.ISourceable;
52 import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
53 import eu.etaxonomy.cdm.model.common.OriginalSource;
54 import eu.etaxonomy.cdm.model.name.Rank;
55 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
56 import eu.etaxonomy.cdm.model.name.ZoologicalName;
57 import eu.etaxonomy.cdm.model.reference.Database;
58 import eu.etaxonomy.cdm.model.reference.Generic;
59 import eu.etaxonomy.cdm.model.reference.PublicationBase;
60 import eu.etaxonomy.cdm.model.reference.Publisher;
61 import eu.etaxonomy.cdm.model.reference.ReferenceBase;
62 import eu.etaxonomy.cdm.model.taxon.Synonym;
63 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
64 import eu.etaxonomy.cdm.model.taxon.Taxon;
65 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
66 import eu.etaxonomy.cdm.model.taxon.TaxonomicTree;
67 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
68
69
70 /**
71 * @author a.babadshanjan
72 * @created 12.05.2009
73 * @version 1.0
74 */
75 @Component
76 public class FaunaEuropaeaRelTaxonIncludeImport extends FaunaEuropaeaImportBase {
77
78 public static final String OS_NAMESPACE_TAXON = "Taxon";
79 private static final Logger logger = Logger.getLogger(FaunaEuropaeaRelTaxonIncludeImport.class);
80
81 /* Max number of taxa to retrieve (for test purposes) */
82 private int maxTaxa = 0;
83 /* Max number of taxa to be saved in CDM DB with one service call */
84 private int limit = 5000; // TODO: Make configurable
85 /* Max number of taxa to be retrieved from CDM DB with one service call */
86 private int limitRetrieve = 10000; // TODO: Make configurable
87 /* Interval for progress info message when retrieving taxa */
88 private int modCount = 10000;
89 /* Highest taxon index in the FauEu database */
90 private int highestTaxonIndex = 0;
91 /* Number of times method buildParentName() has been called for one taxon */
92 private int callCount = 0;
93 private Map<Integer, FaunaEuropaeaTaxon> fauEuTaxonMap = new HashMap();
94
95
96 /* (non-Javadoc)
97 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
98 */
99 @Override
100 protected boolean doCheck(FaunaEuropaeaImportState state) {
101 boolean result = true;
102 FaunaEuropaeaImportConfigurator fauEuConfig = state.getConfig();
103 logger.warn("Checking for Taxa not yet fully implemented");
104 result &= checkTaxonStatus(fauEuConfig);
105
106 return result;
107 }
108
109 /* (non-Javadoc)
110 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
111 */
112 protected boolean isIgnore(FaunaEuropaeaImportState state) {
113 return ! state.getConfig().isDoTaxa();
114 }
115
116 private boolean checkTaxonStatus(FaunaEuropaeaImportConfigurator fauEuConfig) {
117 boolean result = true;
118 // try {
119 Source source = fauEuConfig.getSource();
120 String sqlStr = "";
121 ResultSet rs = source.getResultSet(sqlStr);
122 return result;
123 // } catch (SQLException e) {
124 // e.printStackTrace();
125 // return false;
126 // }
127 }
128
129 /* (non-Javadoc)
130 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doInvoke(eu.etaxonomy.cdm.io.common.IImportConfigurator, eu.etaxonomy.cdm.api.application.CdmApplicationController, java.util.Map)
131 */
132 protected boolean doInvokeAlter(FaunaEuropaeaImportState state) {
133
134 boolean success = true;
135
136 if(logger.isInfoEnabled()) { logger.info("Start making taxa..."); }
137
138 TransactionStatus txStatus = startTransaction();
139
140 success = retrieveTaxa(state, fauEuTaxonMap, Q_NO_RESTRICTION);
141 success = processTaxaFromDatabase(state, fauEuTaxonMap);
142
143 commitTransaction(txStatus);
144
145 logger.info("End making taxa...");
146 return success;
147 }
148
149
150 protected boolean doInvoke(FaunaEuropaeaImportState state) {
151
152 boolean success = true;
153
154 if(logger.isInfoEnabled()) { logger.info("Start making taxa..."); }
155
156 TransactionStatus txStatus = startTransaction();
157
158 success = retrieveChildParentUuidMap(state);
159 success = createRelationships(state);
160
161 commitTransaction(txStatus);
162
163 logger.info("End making taxa...");
164 return success;
165 }
166
167
168 /** Retrieve child-parent uuid map from CDM DB */
169 private boolean retrieveChildParentUuidMap(FaunaEuropaeaImportState state) {
170
171 Map<UUID, UUID> childParentMap = state.getChildParentMap();
172 Map<String, MapWrapper<? extends CdmBase>> stores = state.getStores();
173 MapWrapper<TaxonBase> taxonStore = (MapWrapper<TaxonBase>)stores.get(ICdmIO.TAXON_STORE);
174 FaunaEuropaeaImportConfigurator fauEuConfig = state.getConfig();
175 ReferenceBase<?> sourceRef = fauEuConfig.getSourceReference();
176 Source source = fauEuConfig.getSource();
177 int i = 0;
178 boolean success = true;
179
180 try {
181
182 String strQuery =
183 " SELECT dbo.Taxon.UUID AS ChildUuid, Parent.UUID AS ParentUuid " +
184 " FROM dbo.Taxon INNER JOIN dbo.Taxon AS Parent " +
185 " ON dbo.Taxon.TAX_TAX_IDPARENT = Parent.TAX_ID " +
186 " WHERE (dbo.Taxon.TAX_VALID <> 0) AND (dbo.Taxon.TAX_AUT_ID <> " + A_AUCT + ")";
187
188 if (logger.isInfoEnabled()) {
189 logger.info("Query: " + strQuery);
190 }
191
192 ResultSet rs = source.getResultSet(strQuery);
193
194 while (rs.next()) {
195
196 if ((i++ % modCount) == 0 && i != 1 ) {
197 if(logger.isInfoEnabled()) {
198 logger.info("Taxa retrieved: " + (i-1));
199 }
200 }
201
202 String childUuidStr = rs.getString("ChildUuid");
203 String parentUuidStr = rs.getString("ParentUuid");
204 UUID childUuid = UUID.fromString(childUuidStr);
205 UUID parentUuid = UUID.fromString(parentUuidStr);
206
207 if (!childParentMap.containsKey(childUuid)) {
208
209 childParentMap.put(childUuid, parentUuid);
210
211 } else {
212 if(logger.isDebugEnabled()) {
213 logger.debug("Duplicated child UUID (" + childUuid + ")");
214 }
215 }
216 }
217
218 } catch (SQLException e) {
219 logger.error("SQLException:" + e);
220 success = false;
221 }
222 return success;
223 }
224
225
226 /** Retrieve taxa from FauEu DB and build FauEuTaxonMap only */
227 private boolean retrieveTaxa(FaunaEuropaeaImportState state,
228 Map<Integer, FaunaEuropaeaTaxon> fauEuTaxonMap, int valid) {
229
230 Map<String, MapWrapper<? extends CdmBase>> stores = state.getStores();
231 MapWrapper<TaxonBase> taxonStore = (MapWrapper<TaxonBase>)stores.get(ICdmIO.TAXON_STORE);
232 FaunaEuropaeaImportConfigurator fauEuConfig = state.getConfig();
233 ReferenceBase<?> sourceRef = fauEuConfig.getSourceReference();
234 MapWrapper<TeamOrPersonBase> authorStore = (MapWrapper<TeamOrPersonBase>)stores.get(ICdmIO.TEAM_STORE);
235
236 Source source = fauEuConfig.getSource();
237 // String namespace = "Taxon";
238 int i = 0;
239 boolean success = true;
240
241 try {
242
243 String strQuery =
244 " SELECT MAX(TAX_ID) AS TAX_ID FROM dbo.Taxon ";
245
246 ResultSet rs = source.getResultSet(strQuery);
247 while (rs.next()) {
248 int maxTaxonId = rs.getInt("TAX_ID");
249 highestTaxonIndex = maxTaxonId;
250 }
251
252 String top = "";
253 if (maxTaxa > 0) {
254 top = "TOP " + maxTaxa;
255 }
256
257 String validClause = "";
258 if (valid == T_STATUS_ACCEPTED || valid == T_STATUS_NOT_ACCEPTED) {
259 validClause = " AND " + " TAX_VALID = " + valid;
260 }
261
262 strQuery =
263 " SELECT " + top + " Taxon.*, rank.*, author.* " +
264 " FROM dbo.Taxon " +
265 " LEFT OUTER JOIN dbo.author ON dbo.Taxon.TAX_AUT_ID = dbo.author.aut_id " +
266 " LEFT OUTER JOIN dbo.rank ON dbo.Taxon.TAX_RNK_ID = dbo.rank.rnk_id " +
267 " WHERE (1=1)" +
268 validClause;
269
270 if (logger.isDebugEnabled()) {
271 logger.debug("Query: " + strQuery);
272 }
273 rs = source.getResultSet(strQuery);
274
275 while (rs.next()) {
276
277 if ((i++ % modCount) == 0 && i != 1 ) {
278 if(logger.isInfoEnabled()) {
279 logger.info("Taxa retrieved: " + (i-1));
280 }
281 }
282
283 int taxonId = rs.getInt("TAX_ID");
284 String localName = rs.getString("TAX_NAME");
285 int rankId = rs.getInt("TAX_RNK_ID");
286 int parentId = rs.getInt("TAX_TAX_IDPARENT");
287 int familyId = rs.getInt("TAX_TAX_IDFAMILY");
288 int originalGenusId = rs.getInt("TAX_TAX_IDGENUS");
289 int autId = rs.getInt("TAX_AUT_ID");
290 int status = rs.getInt("TAX_VALID");
291 int year = rs.getInt("TAX_YEAR");
292 int parenthesis = rs.getInt("TAX_PARENTHESIS");
293 String autName = rs.getString("aut_name");
294 Rank rank = null;
295 // UUID taxonBaseUuid = UUID.randomUUID();
296
297 FaunaEuropaeaTaxon fauEuTaxon = new FaunaEuropaeaTaxon();
298 // fauEuTaxon.setUuid(taxonBaseUuid);
299 fauEuTaxon.setLocalName(localName);
300 fauEuTaxon.setParentId(parentId);
301 fauEuTaxon.setOriginalGenusId(originalGenusId);
302 fauEuTaxon.setId(taxonId);
303 fauEuTaxon.setRankId(rankId);
304 fauEuTaxon.setYear(year);
305 fauEuTaxon.setAuthor(autName);
306 if (parenthesis == P_PARENTHESIS) {
307 fauEuTaxon.setParenthesis(true);
308 } else {
309 fauEuTaxon.setParenthesis(false);
310 }
311 if (status == T_STATUS_ACCEPTED) {
312 fauEuTaxon.setValid(true);
313 } else {
314 fauEuTaxon.setValid(false);
315 }
316
317 try {
318 rank = FaunaEuropaeaTransformer.rankId2Rank(rs, false);
319 } catch (UnknownCdmTypeException e) {
320 logger.warn("Taxon (" + taxonId + ") has unknown rank (" + rankId + ") and could not be saved.");
321 continue;
322 } catch (NullPointerException e) {
323 logger.warn("Taxon (" + taxonId + ") has rank null and can not be saved.");
324 continue;
325 }
326
327 try {
328
329 // ReferenceBase<?> sourceReference = fauEuConfig.getSourceReference();
330 // ReferenceBase<?> auctReference = fauEuConfig.getAuctReference();
331 //
332 // ZoologicalName zooName = ZoologicalName.NewInstance(rank);
333 // // set local name cache
334 // zooName.setNameCache(localName);
335 //
336 // TaxonBase<?> taxonBase;
337 //
338 // Synonym synonym;
339 // Taxon taxon;
340 // try {
341 // if ((status == T_STATUS_ACCEPTED) || (autId == A_AUCT)) { // taxon
342 // if (autId == A_AUCT) { // misapplied name
343 // taxon = Taxon.NewInstance(zooName, auctReference);
344 // if (logger.isDebugEnabled()) {
345 // logger.debug("Misapplied name created (" + taxonId + ")");
346 // }
347 // } else { // regular taxon
348 // taxon = Taxon.NewInstance(zooName, sourceReference);
349 // if (logger.isDebugEnabled()) {
350 // logger.debug("Taxon created (" + taxonId + ")");
351 // }
352 //
353 // if (fauEuTaxon.isParenthesis() && (fauEuTaxon.getOriginalGenusId() != 0)
354 // && (fauEuTaxon.getParentId() != fauEuTaxon.getOriginalGenusId())) {
355 //
356 // // create basionym
357 // TeamOrPersonBase<?> author = authorStore.get(autId);
358 // ZoologicalName basionym = ZoologicalName.NewInstance(rank);
359 // basionym.setNameCache(localName);
360 // basionym.setCombinationAuthorTeam(author);
361 // basionym.setPublicationYear(year);
362 // zooName.addBasionym(basionym, sourceReference, null, null);
363 // zooName.setBasionymAuthorTeam(author);
364 // if (logger.isDebugEnabled()) {
365 // logger.debug("Basionym created (" + taxonId + ")");
366 // }
367 //
368 // // create homotypic synonym
369 // Synonym homotypicSynonym = Synonym.NewInstance(basionym, sourceReference);
370 // taxon.addSynonym(homotypicSynonym, SynonymRelationshipType.HOMOTYPIC_SYNONYM_OF(),
371 // sourceReference, null);
372 // if (logger.isDebugEnabled()) {
373 // logger.debug("Homotypic synonym created (" + taxonId + ")");
374 // }
375 //
376 // }
377 //
378 // }
379 // taxonBase = taxon;
380 // } else if ((status == T_STATUS_NOT_ACCEPTED) && (autId != A_AUCT)) { // synonym
381 // synonym = Synonym.NewInstance(zooName, sourceReference);
382 // if (logger.isDebugEnabled()) {
383 // logger.debug("Synonym created (" + taxonId + ")");
384 // }
385 // taxonBase = synonym;
386 // } else {
387 // logger.warn("Unknown taxon status " + status + ". Taxon (" + taxonId + ") ignored.");
388 // continue;
389 // }
390 //
391 // taxonBase.setUuid(taxonBaseUuid);
392 //
393 // ImportHelper.setOriginalSource(taxonBase, fauEuConfig.getSourceReference(), taxonId, namespace);
394 //
395
396 // if (!taxonStore.containsId(taxonId)) {
397 // if (taxonBase == null) {
398 // if (logger.isDebugEnabled()) {
399 // logger.debug("Taxon base is null. Taxon (" + taxonId + ") ignored.");
400 // }
401 // continue;
402 // }
403
404 if (!fauEuTaxonMap.containsKey(taxonId)) {
405 if (fauEuTaxon == null) {
406 if (logger.isDebugEnabled()) {
407 logger.debug("Taxon base is null. Taxon (" + taxonId + ") ignored.");
408 }
409 continue;
410 }
411
412
413 // taxonStore.put(taxonId, taxonBase);
414
415 fauEuTaxonMap.put(taxonId, fauEuTaxon);
416
417 // if (logger.isDebugEnabled()) {
418 // logger.debug("Stored taxon base (" + taxonId + ") " + localName);
419 // }
420 } else {
421 logger.warn("Not imported taxon base with duplicated TAX_ID (" + taxonId +
422 ") " + localName);
423 }
424 } catch (Exception e) {
425 logger.warn("An exception occurred when creating taxon base with id " + taxonId +
426 ". Taxon base could not be saved.");
427 }
428 }
429 } catch (SQLException e) {
430 logger.error("SQLException:" + e);
431 success = false;
432 }
433
434 return success;
435 }
436
437
438 public Map<UUID, UUID> partMap(int border, Map<UUID, UUID> map) {
439
440 if (logger.isInfoEnabled()) {
441 logger.info("Map size: " + map.size());
442 }
443 Set<Map.Entry<UUID, UUID>> entries = map.entrySet();
444 Iterator<Map.Entry<UUID, UUID>> entryIter = entries.iterator();
445 Map<UUID, UUID> partMap = new HashMap<UUID, UUID>();
446
447 for (int i = 0; i < border; i++) {
448 //while (entryIter.hasNext()) {
449
450 Map.Entry<UUID, UUID> mapEntry = (Map.Entry<UUID, UUID>)entryIter.next();
451 partMap.put(mapEntry.getKey(), mapEntry.getValue());
452 entryIter.remove();
453 }
454
455 if (logger.isDebugEnabled()) {
456 logger.debug("Map size: " + map.size());
457 }
458 return partMap;
459 }
460
461 // public Map<UUID, UUID> childParentMap partMap(int start, int limit, Map<UUID, UUID> childParentMap) {
462 //
463 // int index = 0;
464 //
465 // for (int i = 0; i < limit; i++) {
466 //
467 // int j = start + i;
468 //
469 // Object object = childParentMap.get(j);
470 // if(object != null) {
471 // childParentMap.put(index, childParentMap.get(j));
472 // index++;
473 // } else {
474 // if (logger.isDebugEnabled()) { logger.debug("Object (" + j + ") is null"); }
475 // }
476 // }
477 // return (Map<UUID, UUID> childParentMap)internalPartMap.values();
478 // }
479
480
481 /** Creates parent-child relationships.
482 * Parent-child pairs are retrieved via UUID from CDM DB */
483 private boolean createRelationships(FaunaEuropaeaImportState state) {
484
485 Map<String, MapWrapper<? extends CdmBase>> stores = state.getStores();
486 MapWrapper<TaxonBase> taxonStore = (MapWrapper<TaxonBase>)stores.get(ICdmIO.TAXON_STORE);
487 taxonStore.makeEmpty();
488 Map<UUID, UUID> childParentMap = state.getChildParentMap();
489 ReferenceBase<?> sourceRef = state.getConfig().getSourceReference();
490
491 int upperBorder = childParentMap.size();
492 int nbrOfBlocks = 0;
493
494 boolean success = true;
495
496 if (upperBorder < limit) { // TODO: test with critical values
497 limit = upperBorder;
498 } else {
499 nbrOfBlocks = upperBorder / limit;
500 }
501
502 if(logger.isInfoEnabled()) {
503 logger.info("number of child-parent pairs = " + upperBorder
504 + ", limit = " + limit
505 + ", number of blocks = " + nbrOfBlocks);
506 }
507
508 for (int j = 1; j <= nbrOfBlocks + 1; j++) {
509 int offset = j - 1;
510 int start = offset * limit;
511
512 if(logger.isInfoEnabled()) { logger.info("Processing child-parent pairs: " + start + " - " + (start + limit - 1)); }
513
514 if(logger.isInfoEnabled()) {
515 logger.info("index = " + j
516 + ", offset = " + offset
517 + ", start = " + start);
518 }
519
520 if (j == nbrOfBlocks + 1) {
521 limit = upperBorder - nbrOfBlocks * limit;
522 if(logger.isInfoEnabled()) { logger.info("number of blocks = " + nbrOfBlocks + " limit = " + limit); }
523 }
524
525 TransactionStatus txStatus = startTransaction();
526
527 // for (int k = 1; k <= start + offset; k++) { // TODO: test borders
528 // int k = 0;
529
530 Map<UUID, UUID> childParentPartMap = partMap(limit, childParentMap);
531 Set<TaxonBase> childSet = new HashSet<TaxonBase>(limit);
532
533 if (logger.isInfoEnabled()) {
534 logger.info("Partmap size: " + childParentPartMap.size());
535 }
536
537 for (UUID childUuid : childParentPartMap.keySet()) {
538 // for (UUID childUuid : childParentMap.keySet()) {
539
540 UUID parentUuid = childParentPartMap.get(childUuid);
541
542 try {
543 TaxonBase<?> parent = getTaxonService().findByUuid(parentUuid);
544 if (logger.isTraceEnabled()) {
545 logger.trace("Parent find called (" + parentUuid + ")");
546 }
547 TaxonBase<?> child = getTaxonService().findByUuid(childUuid);
548 if (logger.isTraceEnabled()) {
549 logger.trace("Child find called (" + childUuid + ")");
550 }
551 Taxon parentTaxon = parent.deproxy(parent, Taxon.class);
552 Taxon childTaxon = parent.deproxy(child, Taxon.class);
553
554 if (childTaxon != null && parentTaxon != null) {
555
556 makeTaxonomicallyIncluded(state, parentTaxon, childTaxon, sourceRef, null);
557
558 if (logger.isDebugEnabled()) {
559 logger.debug("Parent-child (" + parentUuid + "-" + childUuid +
560 ") relationship created");
561 }
562 if (!childSet.contains(childTaxon)) {
563
564 childSet.add(childTaxon);
565
566 if (logger.isTraceEnabled()) {
567 logger.trace("Child taxon (" + childUuid + ") added to Set");
568 }
569
570 } else {
571 if (logger.isDebugEnabled()) {
572 logger.debug("Duplicated child taxon (" + childUuid + ")");
573 }
574 }
575 } else {
576 if (logger.isDebugEnabled()) {
577 logger.debug("Parent(" + parentUuid + ") or child (" + childUuid + " is null");
578 }
579 }
580
581 // if (childTaxon != null && !childSet.contains(childTaxon)) {
582 // childSet.add(childTaxon);
583 // if (logger.isDebugEnabled()) {
584 // logger.debug("Child taxon (" + childUuid + ") added to Set");
585 // }
586 // } else {
587 // if (logger.isDebugEnabled()) {
588 // logger.debug("Duplicated child taxon (" + childUuid + ")");
589 // }
590 // }
591
592 } catch (Exception e) {
593 logger.error("Error creating taxonomically included relationship parent-child (" +
594 parentUuid + "-" + childUuid + ")");
595 }
596
597 }
598 getTaxonService().saveTaxonAll(childSet);
599 commitTransaction(txStatus);
600 }
601 return success;
602 }
603
604
605
606 /** Creates parent-child relationships.
607 * Taxon bases are retrieved in blocks from CDM DB.
608 * Parent is retrieved from CDM DB via original source id if not found in current block.
609 * In case of blocksize = 20.000 this takes ca. 1-2 hours per block.
610 * */
611 private boolean createRelationships(FaunaEuropaeaTaxon fauEuTaxon,
612 TaxonBase<?> taxonBase, TaxonNameBase<?,?> taxonName, List<Taxon> taxa,
613 Map<Integer, FaunaEuropaeaTaxon> fauEuTaxonMap, FaunaEuropaeaImportState state) {
614
615 int parentId = fauEuTaxon.getParentId();
616 int taxonId = fauEuTaxon.getId();
617 FaunaEuropaeaTaxon parentFauEuTaxon = fauEuTaxonMap.get(parentId);
618 if (parentFauEuTaxon == null) {
619 if (logger.isInfoEnabled()) {
620 logger.info("Parent taxon is null (" + parentId + ")");
621 }
622 return false;
623 }
624 // UUID parentUuid = parentFauEuTaxon.getUuid();
625 Map<String, MapWrapper<? extends CdmBase>> stores = state.getStores();
626 MapWrapper<TaxonBase> parentTaxonStore = (MapWrapper<TaxonBase>)stores.get(ICdmIO.TAXON_STORE);
627 ReferenceBase<?> sourceRef = state.getConfig().getSourceReference();
628
629 TaxonBase<?> parentTaxonBase = null;
630
631 // for (TaxonBase<?> potentialParentTaxon : taxonBases) {
632 // if(potentialParentTaxon.getUuid().equals(parentUuid)) {
633 // parentTaxonBase = potentialParentTaxon;
634 // break;
635 // }
636 // }
637 // if (parentTaxonBase == null) {
638 // parentTaxonBase = getTaxonService().getTaxonByUuid(parentUuid);
639 // }
640
641 // TODO: Copy parents from taxonBases to parentTaxonStore
642
643 if (parentTaxonStore.containsId(parentId)) {
644 parentTaxonBase = parentTaxonStore.get(parentId);
645 if (logger.isDebugEnabled()) {
646 logger.debug("Parent (" + parentId + ") found in parent taxon store");
647 }
648 // } else {
649 // for (TaxonBase<?> potentialParentTaxon : taxonBases) {
650 // if(potentialParentTaxon.getId() == parentId) {
651 // parentTaxonBase = potentialParentTaxon;
652 // if (logger.isInfoEnabled()) {
653 // logger.info("Parent (" + parentId + ") found in taxon base list");
654 // }
655 // break;
656 // }
657 // }
658 }
659 if (parentTaxonBase == null) {
660 ISourceable sourceable =
661 getCommonService().getSourcedObjectByIdInSource(TaxonBase.class, Integer.toString(parentId), OS_NAMESPACE_TAXON);
662 parentTaxonBase = ((IdentifiableEntity)sourceable).deproxy(sourceable, TaxonBase.class);
663 if (logger.isDebugEnabled()) {
664 logger.debug("Parent (" + parentId + ") retrieved from DB via original source id");
665 }
666 }
667
668 if (!parentTaxonStore.containsId(parentId)) {
669 parentTaxonStore.put(parentId, parentTaxonBase);
670 }
671
672
673
674 Taxon parentTaxon = parentTaxonBase.deproxy(parentTaxonBase, Taxon.class);
675
676 boolean success = true;
677
678 // if (!fauEuTaxon.isValid()) { // FauEu Synonym
679
680 // } else if (fauEuTaxon.isValid()) { // FauEu Taxon
681
682 Taxon taxon = taxonBase.deproxy(taxonBase, Taxon.class);
683
684 try {
685 // add this taxon as child to parent
686 if (parentTaxon != null) {
687 makeTaxonomicallyIncluded(state, parentTaxon, taxon, sourceRef, null);
688 if (logger.isDebugEnabled()) {
689 logger.debug("Parent-child (" + parentId + "-" + taxonId +
690 ") relationship created");
691 }
692 }
693
694 } catch (Exception e) {
695 logger.error("Error creating taxonomically included relationship Parent-child (" +
696 parentId + "-" + taxonId + ")");
697 }
698
699
700 // }
701
702 return success;
703 }
704
705
706 private boolean makeTaxonomicallyIncluded(FaunaEuropaeaImportState state, Taxon toTaxon, Taxon fromTaxon, ReferenceBase citation, String microCitation){
707 boolean success = true;
708 ReferenceBase sec = toTaxon.getSec();
709 TaxonomicTree tree = state.getTree(sec);
710 if (tree == null){
711 tree = makeTree(state, sec);
712 }
713 success = tree.addParentChild(toTaxon, fromTaxon, citation, microCitation);
714 return success;
715 }
716
717
718 // public int calculateBlockSize(int limit, int upperBorder) {
719 //
720 // int blockSize = 0;
721 //
722 // if (upperBorder < limit) {
723 // limit = upperBorder;
724 // } else {
725 // blockSize = upperBorder / limit;
726 // }
727 // }
728
729
730 private boolean processTaxaFromDatabase(FaunaEuropaeaImportState state,
731 Map<Integer, FaunaEuropaeaTaxon> fauEuTaxonMap) {
732
733 if(logger.isInfoEnabled()) { logger.info("Processing taxa second pass..."); }
734
735 MapWrapper<TaxonBase> taxonBaseMap = new MapWrapper<TaxonBase>(null);
736
737 int nbrOfTaxa = getTaxonService().count(Taxon.class);
738 int n = 0;
739
740 boolean success = true;
741
742 if (nbrOfTaxa < limit) { // TODO: test with critical values
743 limit = nbrOfTaxa;
744 } else {
745 n = nbrOfTaxa / limit;
746 }
747
748 if(logger.isInfoEnabled()) {
749 logger.info("number of taxa = " + nbrOfTaxa
750 + ", limit = " + limit
751 + ", n = " + n);
752 }
753
754 // process taxa in chunks of <=limit
755
756 for (int j = 1; j <= n + 1; j++)
757 {
758 int offset = j - 1;
759 int start = offset * limit;
760
761 if(logger.isInfoEnabled()) { logger.info("Processing taxa: " + start + " - " + (start + limit - 1)); }
762
763 if(logger.isInfoEnabled()) {
764 logger.info("index = " + j
765 + ", offset = " + offset
766 + ", start = " + start);
767 }
768
769 if (j == n + 1) {
770 limit = nbrOfTaxa - n * limit;
771 if(logger.isInfoEnabled()) { logger.info("n = " + n + " limit = " + limit); }
772 }
773
774 TransactionStatus txStatus = startTransaction();
775
776 List<Taxon> taxa = getTaxonService().getAllTaxa(limit, start);
777 if(logger.isInfoEnabled()) {
778 logger.info(taxa.size() + " taxa retrieved from CDM DB");
779 }
780
781 for (TaxonBase taxonBase : taxa) {
782
783 TaxonNameBase<?,?> taxonName = taxonBase.getName();
784
785 FaunaEuropaeaTaxon fauEuTaxon = findFauEuTaxonByOriginalSourceId(taxonBase, fauEuTaxonMap);
786
787
788 if (logger.isDebugEnabled()) {
789 logger.debug("Taxon # " + fauEuTaxon.getId());
790 }
791 createRelationships(fauEuTaxon, taxonBase, taxonName, taxa, fauEuTaxonMap, state);
792 }
793
794 getTaxonService().saveTaxonAll(taxa);
795 taxa = null;
796
797 commitTransaction(txStatus);
798
799 // empty parent taxon store
800 // Map<String, MapWrapper<? extends CdmBase>> stores = state.getStores();
801 // MapWrapper<TaxonBase> parentTaxonStore = (MapWrapper<TaxonBase>)stores.get(ICdmIO.TAXON_STORE);
802 // parentTaxonStore.makeEmpty();
803 }
804 return success;
805 }
806
807
808 private FaunaEuropaeaTaxon findFauEuTaxonByOriginalSourceId(TaxonBase<?> taxonBase,
809 Map<Integer, FaunaEuropaeaTaxon> fauEuTaxonMap) {
810
811 Set set = taxonBase.getSources();
812 Object[] array = set.toArray();
813 if (array.length == 0) { return null; }
814 OriginalSource os = (OriginalSource) taxonBase.getSources().toArray()[0];
815 String taxonBaseIdStr = os.getIdInSource();
816 int taxonBaseId = Integer.parseInt(taxonBaseIdStr);
817 FaunaEuropaeaTaxon fauEuTaxon = fauEuTaxonMap.get(taxonBaseId);
818
819 return fauEuTaxon;
820 }
821
822
823 }