0ece881ef583502e7a460b3edfcb169b9634094a
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / faunaEuropaea / FaunaEuropaeaRelTaxonIncludeImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.faunaEuropaea;
11
12 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.A_AUCT;
13 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.P_PARENTHESIS;
14 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.Q_NO_RESTRICTION;
15 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.T_STATUS_ACCEPTED;
16 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.T_STATUS_NOT_ACCEPTED;
17
18 import java.sql.ResultSet;
19 import java.sql.SQLException;
20 import java.util.HashMap;
21 import java.util.HashSet;
22 import java.util.Iterator;
23 import java.util.List;
24 import java.util.Map;
25 import java.util.Set;
26 import java.util.UUID;
27
28 import org.apache.log4j.Logger;
29 import org.springframework.stereotype.Component;
30 import org.springframework.transaction.TransactionStatus;
31
32 import eu.etaxonomy.cdm.io.common.ICdmIO;
33 import eu.etaxonomy.cdm.io.common.MapWrapper;
34 import eu.etaxonomy.cdm.io.common.Source;
35 import eu.etaxonomy.cdm.io.profiler.ProfilerController;
36 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
37 import eu.etaxonomy.cdm.model.common.CdmBase;
38 import eu.etaxonomy.cdm.model.common.ISourceable;
39 import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
40 import eu.etaxonomy.cdm.model.common.OriginalSource;
41 import eu.etaxonomy.cdm.model.name.Rank;
42 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
43 import eu.etaxonomy.cdm.model.reference.ReferenceBase;
44 import eu.etaxonomy.cdm.model.taxon.Taxon;
45 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
46 import eu.etaxonomy.cdm.model.taxon.TaxonomicTree;
47 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
48
49
50
51 /**
52 * @author a.babadshanjan
53 * @created 12.05.2009
54 * @version 1.0
55 */
56 @Component
57 public class FaunaEuropaeaRelTaxonIncludeImport extends FaunaEuropaeaImportBase {
58
59 public static final String OS_NAMESPACE_TAXON = "Taxon";
60 private static final Logger logger = Logger.getLogger(FaunaEuropaeaRelTaxonIncludeImport.class);
61
62 /* Max number of taxa to retrieve (for test purposes) */
63 private int maxTaxa = 0;
64 /* Max number of taxa to be saved in CDM DB with one service call */
65 private int limit = 5000; // TODO: Make configurable
66 /* Max number of taxa to be retrieved from CDM DB with one service call */
67 private int limitRetrieve = 10000; // TODO: Make configurable
68 /* Highest taxon index in the FauEu database */
69 private int highestTaxonIndex = 0;
70 /* Number of times method buildParentName() has been called for one taxon */
71 private int callCount = 0;
72 private Map<Integer, FaunaEuropaeaTaxon> fauEuTaxonMap = new HashMap();
73
74
75
76 /* (non-Javadoc)
77 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
78 */
79 @Override
80 protected boolean doCheck(FaunaEuropaeaImportState state) {
81 boolean result = true;
82 FaunaEuropaeaImportConfigurator fauEuConfig = state.getConfig();
83 logger.warn("Checking for Taxa not yet fully implemented");
84 result &= checkTaxonStatus(fauEuConfig);
85
86 return result;
87 }
88
89 /* (non-Javadoc)
90 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
91 */
92 protected boolean isIgnore(FaunaEuropaeaImportState state) {
93 return ! state.getConfig().isDoTaxa();
94 }
95
96 private boolean checkTaxonStatus(FaunaEuropaeaImportConfigurator fauEuConfig) {
97 boolean result = true;
98 // try {
99 Source source = fauEuConfig.getSource();
100 String sqlStr = "";
101 ResultSet rs = source.getResultSet(sqlStr);
102 return result;
103 // } catch (SQLException e) {
104 // e.printStackTrace();
105 // return false;
106 // }
107 }
108
109 protected boolean doInvoke(FaunaEuropaeaImportState state) {
110
111 boolean success = true;
112
113 Map<String, MapWrapper<? extends CdmBase>> stores = state.getStores();
114 MapWrapper<TaxonBase> taxonStore = (MapWrapper<TaxonBase>)stores.get(ICdmIO.TAXON_STORE);
115 taxonStore.makeEmpty();
116 MapWrapper<TeamOrPersonBase> authorStore = (MapWrapper<TeamOrPersonBase>)stores.get(ICdmIO.TEAM_STORE);
117 authorStore.makeEmpty();
118
119 if(logger.isInfoEnabled()) { logger.info("Start making taxonomically included relationships..."); }
120
121 // TransactionStatus txStatus = startTransaction();
122
123 ProfilerController.memorySnapshot();
124
125 success = processParentsChildren(state);
126
127 ProfilerController.memorySnapshot();
128
129 // commitTransaction(txStatus);
130
131 logger.info("End making taxa...");
132
133 return success;
134 }
135
136 /** Retrieve child-parent uuid map from CDM DB */
137 private boolean processParentsChildren(FaunaEuropaeaImportState state) {
138
139 int limit = state.getConfig().getLimitSave();
140
141 TransactionStatus txStatus = null;
142
143 Map<UUID, UUID> childParentMap = null;
144 FaunaEuropaeaImportConfigurator fauEuConfig = state.getConfig();
145 Source source = fauEuConfig.getSource();
146 int i = 0;
147 boolean success = true;
148
149 try {
150
151 String strQuery =
152 " SELECT dbo.Taxon.UUID AS ChildUuid, Parent.UUID AS ParentUuid " +
153 " FROM dbo.Taxon INNER JOIN dbo.Taxon AS Parent " +
154 " ON dbo.Taxon.TAX_TAX_IDPARENT = Parent.TAX_ID " +
155 " WHERE (dbo.Taxon.TAX_VALID <> 0) AND (dbo.Taxon.TAX_AUT_ID <> " + A_AUCT + ")";
156
157 if (logger.isInfoEnabled()) {
158 logger.info("Query: " + strQuery);
159 }
160
161 ResultSet rs = source.getResultSet(strQuery);
162
163 while (rs.next()) {
164
165 if ((i++ % limit) == 0) {
166
167 ProfilerController.memorySnapshot();
168 txStatus = startTransaction();
169 childParentMap = new HashMap<UUID, UUID>(limit);
170
171 if(logger.isInfoEnabled()) {
172 logger.info("Parent-child mappings retrieved: " + (i-1));
173 }
174 }
175
176 String childUuidStr = rs.getString("ChildUuid");
177 String parentUuidStr = rs.getString("ParentUuid");
178 UUID childUuid = UUID.fromString(childUuidStr);
179 UUID parentUuid = UUID.fromString(parentUuidStr);
180
181 if (!childParentMap.containsKey(childUuid)) {
182
183 childParentMap.put(childUuid, parentUuid);
184
185 } else {
186 if(logger.isDebugEnabled()) {
187 logger.debug("Duplicated child UUID (" + childUuid + ")");
188 }
189 }
190 if (((i % limit) == 0 && i != 1 )) {
191
192 success = createRelationships(state, childParentMap);
193
194 childParentMap = null;
195 commitTransaction(txStatus);
196
197 if(logger.isInfoEnabled()) {
198 logger.info("i = " + i + " - Transaction committed");
199 }
200 }
201 }
202
203 } catch (SQLException e) {
204 logger.error("SQLException:" + e);
205 success = false;
206 }
207 return success;
208 }
209
210
211 public Map<UUID, UUID> partMap(int border, Map<UUID, UUID> map) {
212
213 if (logger.isInfoEnabled()) {
214 logger.info("Map size: " + map.size());
215 }
216 Set<Map.Entry<UUID, UUID>> entries = map.entrySet();
217 Iterator<Map.Entry<UUID, UUID>> entryIter = entries.iterator();
218 Map<UUID, UUID> partMap = new HashMap<UUID, UUID>();
219
220 for (int i = 0; i < border; i++) {
221 //while (entryIter.hasNext()) {
222
223 Map.Entry<UUID, UUID> mapEntry = (Map.Entry<UUID, UUID>)entryIter.next();
224 partMap.put(mapEntry.getKey(), mapEntry.getValue());
225 entryIter.remove();
226 }
227
228 if (logger.isDebugEnabled()) {
229 logger.debug("Map size: " + map.size());
230 }
231 return partMap;
232 }
233
234
235 /* Creates parent-child relationships.
236 * Parent-child pairs are retrieved in blocks via findByUUID(Set<UUID>) from CDM DB.
237 * It takes about 5min to save a block of 5000 taxa.*/
238 private boolean createRelationships(FaunaEuropaeaImportState state, Map<UUID, UUID> childParentMap) {
239
240 ReferenceBase<?> sourceRef = state.getConfig().getSourceReference();
241 boolean success = true;
242
243 //add tree to new session
244 TaxonomicTree tree = state.getTree(sourceRef);
245 if (tree == null){
246 tree = makeTree(state, sourceRef);
247 }
248 getTaxonService().saveTaxonomicTree(tree);
249
250 Set<TaxonBase> childSet = new HashSet<TaxonBase>(limit);
251
252 Set<UUID> childKeysSet = childParentMap.keySet();
253 Set<UUID> parentValuesSet = new HashSet<UUID>(childParentMap.values());
254
255 if (logger.isInfoEnabled()) {
256 logger.info("Start reading children and parents");
257 }
258 List<TaxonBase> children = getTaxonService().findByUuid(childKeysSet);
259 List<TaxonBase> parents = getTaxonService().findByUuid(parentValuesSet);
260 Map<UUID, TaxonBase> parentsMap = new HashMap<UUID, TaxonBase>();
261 for (TaxonBase taxonBase : parents){
262 parentsMap.put(taxonBase.getUuid(), taxonBase);
263 }
264
265
266 if (logger.isInfoEnabled()) {
267 logger.info("End reading children and parents");
268 }
269
270
271 if (logger.isTraceEnabled()) {
272 for (UUID uuid : childKeysSet) {
273 logger.trace("child uuid query: " + uuid);
274 }
275 }
276 if (logger.isTraceEnabled()) {
277 for (UUID uuid : parentValuesSet) {
278 logger.trace("parent uuid query: " + uuid);
279 }
280 }
281 if (logger.isTraceEnabled()) {
282 for (TaxonBase tb : children) {
283 logger.trace("child uuid result: " + tb.getUuid());
284 }
285 }
286 if (logger.isTraceEnabled()) {
287 for (TaxonBase tb : parents) {
288 logger.trace("parent uuid result: " + tb.getUuid());
289 }
290 }
291
292 UUID mappedParentUuid = null;
293 UUID parentUuid = null;
294 UUID childUuid = null;
295
296 for (TaxonBase child : children) {
297
298 try {
299 Taxon childTaxon = child.deproxy(child, Taxon.class);
300 childUuid = childTaxon.getUuid();
301 mappedParentUuid = childParentMap.get(childUuid);
302 TaxonBase parent = null;
303
304 TaxonBase potentialParent = parentsMap.get(mappedParentUuid);
305 // for (TaxonBase potentialParent : parents ) {
306 // parentUuid = potentialParent.getUuid();
307 // if(parentUuid.equals(mappedParentUuid)) {
308 parent = potentialParent;
309 if (logger.isDebugEnabled()) {
310 logger.debug("Parent (" + parentUuid + ") found for child (" + childUuid + ")");
311 }
312 // break;
313 // }
314 // }
315
316 Taxon parentTaxon = parent.deproxy(parent, Taxon.class);
317
318 if (childTaxon != null && parentTaxon != null) {
319
320 // makeTaxonomicallyIncluded(state, parentTaxon, childTaxon, sourceRef, null, tree);
321 makeTaxonomicallyIncluded(state, parentTaxon, childTaxon, sourceRef, null);
322
323 if (logger.isDebugEnabled()) {
324 logger.debug("Parent-child (" + parentUuid + "-" + childUuid +
325 ") relationship created");
326 }
327 if (!childSet.contains(childTaxon)) {
328
329 childSet.add(childTaxon);
330
331 if (logger.isTraceEnabled()) {
332 logger.trace("Child taxon (" + childUuid + ") added to Set");
333 }
334
335 } else {
336 if (logger.isDebugEnabled()) {
337 logger.debug("Duplicated child taxon (" + childUuid + ")");
338 }
339 }
340 } else {
341 if (logger.isDebugEnabled()) {
342 logger.debug("Parent(" + parentUuid + ") or child (" + childUuid + " is null");
343 }
344 }
345
346 if (childTaxon != null && !childSet.contains(childTaxon)) {
347 childSet.add(childTaxon);
348 if (logger.isDebugEnabled()) {
349 logger.debug("Child taxon (" + childUuid + ") added to Set");
350 }
351 } else {
352 if (logger.isDebugEnabled()) {
353 logger.debug("Duplicated child taxon (" + childUuid + ")");
354 }
355 }
356
357 } catch (Exception e) {
358 logger.error("Error creating taxonomically included relationship parent-child (" +
359 parentUuid + "-" + childUuid + ")");
360 }
361
362 }
363 if (logger.isInfoEnabled()) {
364 logger.info("Start saving childSet");
365 }
366 getTaxonService().saveTaxonAll(childSet);
367 if (logger.isInfoEnabled()) {
368 logger.info("End saving childSet");
369 }
370
371 parentValuesSet = null;
372 childSet = null;
373 children = null;
374 parents = null;
375
376 return success;
377 }
378
379
380 private boolean makeTaxonomicallyIncluded(FaunaEuropaeaImportState state, Taxon toTaxon, Taxon fromTaxon,
381 ReferenceBase citation, String microCitation){
382 boolean success = true;
383 ReferenceBase sec = toTaxon.getSec();
384 sec = CdmBase.deproxy(sec, ReferenceBase.class);
385 sec = citation;
386 TaxonomicTree tree = state.getTree(sec);
387
388 // Session session = getTaxonService().getSession();
389
390 // if (session.contains(sec)) {
391 // logger.debug("Sec contained in session. Id = " + sec.getId());
392 // } else {
393 // logger.info("Sec not contained in session. Id = " + sec.getId());
394 // getReferenceService().merge(sec);
395 // }
396
397 if (tree == null){
398 tree = makeTree(state, sec);
399 }
400
401 // if (session.contains(tree)) {
402 // logger.debug("Taxonomic tree contained in session. Id = " + tree.getId());
403 // } else {
404 // logger.info("Taxonomic tree not contained in session. Id = " + tree.getId());
405 // UUID treeUuid = state.getTree(sec).getUuid();
406 // tree = getTaxonService().getTaxonomicTreeByUuid(treeUuid);
407 // logger.info("Tree retrieved");
408 // }
409
410 success = tree.addParentChild(toTaxon, fromTaxon, citation, microCitation);
411 return success;
412 }
413
414
415 }