FaunaEuropaea basionym import
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / faunaEuropaea / FaunaEuropaeaRelTaxonIncludeImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.faunaEuropaea;
11
12 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.A_AUCT;
13 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.P_PARENTHESIS;
14 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.Q_NO_RESTRICTION;
15 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.T_STATUS_ACCEPTED;
16 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.T_STATUS_NOT_ACCEPTED;
17
18 import java.sql.ResultSet;
19 import java.sql.SQLException;
20 import java.util.HashMap;
21 import java.util.HashSet;
22 import java.util.Iterator;
23 import java.util.List;
24 import java.util.Map;
25 import java.util.Set;
26 import java.util.UUID;
27
28 import org.apache.log4j.Logger;
29 import org.springframework.stereotype.Component;
30 import org.springframework.transaction.TransactionStatus;
31
32 import eu.etaxonomy.cdm.io.common.ICdmIO;
33 import eu.etaxonomy.cdm.io.common.MapWrapper;
34 import eu.etaxonomy.cdm.io.common.Source;
35 import eu.etaxonomy.cdm.io.profiler.ProfilerController;
36 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
37 import eu.etaxonomy.cdm.model.common.CdmBase;
38 import eu.etaxonomy.cdm.model.common.ISourceable;
39 import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
40 import eu.etaxonomy.cdm.model.common.OriginalSource;
41 import eu.etaxonomy.cdm.model.name.Rank;
42 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
43 import eu.etaxonomy.cdm.model.reference.ReferenceBase;
44 import eu.etaxonomy.cdm.model.taxon.Taxon;
45 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
46 import eu.etaxonomy.cdm.model.taxon.TaxonomicTree;
47 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
48
49
50
51 /**
52 * @author a.babadshanjan
53 * @created 12.05.2009
54 * @version 1.0
55 */
56 @Component
57 public class FaunaEuropaeaRelTaxonIncludeImport extends FaunaEuropaeaImportBase {
58
59 public static final String OS_NAMESPACE_TAXON = "Taxon";
60 private static final Logger logger = Logger.getLogger(FaunaEuropaeaRelTaxonIncludeImport.class);
61
62 /* Max number of taxa to retrieve (for test purposes) */
63 private int maxTaxa = 0;
64 /* Max number of taxa to be saved in CDM DB with one service call */
65 private int limit = 5000; // TODO: Make configurable
66 /* Max number of taxa to be retrieved from CDM DB with one service call */
67 private int limitRetrieve = 10000; // TODO: Make configurable
68 /* Highest taxon index in the FauEu database */
69 private int highestTaxonIndex = 0;
70 /* Number of times method buildParentName() has been called for one taxon */
71 private int callCount = 0;
72 //private Map<Integer, FaunaEuropaeaTaxon> fauEuTaxonMap = new HashMap();
73
74
75
76 /* (non-Javadoc)
77 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
78 */
79 @Override
80 protected boolean doCheck(FaunaEuropaeaImportState state) {
81 boolean result = true;
82 FaunaEuropaeaImportConfigurator fauEuConfig = state.getConfig();
83 logger.warn("Checking for Taxa not yet fully implemented");
84 result &= checkTaxonStatus(fauEuConfig);
85
86 return result;
87 }
88
89 /* (non-Javadoc)
90 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
91 */
92 protected boolean isIgnore(FaunaEuropaeaImportState state) {
93 return ! state.getConfig().isDoTaxa();
94 }
95
96 private boolean checkTaxonStatus(FaunaEuropaeaImportConfigurator fauEuConfig) {
97 boolean result = true;
98 // try {
99 Source source = fauEuConfig.getSource();
100 String sqlStr = "";
101 ResultSet rs = source.getResultSet(sqlStr);
102 return result;
103 // } catch (SQLException e) {
104 // e.printStackTrace();
105 // return false;
106 // }
107 }
108
109 protected boolean doInvoke(FaunaEuropaeaImportState state) {
110
111 boolean success = true;
112
113 Map<String, MapWrapper<? extends CdmBase>> stores = state.getStores();
114 MapWrapper<TaxonBase> taxonStore = (MapWrapper<TaxonBase>)stores.get(ICdmIO.TAXON_STORE);
115 taxonStore.makeEmpty();
116 MapWrapper<TeamOrPersonBase> authorStore = (MapWrapper<TeamOrPersonBase>)stores.get(ICdmIO.TEAM_STORE);
117 authorStore.makeEmpty();
118
119 if(logger.isInfoEnabled()) { logger.info("Start making taxonomically included relationships..."); }
120
121 //ReferenceBase<?> sourceRef = state.getConfig().getSourceReference();
122 TransactionStatus txStatus = startTransaction();
123
124 TaxonBase taxon = getTaxonService().getTaxonByUuid(UUID.fromString("ac7b30dc-6207-4c71-9752-ee0fb838a271"));
125 ReferenceBase<?> sourceRef = taxon.getSec();
126 TaxonomicTree tree= getTaxonomicTreeFor(state, sourceRef);
127
128 commitTransaction(txStatus);
129
130 ProfilerController.memorySnapshot();
131 success = processParentsChildren(state);
132 ProfilerController.memorySnapshot();
133
134 logger.info("End making taxa...");
135
136 return success;
137 }
138
139 /** Retrieve child-parent uuid map from CDM DB */
140 private boolean processParentsChildren(FaunaEuropaeaImportState state) {
141
142 int limit = state.getConfig().getLimitSave();
143
144 TransactionStatus txStatus = null;
145
146 Map<UUID, UUID> childParentMap = null;
147 FaunaEuropaeaImportConfigurator fauEuConfig = state.getConfig();
148 Source source = fauEuConfig.getSource();
149 int i = 0;
150 boolean success = true;
151
152 try {
153
154 String strQuery =
155 " SELECT dbo.Taxon.UUID AS ChildUuid, Parent.UUID AS ParentUuid " +
156 " FROM dbo.Taxon INNER JOIN dbo.Taxon AS Parent " +
157 " ON dbo.Taxon.TAX_TAX_IDPARENT = Parent.TAX_ID " +
158 " WHERE (dbo.Taxon.TAX_VALID <> 0) AND (dbo.Taxon.TAX_AUT_ID <> " + A_AUCT + " OR dbo.Taxon.TAX_AUT_ID IS NULL )" +
159 " ORDER BY dbo.Taxon.TAX_RNK_ID ASC";
160
161 if (logger.isInfoEnabled()) {
162 logger.info("Query: " + strQuery);
163 }
164
165 ResultSet rs = source.getResultSet(strQuery);
166
167 while (rs.next()) {
168
169 if ((i++ % limit) == 0) {
170
171 txStatus = startTransaction();
172 childParentMap = new HashMap<UUID, UUID>(limit);
173
174 if(logger.isInfoEnabled()) {
175 logger.info("Parent-child mappings retrieved: " + (i-1));
176 }
177 }
178
179 String childUuidStr = rs.getString("ChildUuid");
180 String parentUuidStr = rs.getString("ParentUuid");
181 UUID childUuid = UUID.fromString(childUuidStr);
182 UUID parentUuid = UUID.fromString(parentUuidStr);
183
184 if (!childParentMap.containsKey(childUuid)) {
185
186 childParentMap.put(childUuid, parentUuid);
187
188 } else {
189 if(logger.isDebugEnabled()) {
190 logger.debug("Duplicated child UUID (" + childUuid + ")");
191 }
192 }
193 if (((i % limit) == 0 && i != 1 )) {
194
195 success = createRelationships(state, childParentMap);
196
197 childParentMap = null;
198 commitTransaction(txStatus);
199
200 if(logger.isInfoEnabled()) {
201 logger.info("i = " + i + " - Transaction committed");
202 }
203 }
204 }
205
206 } catch (SQLException e) {
207 logger.error("SQLException:" + e);
208 success = false;
209 }
210 return success;
211 }
212
213
214 public Map<UUID, UUID> partMap(int border, Map<UUID, UUID> map) {
215
216 if (logger.isInfoEnabled()) {
217 logger.info("Map size: " + map.size());
218 }
219 Set<Map.Entry<UUID, UUID>> entries = map.entrySet();
220 Iterator<Map.Entry<UUID, UUID>> entryIter = entries.iterator();
221 Map<UUID, UUID> partMap = new HashMap<UUID, UUID>();
222
223 for (int i = 0; i < border; i++) {
224 //while (entryIter.hasNext()) {
225
226 Map.Entry<UUID, UUID> mapEntry = (Map.Entry<UUID, UUID>)entryIter.next();
227 partMap.put(mapEntry.getKey(), mapEntry.getValue());
228 entryIter.remove();
229 }
230
231 if (logger.isDebugEnabled()) {
232 logger.debug("Map size: " + map.size());
233 }
234 return partMap;
235 }
236
237
238 /* Creates parent-child relationships.
239 * Parent-child pairs are retrieved in blocks via findByUUID(Set<UUID>) from CDM DB.
240 * It takes about 5min to save a block of 5000 taxa.*/
241 private boolean createRelationships(FaunaEuropaeaImportState state, Map<UUID, UUID> childParentMap) {
242
243 TaxonBase taxon = getTaxonService().getTaxonByUuid(UUID.fromString("ac7b30dc-6207-4c71-9752-ee0fb838a271"));
244 ReferenceBase<?> sourceRef = taxon.getSec();
245 boolean success = true;
246
247 TaxonomicTree tree = getTaxonomicTreeFor(state, sourceRef);
248
249 Set<TaxonBase> childSet = new HashSet<TaxonBase>(limit);
250
251 Set<UUID> childKeysSet = childParentMap.keySet();
252 Set<UUID> parentValuesSet = new HashSet<UUID>(childParentMap.values());
253
254 if (logger.isInfoEnabled()) {
255 logger.info("Start reading children and parents");
256 }
257 List<TaxonBase> children = getTaxonService().findByUuid(childKeysSet);
258 List<TaxonBase> parents = getTaxonService().findByUuid(parentValuesSet);
259 Map<UUID, TaxonBase> parentsMap = new HashMap<UUID, TaxonBase>(parents.size());
260 for (TaxonBase taxonBase : parents){
261 parentsMap.put(taxonBase.getUuid(), taxonBase);
262 }
263
264
265 if (logger.isInfoEnabled()) {
266 logger.info("End reading children and parents");
267 }
268
269
270 if (logger.isTraceEnabled()) {
271 for (UUID uuid : childKeysSet) {
272 logger.trace("child uuid query: " + uuid);
273 }
274 }
275 if (logger.isTraceEnabled()) {
276 for (UUID uuid : parentValuesSet) {
277 logger.trace("parent uuid query: " + uuid);
278 }
279 }
280 if (logger.isTraceEnabled()) {
281 for (TaxonBase tb : children) {
282 logger.trace("child uuid result: " + tb.getUuid());
283 }
284 }
285 if (logger.isTraceEnabled()) {
286 for (TaxonBase tb : parents) {
287 logger.trace("parent uuid result: " + tb.getUuid());
288 }
289 }
290
291 UUID mappedParentUuid = null;
292 UUID childUuid = null;
293
294 for (TaxonBase child : children) {
295
296 try {
297 Taxon childTaxon = child.deproxy(child, Taxon.class);
298 childUuid = childTaxon.getUuid();
299 mappedParentUuid = childParentMap.get(childUuid);
300 TaxonBase parent = null;
301
302 TaxonBase potentialParent = parentsMap.get(mappedParentUuid);
303 // for (TaxonBase potentialParent : parents ) {
304 // parentUuid = potentialParent.getUuid();
305 // if(parentUuid.equals(mappedParentUuid)) {
306 parent = potentialParent;
307 if (logger.isDebugEnabled()) {
308 logger.debug("Parent (" + mappedParentUuid + ") found for child (" + childUuid + ")");
309 }
310 // break;
311 // }
312 // }
313
314 Taxon parentTaxon = parent.deproxy(parent, Taxon.class);
315
316 if (childTaxon != null && parentTaxon != null) {
317
318 // makeTaxonomicallyIncluded(state, parentTaxon, childTaxon, sourceRef, null, tree);
319 // makeTaxonomicallyIncluded(state, parentTaxon, childTaxon, sourceRef, null);
320 tree.addParentChild(parentTaxon, childTaxon, sourceRef, null);
321
322 if (logger.isDebugEnabled()) {
323 logger.debug("Parent-child (" + mappedParentUuid + "-" + childUuid +
324 ") relationship created");
325 }
326 if (!childSet.contains(childTaxon)) {
327
328 childSet.add(childTaxon);
329
330 if (logger.isTraceEnabled()) {
331 logger.trace("Child taxon (" + childUuid + ") added to Set");
332 }
333
334 } else {
335 if (logger.isDebugEnabled()) {
336 logger.debug("Duplicated child taxon (" + childUuid + ")");
337 }
338 }
339 } else {
340 if (logger.isDebugEnabled()) {
341 logger.debug("Parent(" + mappedParentUuid + ") or child (" + childUuid + " is null");
342 }
343 }
344
345 if (childTaxon != null && !childSet.contains(childTaxon)) {
346 childSet.add(childTaxon);
347 if (logger.isDebugEnabled()) {
348 logger.debug("Child taxon (" + childUuid + ") added to Set");
349 }
350 } else {
351 if (logger.isDebugEnabled()) {
352 logger.debug("Duplicated child taxon (" + childUuid + ")");
353 }
354 }
355
356 } catch (Exception e) {
357 logger.error("Error creating taxonomically included relationship parent-child (" +
358 mappedParentUuid + "-" + childUuid + ")", e);
359 }
360
361 }
362 if (logger.isInfoEnabled()) {
363 logger.info("Start saving childSet");
364 }
365 getTaxonService().saveTaxonAll(childSet);
366 if (logger.isInfoEnabled()) {
367 logger.info("End saving childSet");
368 }
369
370 parentValuesSet = null;
371 childSet = null;
372 children = null;
373 parents = null;
374 tree = null;
375
376 return success;
377 }
378
379 /**
380 * @param state
381 * @param sourceRef
382 */
383 private TaxonomicTree getTaxonomicTreeFor(FaunaEuropaeaImportState state, ReferenceBase<?> sourceRef) {
384
385 TaxonomicTree tree;
386 UUID treeUuid = state.getTreeUuid(sourceRef);
387 if (treeUuid == null){
388 if(logger.isInfoEnabled()) { logger.info(".. creating new taxonomic tree"); }
389
390 TransactionStatus txStatus = startTransaction();
391 tree = makeTreeMemSave(state, sourceRef);
392 commitTransaction(txStatus);
393
394 } else {
395 tree = getTaxonService().getTaxonomicTreeByUuid(treeUuid);
396 }
397 return tree;
398 }
399
400 }