Fauna Europaea Import
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / faunaEuropaea / FaunaEuropaeaTaxonImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.faunaEuropaea;
11
12 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.A_AUCT;
13 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.P_PARENTHESIS;
14 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.R_GENUS;
15 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.R_SUBGENUS;
16 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.T_STATUS_ACCEPTED;
17 import static eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaTransformer.T_STATUS_NOT_ACCEPTED;
18
19 import java.sql.ResultSet;
20 import java.sql.SQLException;
21 import java.util.Collection;
22 import java.util.HashMap;
23 import java.util.Map;
24 import java.util.UUID;
25
26 import org.apache.log4j.Logger;
27 import org.springframework.stereotype.Component;
28 import org.springframework.transaction.TransactionStatus;
29
30 import eu.etaxonomy.cdm.common.CdmUtils;
31 import eu.etaxonomy.cdm.io.berlinModel.CdmOneToManyMapper;
32 import eu.etaxonomy.cdm.io.berlinModel.CdmStringMapper;
33 import eu.etaxonomy.cdm.io.common.CdmAttributeMapperBase;
34 import eu.etaxonomy.cdm.io.common.CdmSingleAttributeMapperBase;
35 import eu.etaxonomy.cdm.io.common.ICdmIO;
36 import eu.etaxonomy.cdm.io.common.IImportConfigurator;
37 import eu.etaxonomy.cdm.io.common.ImportHelper;
38 import eu.etaxonomy.cdm.io.common.MapWrapper;
39 import eu.etaxonomy.cdm.io.common.Source;
40 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
41 import eu.etaxonomy.cdm.model.common.CdmBase;
42 import eu.etaxonomy.cdm.model.name.Rank;
43 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
44 import eu.etaxonomy.cdm.model.name.ZoologicalName;
45 import eu.etaxonomy.cdm.model.reference.PublicationBase;
46 import eu.etaxonomy.cdm.model.reference.Publisher;
47 import eu.etaxonomy.cdm.model.reference.ReferenceBase;
48 import eu.etaxonomy.cdm.model.taxon.Synonym;
49 import eu.etaxonomy.cdm.model.taxon.Taxon;
50 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
51 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
52
53
54 /**
55 * @author a.babadshanjan
56 * @created 12.05.2009
57 * @version 1.0
58 */
59 @Component
60 public class FaunaEuropaeaTaxonImport extends FaunaEuropaeaImportBase {
61 private static final Logger logger = Logger.getLogger(FaunaEuropaeaTaxonImport.class);
62
63 /* Interval for progress info message when retrieving taxa */
64 private int modCount = 10000;
65 /* Max number of taxa to be saved with one service call */
66 private int limit = 10000; // TODO: Make configurable
67 /* The highest taxon index in the FauEu database */
68 private int highestTaxonIndex = 0;
69 private Map<Integer, FaunaEuropaeaTaxon> fauEuTaxonMap = new HashMap();
70
71
72 /* (non-Javadoc)
73 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
74 */
75 @Override
76 protected boolean doCheck(IImportConfigurator config) {
77 boolean result = true;
78 FaunaEuropaeaImportConfigurator fauEuConfig = (FaunaEuropaeaImportConfigurator)config;
79 logger.warn("Checking for Taxa not yet fully implemented");
80 result &= checkTaxonStatus(fauEuConfig);
81
82 return result;
83 }
84
85 /* (non-Javadoc)
86 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
87 */
88 protected boolean isIgnore(IImportConfigurator config) {
89 return !config.isDoTaxa();
90 }
91
92 private boolean checkTaxonStatus(FaunaEuropaeaImportConfigurator fauEuConfig) {
93 boolean result = true;
94 // try {
95 Source source = fauEuConfig.getSource();
96 String sqlStr = "";
97 ResultSet rs = source.getResultSet(sqlStr);
98 return result;
99 // } catch (SQLException e) {
100 // e.printStackTrace();
101 // return false;
102 // }
103 }
104
105 /* (non-Javadoc)
106 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doInvoke(eu.etaxonomy.cdm.io.common.IImportConfigurator, eu.etaxonomy.cdm.api.application.CdmApplicationController, java.util.Map)
107 */
108 @Override
109 protected boolean doInvoke(IImportConfigurator config,
110 Map<String, MapWrapper<? extends CdmBase>> stores) {
111
112 MapWrapper<TaxonNameBase<?,?>> taxonNamesStore = (MapWrapper<TaxonNameBase<?,?>>)stores.get(ICdmIO.TAXONNAME_STORE);
113 // Map<Integer, FaunaEuropaeaTaxon> fauEuTaxonMap = new HashMap();
114 FaunaEuropaeaImportConfigurator fauEuConfig = (FaunaEuropaeaImportConfigurator)config;
115 boolean success = true;
116
117 if(logger.isInfoEnabled()) { logger.info("Start making taxa..."); }
118
119 success = retrieveTaxa(fauEuConfig, stores, fauEuTaxonMap);
120 success = processTaxaSecondPass(fauEuConfig, stores, fauEuTaxonMap);
121 success = saveTaxa(stores);
122
123 logger.info("End making taxa...");
124 return success;
125 }
126
127
128 private boolean retrieveTaxa(FaunaEuropaeaImportConfigurator fauEuConfig,
129 Map<String, MapWrapper<? extends CdmBase>> stores,
130 Map<Integer, FaunaEuropaeaTaxon> fauEuTaxonMap) {
131
132 // MapWrapper<TeamOrPersonBase> authorStore = (MapWrapper<TeamOrPersonBase>)stores.get(ICdmIO.TEAM_STORE);
133 // MapWrapper<ReferenceBase> refStore = (MapWrapper<ReferenceBase>)stores.get(ICdmIO.NOMREF_STORE);
134 // MapWrapper<TaxonNameBase<?,?>> taxonNamesStore = (MapWrapper<TaxonNameBase<?,?>>)stores.get(ICdmIO.TAXONNAME_STORE);
135 MapWrapper<TaxonBase> taxonStore = (MapWrapper<TaxonBase>)stores.get(ICdmIO.TAXON_STORE);
136
137 Source source = fauEuConfig.getSource();
138 String namespace = "Taxon";
139 int i = 0;
140 boolean success = true;
141
142 try {
143
144 String strQuery =
145 " SELECT MAX(TAX_ID) AS TAX_ID FROM dbo.Taxon ";
146
147 ResultSet rs = source.getResultSet(strQuery);
148 while (rs.next()) {
149 int maxTaxonId = rs.getInt("TAX_ID");
150 highestTaxonIndex = maxTaxonId;
151 }
152
153 // strQuery =
154 // " SELECT Taxon.*, rank.* " +
155 // " FROM dbo.Taxon INNER JOIN dbo.rank ON dbo.Taxon.TAX_RNK_ID = dbo.rank.rnk_id " +
156 // " WHERE (1=1)";
157
158 strQuery =
159 " SELECT Taxon.*, rank.*, author.* " +
160 " FROM dbo.Taxon INNER JOIN dbo.rank ON dbo.Taxon.TAX_RNK_ID = dbo.rank.rnk_id " +
161 " INNER JOIN dbo.author ON dbo.Taxon.TAX_AUT_ID = dbo.author.aut_id " +
162 " WHERE (1=1)";
163
164 rs = source.getResultSet(strQuery);
165
166
167 // int i = 0;
168 while (rs.next()) {
169
170 if ((i++ % modCount) == 0 && i != 1 ) {
171 if(logger.isInfoEnabled()) {
172 logger.info("Taxa retrieved: " + (i-1));
173 }
174 }
175
176 int taxonId = rs.getInt("TAX_ID");
177 String taxonName = rs.getString("TAX_NAME");
178 int rankId = rs.getInt("TAX_RNK_ID");
179 int parentId = rs.getInt("TAX_TAX_IDPARENT");
180 int familyId = rs.getInt("TAX_TAX_IDFAMILY");
181 int genusId = rs.getInt("TAX_TAX_IDGENUS");
182 int autId = rs.getInt("TAX_AUT_ID");
183 int status = rs.getInt("TAX_VALID");
184 int year = rs.getInt("TAX_YEAR");
185 int parenthesis = rs.getInt("TAX_PARENTHESIS");
186 String autName = rs.getString("aut_name");
187 Rank rank = null;
188 UUID taxonBaseUuid = UUID.randomUUID();
189
190 try {
191 rank = FaunaEuropaeaTransformer.rankId2Rank(rs, false);
192 } catch (UnknownCdmTypeException e) {
193 logger.warn("Taxon (" + taxonId + ") has unknown rank (" + rankId + ") and could not be saved.");
194 success = false;
195 }
196
197 ReferenceBase<?> reference = null;
198
199 ZoologicalName zooName = ZoologicalName.NewInstance(rank);
200 String nameTitleCache = taxonName;
201
202 // set local name cache
203
204 zooName.setNameCache(taxonName);
205
206 // StringBuilder nameTitleCacheBuilder = new StringBuilder(taxonName);
207 // if (year != 0) { // TODO: What do do with authors like xp, xf, etc?
208 // nameTitleCacheBuilder.append(" ");
209 // if (parenthesis == P_PARENTHESIS) {
210 // nameTitleCacheBuilder.append("(");
211 // }
212 // nameTitleCacheBuilder.append(autName);
213 // nameTitleCacheBuilder.append(" ");
214 // nameTitleCacheBuilder.append(year);
215 // if (parenthesis == P_PARENTHESIS) {
216 // nameTitleCacheBuilder.append(")");
217 // }
218 // }
219 // nameTitleCache = nameTitleCacheBuilder.toString();
220 // zooName.setTitleCache(nameTitleCache);
221 // zooName.setFullTitleCache(nameTitleCache); // FIXME: reference, NC status
222
223 TaxonBase<?> taxonBase;
224 FaunaEuropaeaTaxon fauEuTaxon = new FaunaEuropaeaTaxon();
225
226 Synonym synonym;
227 Taxon taxon;
228 try {
229 // logger.debug(status);
230 if ((status == T_STATUS_ACCEPTED) || (autId == A_AUCT)) {
231 taxon = Taxon.NewInstance(zooName, reference);
232 taxonBase = taxon;
233 } else if ((status == T_STATUS_NOT_ACCEPTED) && (autId != A_AUCT)) {
234 synonym = Synonym.NewInstance(zooName, reference);
235 taxonBase = synonym;
236 } else {
237 logger.warn("Unknown taxon status " + status + ". Taxon (" + taxonId + ") ignored.");
238 continue;
239 }
240
241 taxonBase.setUuid(taxonBaseUuid);
242
243 // set local title cache
244
245 // taxonBase.setTitleCache(nameTitleCache);
246
247 fauEuTaxon.setUuid(taxonBaseUuid);
248 fauEuTaxon.setParentId(parentId);
249 fauEuTaxon.setId(taxonId);
250 fauEuTaxon.setRankId(rankId);
251 fauEuTaxon.setYear(year);
252 fauEuTaxon.setAuthor(autName);
253 if (parenthesis == P_PARENTHESIS) {
254 fauEuTaxon.setParenthesis(true);
255 } else {
256 fauEuTaxon.setParenthesis(false);
257 }
258
259 ImportHelper.setOriginalSource(taxonBase, fauEuConfig.getSourceReference(), taxonId, namespace);
260
261 if (!taxonStore.containsId(taxonId)) {
262 if (taxonBase == null) {
263 logger.warn("Taxon base is null");
264 }
265 taxonStore.put(taxonId, taxonBase);
266 fauEuTaxonMap.put(taxonId, fauEuTaxon);
267 if (logger.isDebugEnabled()) {
268 logger.debug("Stored taxon base (" + taxonId + ") " + taxonName);
269 }
270 } else {
271 logger.warn("Not imported taxon base with duplicated TAX_ID (" + taxonId +
272 ") " + taxonName);
273 }
274
275 // if(!taxonNamesStore.containsId(taxonId) && !taxonStore.containsId(taxonId) && !taxonStore.containsId(taxonId)) {
276 // taxonNamesStore.put(taxonId, zooName);
277 // taxonStore.put(taxonId, taxonBase);
278 // fauEuTaxonMap.put(taxonId, fauEuTaxon);
279 // } else {
280 // logger.warn("Ignoring taxon with duplicated id " + taxonId);
281 // }
282
283 } catch (Exception e) {
284 logger.warn("An exception occurred when creating taxon base with id " + taxonId +
285 ". Taxon base could not be saved.");
286 }
287 }
288 } catch (SQLException e) {
289 logger.error("SQLException:" + e);
290 success = false;
291 }
292
293 return success;
294 }
295
296
297 private boolean processTaxaSecondPass(FaunaEuropaeaImportConfigurator fauEuConfig,
298 Map<String, MapWrapper<? extends CdmBase>> stores,
299 Map<Integer, FaunaEuropaeaTaxon> fauEuTaxonMap) {
300
301 if(logger.isInfoEnabled()) { logger.info("Processing taxa second pass..."); }
302
303 MapWrapper<TaxonBase> taxonStore = (MapWrapper<TaxonBase>)stores.get(ICdmIO.TAXON_STORE);
304
305 boolean success = true;
306
307 for (int id : taxonStore.keySet())
308 {
309 TaxonBase<?> taxonBase = taxonStore.get(id);
310 FaunaEuropaeaTaxon fauEuTaxon = fauEuTaxonMap.get(id);
311
312 // success = updateProperties(fauEuConfig, taxonBase, taxonStore, fauEuTaxon, fauEuTaxonMap);
313 String nameString = buildTaxonName(fauEuTaxon, taxonBase, taxonStore);
314
315 }
316 return success;
317 }
318
319
320 /* Build name title cache */
321 private String buildNameTitleCache(String nameString, FaunaEuropaeaTaxon fauEuTaxon) {
322
323 StringBuilder titleCacheStringBuilder = new StringBuilder(nameString);
324 int year = fauEuTaxon.getYear();
325 if (year != 0) { // TODO: What do do with authors like xp, xf, etc?
326 titleCacheStringBuilder.append(" ");
327 if (fauEuTaxon.isParenthesis() == true) {
328 titleCacheStringBuilder.append("(");
329 }
330 titleCacheStringBuilder.append(fauEuTaxon.getAuthor());
331 titleCacheStringBuilder.append(" ");
332 titleCacheStringBuilder.append(year);
333 if (fauEuTaxon.isParenthesis() == true) {
334 titleCacheStringBuilder.append(")");
335 }
336 }
337 return titleCacheStringBuilder.toString();
338 }
339
340
341 /* Build taxon name */
342 private String buildTaxonName(FaunaEuropaeaTaxon fauEuTaxon,
343 TaxonBase<?> taxonBase, MapWrapper<TaxonBase> taxonStore) {
344
345 String localString = "";
346 String parentString = "";
347
348 FaunaEuropaeaTaxon parent = null;
349 TaxonNameBase<?,?> parentName = null;
350 TaxonBase<?> parentTaxonBase = null;
351 String parentNameCache = null;
352 boolean parentComplete = false;
353
354 TaxonNameBase<?,?> taxonName = taxonBase.getName();
355 ZoologicalName zooName = (ZoologicalName)taxonName;
356
357 if (zooName != null) {
358 localString = zooName.getNameCache();
359 }
360
361 int rank = fauEuTaxon.getRankId();
362 if (rank > R_GENUS) {
363 StringBuilder parentStringBuilder = new StringBuilder();
364 if(logger.isDebugEnabled()) {
365 logger.debug("Local taxon name: (rank = " + rank + ") " + localString);
366 }
367
368 // The scientific name in FaunaEuropaeaTaxon is set only once it has been built completely,
369 // including parent(s) parts.
370
371 int parentId = fauEuTaxon.getParentId();
372 parent = fauEuTaxonMap.get(parentId);
373 if (parent != null) {
374 UUID parentUuid = parent.getUuid();
375 if (parentUuid != null) {
376 parentTaxonBase = taxonStore.get(parentId);
377 parentNameCache = parent.getScientificName();
378 if (parentNameCache == null) { // parent name has not been built yet
379 if (parentTaxonBase != null) {
380 parentName = parentTaxonBase.getName();
381 if(parentName != null) {
382 parentNameCache = ((ZoologicalName)parentName).getNameCache();
383 } else {
384 logger.warn("Parent taxon name of taxon (uuid= " + parentUuid.toString() + "), id = " +
385 parent.getId() + ") is null");
386 }
387 } else {
388 logger.warn("Parent taxon (uuid= " + parentUuid.toString() + "), id = " +
389 parent.getId() + ") is null");
390 }
391 if (parent.getRankId() == R_SUBGENUS) {
392 parentStringBuilder.append("(");
393 }
394 parentStringBuilder.append(parentNameCache);
395 if (parent.getRankId() == R_SUBGENUS) {
396 parentStringBuilder.append(")");
397 }
398 // parentStringBuilder.append(" ");
399 // parentStringBuilder.append(((ZoologicalName)taxonName).getNameCache());
400 parentString = parentStringBuilder.toString();
401 logger.info("Parent name part built: " + parentString);
402 } else {
403 parentComplete = true;
404 parentString = parent.getScientificName();
405 logger.info("Parent name is complete: " + parentString);
406 }
407 } else {
408 logger.warn("Parent uuid of " + localString + " is null");
409 }
410 } else {
411 logger.warn("Parent of " + localString + " is null");
412 }
413 if (parent != null && parent.getRankId() > R_GENUS && parentComplete == false) {
414 parentString = buildTaxonName(parent, parentTaxonBase, taxonStore);
415 }
416 }
417 StringBuilder concatStringBuilder = new StringBuilder(parentString);
418 // if (!concatStringBuilder.equals("")) { concatStringBuilder.append(" "); }
419 concatStringBuilder.append(" ");
420 concatStringBuilder.append(localString);
421 String concatString = concatStringBuilder.toString();
422 concatString = (String) CdmUtils.removeDuplicateWhitespace(concatString.trim());
423
424 zooName.setNameCache(concatString);
425 String titleCache = buildNameTitleCache(concatString, fauEuTaxon);
426 zooName.setTitleCache(titleCache);
427 zooName.setFullTitleCache(titleCache); // TODO: Add reference, NC status
428
429 fauEuTaxon.setScientificName(concatString);
430 return concatString;
431 }
432
433
434 private boolean saveTaxa(Map<String, MapWrapper<? extends CdmBase>> stores) {
435
436 // MapWrapper<TaxonNameBase<?,?>> taxonNameStore = (MapWrapper<TaxonNameBase<?,?>>)stores.get(ICdmIO.TAXONNAME_STORE);
437 MapWrapper<TaxonBase> taxonStore = (MapWrapper<TaxonBase>)stores.get(ICdmIO.TAXON_STORE);
438
439 int n = 0;
440 int nbrOfTaxa = highestTaxonIndex;
441 // int nbrOfTaxa = taxonStore.size();
442 boolean success = true;
443
444 if(logger.isInfoEnabled()) { logger.info("Saving taxa ..."); }
445
446 if (nbrOfTaxa < limit) { // TODO: test with critical values
447 limit = nbrOfTaxa;
448 } else {
449 n = nbrOfTaxa / limit;
450 }
451
452 if(logger.isInfoEnabled()) {
453 logger.info("number of taxa = " + taxonStore.size()
454 + ", highest taxon index = " + highestTaxonIndex
455 + ", limit = " + limit
456 + ", n = " + n);
457 }
458
459 // save taxa in chunks
460
461 for (int j = 1; j <= n + 1; j++)
462 {
463 int offset = j - 1;
464 int start = offset * limit;
465
466 if(logger.isInfoEnabled()) { logger.info("Saving taxa: " + start + " - " + (start + limit - 1)); }
467
468 if(logger.isInfoEnabled()) {
469 logger.info("index = " + j
470 + ", offset = " + offset
471 + ", start = " + start);
472 }
473
474 if (j == n + 1) {
475 limit = nbrOfTaxa - n * limit;
476 if(logger.isInfoEnabled()) { logger.info(", n = " + n + " limit = " + limit); }
477 }
478
479 Collection<TaxonBase> taxonMapPart = taxonStore.objects(start, limit);
480 getTaxonService().saveTaxonAll(taxonMapPart);
481 taxonMapPart = null;
482 taxonStore.removeObjects(start, limit);
483 }
484
485 return success;
486 }
487 }