import java.sql.SQLException;\r
import java.util.Collection;\r
import java.util.HashMap;\r
+import java.util.HashSet;\r
+import java.util.Iterator;\r
import java.util.List;\r
import java.util.Map;\r
import java.util.Set;\r
/* Max number of taxa to retrieve (for test purposes) */\r
private int maxTaxa = 0;\r
/* Max number of taxa to be saved in CDM DB with one service call */\r
- private int limit = 2000; // TODO: Make configurable\r
+ private int limit = 5000; // TODO: Make configurable\r
/* Max number of taxa to be retrieved from CDM DB with one service call */\r
private int limitRetrieve = 10000; // TODO: Make configurable\r
/* Interval for progress info message when retrieving taxa */\r
/* (non-Javadoc)\r
* @see eu.etaxonomy.cdm.io.common.CdmIoBase#doInvoke(eu.etaxonomy.cdm.io.common.IImportConfigurator, eu.etaxonomy.cdm.api.application.CdmApplicationController, java.util.Map)\r
*/\r
- protected boolean doInvoke(FaunaEuropaeaImportState state) { \r
+ protected boolean doInvokeAlter(FaunaEuropaeaImportState state) { \r
\r
boolean success = true;\r
\r
}\r
\r
\r
- protected boolean doInvokeAlter(FaunaEuropaeaImportState state) { 
+ /**
+  * Retrieves the child-parent UUID pairs and then creates the corresponding
+  * parent-child relationships, both inside one surrounding transaction.
+  *
+  * @param state the current import state
+  * @return <code>true</code> only if both the retrieval and the relationship step reported success
+  */
+ protected boolean doInvoke(FaunaEuropaeaImportState state) { 

- Map<String, MapWrapper<? extends CdmBase>> stores = state.getStores();
- MapWrapper<TaxonBase<?>> taxonStore = (MapWrapper<TaxonBase<?>>)stores.get(ICdmIO.TAXON_STORE);
-// MapWrapper<TaxonNameBase<?,?>> taxonNamesStore = (MapWrapper<TaxonNameBase<?,?>>)stores.get(ICdmIO.TAXONNAME_STORE);
- MapWrapper<TeamOrPersonBase> authorStore = (MapWrapper<TeamOrPersonBase>)stores.get(ICdmIO.TEAM_STORE);
-// authorStore = null;
-// Map<Integer, FaunaEuropaeaTaxon> fauEuTaxonMap = new HashMap();
- FaunaEuropaeaImportConfigurator fauEuConfig = state.getConfig();
boolean success = true;

if(logger.isInfoEnabled()) { logger.info("Start making taxa..."); }

- success = retrieveTaxa(state, fauEuTaxonMap, Q_NO_RESTRICTION);
-// success = processTaxaSecondPass(state, fauEuTaxonMap);
- success = saveTaxa(stores, highestTaxonIndex, limit);
-// success = saveTaxa(stores);
+ // NOTE(review): createRelationships opens and commits its own transaction per block
+ // while this one is still open - confirm that nesting is intended.
+ TransactionStatus txStatus = startTransaction();
+
+ success = retrieveUuids(state);
+ // Combine instead of overwrite, so a failed retrieval stays visible in the result.
+ success = createRelationships(state) && success;

+ commitTransaction(txStatus);
+
logger.info("End making taxa...");
return success;
}
\r
\r
- /** Retrieve tax from FauEu DB and build FauEuTaxonMap only */\r
+ /**
+  * Reads child-parent UUID pairs through the configured source and stores
+  * them in the child-parent map of the import state.
+  * <p>
+  * Only taxa whose TAX_VALID column is non-zero are queried. A pair is skipped
+  * (and logged) when its child UUID is already in the map, or when one of the
+  * UUID columns is empty or cannot be parsed.
+  *
+  * @param state the import state providing the configuration and the map to fill
+  * @return <code>false</code> if no result set was available or an SQLException occurred,
+  *         <code>true</code> otherwise
+  */
+ private boolean retrieveUuids(FaunaEuropaeaImportState state) {
+
+     Map<UUID, UUID> childParentMap = state.getChildParentMap();
+     FaunaEuropaeaImportConfigurator fauEuConfig = state.getConfig();
+     Source source = fauEuConfig.getSource();
+     int i = 0;
+     boolean success = true;
+     ResultSet rs = null;
+
+     try {
+
+         String strQuery = 
+             " SELECT dbo.Taxon.UUID AS ChildUuid, Parent.UUID AS ParentUuid " +
+             " FROM dbo.Taxon INNER JOIN dbo.Taxon AS Parent " +
+             " ON dbo.Taxon.TAX_TAX_IDPARENT = Parent.TAX_ID " +
+             " WHERE (dbo.Taxon.TAX_VALID <> 0) ";
+
+         if (logger.isDebugEnabled()) {
+             logger.debug("Query: " + strQuery);
+         }
+
+         rs = source.getResultSet(strQuery);
+         if (rs == null) {
+             // Defensive: treat a missing result set as a failed retrieval instead of running into an NPE.
+             logger.error("No result set available for child-parent UUID query");
+             return false;
+         }
+
+         while (rs.next()) {
+
+             // Progress message every modCount rows (not for the very first row).
+             if ((i++ % modCount) == 0 && i != 1 ) { 
+                 if(logger.isInfoEnabled()) {
+                     logger.info("Taxa retrieved: " + (i-1)); 
+                 }
+             }
+
+             String childUuidStr = rs.getString("ChildUuid");
+             String parentUuidStr = rs.getString("ParentUuid");
+
+             // getString yields null for SQL NULL; UUID.fromString would throw on it.
+             if (childUuidStr == null || parentUuidStr == null) {
+                 logger.warn("Skipping child-parent pair with missing UUID (child: " 
+                         + childUuidStr + ", parent: " + parentUuidStr + ")");
+                 continue;
+             }
+
+             UUID childUuid;
+             UUID parentUuid;
+             try {
+                 childUuid = UUID.fromString(childUuidStr);
+                 parentUuid = UUID.fromString(parentUuidStr);
+             } catch (IllegalArgumentException e) {
+                 // One malformed value must not abort the whole retrieval.
+                 logger.warn("Skipping child-parent pair with malformed UUID (child: " 
+                         + childUuidStr + ", parent: " + parentUuidStr + ")");
+                 continue;
+             }
+
+             if (!childParentMap.containsKey(childUuid)) {
+
+                 childParentMap.put(childUuid, parentUuid);
+
+             } else {
+                 if(logger.isDebugEnabled()) {
+                     logger.debug("Duplicated child UUID (" + childUuid + ")");
+                 }
+             }
+         }
+
+     } catch (SQLException e) {
+         // Pass the exception along so the stack trace ends up in the log.
+         logger.error("SQLException:" + e, e);
+         success = false;
+     } finally {
+         if (rs != null) {
+             try {
+                 rs.close();
+             } catch (SQLException e) {
+                 logger.warn("Could not close result set: " + e);
+             }
+         }
+     }
+     return success; 
+ }
+\r
+ \r
+ /** Retrieve taxa from FauEu DB and build FauEuTaxonMap only */\r
private boolean retrieveTaxa(FaunaEuropaeaImportState state,\r
Map<Integer, FaunaEuropaeaTaxon> fauEuTaxonMap, int valid) {\r
\r
}\r
\r
\r
- /** Creates relationships if taxon bases are retrieved in chunks from CDM DB */\r
+ /**
+  * Moves up to <code>border</code> entries out of the given map into a new map.
+  * <p>
+  * The entries are taken in the iteration order of the map's entry set and are
+  * removed from <code>map</code>. If the map holds fewer than <code>border</code>
+  * entries, all of them are moved.
+  *
+  * @param border maximum number of entries to move; nothing is moved for values below 1
+  * @param map the map to take the entries from; it is modified by this call
+  * @return a new map containing the moved entries
+  */
+ public Map<UUID, UUID> partMap(int border, Map<UUID, UUID> map) {
+
+     if (logger.isInfoEnabled()) {
+         logger.info("Map size: " + map.size());
+     }
+     Iterator<Map.Entry<UUID, UUID>> entryIter = map.entrySet().iterator();
+     Map<UUID, UUID> partMap = new HashMap<UUID, UUID>();
+
+     // Stop early when the map runs out of entries instead of failing in next().
+     for (int i = 0; i < border && entryIter.hasNext(); i++) {
+
+         Map.Entry<UUID, UUID> mapEntry = entryIter.next();
+         partMap.put(mapEntry.getKey(), mapEntry.getValue());
+         entryIter.remove();
+     }
+
+     if (logger.isDebugEnabled()) {
+         logger.debug("Map size: " + map.size());
+     }
+     return partMap;
+ } 
+\r
+// public Map<UUID, UUID> childParentMap partMap(int start, int limit, Map<UUID, UUID> childParentMap) {\r
+// \r
+// int index = 0;\r
+// \r
+// for (int i = 0; i < limit; i++) {\r
+// \r
+// int j = start + i;\r
+// \r
+// Object object = childParentMap.get(j);\r
+// if(object != null) {\r
+// childParentMap.put(index, childParentMap.get(j));\r
+// index++;\r
+// } else {\r
+// if (logger.isDebugEnabled()) { logger.debug("Object (" + j + ") is null"); }\r
+// }\r
+// }\r
+// return (Map<UUID, UUID> childParentMap)internalPartMap.values();\r
+// }\r
+\r
+ \r
+ /** Creates parent-child relationships.\r
+ * Parent-child pairs are retrieved via UUID from CDM DB */\r
+ private boolean createRelationships(FaunaEuropaeaImportState state) {\r
+\r
+ Map<String, MapWrapper<? extends CdmBase>> stores = state.getStores();\r
+ MapWrapper<TaxonBase> taxonStore = (MapWrapper<TaxonBase>)stores.get(ICdmIO.TAXON_STORE);\r
+ taxonStore.makeEmpty();\r
+ Map<UUID, UUID> childParentMap = state.getChildParentMap();\r
+ ReferenceBase<?> sourceRef = state.getConfig().getSourceReference();\r
+\r
+ int upperBorder = childParentMap.size();\r
+ int nbrOfBlocks = 0;\r
+\r
+ boolean success = true;\r
+\r
+ if (upperBorder < limit) { // TODO: test with critical values\r
+ limit = upperBorder;\r
+ } else {\r
+ nbrOfBlocks = upperBorder / limit;\r
+ }\r
+\r
+ if(logger.isInfoEnabled()) { \r
+ logger.info("number of child-parent pairs = " + upperBorder \r
+ + ", limit = " + limit\r
+ + ", number of blocks = " + nbrOfBlocks); \r
+ }\r
+\r
+ for (int j = 1; j <= nbrOfBlocks + 1; j++) {\r
+ int offset = j - 1;\r
+ int start = offset * limit;\r
+\r
+ if(logger.isInfoEnabled()) { logger.info("Processing child-parent pairs: " + start + " - " + (start + limit - 1)); }\r
+\r
+ if(logger.isInfoEnabled()) { \r
+ logger.info("index = " + j \r
+ + ", offset = " + offset\r
+ + ", start = " + start); \r
+ }\r
+\r
+ if (j == nbrOfBlocks + 1) {\r
+ limit = upperBorder - nbrOfBlocks * limit;\r
+ if(logger.isInfoEnabled()) { logger.info("number of blocks = " + nbrOfBlocks + " limit = " + limit); }\r
+ }\r
+\r
+ TransactionStatus txStatus = startTransaction();\r
+\r
+// for (int k = 1; k <= start + offset; k++) { // TODO: test borders\r
+// int k = 0;\r
+\r
+ Map<UUID, UUID> childParentPartMap = partMap(limit, childParentMap);\r
+ Set<TaxonBase> childSet = new HashSet<TaxonBase>(limit);\r
+ \r
+ if (logger.isInfoEnabled()) {\r
+ logger.info("Partmap size: " + childParentPartMap.size());\r
+ }\r
+\r
+ for (UUID childUuid : childParentPartMap.keySet()) {\r
+// for (UUID childUuid : childParentMap.keySet()) {\r
+\r
+ UUID parentUuid = childParentPartMap.get(childUuid);\r
+\r
+ try {\r
+ TaxonBase<?> parent = getTaxonService().findByUuid(parentUuid);\r
+ if (logger.isTraceEnabled()) {\r
+ logger.trace("Parent find called (" + parentUuid + ")");\r
+ }\r
+ TaxonBase<?> child = getTaxonService().findByUuid(childUuid);\r
+ if (logger.isTraceEnabled()) {\r
+ logger.trace("Child find called (" + childUuid + ")");\r
+ }\r
+ Taxon parentTaxon = parent.deproxy(parent, Taxon.class);\r
+ Taxon childTaxon = parent.deproxy(child, Taxon.class);\r
+\r
+ if (childTaxon != null && parentTaxon != null) {\r
+ \r
+ makeTaxonomicallyIncluded(state, parentTaxon, childTaxon, sourceRef, null);\r
+ \r
+ if (logger.isDebugEnabled()) {\r
+ logger.debug("Parent-child (" + parentUuid + "-" + childUuid + \r
+ ") relationship created");\r
+ }\r
+ if (!childSet.contains(childTaxon)) {\r
+ \r
+ childSet.add(childTaxon);\r
+ \r
+ if (logger.isTraceEnabled()) {\r
+ logger.trace("Child taxon (" + childUuid + ") added to Set");\r
+ }\r
+ \r
+ } else {\r
+ if (logger.isDebugEnabled()) {\r
+ logger.debug("Duplicated child taxon (" + childUuid + ")");\r
+ }\r
+ }\r
+ } else {\r
+ if (logger.isDebugEnabled()) {\r
+ logger.debug("Parent(" + parentUuid + ") or child (" + childUuid + " is null");\r
+ }\r
+ }\r
+ \r
+// if (childTaxon != null && !childSet.contains(childTaxon)) {\r
+// childSet.add(childTaxon);\r
+// if (logger.isDebugEnabled()) {\r
+// logger.debug("Child taxon (" + childUuid + ") added to Set");\r
+// }\r
+// } else {\r
+// if (logger.isDebugEnabled()) {\r
+// logger.debug("Duplicated child taxon (" + childUuid + ")");\r
+// }\r
+// }\r
+ \r
+ } catch (Exception e) {\r
+ logger.error("Error creating taxonomically included relationship parent-child (" + \r
+ parentUuid + "-" + childUuid + ")");\r
+ }\r
+\r
+ }\r
+ getTaxonService().saveTaxonAll(childSet);\r
+ commitTransaction(txStatus);\r
+ }\r
+ return success;\r
+ }\r
+ \r
+ \r
+ \r
+ /** Creates parent-child relationships.\r
+ * Taxon bases are retrieved in blocks from CDM DB.\r
+ * Parent is retrieved from CDM DB via original source id if not found in current block.\r
+ * In case of blocksize = 20.000 this takes ca. 1-2 hours per block.\r
+ * */\r
private boolean createRelationships(FaunaEuropaeaTaxon fauEuTaxon,\r
TaxonBase<?> taxonBase, TaxonNameBase<?,?> taxonName, List<Taxon> taxa,\r
Map<Integer, FaunaEuropaeaTaxon> fauEuTaxonMap, FaunaEuropaeaImportState state) {\r
}\r
\r
\r
+// public int calculateBlockSize(int limit, int upperBorder) {\r
+//\r
+// int blockSize = 0;\r
+// \r
+// if (upperBorder < limit) {\r
+// limit = upperBorder;\r
+// } else {\r
+// blockSize = upperBorder / limit;\r
+// }\r
+// }\r
+ \r
+ \r
private boolean processTaxaFromDatabase(FaunaEuropaeaImportState state,\r
Map<Integer, FaunaEuropaeaTaxon> fauEuTaxonMap) {\r
\r