2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.faunaEuropaea
;
12 import static eu
.etaxonomy
.cdm
.io
.faunaEuropaea
.FaunaEuropaeaTransformer
.A_AUCT
;
13 import static eu
.etaxonomy
.cdm
.io
.faunaEuropaea
.FaunaEuropaeaTransformer
.P_PARENTHESIS
;
14 import static eu
.etaxonomy
.cdm
.io
.faunaEuropaea
.FaunaEuropaeaTransformer
.Q_NO_RESTRICTION
;
15 import static eu
.etaxonomy
.cdm
.io
.faunaEuropaea
.FaunaEuropaeaTransformer
.T_STATUS_ACCEPTED
;
16 import static eu
.etaxonomy
.cdm
.io
.faunaEuropaea
.FaunaEuropaeaTransformer
.T_STATUS_NOT_ACCEPTED
;
18 import java
.sql
.ResultSet
;
19 import java
.sql
.SQLException
;
20 import java
.util
.HashMap
;
21 import java
.util
.HashSet
;
22 import java
.util
.Iterator
;
23 import java
.util
.List
;
26 import java
.util
.UUID
;
28 import org
.apache
.log4j
.Logger
;
29 import org
.springframework
.stereotype
.Component
;
30 import org
.springframework
.transaction
.TransactionStatus
;
32 import eu
.etaxonomy
.cdm
.io
.common
.ICdmIO
;
33 import eu
.etaxonomy
.cdm
.io
.common
.MapWrapper
;
34 import eu
.etaxonomy
.cdm
.io
.common
.Source
;
35 import eu
.etaxonomy
.cdm
.io
.profiler
.ProfilerController
;
36 import eu
.etaxonomy
.cdm
.model
.agent
.TeamOrPersonBase
;
37 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
38 import eu
.etaxonomy
.cdm
.model
.common
.ISourceable
;
39 import eu
.etaxonomy
.cdm
.model
.common
.IdentifiableEntity
;
40 import eu
.etaxonomy
.cdm
.model
.common
.OriginalSource
;
41 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
42 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
43 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceBase
;
44 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
45 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
46 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonomicTree
;
47 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
52 * @author a.babadshanjan
57 public class FaunaEuropaeaRelTaxonIncludeImport
extends FaunaEuropaeaImportBase
{
/* Namespace constant with value "Taxon"; not referenced in the visible part of this class.
 * NOTE(review): "OS" presumably stands for "original source" - confirm against callers. */
59 public static final String OS_NAMESPACE_TAXON
= "Taxon";
/* log4j logger shared by all methods of this import step. */
60 private static final Logger logger
= Logger
.getLogger(FaunaEuropaeaRelTaxonIncludeImport
.class);
62 /* Max number of taxa to retrieve (for test purposes) */
// NOTE(review): not referenced in the visible code of this class.
63 private int maxTaxa
= 0;
64 /* Max number of taxa to be saved in CDM DB with one service call */
// NOTE(review): processParentsChildren() declares a local `limit` (from getLimitSave()) that
// shadows this field, while createRelationships() still sizes its child set with this field.
65 private int limit
= 5000; // TODO: Make configurable
66 /* Max number of taxa to be retrieved from CDM DB with one service call */
// NOTE(review): not referenced in the visible code of this class.
67 private int limitRetrieve
= 10000; // TODO: Make configurable
68 /* Highest taxon index in the FauEu database */
// NOTE(review): not referenced in the visible code of this class.
69 private int highestTaxonIndex
= 0;
70 /* Number of times method buildParentName() has been called for one taxon */
// NOTE(review): not referenced in the visible code; buildParentName() is not defined in view.
71 private int callCount
= 0;
72 //private Map<Integer, FaunaEuropaeaTaxon> fauEuTaxonMap = new HashMap();
77 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
/*
 * Pre-import check for this step.
 * Starts from `true` and ANDs in the outcome of checkTaxonStatus() for the state's configurator.
 * NOTE(review): the statement returning `result` is not present in the visible lines.
 */
80 protected boolean doCheck(FaunaEuropaeaImportState state
) {
81 boolean result
= true;
82 FaunaEuropaeaImportConfigurator fauEuConfig
= state
.getConfig();
// Logged unconditionally at WARN level on every check run.
83 logger
.warn("Checking for Taxa not yet fully implemented");
// `&=` keeps an earlier failure sticky; here `result` is still its initial `true`.
84 result
&= checkTaxonStatus(fauEuConfig
);
90 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
/*
 * Tells the import framework whether to skip this step.
 * Returns the negation of the configurator's isDoTaxa() flag, i.e. the step is ignored
 * exactly when taxa import is switched off.
 */
92 protected boolean isIgnore(FaunaEuropaeaImportState state
) {
93 return ! state
.getConfig().isDoTaxa();
/*
 * Runs a query against the configured source database as part of doCheck().
 * `result` is initialised to true and is not modified in the visible lines.
 * NOTE(review): `sqlStr` is not declared anywhere in the visible lines - confirm the query text.
 * NOTE(review): the ResultSet is neither read nor closed in the visible lines, and the
 * SQLException handler below is commented out - confirm how query failures are handled.
 */
96 private boolean checkTaxonStatus(FaunaEuropaeaImportConfigurator fauEuConfig
) {
97 boolean result
= true;
99 Source source
= fauEuConfig
.getSource();
101 ResultSet rs
= source
.getResultSet(sqlStr
);
103 // } catch (SQLException e) {
104 // e.printStackTrace();
/*
 * Entry point of this import step.
 * Clears the taxon and author stores, looks up the source reference through a fixed taxon,
 * obtains the taxonomic tree for that reference inside a transaction, and then delegates the
 * actual work to processParentsChildren(). A profiler memory snapshot is taken before and
 * after that call.
 * `success` ends up holding the value returned by processParentsChildren().
 */
109 protected boolean doInvoke(FaunaEuropaeaImportState state
) {
111 boolean success
= true;
// The stores map is wildcard-typed, so both lookups below need unchecked casts.
113 Map
<String
, MapWrapper
<?
extends CdmBase
>> stores
= state
.getStores();
114 MapWrapper
<TaxonBase
> taxonStore
= (MapWrapper
<TaxonBase
>)stores
.get(ICdmIO
.TAXON_STORE
);
115 taxonStore
.makeEmpty();
116 MapWrapper
<TeamOrPersonBase
> authorStore
= (MapWrapper
<TeamOrPersonBase
>)stores
.get(ICdmIO
.TEAM_STORE
);
117 authorStore
.makeEmpty();
119 if(logger
.isInfoEnabled()) { logger
.info("Start making taxonomically included relationships..."); }
121 //ReferenceBase<?> sourceRef = state.getConfig().getSourceReference();
122 TransactionStatus txStatus
= startTransaction();
// NOTE(review): the taxon UUID is hard-coded; if the service returns null for it, the
// following taxon.getSec() call throws a NullPointerException - confirm this taxon always exists.
124 TaxonBase taxon
= getTaxonService().getTaxonByUuid(UUID
.fromString("ac7b30dc-6207-4c71-9752-ee0fb838a271"));
// The taxon's sec reference is used as the source reference for the tree lookup.
125 ReferenceBase
<?
> sourceRef
= taxon
.getSec();
// `tree` is not used again in the visible lines; the call presumably matters because
// getTaxonomicTreeFor() creates the tree when the state has no tree UUID for this reference.
126 TaxonomicTree tree
= getTaxonomicTreeFor(state
, sourceRef
);
128 commitTransaction(txStatus
);
130 ProfilerController
.memorySnapshot();
131 success
= processParentsChildren(state
);
132 ProfilerController
.memorySnapshot();
// NOTE(review): this message says "taxa" although the start message announces
// "taxonomically included relationships"; unlike the start message it is not guarded by isInfoEnabled().
134 logger
.info("End making taxa...");
/*
 * Reads child/parent UUID pairs from the source database and hands them to
 * createRelationships() in batches of `limit` rows, one transaction per batch.
 * NOTE(review): `strQuery` and the row counter `i` are used below but their declarations,
 * the row loop header and the `try` belonging to the catch clause are not in the visible lines.
 */
139 /** Retrieve child-parent uuid pairs from the source database and create the relationships batch-wise */
140 private boolean processParentsChildren(FaunaEuropaeaImportState state
) {
// Batch size from the configurator; this local shadows the `limit` field of the class.
// NOTE(review): a value of 0 makes the `% limit` expressions below throw ArithmeticException.
142 int limit
= state
.getConfig().getLimitSave();
144 TransactionStatus txStatus
= null;
146 Map
<UUID
, UUID
> childParentMap
= null;
147 FaunaEuropaeaImportConfigurator fauEuConfig
= state
.getConfig();
148 Source source
= fauEuConfig
.getSource();
150 boolean success
= true;
// Query: self-join of dbo.Taxon to its parent row (TAX_TAX_IDPARENT = Parent.TAX_ID),
// restricted to rows with TAX_VALID <> 0 whose TAX_AUT_ID differs from A_AUCT or is NULL,
// ordered by TAX_RNK_ID ascending.
155 " SELECT dbo.Taxon.UUID AS ChildUuid, Parent.UUID AS ParentUuid " +
156 " FROM dbo.Taxon INNER JOIN dbo.Taxon AS Parent " +
157 " ON dbo.Taxon.TAX_TAX_IDPARENT = Parent.TAX_ID " +
158 " WHERE (dbo.Taxon.TAX_VALID <> 0) AND (dbo.Taxon.TAX_AUT_ID <> " + A_AUCT
+ " OR dbo.Taxon.TAX_AUT_ID IS NULL )" +
159 " ORDER BY dbo.Taxon.TAX_RNK_ID ASC";
161 if (logger
.isInfoEnabled()) {
162 logger
.info("Query: " + strQuery
);
// NOTE(review): the ResultSet is not closed in the visible lines.
165 ResultSet rs
= source
.getResultSet(strQuery
);
// Batch start: the test uses the value of `i` before the increment, so it fires on
// rows 0, limit, 2*limit, ... and opens a transaction with a fresh map.
169 if ((i
++ % limit
) == 0) {
171 txStatus
= startTransaction();
172 childParentMap
= new HashMap
<UUID
, UUID
>(limit
);
174 if(logger
.isInfoEnabled()) {
175 logger
.info("Parent-child mappings retrieved: " + (i
-1));
179 String childUuidStr
= rs
.getString("ChildUuid");
180 String parentUuidStr
= rs
.getString("ParentUuid");
181 UUID childUuid
= UUID
.fromString(childUuidStr
);
182 UUID parentUuid
= UUID
.fromString(parentUuidStr
);
// Only the first parent seen for a child UUID within a batch is kept.
184 if (!childParentMap
.containsKey(childUuid
)) {
186 childParentMap
.put(childUuid
, parentUuid
);
189 if(logger
.isDebugEnabled()) {
190 logger
.debug("Duplicated child UUID (" + childUuid
+ ")");
// Batch end: `i` has already been incremented, so this fires once `limit` rows are collected.
// NOTE(review): with limit == 1 the `i != 1` guard skips the very first batch, whose
// transaction is then replaced by the next startTransaction() without a commit.
193 if (((i
% limit
) == 0 && i
!= 1 )) {
// NOTE(review): plain assignment overwrites the outcome of earlier batches; `&=` (as used
// in doCheck) would keep a failure sticky.
195 success
= createRelationships(state
, childParentMap
);
197 childParentMap
= null;
198 commitTransaction(txStatus
);
200 if(logger
.isInfoEnabled()) {
201 logger
.info("i = " + i
+ " - Transaction committed");
// NOTE(review): no handling of a trailing batch smaller than `limit` is visible; if there is
// none, its relationships are never created and its transaction is never committed - verify.
206 } catch (SQLException e
) {
// The exception is passed through string concatenation, so no stack trace is logged.
207 logger
.error("SQLException:" + e
);
/*
 * Builds a new HashMap from the first `border` entries of `map`, taken in the iteration
 * order of map.entrySet().
 *
 * border - number of entries to take
 * map    - source map of UUID pairs; its size is logged before (INFO) and after (DEBUG)
 *
 * NOTE(review): next() is called `border` times without a hasNext() check, so a `border`
 * larger than map.size() ends in NoSuchElementException.
 * NOTE(review): the visible lines neither remove entries from `map` nor show the return
 * statement; the second size log suggests entries may be meant to be moved - confirm.
 */
214 public Map
<UUID
, UUID
> partMap(int border
, Map
<UUID
, UUID
> map
) {
216 if (logger
.isInfoEnabled()) {
217 logger
.info("Map size: " + map
.size());
219 Set
<Map
.Entry
<UUID
, UUID
>> entries
= map
.entrySet();
220 Iterator
<Map
.Entry
<UUID
, UUID
>> entryIter
= entries
.iterator();
221 Map
<UUID
, UUID
> partMap
= new HashMap
<UUID
, UUID
>();
223 for (int i
= 0; i
< border
; i
++) {
224 //while (entryIter.hasNext()) {
// The cast is redundant: the iterator is already typed as Map.Entry<UUID, UUID>.
226 Map
.Entry
<UUID
, UUID
> mapEntry
= (Map
.Entry
<UUID
, UUID
>)entryIter
.next();
227 partMap
.put(mapEntry
.getKey(), mapEntry
.getValue());
231 if (logger
.isDebugEnabled()) {
232 logger
.debug("Map size: " + map
.size());
/*
 * Creates the parent-child relationships for one batch.
 *
 * state          - import state, used to look up the taxonomic tree
 * childParentMap - child taxon UUID mapped to parent taxon UUID for this batch
 *
 * Steps visible below: load all children and parents with two findByUuid() calls, index the
 * parents by UUID, add each (parent, child) pair to the tree via addParentChild(), collect the
 * children in a set and save that set with saveTaxonAll().
 * NOTE(review): `success` is initialised to true and never changed in the visible lines, so
 * per-child errors (which are only logged) do not reach the caller - confirm.
 */
238 /* Creates parent-child relationships.
239 * Parent-child pairs are retrieved in blocks via findByUUID(Set<UUID>) from CDM DB.
240 * It takes about 5min to save a block of 5000 taxa.*/
241 private boolean createRelationships(FaunaEuropaeaImportState state
, Map
<UUID
, UUID
> childParentMap
) {
// Same hard-coded taxon UUID as in doInvoke(); it is looked up again for every batch.
// NOTE(review): a null result makes taxon.getSec() throw a NullPointerException.
243 TaxonBase taxon
= getTaxonService().getTaxonByUuid(UUID
.fromString("ac7b30dc-6207-4c71-9752-ee0fb838a271"));
244 ReferenceBase
<?
> sourceRef
= taxon
.getSec();
245 boolean success
= true;
247 TaxonomicTree tree
= getTaxonomicTreeFor(state
, sourceRef
);
// Initial capacity comes from the `limit` field (5000), not from the configured save limit
// used as batch size by processParentsChildren().
249 Set
<TaxonBase
> childSet
= new HashSet
<TaxonBase
>(limit
);
251 Set
<UUID
> childKeysSet
= childParentMap
.keySet();
// Copying the values into a set removes duplicate parent UUIDs before the lookup.
252 Set
<UUID
> parentValuesSet
= new HashSet
<UUID
>(childParentMap
.values());
254 if (logger
.isInfoEnabled()) {
255 logger
.info("Start reading children and parents");
257 List
<TaxonBase
> children
= getTaxonService().findByUuid(childKeysSet
);
258 List
<TaxonBase
> parents
= getTaxonService().findByUuid(parentValuesSet
);
// Index the parents by UUID so each child's parent is found with a single map lookup.
259 Map
<UUID
, TaxonBase
> parentsMap
= new HashMap
<UUID
, TaxonBase
>(parents
.size());
260 for (TaxonBase taxonBase
: parents
){
261 parentsMap
.put(taxonBase
.getUuid(), taxonBase
);
265 if (logger
.isInfoEnabled()) {
266 logger
.info("End reading children and parents");
// TRACE-only dumps of the queried UUIDs and of the UUIDs actually returned.
270 if (logger
.isTraceEnabled()) {
271 for (UUID uuid
: childKeysSet
) {
272 logger
.trace("child uuid query: " + uuid
);
275 if (logger
.isTraceEnabled()) {
276 for (UUID uuid
: parentValuesSet
) {
277 logger
.trace("parent uuid query: " + uuid
);
280 if (logger
.isTraceEnabled()) {
281 for (TaxonBase tb
: children
) {
282 logger
.trace("child uuid result: " + tb
.getUuid());
285 if (logger
.isTraceEnabled()) {
286 for (TaxonBase tb
: parents
) {
287 logger
.trace("parent uuid result: " + tb
.getUuid());
// Declared outside the loop so the error log further down can report the current pair.
291 UUID mappedParentUuid
= null;
292 UUID childUuid
= null;
294 for (TaxonBase child
: children
) {
297 Taxon childTaxon
= child
.deproxy(child
, Taxon
.class);
298 childUuid
= childTaxon
.getUuid();
299 mappedParentUuid
= childParentMap
.get(childUuid
);
300 TaxonBase parent
= null;
// parentsMap.get() yields null when the parent was not returned by findByUuid().
302 TaxonBase potentialParent
= parentsMap
.get(mappedParentUuid
);
303 // for (TaxonBase potentialParent : parents ) {
304 // parentUuid = potentialParent.getUuid();
305 // if(parentUuid.equals(mappedParentUuid)) {
306 parent
= potentialParent
;
// NOTE(review): this "found" message is logged even when `parent` is null.
307 if (logger
.isDebugEnabled()) {
308 logger
.debug("Parent (" + mappedParentUuid
+ ") found for child (" + childUuid
+ ")");
// NOTE(review): `parent` is dereferenced here, so a missing parent raises a
// NullPointerException before the null check below; presumably it ends up in the
// catch (Exception) clause further down - the matching `try` is not in the visible lines.
314 Taxon parentTaxon
= parent
.deproxy(parent
, Taxon
.class);
// NOTE(review): childTaxon.getUuid() was already called above, so `childTaxon != null`
// cannot be false at this point.
316 if (childTaxon
!= null && parentTaxon
!= null) {
318 // makeTaxonomicallyIncluded(state, parentTaxon, childTaxon, sourceRef, null, tree);
319 // makeTaxonomicallyIncluded(state, parentTaxon, childTaxon, sourceRef, null);
320 tree
.addParentChild(parentTaxon
, childTaxon
, sourceRef
, null);
322 if (logger
.isDebugEnabled()) {
323 logger
.debug("Parent-child (" + mappedParentUuid
+ "-" + childUuid
+
324 ") relationship created");
326 if (!childSet
.contains(childTaxon
)) {
328 childSet
.add(childTaxon
);
330 if (logger
.isTraceEnabled()) {
331 logger
.trace("Child taxon (" + childUuid
+ ") added to Set");
335 if (logger
.isDebugEnabled()) {
336 logger
.debug("Duplicated child taxon (" + childUuid
+ ")");
340 if (logger
.isDebugEnabled()) {
341 logger
.debug("Parent(" + mappedParentUuid
+ ") or child (" + childUuid
+ " is null");
// NOTE(review): second add-to-set check for the same child. For a child already added
// above, contains() is true here, so the "Duplicated child taxon" message below is
// presumably logged for it as well (the `else` keyword is not in the visible lines) - confirm intent.
345 if (childTaxon
!= null && !childSet
.contains(childTaxon
)) {
346 childSet
.add(childTaxon
);
347 if (logger
.isDebugEnabled()) {
348 logger
.debug("Child taxon (" + childUuid
+ ") added to Set");
351 if (logger
.isDebugEnabled()) {
352 logger
.debug("Duplicated child taxon (" + childUuid
+ ")");
// Failures are logged together with the exception (stack trace included).
356 } catch (Exception e
) {
357 logger
.error("Error creating taxonomically included relationship parent-child (" +
358 mappedParentUuid
+ "-" + childUuid
+ ")", e
);
362 if (logger
.isInfoEnabled()) {
363 logger
.info("Start saving childSet");
// Saves all collected children with a single service call.
365 getTaxonService().saveTaxonAll(childSet
);
366 if (logger
.isInfoEnabled()) {
367 logger
.info("End saving childSet");
// Clears a local reference only; presumably meant to release memory early between batches.
370 parentValuesSet
= null;
383 private TaxonomicTree
getTaxonomicTreeFor(FaunaEuropaeaImportState state
, ReferenceBase
<?
> sourceRef
) {
386 UUID treeUuid
= state
.getTreeUuid(sourceRef
);
387 if (treeUuid
== null){
388 if(logger
.isInfoEnabled()) { logger
.info(".. creating new taxonomic tree"); }
390 TransactionStatus txStatus
= startTransaction();
391 tree
= makeTreeMemSave(state
, sourceRef
);
392 commitTransaction(txStatus
);
395 tree
= getTaxonService().getTaxonomicTreeByUuid(treeUuid
);