/**
 * Copyright (C) 2007 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
10 package eu
.etaxonomy
.cdm
.io
.faunaEuropaea
;
12 import static eu
.etaxonomy
.cdm
.io
.faunaEuropaea
.FaunaEuropaeaTransformer
.A_AUCT
;
13 import static eu
.etaxonomy
.cdm
.io
.faunaEuropaea
.FaunaEuropaeaTransformer
.P_PARENTHESIS
;
14 import static eu
.etaxonomy
.cdm
.io
.faunaEuropaea
.FaunaEuropaeaTransformer
.Q_NO_RESTRICTION
;
15 import static eu
.etaxonomy
.cdm
.io
.faunaEuropaea
.FaunaEuropaeaTransformer
.T_STATUS_ACCEPTED
;
16 import static eu
.etaxonomy
.cdm
.io
.faunaEuropaea
.FaunaEuropaeaTransformer
.T_STATUS_NOT_ACCEPTED
;
18 import java
.sql
.ResultSet
;
19 import java
.sql
.SQLException
;
20 import java
.util
.HashMap
;
21 import java
.util
.HashSet
;
22 import java
.util
.Iterator
;
23 import java
.util
.List
;
26 import java
.util
.UUID
;
28 import org
.apache
.log4j
.Logger
;
29 import org
.springframework
.stereotype
.Component
;
30 import org
.springframework
.transaction
.TransactionStatus
;
32 import eu
.etaxonomy
.cdm
.io
.common
.ICdmIO
;
33 import eu
.etaxonomy
.cdm
.io
.common
.MapWrapper
;
34 import eu
.etaxonomy
.cdm
.io
.common
.Source
;
35 import eu
.etaxonomy
.cdm
.io
.profiler
.ProfilerController
;
36 import eu
.etaxonomy
.cdm
.model
.agent
.TeamOrPersonBase
;
37 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
38 import eu
.etaxonomy
.cdm
.model
.common
.ISourceable
;
39 import eu
.etaxonomy
.cdm
.model
.common
.IdentifiableEntity
;
40 import eu
.etaxonomy
.cdm
.model
.common
.OriginalSource
;
41 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
42 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
43 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceBase
;
44 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
45 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
46 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonomicTree
;
47 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
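/**
 * Import step that creates the taxonomically included (parent-child) relationships
 * between taxa read from the Fauna Europaea source database.
 *
 * @author a.babadshanjan
 */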
57 public class FaunaEuropaeaRelTaxonIncludeImport
extends FaunaEuropaeaImportBase
{
59 public static final String OS_NAMESPACE_TAXON
= "Taxon";
60 private static final Logger logger
= Logger
.getLogger(FaunaEuropaeaRelTaxonIncludeImport
.class);
62 /* Max number of taxa to retrieve (for test purposes) */
63 private int maxTaxa
= 0;
64 /* Max number of taxa to be saved in CDM DB with one service call */
65 private int limit
= 5000; // TODO: Make configurable
66 /* Max number of taxa to be retrieved from CDM DB with one service call */
67 private int limitRetrieve
= 10000; // TODO: Make configurable
68 /* Highest taxon index in the FauEu database */
69 private int highestTaxonIndex
= 0;
70 /* Number of times method buildParentName() has been called for one taxon */
71 private int callCount
= 0;
72 private Map
<Integer
, FaunaEuropaeaTaxon
> fauEuTaxonMap
= new HashMap();
77 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
80 protected boolean doCheck(FaunaEuropaeaImportState state
) {
81 boolean result
= true;
82 FaunaEuropaeaImportConfigurator fauEuConfig
= state
.getConfig();
83 logger
.warn("Checking for Taxa not yet fully implemented");
84 result
&= checkTaxonStatus(fauEuConfig
);
90 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
92 protected boolean isIgnore(FaunaEuropaeaImportState state
) {
93 return ! state
.getConfig().isDoTaxa();
/*
 * Taxon status check invoked by doCheck().
 * In the lines visible here it obtains the Source from the configurator and requests a
 * result set for sqlStr; the result set is not read afterwards.
 * NOTE(review): neither the declaration of sqlStr nor the return statement is visible in
 * this part of the file - confirm both against the complete source.
 * NOTE(review): the SQLException handling below is commented out - confirm that
 * Source.getResultSet() does not need it.
 */
96 private boolean checkTaxonStatus(FaunaEuropaeaImportConfigurator fauEuConfig
) {
97 boolean result
= true;
99 Source source
= fauEuConfig
.getSource();
101 ResultSet rs
= source
.getResultSet(sqlStr
);
103 // } catch (SQLException e) {
104 // e.printStackTrace();
109 protected boolean doInvoke(FaunaEuropaeaImportState state
) {
111 boolean success
= true;
113 Map
<String
, MapWrapper
<?
extends CdmBase
>> stores
= state
.getStores();
114 MapWrapper
<TaxonBase
> taxonStore
= (MapWrapper
<TaxonBase
>)stores
.get(ICdmIO
.TAXON_STORE
);
115 taxonStore
.makeEmpty();
116 MapWrapper
<TeamOrPersonBase
> authorStore
= (MapWrapper
<TeamOrPersonBase
>)stores
.get(ICdmIO
.TEAM_STORE
);
117 authorStore
.makeEmpty();
119 if(logger
.isInfoEnabled()) { logger
.info("Start making taxonomically included relationships..."); }
121 // TransactionStatus txStatus = startTransaction();
123 ProfilerController
.memorySnapshot();
125 success
= processParentsChildren(state
);
127 ProfilerController
.memorySnapshot();
129 // commitTransaction(txStatus);
131 logger
.info("End making taxa...");
136 /** Retrieve child-parent uuid map from CDM DB */
137 private boolean processParentsChildren(FaunaEuropaeaImportState state
) {
139 int limit
= state
.getConfig().getLimitSave();
141 TransactionStatus txStatus
= null;
143 Map
<UUID
, UUID
> childParentMap
= null;
144 FaunaEuropaeaImportConfigurator fauEuConfig
= state
.getConfig();
145 Source source
= fauEuConfig
.getSource();
147 boolean success
= true;
152 " SELECT dbo.Taxon.UUID AS ChildUuid, Parent.UUID AS ParentUuid " +
153 " FROM dbo.Taxon INNER JOIN dbo.Taxon AS Parent " +
154 " ON dbo.Taxon.TAX_TAX_IDPARENT = Parent.TAX_ID " +
155 " WHERE (dbo.Taxon.TAX_VALID <> 0) AND (dbo.Taxon.TAX_AUT_ID <> " + A_AUCT
+ ")";
157 if (logger
.isInfoEnabled()) {
158 logger
.info("Query: " + strQuery
);
161 ResultSet rs
= source
.getResultSet(strQuery
);
165 if ((i
++ % limit
) == 0) {
167 ProfilerController
.memorySnapshot();
168 txStatus
= startTransaction();
169 childParentMap
= new HashMap
<UUID
, UUID
>(limit
);
171 if(logger
.isInfoEnabled()) {
172 logger
.info("Parent-child mappings retrieved: " + (i
-1));
176 String childUuidStr
= rs
.getString("ChildUuid");
177 String parentUuidStr
= rs
.getString("ParentUuid");
178 UUID childUuid
= UUID
.fromString(childUuidStr
);
179 UUID parentUuid
= UUID
.fromString(parentUuidStr
);
181 if (!childParentMap
.containsKey(childUuid
)) {
183 childParentMap
.put(childUuid
, parentUuid
);
186 if(logger
.isDebugEnabled()) {
187 logger
.debug("Duplicated child UUID (" + childUuid
+ ")");
190 if (((i
% limit
) == 0 && i
!= 1 )) {
192 success
= createRelationships(state
, childParentMap
);
194 childParentMap
= null;
195 commitTransaction(txStatus
);
197 if(logger
.isInfoEnabled()) {
198 logger
.info("i = " + i
+ " - Transaction committed");
203 } catch (SQLException e
) {
204 logger
.error("SQLException:" + e
);
/*
 * Copies entries of the given map into a newly created HashMap (partMap), taking one
 * entry from the map's entry-set iterator per loop pass; the loop is bounded by border.
 * The size of the given map is logged before the loop (info) and after it (debug).
 * NOTE(review): the end of the loop body and the return statement are not visible in
 * this part of the file; presumably partMap is returned - confirm against the complete
 * source, including whether the given map is modified inside the loop.
 */
211 public Map
<UUID
, UUID
> partMap(int border
, Map
<UUID
, UUID
> map
) {
213 if (logger
.isInfoEnabled()) {
214 logger
.info("Map size: " + map
.size());
216 Set
<Map
.Entry
<UUID
, UUID
>> entries
= map
.entrySet();
217 Iterator
<Map
.Entry
<UUID
, UUID
>> entryIter
= entries
.iterator();
218 Map
<UUID
, UUID
> partMap
= new HashMap
<UUID
, UUID
>();
// NOTE(review): the loop runs border times and the hasNext() guard below is commented
// out, so entryIter.next() throws NoSuchElementException as soon as border exceeds the
// number of entries available - confirm that callers never pass a larger border.
220 for (int i
= 0; i
< border
; i
++) {
221 //while (entryIter.hasNext()) {
223 Map
.Entry
<UUID
, UUID
> mapEntry
= (Map
.Entry
<UUID
, UUID
>)entryIter
.next();
224 partMap
.put(mapEntry
.getKey(), mapEntry
.getValue());
228 if (logger
.isDebugEnabled()) {
229 logger
.debug("Map size: " + map
.size());
235 /* Creates parent-child relationships.
236 * Parent-child pairs are retrieved in blocks via findByUUID(Set<UUID>) from CDM DB.
237 * It takes about 5min to save a block of 5000 taxa.*/
238 private boolean createRelationships(FaunaEuropaeaImportState state
, Map
<UUID
, UUID
> childParentMap
) {
240 ReferenceBase
<?
> sourceRef
= state
.getConfig().getSourceReference();
241 boolean success
= true;
243 //add tree to new session
244 TaxonomicTree tree
= state
.getTree(sourceRef
);
246 tree
= makeTree(state
, sourceRef
);
248 getTaxonService().saveTaxonomicTree(tree
);
250 Set
<TaxonBase
> childSet
= new HashSet
<TaxonBase
>(limit
);
252 Set
<UUID
> childKeysSet
= childParentMap
.keySet();
253 Set
<UUID
> parentValuesSet
= new HashSet
<UUID
>(childParentMap
.values());
255 if (logger
.isInfoEnabled()) {
256 logger
.info("Start reading children and parents");
258 List
<TaxonBase
> children
= getTaxonService().findByUuid(childKeysSet
);
259 List
<TaxonBase
> parents
= getTaxonService().findByUuid(parentValuesSet
);
260 Map
<UUID
, TaxonBase
> parentsMap
= new HashMap
<UUID
, TaxonBase
>();
261 for (TaxonBase taxonBase
: parents
){
262 parentsMap
.put(taxonBase
.getUuid(), taxonBase
);
266 if (logger
.isInfoEnabled()) {
267 logger
.info("End reading children and parents");
271 if (logger
.isTraceEnabled()) {
272 for (UUID uuid
: childKeysSet
) {
273 logger
.trace("child uuid query: " + uuid
);
276 if (logger
.isTraceEnabled()) {
277 for (UUID uuid
: parentValuesSet
) {
278 logger
.trace("parent uuid query: " + uuid
);
281 if (logger
.isTraceEnabled()) {
282 for (TaxonBase tb
: children
) {
283 logger
.trace("child uuid result: " + tb
.getUuid());
286 if (logger
.isTraceEnabled()) {
287 for (TaxonBase tb
: parents
) {
288 logger
.trace("parent uuid result: " + tb
.getUuid());
292 UUID mappedParentUuid
= null;
293 UUID parentUuid
= null;
294 UUID childUuid
= null;
296 for (TaxonBase child
: children
) {
299 Taxon childTaxon
= child
.deproxy(child
, Taxon
.class);
300 childUuid
= childTaxon
.getUuid();
301 mappedParentUuid
= childParentMap
.get(childUuid
);
302 TaxonBase parent
= null;
304 TaxonBase potentialParent
= parentsMap
.get(mappedParentUuid
);
305 // for (TaxonBase potentialParent : parents ) {
306 // parentUuid = potentialParent.getUuid();
307 // if(parentUuid.equals(mappedParentUuid)) {
308 parent
= potentialParent
;
309 if (logger
.isDebugEnabled()) {
310 logger
.debug("Parent (" + parentUuid
+ ") found for child (" + childUuid
+ ")");
316 Taxon parentTaxon
= parent
.deproxy(parent
, Taxon
.class);
318 if (childTaxon
!= null && parentTaxon
!= null) {
320 // makeTaxonomicallyIncluded(state, parentTaxon, childTaxon, sourceRef, null, tree);
321 makeTaxonomicallyIncluded(state
, parentTaxon
, childTaxon
, sourceRef
, null);
323 if (logger
.isDebugEnabled()) {
324 logger
.debug("Parent-child (" + parentUuid
+ "-" + childUuid
+
325 ") relationship created");
327 if (!childSet
.contains(childTaxon
)) {
329 childSet
.add(childTaxon
);
331 if (logger
.isTraceEnabled()) {
332 logger
.trace("Child taxon (" + childUuid
+ ") added to Set");
336 if (logger
.isDebugEnabled()) {
337 logger
.debug("Duplicated child taxon (" + childUuid
+ ")");
341 if (logger
.isDebugEnabled()) {
342 logger
.debug("Parent(" + parentUuid
+ ") or child (" + childUuid
+ " is null");
346 if (childTaxon
!= null && !childSet
.contains(childTaxon
)) {
347 childSet
.add(childTaxon
);
348 if (logger
.isDebugEnabled()) {
349 logger
.debug("Child taxon (" + childUuid
+ ") added to Set");
352 if (logger
.isDebugEnabled()) {
353 logger
.debug("Duplicated child taxon (" + childUuid
+ ")");
357 } catch (Exception e
) {
358 logger
.error("Error creating taxonomically included relationship parent-child (" +
359 parentUuid
+ "-" + childUuid
+ ")");
363 if (logger
.isInfoEnabled()) {
364 logger
.info("Start saving childSet");
366 getTaxonService().saveTaxonAll(childSet
);
367 if (logger
.isInfoEnabled()) {
368 logger
.info("End saving childSet");
371 parentValuesSet
= null;
380 private boolean makeTaxonomicallyIncluded(FaunaEuropaeaImportState state
, Taxon toTaxon
, Taxon fromTaxon
,
381 ReferenceBase citation
, String microCitation
){
382 boolean success
= true;
383 ReferenceBase sec
= toTaxon
.getSec();
384 sec
= CdmBase
.deproxy(sec
, ReferenceBase
.class);
386 TaxonomicTree tree
= state
.getTree(sec
);
388 // Session session = getTaxonService().getSession();
390 // if (session.contains(sec)) {
391 // logger.debug("Sec contained in session. Id = " + sec.getId());
393 // logger.info("Sec not contained in session. Id = " + sec.getId());
394 // getReferenceService().merge(sec);
398 tree
= makeTree(state
, sec
);
401 // if (session.contains(tree)) {
402 // logger.debug("Taxonomic tree contained in session. Id = " + tree.getId());
404 // logger.info("Taxonomic tree not contained in session. Id = " + tree.getId());
405 // UUID treeUuid = state.getTree(sec).getUuid();
406 // tree = getTaxonService().getTaxonomicTreeByUuid(treeUuid);
407 // logger.info("Tree retrieved");
410 success
= tree
.addParentChild(toTaxon
, fromTaxon
, citation
, microCitation
);