/**
* Copyright (C) 2007 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
9
|
|
10
|
package eu.etaxonomy.cdm.print;
|
11
|
|
12
|
import java.util.ArrayList;
|
13
|
import java.util.List;
|
14
|
import java.util.Locale;
|
15
|
import java.util.UUID;
|
16
|
|
17
|
import org.apache.log4j.Level;
|
18
|
import org.apache.log4j.Logger;
|
19
|
import org.jdom.Document;
|
20
|
import org.jdom.Element;
|
21
|
import org.jdom.JDOMException;
|
22
|
import org.jdom.xpath.XPath;
|
23
|
|
24
|
import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
|
25
|
import eu.etaxonomy.cdm.print.out.IPublishOutputModule;
|
26
|
|
27
|
/**
 * Retrieves all necessary data from an {@link IXMLEntityFactory}.
 *
 * @author n.hoffmann
 * @since Apr 8, 2010
 * @version 1.0
 */
|
34
|
public class XMLHarvester {
|
35
|
private static final Logger logger = Logger.getLogger(XMLHarvester.class);
|
36
|
|
37
|
private IXMLEntityFactory factory;
|
38
|
|
39
|
private PublishConfigurator configurator;
|
40
|
|
41
|
private List<SimplifiedFeatureNode> simplifiedFeatureTree;
|
42
|
|
43
|
private IProgressMonitor progressMonitor;
|
44
|
|
45
|
/**
 * Creates a harvester for the given configuration.
 * <p>
 * The progress monitor and the entity factory are read from the configurator.
 * The feature tree identified by the configurator's feature tree uuid is then
 * fetched through the factory and turned into the simplified feature tree.
 *
 * @param configurator supplies the progress monitor, the entity factory and the feature tree uuid
 */
public XMLHarvester(PublishConfigurator configurator){
    this.configurator = configurator;
    this.progressMonitor = configurator.getProgressMonitor();
    this.factory = configurator.getFactory();

    // the factory must be assigned before this point, the tree is loaded through it
    createSimplifiedFeatureTree(factory.getFeatureTree(configurator.getFeatureTreeUuid()));
}
|
57
|
|
58
|
private void createSimplifiedFeatureTree(Element featureTreeElement) {
|
59
|
Element root = featureTreeElement.getChild("root");
|
60
|
|
61
|
Element realRoot = factory.getFeatureNode(XMLHelper.getUuid(root));
|
62
|
|
63
|
progressMonitor.subTask("Generating simplified Feature Tree.");
|
64
|
simplifiedFeatureTree = featureTreeRecursive(realRoot);
|
65
|
progressMonitor.worked(1);
|
66
|
|
67
|
logger.info("Simplified FeeatureTree created");
|
68
|
}
|
69
|
|
70
|
private List<SimplifiedFeatureNode> featureTreeRecursive(Element featureNode){
|
71
|
List<SimplifiedFeatureNode> result = new ArrayList<SimplifiedFeatureNode>();
|
72
|
|
73
|
if(featureNode != null){
|
74
|
Element children = featureNode.getChild("children");
|
75
|
|
76
|
if (children != null){
|
77
|
List<Element> childFeatureNodes = children.getChildren();
|
78
|
|
79
|
for(Element childNode : childFeatureNodes){
|
80
|
|
81
|
UUID uuid = XMLHelper.getUuid(childNode);
|
82
|
Element featureNodeElement = factory.getFeatureNode(uuid);
|
83
|
Element featureElement = factory.getFeatureForFeatureNode(uuid);
|
84
|
|
85
|
|
86
|
try {
|
87
|
Element featureTitleCache = (Element) XPath.selectSingleNode(featureElement, "//Feature/titleCache");
|
88
|
|
89
|
logger.info("The featureNode uuid is " + uuid);
|
90
|
logger.info("The feature element name is " + featureTitleCache.getValue());
|
91
|
logger.info("The feature title cache text french is " + featureTitleCache.getText().toLowerCase(Locale.FRENCH));
|
92
|
logger.info("The feature title cache value french is " + featureTitleCache.getValue().toLowerCase(Locale.FRENCH));
|
93
|
|
94
|
featureTitleCache.setText(featureTitleCache.getText().toLowerCase(Locale.FRENCH));
|
95
|
|
96
|
} catch (JDOMException e) {
|
97
|
// TODO Auto-generated catch block
|
98
|
e.printStackTrace();
|
99
|
}
|
100
|
|
101
|
////We set it to French here but this isn't the correct place the Feature/titleCache
|
102
|
featureElement.setName(featureElement.getName().toLowerCase(Locale.FRENCH));
|
103
|
SimplifiedFeatureNode simplifiedFeatureNode = new SimplifiedFeatureNode(featureElement, featureTreeRecursive(featureNodeElement));
|
104
|
result.add(simplifiedFeatureNode);
|
105
|
}
|
106
|
}
|
107
|
}
|
108
|
|
109
|
return result;
|
110
|
}
|
111
|
|
112
|
private class SimplifiedFeatureNode{
|
113
|
private Element featureElement;
|
114
|
private List<SimplifiedFeatureNode> children;
|
115
|
|
116
|
public SimplifiedFeatureNode(Element featureElement, List<SimplifiedFeatureNode> children){
|
117
|
this.featureElement = featureElement;
|
118
|
this.children = children;
|
119
|
}
|
120
|
|
121
|
/**
|
122
|
* @return the uuid
|
123
|
*/
|
124
|
public Element getFeatureElement() {
|
125
|
return featureElement;
|
126
|
}
|
127
|
|
128
|
/**
|
129
|
* @return the children
|
130
|
*/
|
131
|
public List<SimplifiedFeatureNode> getChildren() {
|
132
|
return children;
|
133
|
}
|
134
|
}
|
135
|
|
136
|
/**
|
137
|
* Commences harvesting the given {@link List} of taxonNodeElements
|
138
|
*
|
139
|
* @param taxonNodeElements
|
140
|
* @return a {@link Document} containing the necessary XML needed by the {@link IPublishOutputModule IPublishOutputModules}
|
141
|
*/
|
142
|
public Document harvest(List<Element> taxonNodeElements){
|
143
|
|
144
|
Element root = new Element(IXMLElements.ROOT);
|
145
|
|
146
|
for(Element taxonNodeElement : taxonNodeElements){
|
147
|
|
148
|
logger.warn("Adding taxonNodeElement " + taxonNodeElement.getChildText("uuid"));
|
149
|
|
150
|
//temporarily filter c15e12c1-6118-4929-aed0-b0cc90f5ab22 as it's causing a lazyInitializationException
|
151
|
if (!taxonNodeElement.getChildText("uuid").equals("c15e12c1-6118-4929-aed0-b0cc90f5ab22")) {
|
152
|
taxonNodeElement.detach();
|
153
|
|
154
|
populateTreeNodeContainer(taxonNodeElement);
|
155
|
|
156
|
root.addContent(taxonNodeElement);
|
157
|
}
|
158
|
|
159
|
}
|
160
|
|
161
|
|
162
|
Document result = new Document();
|
163
|
|
164
|
result.addContent(root);
|
165
|
|
166
|
cleanDateFields(result);
|
167
|
|
168
|
|
169
|
return result;
|
170
|
}
|
171
|
|
172
|
/**
|
173
|
* FIXME
|
174
|
* This is a hack to circumvent problems with the serialized version of
|
175
|
* datePublished objects. Remove this once this was fixed in the library
|
176
|
* @param element the context
|
177
|
*/
|
178
|
@Deprecated
|
179
|
private void cleanDateFields(Object context) {
|
180
|
String path = "//datePublished/start";
|
181
|
|
182
|
try {
|
183
|
List<Element> nodes = XPath.selectNodes(context, path);
|
184
|
|
185
|
for(Element node : nodes){
|
186
|
|
187
|
String textWithRubbish = node.getText() ;
|
188
|
|
189
|
if (textWithRubbish.length() > 5) {
|
190
|
String cleanedText = textWithRubbish.substring(0, 4);
|
191
|
node.setText(cleanedText);
|
192
|
}
|
193
|
/*else {
|
194
|
|
195
|
Element parentOfParent = (Element) node.getParent().getParent().getParent();
|
196
|
|
197
|
if (parentOfParent.getName().equals("inReference")) {
|
198
|
List<Element> parentNodes = XPath.selectNodes(parentOfParent, "//nomenclaturalReference/titleCache");
|
199
|
for(Element parentNode : parentNodes){
|
200
|
logger.error("Problem with date for node with titleCache: " + parentNode.getText());
|
201
|
}
|
202
|
}
|
203
|
}*/
|
204
|
|
205
|
/*Element parent = (Element) node.getParent().getParent();
|
206
|
|
207
|
if(parent.getName().equals("citation")){
|
208
|
Element parent2 = (Element) parent.getParent();
|
209
|
parent2.setAttribute("sort", cleanedText);
|
210
|
}*/
|
211
|
}
|
212
|
} catch (Exception e) {
|
213
|
logger.error("Error trying to clean date published field", e);
|
214
|
}
|
215
|
}
|
216
|
|
217
|
/**
|
218
|
* Get all additional content that is not included in taxon node initialization
|
219
|
*
|
220
|
* @param container
|
221
|
*/
|
222
|
private void populateTreeNodeContainer(Element taxonNodeElement){
|
223
|
|
224
|
// get the taxon from the generic service to have the uuid for further processing
|
225
|
Element taxonElement = factory.getTaxonForTaxonNode(taxonNodeElement);
|
226
|
|
227
|
progressMonitor.subTask("Gathering data for taxon: " + XMLHelper.getTitleCache(taxonElement));
|
228
|
|
229
|
// get initialized accepted taxon
|
230
|
// TODO right now we are getting that from the portal service but should consider to use the generic service
|
231
|
// as the portal service is more likely to change
|
232
|
Element fullTaxonElement = factory.getAcceptedTaxonElement(taxonElement);
|
233
|
|
234
|
//populateTypeDesignations(fullTaxonElement);
|
235
|
|
236
|
// get descriptions
|
237
|
if(configurator.isDoDescriptions()){
|
238
|
populateDescriptions(fullTaxonElement);
|
239
|
}
|
240
|
|
241
|
// get polytomous key
|
242
|
|
243
|
if(configurator.isDoPolytomousKey()){
|
244
|
populatePolytomousKey(fullTaxonElement);
|
245
|
}
|
246
|
|
247
|
// get synonym
|
248
|
if(configurator.isDoSynonymy()){
|
249
|
populateSynonyms(fullTaxonElement);
|
250
|
}
|
251
|
|
252
|
// get media
|
253
|
if(configurator.isDoImages()){
|
254
|
populateImages(fullTaxonElement);
|
255
|
}
|
256
|
|
257
|
// add taxon element to the node element
|
258
|
XMLHelper.addContent(fullTaxonElement, taxonNodeElement);
|
259
|
|
260
|
// get taxonomically included taxa
|
261
|
if(configurator.isDoPublishEntireBranches()){
|
262
|
populateChildren(taxonNodeElement);
|
263
|
}
|
264
|
|
265
|
try {
|
266
|
populateReferences(fullTaxonElement);
|
267
|
} catch (JDOMException e) {
|
268
|
// TODO Auto-generated catch block
|
269
|
e.printStackTrace();
|
270
|
}
|
271
|
|
272
|
populateTypeDesignations(fullTaxonElement);
|
273
|
|
274
|
progressMonitor.worked(1);
|
275
|
|
276
|
}
|
277
|
|
278
|
// the name isn't populated in the taxonNode http://dev.e-taxonomy.eu/cdmserver/flora_central_africa/taxonNode/de808dae-e50a-42f2-a4da-bd12f2c2faaf/taxon.json
|
279
|
// but can get the name from http://dev.e-taxonomy.eu/cdmserver/flora_central_africa/portal/taxon/8f6d5498-1f4b-420f-a1ae-3f0ed9406bb1.json
|
280
|
private void populateTypeDesignations(Element fullTaxonElement) {
|
281
|
|
282
|
Element nameElement = fullTaxonElement.getChild("name");
|
283
|
Element uuidElement = fullTaxonElement.getChild("uuid");
|
284
|
|
285
|
List<Element> typeDesignations = factory.getTypeDesignations(nameElement);
|
286
|
|
287
|
nameElement.removeChild("typeDesignations");
|
288
|
|
289
|
for(Element typeDesignation: typeDesignations){
|
290
|
XMLHelper.addContent(typeDesignation, "typeDesignations", nameElement);
|
291
|
}
|
292
|
}
|
293
|
|
294
|
private void populateReferences(Element fullTaxonElement) throws JDOMException {
|
295
|
|
296
|
//get the references from the taxonElement
|
297
|
//String referencePattern = "//name/nomenclaturalReference";
|
298
|
String referencePattern = "/Taxon/name/nomenclaturalReference";
|
299
|
|
300
|
//but there could be many references
|
301
|
Element referenceElement = (Element) XPath.selectSingleNode(fullTaxonElement, referencePattern); //Mon 1st july do we get the /Taxon/name/nomenclaturalReference from the taxon node - is this working
|
302
|
//List<Element> descriptionElementElements = XPath.selectNodes(context, featurePattern + "/..");
|
303
|
|
304
|
List<Element> elementList = null;
|
305
|
|
306
|
if(referenceElement != null){ //the referencePattern was found in the taxonElement
|
307
|
|
308
|
List<Element> refs = factory.getReferences(referenceElement);//getReferences
|
309
|
|
310
|
fullTaxonElement.removeChild("nomenclaturalReference");//remove the references
|
311
|
|
312
|
for(Element ref: refs){
|
313
|
XMLHelper.addContent(ref, "nomenclaturalReference", fullTaxonElement);
|
314
|
}
|
315
|
}
|
316
|
|
317
|
}
|
318
|
|
319
|
/**
|
320
|
* Populates all child nodes of the given taxonNodeElement
|
321
|
*
|
322
|
* @param container
|
323
|
*/
|
324
|
private void populateChildren(Element taxonNodeElement){
|
325
|
|
326
|
logger.setLevel(Level.INFO);
|
327
|
logger.info("populating branch");
|
328
|
|
329
|
List<Element> childNodeElements = factory.getChildNodes(taxonNodeElement);
|
330
|
|
331
|
for(Element childNodeElement : childNodeElements){
|
332
|
|
333
|
populateTreeNodeContainer(childNodeElement);
|
334
|
XMLHelper.addContent(childNodeElement, "childNodes", taxonNodeElement);
|
335
|
}
|
336
|
}
|
337
|
|
338
|
private void populatePolytomousKey(Element taxonElement){
|
339
|
logger.setLevel(Level.INFO);
|
340
|
logger.info("populating Polytomous key");
|
341
|
logger.info("populating Polytomous key taxonElement " + XMLHelper.getUuid(taxonElement) + " name " + XMLHelper.getTitleCache(taxonElement));
|
342
|
|
343
|
//List<Element> polytomousKey = factory.getPolytomousKey(taxonElement);
|
344
|
Element polytomousKey = factory.getPolytomousKey(taxonElement);
|
345
|
XMLHelper.addContent(polytomousKey, "key", taxonElement);
|
346
|
|
347
|
/*for(Element keyRow : polytomousKey){
|
348
|
XMLHelper.addContent(keyRow, "key", taxonElement);
|
349
|
}*/
|
350
|
|
351
|
}
|
352
|
|
353
|
/**
|
354
|
* Retrieves descriptions for the given taxonElement and adds them to a SimplifiedFeatureNode
|
355
|
*
|
356
|
* @param taxonElement
|
357
|
*/
|
358
|
private void populateDescriptions(Element taxonElement){
|
359
|
taxonElement.removeChild("descriptions");
|
360
|
|
361
|
Element rawDescriptions = factory.getDescriptions(taxonElement);
|
362
|
|
363
|
Element featureTitleCache;
|
364
|
try {
|
365
|
//featureTitleCache = (Element) XPath.selectSingleNode(rawDescriptions, "//feature/representation_L10n");
|
366
|
List descs = XPath.selectNodes(rawDescriptions, "//feature/representation_L10n");
|
367
|
for(Object des : descs){
|
368
|
logger.info("The descriptions //feature/representation_L10n is " + ((Element) des).getValue());
|
369
|
}
|
370
|
|
371
|
} catch (JDOMException e1) {
|
372
|
// TODO Auto-generated catch block
|
373
|
e1.printStackTrace();
|
374
|
}
|
375
|
|
376
|
|
377
|
//logger.setLevel(Level.DEBUG);
|
378
|
|
379
|
logger.debug("The taxonElement is " + XMLHelper.getUuid(taxonElement) + " name " + XMLHelper.getTitleCache(taxonElement));
|
380
|
|
381
|
Element descriptionsElement = new Element("descriptions");
|
382
|
Element featuresElement = new Element("features");
|
383
|
|
384
|
for(SimplifiedFeatureNode simplifiedFeatureNode : simplifiedFeatureTree){
|
385
|
|
386
|
try {
|
387
|
|
388
|
processFeatureNode(simplifiedFeatureNode, rawDescriptions, featuresElement);
|
389
|
|
390
|
//UUID featureUuid = XMLHelper.getUuid(simplifiedFeatureNode.getFeatureElement());
|
391
|
//String featureTitleCache = XMLHelper.getTitleCache(simplifiedFeatureNode.getFeatureElement());
|
392
|
//logger.debug(" The feature uuid is " + featureUuid + " and name is " + featureTitleCache);
|
393
|
|
394
|
|
395
|
} catch (JDOMException e) {
|
396
|
logger.error(e);
|
397
|
}
|
398
|
}
|
399
|
XMLHelper.addContent(featuresElement, descriptionsElement);
|
400
|
XMLHelper.addContent(descriptionsElement, taxonElement);
|
401
|
}
|
402
|
|
403
|
private void processFeatureNode(SimplifiedFeatureNode featureNode, Object context, Element parentFeatureElement) throws JDOMException{
|
404
|
|
405
|
// gets the feature elements with the current feature uuid
|
406
|
UUID featureUuid = XMLHelper.getUuid(featureNode.getFeatureElement());
|
407
|
|
408
|
String featurePatternold = "//feature[contains(uuid,'" + featureUuid + "')]";
|
409
|
//String featurePattern = "/ArrayList[1]/e[1]/elements[1]/e[1]/feature/uuid[.='" + featureUuid + "']";
|
410
|
//Xpath is now more specific so that only the feature associated with a particular Taxon and not the
|
411
|
//Taxon's parent or children are selected.
|
412
|
//Alternative would be to ensure the context object only contains descriptions for the Taxon element of interest
|
413
|
//Need to look at the taxonPortalController.doGetDescriptions to change this
|
414
|
String featurePattern = "/ArrayList[1]/e/elements/e/feature[contains(uuid,'" + featureUuid + "')]";
|
415
|
|
416
|
Element feature = (Element) XPath.selectSingleNode(context, featurePattern);
|
417
|
|
418
|
if(feature != null){ //the featurePattern was found in the raw descriptions data
|
419
|
|
420
|
List<Element> descriptionElementElements = XPath.selectNodes(context, featurePattern + "/..");
|
421
|
|
422
|
logger.debug("No of desc elements " + descriptionElementElements.size() + " featureUUID " + featureUuid + " feature type is " + XMLHelper.getTitleCache(featureNode.getFeatureElement()));
|
423
|
// add matching description elements as children to this feature element
|
424
|
for(Element descriptionElementElement : descriptionElementElements){
|
425
|
|
426
|
descriptionElementElement.removeChild("feature");
|
427
|
descriptionElementElement.setName("descriptionelement");
|
428
|
XMLHelper.addContent(descriptionElementElement, "descriptionelements", feature);
|
429
|
|
430
|
}
|
431
|
XMLHelper.addContent(feature, parentFeatureElement);
|
432
|
}else if(featureNode.getChildren().size() > 0){
|
433
|
|
434
|
Element featureElement = featureNode.getFeatureElement();
|
435
|
Element featureElementClone = (Element) featureElement.clone();
|
436
|
feature = (Element) featureElementClone.detach();
|
437
|
|
438
|
XMLHelper.addContent(feature, parentFeatureElement);
|
439
|
}
|
440
|
|
441
|
// recurse into children
|
442
|
for(SimplifiedFeatureNode childFeatureNode : featureNode.getChildren()){
|
443
|
|
444
|
logger.debug("No of featureNode children " + featureNode.getChildren().size());//always 10
|
445
|
|
446
|
UUID childFeatureUuid = XMLHelper.getUuid(childFeatureNode.getFeatureElement());
|
447
|
String childFeatureTitleCache = XMLHelper.getTitleCache(childFeatureNode.getFeatureElement());
|
448
|
logger.debug(" The feature is " + childFeatureUuid + " name " + childFeatureTitleCache);
|
449
|
|
450
|
//9 features in each simplifiedFeatureNode but some of the feature elements are null for a particular featureTitleCache,
|
451
|
//e.g. Ecology, Description - Description has child features
|
452
|
|
453
|
//creates the second level features i.e. descriptions/features/feature/feature for the description
|
454
|
processFeatureNode(childFeatureNode, context, feature);
|
455
|
}
|
456
|
}
|
457
|
|
458
|
|
459
|
/*private Element processDescriptionsRecursive(Object context, SimplifiedFeatureNode simplifiedFeatureNode) throws JDOMException{
|
460
|
// gets the feature elements with the current uuid
|
461
|
String featurePattern = "//feature[contains(uuid,'" + simplifiedFeatureNode.getUuid() + "')]";
|
462
|
|
463
|
Element feature = (Element) XPath.selectSingleNode(context, featurePattern);
|
464
|
|
465
|
if(feature != null){
|
466
|
// recurse into children
|
467
|
for(SimplifiedFeatureNode childFeatureNode : simplifiedFeatureNode.getChildren()){
|
468
|
Element childFeatureWithElements = processDescriptionsRecursive(context, childFeatureNode);
|
469
|
XMLHelper.addContent(childFeatureWithElements, "features", feature);
|
470
|
}
|
471
|
|
472
|
// get the parents of all feature elements with the current uuid
|
473
|
List<Element> descriptionElementElements = XPath.selectNodes(context, featurePattern + "/..");
|
474
|
|
475
|
// add matching description elements as children to this feature element
|
476
|
for(Element descriptionElementElement : descriptionElementElements){
|
477
|
descriptionElementElement.removeChild("feature");
|
478
|
descriptionElementElement.setName("descriptionelement");
|
479
|
XMLHelper.addContent(descriptionElementElement, "descriptionelements", feature);
|
480
|
}
|
481
|
}
|
482
|
|
483
|
return feature;
|
484
|
}*/
|
485
|
|
486
|
/**
|
487
|
* Retrieves the synonymy for the given taxonElement
|
488
|
*
|
489
|
* @param taxonElement
|
490
|
*/
|
491
|
private void populateSynonyms(Element taxonElement){
|
492
|
List<Element> synonymy = factory.getSynonymy(taxonElement);
|
493
|
|
494
|
for(Element synonymyNode : synonymy){
|
495
|
|
496
|
List<Element> children = synonymyNode.getChildren("e");
|
497
|
|
498
|
for(Element child : children){
|
499
|
|
500
|
List<Element> children2 = child.getChildren("e");
|
501
|
|
502
|
for(Element child2 : children2){
|
503
|
|
504
|
if (child2.getChild("name") != null) {
|
505
|
populateTypeDesignations(child2);// pass in the name of the synonym from synonymy/e/e/name
|
506
|
//populateImages(child2);
|
507
|
}
|
508
|
|
509
|
}
|
510
|
}
|
511
|
|
512
|
XMLHelper.addContent(synonymyNode, "synonymy", taxonElement);
|
513
|
}
|
514
|
}
|
515
|
|
516
|
|
517
|
|
518
|
/**
|
519
|
*
|
520
|
* @param taxonElement
|
521
|
*/
|
522
|
private void populateImages(Element taxonElement){
|
523
|
|
524
|
factory.getMedia(taxonElement);
|
525
|
logger.warn("Populating images");
|
526
|
//Element nameElement = fullTaxonElement.getChild("name");
|
527
|
//Element uuidElement = fullTaxonElement.getChild("uuid");
|
528
|
|
529
|
List<Element> mediaElements = factory.getMedia(taxonElement);
|
530
|
|
531
|
//nameElement.removeChild("typeDesignations");
|
532
|
|
533
|
for(Element media: mediaElements){
|
534
|
XMLHelper.addContent(media, "media", taxonElement);
|
535
|
}
|
536
|
}
|
537
|
}
|