1
|
package eu.etaxonomy.cdm.print.out.mediawiki;
|
2
|
|
3
|
import java.io.BufferedInputStream;
|
4
|
import java.io.File;
|
5
|
import java.io.FileInputStream;
|
6
|
import java.io.FileWriter;
|
7
|
import java.io.IOException;
|
8
|
import java.net.MalformedURLException;
|
9
|
import java.net.URL;
|
10
|
import java.util.ArrayList;
|
11
|
import java.util.Iterator;
|
12
|
import java.util.List;
|
13
|
import java.util.UUID;
|
14
|
|
15
|
import javax.security.auth.login.LoginException;
|
16
|
|
17
|
import org.apache.commons.io.FileUtils;
|
18
|
import org.apache.log4j.Logger;
|
19
|
import org.jdom.Document;
|
20
|
import org.jdom.Element;
|
21
|
import org.jdom.JDOMException;
|
22
|
import org.jdom.input.SAXBuilder;
|
23
|
import org.jdom.output.Format;
|
24
|
import org.jdom.output.XMLOutputter;
|
25
|
import org.jdom.xpath.XPath;
|
26
|
|
27
|
import eu.etaxonomy.cdm.common.monitor.DefaultProgressMonitor;
|
28
|
import eu.etaxonomy.cdm.config.ConfigFileUtil;
|
29
|
import eu.etaxonomy.cdm.print.IXMLEntityFactory;
|
30
|
import eu.etaxonomy.cdm.print.PublishConfigurator;
|
31
|
import eu.etaxonomy.cdm.print.Publisher;
|
32
|
import eu.etaxonomy.cdm.print.out.IPublishOutputModule;
|
33
|
|
34
|
/**
 * Fill in all parameters and you get a complete export from a cdm database to a
 * mediawiki.
 *
 * TODO should we move this class somewhere else and/or rename it?
 *
 * @author s.buers, l.morris
 *
 */
|
43
|
public class Cdm2MediawikiExporter {
|
44
|
|
45
|
private static final String IMAGES_FOLDER = "images";
|
46
|
|
47
|
private static final String FILESEPARATOR = File.separator;
|
48
|
|
49
|
//constants
|
50
|
private static final String MEDIAWIKI_CDM_SUB_DIR = "mediawiki_tmp";
|
51
|
|
52
|
private static final String IMAGE_DIR = MEDIAWIKI_CDM_SUB_DIR
|
53
|
+ File.separator + "images";
|
54
|
|
55
|
private static final String CDM_EXPORT_FILE_NAME = "cdm_output";
|
56
|
|
57
|
private static final String PAGE_SUMMARY = "automatic import from CDM";
|
58
|
|
59
|
//-------------------
|
60
|
|
61
|
private static final Logger logger = Logger
|
62
|
.getLogger(Cdm2MediawikiExporter.class);
|
63
|
|
64
|
private PublishConfigurator configurator = PublishConfigurator.NewRemoteInstance();
|
65
|
|
66
|
private IXMLEntityFactory factory;
|
67
|
|
68
|
// where the mediawiki xml code is stored
|
69
|
private String mediawikiFileWithPath = null;
|
70
|
|
71
|
// where the cdm exported xml can be stored
|
72
|
private String cdm_output_file = null;
|
73
|
|
74
|
private Document cdmOutputDocument = null;
|
75
|
private Document externalDocument = null;
|
76
|
|
77
|
private MediawikiOutputModule wikiOutputModule;
|
78
|
|
79
|
private File temporaryExportFolder = null;
|
80
|
|
81
|
private List<String> localImages;
|
82
|
|
83
|
|
84
|
public void export(String serviceUrl, UUID taxonNodeUuid, UUID treeNodeUuid, String wikiUrl,
|
85
|
String wikiLoginUid, String passwd, String wikiPageNamespace,
|
86
|
boolean import2Mediawiki, boolean deleteOutputFiles,
|
87
|
boolean importImages) throws MalformedURLException {
|
88
|
|
89
|
//TODO
|
90
|
}
|
91
|
|
92
|
/**
|
93
|
* does the whole export process: runs cdm export to mediawiki xml-file and
|
94
|
* wiki import of this file
|
95
|
*
|
96
|
* @param serviceUrl
|
97
|
* @param taxonName
|
98
|
* @param wikiUrl
|
99
|
* - url of the destination wiki
|
100
|
* @param wikiLoginUid
|
101
|
* - uid of wiki admin
|
102
|
* @param passwd
|
103
|
* - password of the above wiki admin
|
104
|
* @param wikiPageNamespace
|
105
|
* - prefix that, will be added to all pages null or "" will make
|
106
|
* no prefix
|
107
|
* @throws MalformedURLException
|
108
|
*
|
109
|
* TODO: make passwd "unplain" MAYDO: pass more parameters e.g.:
|
110
|
* alternative stylesheet layout parameters (that may force the
|
111
|
* use of different stylesheet) export folder - we use a
|
112
|
* temporary so far boolean for telling if we want to keep the
|
113
|
* mediawiki xml file ...
|
114
|
*/
|
115
|
public void export(String portalUrl, String serviceUrl, String taxonName, String classificationName, String wikiUrl,
|
116
|
String wikiLoginUid, String passwd, String wikiPageNamespace,
|
117
|
boolean import2Mediawiki, boolean deleteOutputFiles,
|
118
|
boolean importImages) throws MalformedURLException {
|
119
|
|
120
|
// get taxon node uuid from taxon name and pass it to the configurator:
|
121
|
// TODO get classification name from export() - add a parameter
|
122
|
// and use it to choose the right taxon
|
123
|
|
124
|
// setup configurator
|
125
|
setupConfigurator(serviceUrl);
|
126
|
configurator.addSelectedTaxonNodeElements(factory.getTaxonNodesByName(taxonName, classificationName));
|
127
|
|
128
|
|
129
|
export(portalUrl, serviceUrl, wikiUrl, wikiLoginUid, passwd,
|
130
|
wikiPageNamespace, import2Mediawiki, deleteOutputFiles,
|
131
|
importImages, true);
|
132
|
}
|
133
|
/**
|
134
|
* if you already have a cdm xml export in some file you put it in here
|
135
|
* the mediawiki xml is created and imported to an mediawiki
|
136
|
* does step 2 and 3 out of all 3 export steps
|
137
|
*
|
138
|
* @param filename
|
139
|
* @param serviceUrl
|
140
|
* @param taxonName
|
141
|
* @param classificationName
|
142
|
* @param wikiUrl
|
143
|
* @param wikiLoginUid
|
144
|
* @param passwd
|
145
|
* @param wikiPageNamespace
|
146
|
* @param import2Mediawiki
|
147
|
* @param deleteOutputFiles
|
148
|
* @param importImages
|
149
|
* @throws MalformedURLException
|
150
|
*/
|
151
|
public void exportFromXmlFile(String portalUrl, String filename, String serviceUrl,
|
152
|
String wikiUrl, String wikiLoginUid,
|
153
|
String passwd, String wikiPageNamespace, boolean import2Mediawiki,
|
154
|
boolean deleteOutputFiles, boolean importImages)
|
155
|
throws MalformedURLException {
|
156
|
|
157
|
//put the document to a field:
|
158
|
externalDocument = getDocument(filename);
|
159
|
|
160
|
// setupConfigurator(serviceUrl);
|
161
|
|
162
|
// and run export with usePublisher=false:
|
163
|
export(portalUrl, serviceUrl, wikiUrl, wikiLoginUid, passwd,
|
164
|
wikiPageNamespace, import2Mediawiki, deleteOutputFiles,
|
165
|
importImages, false);
|
166
|
|
167
|
}
|
168
|
|
169
|
/*
|
170
|
* TODO: make passwd "unplain" MAYDO: pass more parameters e.g.: alternative
|
171
|
* stylesheet layout parameters (that may force the use of different
|
172
|
* stylesheet) export folder - we use a temporary so far boolean for telling
|
173
|
* if we want to keep the mediawiki xml file ...
|
174
|
*/
|
175
|
private void export(String portalUrl, String serviceUrl, String wikiUrl,
|
176
|
String wikiLoginUid, String passwd, String wikiPageNamespace,
|
177
|
boolean import2Mediawiki, boolean deleteOutputFiles,
|
178
|
boolean importImages, boolean usePublisher)
|
179
|
throws MalformedURLException {
|
180
|
|
181
|
// create MediawikiOutputModule with or without mediawiki pages
|
182
|
// namespace:
|
183
|
if (wikiPageNamespace == null
|
184
|
|| wikiPageNamespace.replaceAll(" ", "").equals("")) {
|
185
|
wikiOutputModule = new MediawikiOutputModule(portalUrl);
|
186
|
} else {
|
187
|
wikiOutputModule = new MediawikiOutputModule(wikiPageNamespace, portalUrl);
|
188
|
}
|
189
|
|
190
|
// set username to wikiOutModule for having it in the history of the
|
191
|
// page
|
192
|
// then it will be the same as the username that is used for the actual
|
193
|
// mediawiki import.
|
194
|
wikiOutputModule.setUsername(wikiLoginUid);
|
195
|
|
196
|
// if we actually export from the cdm and not from a file we run the
|
197
|
// Publisher
|
198
|
// with the wikiOutputModule
|
199
|
// else we run the wikiOutputModule with an input document (cdm exportes
|
200
|
// xml) from file
|
201
|
if (usePublisher) {
|
202
|
List<IPublishOutputModule> modules = new ArrayList<IPublishOutputModule>();
|
203
|
modules.add(wikiOutputModule);
|
204
|
configurator.setOutputModules(modules);
|
205
|
|
206
|
// do export from cdm to mediawiki xml file
|
207
|
Publisher.publish(configurator);
|
208
|
|
209
|
} else {
|
210
|
logger.info("read data from local file.");
|
211
|
createTemporaryExportFolder();
|
212
|
wikiOutputModule.output(externalDocument,
|
213
|
temporaryExportFolder,
|
214
|
DefaultProgressMonitor.NewInstance());
|
215
|
}
|
216
|
|
217
|
// we get the whole filename that the wikiOutputModule created
|
218
|
mediawikiFileWithPath = wikiOutputModule
|
219
|
.getFilePath();
|
220
|
|
221
|
logger.info("mediawiki xml file created and saved to"+mediawikiFileWithPath);
|
222
|
// if we want to upload images or save the cdm exported document,
|
223
|
// we put it to a field
|
224
|
if ((usePublisher && !deleteOutputFiles) || importImages) {
|
225
|
// the cdm output where we want to fetch the urls of the
|
226
|
// images:
|
227
|
cdmOutputDocument = wikiOutputModule
|
228
|
.getInputDocument();
|
229
|
}
|
230
|
|
231
|
// if we just created the cdm exported xml and want to
|
232
|
// keep all the output, we save the cdm exported document in a file
|
233
|
if (usePublisher && !deleteOutputFiles) {
|
234
|
saveCdmXmlExportedDocument(temporaryExportFolder, cdmOutputDocument);
|
235
|
}
|
236
|
|
237
|
// import into mediawiki
|
238
|
if (import2Mediawiki) {
|
239
|
uploadToMediawiki(wikiUrl, wikiLoginUid, passwd);
|
240
|
}
|
241
|
|
242
|
if (importImages){
|
243
|
downloadImages();
|
244
|
}
|
245
|
else{
|
246
|
logger.info("did not get images!");
|
247
|
}
|
248
|
|
249
|
if (import2Mediawiki && importImages && !(localImages.isEmpty())) {
|
250
|
uploadImagesToMediawiki(wikiUrl, wikiLoginUid, passwd);
|
251
|
}
|
252
|
|
253
|
if (deleteOutputFiles) {
|
254
|
deleteOutputFiles();
|
255
|
logger.info("deleted temporary file(s)");
|
256
|
}
|
257
|
}
|
258
|
|
259
|
/**
|
260
|
*
|
261
|
*/
|
262
|
private void createTemporaryExportFolder() {
|
263
|
temporaryExportFolder = ConfigFileUtil.getCdmHomeSubDir(MEDIAWIKI_CDM_SUB_DIR);
|
264
|
if (temporaryExportFolder != null) {
|
265
|
logger.info("using " + temporaryExportFolder.getAbsolutePath()
|
266
|
+ " as temporary directory.");
|
267
|
} else {
|
268
|
logger.error("could not create directory"
|
269
|
+ temporaryExportFolder.getAbsolutePath());
|
270
|
return;
|
271
|
}
|
272
|
}
|
273
|
|
274
|
/**
|
275
|
* @param serviceUrl
|
276
|
* @throws MalformedURLException
|
277
|
*/
|
278
|
private void setupConfigurator(String serviceUrl)
|
279
|
throws MalformedURLException {
|
280
|
|
281
|
createTemporaryExportFolder();
|
282
|
|
283
|
configurator.setWebserviceUrl(serviceUrl);
|
284
|
factory = configurator.getFactory();
|
285
|
|
286
|
// get feature tree from taxon name/taxon node and pass it to the
|
287
|
// configurator:
|
288
|
// TODO, get a feature tree name or uuid as method parameters
|
289
|
String featureTree = getDefaultFeatureTree();
|
290
|
configurator.setFeatureTree(UUID.fromString(featureTree));
|
291
|
|
292
|
// pass cdm exportfolder to configurator:
|
293
|
configurator.setExportFolder(temporaryExportFolder);
|
294
|
}
|
295
|
|
296
|
/**
|
297
|
* @return
|
298
|
*/
|
299
|
private String getDefaultFeatureTree() {
|
300
|
List<Element> featureTrees = factory.getFeatureTrees();
|
301
|
for (Element featureTreeElement : featureTrees) {
|
302
|
featureTreeElement.getChild("uuid");
|
303
|
}
|
304
|
String featureTree = featureTrees.get(0).getChild("uuid").getValue();
|
305
|
return featureTree;
|
306
|
}
|
307
|
|
308
|
/**
|
309
|
* @param exportFolder
|
310
|
* @param cdmOutputDocument
|
311
|
*/
|
312
|
private void saveCdmXmlExportedDocument(File exportFolder,
|
313
|
Document cdmOutputDocument) {
|
314
|
//XMLOutputter xmlOutput = new XMLOutputter();
|
315
|
|
316
|
cdm_output_file = exportFolder
|
317
|
+ File.separator
|
318
|
+ wikiOutputModule
|
319
|
.generateFilenameWithDate(CDM_EXPORT_FILE_NAME);
|
320
|
|
321
|
// display nice nice
|
322
|
Format format = Format.getPrettyFormat();
|
323
|
|
324
|
//JDOMParseException Invalid byte 2 of 3-byte UTF-8 sequence which occurs for e.g.
|
325
|
//with German umlauts and French accents on characters
|
326
|
format.setEncoding("ISO-8859-1");//"UTF-8");
|
327
|
XMLOutputter xmlOutput = new XMLOutputter(format);
|
328
|
xmlOutput.setFormat(format);
|
329
|
|
330
|
try {
|
331
|
xmlOutput.output(cdmOutputDocument, new FileWriter(cdm_output_file));
|
332
|
|
333
|
} catch (IOException e) {
|
334
|
// TODO Auto-generated catch block
|
335
|
e.printStackTrace();
|
336
|
}
|
337
|
logger.info("saved CDM output file to: " + cdm_output_file + ".");
|
338
|
}
|
339
|
|
340
|
private void deleteOutputFiles() {
|
341
|
logger.info("delete local files: ");
|
342
|
File file = new File(mediawikiFileWithPath);
|
343
|
file.delete();
|
344
|
logger.info("deleted "+mediawikiFileWithPath+".");
|
345
|
for (String localImage : localImages) {
|
346
|
file= new File(localImage);
|
347
|
file.delete();
|
348
|
logger.info("deleted image "+localImage+".");
|
349
|
}
|
350
|
//TODO delete tmp folders
|
351
|
}
|
352
|
|
353
|
/**
|
354
|
* uploads a given mediawiki xml file to a mediawiki - does only third (last) step
|
355
|
* of the whole export process
|
356
|
*
|
357
|
* @param inputFilePath
|
358
|
* @param wikiUrl
|
359
|
* @param wikiUser
|
360
|
* @param passwd
|
361
|
* @param deleteOutputFile
|
362
|
*/
|
363
|
public void uploadToMediawiki(String inputFilePath, String wikiUrl, String wikiLoginUid, String passwd) {
|
364
|
mediawikiFileWithPath = inputFilePath;
|
365
|
logger.info("reading file "+mediawikiFileWithPath+".");
|
366
|
uploadToMediawiki(wikiUrl, wikiLoginUid, passwd);
|
367
|
}
|
368
|
|
369
|
/*
|
370
|
* @author l.morris
|
371
|
*/
|
372
|
private void uploadToMediawiki(String wikiUrl, String wikiLoginUid, String passwd) {
|
373
|
|
374
|
// login to mediawiki
|
375
|
|
376
|
WikiBot myBot = getBotAndLogin(wikiUrl, wikiLoginUid, passwd);
|
377
|
|
378
|
|
379
|
try {
|
380
|
|
381
|
// parse wiki xml file and import pages one by one
|
382
|
// to mediawiki
|
383
|
// MAYDO import whole file, with functionality from mediawiki API
|
384
|
|
385
|
// get published output file
|
386
|
org.jdom.Document document = getDocument(mediawikiFileWithPath);
|
387
|
// get page nodes
|
388
|
Element rootElement = document.getRootElement();
|
389
|
// export pages
|
390
|
List pages = rootElement.getChildren("page");
|
391
|
Iterator itr = pages.iterator();
|
392
|
int length = pages.size();
|
393
|
int i = 1;
|
394
|
while (itr.hasNext()) {
|
395
|
Element page = (Element) itr.next();
|
396
|
String title = page.getChild("title").getText();
|
397
|
String text = page.getChild("revision").getChild("text")
|
398
|
.getText();
|
399
|
myBot.edit(title, text, PAGE_SUMMARY);
|
400
|
logger.info("exported page " + i + "/" + length + " " + title
|
401
|
+ " to " + wikiUrl + ".");
|
402
|
i++;
|
403
|
}
|
404
|
myBot.logout();
|
405
|
logger.info("all pages uploaded and mediawiki logout.");
|
406
|
} catch (IOException e) {
|
407
|
e.printStackTrace();
|
408
|
} catch (Exception e) {
|
409
|
e.printStackTrace();
|
410
|
e.getMessage();
|
411
|
return;
|
412
|
}
|
413
|
|
414
|
}
|
415
|
/**
|
416
|
* @param wikiUrl
|
417
|
* @param wikiLoginUid
|
418
|
* @param passwd
|
419
|
* @return
|
420
|
*/
|
421
|
private WikiBot getBotAndLogin(String wikiUrl, String wikiLoginUid,
|
422
|
String passwd) {
|
423
|
WikiBot myBot = new WikiBot(wikiUrl, wikiLoginUid, passwd);
|
424
|
|
425
|
// login to mediawiki
|
426
|
try {
|
427
|
myBot.login();
|
428
|
} catch (Exception e) {
|
429
|
logger.info("Cannot log into Mediwiki: "+wikiUrl);
|
430
|
e.printStackTrace();
|
431
|
}
|
432
|
|
433
|
logger.info("logged in to mediawiki as " + wikiLoginUid + ".");
|
434
|
return myBot;
|
435
|
}
|
436
|
|
437
|
private void downloadImages() {
|
438
|
org.jdom.Document document = wikiOutputModule.getInputDocument();
|
439
|
localImages = new ArrayList<String>();
|
440
|
|
441
|
try {
|
442
|
List<Element> media_uris = XPath.selectNodes(document, "//Taxon/media/e/representations/e/parts/e/uri");
|
443
|
|
444
|
if(media_uris.isEmpty()){
|
445
|
logger.info("there are no images in the data.");
|
446
|
return;
|
447
|
}
|
448
|
|
449
|
for (Element urlEl : media_uris) {
|
450
|
String url=urlEl.getValue();
|
451
|
URL imageUrl = new URL(url);
|
452
|
String[] arr = url.split("/");
|
453
|
String filename = arr[arr.length - 1];
|
454
|
//String filePath = temporaryImageExportFolder.getAbsolutePath()
|
455
|
// + File.separator + filename;
|
456
|
String filePath = temporaryExportFolder.getAbsolutePath()
|
457
|
+FILESEPARATOR + IMAGES_FOLDER +FILESEPARATOR+ filename;
|
458
|
logger.info("downloading image " + url+" to "+filePath);
|
459
|
|
460
|
FileUtils.copyURLToFile(imageUrl, new File(filePath));
|
461
|
localImages.add(filePath);
|
462
|
}
|
463
|
|
464
|
} catch (JDOMException e) {
|
465
|
// TODO Auto-generated catch block
|
466
|
e.printStackTrace();
|
467
|
} catch (MalformedURLException e) {
|
468
|
// TODO Auto-generated catch block
|
469
|
e.printStackTrace();
|
470
|
} catch (IOException e) {
|
471
|
// TODO Auto-generated catch block
|
472
|
e.printStackTrace();
|
473
|
}
|
474
|
|
475
|
}
|
476
|
|
477
|
private void uploadImagesToMediawiki(String wikiUrl, String wikiLoginUid, String passwd) {
|
478
|
WikiBot myBot = getBotAndLogin(wikiUrl, wikiLoginUid, passwd);
|
479
|
// get published output file
|
480
|
|
481
|
|
482
|
for(String localUri : localImages){
|
483
|
|
484
|
try {
|
485
|
uploadImage(myBot, localUri);
|
486
|
} catch (MalformedURLException e) {
|
487
|
// TODO Auto-generated catch block
|
488
|
e.printStackTrace();
|
489
|
} catch (IOException e) {
|
490
|
// TODO Auto-generated catch block
|
491
|
e.printStackTrace();
|
492
|
}
|
493
|
}
|
494
|
|
495
|
// logout
|
496
|
myBot.logout();
|
497
|
logger.info("all images uploaded to mediawiki "+wikiUrl+" and logged out.");
|
498
|
}
|
499
|
|
500
|
/**
|
501
|
* @param filePath
|
502
|
* @throws MalformedURLException
|
503
|
* @throws IOException
|
504
|
* TODO give a unique id to each image name
|
505
|
* but this has to be done also in the wikioutput then
|
506
|
*/
|
507
|
private void uploadImage(WikiBot myBot, String filePath) throws MalformedURLException,
|
508
|
IOException {
|
509
|
// URL imageUrl = new URL(url);
|
510
|
// String[] arr = url.split("/");
|
511
|
// String filename = arr[arr.length - 1];
|
512
|
//// System.out.println(filename);
|
513
|
// //String filePath = temporaryImageExportFolder.getAbsolutePath()
|
514
|
// // + File.separator + filename;
|
515
|
// String filePath = temporaryExportFolder.getAbsolutePath()
|
516
|
// +FILESEPARATOR + IMAGES_FOLDER +FILESEPARATOR+ filename;
|
517
|
//// System.out.println(filePath);
|
518
|
// File imageFile = new File(filePath);
|
519
|
// logger.info("downloading image " + url);
|
520
|
|
521
|
// FileUtils.copyURLToFile(imageUrl, new File(filePath));
|
522
|
|
523
|
|
524
|
File imageFile = new File(filePath);
|
525
|
String[] arr = filePath.split("/");
|
526
|
String filename = arr[arr.length - 1];
|
527
|
try {
|
528
|
//Upload image to Mediawiki
|
529
|
//TODO: Change text to give a description of the image
|
530
|
myBot.uploadAFile(imageFile, filename, "some text", "no comment");
|
531
|
} catch (LoginException e) {
|
532
|
// TODO Auto-generated catch block
|
533
|
e.printStackTrace();
|
534
|
}
|
535
|
logger.info("uploaded image " + imageFile.getName()+" to mediawiki.");
|
536
|
}
|
537
|
|
538
|
private Document getDocument(String filePath) {
|
539
|
SAXBuilder saxBuilder = new SAXBuilder();
|
540
|
|
541
|
File file = new File(filePath);
|
542
|
Document document = null;
|
543
|
FileInputStream fileis;
|
544
|
|
545
|
// converted file to document object
|
546
|
try {
|
547
|
//document = saxBuilder.build(file);
|
548
|
fileis = new FileInputStream(file);
|
549
|
BufferedInputStream in = new BufferedInputStream(fileis);
|
550
|
document = saxBuilder.build(in);
|
551
|
|
552
|
} catch (JDOMException e) {
|
553
|
// TODO Auto-generated catch block
|
554
|
logger.error(e.getCause().getMessage());
|
555
|
e.printStackTrace();
|
556
|
} catch (IOException e) {
|
557
|
// TODO Auto-generated catch block
|
558
|
e.printStackTrace();
|
559
|
}
|
560
|
return document;
|
561
|
}
|
562
|
|
563
|
}
|