Project

General

Profile

Download (16.7 KB) Statistics
| Branch: | Tag: | Revision:
1
package eu.etaxonomy.cdm.print.out.mediawiki;
2

    
3
import java.io.BufferedInputStream;
4
import java.io.File;
5
import java.io.FileInputStream;
6
import java.io.FileWriter;
7
import java.io.IOException;
8
import java.net.MalformedURLException;
9
import java.net.URL;
10
import java.util.ArrayList;
11
import java.util.Iterator;
12
import java.util.List;
13
import java.util.UUID;
14

    
15
import javax.security.auth.login.LoginException;
16

    
17
import org.apache.commons.io.FileUtils;
18
import org.apache.log4j.Logger;
19
import org.jdom.Document;
20
import org.jdom.Element;
21
import org.jdom.JDOMException;
22
import org.jdom.input.SAXBuilder;
23
import org.jdom.output.Format;
24
import org.jdom.output.XMLOutputter;
25
import org.jdom.xpath.XPath;
26

    
27
import eu.etaxonomy.cdm.common.monitor.DefaultProgressMonitor;
28
import eu.etaxonomy.cdm.config.ConfigFileUtil;
29
import eu.etaxonomy.cdm.print.IXMLEntityFactory;
30
import eu.etaxonomy.cdm.print.PublishConfigurator;
31
import eu.etaxonomy.cdm.print.Publisher;
32
import eu.etaxonomy.cdm.print.out.IPublishOutputModule;
33

    
34
/**
 * Fill in all parameters and you get a complete export from a CDM database to a
 * MediaWiki.
 *
 * TODO should we move this class somewhere else and/or rename it?
 *
 * @author s.buers, l.morris
 *
 */
43
public class Cdm2MediawikiExporter {
44

    
45
	private static final String IMAGES_FOLDER = "images";
46

    
47
	private static final String FILESEPARATOR = File.separator;
48

    
49
	//constants
50
	private static final String MEDIAWIKI_CDM_SUB_DIR = "mediawiki_tmp";
51

    
52
	private static final String IMAGE_DIR = MEDIAWIKI_CDM_SUB_DIR
53
			+ File.separator + "images";
54

    
55
	private static final String CDM_EXPORT_FILE_NAME = "cdm_output";
56

    
57
	private static final String PAGE_SUMMARY = "automatic import from CDM";
58

    
59
	//-------------------
60

    
61
	private static final Logger logger = Logger
62
			.getLogger(Cdm2MediawikiExporter.class);
63

    
64
	private PublishConfigurator configurator = PublishConfigurator.NewRemoteInstance();
65

    
66
	private IXMLEntityFactory factory;
67

    
68
	// where the mediawiki xml code is stored
69
	private String mediawikiFileWithPath = null;
70

    
71
	// where the cdm exported xml can be stored
72
	private String cdm_output_file = null;
73

    
74
	private Document cdmOutputDocument = null;
75
	private Document externalDocument = null;
76

    
77
	private MediawikiOutputModule wikiOutputModule;
78

    
79
	private File temporaryExportFolder = null;
80

    
81
	private List<String> localImages;
82

    
83

    
84
	public void export(String serviceUrl, UUID taxonNodeUuid, UUID treeNodeUuid,  String wikiUrl,
85
			String wikiLoginUid, String passwd, String wikiPageNamespace,
86
			boolean import2Mediawiki, boolean deleteOutputFiles,
87
			boolean importImages) throws MalformedURLException {
88

    
89
		//TODO
90
	}
91

    
92
	/**
93
	 * does the whole export process: runs cdm export to mediawiki xml-file and
94
	 * wiki import of this file
95
	 *
96
	 * @param serviceUrl
97
	 * @param taxonName
98
	 * @param wikiUrl
99
	 *            - url of the destination wiki
100
	 * @param wikiLoginUid
101
	 *            - uid of wiki admin
102
	 * @param passwd
103
	 *            - password of the above wiki admin
104
	 * @param wikiPageNamespace
105
	 *            - prefix that, will be added to all pages null or "" will make
106
	 *            no prefix
107
	 * @throws MalformedURLException
108
	 *
109
	 *             TODO: make passwd "unplain" MAYDO: pass more parameters e.g.:
110
	 *             alternative stylesheet layout parameters (that may force the
111
	 *             use of different stylesheet) export folder - we use a
112
	 *             temporary so far boolean for telling if we want to keep the
113
	 *             mediawiki xml file ...
114
	 */
115
	public void export(String portalUrl, String serviceUrl, String taxonName, String classificationName, String wikiUrl,
116
			String wikiLoginUid, String passwd, String wikiPageNamespace,
117
			boolean import2Mediawiki, boolean deleteOutputFiles,
118
			boolean importImages) throws MalformedURLException {
119

    
120
		// get taxon node uuid from taxon name and pass it to the configurator:
121
				// TODO get classification name from export() - add a parameter
122
				// and use it to choose the right taxon
123

    
124
				// setup configurator
125
		setupConfigurator(serviceUrl);
126
		configurator.addSelectedTaxonNodeElements(factory.getTaxonNodesByName(taxonName, classificationName));
127

    
128

    
129
		export(portalUrl, serviceUrl, wikiUrl, wikiLoginUid, passwd,
130
				wikiPageNamespace, import2Mediawiki, deleteOutputFiles,
131
				importImages, true);
132
	}
133
	/**
134
	 * if you already have a cdm xml export in some file you put it in here
135
	 * the mediawiki xml is created and imported to an mediawiki
136
	 * does step 2 and 3 out of all 3 export steps
137
	 *
138
	 * @param filename
139
	 * @param serviceUrl
140
	 * @param taxonName
141
	 * @param classificationName
142
	 * @param wikiUrl
143
	 * @param wikiLoginUid
144
	 * @param passwd
145
	 * @param wikiPageNamespace
146
	 * @param import2Mediawiki
147
	 * @param deleteOutputFiles
148
	 * @param importImages
149
	 * @throws MalformedURLException
150
	 */
151
	public void exportFromXmlFile(String portalUrl, String filename, String serviceUrl,
152
			 String wikiUrl, String wikiLoginUid,
153
			String passwd, String wikiPageNamespace, boolean import2Mediawiki,
154
			boolean deleteOutputFiles, boolean importImages)
155
					throws MalformedURLException {
156

    
157
		//put the document to a field:
158
		externalDocument = getDocument(filename);
159

    
160
//		setupConfigurator(serviceUrl);
161

    
162
		// and run export with usePublisher=false:
163
		export(portalUrl, serviceUrl, wikiUrl, wikiLoginUid, passwd,
164
				wikiPageNamespace, import2Mediawiki, deleteOutputFiles,
165
				importImages, false);
166

    
167
	}
168

    
169
	/*
170
	 * TODO: make passwd "unplain" MAYDO: pass more parameters e.g.: alternative
171
	 * stylesheet layout parameters (that may force the use of different
172
	 * stylesheet) export folder - we use a temporary so far boolean for telling
173
	 * if we want to keep the mediawiki xml file ...
174
	 */
175
	private void export(String portalUrl, String serviceUrl, String wikiUrl,
176
			String wikiLoginUid, String passwd, String wikiPageNamespace,
177
			boolean import2Mediawiki, boolean deleteOutputFiles,
178
			boolean importImages, boolean usePublisher)
179
					throws MalformedURLException {
180

    
181
		// create MediawikiOutputModule with or without mediawiki pages
182
		// namespace:
183
		if (wikiPageNamespace == null
184
				|| wikiPageNamespace.replaceAll(" ", "").equals("")) {
185
			wikiOutputModule = new MediawikiOutputModule(portalUrl);
186
		} else {
187
			wikiOutputModule = new MediawikiOutputModule(wikiPageNamespace, portalUrl);
188
		}
189

    
190
		// set username to wikiOutModule for having it in the history of the
191
		// page
192
		// then it will be the same as the username that is used for the actual
193
		// mediawiki import.
194
		wikiOutputModule.setUsername(wikiLoginUid);
195

    
196
		// if we actually export from the cdm and not from a file we run the
197
		// Publisher
198
		// with the wikiOutputModule
199
		// else we run the wikiOutputModule with an input document (cdm exportes
200
		// xml) from file
201
		if (usePublisher) {
202
			List<IPublishOutputModule> modules = new ArrayList<IPublishOutputModule>();
203
			modules.add(wikiOutputModule);
204
			configurator.setOutputModules(modules);
205

    
206
			// do export from cdm to mediawiki xml file
207
			Publisher.publish(configurator);
208

    
209
		} else {
210
			logger.info("read data from local file.");
211
			createTemporaryExportFolder();
212
			wikiOutputModule.output(externalDocument,
213
					temporaryExportFolder,
214
					DefaultProgressMonitor.NewInstance());
215
		}
216

    
217
		// we get the whole filename that the wikiOutputModule created
218
		mediawikiFileWithPath = wikiOutputModule
219
				.getFilePath();
220

    
221
		logger.info("mediawiki xml file created and saved to"+mediawikiFileWithPath);
222
		// if we want to upload images or save the cdm exported document,
223
		// we put it to a field
224
		if ((usePublisher && !deleteOutputFiles) || importImages) {
225
			// the cdm output where we want to fetch the urls of the
226
			// images:
227
			cdmOutputDocument = wikiOutputModule
228
					.getInputDocument();
229
		}
230

    
231
		// if we just created the cdm exported xml and want to
232
		// keep all the output, we save the cdm exported document in a file
233
		if (usePublisher && !deleteOutputFiles) {
234
			saveCdmXmlExportedDocument(temporaryExportFolder, cdmOutputDocument);
235
		}
236

    
237
		// import into mediawiki
238
		if (import2Mediawiki) {
239
			uploadToMediawiki(wikiUrl, wikiLoginUid, passwd);
240
		}
241

    
242
		if (importImages){
243
			downloadImages();
244
		}
245
		else{
246
			logger.info("did not get images!");
247
		}
248

    
249
		if (import2Mediawiki && importImages && !(localImages.isEmpty())) {
250
			uploadImagesToMediawiki(wikiUrl, wikiLoginUid, passwd);
251
		}
252

    
253
		if (deleteOutputFiles) {
254
			deleteOutputFiles();
255
			logger.info("deleted temporary file(s)");
256
		}
257
	}
258

    
259
	/**
260
	 *
261
	 */
262
	private void createTemporaryExportFolder() {
263
		temporaryExportFolder = ConfigFileUtil.getCdmHomeSubDir(MEDIAWIKI_CDM_SUB_DIR);
264
		if (temporaryExportFolder != null) {
265
			logger.info("using " + temporaryExportFolder.getAbsolutePath()
266
					+ " as temporary directory.");
267
		} else {
268
			logger.error("could not create directory"
269
					+ temporaryExportFolder.getAbsolutePath());
270
			return;
271
		}
272
	}
273

    
274
	/**
275
	 * @param serviceUrl
276
	 * @throws MalformedURLException
277
	 */
278
	private void setupConfigurator(String serviceUrl)
279
			throws MalformedURLException {
280

    
281
		createTemporaryExportFolder();
282

    
283
		configurator.setWebserviceUrl(serviceUrl);
284
		factory = configurator.getFactory();
285

    
286
		// get feature tree from taxon name/taxon node and pass it to the
287
		// configurator:
288
		// TODO, get a feature tree name or uuid as method parameters
289
		String featureTree = getDefaultFeatureTree();
290
		configurator.setFeatureTree(UUID.fromString(featureTree));
291

    
292
		// pass cdm exportfolder to configurator:
293
		configurator.setExportFolder(temporaryExportFolder);
294
	}
295

    
296
	/**
297
	 * @return
298
	 */
299
	private String getDefaultFeatureTree() {
300
		List<Element> featureTrees = factory.getFeatureTrees();
301
		for (Element featureTreeElement : featureTrees) {
302
			featureTreeElement.getChild("uuid");
303
		}
304
		String featureTree = featureTrees.get(0).getChild("uuid").getValue();
305
		return featureTree;
306
	}
307

    
308
	/**
309
	 * @param exportFolder
310
	 * @param cdmOutputDocument
311
	 */
312
	private void saveCdmXmlExportedDocument(File exportFolder,
313
			Document cdmOutputDocument) {
314
		//XMLOutputter xmlOutput = new XMLOutputter();
315

    
316
		cdm_output_file = exportFolder
317
				+ File.separator
318
				+ wikiOutputModule
319
				.generateFilenameWithDate(CDM_EXPORT_FILE_NAME);
320

    
321
		// display nice nice
322
		Format format = Format.getPrettyFormat();
323

    
324
		//JDOMParseException Invalid byte 2 of 3-byte UTF-8 sequence which occurs for e.g.
325
		//with German umlauts and French accents on characters
326
		format.setEncoding("ISO-8859-1");//"UTF-8");
327
		XMLOutputter xmlOutput = new XMLOutputter(format);
328
		xmlOutput.setFormat(format);
329

    
330
		try {
331
			xmlOutput.output(cdmOutputDocument, new FileWriter(cdm_output_file));
332

    
333
		} catch (IOException e) {
334
			// TODO Auto-generated catch block
335
			e.printStackTrace();
336
		}
337
		logger.info("saved CDM output file to: " + cdm_output_file + ".");
338
	}
339

    
340
	private void deleteOutputFiles() {
341
		logger.info("delete local files: ");
342
		File file = new File(mediawikiFileWithPath);
343
		file.delete();
344
		logger.info("deleted "+mediawikiFileWithPath+".");
345
		for (String localImage : localImages) {
346
			file= new File(localImage);
347
			file.delete();
348
			logger.info("deleted image "+localImage+".");
349
		}
350
		//TODO delete tmp folders
351
	}
352

    
353
	/**
354
	 * uploads a given mediawiki xml file to a mediawiki - does only third (last) step
355
	 * of the whole export process
356
	 *
357
	 * @param inputFilePath
358
	 * @param wikiUrl
359
	 * @param wikiUser
360
	 * @param passwd
361
	 * @param deleteOutputFile
362
	 */
363
	public void uploadToMediawiki(String inputFilePath, String wikiUrl, String wikiLoginUid, String passwd) {
364
		mediawikiFileWithPath = inputFilePath;
365
		logger.info("reading file "+mediawikiFileWithPath+".");
366
		uploadToMediawiki(wikiUrl, wikiLoginUid, passwd);
367
	}
368

    
369
	/*
370
	 * @author l.morris
371
	 */
372
	private void uploadToMediawiki(String wikiUrl, String wikiLoginUid, String passwd) {
373

    
374
		// login to mediawiki
375

    
376
		WikiBot myBot = getBotAndLogin(wikiUrl, wikiLoginUid, passwd);
377

    
378

    
379
		try {
380

    
381
			// parse wiki xml file and import pages one by one
382
			// to mediawiki
383
			// MAYDO import whole file, with functionality from mediawiki API
384

    
385
			// get published output file
386
			org.jdom.Document document = getDocument(mediawikiFileWithPath);
387
			// get page nodes
388
			Element rootElement = document.getRootElement();
389
			// export pages
390
			List pages = rootElement.getChildren("page");
391
			Iterator itr = pages.iterator();
392
			int length = pages.size();
393
			int i = 1;
394
			while (itr.hasNext()) {
395
				Element page = (Element) itr.next();
396
				String title = page.getChild("title").getText();
397
				String text = page.getChild("revision").getChild("text")
398
						.getText();
399
				myBot.edit(title, text, PAGE_SUMMARY);
400
				logger.info("exported page " + i + "/" + length + " " + title
401
						+ " to " + wikiUrl + ".");
402
				i++;
403
			}
404
			myBot.logout();
405
			logger.info("all pages uploaded and mediawiki logout.");
406
		} catch (IOException e) {
407
			e.printStackTrace();
408
		} catch (Exception e) {
409
			e.printStackTrace();
410
			e.getMessage();
411
			return;
412
		}
413

    
414
	}
415
	/**
416
	 * @param wikiUrl
417
	 * @param wikiLoginUid
418
	 * @param passwd
419
	 * @return
420
	 */
421
	private WikiBot getBotAndLogin(String wikiUrl, String wikiLoginUid,
422
			String passwd) {
423
		WikiBot myBot = new WikiBot(wikiUrl, wikiLoginUid, passwd);
424

    
425
		// login to mediawiki
426
		try {
427
			myBot.login();
428
		} catch (Exception e) {
429
			logger.info("Cannot log into Mediwiki: "+wikiUrl);
430
			e.printStackTrace();
431
		}
432

    
433
		logger.info("logged in to mediawiki as " + wikiLoginUid + ".");
434
		return myBot;
435
	}
436

    
437
	private void downloadImages() {
438
		org.jdom.Document document = wikiOutputModule.getInputDocument();
439
		localImages = new ArrayList<String>();
440

    
441
		try {
442
			List<Element> media_uris = XPath.selectNodes(document, "//Taxon/media/e/representations/e/parts/e/uri");
443

    
444
			if(media_uris.isEmpty()){
445
				logger.info("there are no images in the data.");
446
				return;
447
			}
448

    
449
			for (Element urlEl : media_uris) {
450
				String url=urlEl.getValue();
451
				URL imageUrl = new URL(url);
452
				String[] arr = url.split("/");
453
				String filename = arr[arr.length - 1];
454
				//String filePath = temporaryImageExportFolder.getAbsolutePath()
455
					//	+ File.separator + filename;
456
				String filePath = temporaryExportFolder.getAbsolutePath()
457
						+FILESEPARATOR + IMAGES_FOLDER +FILESEPARATOR+ filename;
458
				logger.info("downloading image " + url+" to "+filePath);
459

    
460
				FileUtils.copyURLToFile(imageUrl, new File(filePath));
461
				localImages.add(filePath);
462
			}
463

    
464
		} catch (JDOMException e) {
465
			// TODO Auto-generated catch block
466
			e.printStackTrace();
467
		} catch (MalformedURLException e) {
468
			// TODO Auto-generated catch block
469
			e.printStackTrace();
470
		} catch (IOException e) {
471
			// TODO Auto-generated catch block
472
			e.printStackTrace();
473
		}
474

    
475
	}
476

    
477
	private void uploadImagesToMediawiki(String wikiUrl, String wikiLoginUid, String passwd) {
478
		WikiBot myBot = getBotAndLogin(wikiUrl, wikiLoginUid, passwd);
479
		// get published output file
480

    
481

    
482
			for(String localUri : localImages){
483

    
484
				try {
485
					uploadImage(myBot, localUri);
486
				} catch (MalformedURLException e) {
487
					// TODO Auto-generated catch block
488
					e.printStackTrace();
489
				} catch (IOException e) {
490
					// TODO Auto-generated catch block
491
					e.printStackTrace();
492
				}
493
			}
494

    
495
			// logout
496
		myBot.logout();
497
		logger.info("all images uploaded to mediawiki "+wikiUrl+" and logged out.");
498
	}
499

    
500
	/**
501
	 * @param filePath
502
	 * @throws MalformedURLException
503
	 * @throws IOException
504
	 * TODO give a unique id to each image name
505
	 * 			but this has to be done also in the wikioutput then
506
	 */
507
	private void uploadImage(WikiBot myBot, String filePath) throws MalformedURLException,
508
	IOException {
509
//		URL imageUrl = new URL(url);
510
//		String[] arr = url.split("/");
511
//		String filename = arr[arr.length - 1];
512
////		System.out.println(filename);
513
//		//String filePath = temporaryImageExportFolder.getAbsolutePath()
514
//			//	+ File.separator + filename;
515
//		String filePath = temporaryExportFolder.getAbsolutePath()
516
//				+FILESEPARATOR + IMAGES_FOLDER +FILESEPARATOR+ filename;
517
////		System.out.println(filePath);
518
//		File imageFile = new File(filePath);
519
//		logger.info("downloading image " + url);
520

    
521
//		FileUtils.copyURLToFile(imageUrl, new File(filePath));
522

    
523

    
524
		File imageFile = new File(filePath);
525
		String[] arr = filePath.split("/");
526
		String filename = arr[arr.length - 1];
527
		try {
528
			//Upload image to Mediawiki
529
			//TODO: Change text to give a description of the image
530
			myBot.uploadAFile(imageFile, filename, "some text", "no comment");
531
		} catch (LoginException e) {
532
			// TODO Auto-generated catch block
533
			e.printStackTrace();
534
		}
535
		logger.info("uploaded image " + imageFile.getName()+" to mediawiki.");
536
	}
537

    
538
	private Document getDocument(String filePath) {
539
		SAXBuilder saxBuilder = new SAXBuilder();
540

    
541
		File file = new File(filePath);
542
		Document document = null;
543
		FileInputStream fileis;
544

    
545
		// converted file to document object
546
		try {
547
			//document = saxBuilder.build(file);
548
			fileis = new FileInputStream(file);
549
			BufferedInputStream in = new BufferedInputStream(fileis);
550
			document = saxBuilder.build(in);
551

    
552
		} catch (JDOMException e) {
553
			// TODO Auto-generated catch block
554
			logger.error(e.getCause().getMessage());
555
			e.printStackTrace();
556
		} catch (IOException e) {
557
			// TODO Auto-generated catch block
558
			e.printStackTrace();
559
		}
560
		return document;
561
	}
562

    
563
}
(1-1/5)