45c917ab43505f61398d8d6f368760a55c19ffcf
[cdmlib.git] / cdmlib-print / src / main / java / eu / etaxonomy / cdm / print / out / mediawiki / Cdm2MediawikiExporter.java
1 package eu.etaxonomy.cdm.print.out.mediawiki;
2
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.UUID;

import javax.security.auth.login.LoginException;

import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import org.jdom.xpath.XPath;

import eu.etaxonomy.cdm.common.CdmUtils;
import eu.etaxonomy.cdm.common.monitor.DefaultProgressMonitor;
import eu.etaxonomy.cdm.print.IXMLEntityFactory;
import eu.etaxonomy.cdm.print.PublishConfigurator;
import eu.etaxonomy.cdm.print.Publisher;
import eu.etaxonomy.cdm.print.out.IPublishOutputModule;
33
/**
 * Fill in all parameters and you get a complete export from a CDM database to a
 * mediawiki.
 *
 * TODO should we move this class somewhere else and/or rename it?
 *
 * @author s.buers, l.morris
 *
 */
43 public class Cdm2MediawikiExporter {
44
45 private static final String IMAGES_FOLDER = "images";
46
47 private static final String FILESEPARATOR = File.separator;
48
49 //constants
50 private static final String MEDIAWIKI_CDM_SUB_DIR = "mediawiki_tmp";
51
52 private static final String IMAGE_DIR = MEDIAWIKI_CDM_SUB_DIR
53 + File.separator + "images";
54
55 private static final String CDM_EXPORT_FILE_NAME = "cdm_output";
56
57 private static final String PAGE_SUMMARY = "automatic import from CDM";
58
59 //-------------------
60
61 private static final Logger logger = Logger
62 .getLogger(Cdm2MediawikiExporter.class);
63
64 private PublishConfigurator configurator = PublishConfigurator.NewRemoteInstance();
65
66 private IXMLEntityFactory factory;
67
68 // where the mediawiki xml code is stored
69 private String mediawikiFileWithPath = null;
70
71 // where the cdm exported xml can be stored
72 private String cdm_output_file = null;
73
74 private Document cdmOutputDocument = null;
75 private Document externalDocument = null;
76
77 private MediawikiOutputModule wikiOutputModule;
78
79 private File temporaryExportFolder = null;
80
81 private List<String> localImages;
82
83
84 public void export(String serviceUrl, UUID taxonNodeUuid, UUID treeNodeUuid, String wikiUrl,
85 String wikiLoginUid, String passwd, String wikiPageNamespace,
86 boolean import2Mediawiki, boolean deleteOutputFiles,
87 boolean importImages) throws MalformedURLException {
88
89 //TODO
90 }
91
92 /**
93 * does the whole export process: runs cdm export to mediawiki xml-file and
94 * wiki import of this file
95 *
96 * @param serviceUrl
97 * @param taxonName
98 * @param wikiUrl
99 * - url of the destination wiki
100 * @param wikiLoginUid
101 * - uid of wiki admin
102 * @param passwd
103 * - password of the above wiki admin
104 * @param wikiPageNamespace
105 * - prefix that, will be added to all pages null or "" will make
106 * no prefix
107 * @throws MalformedURLException
108 *
109 * TODO: make passwd "unplain" MAYDO: pass more parameters e.g.:
110 * alternative stylesheet layout parameters (that may force the
111 * use of different stylesheet) export folder - we use a
112 * temporary so far boolean for telling if we want to keep the
113 * mediawiki xml file ...
114 */
115 public void export(String portalUrl, String serviceUrl, String taxonName, String classificationName, String wikiUrl,
116 String wikiLoginUid, String passwd, String wikiPageNamespace,
117 boolean import2Mediawiki, boolean deleteOutputFiles,
118 boolean importImages) throws MalformedURLException {
119
120 // get taxon node uuid from taxon name and pass it to the configurator:
121 // TODO get classification name from export() - add a parameter
122 // and use it to choose the right taxon
123
124 // setup configurator
125 setupConfigurator(serviceUrl);
126 configurator.addSelectedTaxonNodeElements(factory.getTaxonNodesByName(taxonName, classificationName));
127
128
129 export(portalUrl, serviceUrl, wikiUrl, wikiLoginUid, passwd,
130 wikiPageNamespace, import2Mediawiki, deleteOutputFiles,
131 importImages, true);
132 }
133 /**
134 * if you already have a cdm xml export in some file you put it in here
135 * the mediawiki xml is created and imported to an mediawiki
136 * does step 2 and 3 out of all 3 export steps
137 *
138 * @param filename
139 * @param serviceUrl
140 * @param taxonName
141 * @param classificationName
142 * @param wikiUrl
143 * @param wikiLoginUid
144 * @param passwd
145 * @param wikiPageNamespace
146 * @param import2Mediawiki
147 * @param deleteOutputFiles
148 * @param importImages
149 * @throws MalformedURLException
150 */
151 public void exportFromXmlFile(String portalUrl, String filename, String serviceUrl,
152 String wikiUrl, String wikiLoginUid,
153 String passwd, String wikiPageNamespace, boolean import2Mediawiki,
154 boolean deleteOutputFiles, boolean importImages)
155 throws MalformedURLException {
156
157 //put the document to a field:
158 externalDocument = getDocument(filename);
159
160 // setupConfigurator(serviceUrl);
161
162 // and run export with usePublisher=false:
163 export(portalUrl, serviceUrl, wikiUrl, wikiLoginUid, passwd,
164 wikiPageNamespace, import2Mediawiki, deleteOutputFiles,
165 importImages, false);
166
167 }
168
169 /*
170 * TODO: make passwd "unplain" MAYDO: pass more parameters e.g.: alternative
171 * stylesheet layout parameters (that may force the use of different
172 * stylesheet) export folder - we use a temporary so far boolean for telling
173 * if we want to keep the mediawiki xml file ...
174 */
175 private void export(String portalUrl, String serviceUrl, String wikiUrl,
176 String wikiLoginUid, String passwd, String wikiPageNamespace,
177 boolean import2Mediawiki, boolean deleteOutputFiles,
178 boolean importImages, boolean usePublisher)
179 throws MalformedURLException {
180
181 // create MediawikiOutputModule with or without mediawiki pages
182 // namespace:
183 if (wikiPageNamespace == null
184 || wikiPageNamespace.replaceAll(" ", "").equals("")) {
185 wikiOutputModule = new MediawikiOutputModule(portalUrl);
186 } else {
187 wikiOutputModule = new MediawikiOutputModule(wikiPageNamespace, portalUrl);
188 }
189
190 // set username to wikiOutModule for having it in the history of the
191 // page
192 // then it will be the same as the username that is used for the actual
193 // mediawiki import.
194 ((MediawikiOutputModule) wikiOutputModule).setUsername(wikiLoginUid);
195
196 // if we actually export from the cdm and not from a file we run the
197 // Publisher
198 // with the wikiOutputModule
199 // else we run the wikiOutputModule with an input document (cdm exportes
200 // xml) from file
201 if (usePublisher) {
202 List<IPublishOutputModule> modules = new ArrayList<IPublishOutputModule>();
203 modules.add(wikiOutputModule);
204 configurator.setOutputModules(modules);
205
206 // do export from cdm to mediawiki xml file
207 Publisher.publish(configurator);
208
209 } else {
210 logger.info("read data from local file.");
211 createTemporaryExportFolder();
212 wikiOutputModule.output(externalDocument,
213 temporaryExportFolder,
214 DefaultProgressMonitor.NewInstance());
215 }
216
217 // we get the whole filename that the wikiOutputModule created
218 mediawikiFileWithPath = ((MediawikiOutputModule) wikiOutputModule)
219 .getFilePath();
220
221 logger.info("mediawiki xml file created and saved to"+mediawikiFileWithPath);
222 // if we want to upload images or save the cdm exported document,
223 // we put it to a field
224 if ((usePublisher && !deleteOutputFiles) || importImages) {
225 // the cdm output where we want to fetch the urls of the
226 // images:
227 cdmOutputDocument = ((MediawikiOutputModule) wikiOutputModule)
228 .getInputDocument();
229 }
230
231 // if we just created the cdm exported xml and want to
232 // keep all the output, we save the cdm exported document in a file
233 if (usePublisher && !deleteOutputFiles) {
234 saveCdmXmlExportedDocument(temporaryExportFolder, cdmOutputDocument);
235 }
236
237 // import into mediawiki
238 if (import2Mediawiki) {
239 uploadToMediawiki(wikiUrl, wikiLoginUid, passwd);
240 }
241
242 if (importImages){
243 downloadImages();
244 }
245 else{
246 logger.info("did not get images!");
247 }
248
249 if (import2Mediawiki && importImages && !(localImages.isEmpty())) {
250 uploadImagesToMediawiki(wikiUrl, wikiLoginUid, passwd);
251 }
252
253 if (deleteOutputFiles) {
254 deleteOutputFiles();
255 logger.info("deleted temporary file(s)");
256 }
257 }
258
259 /**
260 *
261 */
262 private void createTemporaryExportFolder() {
263 temporaryExportFolder = CdmUtils.getCdmHomeSubDir(MEDIAWIKI_CDM_SUB_DIR);
264 if (temporaryExportFolder != null) {
265 logger.info("using " + temporaryExportFolder.getAbsolutePath()
266 + " as temporary directory.");
267 } else {
268 logger.error("could not create directory"
269 + temporaryExportFolder.getAbsolutePath());
270 return;
271 }
272 }
273
274 /**
275 * @param serviceUrl
276 * @throws MalformedURLException
277 */
278 private void setupConfigurator(String serviceUrl)
279 throws MalformedURLException {
280
281 createTemporaryExportFolder();
282
283 configurator.setWebserviceUrl(serviceUrl);
284 factory = configurator.getFactory();
285
286 // get feature tree from taxon name/taxon node and pass it to the
287 // configurator:
288 // TODO, get a feature tree name or uuid as method parameters
289 String featureTree = getDefaultFeatureTree();
290 configurator.setFeatureTree(UUID.fromString(featureTree));
291
292 // pass cdm exportfolder to configurator:
293 configurator.setExportFolder(temporaryExportFolder);
294 }
295
296 /**
297 * @return
298 */
299 private String getDefaultFeatureTree() {
300 List<Element> featureTrees = factory.getFeatureTrees();
301 for (Element featureTreeElement : featureTrees) {
302 featureTreeElement.getChild("uuid");
303 }
304 String featureTree = featureTrees.get(0).getChild("uuid").getValue();
305 return featureTree;
306 }
307
308 /**
309 * @param exportFolder
310 * @param cdmOutputDocument
311 */
312 private void saveCdmXmlExportedDocument(File exportFolder,
313 Document cdmOutputDocument) {
314 //XMLOutputter xmlOutput = new XMLOutputter();
315
316 cdm_output_file = exportFolder
317 + File.separator
318 + wikiOutputModule
319 .generateFilenameWithDate(CDM_EXPORT_FILE_NAME);
320
321 // display nice nice
322 Format format = Format.getPrettyFormat();
323
324 //JDOMParseException Invalid byte 2 of 3-byte UTF-8 sequence which occurs for e.g.
325 //with German umlauts and French accents on characters
326 format.setEncoding("ISO-8859-1");//"UTF-8");
327 XMLOutputter xmlOutput = new XMLOutputter(format);
328 xmlOutput.setFormat(format);
329
330 try {
331 xmlOutput.output(cdmOutputDocument, new FileWriter(cdm_output_file));
332
333 } catch (IOException e) {
334 // TODO Auto-generated catch block
335 e.printStackTrace();
336 }
337 logger.info("saved CDM output file to: " + cdm_output_file + ".");
338 }
339
340 private void deleteOutputFiles() {
341 logger.info("delete local files: ");
342 File file = new File(mediawikiFileWithPath);
343 file.delete();
344 logger.info("deleted "+mediawikiFileWithPath+".");
345 for (String localImage : localImages) {
346 file= new File(localImage);
347 file.delete();
348 logger.info("deleted image "+localImage+".");
349 }
350 //TODO delete tmp folders
351 }
352
353 /**
354 * uploads a given mediawiki xml file to a mediawiki - does only third (last) step
355 * of the whole export process
356 *
357 * @param inputFilePath
358 * @param wikiUrl
359 * @param wikiUser
360 * @param passwd
361 * @param deleteOutputFile
362 */
363 public void uploadToMediawiki(String inputFilePath, String wikiUrl, String wikiLoginUid, String passwd) {
364 mediawikiFileWithPath = inputFilePath;
365 logger.info("reading file "+mediawikiFileWithPath+".");
366 uploadToMediawiki(wikiUrl, wikiLoginUid, passwd);
367 }
368
369 /*
370 * @author l.morris
371 */
372 private void uploadToMediawiki(String wikiUrl, String wikiLoginUid, String passwd) {
373
374 // login to mediawiki
375
376 WikiBot myBot = getBotAndLogin(wikiUrl, wikiLoginUid, passwd);
377
378
379 try {
380
381 // parse wiki xml file and import pages one by one
382 // to mediawiki
383 // MAYDO import whole file, with functionality from mediawiki API
384
385 // get published output file
386 org.jdom.Document document = getDocument(mediawikiFileWithPath);
387 // get page nodes
388 Element rootElement = document.getRootElement();
389 // export pages
390 List pages = rootElement.getChildren("page");
391 Iterator itr = pages.iterator();
392 int length = pages.size();
393 int i = 1;
394 while (itr.hasNext()) {
395 Element page = (Element) itr.next();
396 String title = page.getChild("title").getText();
397 String text = page.getChild("revision").getChild("text")
398 .getText();
399 myBot.edit(title, text, PAGE_SUMMARY);
400 logger.info("exported page " + i + "/" + length + " " + title
401 + " to " + wikiUrl + ".");
402 i++;
403 }
404 myBot.logout();
405 logger.info("all pages uploaded and mediawiki logout.");
406 } catch (IOException e) {
407 e.printStackTrace();
408 } catch (Exception e) {
409 e.printStackTrace();
410 e.getMessage();
411 return;
412 }
413
414 }
415 /**
416 * @param wikiUrl
417 * @param wikiLoginUid
418 * @param passwd
419 * @return
420 */
421 private WikiBot getBotAndLogin(String wikiUrl, String wikiLoginUid,
422 String passwd) {
423 WikiBot myBot = new WikiBot(wikiUrl, wikiLoginUid, passwd);
424
425 // login to mediawiki
426 try {
427 myBot.login();
428 } catch (Exception e) {
429 logger.info("Cannot log into Mediwiki: "+wikiUrl);
430 e.printStackTrace();
431 }
432
433 logger.info("logged in to mediawiki as " + wikiLoginUid + ".");
434 return myBot;
435 }
436
437 private void downloadImages() {
438 org.jdom.Document document = wikiOutputModule.getInputDocument();
439 localImages = new ArrayList<String>();
440
441 try {
442 List<Element> media_uris = XPath.selectNodes(document, "//Taxon/media/e/representations/e/parts/e/uri");
443
444 if(media_uris.isEmpty()){
445 logger.info("there are no images in the data.");
446 return;
447 }
448
449 for (Element urlEl : media_uris) {
450 String url=urlEl.getValue();
451 URL imageUrl = new URL(url);
452 String[] arr = url.split("/");
453 String filename = arr[arr.length - 1];
454 //String filePath = temporaryImageExportFolder.getAbsolutePath()
455 // + File.separator + filename;
456 String filePath = temporaryExportFolder.getAbsolutePath()
457 +FILESEPARATOR + IMAGES_FOLDER +FILESEPARATOR+ filename;
458 logger.info("downloading image " + url+" to "+filePath);
459
460 FileUtils.copyURLToFile(imageUrl, new File(filePath));
461 localImages.add(filePath);
462 }
463
464 } catch (JDOMException e) {
465 // TODO Auto-generated catch block
466 e.printStackTrace();
467 } catch (MalformedURLException e) {
468 // TODO Auto-generated catch block
469 e.printStackTrace();
470 } catch (IOException e) {
471 // TODO Auto-generated catch block
472 e.printStackTrace();
473 }
474
475 }
476
477 private void uploadImagesToMediawiki(String wikiUrl, String wikiLoginUid, String passwd) {
478 WikiBot myBot = getBotAndLogin(wikiUrl, wikiLoginUid, passwd);
479 // get published output file
480
481
482 for(String localUri : localImages){
483
484 try {
485 uploadImage(myBot, localUri);
486 } catch (MalformedURLException e) {
487 // TODO Auto-generated catch block
488 e.printStackTrace();
489 } catch (IOException e) {
490 // TODO Auto-generated catch block
491 e.printStackTrace();
492 }
493 }
494
495 // logout
496 myBot.logout();
497 logger.info("all images uploaded to mediawiki "+wikiUrl+" and logged out.");
498 }
499
500 /**
501 * @param filePath
502 * @throws MalformedURLException
503 * @throws IOException
504 * TODO give a unique id to each image name
505 * but this has to be done also in the wikioutput then
506 */
507 private void uploadImage(WikiBot myBot, String filePath) throws MalformedURLException,
508 IOException {
509 // URL imageUrl = new URL(url);
510 // String[] arr = url.split("/");
511 // String filename = arr[arr.length - 1];
512 //// System.out.println(filename);
513 // //String filePath = temporaryImageExportFolder.getAbsolutePath()
514 // // + File.separator + filename;
515 // String filePath = temporaryExportFolder.getAbsolutePath()
516 // +FILESEPARATOR + IMAGES_FOLDER +FILESEPARATOR+ filename;
517 //// System.out.println(filePath);
518 // File imageFile = new File(filePath);
519 // logger.info("downloading image " + url);
520
521 // FileUtils.copyURLToFile(imageUrl, new File(filePath));
522
523
524 File imageFile = new File(filePath);
525 String[] arr = filePath.split("/");
526 String filename = arr[arr.length - 1];
527 try {
528 //Upload image to Mediawiki
529 //TODO: Change text to give a description of the image
530 myBot.uploadAFile(imageFile, filename, "some text", "no comment");
531 } catch (LoginException e) {
532 // TODO Auto-generated catch block
533 e.printStackTrace();
534 }
535 logger.info("uploaded image " + imageFile.getName()+" to mediawiki.");
536 }
537
538 private Document getDocument(String filePath) {
539 SAXBuilder saxBuilder = new SAXBuilder();
540
541 File file = new File(filePath);
542 Document document = null;
543 FileInputStream fileis;
544
545 // converted file to document object
546 try {
547 //document = saxBuilder.build(file);
548 fileis = new FileInputStream(file);
549 BufferedInputStream in = new BufferedInputStream(fileis);
550 document = saxBuilder.build(in);
551
552 } catch (JDOMException e) {
553 // TODO Auto-generated catch block
554 logger.error(e.getCause().getMessage());
555 e.printStackTrace();
556 } catch (IOException e) {
557 // TODO Auto-generated catch block
558 e.printStackTrace();
559 }
560 return document;
561 }
562
563 }