1 package eu
.etaxonomy
.cdm
.print
.out
.mediawiki
;
3 import java
.io
.BufferedInputStream
;
5 import java
.io
.FileInputStream
;
6 import java
.io
.FileWriter
;
7 import java
.io
.IOException
;
8 import java
.net
.MalformedURLException
;
10 import java
.util
.ArrayList
;
11 import java
.util
.Iterator
;
12 import java
.util
.List
;
13 import java
.util
.UUID
;
15 import javax
.security
.auth
.login
.LoginException
;
17 import org
.apache
.commons
.io
.FileUtils
;
18 import org
.apache
.log4j
.Logger
;
19 import org
.jdom
.Document
;
20 import org
.jdom
.Element
;
21 import org
.jdom
.JDOMException
;
22 import org
.jdom
.input
.SAXBuilder
;
23 import org
.jdom
.output
.Format
;
24 import org
.jdom
.output
.XMLOutputter
;
25 import org
.jdom
.xpath
.XPath
;
27 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
28 import eu
.etaxonomy
.cdm
.common
.monitor
.DefaultProgressMonitor
;
29 import eu
.etaxonomy
.cdm
.print
.IXMLEntityFactory
;
30 import eu
.etaxonomy
.cdm
.print
.PublishConfigurator
;
31 import eu
.etaxonomy
.cdm
.print
.Publisher
;
32 import eu
.etaxonomy
.cdm
.print
.out
.IPublishOutputModule
;
35 * fill in all parameters and you get a complete export from a cdm database to a
38 * TODO should we move this class somewhere else and/or rename it?
40 * @author s.buers, l.morris
43 public class Cdm2MediawikiExporter
{
45 private static final String IMAGES_FOLDER
= "images";
47 private static final String FILESEPARATOR
= File
.separator
;
50 private static final String MEDIAWIKI_CDM_SUB_DIR
= "mediawiki_tmp";
52 private static final String IMAGE_DIR
= MEDIAWIKI_CDM_SUB_DIR
53 + File
.separator
+ "images";
55 private static final String CDM_EXPORT_FILE_NAME
= "cdm_output";
57 private static final String PAGE_SUMMARY
= "automatic import from CDM";
61 private static final Logger logger
= Logger
62 .getLogger(Cdm2MediawikiExporter
.class);
64 private PublishConfigurator configurator
= PublishConfigurator
.NewRemoteInstance();
66 private IXMLEntityFactory factory
;
68 // where the mediawiki xml code is stored
69 private String mediawikiFileWithPath
= null;
71 // where the cdm exported xml can be stored
72 private String cdm_output_file
= null;
74 private Document cdmOutputDocument
= null;
75 private Document externalDocument
= null;
77 private MediawikiOutputModule wikiOutputModule
;
79 private File temporaryExportFolder
= null;
81 private List
<String
> localImages
;
// Public export overload that receives the taxon node and the tree node as UUIDs
// (presumably instead of the taxon/classification names used by the other public
// overload - TODO confirm). Only the signature is visible here, so nothing can be
// said about what the body does.
84 public void export(String serviceUrl
, UUID taxonNodeUuid
, UUID treeNodeUuid
, String wikiUrl
,
85 String wikiLoginUid
, String passwd
, String wikiPageNamespace
,
86 boolean import2Mediawiki
, boolean deleteOutputFiles
,
87 boolean importImages
) throws MalformedURLException
{
93 * does the whole export process: runs cdm export to mediawiki xml-file and
94 * wiki import of this file
99 * - url of the destination wiki
100 * @param wikiLoginUid
101 * - uid of wiki admin
103 * - password of the above wiki admin
104 * @param wikiPageNamespace
105 * - prefix that will be added to all pages; null or "" will make
107 * @throws MalformedURLException
109 * TODO: make passwd "unplain" MAYDO: pass more parameters e.g.:
110 * alternative stylesheet layout parameters (that may force the
111 * use of different stylesheet) export folder - we use a
112 * temporary so far boolean for telling if we want to keep the
113 * mediawiki xml file ...
// Public entry point for a complete export that starts from a taxon name:
// sets up the configurator for the given service url, selects the taxon nodes
// that the factory finds for taxonName/classificationName and then delegates
// to another export(..) overload.
115 public void export(String portalUrl
, String serviceUrl
, String taxonName
, String classificationName
, String wikiUrl
,
116 String wikiLoginUid
, String passwd
, String wikiPageNamespace
,
117 boolean import2Mediawiki
, boolean deleteOutputFiles
,
118 boolean importImages
) throws MalformedURLException
{
120 // get taxon node uuid from taxon name and pass it to the configurator:
121 // TODO get classification name from export() - add a parameter
122 // and use it to choose the right taxon
// NOTE(review): classificationName already is a parameter and is passed to
// getTaxonNodesByName(..) below, so the TODO above looks outdated - confirm.
124 // setup configurator
// setupConfigurator(..) must run first: it assigns the 'factory' field that is
// dereferenced in the next statement.
125 setupConfigurator(serviceUrl
);
126 configurator
.addSelectedTaxonNodeElements(factory
.getTaxonNodesByName(taxonName
, classificationName
));
// delegate to the export(..) overload that does the actual work; the trailing
// arguments of this call are not visible here.
129 export(portalUrl
, serviceUrl
, wikiUrl
, wikiLoginUid
, passwd
,
130 wikiPageNamespace
, import2Mediawiki
, deleteOutputFiles
,
134 * if you already have a cdm xml export in some file you put it in here
135 * the mediawiki xml is created and imported into a mediawiki
136 * does step 2 and 3 out of all 3 export steps
141 * @param classificationName
143 * @param wikiLoginUid
145 * @param wikiPageNamespace
146 * @param import2Mediawiki
147 * @param deleteOutputFiles
148 * @param importImages
149 * @throws MalformedURLException
151 public void exportFromXmlFile(String portalUrl
, String filename
, String serviceUrl
,
152 String wikiUrl
, String wikiLoginUid
,
153 String passwd
, String wikiPageNamespace
, boolean import2Mediawiki
,
154 boolean deleteOutputFiles
, boolean importImages
)
155 throws MalformedURLException
{
157 //put the document to a field:
158 externalDocument
= getDocument(filename
);
160 // setupConfigurator(serviceUrl);
162 // and run export with usePublisher=false:
163 export(portalUrl
, serviceUrl
, wikiUrl
, wikiLoginUid
, passwd
,
164 wikiPageNamespace
, import2Mediawiki
, deleteOutputFiles
,
165 importImages
, false);
170 * TODO: make passwd "unplain" MAYDO: pass more parameters e.g.: alternative
171 * stylesheet layout parameters (that may force the use of different
172 * stylesheet) export folder - we use a temporary so far boolean for telling
173 * if we want to keep the mediawiki xml file ...
// Internal worker behind the public export methods. In the order of the visible
// statements it: creates the MediawikiOutputModule, produces the mediawiki xml
// (via Publisher or from externalDocument), optionally keeps/saves the cdm export
// document, optionally uploads pages and images to the wiki and optionally
// cleans up local files.
// NOTE(review): several structural lines (else branches, closing braces, the
// continuation of two assignments) are not visible here; the comments below
// only describe what the visible statements do.
175 private void export(String portalUrl
, String serviceUrl
, String wikiUrl
,
176 String wikiLoginUid
, String passwd
, String wikiPageNamespace
,
177 boolean import2Mediawiki
, boolean deleteOutputFiles
,
178 boolean importImages
, boolean usePublisher
)
179 throws MalformedURLException
{
181 // create MediawikiOutputModule with or without mediawiki pages
// a namespace that is null or consists of spaces only counts as "no namespace"
183 if (wikiPageNamespace
== null
184 || wikiPageNamespace
.replaceAll(" ", "").equals("")) {
185 wikiOutputModule
= new MediawikiOutputModule(portalUrl
);
// presumably the else branch: module constructed with the namespace as well
187 wikiOutputModule
= new MediawikiOutputModule(wikiPageNamespace
, portalUrl
);
190 // set username to wikiOutModule for having it in the history of the
192 // then it will be the same as the username that is used for the actual
// NOTE(review): the field is declared as MediawikiOutputModule, so this cast
// (and the same casts further down) has no effect.
194 ((MediawikiOutputModule
) wikiOutputModule
).setUsername(wikiLoginUid
);
196 // if we actually export from the cdm and not from a file we run the
198 // with the wikiOutputModule
199 // else we run the wikiOutputModule with an input document (cdm exported
// publisher path: the wiki output module is registered as the only output module
202 List
<IPublishOutputModule
> modules
= new ArrayList
<IPublishOutputModule
>();
203 modules
.add(wikiOutputModule
);
204 configurator
.setOutputModules(modules
);
206 // do export from cdm to mediawiki xml file
207 Publisher
.publish(configurator
);
// file path: the output module is run directly on the document that was read
// from a local file; the export folder has to be created here because
// setupConfigurator(..) is not called from exportFromXmlFile(..)
210 logger
.info("read data from local file.");
211 createTemporaryExportFolder();
212 wikiOutputModule
.output(externalDocument
,
213 temporaryExportFolder
,
214 DefaultProgressMonitor
.NewInstance());
217 // we get the whole filename that the wikiOutputModule created
// (the method that is called on the module is not visible here)
218 mediawikiFileWithPath
= ((MediawikiOutputModule
) wikiOutputModule
)
221 logger
.info("mediawiki xml file created and saved to"+mediawikiFileWithPath
);
222 // if we want to upload images or save the cdm exported document,
223 // we put it to a field
224 if ((usePublisher
&& !deleteOutputFiles
) || importImages
) {
225 // the cdm output where we want to fetch the urls of the
// (the method that is called on the module is not visible here)
227 cdmOutputDocument
= ((MediawikiOutputModule
) wikiOutputModule
)
231 // if we just created the cdm exported xml and want to
232 // keep all the output, we save the cdm exported document in a file
233 if (usePublisher
&& !deleteOutputFiles
) {
234 saveCdmXmlExportedDocument(temporaryExportFolder
, cdmOutputDocument
);
237 // import into mediawiki
238 if (import2Mediawiki
) {
239 uploadToMediawiki(wikiUrl
, wikiLoginUid
, passwd
);
// the condition guarding this message is not visible here
246 logger
.info("did not get images!");
// images are uploaded only when pages are imported, images were requested and
// at least one image is listed in localImages; thanks to short-circuit
// evaluation localImages is dereferenced only if both flags are true
249 if (import2Mediawiki
&& importImages
&& !(localImages
.isEmpty())) {
250 uploadImagesToMediawiki(wikiUrl
, wikiLoginUid
, passwd
);
// clean up (the statement between the condition and the log call is not visible)
253 if (deleteOutputFiles
) {
255 logger
.info("deleted temporary file(s)");
262 private void createTemporaryExportFolder() {
263 temporaryExportFolder
= CdmUtils
.getCdmHomeSubDir(MEDIAWIKI_CDM_SUB_DIR
);
264 if (temporaryExportFolder
!= null) {
265 logger
.info("using " + temporaryExportFolder
.getAbsolutePath()
266 + " as temporary directory.");
268 logger
.error("could not create directory"
269 + temporaryExportFolder
.getAbsolutePath());
276 * @throws MalformedURLException
278 private void setupConfigurator(String serviceUrl
)
279 throws MalformedURLException
{
281 createTemporaryExportFolder();
283 configurator
.setWebserviceUrl(serviceUrl
);
284 factory
= configurator
.getFactory();
286 // get feature tree from taxon name/taxon node and pass it to the
288 // TODO, get a feature tree name or uuid as method parameters
289 String featureTree
= getDefaultFeatureTree();
290 configurator
.setFeatureTree(UUID
.fromString(featureTree
));
292 // pass cdm exportfolder to configurator:
293 configurator
.setExportFolder(temporaryExportFolder
);
299 private String
getDefaultFeatureTree() {
300 List
<Element
> featureTrees
= factory
.getFeatureTrees();
301 for (Element featureTreeElement
: featureTrees
) {
302 featureTreeElement
.getChild("uuid");
304 String featureTree
= featureTrees
.get(0).getChild("uuid").getValue();
309 * @param exportFolder
310 * @param cdmOutputDocument
// Writes the given cdm export document as pretty printed xml to a file below
// exportFolder and remembers the file name in the field cdm_output_file.
// exportFolder      - folder that receives the file
// cdmOutputDocument - the document to write
// An IOException is caught here (its handling is not visible).
312 private void saveCdmXmlExportedDocument(File exportFolder
,
313 Document cdmOutputDocument
) {
314 //XMLOutputter xmlOutput = new XMLOutputter();
// file name: starts with the export folder and ends with a dated name derived
// from CDM_EXPORT_FILE_NAME (the middle part of this expression is not visible)
316 cdm_output_file
= exportFolder
319 .generateFilenameWithDate(CDM_EXPORT_FILE_NAME
);
322 Format format
= Format
.getPrettyFormat();
324 //JDOMParseException Invalid byte 2 of 3-byte UTF-8 sequence which occurs for e.g.
325 //with German umlauts and French accents on characters
326 format
.setEncoding("ISO-8859-1");//"UTF-8");
327 XMLOutputter xmlOutput
= new XMLOutputter(format
);
// NOTE(review): the format was already passed to the constructor above, so
// this call looks redundant - confirm.
328 xmlOutput
.setFormat(format
);
// NOTE(review): FileWriter(String) encodes with the platform default charset,
// while the format declares ISO-8859-1 - the xml declaration and the actual
// bytes may disagree on other platforms. No close() of the writer is visible
// here either - TODO confirm.
331 xmlOutput
.output(cdmOutputDocument
, new FileWriter(cdm_output_file
));
333 } catch (IOException e
) {
334 // TODO Auto-generated catch block
337 logger
.info("saved CDM output file to: " + cdm_output_file
+ ".");
// Cleans up the local output: handles the mediawiki xml file first and then
// every path listed in localImages, logging one line per file.
// NOTE(review): the statements that actually delete the files are not visible
// here (presumably file.delete() after each 'new File(..)' - TODO confirm).
340 private void deleteOutputFiles() {
341 logger
.info("delete local files: ");
342 File file
= new File(mediawikiFileWithPath
);
344 logger
.info("deleted "+mediawikiFileWithPath
+".");
// the same 'file' variable is reused for each downloaded image
345 for (String localImage
: localImages
) {
346 file
= new File(localImage
);
348 logger
.info("deleted image "+localImage
+".");
350 //TODO delete tmp folders
354 * uploads a given mediawiki xml file to a mediawiki - does only third (last) step
355 * of the whole export process
357 * @param inputFilePath
361 * @param deleteOutputFile
363 public void uploadToMediawiki(String inputFilePath
, String wikiUrl
, String wikiLoginUid
, String passwd
) {
364 mediawikiFileWithPath
= inputFilePath
;
365 logger
.info("reading file "+mediawikiFileWithPath
+".");
366 uploadToMediawiki(wikiUrl
, wikiLoginUid
, passwd
);
// Uploads the pages of the file named by mediawikiFileWithPath to the wiki:
// logs in with a bot, parses the xml file and writes every <page> child of the
// root element with PAGE_SUMMARY as edit summary.
// wikiUrl      - url of the destination wiki
// wikiLoginUid - user id for the wiki login
// passwd       - password for the wiki login
// IOException and Exception are caught here (their handling is not visible).
372 private void uploadToMediawiki(String wikiUrl
, String wikiLoginUid
, String passwd
) {
374 // login to mediawiki
376 WikiBot myBot
= getBotAndLogin(wikiUrl
, wikiLoginUid
, passwd
);
381 // parse wiki xml file and import pages one by one
383 // MAYDO import whole file, with functionality from mediawiki API
385 // get published output file
386 org
.jdom
.Document document
= getDocument(mediawikiFileWithPath
);
388 Element rootElement
= document
.getRootElement();
// NOTE(review): raw List/Iterator types, hence the cast to Element in the loop
390 List pages
= rootElement
.getChildren("page");
391 Iterator itr
= pages
.iterator();
392 int length
= pages
.size();
394 while (itr
.hasNext()) {
395 Element page
= (Element
) itr
.next();
// page title and wiki text are read from <title> and <revision>/<text>
396 String title
= page
.getChild("title").getText();
397 String text
= page
.getChild("revision").getChild("text")
399 myBot
.edit(title
, text
, PAGE_SUMMARY
);
// 'i' is a page counter; its declaration and increment are not visible here
400 logger
.info("exported page " + i
+ "/" + length
+ " " + title
401 + " to " + wikiUrl
+ ".");
405 logger
.info("all pages uploaded and mediawiki logout.");
406 } catch (IOException e
) {
408 } catch (Exception e
) {
417 * @param wikiLoginUid
// Creates a WikiBot for the given wiki url and credentials and logs it in.
// wikiUrl      - url of the wiki
// wikiLoginUid - user id for the login
// (the end of the parameter list, the login call and the return statement are
// not visible here; 'passwd' is used below, so it is presumably the third
// parameter - TODO confirm)
421 private WikiBot
getBotAndLogin(String wikiUrl
, String wikiLoginUid
,
423 WikiBot myBot
= new WikiBot(wikiUrl
, wikiLoginUid
, passwd
);
425 // login to mediawiki
// NOTE(review): a failed login is caught as a plain Exception and only logged
// at info level.
428 } catch (Exception e
) {
429 logger
.info("Cannot log into Mediwiki: "+wikiUrl
);
433 logger
.info("logged in to mediawiki as " + wikiLoginUid
+ ".");
// Downloads all images that are referenced in the input document of the wiki
// output module to <temporaryExportFolder>/images and collects the local paths
// in localImages (which is replaced by a new, empty list first).
// JDOMException, MalformedURLException and IOException are caught here (their
// handling is not visible).
437 private void downloadImages() {
438 org
.jdom
.Document document
= wikiOutputModule
.getInputDocument();
439 localImages
= new ArrayList
<String
>();
// every matching <uri> element holds the url of one image
442 List
<Element
> media_uris
= XPath
.selectNodes(document
, "//Taxon/media/e/representations/e/parts/e/uri");
444 if(media_uris
.isEmpty()){
445 logger
.info("there are no images in the data.");
449 for (Element urlEl
: media_uris
) {
450 String url
=urlEl
.getValue();
451 URL imageUrl
= new URL(url
);
// the local file name is the last path segment of the url
// NOTE(review): two urls that end in the same segment end up in the same local
// file - confirm whether that can happen.
452 String
[] arr
= url
.split("/");
453 String filename
= arr
[arr
.length
- 1];
454 //String filePath = temporaryImageExportFolder.getAbsolutePath()
455 // + File.separator + filename;
456 String filePath
= temporaryExportFolder
.getAbsolutePath()
457 +FILESEPARATOR
+ IMAGES_FOLDER
+FILESEPARATOR
+ filename
;
458 logger
.info("downloading image " + url
+" to "+filePath
);
460 FileUtils
.copyURLToFile(imageUrl
, new File(filePath
));
// remembered for the later upload and for the clean up
461 localImages
.add(filePath
);
464 } catch (JDOMException e
) {
465 // TODO Auto-generated catch block
467 } catch (MalformedURLException e
) {
468 // TODO Auto-generated catch block
470 } catch (IOException e
) {
471 // TODO Auto-generated catch block
// Logs in to the wiki with a bot and uploads every file listed in localImages
// via uploadImage(..).
// wikiUrl      - url of the destination wiki
// wikiLoginUid - user id for the wiki login
// passwd       - password for the wiki login
// MalformedURLException and IOException are caught here (their handling and
// the position of the try are not visible).
477 private void uploadImagesToMediawiki(String wikiUrl
, String wikiLoginUid
, String passwd
) {
478 WikiBot myBot
= getBotAndLogin(wikiUrl
, wikiLoginUid
, passwd
);
// NOTE(review): the next comment looks stale - no output file is read in the
// visible statements of this method.
479 // get published output file
482 for(String localUri
: localImages
){
485 uploadImage(myBot
, localUri
);
486 } catch (MalformedURLException e
) {
487 // TODO Auto-generated catch block
489 } catch (IOException e
) {
490 // TODO Auto-generated catch block
497 logger
.info("all images uploaded to mediawiki "+wikiUrl
+" and logged out.");
502 * @throws MalformedURLException
503 * @throws IOException
504 * TODO give a unique id to each image name
505 * but this has to be done also in the wikioutput then
// Uploads one local image file to the wiki with the given (already logged in) bot.
// myBot    - bot that performs the upload
// filePath - path of the local image file
// Declares MalformedURLException (the rest of the throws clause is not visible
// here); a LoginException is caught locally.
507 private void uploadImage(WikiBot myBot
, String filePath
) throws MalformedURLException
,
// the commented-out lines below are an older version that downloaded the image
// here; downloading is done in downloadImages() now
509 // URL imageUrl = new URL(url);
510 // String[] arr = url.split("/");
511 // String filename = arr[arr.length - 1];
512 //// System.out.println(filename);
513 // //String filePath = temporaryImageExportFolder.getAbsolutePath()
514 // // + File.separator + filename;
515 // String filePath = temporaryExportFolder.getAbsolutePath()
516 // +FILESEPARATOR + IMAGES_FOLDER +FILESEPARATOR+ filename;
517 //// System.out.println(filePath);
518 // File imageFile = new File(filePath);
519 // logger.info("downloading image " + url);
521 // FileUtils.copyURLToFile(imageUrl, new File(filePath));
524 File imageFile
= new File(filePath
);
// the name used in the wiki is the last "/"-separated segment of the path
// NOTE(review): downloadImages() builds the path with File.separator; where
// that is not "/", the split only cuts at slashes that are part of the folder
// path, so the name may still contain directories - imageFile.getName() would
// avoid that. Confirm.
525 String
[] arr
= filePath
.split("/");
526 String filename
= arr
[arr
.length
- 1];
528 //Upload image to Mediawiki
529 //TODO: Change text to give a description of the image
530 myBot
.uploadAFile(imageFile
, filename
, "some text", "no comment");
531 } catch (LoginException e
) {
532 // TODO Auto-generated catch block
535 logger
.info("uploaded image " + imageFile
.getName()+" to mediawiki.");
538 private Document
getDocument(String filePath
) {
539 SAXBuilder saxBuilder
= new SAXBuilder();
541 File file
= new File(filePath
);
542 Document document
= null;
543 FileInputStream fileis
;
545 // converted file to document object
547 //document = saxBuilder.build(file);
548 fileis
= new FileInputStream(file
);
549 BufferedInputStream in
= new BufferedInputStream(fileis
);
550 document
= saxBuilder
.build(in
);
552 } catch (JDOMException e
) {
553 // TODO Auto-generated catch block
554 logger
.error(e
.getCause().getMessage());
556 } catch (IOException e
) {
557 // TODO Auto-generated catch block