cdmlib-ext/src/main/java/eu/etaxonomy/cdm/ext/openurl/MobotOpenUrlServiceWrapper.java

   1 // $Id$
   2 /**
   3  * Copyright (C) 2009 EDIT
   4  * European Distributed Institute of Taxonomy
   5  * http://www.e-taxonomy.eu
   6  *
   7  * The contents of this file are subject to the Mozilla Public License Version 1.1
   8  * See LICENSE.TXT at the top of this package for the full license terms.
   9  */
  10 package eu.etaxonomy.cdm.ext.openurl;
  11
  12 import java.io.IOException;
  13 import java.io.InputStream;
  14 import java.net.URI;
  15 import java.net.URISyntaxException;
  16 import java.util.ArrayList;
  17 import java.util.HashMap;
  18 import java.util.List;
  19 import java.util.Map;
  20
  21 import org.apache.http.NameValuePair;
  22 import org.apache.http.message.BasicNameValuePair;
  23
  24 import eu.etaxonomy.cdm.ext.common.SchemaAdapterBase;
  25 import eu.etaxonomy.cdm.ext.common.ServiceWrapperBase;
  26 import eu.etaxonomy.cdm.model.reference.Reference;
  27
  28 /**
  29  * Generic ServiceWrapper for OpenUrl 1.0 services, initially implemented to be
  30  * used with the BHL OpenUrl resolver
  31  * (http://www.biodiversitylibrary.org/openurl) but might also work with other
  32  * resolvers which meet the Z39.88-2004 (=OpenURL 1.0) specification
  33  * <p>
  34  * For references see:
  35  * <ul>
  36  * <li>BHL OpenUrl resolver reference:
  37  * http://www.biodiversitylibrary.org/openurlhelp.aspx</li>
  38  * <li>ANSI/NISO Z39.88-2004 (=OpenURL 1.0) specification:
  39  * http://www.niso.org/kst/reports/standards?step=2&gid=&project_key=
  40  * d5320409c5160be4697dc046613f71b9a773cd9e</li>
  41  * </ul>
  42  *
  43  * @author a.kohlbecker
  44  * @date 24.08.2010
  45  *
  46  */
  47 public class MobotOpenUrlServiceWrapper extends ServiceWrapperBase<OpenUrlReference> {
  48
  49     private String urlVersion = "Z39.88-2004";
  50
  51     public MobotOpenUrlServiceWrapper(){
  52         addSchemaAdapter(new MobotOpenUrlResponseSchemaAdapter());
  53     }
  54
  55     /**
  56      * BHL uses the response format as specified in the
  57      * http://code.google.com/p/
  58      * bhl-bits/source/browse/trunk/portal/OpenUrlUtilities
  59      * /OpenUrlResponse.cs?r=17 there seems to be no xml schema available
  60      * though.
  61      * @param query the MobotOpenUrlQuery object
  62      * @return
  63      */
  64     public List<OpenUrlReference> doResolve(MobotOpenUrlQuery query) {
  65
  66         List<NameValuePair> pairs = new ArrayList<NameValuePair>();
  67
  68         // find the appropriate schemadapter using the schemaShortName
  69         if(query.schemaShortName == null){
  70             query.schemaShortName = "MOBOT.OpenUrl.Utilities.OpenUrlResponse";
  71         }
  72         SchemaAdapterBase<OpenUrlReference> schemaAdapter = schemaAdapterMap.get(query.schemaShortName);
  73         if (schemaAdapter == null) {
  74             logger.error("No SchemaAdapter found for " + query.schemaShortName);
  75         }
  76
  77         addNameValuePairTo(pairs, "format", "xml");
  78         addNameValuePairTo(pairs, "url_ver", urlVersion);
  79         /* info:ofi/fmt:kev:mtx:book or info:ofi/fmt:kev:mtx:journal */
  80         addNameValuePairTo(pairs, "rft_val_fmt", "info:ofi/fmt:kev:mtx:" + query.refType);
  81         /* Book title */
  82         addNameValuePairTo(pairs, "rft.btitle", query.bookTitle);
  83         /* Journal title */
  84         addNameValuePairTo(pairs, "rft.jtitle", query.journalTitle);
  85         /* Author name ("last, first" or "corporation") */
  86         addNameValuePairTo(pairs, "rft.au", query.authorName);
  87         /* Author last name */
  88         addNameValuePairTo(pairs, "rft.aulast", query.authorLastName);
  89         /* Author first name */
  90         addNameValuePairTo(pairs, "rft.aufirst", query.authorFirstName);
  91         /* Author name (corporation) */
  92         addNameValuePairTo(pairs, "rft.aucorp", query.authorNameCorporation);
  93         /* Publication details */
  94         addNameValuePairTo(pairs, "rft.publisher", query.publicationDetails);
  95         /* Publisher name */
  96         addNameValuePairTo(pairs, "rft.pub", query.publisherName);
  97         /* Publication place */
  98         addNameValuePairTo(pairs, "rft.place", query.publicationPlace);
  99         /* Publication date (YYYY or YYYY-MM or YYYY-MM-DD) */
 100         addNameValuePairTo(pairs, "rft.date", query.publicationDate);
 101         /* ISSN */
 102         addNameValuePairTo(pairs, "rft.issn", query.ISSN);
 103         /* ISBN */
 104         addNameValuePairTo(pairs, "rft.isbn", query.ISBN);
 105         /* CODEN */
 106         addNameValuePairTo(pairs, "rft.coden", query.CODEN);
 107         /* Abbreviation = abbreviated Title */
 108         addNameValuePairTo(pairs, "rft.stitle", query.abbreviation);
 109         /* Volume */
 110         addNameValuePairTo(pairs, "rft.volume", query.volume);
 111         /* Issue */
 112         addNameValuePairTo(pairs, "rft.issue", query.issue);
 113         /* Start page */
 114         if(query.startPage != null){
 115             Integer page = parsePageNumber(query.startPage);
 116             addNameValuePairTo(pairs, "rft.spage", page.toString());
 117         }
 118         /* BHL title ID (where XXXX is the ID value)*/
 119         addNameValuePairTo(pairs, "rft_id" , query.bhlTitleURI);
 120         /* BHL page ID (where XXXX is the ID value)*/
 121         addNameValuePairTo(pairs, "rft_id", query.bhlPageURI);
 122
 123         /* OCLC number (where XXXX is the ID value)*/
 124         if(query.oclcNumber != null){
 125             pairs.add(new BasicNameValuePair("rft_id", "info:oclcnum/" +query.oclcNumber));
 126         }
 127         /* Lib. of Congress ID (where XXXX is the ID value)*/
 128         if(query.libofCongressID != null){
 129             pairs.add(new BasicNameValuePair("rft_id", "info:lccn/" +query.libofCongressID));
 130         }
 131
 132         Map<String, String> requestHeaders = new HashMap<String, String>();
 133         requestHeaders.put("Accept-Charset", "UTF-8");
 134
 135         try {
 136             URI requestUri = createUri(null, pairs);
 137
 138             InputStream stream = executeHttpGet(requestUri, requestHeaders);
 139 //                      String search = "utf-16";
 140 //                      String replace = "UTF-8";
 141 ////                    stream = StreamUtils.streamReplace(stream, search, replace);
 142             // fix the "org.xml.sax.SAXParseException: An invalid XML character (Unicode: 0x1) was found" problem
 143 //                      stream = StreamUtils.streamReplaceAll(stream, "[\\x00-\\x10]", " ");
 144
 145             List<OpenUrlReference> referenceList = schemaAdapter.getCmdEntities(stream);
 146             // TODO : we need to set ReferenceType here unless we know that the field Genre returns the reference type
 147             for(OpenUrlReference ref : referenceList){
 148                 ref.setReferenceType(query.refType);
 149             }
 150             return referenceList;
 151
 152         } catch (IOException e) {
 153             // thrown by doHttpGet
 154             logger.error(e);
 155         } catch (URISyntaxException e) {
 156             // thrown by createUri
 157             logger.error(e);
 158         }
 159
 160         return null;
 161
 162     }
 163
 164     private Integer parsePageNumber(String startPage) {
 165         String pageNumbers = startPage.replaceAll("(?i)page|pages|p|p\\.|pp\\.|pp", "");
 166         String[] pageNumbersTokens = pageNumbers.split("[,-]", 1);
 167         Integer page = null;
 168         try {
 169             if(pageNumbersTokens[0] != null){
 170                 pageNumbersTokens[0] = pageNumbersTokens[0].trim();
 171             } else {
 172                 throw new NumberFormatException();
 173             }
 174             page = Integer.valueOf(pageNumbersTokens[0]);
 175         } catch (NumberFormatException e) {
 176             logger.warn("First page number token of " + startPage + " is not a Number", e);
 177             throw e;
 178         }
 179         return page;
 180     }
 181
 182
 183     /**
 184      * @param reference
 185      *            the OpenUrlReference instance as a starting point for paging.
 186      * @param forward
 187      *            integer indicating the number of pages to page forward. An
 188      *            negative integer will page backwards
 189      * @return
 190      * @throws IllegalArgumentException
 191      *             if the requested page number is not existent or if the field
 192      *             or if OpenUrlReference.pages is not parsable
 193      */
 194     public List<OpenUrlReference> doPage(OpenUrlReference reference, int forward) throws IllegalArgumentException{
 195
 196         Integer pageNumber = null;
 197         try{
 198             if(reference.getPages() != null){
 199                 pageNumber = parsePageNumber(reference.getPages());
 200             }
 201         }catch(NumberFormatException e){
 202             String errorMessage = "Reference has no page number or the field 'pages' is not parsable";
 203             logger.warn(errorMessage);
 204             throw new IllegalArgumentException(errorMessage);
 205         }
 206
 207         MobotOpenUrlQuery query = new MobotOpenUrlQuery();
 208         query.bhlTitleURI = reference.getTitleUri();
 209         pageNumber += forward;
 210         query.startPage = pageNumber.toString();
 211         query.refType = reference.getReferenceType();
 212         return doResolve(query);
 213     }
 214
 215     public enum ReferenceType{
 216         book, journal;
 217
 218         public static ReferenceType getReferenceType(Reference reference){
 219             if(eu.etaxonomy.cdm.model.reference.ReferenceType.Book.equals(reference.getType())){
 220                 return book;
 221             }
 222             else if(eu.etaxonomy.cdm.model.reference.ReferenceType.Journal.equals(reference.getType())){
 223                 return journal;
 224             }
 225             else {
 226                 return null;
 227             }
 228         }
 229     }
 230
 231 }