- merged changes from campanula branch
[cdmlib.git] / cdmlib-ext / src / main / java / eu / etaxonomy / cdm / ext / openurl / MobotOpenUrlServiceWrapper.java
1 // $Id$
2 /**
3 * Copyright (C) 2009 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10 package eu.etaxonomy.cdm.ext.openurl;
11
12 import java.io.IOException;
13 import java.io.InputStream;
14 import java.net.URI;
15 import java.net.URISyntaxException;
16 import java.util.ArrayList;
17 import java.util.HashMap;
18 import java.util.List;
19 import java.util.Map;
20
21 import org.apache.http.NameValuePair;
22 import org.apache.http.message.BasicNameValuePair;
23
24 import eu.etaxonomy.cdm.ext.common.SchemaAdapterBase;
25 import eu.etaxonomy.cdm.ext.common.ServiceWrapperBase;
26 import eu.etaxonomy.cdm.model.reference.Reference;
27
28 /**
29 * Generic ServiceWrapper for OpenUrl 1.0 services, initially implemented to be
30 * used with the BHL OpenUrl resolver
31 * (http://www.biodiversitylibrary.org/openurl) but might also work with other
32 * resolvers which meet the Z39.88-2004 (=OpenURL 1.0) specification
33 * <p>
34 * For references see:
35 * <ul>
36 * <li>BHL OpenUrl resolver reference:
37 * http://www.biodiversitylibrary.org/openurlhelp.aspx</li>
38 * <li>ANSI/NISO Z39.88-2004 (=OpenURL 1.0) specification:
39 * http://www.niso.org/kst/reports/standards?step=2&gid=&project_key=
40 * d5320409c5160be4697dc046613f71b9a773cd9e</li>
41 * </ul>
42 *
43 * @author a.kohlbecker
44 * @date 24.08.2010
45 *
46 */
47 public class MobotOpenUrlServiceWrapper extends ServiceWrapperBase<OpenUrlReference> {
48
49 private String urlVersion = "Z39.88-2004";
50
51 public MobotOpenUrlServiceWrapper(){
52 addSchemaAdapter(new MobotOpenUrlResponseSchemaAdapter());
53 }
54
55 /**
56 * BHL uses the response format as specified in the
57 * http://code.google.com/p/
58 * bhl-bits/source/browse/trunk/portal/OpenUrlUtilities
59 * /OpenUrlResponse.cs?r=17 there seems to be no xml schema available
60 * though.
61 * @param query the MobotOpenUrlQuery object
62 * @return
63 */
64 public List<OpenUrlReference> doResolve(MobotOpenUrlQuery query) {
65
66 List<NameValuePair> pairs = new ArrayList<NameValuePair>();
67
68 // find the appropriate schemadapter using the schemaShortName
69 if(query.schemaShortName == null){
70 query.schemaShortName = "MOBOT.OpenUrl.Utilities.OpenUrlResponse";
71 }
72 SchemaAdapterBase<OpenUrlReference> schemaAdapter = schemaAdapterMap.get(query.schemaShortName);
73 if (schemaAdapter == null) {
74 logger.error("No SchemaAdapter found for " + query.schemaShortName);
75 }
76
77 addNameValuePairTo(pairs, "format", "xml");
78 addNameValuePairTo(pairs, "url_ver", urlVersion);
79 /* info:ofi/fmt:kev:mtx:book or info:ofi/fmt:kev:mtx:journal */
80 addNameValuePairTo(pairs, "rft_val_fmt", "info:ofi/fmt:kev:mtx:" + query.refType);
81 /* Book title */
82 addNameValuePairTo(pairs, "rft.btitle", query.bookTitle);
83 /* Journal title */
84 addNameValuePairTo(pairs, "rft.jtitle", query.journalTitle);
85 /* Author name ("last, first" or "corporation") */
86 addNameValuePairTo(pairs, "rft.au", query.authorName);
87 /* Author last name */
88 addNameValuePairTo(pairs, "rft.aulast", query.authorLastName);
89 /* Author first name */
90 addNameValuePairTo(pairs, "rft.aufirst", query.authorFirstName);
91 /* Author name (corporation) */
92 addNameValuePairTo(pairs, "rft.aucorp", query.authorNameCorporation);
93 /* Publication details */
94 addNameValuePairTo(pairs, "rft.publisher", query.publicationDetails);
95 /* Publisher name */
96 addNameValuePairTo(pairs, "rft.pub", query.publisherName);
97 /* Publication place */
98 addNameValuePairTo(pairs, "rft.place", query.publicationPlace);
99 /* Publication date (YYYY or YYYY-MM or YYYY-MM-DD) */
100 addNameValuePairTo(pairs, "rft.date", query.publicationDate);
101 /* ISSN */
102 addNameValuePairTo(pairs, "rft.issn", query.ISSN);
103 /* ISBN */
104 addNameValuePairTo(pairs, "rft.isbn", query.ISBN);
105 /* CODEN */
106 addNameValuePairTo(pairs, "rft.coden", query.CODEN);
107 /* Abbreviation = abbreviated Title */
108 addNameValuePairTo(pairs, "rft.stitle", query.abbreviation);
109 /* Volume */
110 addNameValuePairTo(pairs, "rft.volume", query.volume);
111 /* Issue */
112 addNameValuePairTo(pairs, "rft.issue", query.issue);
113 /* Start page */
114 if(query.startPage != null){
115 Integer page = parsePageNumber(query.startPage);
116 addNameValuePairTo(pairs, "rft.spage", page.toString());
117 }
118 /* BHL title ID (where XXXX is the ID value)*/
119 addNameValuePairTo(pairs, "rft_id" , query.bhlTitleURI);
120 /* BHL page ID (where XXXX is the ID value)*/
121 addNameValuePairTo(pairs, "rft_id", query.bhlPageURI);
122
123 /* OCLC number (where XXXX is the ID value)*/
124 if(query.oclcNumber != null){
125 pairs.add(new BasicNameValuePair("rft_id", "info:oclcnum/" +query.oclcNumber));
126 }
127 /* Lib. of Congress ID (where XXXX is the ID value)*/
128 if(query.libofCongressID != null){
129 pairs.add(new BasicNameValuePair("rft_id", "info:lccn/" +query.libofCongressID));
130 }
131
132 Map<String, String> requestHeaders = new HashMap<String, String>();
133 requestHeaders.put("Accept-Charset", "UTF-8");
134
135 try {
136 URI requestUri = createUri(null, pairs);
137
138 InputStream stream = executeHttpGet(requestUri, requestHeaders);
139 // String search = "utf-16";
140 // String replace = "UTF-8";
141 //// stream = StreamUtils.streamReplace(stream, search, replace);
142 // fix the "org.xml.sax.SAXParseException: An invalid XML character (Unicode: 0x1) was found" problem
143 // stream = StreamUtils.streamReplaceAll(stream, "[\\x00-\\x10]", " ");
144
145 List<OpenUrlReference> referenceList = schemaAdapter.getCmdEntities(stream);
146 // TODO : we need to set ReferenceType here unless we know that the field Genre returns the reference type
147 for(OpenUrlReference ref : referenceList){
148 ref.setReferenceType(query.refType);
149 }
150 return referenceList;
151
152 } catch (IOException e) {
153 // thrown by doHttpGet
154 logger.error(e);
155 } catch (URISyntaxException e) {
156 // thrown by createUri
157 logger.error(e);
158 }
159
160 return null;
161
162 }
163
164 private Integer parsePageNumber(String startPage) {
165 String pageNumbers = startPage.replaceAll("(?i)page|pages|p|p\\.|pp\\.|pp", "");
166 String[] pageNumbersTokens = pageNumbers.split("[,-]", 1);
167 Integer page = null;
168 try {
169 if(pageNumbersTokens[0] != null){
170 pageNumbersTokens[0] = pageNumbersTokens[0].trim();
171 } else {
172 throw new NumberFormatException();
173 }
174 page = Integer.valueOf(pageNumbersTokens[0]);
175 } catch (NumberFormatException e) {
176 logger.warn("First page number token of " + startPage + " is not a Number", e);
177 throw e;
178 }
179 return page;
180 }
181
182
183 /**
184 * @param reference
185 * the OpenUrlReference instance as a starting point for paging.
186 * @param forward
187 * integer indicating the number of pages to page forward. An
188 * negative integer will page backwards
189 * @return
190 * @throws IllegalArgumentException
191 * if the requested page number is not existent or if the field
192 * or if OpenUrlReference.pages is not parsable
193 */
194 public List<OpenUrlReference> doPage(OpenUrlReference reference, int forward) throws IllegalArgumentException{
195
196 Integer pageNumber = null;
197 try{
198 if(reference.getPages() != null){
199 pageNumber = parsePageNumber(reference.getPages());
200 }
201 }catch(NumberFormatException e){
202 String errorMessage = "Reference has no page number or the field 'pages' is not parsable";
203 logger.warn(errorMessage);
204 throw new IllegalArgumentException(errorMessage);
205 }
206
207 MobotOpenUrlQuery query = new MobotOpenUrlQuery();
208 query.bhlTitleURI = reference.getTitleUri();
209 pageNumber += forward;
210 query.startPage = pageNumber.toString();
211 query.refType = reference.getReferenceType();
212 return doResolve(query);
213 }
214
215 public enum ReferenceType{
216 book, journal;
217
218 public static ReferenceType getReferenceType(Reference reference){
219 if(eu.etaxonomy.cdm.model.reference.ReferenceType.Book.equals(reference.getType())){
220 return book;
221 }
222 else if(eu.etaxonomy.cdm.model.reference.ReferenceType.Journal.equals(reference.getType())){
223 return journal;
224 }
225 else {
226 return null;
227 }
228 }
229 }
230
231 }