1
|
/**
|
2
|
* Copyright (C) 2009 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.ext.openurl;
|
10
|
|
11
|
import java.io.IOException;
|
12
|
import java.io.InputStream;
|
13
|
import java.net.URI;
|
14
|
import java.net.URISyntaxException;
|
15
|
import java.util.ArrayList;
|
16
|
import java.util.HashMap;
|
17
|
import java.util.List;
|
18
|
import java.util.Map;
|
19
|
|
20
|
import org.apache.http.NameValuePair;
|
21
|
import org.apache.http.message.BasicNameValuePair;
|
22
|
|
23
|
import eu.etaxonomy.cdm.ext.common.SchemaAdapterBase;
|
24
|
import eu.etaxonomy.cdm.ext.common.ServiceWrapperBase;
|
25
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
26
|
|
27
|
/**
|
28
|
* Generic ServiceWrapper for OpenUrl 1.0 services, initially implemented to be
|
29
|
* used with the BHL OpenUrl resolver
|
30
|
* (http://www.biodiversitylibrary.org/openurl) but might also work with other
|
31
|
* resolvers which meet the Z39.88-2004 (=OpenURL 1.0) specification
|
32
|
* <p>
|
33
|
* For references see:
|
34
|
* <ul>
|
35
|
* <li>BHL OpenUrl resolver reference:
|
36
|
* http://www.biodiversitylibrary.org/openurlhelp.aspx</li>
|
37
|
* <li>ANSI/NISO Z39.88-2004 (=OpenURL 1.0) specification:
|
38
|
* http://www.niso.org/kst/reports/standards?step=2&gid=&project_key=
|
39
|
* d5320409c5160be4697dc046613f71b9a773cd9e</li>
|
40
|
* </ul>
|
41
|
*
|
42
|
* @author a.kohlbecker
|
43
|
* @since 24.08.2010
|
44
|
*
|
45
|
*/
|
46
|
public class MobotOpenUrlServiceWrapper extends ServiceWrapperBase<OpenUrlReference> {
|
47
|
|
48
|
private String urlVersion = "Z39.88-2004";
|
49
|
|
50
|
public MobotOpenUrlServiceWrapper(){
|
51
|
addSchemaAdapter(new MobotOpenUrlResponseSchemaAdapter());
|
52
|
}
|
53
|
|
54
|
/**
|
55
|
* BHL uses the response format as specified in the
|
56
|
* http://code.google.com/p/
|
57
|
* bhl-bits/source/browse/trunk/portal/OpenUrlUtilities
|
58
|
* /OpenUrlResponse.cs?r=17 there seems to be no xml schema available
|
59
|
* though.
|
60
|
* @param query the MobotOpenUrlQuery object
|
61
|
* @return
|
62
|
*/
|
63
|
public List<OpenUrlReference> doResolve(MobotOpenUrlQuery query) {
|
64
|
|
65
|
List<NameValuePair> pairs = new ArrayList<NameValuePair>();
|
66
|
|
67
|
// find the appropriate schemadapter using the schemaShortName
|
68
|
if(query.schemaShortName == null){
|
69
|
query.schemaShortName = "MOBOT.OpenUrl.Utilities.OpenUrlResponse";
|
70
|
}
|
71
|
SchemaAdapterBase<OpenUrlReference> schemaAdapter = schemaAdapterMap.get(query.schemaShortName);
|
72
|
if (schemaAdapter == null) {
|
73
|
logger.error("No SchemaAdapter found for " + query.schemaShortName);
|
74
|
}
|
75
|
|
76
|
addNameValuePairTo(pairs, "format", "xml");
|
77
|
addNameValuePairTo(pairs, "url_ver", urlVersion);
|
78
|
/* info:ofi/fmt:kev:mtx:book or info:ofi/fmt:kev:mtx:journal */
|
79
|
addNameValuePairTo(pairs, "rft_val_fmt", "info:ofi/fmt:kev:mtx:" + query.refType);
|
80
|
/* Book title */
|
81
|
addNameValuePairTo(pairs, "rft.btitle", query.bookTitle);
|
82
|
/* Journal title */
|
83
|
addNameValuePairTo(pairs, "rft.jtitle", query.journalTitle);
|
84
|
/* Author name ("last, first" or "corporation") */
|
85
|
addNameValuePairTo(pairs, "rft.au", query.authorName);
|
86
|
/* Author last name */
|
87
|
addNameValuePairTo(pairs, "rft.aulast", query.authorLastName);
|
88
|
/* Author first name */
|
89
|
addNameValuePairTo(pairs, "rft.aufirst", query.authorFirstName);
|
90
|
/* Author name (corporation) */
|
91
|
addNameValuePairTo(pairs, "rft.aucorp", query.authorNameCorporation);
|
92
|
/* Publication details */
|
93
|
addNameValuePairTo(pairs, "rft.publisher", query.publicationDetails);
|
94
|
/* Publisher name */
|
95
|
addNameValuePairTo(pairs, "rft.pub", query.publisherName);
|
96
|
/* Publication place */
|
97
|
addNameValuePairTo(pairs, "rft.place", query.publicationPlace);
|
98
|
/* Publication date (YYYY or YYYY-MM or YYYY-MM-DD) */
|
99
|
addNameValuePairTo(pairs, "rft.date", query.publicationDate);
|
100
|
/* ISSN */
|
101
|
addNameValuePairTo(pairs, "rft.issn", query.ISSN);
|
102
|
/* ISBN */
|
103
|
addNameValuePairTo(pairs, "rft.isbn", query.ISBN);
|
104
|
/* CODEN */
|
105
|
addNameValuePairTo(pairs, "rft.coden", query.CODEN);
|
106
|
/* Abbreviation = abbreviated Title */
|
107
|
addNameValuePairTo(pairs, "rft.stitle", query.abbreviation);
|
108
|
/* Volume */
|
109
|
addNameValuePairTo(pairs, "rft.volume", query.volume);
|
110
|
/* Issue */
|
111
|
addNameValuePairTo(pairs, "rft.issue", query.issue);
|
112
|
/* Start page */
|
113
|
if(query.startPage != null){
|
114
|
Integer page = parsePageNumber(query.startPage);
|
115
|
addNameValuePairTo(pairs, "rft.spage", page.toString());
|
116
|
}
|
117
|
/* BHL title ID (where XXXX is the ID value)*/
|
118
|
addNameValuePairTo(pairs, "rft_id" , query.bhlTitleURI);
|
119
|
/* BHL page ID (where XXXX is the ID value)*/
|
120
|
addNameValuePairTo(pairs, "rft_id", query.bhlPageURI);
|
121
|
|
122
|
/* OCLC number (where XXXX is the ID value)*/
|
123
|
if(query.oclcNumber != null){
|
124
|
pairs.add(new BasicNameValuePair("rft_id", "info:oclcnum/" +query.oclcNumber));
|
125
|
}
|
126
|
/* Lib. of Congress ID (where XXXX is the ID value)*/
|
127
|
if(query.libofCongressID != null){
|
128
|
pairs.add(new BasicNameValuePair("rft_id", "info:lccn/" +query.libofCongressID));
|
129
|
}
|
130
|
|
131
|
Map<String, String> requestHeaders = new HashMap<String, String>();
|
132
|
requestHeaders.put("Accept-Charset", "UTF-8");
|
133
|
|
134
|
try {
|
135
|
URI requestUri = createUri(null, pairs);
|
136
|
|
137
|
InputStream stream = executeHttpGet(requestUri, requestHeaders);
|
138
|
// String search = "utf-16";
|
139
|
// String replace = "UTF-8";
|
140
|
//// stream = StreamUtils.streamReplace(stream, search, replace);
|
141
|
// fix the "org.xml.sax.SAXParseException: An invalid XML character (Unicode: 0x1) was found" problem
|
142
|
// stream = StreamUtils.streamReplaceAll(stream, "[\\x00-\\x10]", " ");
|
143
|
|
144
|
List<OpenUrlReference> referenceList = schemaAdapter.getCmdEntities(stream);
|
145
|
// TODO : we need to set ReferenceType here unless we know that the field Genre returns the reference type
|
146
|
for(OpenUrlReference ref : referenceList){
|
147
|
ref.setReferenceType(query.refType);
|
148
|
}
|
149
|
return referenceList;
|
150
|
|
151
|
} catch (IOException e) {
|
152
|
// thrown by doHttpGet
|
153
|
logger.error(e);
|
154
|
} catch (URISyntaxException e) {
|
155
|
// thrown by createUri
|
156
|
logger.error(e);
|
157
|
}
|
158
|
|
159
|
return null;
|
160
|
|
161
|
}
|
162
|
|
163
|
private Integer parsePageNumber(String startPage) {
|
164
|
String pageNumbers = startPage.replaceAll("(?i)page|pages|p|p\\.|pp\\.|pp", "");
|
165
|
String[] pageNumbersTokens = pageNumbers.split("[,-]", 1);
|
166
|
Integer page = null;
|
167
|
try {
|
168
|
if(pageNumbersTokens[0] != null){
|
169
|
pageNumbersTokens[0] = pageNumbersTokens[0].trim();
|
170
|
} else {
|
171
|
throw new NumberFormatException();
|
172
|
}
|
173
|
page = Integer.valueOf(pageNumbersTokens[0]);
|
174
|
} catch (NumberFormatException e) {
|
175
|
logger.warn("First page number token of " + startPage + " is not a Number", e);
|
176
|
throw e;
|
177
|
}
|
178
|
return page;
|
179
|
}
|
180
|
|
181
|
|
182
|
/**
|
183
|
* @param reference
|
184
|
* the OpenUrlReference instance as a starting point for paging.
|
185
|
* @param forward
|
186
|
* integer indicating the number of pages to page forward. An
|
187
|
* negative integer will page backwards
|
188
|
* @return
|
189
|
* @throws IllegalArgumentException
|
190
|
* if the requested page number is not existent or if the field
|
191
|
* or if OpenUrlReference.pages is not parsable
|
192
|
*/
|
193
|
public List<OpenUrlReference> doPage(OpenUrlReference reference, int forward) throws IllegalArgumentException{
|
194
|
|
195
|
Integer pageNumber = null;
|
196
|
try{
|
197
|
if(reference.getPages() != null){
|
198
|
pageNumber = parsePageNumber(reference.getPages());
|
199
|
}
|
200
|
}catch(NumberFormatException e){
|
201
|
String errorMessage = "Reference has no page number or the field 'pages' is not parsable";
|
202
|
logger.warn(errorMessage);
|
203
|
throw new IllegalArgumentException(errorMessage);
|
204
|
}
|
205
|
|
206
|
MobotOpenUrlQuery query = new MobotOpenUrlQuery();
|
207
|
query.bhlTitleURI = reference.getTitleUri();
|
208
|
pageNumber += forward;
|
209
|
query.startPage = pageNumber.toString();
|
210
|
query.refType = reference.getReferenceType();
|
211
|
return doResolve(query);
|
212
|
}
|
213
|
|
214
|
public enum ReferenceType{
|
215
|
book, journal;
|
216
|
|
217
|
public static ReferenceType getReferenceType(Reference reference){
|
218
|
if(eu.etaxonomy.cdm.model.reference.ReferenceType.Book.equals(reference.getType())){
|
219
|
return book;
|
220
|
}
|
221
|
else if(eu.etaxonomy.cdm.model.reference.ReferenceType.Journal.equals(reference.getType())){
|
222
|
return journal;
|
223
|
}
|
224
|
else {
|
225
|
return null;
|
226
|
}
|
227
|
}
|
228
|
}
|
229
|
|
230
|
}
|