1
|
/**
|
2
|
* Copyright (C) 2007 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.common;
|
10
|
|
11
|
import java.util.regex.Matcher;
|
12
|
import java.util.regex.Pattern;
|
13
|
|
14
|
import org.apache.commons.lang.StringUtils;
|
15
|
|
16
|
|
17
|
/**
|
18
|
* A class for handling DOIs (http://www.doi.org).
|
19
|
* It offers parsing and formatting functionality as well as validation.
|
20
|
* A {@link DOI} object can only be created by syntactic valid input.
|
21
|
* It internally stores a doi 2 strings, the first one being the registrant number
|
22
|
* (including sub numbers), the second being the suffix.
|
23
|
*
|
24
|
*
|
25
|
* @author a.mueller
|
26
|
* @since 2013-09-04
|
27
|
*/
|
28
|
public final class DOI implements java.io.Serializable{
|
29
|
|
30
|
/**
|
31
|
* Explicit serialVersionUID for interoperability.
|
32
|
*/
|
33
|
private static final long serialVersionUID = -3871039785359980553L;
|
34
|
|
35
|
public static final int MAX_LENGTH = 1000;
|
36
|
|
37
|
/**
|
38
|
* The default public DOI proxy server
|
39
|
*/
|
40
|
public static final String HTTP_DOI_ORG = "http://doi.org/";
|
41
|
|
42
|
/**
|
43
|
* The former default public DOI proxy server, still supported but no longer preferred.
|
44
|
* @see #HTTP_DOI_ORG
|
45
|
*/
|
46
|
public static final String HTTP_OLD_DOI_ORG = "http://dx.doi.org/";
|
47
|
|
48
|
private volatile transient int hashCode = -1; // Zero ==> undefined
|
49
|
|
50
|
//http://www.doi.org/doi_handbook/2_Numbering.html#2.2.1
|
51
|
// prefix + suffix, no defined length, case-insensitive, any printable characters
|
52
|
|
53
|
|
54
|
//********************************* VARIABLES *************************************/
|
55
|
|
56
|
/**
|
57
|
* The directory indicator for DOIs as registered at
|
58
|
*/
|
59
|
public static final String DIRECTORY_INDICATOR = "10";
|
60
|
private String prefix_registrantCode;
|
61
|
|
62
|
private String suffix;
|
63
|
|
64
|
// ***************************** FACTORY METHODS ***************************************/
|
65
|
|
66
|
public static DOI fromString(String doi) throws IllegalArgumentException{
|
67
|
return new DOI(doi);
|
68
|
}
|
69
|
|
70
|
public static DOI fromRegistrantCodeAndSuffix(String registrantCode, String suffix) throws IllegalArgumentException{
|
71
|
return new DOI(registrantCode, suffix);
|
72
|
}
|
73
|
|
74
|
|
75
|
// ******************************* CONSTRUCTOR ************************************/
|
76
|
private DOI(){}; //empty constructor required for JAXB
|
77
|
|
78
|
|
79
|
/**
|
80
|
* Creates a doi by its registrantCode and its suffix
|
81
|
* @param registrantCode the registrant code, the is the part following the directoryIndicator "10."
|
82
|
* and preceding the first forward slash (followed by the suffix)
|
83
|
* @param suffix the suffix is the part of the DOI following the first forward slash. It is provided
|
84
|
* by the registrant
|
85
|
*/
|
86
|
private DOI(String registrantCode, String suffix) {
|
87
|
//preliminary until prefix_registrantCode and suffix validation is implemented
|
88
|
this("10." + registrantCode + "/" + suffix);
|
89
|
|
90
|
//use only after validation of both parts
|
91
|
// this.prefix_registrantCode = registrantCode;
|
92
|
// this.suffix = suffix;
|
93
|
}
|
94
|
|
95
|
private DOI(String doiString) {
|
96
|
super();
|
97
|
parseDoiString(doiString);
|
98
|
}
|
99
|
|
100
|
//************************************ GETTER ***********************************/
|
101
|
|
102
|
public String getPrefix() {
|
103
|
return makePrefix();
|
104
|
}
|
105
|
|
106
|
public String getPrefix_registrantCode() {
|
107
|
return prefix_registrantCode;
|
108
|
}
|
109
|
|
110
|
public String getSuffix() {
|
111
|
return suffix;
|
112
|
}
|
113
|
|
114
|
private static Pattern doiPattern = Pattern.compile("^doi:\\s*", Pattern.CASE_INSENSITIVE);
|
115
|
|
116
|
// ********************************************* PARSER *******************************/
|
117
|
|
118
|
private void parseDoiString(String doi){
|
119
|
boolean isUrn = false;
|
120
|
if (StringUtils.isBlank(doi)){
|
121
|
throw new IllegalArgumentException("Doi string must not be null or blank");
|
122
|
}
|
123
|
doi = doi.trim();
|
124
|
if (doi.startsWith("https") ){
|
125
|
doi = doi.replaceFirst("https", "http").trim();
|
126
|
}
|
127
|
Matcher matcher = doiPattern.matcher(doi);
|
128
|
if (matcher.find()){
|
129
|
doi = matcher.replaceFirst("").trim();
|
130
|
}
|
131
|
|
132
|
|
133
|
//replace URI prefix
|
134
|
if (doi.startsWith(HTTP_DOI_ORG)){
|
135
|
doi = doi.replaceFirst(HTTP_DOI_ORG,"");
|
136
|
}else if (doi.startsWith(HTTP_OLD_DOI_ORG)){
|
137
|
doi = doi.replaceFirst(HTTP_OLD_DOI_ORG,"");
|
138
|
}
|
139
|
|
140
|
|
141
|
|
142
|
//handle URN prefix
|
143
|
if (doi.startsWith("urn:doi:")){
|
144
|
doi = doi.replaceFirst("urn:doi:","");
|
145
|
isUrn = true;
|
146
|
}
|
147
|
|
148
|
|
149
|
//now we should have the pure doi
|
150
|
if (doi.length() > MAX_LENGTH){
|
151
|
//for persistence reason we currently restrict the length of DOIs to 1000
|
152
|
throw new IllegalArgumentException("DOIs may have a maximum length of 1000 in the CDM.");
|
153
|
}
|
154
|
|
155
|
if (! doi.startsWith("10.")){
|
156
|
throw new IllegalArgumentException("DOI not parsable. DOI must start with 10. or an URI or URN prefix ");
|
157
|
}
|
158
|
doi = doi.substring(3);
|
159
|
String sep = isUrn? ":" : "/";
|
160
|
|
161
|
// registrant
|
162
|
String registrant = doi.split(sep)[0];
|
163
|
if (!registrant.matches("[0-9]{2,}(?:[.][0-9]+)*")){ //per definition the number of digits may also be 1, however the lowest known number is 3 so we may be on the safe side here
|
164
|
String message = "Invalid prefix '10.%s'";
|
165
|
throw new IllegalArgumentException(String.format(message, registrant));
|
166
|
}
|
167
|
//suffix
|
168
|
String suffix = doi.replaceFirst(registrant + sep,"");
|
169
|
if (! suffix.matches("\\p{Print}+")){
|
170
|
String message = "Suffix should only include printable characters";
|
171
|
throw new IllegalArgumentException(message);
|
172
|
}
|
173
|
if (isUrn){
|
174
|
//TODO do some other replacements according to http://www.doi.org/doi_handbook/2_Numbering.html#2.6.3
|
175
|
//e.g. slash becomes : in URN
|
176
|
//TODO do we need this also for other URIs? According to http://www.doi.org/doi_handbook/2_Numbering.html#2.6 it is only required for URNs
|
177
|
suffix = UrlUtf8Coder.unescape(suffix);
|
178
|
}
|
179
|
//success
|
180
|
this.prefix_registrantCode = registrant;
|
181
|
this.suffix = suffix;
|
182
|
|
183
|
}
|
184
|
|
185
|
|
186
|
private String makePrefix(){
|
187
|
return DIRECTORY_INDICATOR + "." + this.prefix_registrantCode;
|
188
|
}
|
189
|
|
190
|
private String makeDoi(){
|
191
|
return makePrefix() + "/" + this.suffix;
|
192
|
}
|
193
|
|
194
|
public String asURI(){
|
195
|
return HTTP_DOI_ORG + makePrefix() + "/" + uriEncodedSuffix();
|
196
|
}
|
197
|
|
198
|
private String uriEncodedSuffix() {
|
199
|
String result = UrlUtf8Coder.encode(this.suffix);
|
200
|
return result;
|
201
|
}
|
202
|
|
203
|
//************************************************* toString/equals /hashCode *********************/
|
204
|
|
205
|
|
206
|
|
207
|
@Override
|
208
|
public int hashCode() {
|
209
|
if (hashCode == -1) {
|
210
|
hashCode = 31 * prefix_registrantCode.toUpperCase().hashCode() + suffix.toUpperCase().hashCode();
|
211
|
}
|
212
|
return hashCode;
|
213
|
}
|
214
|
|
215
|
|
216
|
@Override
|
217
|
public boolean equals(Object obj) {
|
218
|
if (obj instanceof DOI){
|
219
|
DOI doi = (DOI)obj;
|
220
|
if (this.prefix_registrantCode.toUpperCase().equals(doi.prefix_registrantCode.toUpperCase()) &&
|
221
|
this.suffix.toUpperCase().equals(doi.suffix.toUpperCase())){
|
222
|
return true;
|
223
|
}
|
224
|
}
|
225
|
return false;
|
226
|
}
|
227
|
|
228
|
|
229
|
@Override
|
230
|
public String toString(){
|
231
|
return makeDoi();
|
232
|
}
|
233
|
}
|