/**
* Copyright (C) 2007 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
9
|
package eu.etaxonomy.cdm.common;
|
10
|
|
11
|
import java.util.Locale;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
|
15
|
|
16
|
|
17
|
/**
|
18
|
* A class for handling DOIs (http://www.doi.org).
|
19
|
* It offers parsing and formatting functionality as well as validation.
|
20
|
* A {@link DOI} object can only be created by syntactic valid input.
|
21
|
* It internally stores 2 strings, the first one being the registrant number
|
22
|
* (including sub numbers), the second being the suffix.
|
23
|
*
|
24
|
*
|
25
|
* @author a.mueller
|
26
|
* @since 2013-09-04
|
27
|
*/
|
28
|
public final class DOI implements java.io.Serializable{
|
29
|
|
30
|
/**
|
31
|
* Explicit serialVersionUID for interoperability.
|
32
|
*/
|
33
|
private static final long serialVersionUID = -3871039785359980553L;
|
34
|
|
35
|
public static final int MAX_LENGTH = 1000;
|
36
|
|
37
|
public static final String DOI_ORG = "doi.org/";
|
38
|
|
39
|
public static final String OLD_DOI_ORG = "dx." + DOI_ORG;
|
40
|
|
41
|
/**
|
42
|
* The default public DOI proxy server
|
43
|
*/
|
44
|
public static final String HTTP_DOI_ORG = "http://" + DOI_ORG;
|
45
|
|
46
|
/**
|
47
|
* The former default public DOI proxy server, still supported but no longer preferred.
|
48
|
* @see #HTTP_DOI_ORG
|
49
|
*/
|
50
|
public static final String HTTP_OLD_DOI_ORG = "http://" + OLD_DOI_ORG;
|
51
|
|
52
|
private volatile transient int hashCode = -1; // Zero ==> undefined
|
53
|
|
54
|
//http://www.doi.org/doi_handbook/2_Numbering.html#2.2.1
|
55
|
// prefix + suffix, no defined length, case-insensitive, any printable characters
|
56
|
|
57
|
|
58
|
//********************************* VARIABLES *************************************/
|
59
|
|
60
|
/**
|
61
|
* The directory indicator for DOIs as registered at
|
62
|
*/
|
63
|
public static final String DIRECTORY_INDICATOR = "10";
|
64
|
private String prefix_registrantCode;
|
65
|
|
66
|
private String suffix;
|
67
|
|
68
|
// ***************************** FACTORY METHODS ***************************************/
|
69
|
|
70
|
public static DOI fromString(String doi) throws IllegalArgumentException{
|
71
|
return new DOI(doi);
|
72
|
}
|
73
|
|
74
|
public static DOI fromRegistrantCodeAndSuffix(String registrantCode, String suffix) throws IllegalArgumentException{
|
75
|
return new DOI(registrantCode, suffix);
|
76
|
}
|
77
|
|
78
|
|
79
|
// ******************************* CONSTRUCTOR ************************************/
|
80
|
private DOI(){} //empty constructor required for JAXB
|
81
|
|
82
|
|
83
|
/**
|
84
|
* Creates a doi by its registrantCode and its suffix
|
85
|
* @param registrantCode the registrant code, the is the part following the directoryIndicator "10."
|
86
|
* and preceding the first forward slash (followed by the suffix)
|
87
|
* @param suffix the suffix is the part of the DOI following the first forward slash. It is provided
|
88
|
* by the registrant
|
89
|
*/
|
90
|
private DOI(String registrantCode, String suffix) {
|
91
|
//preliminary until prefix_registrantCode and suffix validation is implemented
|
92
|
this("10." + registrantCode + "/" + suffix);
|
93
|
|
94
|
//use only after validation of both parts
|
95
|
// this.prefix_registrantCode = registrantCode;
|
96
|
// this.suffix = suffix;
|
97
|
}
|
98
|
|
99
|
private DOI(String doiString) {
|
100
|
super();
|
101
|
parseDoiString(doiString);
|
102
|
}
|
103
|
|
104
|
//************************************ GETTER ***********************************/
|
105
|
|
106
|
public String getPrefix() {
|
107
|
return makePrefix();
|
108
|
}
|
109
|
|
110
|
public String getPrefix_registrantCode() {
|
111
|
return prefix_registrantCode;
|
112
|
}
|
113
|
|
114
|
public String getSuffix() {
|
115
|
return suffix;
|
116
|
}
|
117
|
|
118
|
private static Pattern doiPattern = Pattern.compile("^doi:\\s*", Pattern.CASE_INSENSITIVE);
|
119
|
|
120
|
// ********************************************* PARSER *******************************/
|
121
|
|
122
|
private void parseDoiString(String doi){
|
123
|
boolean isUrn = false;
|
124
|
if (StringUtils.isBlank(doi)){
|
125
|
throw new IllegalArgumentException("Doi string must not be null or blank");
|
126
|
}
|
127
|
doi = doi.trim();
|
128
|
if (doi.startsWith("https") ){
|
129
|
doi = doi.replaceFirst("https", "http").trim();
|
130
|
}
|
131
|
Matcher matcher = doiPattern.matcher(doi);
|
132
|
if (matcher.find()){
|
133
|
doi = matcher.replaceFirst("").trim();
|
134
|
}
|
135
|
|
136
|
//replace URI prefix
|
137
|
if (doi.startsWith(HTTP_DOI_ORG)){
|
138
|
doi = doi.replaceFirst(HTTP_DOI_ORG, "");
|
139
|
}else if (doi.startsWith(HTTP_OLD_DOI_ORG)){
|
140
|
doi = doi.replaceFirst(HTTP_OLD_DOI_ORG, "");
|
141
|
}else if (doi.startsWith(DOI_ORG)){
|
142
|
doi = doi.replaceFirst(DOI_ORG, "");
|
143
|
}else if (doi.startsWith(OLD_DOI_ORG)){
|
144
|
doi = doi.replaceFirst(OLD_DOI_ORG, "");
|
145
|
}
|
146
|
|
147
|
//handle URN prefix
|
148
|
if (doi.startsWith("urn:doi:")){
|
149
|
doi = doi.replaceFirst("urn:doi:","");
|
150
|
isUrn = true;
|
151
|
}
|
152
|
|
153
|
//now we should have the pure doi
|
154
|
if (doi.length() > MAX_LENGTH){
|
155
|
//for persistence reason we currently restrict the length of DOIs to 1000
|
156
|
throw new IllegalArgumentException("DOIs may have a maximum length of 1000 in the CDM.");
|
157
|
}
|
158
|
|
159
|
if (! doi.startsWith("10.")){
|
160
|
throw new IllegalArgumentException("DOI not parsable. DOI must start with 10. or an URI or URN prefix. But was " + doi);
|
161
|
}
|
162
|
doi = doi.substring(3);
|
163
|
String sep = isUrn? ":" : "/";
|
164
|
|
165
|
// registrant
|
166
|
String registrant = doi.split(sep)[0];
|
167
|
if (!registrant.matches("[0-9]{2,}(?:[.][0-9]+)*")){ //per definition the number of digits may also be 1, however the lowest known number is 3 so we may be on the safe side here
|
168
|
String message = "Invalid prefix '10.%s'";
|
169
|
throw new IllegalArgumentException(String.format(message, registrant));
|
170
|
}
|
171
|
//suffix
|
172
|
String suffix = doi.replaceFirst(registrant + sep,"");
|
173
|
if (suffix.equals("")){
|
174
|
String message = "Suffix must not be empty";
|
175
|
throw new IllegalArgumentException(message);
|
176
|
}
|
177
|
if (! suffix.matches("\\p{Print}+")){
|
178
|
String message = "Suffix should only include printable characters";
|
179
|
throw new IllegalArgumentException(message + ": " + suffix);
|
180
|
}
|
181
|
if (isUrn){
|
182
|
//TODO do some other replacements according to http://www.doi.org/doi_handbook/2_Numbering.html#2.6.3
|
183
|
//e.g. slash becomes : in URN
|
184
|
//TODO do we need this also for other URIs? According to http://www.doi.org/doi_handbook/2_Numbering.html#2.6 it is only required for URNs
|
185
|
suffix = UrlUtf8Coder.unescape(suffix);
|
186
|
}
|
187
|
//success
|
188
|
this.prefix_registrantCode = registrant;
|
189
|
this.suffix = suffix;
|
190
|
|
191
|
}
|
192
|
|
193
|
|
194
|
private String makePrefix(){
|
195
|
return DIRECTORY_INDICATOR + "." + this.prefix_registrantCode;
|
196
|
}
|
197
|
|
198
|
private String makeDoi(){
|
199
|
return makePrefix() + "/" + this.suffix;
|
200
|
}
|
201
|
|
202
|
public String asURI(){
|
203
|
return HTTP_DOI_ORG + makePrefix() + "/" + uriEncodedSuffix();
|
204
|
}
|
205
|
|
206
|
private String uriEncodedSuffix() {
|
207
|
String result = UrlUtf8Coder.encode(this.suffix);
|
208
|
return result;
|
209
|
}
|
210
|
|
211
|
//************************************************* toString/equals /hashCode *********************/
|
212
|
|
213
|
|
214
|
|
215
|
@Override
|
216
|
public int hashCode() {
|
217
|
if (hashCode == -1) {
|
218
|
hashCode = 31 * prefix_registrantCode.toUpperCase().hashCode() + suffix.toUpperCase().hashCode();
|
219
|
}
|
220
|
return hashCode;
|
221
|
}
|
222
|
|
223
|
|
224
|
@Override
|
225
|
public boolean equals(Object obj) {
|
226
|
if (obj instanceof DOI){
|
227
|
DOI doi = (DOI)obj;
|
228
|
if (this.prefix_registrantCode.toUpperCase().equals(doi.prefix_registrantCode.toUpperCase()) &&
|
229
|
this.suffix.toUpperCase().equals(doi.suffix.toUpperCase())){
|
230
|
return true;
|
231
|
}
|
232
|
}
|
233
|
return false;
|
234
|
}
|
235
|
|
236
|
|
237
|
@Override
|
238
|
public String toString(){
|
239
|
return makeDoi();
|
240
|
}
|
241
|
}
|