Project

General

Profile

Download (7.38 KB) Statistics
| Branch: | Tag: | Revision:
/**
* Copyright (C) 2007 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
9
package eu.etaxonomy.cdm.common;
10

    
11
import java.util.regex.Matcher;
12
import java.util.regex.Pattern;
13

    
14
import org.apache.commons.lang3.StringUtils;
15

    
16

    
17
/**
18
 * A class for handling DOIs (http://www.doi.org).
19
 * It offers parsing and formatting functionality as well as validation.
20
 * A {@link DOI} object can only be created by syntactic valid input.
21
 * It internally stores 2 strings, the first one being the registrant number
22
 * (including sub numbers), the second being the suffix.
23
 *
24
 *
25
 * @author a.mueller
26
 * @since 2013-09-04
27
 */
28
public final class DOI implements java.io.Serializable{
29

    
30
	/**
31
     * Explicit serialVersionUID for interoperability.
32
     */
33
	private static final long serialVersionUID = -3871039785359980553L;
34

    
35
	public static final int MAX_LENGTH = 1000;
36

    
37
    public static final String DOI_ORG = "doi.org/";
38

    
39
    public static final String OLD_DOI_ORG = "dx." + DOI_ORG;
40

    
41
	/**
42
	 * The default public DOI proxy server
43
	 */
44
	public static final String HTTP_DOI_ORG = "http://" + DOI_ORG;
45

    
46
	/**
47
	 * The former default public DOI proxy server, still supported but no longer preferred.
48
	 * @see #HTTP_DOI_ORG
49
	 */
50
	public static final String HTTP_OLD_DOI_ORG = "http://" + OLD_DOI_ORG;
51

    
52
    private volatile transient int hashCode = -1;	// Zero ==> undefined
53

    
54
	//http://www.doi.org/doi_handbook/2_Numbering.html#2.2.1
55
//	prefix + suffix, no defined length, case-insensitive, any printable characters
56

    
57

    
58
//********************************* VARIABLES *************************************/
59

    
60
	/**
61
	 * The directory indicator for DOIs as registered at
62
	 */
63
	public static final String DIRECTORY_INDICATOR = "10";
64
	private String prefix_registrantCode;
65

    
66
	private String suffix;
67

    
68
// ***************************** FACTORY METHODS ***************************************/
69

    
70
	public static DOI fromString(String doi) throws IllegalArgumentException{
71
		return new DOI(doi);
72
	}
73

    
74
	public static DOI fromRegistrantCodeAndSuffix(String registrantCode, String suffix) throws IllegalArgumentException{
75
		return new DOI(registrantCode, suffix);
76
	}
77

    
78

    
79
// ******************************* CONSTRUCTOR ************************************/
80
	private DOI(){} //empty constructor required for JAXB
81

    
82

    
83
    /**
84
     * Creates a doi by its registrantCode and its suffix
85
     * @param registrantCode the registrant code, the is the part following the directoryIndicator "10."
86
     * 	and preceding the first forward slash (followed by the suffix)
87
     * @param suffix the suffix is the part of the DOI following the first forward slash. It is provided
88
     * by the registrant
89
     */
90
    private DOI(String registrantCode, String suffix) {
91
    	//preliminary until prefix_registrantCode and suffix validation is implemented
92
		this("10." + registrantCode + "/" + suffix);
93

    
94
		//use only after validation of both parts
95
//		this.prefix_registrantCode = registrantCode;
96
//		this.suffix = suffix;
97
	}
98

    
99
    private DOI(String doiString) {
100
		super();
101
		parseDoiString(doiString);
102
	}
103

    
104
//************************************ GETTER ***********************************/
105

    
106
	public String getPrefix() {
107
		return makePrefix();
108
	}
109

    
110
	public String getPrefix_registrantCode() {
111
		return prefix_registrantCode;
112
	}
113

    
114
	public String getSuffix() {
115
		return suffix;
116
	}
117

    
118
	private static Pattern doiPattern = Pattern.compile("^doi:\\s*", Pattern.CASE_INSENSITIVE);
119

    
120
// ********************************************* PARSER *******************************/
121

    
122
	private void parseDoiString(String doi){
123
		boolean isUrn = false;
124
		if (StringUtils.isBlank(doi)){
125
			throw new IllegalArgumentException("Doi string must not be null or blank");
126
		}
127
		doi = doi.trim();
128
		if (doi.startsWith("https") ){
129
			doi = doi.replaceFirst("https", "http").trim();
130
		}
131
		Matcher matcher = doiPattern.matcher(doi);
132
		if (matcher.find()){
133
			doi = matcher.replaceFirst("").trim();
134
		}
135

    
136
		//replace URI prefix
137
		if (doi.startsWith(HTTP_DOI_ORG)){
138
			doi = doi.replaceFirst(HTTP_DOI_ORG, "");
139
		}else if (doi.startsWith(HTTP_OLD_DOI_ORG)){
140
			doi = doi.replaceFirst(HTTP_OLD_DOI_ORG, "");
141
		}else if (doi.startsWith(DOI_ORG)){
142
            doi = doi.replaceFirst(DOI_ORG, "");
143
        }else if (doi.startsWith(OLD_DOI_ORG)){
144
            doi = doi.replaceFirst(OLD_DOI_ORG, "");
145
        }
146

    
147
		//handle URN prefix
148
		if (doi.startsWith("urn:doi:")){
149
			doi = doi.replaceFirst("urn:doi:","");
150
			isUrn = true;
151
		}
152

    
153
		//now we should have the pure doi
154
		if (doi.length() > MAX_LENGTH){
155
			//for persistence reason we currently restrict the length of DOIs to 1000
156
			throw new IllegalArgumentException("DOIs may have a maximum length of 1000 in the CDM.");
157
		}
158

    
159
		if (! doi.startsWith("10.")){
160
			throw new IllegalArgumentException("DOI not parsable. DOI must start with 10. or an URI or URN prefix. But was " + doi);
161
		}
162
		doi = doi.substring(3);
163
		String sep = isUrn? ":" : "/";
164

    
165
//		registrant
166
		String registrant = doi.split(sep)[0];
167
		if (!registrant.matches("[0-9]{2,}(?:[.][0-9]+)*")){   //per definition the number of digits may also be 1, however the lowest known number is 3 so we may be on the safe side here
168
			String message = "Invalid prefix '10.%s'";
169
			throw new IllegalArgumentException(String.format(message, registrant));
170
		}
171
		//suffix
172
		String suffix = doi.replaceFirst(registrant + sep,"");
173
		if (suffix.equals("")){
174
            String message = "Suffix must not be empty";
175
            throw new IllegalArgumentException(message);
176
        }
177
		if (! suffix.matches("\\p{Print}+")){
178
			String message = "Suffix should only include printable characters";
179
			throw new IllegalArgumentException(message + ": " + suffix);
180
		}
181
		if (isUrn){
182
			//TODO do some other replacements according to http://www.doi.org/doi_handbook/2_Numbering.html#2.6.3
183
			//e.g. slash becomes : in URN
184
			//TODO do we need this also for other URIs? According to http://www.doi.org/doi_handbook/2_Numbering.html#2.6 it is only required for URNs
185
			suffix = UrlUtf8Coder.unescape(suffix);
186
		}
187
		//success
188
		this.prefix_registrantCode = registrant;
189
		this.suffix = suffix;
190

    
191
	}
192

    
193

    
194
	private String makePrefix(){
195
		return DIRECTORY_INDICATOR + "." + this.prefix_registrantCode;
196
	}
197

    
198
	private String makeDoi(){
199
		return makePrefix() + "/" + this.suffix;
200
	}
201

    
202
	public String asURI(){
203
		return HTTP_DOI_ORG + makePrefix() + "/" + uriEncodedSuffix();
204
	}
205

    
206
	private String uriEncodedSuffix() {
207
		String result = UrlUtf8Coder.encode(this.suffix);
208
		return result;
209
	}
210

    
211
//************************************************* toString/equals /hashCode *********************/
212

    
213

    
214

    
215
	@Override
216
	public int hashCode() {
217
		if (hashCode == -1) {
218
            hashCode = 31 * prefix_registrantCode.toUpperCase().hashCode() + suffix.toUpperCase().hashCode();
219
        }
220
        return hashCode;
221
	}
222

    
223

    
224
	@Override
225
	public boolean equals(Object obj) {
226
		if (obj instanceof DOI){
227
			DOI doi = (DOI)obj;
228
			if (this.prefix_registrantCode.toUpperCase().equals(doi.prefix_registrantCode.toUpperCase()) &&
229
					this.suffix.toUpperCase().equals(doi.suffix.toUpperCase())){
230
				return true;
231
			}
232
		}
233
		return false;
234
	}
235

    
236

    
237
	@Override
238
	public String toString(){
239
		return makeDoi();
240
	}
241
}
(5-5/23)