Project

General

Profile

Download (6.95 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.common;
10

    
11
import java.util.regex.Matcher;
12
import java.util.regex.Pattern;
13

    
14
import org.apache.commons.lang.StringUtils;
15

    
16

    
17
/**
18
 * A class for handling DOIs (http://www.doi.org).
19
 * It offers parsing and formatting functionality as well as validation.
20
 * A {@link DOI} object can only be created by syntactic valid input.
21
 * It internally stores a doi 2 strings, the first one being the registrant number
22
 * (including sub numbers), the second being the suffix.
23
 * 
24
 * 
25
 * @author a.mueller
26
 * @created 2013-09-04
27
 */
28
public final class DOI implements java.io.Serializable{
29
	
30
	/**
31
     * Explicit serialVersionUID for interoperability.
32
     */
33
	private static final long serialVersionUID = -3871039785359980553L;
34

    
35
	public static final int MAX_LENGTH = 1000;
36

    
37
	/**
38
	 * The default public DOI proxy server
39
	 */
40
	public static final String HTTP_DOI_ORG = "http://doi.org/";
41

    
42
	/**
43
	 * The former default public DOI proxy server, still supported but no longer preferred.
44
	 * @see #HTTP_DOI_ORG
45
	 */
46
	public static final String HTTP_OLD_DOI_ORG = "http://dx.doi.org/";
47
	
48
    private volatile transient int hashCode = -1;	// Zero ==> undefined
49

    
50
	//http://www.doi.org/doi_handbook/2_Numbering.html#2.2.1
51
//	prefix + suffix, no defined length, case-insensitive, any printable characters
52

    
53
	
54
//********************************* VARIABLES *************************************/	
55
	
56
	/**
57
	 * The directory indicator for DOIs as registered at 
58
	 */
59
	public static final String DIRECTORY_INDICATOR = "10";
60
	private String prefix_registrantCode;
61

    
62
	private String suffix;
63

    
64
// ***************************** FACTORY METHODS ***************************************/
65
	
66
	public static DOI fromString(String doi) throws IllegalArgumentException{
67
		return new DOI(doi);
68
	}
69
	
70
	public static DOI fromRegistrantCodeAndSuffix(String registrantCode, String suffix) throws IllegalArgumentException{
71
		return new DOI(registrantCode, suffix);
72
	}
73
	
74
	
75
// ******************************* CONSTRUCTOR ************************************/	
76
	private DOI(){}; //empty constructor required for JAXB
77
	
78
	
79
    /**
80
     * Creates a doi by its registrantCode and its suffix
81
     * @param registrantCode the registrant code, the is the part following the directoryIndicator "10." 
82
     * 	and preceding the first forward slash (followed by the suffix)
83
     * @param suffix the suffix is the part of the DOI following the first forward slash. It is provided 
84
     * by the registrant
85
     */
86
    private DOI(String registrantCode, String suffix) {
87
    	//preliminary until prefix_registrantCode and suffix validation is implemented
88
		this("10." + registrantCode + "/" + suffix);
89
		
90
		//use only after validation of both parts
91
//		this.prefix_registrantCode = registrantCode;
92
//		this.suffix = suffix;
93
	}
94

    
95
    private DOI(String doiString) {
96
		super();
97
		parseDoiString(doiString);
98
	}
99

    
100
//************************************ GETTER ***********************************/    
101
	
102
	public String getPrefix() {
103
		return makePrefix();
104
	}
105
    
106
	public String getPrefix_registrantCode() {
107
		return prefix_registrantCode;
108
	}
109

    
110
	public String getSuffix() {
111
		return suffix;
112
	}
113

    
114
	private static Pattern doiPattern = Pattern.compile("^doi:\\s*", Pattern.CASE_INSENSITIVE); 
115
	
116
// ********************************************* PARSER *******************************/
117
    
118
	private void parseDoiString(String doi){
119
		boolean isUrn = false;
120
		if (StringUtils.isBlank(doi)){
121
			throw new IllegalArgumentException("Doi string must not be null or blank");
122
		}
123
		doi = doi.trim();
124
		if (doi.startsWith("https") ){
125
			doi = doi.replaceFirst("https", "http").trim();
126
		}
127
		Matcher matcher = doiPattern.matcher(doi);
128
		if (matcher.find()){
129
			doi = matcher.replaceFirst("").trim();
130
		}
131

    
132
		
133
		//replace URI prefix
134
		if (doi.startsWith(HTTP_DOI_ORG)){
135
			doi = doi.replaceFirst(HTTP_DOI_ORG,"");
136
		}else if (doi.startsWith(HTTP_OLD_DOI_ORG)){
137
			doi = doi.replaceFirst(HTTP_OLD_DOI_ORG,"");
138
		}
139
		
140
		
141

    
142
		//handle URN prefix
143
		if (doi.startsWith("urn:doi:")){
144
			doi = doi.replaceFirst("urn:doi:","");
145
			isUrn = true;
146
		}
147
		
148
		
149
		//now we should have the pure doi
150
		if (doi.length() > MAX_LENGTH){
151
			//for persistence reason we currently restrict the length of DOIs to 1000
152
			throw new IllegalArgumentException("DOIs may have a maximum length of 1000 in the CDM.");
153
		}
154
		
155
		if (! doi.startsWith("10.")){
156
			throw new IllegalArgumentException("DOI not parsable. DOI must start with 10. or an URI or URN prefix ");
157
		}
158
		doi = doi.substring(3);
159
		String sep = isUrn? ":" : "/";
160
		
161
//		registrant
162
		String registrant = doi.split(sep)[0];
163
		if (!registrant.matches("[0-9]{2,}(?:[.][0-9]+)*")){   //per definition the number of digits may also be 1, however the lowest known number is 3 so we may be on the safe side here 
164
			String message = "Invalid prefix '10.%s'";
165
			throw new IllegalArgumentException(String.format(message, registrant));
166
		}
167
		//suffix
168
		String suffix = doi.replaceFirst(registrant + sep,"");
169
		if (! suffix.matches("\\p{Print}+")){
170
			String message = "Suffix should only include printable characters";
171
			throw new IllegalArgumentException(message);
172
		}
173
		if (isUrn){
174
			//TODO do some other replacements according to http://www.doi.org/doi_handbook/2_Numbering.html#2.6.3
175
			//e.g. slash becomes : in URN
176
			//TODO do we need this also for other URIs? According to http://www.doi.org/doi_handbook/2_Numbering.html#2.6 it is only required for URNs
177
			suffix = UrlUtf8Coder.unescape(suffix);
178
		}
179
		//success
180
		this.prefix_registrantCode = registrant;
181
		this.suffix = suffix;
182
			
183
	}
184
	
185
	
186
	private String makePrefix(){
187
		return DIRECTORY_INDICATOR + "." + this.prefix_registrantCode;
188
	}
189
	
190
	private String makeDoi(){
191
		return makePrefix() + "/" + this.suffix;
192
	}
193
	
194
	public String asURI(){
195
		return HTTP_DOI_ORG + makePrefix() + "/" + uriEncodedSuffix();
196
	}
197
	
198
	private String uriEncodedSuffix() {
199
		String result = UrlUtf8Coder.encode(this.suffix);
200
		return result;
201
	}
202

    
203
//************************************************* toString/equals /hashCode *********************/	
204

    
205
	
206
	
207
	@Override
208
	public int hashCode() {
209
		if (hashCode == -1) {
210
            hashCode = 31 * prefix_registrantCode.toUpperCase().hashCode() + suffix.toUpperCase().hashCode();
211
        }
212
        return hashCode;
213
	}
214

    
215

    
216
	@Override
217
	public boolean equals(Object obj) {
218
		if (obj instanceof DOI){
219
			DOI doi = (DOI)obj;
220
			if (this.prefix_registrantCode.toUpperCase().equals(doi.prefix_registrantCode.toUpperCase()) &&
221
					this.suffix.toUpperCase().equals(doi.suffix.toUpperCase())){
222
				return true;
223
			}
224
		}
225
		return false;
226
	}
227

    
228

    
229
	@Override
230
	public String toString(){
231
		return makeDoi();
232
	}
233
}
(4-4/20)