Revision 87e91505
Added by Andreas Müller almost 11 years ago
.gitattributes | ||
---|---|---|
6 | 6 |
cdmlib-commons/pom.xml -text |
7 | 7 |
cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/AccountStore.java -text |
8 | 8 |
cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/CdmUtils.java -text |
9 |
cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/DOI.java -text |
|
9 | 10 |
cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/DocUtils.java -text |
10 | 11 |
cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/DoubleResult.java -text |
11 | 12 |
cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/ExcelUtils.java -text |
... | ... | |
17 | 18 |
cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/TreeNode.java -text |
18 | 19 |
cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/UTF8.java -text |
19 | 20 |
cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/UriUtils.java -text |
21 |
cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/UrlUtf8Coder.java -text |
|
20 | 22 |
cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/XmlHelp.java -text |
21 | 23 |
cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/media/AudioInfo.java -text |
22 | 24 |
cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/media/ImageInfo.java -text |
... | ... | |
32 | 34 |
cdmlib-commons/src/main/resources/MUST-EXIST.txt -text |
33 | 35 |
cdmlib-commons/src/main/resources/log4j.properties -text |
34 | 36 |
cdmlib-commons/src/test/java/eu/etaxonomy/cdm/common/CdmUtilsTest.java -text |
37 |
cdmlib-commons/src/test/java/eu/etaxonomy/cdm/common/DoiTest.java -text |
|
35 | 38 |
cdmlib-commons/src/test/java/eu/etaxonomy/cdm/common/GeneralParserTest.java -text |
36 | 39 |
cdmlib-commons/src/test/java/eu/etaxonomy/cdm/common/UriUtilsTest.java -text |
37 | 40 |
cdmlib-commons/src/test/java/eu/etaxonomy/cdm/common/UuidGenerator.java -text |
... | ... | |
681 | 684 |
cdmlib-model/README.TXT -text |
682 | 685 |
cdmlib-model/pom.xml -text |
683 | 686 |
cdmlib-model/src/main/java/eu/etaxonomy/cdm/aspectj/PropertyChangeAspect.aj -text |
687 |
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/DOIUserType.java -text |
|
684 | 688 |
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/EnumUserType.java -text |
685 | 689 |
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/HibernateProxyHelper.java -text |
686 | 690 |
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/PartialUserType.java -text |
cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/DOI.java | ||
---|---|---|
1 |
/** |
|
2 |
* Copyright (C) 2007 EDIT |
|
3 |
* European Distributed Institute of Taxonomy |
|
4 |
* http://www.e-taxonomy.eu |
|
5 |
* |
|
6 |
* The contents of this file are subject to the Mozilla Public License Version 1.1 |
|
7 |
* See LICENSE.TXT at the top of this package for the full license terms. |
|
8 |
*/ |
|
9 |
package eu.etaxonomy.cdm.common; |
|
10 |
|
|
11 |
import java.util.regex.Matcher; |
|
12 |
import java.util.regex.Pattern; |
|
13 |
|
|
14 |
import org.apache.commons.lang.StringUtils; |
|
15 |
|
|
16 |
|
|
17 |
/** |
|
18 |
* A class for handling DOIs (http://www.doi.org). |
|
19 |
* It offers parsing and formatting functionality as well as validation. |
|
20 |
* A {@link DOI} object can only be created from syntactically valid input. |
|
21 |
* It internally stores a DOI as 2 strings, the first one being the registrant code |
|
22 |
* (including sub numbers), the second being the suffix. |
|
23 |
* |
|
24 |
* |
|
25 |
* @author a.mueller |
|
26 |
* @created 2013-09-04 |
|
27 |
*/ |
|
28 |
public final class DOI implements java.io.Serializable{ |
|
29 |
|
|
30 |
/** |
|
31 |
* Explicit serialVersionUID for interoperability. |
|
32 |
*/ |
|
33 |
private static final long serialVersionUID = -3871039785359980553L; |
|
34 |
|
|
35 |
|
|
36 |
/** |
|
37 |
* The default public DOI proxy server |
|
38 |
*/ |
|
39 |
public static final String HTTP_DOI_ORG = "http://doi.org/"; |
|
40 |
|
|
41 |
/** |
|
42 |
* The former default public DOI proxy server, still supported but no longer preferred. |
|
43 |
* @see #HTTP_DOI_ORG |
|
44 |
*/ |
|
45 |
public static final String HTTP_OLD_DOI_ORG = "http://dx.doi.org/"; |
|
46 |
|
|
47 |
private volatile transient int hashCode = -1; // Zero ==> undefined |
|
48 |
|
|
49 |
//http://www.doi.org/doi_handbook/2_Numbering.html#2.2.1 |
|
50 |
// prefix + suffix, no defined length, case-insensitive, any printable characters |
|
51 |
|
|
52 |
|
|
53 |
//********************************* VARIABLES *************************************/ |
|
54 |
|
|
55 |
/** |
|
56 |
* The directory indicator for DOIs as registered at |
|
57 |
*/ |
|
58 |
public static final String DIRECTORY_INDICATOR = "10"; |
|
59 |
private String prefix_registrantCode; |
|
60 |
|
|
61 |
private String suffix; |
|
62 |
|
|
63 |
// ***************************** FACTORY METHODS ***************************************/ |
|
64 |
|
|
65 |
public static DOI fromString(String doi) throws IllegalArgumentException{ |
|
66 |
return new DOI(doi); |
|
67 |
} |
|
68 |
|
|
69 |
public static DOI fromRegistrantCodeAndSuffix(String registrantCode, String suffix) throws IllegalArgumentException{ |
|
70 |
return new DOI(registrantCode, suffix); |
|
71 |
} |
|
72 |
|
|
73 |
|
|
74 |
// ******************************* CONSTRUCTOR ************************************/ |
|
75 |
|
|
76 |
/** |
|
77 |
* Creates a doi by its registrantCode and its suffix |
|
78 |
* @param registrantCode the registrant code, the is the part following the directoryIndicator "10." |
|
79 |
* and preceding the first forward slash (followed by the suffix) |
|
80 |
* @param suffix the suffix is the part of the DOI following the first forward slash. It is provided |
|
81 |
* by the registrant |
|
82 |
*/ |
|
83 |
private DOI(String registrantCode, String suffix) { |
|
84 |
//preliminary until prefix_registrantCode and suffix validation is implemented |
|
85 |
this("10." + registrantCode + "/" + suffix); |
|
86 |
|
|
87 |
//use only after validation of both parts |
|
88 |
// this.prefix_registrantCode = registrantCode; |
|
89 |
// this.suffix = suffix; |
|
90 |
} |
|
91 |
|
|
92 |
private DOI(String doiString) { |
|
93 |
super(); |
|
94 |
parseDoiString(doiString); |
|
95 |
} |
|
96 |
|
|
97 |
//************************************ GETTER ***********************************/ |
|
98 |
|
|
99 |
public String getPrefix() { |
|
100 |
return makePrefix(); |
|
101 |
} |
|
102 |
|
|
103 |
public String getPrefix_registrantCode() { |
|
104 |
return prefix_registrantCode; |
|
105 |
} |
|
106 |
|
|
107 |
public String getSuffix() { |
|
108 |
return suffix; |
|
109 |
} |
|
110 |
|
|
111 |
private static Pattern doiPattern = Pattern.compile("^doi:\\s*", Pattern.CASE_INSENSITIVE); |
|
112 |
|
|
113 |
// ********************************************* PARSER *******************************/ |
|
114 |
|
|
115 |
private void parseDoiString(String doi){ |
|
116 |
boolean isUrn = false; |
|
117 |
if (StringUtils.isBlank(doi)){ |
|
118 |
throw new IllegalArgumentException("Doi string must not be null or blank"); |
|
119 |
} |
|
120 |
doi = doi.trim(); |
|
121 |
if (doi.startsWith("https") ){ |
|
122 |
doi = doi.replaceFirst("https", "http").trim(); |
|
123 |
} |
|
124 |
Matcher matcher = doiPattern.matcher(doi); |
|
125 |
if (matcher.find()){ |
|
126 |
doi = matcher.replaceFirst("").trim(); |
|
127 |
} |
|
128 |
|
|
129 |
|
|
130 |
//replace URI prefix |
|
131 |
if (doi.startsWith(HTTP_DOI_ORG)){ |
|
132 |
doi = doi.replaceFirst(HTTP_DOI_ORG,""); |
|
133 |
}else if (doi.startsWith(HTTP_OLD_DOI_ORG)){ |
|
134 |
doi = doi.replaceFirst(HTTP_OLD_DOI_ORG,""); |
|
135 |
} |
|
136 |
|
|
137 |
|
|
138 |
|
|
139 |
//handle URN prefix |
|
140 |
if (doi.startsWith("urn:doi:")){ |
|
141 |
doi = doi.replaceFirst("urn:doi:",""); |
|
142 |
isUrn = true; |
|
143 |
} |
|
144 |
|
|
145 |
|
|
146 |
//now we should have the pure doi |
|
147 |
if (doi.length() > 1000){ |
|
148 |
//for persistence reason we currently restrict the length of DOIs to 1000 |
|
149 |
throw new IllegalArgumentException("DOIs may have a maximum length of 1000 in the CDM."); |
|
150 |
} |
|
151 |
|
|
152 |
if (! doi.startsWith("10.")){ |
|
153 |
throw new IllegalArgumentException("DOI not parsable. DOI must start with 10. or an URI or URN prefix "); |
|
154 |
} |
|
155 |
doi = doi.substring(3); |
|
156 |
String sep = isUrn? ":" : "/"; |
|
157 |
|
|
158 |
// registrant |
|
159 |
String registrant = doi.split(sep)[0]; |
|
160 |
if (!registrant.matches("[0-9]{2,}(?:[.][0-9]+)*")){ //per definition the number of digits may also be 1, however the lowest known number is 3 so we may be on the safe side here |
|
161 |
String message = "Invalid prefix '10.%s'"; |
|
162 |
throw new IllegalArgumentException(String.format(message, registrant)); |
|
163 |
} |
|
164 |
//suffix |
|
165 |
String suffix = doi.replaceFirst(registrant + sep,""); |
|
166 |
if (! suffix.matches("\\p{Print}+")){ |
|
167 |
String message = "Suffix should only include printable characters"; |
|
168 |
throw new IllegalArgumentException(message); |
|
169 |
} |
|
170 |
if (isUrn){ |
|
171 |
//TODO do some other replacements according to http://www.doi.org/doi_handbook/2_Numbering.html#2.6.3 |
|
172 |
//e.g. slash becomes : in URN |
|
173 |
//TODO do we need this also for other URIs? According to http://www.doi.org/doi_handbook/2_Numbering.html#2.6 it is only required for URNs |
|
174 |
suffix = UrlUtf8Coder.unescape(suffix); |
|
175 |
} |
|
176 |
//success |
|
177 |
this.prefix_registrantCode = registrant; |
|
178 |
this.suffix = suffix; |
|
179 |
|
|
180 |
} |
|
181 |
|
|
182 |
|
|
183 |
private String makePrefix(){ |
|
184 |
return DIRECTORY_INDICATOR + "." + this.prefix_registrantCode; |
|
185 |
} |
|
186 |
|
|
187 |
private String makeDoi(){ |
|
188 |
return makePrefix() + "/" + this.suffix; |
|
189 |
} |
|
190 |
|
|
191 |
public String asURI(){ |
|
192 |
return HTTP_DOI_ORG + makePrefix() + "/" + uriEncodedSuffix(); |
|
193 |
} |
|
194 |
|
|
195 |
private String uriEncodedSuffix() { |
|
196 |
String result = UrlUtf8Coder.encode(this.suffix); |
|
197 |
return result; |
|
198 |
} |
|
199 |
|
|
200 |
//************************************************* toString/equals /hashCode *********************/ |
|
201 |
|
|
202 |
|
|
203 |
|
|
204 |
@Override |
|
205 |
public int hashCode() { |
|
206 |
if (hashCode == -1) { |
|
207 |
hashCode = 31 * prefix_registrantCode.toUpperCase().hashCode() + suffix.toUpperCase().hashCode(); |
|
208 |
} |
|
209 |
return hashCode; |
|
210 |
} |
|
211 |
|
|
212 |
|
|
213 |
@Override |
|
214 |
public boolean equals(Object obj) { |
|
215 |
if (obj instanceof DOI){ |
|
216 |
DOI doi = (DOI)obj; |
|
217 |
if (this.prefix_registrantCode.toUpperCase().equals(doi.prefix_registrantCode.toUpperCase()) && |
|
218 |
this.suffix.toUpperCase().equals(doi.suffix.toUpperCase())){ |
|
219 |
return true; |
|
220 |
} |
|
221 |
} |
|
222 |
return false; |
|
223 |
} |
|
224 |
|
|
225 |
|
|
226 |
@Override |
|
227 |
public String toString(){ |
|
228 |
return makeDoi(); |
|
229 |
} |
|
230 |
} |
cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/UrlUtf8Coder.java | ||
---|---|---|
1 |
/** |
|
2 |
* Provides a method to encode any string into a URL-safe |
|
3 |
* form. |
|
4 |
* Non-ASCII characters are first encoded as sequences of |
|
5 |
* two or three bytes, using the UTF-8 algorithm, before being |
|
6 |
* encoded as %HH escapes. |
|
7 |
* |
|
8 |
* Created: 17 April 1997 |
|
9 |
* Author: Bert Bos <bert@w3.org> |
|
10 |
* |
|
11 |
* URLUTF8Encoder: http://www.w3.org/International/URLUTF8Encoder.java |
|
12 |
* |
|
13 |
* Copyright © 1997 World Wide Web Consortium, (Massachusetts |
|
14 |
* Institute of Technology, European Research Consortium for |
|
15 |
* Informatics and Mathematics, Keio University). All Rights Reserved. |
|
16 |
* This work is distributed under the W3C® Software License [1] in the |
|
17 |
* hope that it will be useful, but WITHOUT ANY WARRANTY; without even |
|
18 |
* the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
19 |
* PURPOSE. |
|
20 |
* |
|
21 |
* [1] http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231 |
|
22 |
*/ |
|
23 |
package eu.etaxonomy.cdm.common; |
|
24 |
|
|
25 |
/**
 * Percent-encodes strings for use in URLs and decodes them again.
 * Non-ASCII characters are handled as UTF-8 byte sequences.
 */
public class UrlUtf8Coder{

    /**
     * Lookup table mapping each byte value (0..255) to its lower case
     * percent escape ("%00" .. "%ff").
     */
    final static String[] hex = buildHexTable();

    private static String[] buildHexTable() {
        String[] table = new String[256];
        for (int value = 0; value < table.length; value++) {
            table[value] = "%" + Character.forDigit(value >> 4, 16) + Character.forDigit(value & 0xF, 16);
        }
        return table;
    }

    /**
     * Encode a string to a URL-safe form. This is what happens:
     *
     * <ul>
     * <li><p>The ASCII characters 'a' through 'z', 'A' through 'Z',
     *        and '0' through '9' remain the same.
     *
     * <li><p>The unreserved characters - _ . ! ~ * ' ( ) remain the same.
     *
     * <li><p>All other ASCII characters, including the space character (which becomes
     *        "%20", not '+', see http://www.doi.org/doi_handbook/2_Numbering.html#2.5.2.4),
     *        are converted into the 3-character string "%xy", where xy is
     *        the two-digit lower case hexadecimal representation of the character code.
     *
     * <li><p>All non-ASCII characters are encoded in two steps: first
     *        to a sequence of 2 to 4 bytes, using the UTF-8 algorithm;
     *        secondly each of these bytes is encoded as "%xx". A surrogate pair
     *        is encoded as one 4 byte sequence, an unpaired surrogate as a
     *        3 byte sequence.
     * </ul>
     *
     * @param s The string to be encoded, must not be null
     * @return The encoded string
     */
    public static String encode(String s)
    {
        int len = s.length();
        StringBuilder sbuf = new StringBuilder(len + 16);
        int i = 0;
        while (i < len) {
            //work on code points so that surrogate pairs are encoded as one character
            int cp = s.codePointAt(i);
            i += Character.charCount(cp);
            if (isUnreserved(cp)) {
                sbuf.append((char)cp);
            } else if (cp <= 0x007f) {      // other ASCII, including space
                sbuf.append(hex[cp]);
            } else if (cp <= 0x07FF) {      // 2 byte sequence
                sbuf.append(hex[0xc0 | (cp >> 6)]);
                sbuf.append(hex[0x80 | (cp & 0x3F)]);
            } else if (cp <= 0xFFFF) {      // 3 byte sequence
                sbuf.append(hex[0xe0 | (cp >> 12)]);
                sbuf.append(hex[0x80 | ((cp >> 6) & 0x3F)]);
                sbuf.append(hex[0x80 | (cp & 0x3F)]);
            } else {                        // 4 byte sequence (supplementary characters)
                sbuf.append(hex[0xf0 | (cp >> 18)]);
                sbuf.append(hex[0x80 | ((cp >> 12) & 0x3F)]);
                sbuf.append(hex[0x80 | ((cp >> 6) & 0x3F)]);
                sbuf.append(hex[0x80 | (cp & 0x3F)]);
            }
        }
        return sbuf.toString();
    }

    /**
     * @return true for the characters which are never escaped by {@link #encode(String)}
     */
    private static boolean isUnreserved(int cp) {
        if (('A' <= cp && cp <= 'Z') || ('a' <= cp && cp <= 'z') || ('0' <= cp && cp <= '9')) {
            return true;
        }
        return cp == '-' || cp == '_' || cp == '.' || cp == '!' || cp == '~'
                || cp == '*' || cp == '\'' || cp == '(' || cp == ')';
    }

    /**
     * Decodes a percent-encoded string. "%xy" escapes are read as bytes and
     * the resulting byte sequence is decoded as UTF-8, '+' is decoded as space.
     * Characters which are not escaped are copied unchanged, this includes
     * literal non-ASCII characters.
     * <p>
     * The UTF-8 byte sequences are not checked for being well-formed:
     * unexpected continuation bytes and incomplete sequences are dropped,
     * sequences resulting in a value which is not a Unicode code point are
     * replaced by U+FFFD.
     *
     * @param s the string to decode, must not be null
     * @return the decoded string
     * @throws IllegalArgumentException if a '%' is not followed by 2 hexadecimal digits
     */
    public static String unescape(String s) {
        int len = s.length();
        StringBuilder sbuf = new StringBuilder(len);
        int sumb = 0;       // collects the bits of an incomplete character
        int more = 0;       // number of continuation bytes still expected
        for (int i = 0; i < len; i++) {
            /* Get next byte b from URL segment s */
            char ch = s.charAt(i);
            int b;
            if (ch == '%') {
                if (i + 2 >= len) {
                    throw new IllegalArgumentException(
                            "Incomplete escape sequence at index " + i + " of \"" + s + "\"");
                }
                b = (hexValue(s, i + 1) << 4) | hexValue(s, i + 2);
                i += 2;
            } else if (ch == '+') {
                b = ' ';
            } else if (ch > 0x7f) {
                // literal non-ASCII character: it is not a UTF-8 byte, so copy it as is
                sbuf.append(ch);
                more = 0;
                continue;
            } else {
                b = ch;
            }
            /* Decode byte b as UTF-8, sumb collects incomplete chars */
            if ((b & 0xc0) == 0x80) {               // 10xxxxxx (continuation byte)
                if (more > 0) {
                    sumb = (sumb << 6) | (b & 0x3f);    // Add 6 bits to sumb
                    if (--more == 0) {
                        appendDecoded(sbuf, sumb);
                    }
                }
                // else: continuation byte without lead byte, dropped
            } else if ((b & 0x80) == 0x00) {        // 0xxxxxxx (yields 7 bits)
                sbuf.append((char) b);
                more = 0;
            } else if ((b & 0xe0) == 0xc0) {        // 110xxxxx (yields 5 bits)
                sumb = b & 0x1f;
                more = 1;
            } else if ((b & 0xf0) == 0xe0) {        // 1110xxxx (yields 4 bits)
                sumb = b & 0x0f;
                more = 2;
            } else if ((b & 0xf8) == 0xf0) {        // 11110xxx (yields 3 bits)
                sumb = b & 0x07;
                more = 3;
            } else if ((b & 0xfc) == 0xf8) {        // 111110xx (yields 2 bits)
                sumb = b & 0x03;
                more = 4;
            } else /*if ((b & 0xfe) == 0xfc)*/ {    // 1111110x (yields 1 bit)
                sumb = b & 0x01;
                more = 5;
            }
        }
        return sbuf.toString();
    }

    /**
     * Appends a decoded value. Supplementary characters are appended as surrogate
     * pair, values outside the Unicode range as replacement character U+FFFD.
     */
    private static void appendDecoded(StringBuilder sbuf, int codePoint) {
        if (Character.isValidCodePoint(codePoint)) {
            sbuf.appendCodePoint(codePoint);
        } else {
            sbuf.append('\uFFFD');
        }
    }

    /**
     * @return the value (0..15) of the ASCII hexadecimal digit at the given index
     * @throws IllegalArgumentException if the character is not a hexadecimal digit
     */
    private static int hexValue(String s, int index) {
        char c = s.charAt(index);
        if ('0' <= c && c <= '9') {
            return c - '0';
        } else if ('a' <= c && c <= 'f') {
            return c - 'a' + 10;
        } else if ('A' <= c && c <= 'F') {
            return c - 'A' + 10;
        }
        throw new IllegalArgumentException(
                "Illegal hex character '" + c + "' in escape sequence at index " + index + " of \"" + s + "\"");
    }

}
cdmlib-commons/src/test/java/eu/etaxonomy/cdm/common/DoiTest.java | ||
---|---|---|
1 |
/** |
|
2 |
* Copyright (C) 2007 EDIT |
|
3 |
* European Distributed Institute of Taxonomy |
|
4 |
* http://www.e-taxonomy.eu |
|
5 |
* |
|
6 |
* The contents of this file are subject to the Mozilla Public License Version 1.1 |
|
7 |
* See LICENSE.TXT at the top of this package for the full license terms. |
|
8 |
*/ |
|
9 |
package eu.etaxonomy.cdm.common; |
|
10 |
|
|
11 |
import org.junit.Assert; |
|
12 |
import org.junit.Before; |
|
13 |
import org.junit.Test; |
|
14 |
|
|
15 |
/** |
|
16 |
* |
|
17 |
* Test class for testing the {@link DOI} class. |
|
18 |
* |
|
19 |
* For doi syntax see also http://www.doi.org/doi_handbook/2_Numbering.html |
|
20 |
* or |
|
21 |
* http://stackoverflow.com/questions/27910/finding-a-doi-in-a-document-or-page |
|
22 |
* |
|
23 |
* @author a.mueller |
|
24 |
* |
|
25 |
*/ |
|
26 |
public class DoiTest { |
|
27 |
|
|
28 |
/** |
|
29 |
* @throws java.lang.Exception |
|
30 |
*/ |
|
31 |
@Before |
|
32 |
public void setUp() throws Exception { |
|
33 |
} |
|
34 |
|
|
35 |
@Test |
|
36 |
public void testValidParser() { |
|
37 |
String validDoi = "10.1002/1234"; |
|
38 |
DOI doi = DOI.fromString(validDoi); |
|
39 |
Assert.assertEquals("10.1002", doi.getPrefix()); |
|
40 |
Assert.assertEquals("1234", doi.getSuffix()); |
|
41 |
|
|
42 |
validDoi = "10.1002/(SICI)1522-2594(199911)42:5<952::AID-MRM16>3.0.CO;2-S"; |
|
43 |
doi = DOI.fromString(validDoi); |
|
44 |
Assert.assertEquals("10.1002", doi.getPrefix()); |
|
45 |
Assert.assertEquals("(SICI)1522-2594(199911)42:5<952::AID-MRM16>3.0.CO;2-S", doi.getSuffix()); |
|
46 |
|
|
47 |
validDoi = "10.1007.10/978-3-642-28108-2_19"; |
|
48 |
doi = DOI.fromString(validDoi); |
|
49 |
Assert.assertEquals("10.1007.10", doi.getPrefix()); |
|
50 |
Assert.assertEquals("978-3-642-28108-2_19", doi.getSuffix()); |
|
51 |
|
|
52 |
validDoi="10.1579/0044-7447(2006)35\\[89:RDUICP\\]2.0.CO;2"; |
|
53 |
doi = DOI.fromString(validDoi); |
|
54 |
Assert.assertEquals("10.1579", doi.getPrefix()); |
|
55 |
Assert.assertEquals("0044-7447(2006)35\\[89:RDUICP\\]2.0.CO;2", doi.getSuffix()); |
|
56 |
|
|
57 |
} |
|
58 |
|
|
59 |
@Test |
|
60 |
public void testFromRegistrantCodeAndSuffix() { |
|
61 |
DOI doi = DOI.fromRegistrantCodeAndSuffix("1579", "978-3-642-28108-2_19"); |
|
62 |
Assert.assertEquals("10.1579", doi.getPrefix()); |
|
63 |
Assert.assertEquals("978-3-642-28108-2_19", doi.getSuffix()); |
|
64 |
Assert.assertNotEquals("1234", doi.getSuffix()); |
|
65 |
} |
|
66 |
|
|
67 |
@Test |
|
68 |
public void testParserFail() { |
|
69 |
String invalidDoi = "10.4515260,51.1656910"; //must never match to avoid matches with geo coordinates |
|
70 |
testInvalid(invalidDoi); |
|
71 |
invalidDoi = "4210.1000/123456"; //directoryIndicator must always be 10 |
|
72 |
testInvalid(invalidDoi); |
|
73 |
invalidDoi = "10.1002/12\u0004345"; //control characters (here U+0004) must fail |
|
74 |
testInvalid(invalidDoi); |
|
75 |
invalidDoi = "10.1a02/12345"; //registrant code must include only number and dots (to separate sub codes) |
|
76 |
testInvalid(invalidDoi); |
|
77 |
invalidDoi = "10.1002:12345"; //column separator is only allowed (+required) in URNs |
|
78 |
testInvalid(invalidDoi); |
|
79 |
invalidDoi = "10.1002/"; //doi must always have a suffix length > 0 (if this should changed in future, please do adapt equals and hashCode) |
|
80 |
testInvalid(invalidDoi); |
|
81 |
invalidDoi = "10./1234"; //doi must always have a registrant prefix length > 0 (if this should changed in future, please do adapt equals and hashCode) |
|
82 |
testInvalid(invalidDoi); |
|
83 |
} |
|
84 |
|
|
85 |
@Test |
|
86 |
public void testParserWithPrefixes() { |
|
87 |
String validDoi = "DOI: 10.1002/1234"; |
|
88 |
DOI doi = DOI.fromString(validDoi); |
|
89 |
Assert.assertEquals("10.1002", doi.getPrefix()); |
|
90 |
Assert.assertEquals("1234", doi.getSuffix()); |
|
91 |
|
|
92 |
validDoi = "http://doi.org/10.1002/1234"; |
|
93 |
doi = DOI.fromString(validDoi); |
|
94 |
Assert.assertEquals("10.1002", doi.getPrefix()); |
|
95 |
Assert.assertEquals("1234", doi.getSuffix()); |
|
96 |
|
|
97 |
|
|
98 |
validDoi = "http://doi.org/urn:doi:10.123:456ABC%2Fzyz"; |
|
99 |
doi = DOI.fromString(validDoi); |
|
100 |
Assert.assertEquals("10.123", doi.getPrefix()); |
|
101 |
Assert.assertEquals("456ABC/zyz", doi.getSuffix()); //urn must be percentage encoded ( / -> %2F) |
|
102 |
|
|
103 |
} |
|
104 |
|
|
105 |
@Test |
|
106 |
public void testEquals() { |
|
107 |
String validDoi = "10.1002/12a4"; |
|
108 |
DOI doi1 = DOI.fromString(validDoi); |
|
109 |
validDoi = "10.1002/12A4"; |
|
110 |
DOI doi2 = DOI.fromString(validDoi); |
|
111 |
Assert.assertEquals("DOIs must be equal case insensitive", doi1, doi2); |
|
112 |
validDoi = "10.1002/12b4"; |
|
113 |
DOI doi3 = DOI.fromString(validDoi); |
|
114 |
Assert.assertNotEquals("Different DOIs must not be equal", doi1, doi3); |
|
115 |
} |
|
116 |
|
|
117 |
@Test |
|
118 |
public void testAsURI() { |
|
119 |
//mandatory encoding according to http://www.doi.org/doi_handbook/2_Numbering.html#2.5.2.4 |
|
120 |
String validDoi = "10.1002/1234%56\"78#90 12?34"; |
|
121 |
DOI doi1 = DOI.fromString(validDoi); |
|
122 |
String uri = doi1.asURI(); |
|
123 |
Assert.assertEquals(DOI.HTTP_DOI_ORG + "10.1002/1234%2556%2278%2390%2012%3f34", uri); |
|
124 |
|
|
125 |
//recommendedEncoding |
|
126 |
validDoi = "10.1002/1234<56>78{90}12^34"; |
|
127 |
doi1 = DOI.fromString(validDoi); |
|
128 |
uri = doi1.asURI(); |
|
129 |
Assert.assertEquals(DOI.HTTP_DOI_ORG + "10.1002/1234%3c56%3e78%7b90%7d12%5e34", uri); |
|
130 |
|
|
131 |
//recommendedEncoding (cont.) |
|
132 |
validDoi = "10.1002/1234[56]78`90|12\\34+56"; |
|
133 |
doi1 = DOI.fromString(validDoi); |
|
134 |
uri = doi1.asURI(); |
|
135 |
Assert.assertEquals(DOI.HTTP_DOI_ORG + "10.1002/1234%5b56%5d78%6090%7c12%5c34%2b56", uri); |
|
136 |
|
|
137 |
} |
|
138 |
|
|
139 |
|
|
140 |
|
|
141 |
|
|
142 |
private void testInvalid(String invalidDoi) { |
|
143 |
try { |
|
144 |
DOI.fromString(invalidDoi); |
|
145 |
Assert.fail("DOI should not be parsable: " + invalidDoi); |
|
146 |
} catch (IllegalArgumentException e) { |
|
147 |
//OK |
|
148 |
} |
|
149 |
} |
|
150 |
|
|
151 |
} |
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/DOIUserType.java | ||
---|---|---|
1 |
/** |
|
2 |
* Copyright (C) 2007 EDIT |
|
3 |
* European Distributed Institute of Taxonomy |
|
4 |
* http://www.e-taxonomy.eu |
|
5 |
* |
|
6 |
* The contents of this file are subject to the Mozilla Public License Version 1.1 |
|
7 |
* See LICENSE.TXT at the top of this package for the full license terms. |
|
8 |
*/ |
|
9 |
|
|
10 |
package eu.etaxonomy.cdm.hibernate; |
|
11 |
|
|
12 |
import java.io.Serializable; |
|
13 |
import java.sql.PreparedStatement; |
|
14 |
import java.sql.ResultSet; |
|
15 |
import java.sql.SQLException; |
|
16 |
import java.sql.Types; |
|
17 |
|
|
18 |
import org.apache.log4j.Logger; |
|
19 |
import org.hibernate.HibernateException; |
|
20 |
import org.hibernate.engine.spi.SessionImplementor; |
|
21 |
import org.hibernate.type.StandardBasicTypes; |
|
22 |
import org.hibernate.usertype.UserType; |
|
23 |
import org.jadira.usertype.dateandtime.shared.spi.AbstractUserType; |
|
24 |
|
|
25 |
import eu.etaxonomy.cdm.common.DOI; |
|
26 |
|
|
27 |
/** |
|
28 |
* Hibernate user type for the {@link DOI} class. |
|
29 |
* @author a.mueller |
|
30 |
* @created 05.09.2013 |
|
31 |
*/ |
|
32 |
public class DOIUserType extends AbstractUserType implements UserType { |
|
33 |
private static final long serialVersionUID = 2227841000128722278L; |
|
34 |
|
|
35 |
@SuppressWarnings("unused") |
|
36 |
private static final Logger logger = Logger.getLogger(DOIUserType.class); |
|
37 |
|
|
38 |
private static final int[] SQL_TYPES = { Types.VARCHAR }; |
|
39 |
|
|
40 |
@Override |
|
41 |
public Object deepCopy(Object o) throws HibernateException { |
|
42 |
if (o == null) { |
|
43 |
return null; |
|
44 |
} |
|
45 |
|
|
46 |
DOI doi = (DOI) o; |
|
47 |
|
|
48 |
try { |
|
49 |
return DOI.fromString(doi.toString()); |
|
50 |
} catch (IllegalArgumentException e) { |
|
51 |
throw new HibernateException(e); |
|
52 |
} |
|
53 |
} |
|
54 |
|
|
55 |
|
|
56 |
@Override |
|
57 |
public Serializable disassemble(Object value) throws HibernateException { |
|
58 |
if(value == null) { |
|
59 |
return null; |
|
60 |
} else { |
|
61 |
DOI doi = (DOI) value; |
|
62 |
return doi.toString(); |
|
63 |
} |
|
64 |
} |
|
65 |
|
|
66 |
@Override |
|
67 |
public DOI nullSafeGet(ResultSet rs, String[] names, SessionImplementor session, Object owner) |
|
68 |
throws HibernateException, SQLException { |
|
69 |
String val = (String) StandardBasicTypes.STRING.nullSafeGet(rs, names, session, owner); |
|
70 |
|
|
71 |
if(val == null) { |
|
72 |
return null; |
|
73 |
} else { |
|
74 |
|
|
75 |
try { |
|
76 |
return DOI.fromString(val); |
|
77 |
} catch (IllegalArgumentException e) { |
|
78 |
throw new HibernateException(e); |
|
79 |
} |
|
80 |
} |
|
81 |
} |
|
82 |
|
|
83 |
@Override |
|
84 |
public void nullSafeSet(PreparedStatement statement, Object value, int index, SessionImplementor session) |
|
85 |
throws HibernateException, SQLException { |
|
86 |
if (value == null) { |
|
87 |
StandardBasicTypes.STRING.nullSafeSet(statement, value, index, session); |
|
88 |
} else { |
|
89 |
DOI doi = (DOI)value; |
|
90 |
StandardBasicTypes.STRING.nullSafeSet(statement, doi.toString(), index, session); |
|
91 |
} |
|
92 |
} |
|
93 |
|
|
94 |
|
|
95 |
/* (non-Javadoc) |
|
96 |
* @see org.jadira.usertype.dateandtime.shared.spi.AbstractSingleColumnUserType#returnedClass() |
|
97 |
*/ |
|
98 |
@Override |
|
99 |
public Class returnedClass() { |
|
100 |
return DOI.class; |
|
101 |
} |
|
102 |
|
|
103 |
@Override |
|
104 |
public int[] sqlTypes() { |
|
105 |
return SQL_TYPES; |
|
106 |
} |
|
107 |
|
|
108 |
|
|
109 |
|
|
110 |
|
|
111 |
|
|
112 |
} |
cdmlib-model/src/main/java/eu/etaxonomy/cdm/model/common/package-info.java | ||
---|---|---|
102 | 102 |
@org.hibernate.annotations.TypeDef(name="partialUserType", typeClass=eu.etaxonomy.cdm.hibernate.PartialUserType.class), |
103 | 103 |
@org.hibernate.annotations.TypeDef(name="uuidUserType", typeClass=eu.etaxonomy.cdm.hibernate.UUIDUserType.class), |
104 | 104 |
@org.hibernate.annotations.TypeDef(name="uriUserType", typeClass=eu.etaxonomy.cdm.hibernate.URIUserType.class), |
105 |
@org.hibernate.annotations.TypeDef(name="enumUserType", typeClass=eu.etaxonomy.cdm.hibernate.EnumUserType.class) |
|
105 |
@org.hibernate.annotations.TypeDef(name="enumUserType", typeClass=eu.etaxonomy.cdm.hibernate.EnumUserType.class), |
|
106 |
@org.hibernate.annotations.TypeDef(name="doiUserType", typeClass=eu.etaxonomy.cdm.hibernate.DOIUserType.class) |
|
106 | 107 |
}) |
107 | 108 |
@org.hibernate.annotations.AnyMetaDef(name = "CdmBase" , |
108 | 109 |
metaType="string", |
cdmlib-model/src/main/java/eu/etaxonomy/cdm/model/reference/IPublicationBase.java | ||
---|---|---|
9 | 9 |
|
10 | 10 |
package eu.etaxonomy.cdm.model.reference; |
11 | 11 |
|
12 |
import eu.etaxonomy.cdm.common.DOI; |
|
13 |
|
|
12 | 14 |
/** |
13 | 15 |
* This base interface represents all different kind of published |
14 | 16 |
* {@link IReference references} which constitute a physical |
... | ... | |
54 | 56 |
/** |
55 | 57 |
* @return |
56 | 58 |
*/ |
57 |
public String getDoi();
|
|
59 |
public DOI getDoi();
|
|
58 | 60 |
|
59 |
public void setDoi(String doi);
|
|
61 |
public void setDoi(DOI doi);
|
|
60 | 62 |
|
61 | 63 |
} |
cdmlib-model/src/main/java/eu/etaxonomy/cdm/model/reference/Reference.java | ||
---|---|---|
45 | 45 |
import org.hibernate.search.annotations.IndexedEmbedded; |
46 | 46 |
import org.hibernate.validator.constraints.Length; |
47 | 47 |
|
48 |
import eu.etaxonomy.cdm.common.DOI; |
|
48 | 49 |
import eu.etaxonomy.cdm.model.agent.Institution; |
49 | 50 |
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase; |
50 | 51 |
import eu.etaxonomy.cdm.model.common.TimePeriod; |
... | ... | |
207 | 208 |
|
208 | 209 |
@XmlElement(name = "Doi") |
209 | 210 |
@Field |
210 |
@NullOrNotEmpty |
|
211 |
@Length(max = 255)
|
|
212 |
// @Pattern(regexp = "(?=.{13}$)\\d{1,5}([- ])\\d{1,7}\\1\\d{1,6}\\1(\\d|X)$", groups = Level2.class, message = "{eu.etaxonomy.cdm.model.reference.Reference.doi.message}")
|
|
213 |
protected String doi;
|
|
211 |
// @NullOrNotEmpty
|
|
212 |
// @Length(max = 1000)
|
|
213 |
@Type(type="doiUserType")
|
|
214 |
protected DOI doi;
|
|
214 | 215 |
|
215 | 216 |
|
216 | 217 |
@XmlElement(name = "ISSN") |
... | ... | |
487 | 488 |
} |
488 | 489 |
|
489 | 490 |
@Override |
490 |
public String getDoi() {
|
|
491 |
public DOI getDoi() {
|
|
491 | 492 |
return doi; |
492 | 493 |
} |
493 | 494 |
|
494 | 495 |
@Override |
495 |
public void setDoi(String doi) {
|
|
496 |
public void setDoi(DOI doi) {
|
|
496 | 497 |
this.doi = doi; |
497 | 498 |
} |
498 | 499 |
|
cdmlib-model/src/main/java/eu/etaxonomy/cdm/strategy/match/DefaultMatchStrategy.java | ||
---|---|---|
28 | 28 |
import org.apache.log4j.Logger; |
29 | 29 |
|
30 | 30 |
import eu.etaxonomy.cdm.common.CdmUtils; |
31 |
import eu.etaxonomy.cdm.common.DOI; |
|
31 | 32 |
import eu.etaxonomy.cdm.common.DoubleResult; |
32 | 33 |
import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper; |
33 | 34 |
import eu.etaxonomy.cdm.model.common.CdmBase; |
... | ... | |
211 | 212 |
//result &= matchPrimitiveField(matchFirst, matchSecond, fieldMatcher, replaceModeList); |
212 | 213 |
}else if(fieldType == URI.class){ |
213 | 214 |
result &= matchPrimitiveField(matchFirst, matchSecond, fieldMatcher, replaceModeList); |
215 |
}else if(fieldType == DOI.class){ |
|
216 |
result &= matchPrimitiveField(matchFirst, matchSecond, fieldMatcher, replaceModeList); |
|
214 | 217 |
}else if(isSingleCdmBaseObject(fieldType)){ |
215 | 218 |
result &= matchPrimitiveField(matchFirst, matchSecond, fieldMatcher, replaceModeList); |
216 | 219 |
}else if (isCollection(fieldType)){ |
Also available in: Unified diff
implement DOI class and use in Reference #3572