1
|
// $Id$
|
2
|
/**
|
3
|
* Copyright (C) 2007 EDIT
|
4
|
* European Distributed Institute of Taxonomy
|
5
|
* http://www.e-taxonomy.eu
|
6
|
*
|
7
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
8
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
9
|
*/
|
10
|
|
11
|
package eu.etaxonomy.cdm.io.globis;
|
12
|
|
13
|
import java.util.HashSet;
|
14
|
import java.util.Set;
|
15
|
import java.util.UUID;
|
16
|
|
17
|
import org.apache.log4j.Logger;
|
18
|
|
19
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
20
|
import eu.etaxonomy.cdm.io.common.mapping.InputTransformerBase;
|
21
|
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
|
22
|
import eu.etaxonomy.cdm.model.common.ExtensionType;
|
23
|
import eu.etaxonomy.cdm.model.common.Language;
|
24
|
import eu.etaxonomy.cdm.model.description.Feature;
|
25
|
import eu.etaxonomy.cdm.model.name.NameTypeDesignationStatus;
|
26
|
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
|
27
|
|
28
|
/**
|
29
|
* @author a.mueller
|
30
|
* @created 01.03.2010
|
31
|
* @version 1.0
|
32
|
*/
|
33
|
public final class GlobisTransformer extends InputTransformerBase {
|
34
|
private static final Logger logger = Logger.getLogger(GlobisTransformer.class);
|
35
|
|
36
|
// public static final int SOURCE_USE_ORIGINAL_DESCRIPTION = 1;
|
37
|
// public static final int SOURCE_USE_BASIS_OF_RECORD = 2;
|
38
|
// public static final int SOURCE_USE_ADDITIONAL_SOURCE = 3;
|
39
|
// public static final int SOURCE_USE_SOURCE_OF_SYNONYMY = 4;
|
40
|
// public static final int SOURCE_USE_REDESCRIPTION = 5;
|
41
|
// public static final int SOURCE_USE_NEW_COMBINATION_REFERENCE = 6;
|
42
|
// public static final int SOURCE_USE_STATUS_SOURCE = 7;
|
43
|
// public static final int SOURCE_USE_EMENDATION = 8;
|
44
|
|
45
|
//extension types
|
46
|
public static final UUID uuidEdition = UUID.fromString("c42dfb85-abbe-49b3-8a2b-56cc1b8eb6d0");
|
47
|
public static final UUID uuidEditor = UUID.fromString("07752659-3018-4880-bf26-41bb396fbf37");
|
48
|
public static final UUID uuidGeneralKeywords = UUID.fromString("aaa67b2a-c45b-42ed-b4fa-1028ffe41e44");
|
49
|
public static final UUID uuidGeoKeywords = UUID.fromString("a1afb697-d37b-4a8c-84d8-63f8f01ae10a");
|
50
|
public static final UUID uuidLibrary = UUID.fromString("71a3e44d-4ed2-44f9-be6a-76fa26a294bd");
|
51
|
|
52
|
// public static final UUID uuidEditor = UUID.fromString("07752659-3018-4880-bf26-41bb396fbf37");
|
53
|
// public static final UUID uuidEditor = UUID.fromString("07752659-3018-4880-bf26-41bb396fbf37");
|
54
|
|
55
|
|
56
|
//language uuids
|
57
|
|
58
|
|
59
|
public static NomenclaturalCode kingdomId2NomCode(Integer kingdomId){
|
60
|
switch (kingdomId){
|
61
|
case 1: return null;
|
62
|
case 2: return NomenclaturalCode.ICZN; //Animalia
|
63
|
case 3: return NomenclaturalCode.ICBN; //Plantae
|
64
|
case 4: return NomenclaturalCode.ICBN; //Fungi
|
65
|
case 5: return NomenclaturalCode.ICZN ; //Protozoa
|
66
|
case 6: return NomenclaturalCode.ICNB ; //Bacteria
|
67
|
case 7: return NomenclaturalCode.ICBN; //Chromista
|
68
|
case 147415: return NomenclaturalCode.ICNB; //Monera
|
69
|
default: return null;
|
70
|
|
71
|
}
|
72
|
|
73
|
}
|
74
|
|
75
|
|
76
|
|
77
|
|
78
|
/* (non-Javadoc)
|
79
|
* @see eu.etaxonomy.cdm.io.common.mapping.InputTransformerBase#getNameTypeDesignationStatusByKey(java.lang.String)
|
80
|
*/
|
81
|
@Override
|
82
|
public NameTypeDesignationStatus getNameTypeDesignationStatusByKey(String key) throws UndefinedTransformerMethodException {
|
83
|
if (key == null){
|
84
|
return null;
|
85
|
}
|
86
|
Integer intDesignationId = Integer.valueOf(key);
|
87
|
switch (intDesignationId){
|
88
|
case 1: return NameTypeDesignationStatus.ORIGINAL_DESIGNATION();
|
89
|
case 2: return NameTypeDesignationStatus.SUBSEQUENT_DESIGNATION();
|
90
|
case 3: return NameTypeDesignationStatus.MONOTYPY();
|
91
|
default:
|
92
|
String warning = "Unknown name type designation status id " + key;
|
93
|
logger.warn(warning);
|
94
|
return null;
|
95
|
}
|
96
|
}
|
97
|
|
98
|
|
99
|
|
100
|
|
101
|
/* (non-Javadoc)
|
102
|
* @see eu.etaxonomy.cdm.io.common.mapping.InputTransformerBase#getNameTypeDesignationStatusUuid(java.lang.String)
|
103
|
*/
|
104
|
@Override
|
105
|
public UUID getNameTypeDesignationStatusUuid(String key) throws UndefinedTransformerMethodException {
|
106
|
//nott needed
|
107
|
return super.getNameTypeDesignationStatusUuid(key);
|
108
|
}
|
109
|
|
110
|
|
111
|
public Language getLanguageByKey(String ermsAbbrev) throws IllegalArgumentException {
|
112
|
Set<String> unhandledLanguages = new HashSet<String>();
|
113
|
if (CdmUtils.isEmpty(ermsAbbrev)){return null;
|
114
|
}else if (ermsAbbrev.equals("af")){return Language.AFRIKAANS();
|
115
|
}else if (ermsAbbrev.equals("al")){return Language.ALEUT();
|
116
|
}else if (ermsAbbrev.equals("ar")){return Language.ARABIC();
|
117
|
}else if (ermsAbbrev.equals("as")){return Language.ASSAMESE();
|
118
|
// }else if (ermsAbbrev.equals("au")){return Language.AUNS(); //??
|
119
|
}else if (ermsAbbrev.equals("az")){return Language.AZERBAIJANI();
|
120
|
}else if (ermsAbbrev.equals("ba")){return Language.BASQUE();
|
121
|
}else if (ermsAbbrev.equals("be")){return Language.BELORUSSIAN();
|
122
|
}else if (ermsAbbrev.equals("bg")){return Language.BULGARIAN();
|
123
|
}else if (ermsAbbrev.equals("bn")){return Language.BENGALI();
|
124
|
}else if (ermsAbbrev.equals("br")){return Language.BRETON();
|
125
|
}else if (ermsAbbrev.equals("bu")){return Language.BURMESE();
|
126
|
}else if (ermsAbbrev.equals("ca")){return Language.CATALAN_VALENCIAN(); //??? (Catalan)
|
127
|
}else if (ermsAbbrev.equals("ce")){return Language.CEBUANO();
|
128
|
}else if (ermsAbbrev.equals("ch")){return Language.CHINESE();
|
129
|
// }else if (ermsAbbrev.equals("cl")){return Language.CHUKCHI(); // (LOURAVETLANY)(); //iso639-3: ckt //also known as Luoravetlan, Chukot and Chukcha is a Palaeosiberian language spoken by Chukchi people in the easternmost extremity of Siberia, mainly in Chukotka Autonomous Okrug.
|
130
|
}else if (ermsAbbrev.equals("cr")){return Language.CROATIAN();
|
131
|
}else if (ermsAbbrev.equals("cs")){return Language.CZECH();
|
132
|
}else if (ermsAbbrev.equals("da")){return Language.DANISH();
|
133
|
}else if (ermsAbbrev.equals("de")){return Language.GERMAN();
|
134
|
// }else if (ermsAbbrev.equals("ec")){return Language.ENGLISH-CANADIAN(); //no iso
|
135
|
}else if (ermsAbbrev.equals("ee")){return Language.ESTONIAN();
|
136
|
// }else if (ermsAbbrev.equals("ek")){return Language.EVEN-KAMCHATKA(); //iso639-3: eve Lamut, Ewen, Eben, Orich, Ilqan; Russian: ???�????? ???�?, earlier also ????????? ???�?) is a Tungusic language spoken by the Evens in Siberia
|
137
|
}else if (ermsAbbrev.equals("en")){return Language.ENGLISH();
|
138
|
}else if (ermsAbbrev.equals("ep")){return Language.ESPERANTO();
|
139
|
}else if (ermsAbbrev.equals("es")){return Language.SPANISH_CASTILIAN();
|
140
|
// }else if (ermsAbbrev.equals("eu")){return Language.ENGLISH-UNITED STATES(); no iso //ENGLISH();
|
141
|
// }else if (ermsAbbrev.equals("ev")){return Language.EVENKI(); iso: evn //languages of Tungusic family
|
142
|
}else if (ermsAbbrev.equals("fa")){return Language.PERSIAN();
|
143
|
// }else if (ermsAbbrev.equals("fc")){return Language.FRENCH-CANADIAN(); no iso //FRENCH();
|
144
|
}else if (ermsAbbrev.equals("fi")){return Language.FINNISH();
|
145
|
}else if (ermsAbbrev.equals("fj")){return Language.FIJIAN();
|
146
|
}else if (ermsAbbrev.equals("fl")){return Language.DUTCH_FLEMISH();
|
147
|
}else if (ermsAbbrev.equals("fo")){return Language.FAROESE();
|
148
|
}else if (ermsAbbrev.equals("fr")){return Language.FRENCH();
|
149
|
}else if (ermsAbbrev.equals("ga")){return Language.GAELIC_SCOTTISH_GAELIC(); //??
|
150
|
}else if (ermsAbbrev.equals("ge")){return Language.KALAALLISUT_GREENLANDIC(); // GREENLANDIC
|
151
|
}else if (ermsAbbrev.equals("gl")){return Language.GALICIAN();
|
152
|
}else if (ermsAbbrev.equals("gr")){return Language.GREEK_MODERN(); //(Greek)
|
153
|
// }else if (ermsAbbrev.equals("gu")){return Language.GUARAYO(); //GUARANI() ??
|
154
|
// }else if (ermsAbbrev.equals("ha")){return Language.HASSANYA(); Hassaniyya Arabic ios 639-3: mey
|
155
|
}else if (ermsAbbrev.equals("he")){return Language.HEBREW();
|
156
|
}else if (ermsAbbrev.equals("hi")){return Language.HINDI();
|
157
|
}else if (ermsAbbrev.equals("hu")){return Language.HUNGARIAN();
|
158
|
}else if (ermsAbbrev.equals("hw")){return Language.HAWAIIAN();
|
159
|
}else if (ermsAbbrev.equals("hy")){return Language.ARMENIAN();
|
160
|
}else if (ermsAbbrev.equals("in")){return Language.INDONESIAN();
|
161
|
}else if (ermsAbbrev.equals("iq")){return Language.INUPIAQ();
|
162
|
}else if (ermsAbbrev.equals("ir")){return Language.IRISH();
|
163
|
}else if (ermsAbbrev.equals("is")){return Language.ICELANDIC();
|
164
|
}else if (ermsAbbrev.equals("it")){return Language.ITALIAN();
|
165
|
}else if (ermsAbbrev.equals("ja")){return Language.JAPANESE();
|
166
|
// }else if (ermsAbbrev.equals("ji")){return Language.JIVARA(); //??
|
167
|
// }else if (ermsAbbrev.equals("ka")){return Language.KAMCHADAL(); iso 639-3:itl //Itelmen, formerly also known as Kamchadal, is a language belonging to the Chukotko-Kamchatkan family traditionally spoken in the Kamchatka Peninsula.
|
168
|
}else if (ermsAbbrev.equals("ko")){return Language.KOREAN();
|
169
|
// }else if (ermsAbbrev.equals("kr")){return Language.KORYAK(); //iso639-3: kpy
|
170
|
}else if (ermsAbbrev.equals("la")){return Language.LATIN();
|
171
|
}else if (ermsAbbrev.equals("li")){return Language.LITHUANIAN();
|
172
|
// }else if (ermsAbbrev.equals("lp")){return Language.LAPP(); //??
|
173
|
}else if (ermsAbbrev.equals("lv")){return Language.LATVIAN();
|
174
|
}else if (ermsAbbrev.equals("ma")){return Language.MACEDONIAN();
|
175
|
// }else if (ermsAbbrev.equals("mh")){return Language.MAHR(); //Marathi ; Mari ??
|
176
|
// }else if (ermsAbbrev.equals("mk")){return Language.MAKAH (QWIQWIDICCIAT)(); //iso639-3: myh
|
177
|
}else if (ermsAbbrev.equals("ml")){return Language.MALAY();
|
178
|
// }else if (ermsAbbrev.equals("ne")){return Language.NENETS(); iso639-3 yrk; iso639-2: mis
|
179
|
}else if (ermsAbbrev.equals("nl")){return Language.DUTCH_FLEMISH();
|
180
|
}else if (ermsAbbrev.equals("no")){return Language.NORWEGIAN();
|
181
|
}else if (ermsAbbrev.equals("np")){return Language.NEPALI();
|
182
|
// }else if (ermsAbbrev.equals("os")){return Language.OSTYAK(); //Ostyak on its own or in combination, can refer, especially in older literature, to several Siberian peoples and languages:
|
183
|
// Khanty language (kca; 639-2: fiu); Ket language(ket); Selkup language(sel; 639-2: sel)
|
184
|
// }else if (ermsAbbrev.equals("pi")){return Language.PIRAYAGUARA(); //??
|
185
|
}else if (ermsAbbrev.equals("pl")){return Language.POLISH();
|
186
|
}else if (ermsAbbrev.equals("pt")){return Language.PORTUGUESE();
|
187
|
}else if (ermsAbbrev.equals("ro")){return Language.ROMANIAN();
|
188
|
}else if (ermsAbbrev.equals("ru")){return Language.RUSSIAN();
|
189
|
}else if (ermsAbbrev.equals("sc")){return Language.SCOTS();
|
190
|
}else if (ermsAbbrev.equals("sd")){return Language.SINDHI();
|
191
|
// }else if (ermsAbbrev.equals("sh")){return Language.SERBO_CROATIAN(); //hbs
|
192
|
}else if (ermsAbbrev.equals("si")){return Language.SINHALA_SINHALESE();
|
193
|
}else if (ermsAbbrev.equals("sk")){return Language.SLOVAK();
|
194
|
}else if (ermsAbbrev.equals("sn")){return Language.SLOVENIAN();
|
195
|
}else if (ermsAbbrev.equals("sr")){return Language.SERBIAN();
|
196
|
}else if (ermsAbbrev.equals("st")){return Language.SRANAN_TONGO();
|
197
|
}else if (ermsAbbrev.equals("sv")){return Language.SWEDISH();
|
198
|
}else if (ermsAbbrev.equals("sw")){return Language.SWAHILI();
|
199
|
}else if (ermsAbbrev.equals("ta")){return Language.TAMIL();
|
200
|
}else if (ermsAbbrev.equals("te")){return Language.TELUGU();
|
201
|
}else if (ermsAbbrev.equals("tg")){return Language.TAGALOG();
|
202
|
}else if (ermsAbbrev.equals("th")){return Language.THAI();
|
203
|
// }else if (ermsAbbrev.equals("tm")){return Language.TAMUL(); //??
|
204
|
}else if (ermsAbbrev.equals("tr")){return Language.TURKISH();
|
205
|
}else if (ermsAbbrev.equals("tu")){return Language.TUPIS();
|
206
|
}else if (ermsAbbrev.equals("uk")){return Language.UKRAINIAN();
|
207
|
}else if (ermsAbbrev.equals("ur")){return Language.URDU();
|
208
|
}else if (ermsAbbrev.equals("vi")){return Language.VIETNAMESE();
|
209
|
}else if (ermsAbbrev.equals("we")){return Language.WELSH();
|
210
|
}else if (ermsAbbrev.equals("wo")){return Language.WOLOF();
|
211
|
}else if (ermsAbbrev.equals("ya")){return Language.YAKUT();
|
212
|
}else if (ermsAbbrev.equals("yp")){return Language.YUPIKS();
|
213
|
// }else if (ermsAbbrev.equals("yu")){return Language.YUKAGIR(); 639-2: mis; 639-3 yux (Southern Yukaghir)- ykg(Tundra Yukaghir)
|
214
|
}else{
|
215
|
unhandledLanguages.add("au");
|
216
|
unhandledLanguages.add("cl");
|
217
|
unhandledLanguages.add("ec");
|
218
|
unhandledLanguages.add("ek");
|
219
|
unhandledLanguages.add("eu");
|
220
|
unhandledLanguages.add("ev");
|
221
|
unhandledLanguages.add("fc");
|
222
|
unhandledLanguages.add("gu");
|
223
|
unhandledLanguages.add("ha");
|
224
|
unhandledLanguages.add("ji");
|
225
|
unhandledLanguages.add("ka");
|
226
|
unhandledLanguages.add("kr");
|
227
|
unhandledLanguages.add("lp");
|
228
|
unhandledLanguages.add("mh");
|
229
|
unhandledLanguages.add("mk");
|
230
|
unhandledLanguages.add("ne");
|
231
|
unhandledLanguages.add("os");
|
232
|
unhandledLanguages.add("pi");
|
233
|
unhandledLanguages.add("sh");
|
234
|
unhandledLanguages.add("tm");
|
235
|
unhandledLanguages.add("sh");
|
236
|
unhandledLanguages.add("yu");
|
237
|
|
238
|
if (unhandledLanguages.contains(ermsAbbrev)){
|
239
|
logger.warn("Unhandled language '" + ermsAbbrev + "' replaced by 'UNDETERMINED'" );
|
240
|
return Language.UNDETERMINED();
|
241
|
}
|
242
|
String warning = "New language abbreviation " + ermsAbbrev;
|
243
|
logger.warn(warning);
|
244
|
throw new IllegalArgumentException(warning);
|
245
|
}
|
246
|
|
247
|
|
248
|
|
249
|
}
|
250
|
|
251
|
/* (non-Javadoc)
|
252
|
* @see eu.etaxonomy.cdm.io.common.mapping.InputTransformerBase#getLanguageUuid(java.lang.String)
|
253
|
*/
|
254
|
@Override
|
255
|
public UUID getLanguageUuid(String key)
|
256
|
throws UndefinedTransformerMethodException {
|
257
|
return super.getLanguageUuid(key);
|
258
|
}
|
259
|
|
260
|
|
261
|
|
262
|
|
263
|
|
264
|
|
265
|
/* (non-Javadoc)
|
266
|
* @see eu.etaxonomy.cdm.io.common.mapping.InputTransformerBase#getExtensionTypeByKey(java.lang.String)
|
267
|
*/
|
268
|
@Override
|
269
|
public ExtensionType getExtensionTypeByKey(String key) throws UndefinedTransformerMethodException {
|
270
|
if (key == null){return null;
|
271
|
}
|
272
|
return null;
|
273
|
}
|
274
|
|
275
|
/* (non-Javadoc)
|
276
|
* @see eu.etaxonomy.cdm.io.common.mapping.InputTransformerBase#getExtensionTypeUuid(java.lang.String)
|
277
|
*/
|
278
|
@Override
|
279
|
public UUID getExtensionTypeUuid(String key)
|
280
|
throws UndefinedTransformerMethodException {
|
281
|
if (key == null){return null;
|
282
|
// }else if (key.equalsIgnoreCase("recent only")){return uuidRecentOnly;
|
283
|
// }else if (key.equalsIgnoreCase("recent + fossil")){return uuidRecentAndFossil;
|
284
|
// }else if (key.equalsIgnoreCase("fossil only")){return uuidFossilOnly;
|
285
|
}
|
286
|
return null;
|
287
|
}
|
288
|
|
289
|
|
290
|
|
291
|
/* (non-Javadoc)
|
292
|
* @see eu.etaxonomy.cdm.io.common.mapping.InputTransformerBase#getFeatureByKey(java.lang.String)
|
293
|
*/
|
294
|
@Override
|
295
|
public Feature getFeatureByKey(String key) throws UndefinedTransformerMethodException {
|
296
|
if (CdmUtils.isEmpty(key)){return null;
|
297
|
}else if (key.equalsIgnoreCase("Distribution")){return Feature.DISTRIBUTION();
|
298
|
}else if (key.equalsIgnoreCase("Ecology")){return Feature.ECOLOGY();
|
299
|
}else if (key.equalsIgnoreCase("Diagnosis")){return Feature.DIAGNOSIS();
|
300
|
}else if (key.equalsIgnoreCase("Biology")){return Feature.BIOLOGY_ECOLOGY();
|
301
|
}else if (key.equalsIgnoreCase("Host")){return Feature.HOSTPLANT();
|
302
|
}else{
|
303
|
return null;
|
304
|
}
|
305
|
}
|
306
|
|
307
|
|
308
|
|
309
|
|
310
|
}
|