1
|
/**
|
2
|
* Copyright (C) 2007 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
|
10
|
package eu.etaxonomy.cdm.io.berlinModel.in;
|
11
|
|
12
|
import java.sql.ResultSet;
|
13
|
import java.sql.SQLException;
|
14
|
import java.util.ArrayList;
|
15
|
import java.util.HashMap;
|
16
|
import java.util.HashSet;
|
17
|
import java.util.List;
|
18
|
import java.util.Map;
|
19
|
import java.util.Set;
|
20
|
import java.util.SortedSet;
|
21
|
import java.util.TreeSet;
|
22
|
import java.util.UUID;
|
23
|
|
24
|
import org.apache.commons.lang.StringUtils;
|
25
|
import org.apache.log4j.Logger;
|
26
|
import org.springframework.stereotype.Component;
|
27
|
import org.springframework.transaction.TransactionStatus;
|
28
|
|
29
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
30
|
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
|
31
|
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelCommonNamesImportValidator;
|
32
|
import eu.etaxonomy.cdm.io.common.IOValidator;
|
33
|
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
|
34
|
import eu.etaxonomy.cdm.io.common.Source;
|
35
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
36
|
import eu.etaxonomy.cdm.model.common.Extension;
|
37
|
import eu.etaxonomy.cdm.model.common.ExtensionType;
|
38
|
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
|
39
|
import eu.etaxonomy.cdm.model.common.Language;
|
40
|
import eu.etaxonomy.cdm.model.common.Marker;
|
41
|
import eu.etaxonomy.cdm.model.common.MarkerType;
|
42
|
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
|
43
|
import eu.etaxonomy.cdm.model.description.DescriptionElementSource;
|
44
|
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
45
|
import eu.etaxonomy.cdm.model.location.NamedArea;
|
46
|
import eu.etaxonomy.cdm.model.name.TaxonName;
|
47
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
48
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
49
|
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
50
|
import eu.etaxonomy.cdm.model.term.OrderedTermVocabulary;
|
51
|
import eu.etaxonomy.cdm.model.term.Representation;
|
52
|
import eu.etaxonomy.cdm.model.term.TermType;
|
53
|
import eu.etaxonomy.cdm.model.term.TermVocabulary;
|
54
|
|
55
|
/**
|
56
|
*
|
57
|
* @author a.mueller
|
58
|
* @since 20.03.2008
|
59
|
*/
|
60
|
@Component
|
61
|
public class BerlinModelCommonNamesImport extends BerlinModelImportBase {
|
62
|
private static final long serialVersionUID = -8921948187177864321L;
|
63
|
|
64
|
private static final Logger logger = Logger.getLogger(BerlinModelCommonNamesImport.class);
|
65
|
|
66
|
public static final UUID REFERENCE_LANGUAGE_ISO639_2_UUID = UUID.fromString("40c4f8dd-3d9c-44a4-b77a-76e137a89a5f");
|
67
|
public static final UUID REFERENCE_LANGUAGE_STRING_UUID = UUID.fromString("2a1b678f-c27d-48c1-b43e-98fd0d426305");
|
68
|
public static final UUID COMMONNAME_STATUS_RECOMMENDED_UUID = UUID.fromString("e3f7b80a-1286-458d-812c-5e818f731968");
|
69
|
public static final UUID COMMONNAME_STATUS_SYNONYM_UUID = UUID.fromString("169b2d97-a706-49de-b28b-c67f0ee6764b");
|
70
|
|
71
|
public static final String NAMESPACE = "common name";
|
72
|
|
73
|
|
74
|
private static final String pluralString = "common names";
|
75
|
private static final String dbTableName = "emCommonName";
|
76
|
|
77
|
|
78
|
//map that stores the regions (named areas) and makes them accessible via the regionFk
|
79
|
private Map<String, NamedArea> regionFkToAreaMap = new HashMap<>();
|
80
|
|
81
|
/**
 * Creates the common names import and hands the source table name
 * ({@code emCommonName}) and the plural label ({@code common names})
 * to the base class.
 */
public BerlinModelCommonNamesImport() {
    super(dbTableName, pluralString);
}
|
84
|
|
85
|
@Override
|
86
|
protected String getIdQuery(BerlinModelImportState state) {
|
87
|
String result = " SELECT CommonNameId FROM emCommonName WHERE (1=1) ";
|
88
|
if (isNotBlank(state.getConfig().getCommonNameFilter())){
|
89
|
result += " AND " + state.getConfig().getCommonNameFilter();
|
90
|
}
|
91
|
result += " ORDER BY PTNameFk, CommonNameId ";
|
92
|
|
93
|
return result;
|
94
|
}
|
95
|
|
96
|
@Override
|
97
|
protected String getRecordQuery(BerlinModelImportConfigurator config) {
|
98
|
String recordQuery = "";
|
99
|
recordQuery =
|
100
|
" SELECT rel.RelPTaxonId, rel.RelQualifierFk, acc.RIdentifier accTaxonId, factTaxon.RIdentifier factTaxonId, accName.NameId, f.FactId, " +
|
101
|
" cn.CommonNameId, cn.CommonName, tax.RIdentifier AS taxonId, cn.PTNameFk, cn.RefFk AS refId, cn.Status, cn.RegionFks, cn.MisNameRefFk, " +
|
102
|
" cn.NameInSourceFk, cn.Created_When, cn.Updated_When, cn.Created_Who, cn.Updated_Who, cn.Note AS Notes, languageCommonName.Language, " +
|
103
|
" languageCommonName.LanguageOriginal, languageCommonName.ISO639_1, languageCommonName.ISO639_2, " +
|
104
|
" emLanguageReference.RefFk AS languageRefRefFk, emLanguageReference.ReferenceShort, emLanguageReference.ReferenceLong, " +
|
105
|
" emLanguageReference.LanguageFk, languageReferenceLanguage.Language AS refLanguage, languageReferenceLanguage.ISO639_2 AS refLanguageIso639_2, "+
|
106
|
" misappliedTaxon.RIdentifier AS misappliedTaxonId " +
|
107
|
" FROM PTaxon AS misappliedTaxon RIGHT OUTER JOIN " +
|
108
|
" emLanguage AS languageReferenceLanguage RIGHT OUTER JOIN " +
|
109
|
" emLanguageReference ON languageReferenceLanguage.LanguageId = emLanguageReference.LanguageFk RIGHT OUTER JOIN " +
|
110
|
" emCommonName AS cn INNER JOIN " +
|
111
|
" PTaxon AS tax ON cn.PTNameFk = tax.PTNameFk AND cn.PTRefFk = tax.PTRefFk ON " +
|
112
|
" emLanguageReference.ReferenceId = cn.LanguageRefFk LEFT OUTER JOIN " +
|
113
|
" emLanguage AS languageCommonName ON cn.LanguageFk = languageCommonName.LanguageId ON misappliedTaxon.PTNameFk = cn.NameInSourceFk AND " +
|
114
|
" misappliedTaxon.PTRefFk = cn.MisNameRefFk " +
|
115
|
|
116
|
" LEFT OUTER JOIN Fact f ON cn.CommonNameId = f.ExtensionFk " +
|
117
|
" LEFT OUTER JOIN PTaxon factTaxon ON factTaxon.PTNameFk = f.PTNameFk AND factTaxon.PTRefFk = f.PTRefFk " +
|
118
|
" LEFT OUTER JOIN RelPTaxon rel ON rel.PTNameFk1 = tax.PTNameFk AND rel.PTRefFk1 = tax.PTRefFk AND rel.RelQualifierFk IN (2,6,7) " +
|
119
|
" LEFT OUTER JOIN PTaxon acc ON rel.PTNameFk2 = acc.PTNameFk AND rel.PTRefFk2 = acc.PTRefFk " +
|
120
|
" LEFT OUTER JOIN Name accName ON accName.NameId = acc.PTNameFk " +
|
121
|
" WHERE cn.CommonNameId IN (" + ID_LIST_TOKEN + ") " +
|
122
|
" ORDER BY cn.PTNameFk, cn.CommonNameId ";
|
123
|
|
124
|
return recordQuery;
|
125
|
}
|
126
|
|
127
|
@Override
|
128
|
protected void doInvoke(BerlinModelImportState state) {
|
129
|
try {
|
130
|
makeRegions(state);
|
131
|
} catch (Exception e) {
|
132
|
logger.error("Error when creating common name regions:" + e.getMessage());
|
133
|
e.printStackTrace();
|
134
|
state.setUnsuccessfull();
|
135
|
}
|
136
|
super.doInvoke(state);
|
137
|
return;
|
138
|
}
|
139
|
|
140
|
/**
|
141
|
* @param state
|
142
|
*
|
143
|
*/
|
144
|
private void makeRegions(BerlinModelImportState state) {
|
145
|
try {
|
146
|
TransactionStatus tx = startTransaction();
|
147
|
SortedSet<Integer> regionFks = new TreeSet<>();
|
148
|
Source source = state.getConfig().getSource();
|
149
|
|
150
|
//fill set with all regionFk from emCommonName.regionFks
|
151
|
fillRegionFks(state, regionFks, source);
|
152
|
//concat filter string
|
153
|
String sqlWhere = getSqlWhere(regionFks);
|
154
|
|
155
|
//get E+M - TDWG Mapping
|
156
|
// Map<String, String> emTdwgMap = getEmTdwgMap(source);
|
157
|
Map<String, NamedArea> emCodeToAreaMap = getEmCodeToAreaMap(source);
|
158
|
//fill regionMap
|
159
|
fillRegionMap(state, sqlWhere, emCodeToAreaMap);
|
160
|
|
161
|
commitTransaction(tx);
|
162
|
|
163
|
return;
|
164
|
} catch (NumberFormatException e) {
|
165
|
e.printStackTrace();
|
166
|
state.setUnsuccessfull();
|
167
|
return;
|
168
|
} catch (SQLException e) {
|
169
|
e.printStackTrace();
|
170
|
state.setUnsuccessfull();
|
171
|
return;
|
172
|
}
|
173
|
}
|
174
|
|
175
|
|
176
|
private Map<String, Language> iso6392Map = new HashMap<>();
|
177
|
|
178
|
@Override
|
179
|
public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, BerlinModelImportState state) {
|
180
|
boolean success = true ;
|
181
|
|
182
|
@SuppressWarnings("rawtypes")
|
183
|
Set<TaxonBase> taxaToSave = new HashSet<>();
|
184
|
@SuppressWarnings("unchecked")
|
185
|
Map<String, Taxon> taxonMap = partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE);
|
186
|
@SuppressWarnings("unchecked")
|
187
|
Map<String, TaxonName> taxonNameMap = partitioner.getObjectMap(BerlinModelTaxonNameImport.NAMESPACE);
|
188
|
@SuppressWarnings("unchecked")
|
189
|
Map<String, Reference> refMap = partitioner.getObjectMap(BerlinModelReferenceImport.REFERENCE_NAMESPACE);
|
190
|
|
191
|
|
192
|
// logger.warn("MisappliedNameRefFk not yet implemented for Common Names");
|
193
|
|
194
|
ResultSet rs = partitioner.getResultSet();
|
195
|
Integer lastCommonNameId = null;
|
196
|
try{
|
197
|
while (rs.next()){
|
198
|
|
199
|
//create TaxonName element
|
200
|
Integer commonNameId = rs.getInt("CommonNameId");
|
201
|
int taxonId = rs.getInt("taxonId");
|
202
|
Integer factTaxonId = nullSafeInt(rs, "factTaxonId");
|
203
|
Integer accTaxonId = nullSafeInt(rs, "accTaxonId"); //if common name is related to synonym this is the accepted taxon id
|
204
|
|
205
|
Integer refId = nullSafeInt(rs, "refId");
|
206
|
// Integer ptNameFk = nullSafeInt(rs,"PTNameFk");
|
207
|
String commonNameString = rs.getString("CommonName");
|
208
|
String iso639_2 = rs.getString("ISO639_2");
|
209
|
String iso639_1 = rs.getString("ISO639_1");
|
210
|
String languageString = rs.getString("Language");
|
211
|
String originalLanguageString = rs.getString("LanguageOriginal");
|
212
|
Integer misNameRefFk = nullSafeInt(rs, "MisNameRefFk");
|
213
|
Integer languageRefRefFk = nullSafeInt(rs, "languageRefRefFk");
|
214
|
String refLanguage = rs.getString("refLanguage");
|
215
|
String refLanguageIso639_2 = rs.getString("refLanguageIso639_2");
|
216
|
String status = rs.getString("Status");
|
217
|
Integer nameInSourceFk = nullSafeInt( rs, "NameInSourceFk");
|
218
|
Integer misappliedTaxonId = nullSafeInt( rs, "misappliedTaxonId");
|
219
|
|
220
|
if (commonNameId == lastCommonNameId){
|
221
|
logger.warn("CommonNameId >1 times in query. This may happen due to LEFT JOINS to fact and/or accepted taxon and e.g. multiple taxon relationships. 2018-04-01 no such double relation existed in E+M. ");
|
222
|
}else{
|
223
|
lastCommonNameId = commonNameId;
|
224
|
}
|
225
|
|
226
|
final String NO_REGION = "";
|
227
|
//regions
|
228
|
String regionFks = rs.getString("RegionFks");
|
229
|
String[] regionFkSplit = (regionFks==null)? new String[]{NO_REGION} : regionFks.trim().split(",");
|
230
|
if (regionFkSplit.length == 0){
|
231
|
String message = "regionFkSplit should never be empty but was for common name id " + commonNameId;
|
232
|
logger.warn(message);
|
233
|
}
|
234
|
|
235
|
//commonNameString
|
236
|
if (isBlank(commonNameString)){
|
237
|
String message = "CommonName is empty or null. Do not import record for taxon " + taxonId;
|
238
|
logger.warn(message);
|
239
|
continue;
|
240
|
}
|
241
|
|
242
|
//taxon
|
243
|
Taxon taxon = null;
|
244
|
TaxonBase<?> taxonBase = taxonMap.get(String.valueOf(taxonId));
|
245
|
if (taxonBase == null){
|
246
|
logger.warn("Taxon (" + taxonId + ") could not be found. Common name " + commonNameString + "(" + commonNameId + ") not imported");
|
247
|
continue;
|
248
|
}else if (taxonBase.isInstanceOf(Taxon.class)){
|
249
|
taxon = CdmBase.deproxy(taxonBase, Taxon.class);
|
250
|
if (factTaxonId != null && !factTaxonId.equals(taxonId)){
|
251
|
logger.warn("Fact taxon ("+factTaxonId+") for common name "+commonNameId+" differs from common name taxon " + taxonId);
|
252
|
}
|
253
|
}else{
|
254
|
Taxon factTaxon = null;
|
255
|
if (factTaxonId != null && factTaxonId.equals(accTaxonId)){
|
256
|
factTaxon = taxonMap.get(String.valueOf(factTaxonId));
|
257
|
}
|
258
|
if (factTaxon != null){
|
259
|
taxon = factTaxon;
|
260
|
}else{
|
261
|
logger.warn("taxon (" + taxonId + ") is not accepted. Can't import common name " + commonNameId + ". FactTaxonId= " + factTaxonId + "; accTaxonId = " + accTaxonId);
|
262
|
continue;
|
263
|
}
|
264
|
}
|
265
|
|
266
|
//Language
|
267
|
Language language = getAndHandleLanguage(iso6392Map, iso639_2, iso639_1, languageString, originalLanguageString, state);
|
268
|
|
269
|
//CommonTaxonName
|
270
|
List<CommonTaxonName> commonTaxonNames = new ArrayList<>();
|
271
|
for (String regionFk : regionFkSplit){ //
|
272
|
CommonTaxonName commonTaxonName;
|
273
|
if (commonTaxonNames.size() == 0){
|
274
|
commonTaxonName = CommonTaxonName.NewInstance(commonNameString, language);
|
275
|
}else{
|
276
|
commonTaxonName = (CommonTaxonName)commonTaxonNames.get(0).clone();
|
277
|
}
|
278
|
commonTaxonNames.add(commonTaxonName);
|
279
|
regionFk = regionFk.trim();
|
280
|
NamedArea area = regionFkToAreaMap.get(regionFk);
|
281
|
if (area == null){
|
282
|
if (isNotBlank(regionFk) && regionFk != NO_REGION){
|
283
|
logger.warn("Area for " + regionFk + " not defined in regionMap.");
|
284
|
}else{
|
285
|
//no region is defined
|
286
|
}
|
287
|
}else{
|
288
|
commonTaxonName.setArea(area);
|
289
|
}
|
290
|
TaxonDescription description = getDescription(taxon);
|
291
|
description.addElement(commonTaxonName);
|
292
|
}
|
293
|
|
294
|
//Reference/Source
|
295
|
if (! CdmUtils.nullSafeEqual(refId, languageRefRefFk)){
|
296
|
//use strRefId if languageRefFk is null
|
297
|
if (languageRefRefFk == null){
|
298
|
languageRefRefFk = refId;
|
299
|
}else{
|
300
|
logger.warn("CommonName.RefFk (" + CdmUtils.Nz(refId) + ") and LanguageReference.RefFk " + languageRefRefFk + " are not equal. I will import only languageReference.RefFk");
|
301
|
}
|
302
|
}
|
303
|
|
304
|
Reference reference = refMap.get(String.valueOf(languageRefRefFk));
|
305
|
if (reference == null && languageRefRefFk != null){
|
306
|
logger.warn("CommonName reference was null but reference exists. languageRefRefFk = " + languageRefRefFk + "; commonNameId = " + commonNameId);
|
307
|
}
|
308
|
String microCitation = null;
|
309
|
String originalNameString = null;
|
310
|
|
311
|
TaxonName nameUsedInSource = taxonNameMap.get(String.valueOf(nameInSourceFk));
|
312
|
if (nameInSourceFk != null && nameUsedInSource == null){
|
313
|
if (nameInSourceFk != -1 || !state.getConfig().isEuroMed()){
|
314
|
logger.warn("Name used in source (" + nameInSourceFk + ") was not found for common name " + commonNameId);
|
315
|
}
|
316
|
}
|
317
|
for (CommonTaxonName commonTaxonName : commonTaxonNames){
|
318
|
DescriptionElementSource source = DescriptionElementSource.NewPrimarySourceInstance(reference, microCitation, nameUsedInSource, originalNameString);
|
319
|
commonTaxonName.addSource(source);
|
320
|
}
|
321
|
|
322
|
//MisNameRef
|
323
|
if (misNameRefFk != null){
|
324
|
//Taxon misappliedName = getMisappliedName(biblioRefMap, nomRefMap, misNameRefFk, taxon);
|
325
|
Taxon misappliedNameTaxon = null;
|
326
|
if (misappliedTaxonId != null){
|
327
|
TaxonBase<?> misTaxonBase = taxonMap.get(String.valueOf(misappliedTaxonId));
|
328
|
if (misTaxonBase == null){
|
329
|
logger.warn("MisappliedName not found for misappliedTaxonId " + misappliedTaxonId + "; commonNameId: " + commonNameId);
|
330
|
}else if (misTaxonBase.isInstanceOf(Taxon.class)){
|
331
|
misappliedNameTaxon = CdmBase.deproxy(misTaxonBase, Taxon.class);
|
332
|
}else{
|
333
|
logger.warn("Misapplied name taxon is not of type Taxon but " + misTaxonBase.getClass().getSimpleName());
|
334
|
}
|
335
|
}else{
|
336
|
Reference sec = refMap.get(String.valueOf(misNameRefFk));
|
337
|
if (nameUsedInSource == null || sec == null){
|
338
|
logger.warn("Taxon name or misapplied name reference is null for common name " + commonNameId);
|
339
|
}else{
|
340
|
misappliedNameTaxon = Taxon.NewInstance(nameUsedInSource, sec);
|
341
|
MarkerType misCommonNameMarker = getMarkerType(state, BerlinModelTransformer.uuidMisappliedCommonName,"Misapplied Common Name in Berlin Model",
|
342
|
"Misapplied taxon was automatically created by Berlin Model import for a common name with a misapplied name reference", "MCN", getEuroMedMarkerTypeVoc(state));
|
343
|
Marker marker = Marker.NewInstance(misCommonNameMarker, true);
|
344
|
misappliedNameTaxon.addMarker(marker);
|
345
|
taxaToSave.add(misappliedNameTaxon);
|
346
|
logger.warn("Misapplied name taxon could not be found in database but misapplied name reference exists for common name. " +
|
347
|
"New misapplied name for misapplied reference common name was added. CommonNameId: " + commonNameId);
|
348
|
}
|
349
|
}
|
350
|
if (misappliedNameTaxon != null){
|
351
|
|
352
|
if (! taxon.getMisappliedNames(false).contains(misappliedNameTaxon)){
|
353
|
taxon.addMisappliedName(misappliedNameTaxon, state.getTransactionalSourceReference(), null);
|
354
|
logger.warn("Misapplied name for common name was not found related to the accepted taxon. Created new relationship. CommonNameId: " + commonNameId);
|
355
|
}
|
356
|
|
357
|
//add common name also to missaplied taxon
|
358
|
//TODO is this really wanted
|
359
|
TaxonDescription misappliedNameDescription = getDescription(misappliedNameTaxon);
|
360
|
for (CommonTaxonName commonTaxonName : commonTaxonNames){
|
361
|
CommonTaxonName commonNameClone = (CommonTaxonName)commonTaxonName.clone();
|
362
|
misappliedNameDescription.addElement(commonNameClone);
|
363
|
doIdCreatedUpdatedNotes(state, commonNameClone, rs, String.valueOf(commonNameId), NAMESPACE);
|
364
|
}
|
365
|
}else{
|
366
|
//wird schon oben gelogged
|
367
|
//logger.warn("Misapplied name is null for common name " + commonNameId);
|
368
|
}
|
369
|
|
370
|
}
|
371
|
|
372
|
//reference extensions
|
373
|
if (reference != null){
|
374
|
if (isNotBlank(refLanguage) && !reference.hasExtension(REFERENCE_LANGUAGE_STRING_UUID, refLanguage)){
|
375
|
ExtensionType refLanguageExtensionType = getExtensionType( state, REFERENCE_LANGUAGE_STRING_UUID, "reference language","The language of the reference","ref. lang.");
|
376
|
Extension.NewInstance(reference, refLanguage, refLanguageExtensionType);
|
377
|
}
|
378
|
|
379
|
if (isNotBlank(refLanguageIso639_2) && !reference.hasExtension(REFERENCE_LANGUAGE_ISO639_2_UUID, refLanguage)){
|
380
|
ExtensionType refLanguageIsoExtensionType = getExtensionType( state, REFERENCE_LANGUAGE_ISO639_2_UUID, "reference language iso 639-2","The iso 639-2 code of the references language","ref. lang. 639-2");
|
381
|
Extension.NewInstance(reference, refLanguageIso639_2, refLanguageIsoExtensionType);
|
382
|
}
|
383
|
}else if (isNotBlank(refLanguage) || isNotBlank(refLanguageIso639_2)){
|
384
|
logger.warn("Reference is null (" + languageRefRefFk + ") but refLanguage (" + CdmUtils.Nz(refLanguage) + ") or iso639_2 (" + CdmUtils.Nz(refLanguageIso639_2) + ") was not null for common name ("+ commonNameId +")");
|
385
|
}
|
386
|
|
387
|
//status
|
388
|
if (isNotBlank(status)){
|
389
|
TermVocabulary<MarkerType> markerTypeVoc = getEuroMedMarkerTypeVoc(state);
|
390
|
MarkerType recommendedMarkerType = getMarkerType( state, COMMONNAME_STATUS_RECOMMENDED_UUID, "recommended","If the common name has the status recommended (see also status 'synonym', if none of them is true the default status is 'unassessed')",
|
391
|
"recommended", markerTypeVoc);
|
392
|
MarkerType synonymMarkerType = getMarkerType( state, COMMONNAME_STATUS_SYNONYM_UUID, "synonym","If the common name has the status synonym (see also status 'recommended', if none of them is true the default status is 'unassessed')",
|
393
|
"synonym", markerTypeVoc);
|
394
|
for (CommonTaxonName commonTaxonName : commonTaxonNames){
|
395
|
Marker marker = null;
|
396
|
if (status.equals("recommended")){
|
397
|
marker = Marker.NewInstance(recommendedMarkerType, true);
|
398
|
}else if (status.equals("synonym")){
|
399
|
marker = Marker.NewInstance(synonymMarkerType, true);
|
400
|
}else if (status.equals("unassessed")){
|
401
|
//do nothing
|
402
|
}else{
|
403
|
logger.warn("Unknown common name status: " + status);
|
404
|
}
|
405
|
if (marker != null){
|
406
|
commonTaxonName.addMarker(marker);
|
407
|
}
|
408
|
}
|
409
|
}
|
410
|
|
411
|
//Notes
|
412
|
for (CommonTaxonName commonTaxonName : commonTaxonNames){
|
413
|
doIdCreatedUpdatedNotes(state, commonTaxonName, rs, String.valueOf(commonNameId), NAMESPACE);
|
414
|
}
|
415
|
partitioner.startDoSave();
|
416
|
taxaToSave.add(taxon);
|
417
|
|
418
|
}
|
419
|
} catch (SQLException e) {
|
420
|
logger.error("SQLException:" + e);
|
421
|
return false;
|
422
|
} catch (ClassCastException e) {
|
423
|
e.printStackTrace();
|
424
|
} catch (Exception e) {
|
425
|
throw e;
|
426
|
}
|
427
|
|
428
|
// logger.info( i + " names handled");
|
429
|
getTaxonService().save(taxaToSave);
|
430
|
return success;
|
431
|
|
432
|
}
|
433
|
|
434
|
|
435
|
/**
|
436
|
* @param iso6392Map
|
437
|
* @param iso639_2
|
438
|
* @param languageString
|
439
|
* @param originalLanguageString
|
440
|
* @param state
|
441
|
* @return
|
442
|
*/
|
443
|
private Language getAndHandleLanguage(Map<String, Language> iso639Map, String iso639_2, String iso639_1, String languageString, String originalLanguageString, BerlinModelImportState state) {
|
444
|
Language language;
|
445
|
if (isNotBlank(iso639_2)|| isNotBlank(iso639_1) ){
|
446
|
//TODO test performance, implement in state
|
447
|
language = getLanguageFromIsoMap(iso639Map, iso639_2, iso639_1);
|
448
|
|
449
|
if (language == null){
|
450
|
language = getTermService().getLanguageByIso(iso639_2);
|
451
|
iso639Map.put(iso639_2, language);
|
452
|
if (language == null){
|
453
|
try {
|
454
|
language = getTermService().getLanguageByIso(iso639_1);
|
455
|
} catch (Exception e) {
|
456
|
// TODO Auto-generated catch block
|
457
|
// TODO remove if problem with duplicate DescElement_Annot id is solved
|
458
|
e.printStackTrace();
|
459
|
}
|
460
|
iso639Map.put(iso639_1, language);
|
461
|
}
|
462
|
if (language == null){
|
463
|
logger.warn("Language for code ISO693-2 '" + iso639_2 + "' and ISO693-1 '" + iso639_1 + "' was not found");
|
464
|
}
|
465
|
}
|
466
|
} else if ("unknown".equals(languageString)){
|
467
|
language = Language.UNKNOWN_LANGUAGE();
|
468
|
} else if ("Majorcan".equalsIgnoreCase(languageString)){
|
469
|
language = getLanguage(state, BerlinModelTransformer.uuidLangMajorcan, "Majorcan", "Majorcan (original 'mallorqu\u00EDn')", null);
|
470
|
} else if ("High Aragonese".equalsIgnoreCase(languageString)){
|
471
|
language = getLanguage(state, BerlinModelTransformer.uuidLangHighAragonese, "High Aragonese", "High Aragonese (original 'altoaragonés')", null);
|
472
|
} else if ("Valencian".equalsIgnoreCase(languageString)){
|
473
|
language = getLanguage(state, BerlinModelTransformer.uuidLangValencian, "Valencian", "Valencian (original 'valenciano')", null);
|
474
|
}else{
|
475
|
logger.warn("language ISO 639_1 and ISO 639_2 were empty for " + languageString);
|
476
|
language = null;
|
477
|
}
|
478
|
addOriginalLanguage(language, originalLanguageString);
|
479
|
return language;
|
480
|
}
|
481
|
|
482
|
|
483
|
/**
|
484
|
* @param iso639Map
|
485
|
* @param iso639_2
|
486
|
* @param iso639_1
|
487
|
* @return
|
488
|
*/
|
489
|
private Language getLanguageFromIsoMap(Map<String, Language> iso639Map, String iso639_2, String iso639_1) {
|
490
|
Language language;
|
491
|
language = iso639Map.get(iso639_2);
|
492
|
if (language == null){
|
493
|
language = iso639Map.get(iso639_1);
|
494
|
}
|
495
|
return language;
|
496
|
}
|
497
|
|
498
|
/**
|
499
|
* @param language
|
500
|
* @param originalLanguageString
|
501
|
*/
|
502
|
private void addOriginalLanguage(Language language, String originalLanguageString) {
|
503
|
if (isBlank(originalLanguageString)){
|
504
|
return;
|
505
|
}else if (language == null){
|
506
|
logger.warn("Language could not be defined, but originalLanguageString exists: " + originalLanguageString);
|
507
|
}else {
|
508
|
Representation representation = language.getRepresentation(language);
|
509
|
if (representation == null){
|
510
|
language.addRepresentation(Representation.NewInstance(originalLanguageString, originalLanguageString, originalLanguageString, language));
|
511
|
getTermService().saveOrUpdate(language);
|
512
|
}
|
513
|
}
|
514
|
|
515
|
}
|
516
|
|
517
|
|
518
|
|
519
|
/**
|
520
|
* Fills the regionFks with all regionFks from emCommonName. Comma separated regionFks will be split.
|
521
|
* @param state
|
522
|
* @param regionFks
|
523
|
* @param source
|
524
|
* @return
|
525
|
* @throws SQLException
|
526
|
*
|
527
|
*/
|
528
|
private void fillRegionFks(BerlinModelImportState state, SortedSet<Integer> regionFks,
|
529
|
Source source) throws SQLException {
|
530
|
String sql =
|
531
|
" SELECT DISTINCT RegionFks "
|
532
|
+ " FROM emCommonName";
|
533
|
if (state.getConfig().getCommonNameFilter() != null){
|
534
|
sql += " WHERE " + state.getConfig().getCommonNameFilter();
|
535
|
}
|
536
|
|
537
|
ResultSet rs = source.getResultSet(sql);
|
538
|
while (rs.next()){
|
539
|
String strRegionFks = rs.getString("RegionFks");
|
540
|
if (isBlank(strRegionFks)){
|
541
|
continue;
|
542
|
}
|
543
|
|
544
|
String[] regionFkArray = strRegionFks.split(",");
|
545
|
for (String regionFk: regionFkArray){
|
546
|
regionFk = regionFk.trim();
|
547
|
if (! StringUtils.isNumeric(regionFk) || "".equals(regionFk) ){
|
548
|
state.setUnsuccessfull();
|
549
|
logger.warn("RegionFk is not numeric: " + regionFk + " ( part of " + strRegionFks + ")");
|
550
|
}else{
|
551
|
regionFks.add(Integer.valueOf(regionFk));
|
552
|
}
|
553
|
}
|
554
|
}
|
555
|
return;
|
556
|
}
|
557
|
|
558
|
|
559
|
|
560
|
/**
|
561
|
* Fills the {@link #regionMap} by all emLanguageRegion regions defined in the sql filter.
|
562
|
* {@link #regionMap} maps emLanguageRegion.RegionId to named areas.
|
563
|
* @param state
|
564
|
* @param sqlWhere
|
565
|
* @param emTdwgMap
|
566
|
* @throws SQLException
|
567
|
*/
|
568
|
private void fillRegionMap_old(BerlinModelImportState state, String sqlWhere,
|
569
|
Map<String, NamedArea> emCodeToAreaMap) throws SQLException {
|
570
|
|
571
|
Source source = state.getConfig().getSource();
|
572
|
String sql =
|
573
|
" SELECT RegionId, Region "
|
574
|
+ " FROM emLanguageRegion "
|
575
|
+ " WHERE RegionId IN ("+ sqlWhere+ ") ";
|
576
|
ResultSet rs = source.getResultSet(sql);
|
577
|
while (rs.next()){
|
578
|
Object regionId = rs.getObject("RegionId");
|
579
|
String region = rs.getString("Region");
|
580
|
String[] splitRegion = region.split("-");
|
581
|
if (splitRegion.length <= 1){
|
582
|
NamedArea newArea = getNamedArea(state, null, region, "Language region '" + region + "'", null, null, null);
|
583
|
// getTermService().save(newArea);
|
584
|
regionFkToAreaMap.put(String.valueOf(regionId), newArea);
|
585
|
logger.warn("Found new area: " + region);
|
586
|
}else if (splitRegion.length == 2){
|
587
|
String emCode = splitRegion[1].trim().replace(" ", "");
|
588
|
|
589
|
NamedArea area = emCodeToAreaMap.get(emCode);
|
590
|
if (area == null){
|
591
|
area = normalizeAmbigousAreas(emCode, emCodeToAreaMap);
|
592
|
}
|
593
|
if (area == null){
|
594
|
|
595
|
String[] splits = emCode.split("/");
|
596
|
if (splits.length == 2){
|
597
|
area = emCodeToAreaMap.get(splits[0]);
|
598
|
}
|
599
|
if (area != null){
|
600
|
logger.warn("emCode ambigous. This should not happen anymore due to normalization! Use larger area as default: " + CdmUtils.Nz(emCode) + "->" + regionId);
|
601
|
}else{
|
602
|
logger.warn("emCode not recognized. Region not defined: " + CdmUtils.Nz(emCode) + "->" + regionId);
|
603
|
}
|
604
|
}
|
605
|
if (area != null){
|
606
|
regionFkToAreaMap.put(String.valueOf(regionId), area);
|
607
|
}
|
608
|
}
|
609
|
}
|
610
|
}
|
611
|
|
612
|
private void fillRegionMap(BerlinModelImportState state, String sqlWhere,
|
613
|
Map<String, NamedArea> emCodeToAreaMap) throws SQLException {
|
614
|
|
615
|
@SuppressWarnings("unchecked")
|
616
|
OrderedTermVocabulary<NamedArea> voc = areaVoc = OrderedTermVocabulary.NewInstance(TermType.NamedArea, "Euro+Med common name areas", "E+M Common Name Areas", null, null);
|
617
|
getVocabularyService().save(areaVoc);
|
618
|
|
619
|
Map<String,NamedArea> existingAreas = new HashMap<>();
|
620
|
Source source = state.getConfig().getSource();
|
621
|
String sql =
|
622
|
" SELECT RegionId, Region "
|
623
|
+ " FROM emLanguageRegion "
|
624
|
+ " WHERE RegionId IN ("+ sqlWhere+ ") "
|
625
|
+ " ORDER BY Region ";
|
626
|
ResultSet rs = source.getResultSet(sql);
|
627
|
while (rs.next()){
|
628
|
Object regionId = rs.getObject("RegionId");
|
629
|
String region = rs.getString("Region");
|
630
|
|
631
|
NamedArea area;
|
632
|
if (existingAreas.containsKey(region)){
|
633
|
area = existingAreas.get(region);
|
634
|
}else{
|
635
|
|
636
|
String[] splitRegion = region.split("-");
|
637
|
|
638
|
String emMapping = "None";
|
639
|
if (splitRegion.length == 2){
|
640
|
String emCode = splitRegion[1].trim().replace(" ", "");
|
641
|
|
642
|
NamedArea emArea = emCodeToAreaMap.get(emCode);
|
643
|
if (emArea == null){
|
644
|
emArea = normalizeAmbigousAreas(emCode, emCodeToAreaMap);
|
645
|
}
|
646
|
if (emArea == null){
|
647
|
|
648
|
String[] splits = emCode.split("/");
|
649
|
if (splits.length == 2){
|
650
|
emArea = emCodeToAreaMap.get(splits[0]);
|
651
|
}
|
652
|
if (emArea != null){
|
653
|
logger.warn("emCode ambigous. This should not happen anymore due to normalization! Use larger area as default: " + CdmUtils.Nz(emCode) + "->" + regionId);
|
654
|
}else{
|
655
|
logger.warn("emCode not recognized. Region not defined: " + CdmUtils.Nz(emCode) + "->" + regionId);
|
656
|
}
|
657
|
}
|
658
|
emMapping = emArea == null? "not recognized": emArea.getIdInVocabulary();
|
659
|
}
|
660
|
|
661
|
String label = splitRegion[0].trim();
|
662
|
String description = "Language region '" + region + "'; EM Area Mapping: " + emMapping ;
|
663
|
area = getNamedArea(state, null, label, description, null, null, null, voc, null);
|
664
|
existingAreas.put(region, area);
|
665
|
}
|
666
|
regionFkToAreaMap.put(String.valueOf(regionId), area);
|
667
|
}
|
668
|
}
|
669
|
|
670
|
/**
|
671
|
* Use area according to mail ERS 2018-09-24
|
672
|
* @param emCodeToAreaMap
|
673
|
*/
|
674
|
private NamedArea normalizeAmbigousAreas(String emCode, Map<String, NamedArea> emCodeToAreaMap) {
|
675
|
if (emCode == null){
|
676
|
return null;
|
677
|
}else if (emCode.equals("Ar/Ar(A)")){
|
678
|
return emCodeToAreaMap.get("Ar");
|
679
|
}else if (emCode.equals("Ab/Ab(A)")){
|
680
|
return emCodeToAreaMap.get("Ab");
|
681
|
}else if (emCode.equals("Ga/Ga(F)")){
|
682
|
return emCodeToAreaMap.get("Ga(F)");
|
683
|
}else if (emCode.equals("Hb/Hb(E)")){
|
684
|
return emCodeToAreaMap.get("Hb");
|
685
|
}else if (emCode.equals("It/It(I)")){
|
686
|
return emCodeToAreaMap.get("It");
|
687
|
}else if (emCode.equals("Uk/Uk(U)")){
|
688
|
return emCodeToAreaMap.get("Uk(U)");
|
689
|
}else if (emCode.equals("Hs/Hs(S)")){
|
690
|
return emCodeToAreaMap.get("Hs(S)");
|
691
|
}
|
692
|
return null;
|
693
|
}
|
694
|
|
695
|
/**
|
696
|
* @param regionFks
|
697
|
* @return
|
698
|
*/
|
699
|
private String getSqlWhere(SortedSet<Integer> regionFks) {
|
700
|
String sqlWhere = "";
|
701
|
for (Integer regionFk : regionFks){
|
702
|
sqlWhere += regionFk + ",";
|
703
|
}
|
704
|
sqlWhere = sqlWhere.substring(0, sqlWhere.length()-1);
|
705
|
return sqlWhere;
|
706
|
}
|
707
|
|
708
|
// /**
|
709
|
// * Returns a map which is filled by the emCode->TdwgCode mapping defined in emArea.
|
710
|
// * Some exceptions are defined for emCode 'Ab','Rf','Uk' and some additional mapping is added
|
711
|
// * for 'Ab / Ab(A)', 'Ga / Ga(F)', 'It / It(I)', 'Ar / Ar(A)','Hs / Hs(S)'
|
712
|
// * @param source
|
713
|
// * @throws SQLException
|
714
|
// */
|
715
|
// private Map<String, String> getEmTdwgMap(Source source) throws SQLException {
|
716
|
//
|
717
|
// Map<String, String> emTdwgMap = new HashMap<>();
|
718
|
// String sql = " SELECT EmCode, TDWGCode "
|
719
|
// + " FROM emArea ";
|
720
|
// ResultSet rs = source.getResultSet(sql);
|
721
|
// while (rs.next()){
|
722
|
// String emCode = rs.getString("EMCode");
|
723
|
// String TDWGCode = rs.getString("TDWGCode");
|
724
|
// if (isNotBlank(emCode) ){
|
725
|
// emCode = emCode.trim();
|
726
|
// if (emCode.equalsIgnoreCase("Ab") || emCode.equalsIgnoreCase("Rf")||
|
727
|
// emCode.equalsIgnoreCase("Uk") || emCode.equalsIgnoreCase("Gg")
|
728
|
// || emCode.equalsIgnoreCase("SM") || emCode.equalsIgnoreCase("Tu")){
|
729
|
// emTdwgMap.put(emCode, emCode);
|
730
|
// }else if (isNotBlank(TDWGCode)){
|
731
|
// emTdwgMap.put(emCode, TDWGCode.trim());
|
732
|
// }
|
733
|
// }
|
734
|
// }
|
735
|
// emTdwgMap.put("Ab / Ab(A)", "Ab");
|
736
|
// emTdwgMap.put("Ga / Ga(F)", "FRA-FR");
|
737
|
// emTdwgMap.put("It / It(I)", "ITA");
|
738
|
// emTdwgMap.put("Uk / Uk(U)", "Uk");
|
739
|
// emTdwgMap.put("Ar / Ar(A)", "TCS-AR");
|
740
|
// emTdwgMap.put("Hs / Hs(S)", "SPA-SP");
|
741
|
// emTdwgMap.put("Hb / Hb(E)", "IRE-IR");
|
742
|
//
|
743
|
// return emTdwgMap;
|
744
|
// }
|
745
|
|
746
|
|
747
|
|
748
|
/**
|
749
|
* @param source
|
750
|
* @return
|
751
|
* @throws SQLException
|
752
|
*/
|
753
|
private Map<String, NamedArea> getEmCodeToAreaMap(Source source) throws SQLException {
|
754
|
Map<String, NamedArea> emCodeToAreaMap = new HashMap<>();
|
755
|
String sql =
|
756
|
" SELECT EmCode, AreaId "
|
757
|
+ " FROM emArea ";
|
758
|
ResultSet rs = source.getResultSet(sql);
|
759
|
while (rs.next()){
|
760
|
|
761
|
String emCode = rs.getString("EMCode");
|
762
|
if (isNotBlank(emCode)){
|
763
|
Integer areaId = rs.getInt("AreaId");
|
764
|
NamedArea area = getAreaByAreaId(areaId);
|
765
|
if (area != null){
|
766
|
emCodeToAreaMap.put(emCode.trim(), area);
|
767
|
}else{
|
768
|
logger.warn("Area not found for areaId " + areaId);
|
769
|
}
|
770
|
}
|
771
|
|
772
|
}
|
773
|
|
774
|
// emTdwgMap.put("Ab / Ab(A)", "Ab");
|
775
|
|
776
|
return emCodeToAreaMap;
|
777
|
}
|
778
|
|
779
|
/**
|
780
|
* @param emCode
|
781
|
* @return
|
782
|
*/
|
783
|
private NamedArea getAreaByAreaId(int areaId) {
|
784
|
NamedArea result = null;
|
785
|
String areaIdStr = String.valueOf(areaId);
|
786
|
OrderedTermVocabulary<NamedArea> voc = getEmAreaVoc();
|
787
|
getVocabularyService().update(voc);
|
788
|
for (NamedArea area : voc.getTerms()){
|
789
|
for (IdentifiableSource source : area.getSources()){
|
790
|
if (areaIdStr.equals(source.getIdInSource()) && BerlinModelAreaImport.NAMESPACE.equals(source.getIdNamespace())){
|
791
|
if (result != null){
|
792
|
logger.warn("Result for areaId already exists. areaId: " + areaId);
|
793
|
}
|
794
|
result = area;
|
795
|
}
|
796
|
}
|
797
|
}
|
798
|
return result;
|
799
|
}
|
800
|
|
801
|
private OrderedTermVocabulary<NamedArea> areaVoc;
|
802
|
@SuppressWarnings("unchecked")
|
803
|
private OrderedTermVocabulary<NamedArea> getEmAreaVoc(){
|
804
|
if (areaVoc == null){
|
805
|
areaVoc = (OrderedTermVocabulary<NamedArea>)getVocabularyService().find(BerlinModelTransformer.uuidVocEuroMedAreas);
|
806
|
}
|
807
|
return areaVoc;
|
808
|
}
|
809
|
|
810
|
|
811
|
/**
|
812
|
* Returns the first non-image gallery description. Creates a new one if no description exists.
|
813
|
* @param taxon
|
814
|
* @return
|
815
|
*/
|
816
|
private TaxonDescription getDescription(Taxon taxon) {
|
817
|
TaxonDescription result = null;
|
818
|
for (TaxonDescription taxonDescription : taxon.getDescriptions()){
|
819
|
if (! taxonDescription.isImageGallery()){
|
820
|
result = taxonDescription;
|
821
|
}
|
822
|
}
|
823
|
if (result == null){
|
824
|
result = TaxonDescription.NewInstance(taxon);
|
825
|
}
|
826
|
return result;
|
827
|
}
|
828
|
|
829
|
@Override
|
830
|
public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, BerlinModelImportState state) {
|
831
|
|
832
|
String nameSpace;
|
833
|
Set<String> idSet;
|
834
|
Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
|
835
|
|
836
|
String pos = "0";
|
837
|
try{
|
838
|
Set<String> taxonIdSet = new HashSet<>();
|
839
|
Set<String> nameIdSet = new HashSet<>();
|
840
|
Set<String> referenceIdSet = new HashSet<>();
|
841
|
while (rs.next()){
|
842
|
handleForeignKey(rs, taxonIdSet, "taxonId");
|
843
|
handleForeignKey(rs, taxonIdSet, "factTaxonId");
|
844
|
handleForeignKey(rs, taxonIdSet, "misappliedTaxonId");
|
845
|
handleForeignKey(rs, referenceIdSet, "refId");
|
846
|
handleForeignKey(rs, referenceIdSet, "languageRefRefFk");
|
847
|
handleForeignKey(rs, nameIdSet, "NameInSourceFk");
|
848
|
handleForeignKey(rs, nameIdSet, "PTNameFk");
|
849
|
handleForeignKey(rs, referenceIdSet, "MisNameRefFk");
|
850
|
}
|
851
|
|
852
|
//name map
|
853
|
nameSpace = BerlinModelTaxonNameImport.NAMESPACE;
|
854
|
idSet = nameIdSet;
|
855
|
Map<String, TaxonName> nameMap = getCommonService().getSourcedObjectsByIdInSourceC(TaxonName.class, idSet, nameSpace);
|
856
|
result.put(nameSpace, nameMap);
|
857
|
|
858
|
//taxon map
|
859
|
nameSpace = BerlinModelTaxonImport.NAMESPACE;
|
860
|
idSet = taxonIdSet;
|
861
|
@SuppressWarnings("rawtypes")
|
862
|
Map<String, TaxonBase> taxonMap = getCommonService().getSourcedObjectsByIdInSourceC(TaxonBase.class, idSet, nameSpace);
|
863
|
result.put(nameSpace, taxonMap);
|
864
|
|
865
|
//reference map
|
866
|
nameSpace = BerlinModelReferenceImport.REFERENCE_NAMESPACE;
|
867
|
idSet = referenceIdSet;
|
868
|
@SuppressWarnings("unchecked")
|
869
|
Map<String, Reference> referenceMap = getCommonService().getSourcedObjectsByIdInSourceC(Reference.class, idSet, nameSpace);
|
870
|
result.put(nameSpace, referenceMap);
|
871
|
// TODO remove if problem with duplicate DescElement_Annot id is solved
|
872
|
} catch (SQLException e) {
|
873
|
throw new RuntimeException("pos: " + pos, e);
|
874
|
} catch (NullPointerException nep){
|
875
|
logger.error("NullPointerException in getRelatedObjectsForPartition()");
|
876
|
}
|
877
|
return result;
|
878
|
}
|
879
|
|
880
|
|
881
|
@Override
|
882
|
protected boolean doCheck(BerlinModelImportState state){
|
883
|
IOValidator<BerlinModelImportState> validator = new BerlinModelCommonNamesImportValidator();
|
884
|
return validator.validate(state);
|
885
|
}
|
886
|
|
887
|
|
888
|
@Override
|
889
|
protected boolean isIgnore(BerlinModelImportState state){
|
890
|
return ! state.getConfig().isDoCommonNames();
|
891
|
}
|
892
|
|
893
|
}
|