1
|
/**
|
2
|
* Copyright (C) 2007 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
|
10
|
package eu.etaxonomy.cdm.io.excel.common;
|
11
|
|
12
|
import java.util.HashMap;
|
13
|
import java.util.List;
|
14
|
import java.util.Set;
|
15
|
import java.util.UUID;
|
16
|
|
17
|
import org.apache.commons.lang.StringUtils;
|
18
|
import org.apache.log4j.Logger;
|
19
|
|
20
|
import eu.etaxonomy.cdm.api.service.pager.Pager;
|
21
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
22
|
import eu.etaxonomy.cdm.io.excel.common.ExcelRowBase.PostfixTerm;
|
23
|
import eu.etaxonomy.cdm.io.specimen.excel.in.SpecimenCdmExcelImportState;
|
24
|
import eu.etaxonomy.cdm.io.specimen.excel.in.SpecimenRow;
|
25
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
26
|
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
|
27
|
import eu.etaxonomy.cdm.model.common.Extension;
|
28
|
import eu.etaxonomy.cdm.model.common.ExtensionType;
|
29
|
import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
|
30
|
import eu.etaxonomy.cdm.model.description.Feature;
|
31
|
|
32
|
/**
|
33
|
* @author a.mueller
|
34
|
* @since 12.07.2011
|
35
|
*/
|
36
|
public abstract class ExcelTaxonOrSpecimenImportBase<STATE extends ExcelImportState<CONFIG, ROW>, CONFIG extends ExcelImportConfiguratorBase, ROW extends ExcelRowBase>
|
37
|
extends ExcelImportBase<STATE, CONFIG, ROW> {
|
38
|
private static final Logger logger = Logger.getLogger(ExcelTaxonOrSpecimenImportBase.class);
|
39
|
|
40
|
|
41
|
protected static final String CDM_UUID_COLUMN = "(?i)(CdmUuid)";
|
42
|
protected static final String IGNORE_COLUMN = "(?i)(Ignore|Not)";
|
43
|
|
44
|
|
45
|
protected static final String RANK_COLUMN = "(?i)(Rank)";
|
46
|
protected static final String FULL_NAME_COLUMN = "(?i)(FullName)";
|
47
|
protected static final String TAXON_UUID_COLUMN = "(?i)(taxonUuid)";
|
48
|
protected static final String FAMILY_COLUMN = "(?i)(Family)";
|
49
|
protected static final String GENUS_COLUMN = "(?i)(Genus)";
|
50
|
protected static final String SPECIFIC_EPITHET_COLUMN = "(?i)(SpecificEpi(thet)?)";
|
51
|
protected static final String INFRASPECIFIC_EPITHET_COLUMN = "(?i)(InfraSpecificEpi(thet)?)";
|
52
|
|
53
|
protected static final String LANGUAGE = "(?i)(Language)";
|
54
|
|
55
|
@Override
|
56
|
protected void analyzeRecord(HashMap<String, String> record, STATE state) {
|
57
|
Set<String> keys = record.keySet();
|
58
|
|
59
|
ROW row = createDataHolderRow();
|
60
|
state.setCurrentRow(row);
|
61
|
|
62
|
for (String originalKey: keys) {
|
63
|
KeyValue keyValue = makeKeyValue(record, originalKey, state);
|
64
|
if (StringUtils.isBlank(keyValue.value)){
|
65
|
continue;
|
66
|
}
|
67
|
if (isBaseColumn(keyValue)){
|
68
|
handleBaseColumn(keyValue, row);
|
69
|
}else{
|
70
|
analyzeSingleValue(keyValue, state);
|
71
|
}
|
72
|
}
|
73
|
return;
|
74
|
}
|
75
|
|
76
|
/**
 * Creates the row object that collects the data of a single record.
 * It is called once per record by <code>analyzeRecord</code>, which registers the
 * returned instance as the current row of the import state before the columns are analyzed.
 *
 * @return a new row instance of the concrete import
 */
protected abstract ROW createDataHolderRow();
|
77
|
|
78
|
/**
|
79
|
* Analyzes a single record value and fills the row instance accordingly.
|
80
|
* @param keyValue
|
81
|
* @param state
|
82
|
*
|
83
|
*/
|
84
|
protected abstract void analyzeSingleValue(KeyValue keyValue, STATE state);
|
85
|
|
86
|
/**
|
87
|
* DataHolder class for all key and value information for a cell.
|
88
|
* Value is the value of the cell (as String). Key is the main attribute, further defined by postfix,
|
89
|
* and in case of multiple values indexed.
|
90
|
* TODO doc for refXXX
|
91
|
*/
|
92
|
protected class KeyValue{
|
93
|
public KeyValue() {}
|
94
|
|
95
|
//original Key
|
96
|
public String originalKey;
|
97
|
//value
|
98
|
public String value;
|
99
|
//atomized key
|
100
|
public String key;
|
101
|
public String postfix;
|
102
|
public int index = 0;
|
103
|
public SourceType refType;
|
104
|
public int refIndex = 0;
|
105
|
public boolean hasError = false;
|
106
|
public boolean isKeyData() {
|
107
|
return (refType == null);
|
108
|
}
|
109
|
public boolean isLanguage(){
|
110
|
return (refType.isLanguage());
|
111
|
}
|
112
|
}
|
113
|
|
114
|
/**
 * The kinds of additional (source or language) information a column may hold for a key.
 * Each type is recognized by a regular expression which is matched case insensitively
 * against a complete part of the column header.
 */
public enum SourceType{
    Author("RefAuthor"),
    Title("RefTitle"),
    Year("RefYear"),
    RefExtension("RefExt(ension)?"),
    Language("Lang") //strictly not a reference, so some refactoring/renaming is needed
    ;

    /** Regular expression (without case flag) a header part must match to denote this type. */
    final String keyMatch;

    /** Case insensitive pattern for {@link #keyMatch}, compiled once instead of on every lookup. */
    private final java.util.regex.Pattern keyPattern;

    private SourceType(String keyName){
        this.keyMatch = keyName;
        this.keyPattern = java.util.regex.Pattern.compile("(?i)(" + keyName + ")");
    }

    /**
     * @return <code>true</code> only for {@link #Language}
     */
    boolean isLanguage(){
        return this == Language;
    }

    /**
     * Finds the type whose key pattern matches the complete given string (case insensitive).
     *
     * @param str the header part to test, may be <code>null</code>
     * @return the first matching type in declaration order, or <code>null</code> if
     * <code>str</code> is <code>null</code> or matches no type
     */
    static SourceType byKeyName(String str){
        //only null needs a guard: empty and blank strings never match any of the key patterns
        if (str == null){
            return null;
        }
        for (SourceType type : values()){
            if (type.keyPattern.matcher(str).matches()){
                return type;
            }
        }
        return null;
    }

    /**
     * @param str the header part to test, may be <code>null</code>
     * @return <code>true</code> if {@link #byKeyName(String)} finds a type for <code>str</code>
     */
    static boolean isKeyName(String str){
        return (byKeyName(str) != null);
    }

}
|
149
|
|
150
|
|
151
|
/**
|
152
|
* @param record
|
153
|
* @param originalKey
|
154
|
* @param state
|
155
|
* @param keyValue
|
156
|
* @return
|
157
|
*/
|
158
|
protected KeyValue makeKeyValue(HashMap<String, String> record, String originalKey, STATE state) {
|
159
|
KeyValue keyValue = new KeyValue();
|
160
|
keyValue.originalKey = originalKey;
|
161
|
String indexedKey = CdmUtils.removeDuplicateWhitespace(originalKey.trim()).toString();
|
162
|
String[] split = indexedKey.split("_");
|
163
|
int current = 0;
|
164
|
//key
|
165
|
keyValue.key = split[current++];
|
166
|
//postfix
|
167
|
if (split.length > current && ! isRefType(split[current]) && ! isInteger(split[current]) ){
|
168
|
keyValue.postfix = split[current++];
|
169
|
}
|
170
|
//index
|
171
|
if (split.length > current && isInteger(split[current]) ){
|
172
|
keyValue.index = Integer.valueOf(split[current++]);
|
173
|
}else{
|
174
|
keyValue.index = 0;
|
175
|
}
|
176
|
//source
|
177
|
if (split.length > current && ! isIgnore(keyValue.key)){
|
178
|
//refType
|
179
|
if (isRefType(split[current])){
|
180
|
String refTypeStr = split[current++];
|
181
|
keyValue.refType = SourceType.byKeyName(refTypeStr);
|
182
|
if (keyValue.refType == null){
|
183
|
String message = "Unmatched source key: " + refTypeStr;
|
184
|
fireWarningEvent(message, state, 10);
|
185
|
logger.warn(message);
|
186
|
}
|
187
|
}else {
|
188
|
String message = "RefType expected at %d position of key. But %s is no valid reftype";
|
189
|
message = String.format(message, current, split[current]);
|
190
|
fireWarningEvent(message, state, 10);
|
191
|
logger.warn(message);
|
192
|
keyValue.hasError = true;
|
193
|
}
|
194
|
//ref index
|
195
|
if (split.length > current){
|
196
|
if (isInteger(split[current])){
|
197
|
keyValue.refIndex = Integer.valueOf(split[current++]);
|
198
|
}else{
|
199
|
String message = "Ref index expected at position %d of key. But %s is no valid reftype";
|
200
|
message = String.format(message, current, split[current]);
|
201
|
fireWarningEvent(message, state, 10);
|
202
|
logger.warn(message);
|
203
|
keyValue.hasError = true;
|
204
|
}
|
205
|
}else {
|
206
|
keyValue.refIndex = 0;
|
207
|
}
|
208
|
|
209
|
}
|
210
|
if (split.length > current && ! isIgnore(keyValue.key)){
|
211
|
String message = "Key has unexpected part at position %d of key. %s (and following parts) can not be handled";
|
212
|
message = String.format(message, current, split[current]);
|
213
|
fireWarningEvent(message, state, 10);
|
214
|
logger.warn(message);
|
215
|
keyValue.hasError = true;
|
216
|
}
|
217
|
|
218
|
//TODO shouldn't we use originalKey here??
|
219
|
String value = record.get(indexedKey);
|
220
|
if (! StringUtils.isBlank(value)) {
|
221
|
if (logger.isDebugEnabled()) { logger.debug(keyValue.key + ": " + value); }
|
222
|
value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
|
223
|
keyValue.value = value;
|
224
|
}else{
|
225
|
keyValue.value = null;
|
226
|
}
|
227
|
return keyValue;
|
228
|
}
|
229
|
|
230
|
|
231
|
private boolean isIgnore(String key) {
|
232
|
return key.matches(IGNORE_COLUMN);
|
233
|
}
|
234
|
|
235
|
private boolean isRefType(String string) {
|
236
|
return SourceType.isKeyName(string);
|
237
|
}
|
238
|
|
239
|
|
240
|
private boolean handleBaseColumn(KeyValue keyValue, ExcelRowBase row) {
|
241
|
String key = keyValue.key;
|
242
|
String value = keyValue.value;
|
243
|
if (key.matches(CDM_UUID_COLUMN)) {
|
244
|
row.setCdmUuid(UUID.fromString(value)); //VALIDATE UUID
|
245
|
}
|
246
|
return true;
|
247
|
}
|
248
|
|
249
|
private boolean isBaseColumn(KeyValue keyValue) {
|
250
|
String key = keyValue.key;
|
251
|
if (key.matches(CDM_UUID_COLUMN)){
|
252
|
return true;
|
253
|
} else if(isIgnore(keyValue.key)) {
|
254
|
logger.debug("Ignored column" + keyValue.originalKey);
|
255
|
return true;
|
256
|
}
|
257
|
return false;
|
258
|
}
|
259
|
|
260
|
protected boolean isInteger(String value){
|
261
|
try {
|
262
|
Integer.valueOf(value);
|
263
|
return true;
|
264
|
} catch (NumberFormatException e) {
|
265
|
return false;
|
266
|
}
|
267
|
}
|
268
|
|
269
|
|
270
|
protected boolean analyzeFeatures(STATE state, KeyValue keyValue) {
|
271
|
String key = keyValue.key;
|
272
|
Pager<DefinedTermBase> features = getTermService().findByTitle(Feature.class, key, null, null, null, null, null, null);
|
273
|
|
274
|
if (features.getCount() > 1){
|
275
|
String message = "More than one feature found matching key " + key;
|
276
|
fireWarningEvent(message, state, 4);
|
277
|
return false;
|
278
|
}else if (features.getCount() == 0){
|
279
|
return false;
|
280
|
}else{
|
281
|
Feature feature = CdmBase.deproxy(features.getRecords().get(0), Feature.class);
|
282
|
ROW row = state.getCurrentRow();
|
283
|
if ( keyValue.isKeyData()){
|
284
|
row.putFeature(feature.getUuid(), keyValue.index, keyValue.value);
|
285
|
}else if (keyValue.isLanguage()){
|
286
|
row.putFeatureLanguage(feature.getUuid(), keyValue.index, keyValue.value);
|
287
|
}else{
|
288
|
row.putFeatureSource(feature.getUuid(), keyValue.index, keyValue.refType, keyValue.value, keyValue.refIndex);
|
289
|
}
|
290
|
return true;
|
291
|
}
|
292
|
}
|
293
|
|
294
|
|
295
|
protected void handleExtensions(IdentifiableEntity<?> identifiable, SpecimenRow row, SpecimenCdmExcelImportState state) {
|
296
|
List<PostfixTerm> extensions = row.getExtensions();
|
297
|
|
298
|
for (PostfixTerm exType : extensions){
|
299
|
ExtensionType extensionType = state.getPostfixExtensionType(exType.postfix);
|
300
|
|
301
|
Extension extension = Extension.NewInstance();
|
302
|
extension.setType(extensionType);
|
303
|
extension.setValue(exType.term);
|
304
|
identifiable.addExtension(extension);
|
305
|
}
|
306
|
|
307
|
}
|
308
|
|
309
|
|
310
|
protected void fireWarningEvent(String message, STATE state, int severity) {
|
311
|
fireWarningEvent(message, "Record" + state.getCurrentLine(), severity, 1);
|
312
|
}
|
313
|
}
|