Latest updates to Specimen Excel import
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / excel / common / ExcelTaxonOrSpecimenImportBase.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.excel.common;
11
12 import java.util.HashMap;
13 import java.util.List;
14 import java.util.Set;
15 import java.util.UUID;
16
17 import org.apache.commons.lang.StringUtils;
18 import org.apache.log4j.Logger;
19
20 import eu.etaxonomy.cdm.api.service.pager.Pager;
21 import eu.etaxonomy.cdm.common.CdmUtils;
22 import eu.etaxonomy.cdm.io.excel.common.ExcelRowBase.PostfixTerm;
23 import eu.etaxonomy.cdm.io.specimen.excel.in.SpecimenCdmExcelImportState;
24 import eu.etaxonomy.cdm.io.specimen.excel.in.SpecimenRow;
25 import eu.etaxonomy.cdm.model.common.CdmBase;
26 import eu.etaxonomy.cdm.model.common.DefinedTermBase;
27 import eu.etaxonomy.cdm.model.common.Extension;
28 import eu.etaxonomy.cdm.model.common.ExtensionType;
29 import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
30 import eu.etaxonomy.cdm.model.description.Feature;
31
32 /**
33 * @author a.mueller
34 * @date 12.07.2011
35 */
36 public abstract class ExcelTaxonOrSpecimenImportBase<STATE extends ExcelImportState<? extends ExcelImportConfiguratorBase, ROW>, ROW extends ExcelRowBase> extends ExcelImporterBase<STATE> {
// Logger of this class.
37 private static final Logger logger = Logger.getLogger(ExcelTaxonOrSpecimenImportBase.class);
38
39
// All *_COLUMN constants below are regular expressions that are matched against the key part
// of a column header. The leading (?i) flag makes the match case-insensitive.

// Header of the column holding the CDM UUID of a record; evaluated in handleBaseColumn.
40 protected static final String CDM_UUID_COLUMN = "(?i)(CdmUuid)";
// Headers of columns that are skipped; evaluated in isIgnore.
41 protected static final String IGNORE_COLUMN = "(?i)(Ignore|Not)";
42
43
// Headers of name and taxon related columns. They are not referenced in this class;
// presumably they are evaluated by subclasses - TODO confirm.
44 protected static final String RANK_COLUMN = "(?i)(Rank)";
45 protected static final String FULL_NAME_COLUMN = "(?i)(FullName)";
46 protected static final String TAXON_UUID_COLUMN = "(?i)(taxonUuid)";
47 protected static final String FAMILY_COLUMN = "(?i)(Family)";
48 protected static final String GENUS_COLUMN = "(?i)(Genus)";
49 protected static final String SPECIFIC_EPITHET_COLUMN = "(?i)(SpecificEpi(thet)?)";
50 protected static final String INFRASPECIFIC_EPITHET_COLUMN = "(?i)(InfraSpecificEpi(thet)?)";
51
// Header pattern for a language column; not referenced in this class.
52 protected static final String LANGUAGE = "(?i)(Language)";
53
54 @Override
55 protected void analyzeRecord(HashMap<String, String> record, STATE state) {
56 Set<String> keys = record.keySet();
57
58 ROW row = createDataHolderRow();
59 state.setCurrentRow(row);
60
61 for (String originalKey: keys) {
62 KeyValue keyValue = makeKeyValue(record, originalKey, state);
63 if (StringUtils.isBlank(keyValue.value)){
64 continue;
65 }
66 if (isBaseColumn(keyValue)){
67 handleBaseColumn(keyValue, row);
68 }else{
69 analyzeSingleValue(keyValue, state);
70 }
71 }
72 return;
73 }
74
/**
 * Creates the row instance that collects the data of one record.
 * It is called once at the beginning of analyzeRecord and the returned
 * instance is set as the current row of the import state.
 *
 * @return the new data holder row
 */
75 protected abstract ROW createDataHolderRow();
76
77 /**
 * Analyzes a single record value and fills the row instance accordingly.
 * It is called by analyzeRecord for every cell that has a non-blank value
 * and whose key is not a base column (CDM UUID or ignored column).
 *
 * @param keyValue the atomized column header together with the cell value
 * @param state the import state; analyzeRecord has set its current row before this call
 */
83 protected abstract void analyzeSingleValue(KeyValue keyValue, STATE state);
84
85 /**
86 * DataHolder class for all key and value information for a cell.
87 * Value is the value of the cell (as String). Key is the main attribute, further defined by postfix,
88 * and in case of multiple values indexed.
89 * TODO doc for refXXX
90 */
91 protected class KeyValue{
92 public KeyValue() {}
93
94 //original Key
95 public String originalKey;
96 //value
97 public String value;
98 //atomized key
99 public String key;
100 public String postfix;
101 public int index = 0;
102 public SourceType refType;
103 public int refIndex = 0;
104 public boolean hasError = false;
105 public boolean isKeyData() {
106 return (refType == null);
107 }
108 public boolean isLanguage(){
109 return (refType.isLanguage());
110 }
111 }
112
/**
 * The kind of additional information a column may hold for a data cell. The type is
 * recognized by a part of the column header that matches the key pattern of the type
 * (case-insensitive, the complete part must match).
 */
public enum SourceType {
    Author("RefAuthor"),
    Title("RefTitle"),
    Year("RefYear"),
    RefExtension("RefExt(ension)?"),
    Language("Lang") //strictly not a reference, so some refactoring/renaming is needed
    ;

    /** Regular expression (without case flag) that a header part must match for this type. */
    final String keyMatch;

    /** Case-insensitive pattern built from keyMatch; compiled once per constant. */
    private final java.util.regex.Pattern keyPattern;

    SourceType(String keyName) {
        this.keyMatch = keyName;
        this.keyPattern = java.util.regex.Pattern.compile("(?i)(" + keyName + ")");
    }

    /**
     * @return true if this type is {@link #Language}
     */
    boolean isLanguage() {
        return this == Language;
    }

    /**
     * Returns the type whose key pattern matches the given string completely.
     *
     * @param str the header part to test, may be null
     * @return the first matching type in declaration order, or null if str is null,
     * blank or matches no type
     */
    static SourceType byKeyName(String str) {
        // Empty and whitespace-only strings need no special handling: no key pattern matches them.
        if (str == null) {
            return null;
        }
        for (SourceType type : SourceType.values()) {
            if (type.keyPattern.matcher(str).matches()) {
                return type;
            }
        }
        return null;
    }

    /**
     * @param str the header part to test, may be null
     * @return true if the string matches the key pattern of any type
     */
    static boolean isKeyName(String str) {
        return (byKeyName(str) != null);
    }

}
148
149
150 /**
151 * @param record
152 * @param originalKey
153 * @param state
154 * @param keyValue
155 * @return
156 */
157 protected KeyValue makeKeyValue(HashMap<String, String> record, String originalKey, STATE state) {
158 KeyValue keyValue = new KeyValue();
159 keyValue.originalKey = originalKey;
160 String indexedKey = CdmUtils.removeDuplicateWhitespace(originalKey.trim()).toString();
161 String[] split = indexedKey.split("_");
162 int current = 0;
163 //key
164 keyValue.key = split[current++];
165 //postfix
166 if (split.length > current && ! isRefType(split[current]) && ! isInteger(split[current]) ){
167 keyValue.postfix = split[current++];
168 }
169 //index
170 if (split.length > current && isInteger(split[current]) ){
171 keyValue.index = Integer.valueOf(split[current++]);
172 }else{
173 keyValue.index = 0;
174 }
175 //source
176 if (split.length > current && ! isIgnore(keyValue.key)){
177 //refType
178 if (isRefType(split[current])){
179 String refTypeStr = split[current++];
180 keyValue.refType = SourceType.byKeyName(refTypeStr);
181 if (keyValue.refType == null){
182 String message = "Unmatched source key: " + refTypeStr;
183 fireWarningEvent(message, state, 10);
184 logger.warn(message);
185 }
186 }else {
187 String message = "RefType expected at %d position of key. But %s is no valid reftype";
188 message = String.format(message, current, split[current]);
189 fireWarningEvent(message, state, 10);
190 logger.warn(message);
191 keyValue.hasError = true;
192 }
193 //ref index
194 if (split.length > current){
195 if (isInteger(split[current])){
196 keyValue.refIndex = Integer.valueOf(split[current++]);
197 }else{
198 String message = "Ref index expected at position %d of key. But %s is no valid reftype";
199 message = String.format(message, current, split[current]);
200 fireWarningEvent(message, state, 10);
201 logger.warn(message);
202 keyValue.hasError = true;
203 }
204 }else {
205 keyValue.refIndex = 0;
206 }
207
208 }
209 if (split.length > current && ! isIgnore(keyValue.key)){
210 String message = "Key has unexpected part at position %d of key. %s (and following parts) can not be handled";
211 message = String.format(message, current, split[current]);
212 fireWarningEvent(message, state, 10);
213 logger.warn(message);
214 keyValue.hasError = true;
215 }
216
217 //TODO shouldn't we use originalKey here??
218 String value = (String) record.get(indexedKey);
219 if (! StringUtils.isBlank(value)) {
220 if (logger.isDebugEnabled()) { logger.debug(keyValue.key + ": " + value); }
221 value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
222 keyValue.value = value;
223 }else{
224 keyValue.value = null;
225 }
226 return keyValue;
227 }
228
229
230 private boolean isIgnore(String key) {
231 return key.matches(IGNORE_COLUMN);
232 }
233
234 private boolean isRefType(String string) {
235 return SourceType.isKeyName(string);
236 }
237
238
239 private boolean handleBaseColumn(KeyValue keyValue, ExcelRowBase row) {
240 String key = keyValue.key;
241 String value = keyValue.value;
242 if (key.matches(CDM_UUID_COLUMN)) {
243 row.setCdmUuid(UUID.fromString(value)); //VALIDATE UUID
244 }
245 return true;
246 }
247
248 private boolean isBaseColumn(KeyValue keyValue) {
249 String key = keyValue.key;
250 if (key.matches(CDM_UUID_COLUMN)){
251 return true;
252 } else if(isIgnore(keyValue.key)) {
253 logger.debug("Ignored column" + keyValue.originalKey);
254 return true;
255 }
256 return false;
257 }
258
259 protected boolean isInteger(String value){
260 try {
261 Integer.valueOf(value);
262 return true;
263 } catch (NumberFormatException e) {
264 return false;
265 }
266 }
267
268
269 protected boolean analyzeFeatures(STATE state, KeyValue keyValue) {
270 String key = keyValue.key;
271 Pager<DefinedTermBase> features = getTermService().findByTitle(Feature.class, key, null, null, null, null, null, null);
272
273 if (features.getCount() > 1){
274 String message = "More than one feature found matching key " + key;
275 fireWarningEvent(message, state, 4);
276 return false;
277 }else if (features.getCount() == 0){
278 return false;
279 }else{
280 Feature feature = CdmBase.deproxy(features.getRecords().get(0), Feature.class);
281 ROW row = state.getCurrentRow();
282 if ( keyValue.isKeyData()){
283 row.putFeature(feature.getUuid(), keyValue.index, keyValue.value);
284 }else if (keyValue.isLanguage()){
285 row.putFeatureLanguage(feature.getUuid(), keyValue.index, keyValue.value);
286 }else{
287 row.putFeatureSource(feature.getUuid(), keyValue.index, keyValue.refType, keyValue.value, keyValue.refIndex);
288 }
289 return true;
290 }
291 }
292
293
294 protected void handleExtensions(IdentifiableEntity<?> identifiable, SpecimenRow row, SpecimenCdmExcelImportState state) {
295 List<PostfixTerm> extensions = row.getExtensions();
296
297 for (PostfixTerm exType : extensions){
298 ExtensionType extensionType = state.getPostfixExtensionType(exType.postfix);
299
300 Extension extension = Extension.NewInstance();
301 extension.setType(extensionType);
302 extension.setValue(exType.term);
303 identifiable.addExtension(extension);
304 }
305
306 }
307
308
309 protected void fireWarningEvent(String message, STATE state, int severity) {
310 fireWarningEvent(message, "Record" + state.getCurrentLine(), severity, 1);
311 }
312 }