last updates for Taxon Excel Import and moving all success variables to state
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / excel / common / ExcelTaxonOrSpecimenImportBase.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.excel.common;
11
12 import java.util.HashMap;
13 import java.util.List;
14 import java.util.Set;
15 import java.util.UUID;
16
17 import org.apache.commons.lang.StringUtils;
18 import org.apache.log4j.Logger;
19
20 import eu.etaxonomy.cdm.api.service.pager.Pager;
21 import eu.etaxonomy.cdm.common.CdmUtils;
22 import eu.etaxonomy.cdm.io.excel.common.ExcelRowBase.PostfixTerm;
23 import eu.etaxonomy.cdm.io.specimen.excel.in.SpecimenCdmExcelImportState;
24 import eu.etaxonomy.cdm.io.specimen.excel.in.SpecimenRow;
25 import eu.etaxonomy.cdm.model.common.CdmBase;
26 import eu.etaxonomy.cdm.model.common.DefinedTermBase;
27 import eu.etaxonomy.cdm.model.common.Extension;
28 import eu.etaxonomy.cdm.model.common.ExtensionType;
29 import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
30 import eu.etaxonomy.cdm.model.description.Feature;
31
32 /**
33 * @author a.mueller
34 * @date 12.07.2011
35 */
36 public abstract class ExcelTaxonOrSpecimenImportBase<STATE extends ExcelImportState<? extends ExcelImportConfiguratorBase, ROW>, ROW extends ExcelRowBase> extends ExcelImporterBase<STATE> {
37 private static final Logger logger = Logger.getLogger(ExcelTaxonOrSpecimenImportBase.class);
38
39
40 protected static final String CDM_UUID_COLUMN = "(?i)(CdmUuid)";
41 protected static final String IGNORE_COLUMN = "(?i)(Ignore|Not)";
42
43
44 protected static final String RANK_COLUMN = "(?i)(Rank)";
45 protected static final String FULL_NAME_COLUMN = "(?i)(FullName)";
46 protected static final String FAMILY_COLUMN = "(?i)(Family)";
47 protected static final String GENUS_COLUMN = "(?i)(Genus)";
48 protected static final String SPECIFIC_EPITHET_COLUMN = "(?i)(SpecificEpi(thet)?)";
49 protected static final String INFRASPECIFIC_EPITHET_COLUMN = "(?i)(InfraSpecificEpi(thet)?)";
50
51 @Override
52 protected void analyzeRecord(HashMap<String, String> record, STATE state) {
53 Set<String> keys = record.keySet();
54
55 ROW row = createDataHolderRow();
56 state.setCurrentRow(row);
57
58 for (String originalKey: keys) {
59 KeyValue keyValue = makeKeyValue(record, originalKey, state);
60 if (StringUtils.isBlank(keyValue.value)){
61 continue;
62 }
63 if (isBaseColumn(keyValue)){
64 handleBaseColumn(keyValue, row);
65 }else{
66 analyzeSingleValue(keyValue, state);
67 }
68 }
69 return;
70 }
71
72 protected abstract ROW createDataHolderRow();
73
74 /**
75 * Analyzes a single record value and fills the row instance accordingly.
76 * @param keyValue
77 * @param state
78 * @return
79 */
80 protected abstract void analyzeSingleValue(KeyValue keyValue, STATE state);
81
82 /**
83 * DataHolder class for all key and value information for a cell.
84 * Value is the value of the cell (as String). Key is the main attribute, further defined by postfix,
85 * and in case of multiple values indexed.
86 * TODO doc for refXXX
87 */
88 protected class KeyValue{
89 public KeyValue() {}
90
91 //original Key
92 public String originalKey;
93 //value
94 public String value;
95 //atomized key
96 public String key;
97 public String postfix;
98 public int index = 0;
99 public SourceType refType;
100 public int refIndex = 0;
101 public boolean hasError = false;
102 public boolean isKeyData() {
103 return (refType == null);
104 }
105 public boolean isLanguage(){
106 return (refType.isLanguage());
107 }
108 }
109
110 public enum SourceType{
111 Author("RefAuthor"),
112 Title("RefTitle"),
113 Year("RefYear"),
114 RefExtension("RefExt(ension)?"),
115 Language("Lang") //strictly not a reference, so some refactoring/renaming is needed
116 ;
117
118 String keyMatch = null;
119 private SourceType(String keyName){
120 this.keyMatch = keyName;
121 }
122
123
124 boolean isLanguage(){
125 return (this.equals(Language));
126 }
127
128 static SourceType byKeyName(String str){
129 if (StringUtils.isBlank(str)){
130 return null;
131 }
132 for (SourceType type : SourceType.values()){
133 if (str.matches("(?i)(" + type.keyMatch + ")")){
134 return type;
135 }
136 }
137 return null;
138 }
139
140 static boolean isKeyName(String str){
141 return (byKeyName(str) != null);
142 }
143
144 }
145
146
147 /**
148 * @param record
149 * @param originalKey
150 * @param state
151 * @param keyValue
152 * @return
153 */
154 protected KeyValue makeKeyValue(HashMap<String, String> record, String originalKey, STATE state) {
155 KeyValue keyValue = new KeyValue();
156 keyValue.originalKey = originalKey;
157 String indexedKey = CdmUtils.removeDuplicateWhitespace(originalKey.trim()).toString();
158 String[] split = indexedKey.split("_");
159 int current = 0;
160 //key
161 keyValue.key = split[current++];
162 //postfix
163 if (split.length > current && ! isRefType(split[current]) && ! isInteger(split[current]) ){
164 keyValue.postfix = split[current++];
165 }
166 //index
167 if (split.length > current && isInteger(split[current]) ){
168 keyValue.index = Integer.valueOf(split[current++]);
169 }else{
170 keyValue.index = 0;
171 }
172 //source
173 if (split.length > current){
174 //refType
175 if (isRefType(split[current])){
176 String refTypeStr = split[current++];
177 keyValue.refType = SourceType.byKeyName(refTypeStr);
178 if (keyValue.refType == null){
179 String message = "Unmatched source key: " + refTypeStr;
180 fireWarningEvent(message, state, 10);
181 logger.warn(message);
182 }
183 }else {
184 String message = "RefType expected at %d position of key. But %s is no valid reftype";
185 message = String.format(message, current, split[current]);
186 fireWarningEvent(message, state, 10);
187 logger.warn(message);
188 keyValue.hasError = true;
189 }
190 //ref index
191 if (split.length > current){
192 if (isInteger(split[current])){
193 keyValue.refIndex = Integer.valueOf(split[current++]);
194 }else{
195 String message = "Ref index expected at position %d of key. But %s is no valid reftype";
196 message = String.format(message, current, split[current]);
197 fireWarningEvent(message, state, 10);
198 logger.warn(message);
199 keyValue.hasError = true;
200 }
201 }else {
202 keyValue.refIndex = 0;
203 }
204
205 }
206 if (split.length > current){
207 String message = "Key has unexpected part at position %d of key. %s (and following parts) can not be handled";
208 message = String.format(message, current, split[current]);
209 fireWarningEvent(message, state, 10);
210 logger.warn(message);
211 keyValue.hasError = true;
212 }
213
214 //TODO shouldn't we use originalKey here??
215 String value = (String) record.get(indexedKey);
216 if (! StringUtils.isBlank(value)) {
217 if (logger.isDebugEnabled()) { logger.debug(keyValue.key + ": " + value); }
218 value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
219 keyValue.value = value;
220 }else{
221 keyValue.value = null;
222 }
223 return keyValue;
224 }
225
226
227 private boolean isRefType(String string) {
228 return SourceType.isKeyName(string);
229 }
230
231
232 private boolean handleBaseColumn(KeyValue keyValue, ExcelRowBase row) {
233 String key = keyValue.key;
234 String value = keyValue.value;
235 if (key.matches(CDM_UUID_COLUMN)) {
236 row.setCdmUuid(UUID.fromString(value)); //VALIDATE UUID
237 }
238 return true;
239 }
240
241 private boolean isBaseColumn(KeyValue keyValue) {
242 String key = keyValue.key;
243 if (key.matches(CDM_UUID_COLUMN)){
244 return true;
245 } else if(keyValue.key.matches(IGNORE_COLUMN)) {
246 logger.debug("Ignored column" + keyValue.originalKey);
247 return true;
248 }
249 return false;
250 }
251
252 protected boolean isInteger(String value){
253 try {
254 Integer.valueOf(value);
255 return true;
256 } catch (NumberFormatException e) {
257 return false;
258 }
259 }
260
261
262 protected boolean analyzeFeatures(STATE state, KeyValue keyValue) {
263 String key = keyValue.key;
264 Pager<DefinedTermBase> features = getTermService().findByTitle(Feature.class, key, null, null, null, null, null, null);
265 if (features.getCount() > 1){
266 String message = "More than one feature found matching key " + key;
267 fireWarningEvent(message, state, 4);
268 return false;
269 }else if (features.getCount() == 0){
270 return false;
271 }else{
272 Feature feature = CdmBase.deproxy(features.getRecords().get(0), Feature.class);
273 ROW row = state.getCurrentRow();
274 if ( keyValue.isKeyData()){
275 row.putFeature(feature.getUuid(), keyValue.index, keyValue.value);
276 }else if (keyValue.isLanguage()){
277 row.putFeatureLanguage(feature.getUuid(), keyValue.index, keyValue.value);
278 }else{
279 row.putFeatureSource(feature.getUuid(), keyValue.index, keyValue.refType, keyValue.value, keyValue.refIndex);
280 }
281 return true;
282 }
283 }
284
285
286 protected void handleExtensions(IdentifiableEntity<?> identifiable, SpecimenRow row, SpecimenCdmExcelImportState state) {
287 List<PostfixTerm> extensions = row.getExtensions();
288
289 for (PostfixTerm exType : extensions){
290 ExtensionType extensionType = state.getPostfixExtensionType(exType.postfix);
291
292 Extension extension = Extension.NewInstance();
293 extension.setType(extensionType);
294 extension.setValue(exType.term);
295 identifiable.addExtension(extension);
296 }
297
298 }
299
300
301 protected void fireWarningEvent(String message, STATE state, int severity) {
302 fireWarningEvent(message, "Record" + state.getCurrentLine(), severity, 1);
303 }
304 }