add language to feature for NEEI
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / excel / common / ExcelTaxonOrSpecimenImportBase.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.excel.common;
11
12 import java.util.HashMap;
13 import java.util.List;
14 import java.util.Set;
15 import java.util.UUID;
16
17 import org.apache.commons.lang.StringUtils;
18 import org.apache.log4j.Logger;
19
20 import eu.etaxonomy.cdm.api.service.pager.Pager;
21 import eu.etaxonomy.cdm.common.CdmUtils;
22 import eu.etaxonomy.cdm.io.excel.common.ExcelRowBase.PostfixTerm;
23 import eu.etaxonomy.cdm.io.specimen.excel.in.SpecimenCdmExcelImportState;
24 import eu.etaxonomy.cdm.io.specimen.excel.in.SpecimenRow;
25 import eu.etaxonomy.cdm.model.common.CdmBase;
26 import eu.etaxonomy.cdm.model.common.DefinedTermBase;
27 import eu.etaxonomy.cdm.model.common.Extension;
28 import eu.etaxonomy.cdm.model.common.ExtensionType;
29 import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
30 import eu.etaxonomy.cdm.model.description.Feature;
31
32 /**
33 * @author a.mueller
34 * @date 12.07.2011
35 */
36 public abstract class ExcelTaxonOrSpecimenImportBase<STATE extends ExcelImportState<? extends ExcelImportConfiguratorBase, ROW>, ROW extends ExcelRowBase> extends ExcelImporterBase<STATE> {
/** Logger of this class. */
37 private static final Logger logger = Logger.getLogger(ExcelTaxonOrSpecimenImportBase.class);
38
39
/**
 * Case-insensitive regular expression for the key of the column holding the CDM uuid of a record.
 * Keys matching it are treated as base columns by this class.
 */
40 protected static final String CDM_UUID_COLUMN = "(?i)(CdmUuid)";
41
42
// The patterns below are not referenced in this class; presumably subclasses match column keys against them.
/** Case-insensitive regular expression matching "Rank". */
43 protected static final String RANK_COLUMN = "(?i)(Rank)";
/** Case-insensitive regular expression matching "FullName". */
44 protected static final String FULL_NAME_COLUMN = "(?i)(FullName)";
/** Case-insensitive regular expression matching "Family". */
45 protected static final String FAMILY_COLUMN = "(?i)(Family)";
/** Case-insensitive regular expression matching "Genus". */
46 protected static final String GENUS_COLUMN = "(?i)(Genus)";
/** Case-insensitive regular expression matching "SpecificEpi" or "SpecificEpithet". */
47 protected static final String SPECIFIC_EPITHET_COLUMN = "(?i)(SpecificEpi(thet)?)";
/** Case-insensitive regular expression matching "InfraSpecificEpi" or "InfraSpecificEpithet". */
48 protected static final String INFRASPECIFIC_EPITHET_COLUMN = "(?i)(InfraSpecificEpi(thet)?)";
49
50 @Override
51 protected boolean analyzeRecord(HashMap<String, String> record, STATE state) {
52 boolean success = true;
53 Set<String> keys = record.keySet();
54
55 ROW row = createDataHolderRow();
56 state.setCurrentRow(row);
57
58 for (String originalKey: keys) {
59 KeyValue keyValue = makeKeyValue(record, originalKey, state);
60 if (StringUtils.isBlank(keyValue.value)){
61 continue;
62 }
63 if (isBaseColumn(keyValue)){
64 success &= handleBaseColumn(keyValue, row);
65 }else{
66 success &= analyzeSingleValue(keyValue, state);
67 }
68 }
69 return success;
70 }
71
/**
 * Creates the data holder row for a record. {@link #analyzeRecord} calls this
 * once per record and registers the result as the current row of the state
 * before any cell of the record is analyzed.
 * @return the row instance to be filled for the record
 */
72 protected abstract ROW createDataHolderRow();
73
74 /**
75 * Analyzes a single record value and fills the row instance accordingly.
 * {@link #analyzeRecord} calls this for every cell whose value is not blank
 * and whose key is not a base column.
76 * @param keyValue the parsed key parts and the value of the cell
77 * @param state the import state; its current row was set by {@link #analyzeRecord}
78 * @return the success flag for this cell; {@link #analyzeRecord} AND-combines it
 * into the result for the whole record
79 */
80 protected abstract boolean analyzeSingleValue(KeyValue keyValue, STATE state);
81
82 /**
83 * DataHolder class for all key and value information for a cell.
84 * Value is the value of the cell (as String). Key is the main attribute, further defined by postfix,
85 * and in case of multiple values indexed.
86 * TODO doc for refXXX
87 */
88 protected class KeyValue{
89 public KeyValue() {}
90
91 //original Key
92 public String originalKey;
93 //value
94 public String value;
95 //atomized key
96 public String key;
97 public String postfix;
98 public int index = 0;
99 public SourceType refType;
100 public int refIndex = 0;
101 public boolean hasError = false;
102 public boolean isKeyData() {
103 return (refType == null);
104 }
105 public boolean isLanguage(){
106 return (refType.isLanguage());
107 }
108 }
109
110 public enum SourceType{
111 Author("RefAuthor"),
112 Title("RefTitle"),
113 Year("RefYear"),
114 Language("Lang") //strictly not a reference, so some refactoring/renaming is needed
115 ;
116
117 String keyMatch = null;
118 private SourceType(String keyName){
119 this.keyMatch = keyName;
120 }
121
122
123 boolean isLanguage(){
124 return (this.equals(Language));
125 }
126
127 static SourceType byKeyName(String str){
128 if (StringUtils.isBlank(str)){
129 return null;
130 }
131 for (SourceType type : SourceType.values()){
132 if (str.matches("(?i)(" + type.keyMatch + ")")){
133 return type;
134 }
135 }
136 return null;
137 }
138
139 static boolean isKeyName(String str){
140 return (byKeyName(str) != null);
141 }
142
143 }
144
145
146 /**
147 * @param record
148 * @param originalKey
149 * @param state
150 * @param keyValue
151 * @return
152 */
153 protected KeyValue makeKeyValue(HashMap<String, String> record, String originalKey, STATE state) {
154 KeyValue keyValue = new KeyValue();
155 keyValue.originalKey = originalKey;
156 String indexedKey = CdmUtils.removeDuplicateWhitespace(originalKey.trim()).toString();
157 String[] split = indexedKey.split("_");
158 int current = 0;
159 //key
160 keyValue.key = split[current++];
161 //postfix
162 if (split.length > current && ! isRefType(split[current]) && ! isInteger(split[current]) ){
163 keyValue.postfix = split[current++];
164 }
165 //index
166 if (split.length > current && isInteger(split[current]) ){
167 keyValue.index = Integer.valueOf(split[current++]);
168 }else{
169 keyValue.index = 0;
170 }
171 //source
172 if (split.length > current){
173 //refType
174 if (isRefType(split[current])){
175 String refTypeStr = split[current++];
176 keyValue.refType = SourceType.byKeyName(refTypeStr);
177 if (keyValue.refType == null){
178 String message = "Unmatched source key: " + refTypeStr;
179 fireWarningEvent(message, state, 10);
180 logger.warn(message);
181 }
182 }else {
183 String message = "RefType expected at %d position of key. But %s is no valid reftype";
184 message = String.format(message, current, split[current]);
185 fireWarningEvent(message, state, 10);
186 logger.warn(message);
187 keyValue.hasError = true;
188 }
189 //ref index
190 if (split.length > current){
191 if (isInteger(split[current])){
192 keyValue.refIndex = Integer.valueOf(split[current++]);
193 }else{
194 String message = "Ref index expected at position %d of key. But %s is no valid reftype";
195 message = String.format(message, current, split[current]);
196 fireWarningEvent(message, state, 10);
197 logger.warn(message);
198 keyValue.hasError = true;
199 }
200 }else {
201 keyValue.refIndex = 0;
202 }
203
204 }
205 if (split.length > current){
206 String message = "Key has unexpected part at position %d of key. %s (and following parts) can not be handled";
207 message = String.format(message, current, split[current]);
208 fireWarningEvent(message, state, 10);
209 logger.warn(message);
210 keyValue.hasError = true;
211 }
212
213 //TODO shouldn't we use originalKey here??
214 String value = (String) record.get(indexedKey);
215 if (! StringUtils.isBlank(value)) {
216 if (logger.isDebugEnabled()) { logger.debug(keyValue.key + ": " + value); }
217 value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
218 keyValue.value = value;
219 }else{
220 keyValue.value = null;
221 }
222 return keyValue;
223 }
224
225
226 private boolean isRefType(String string) {
227 return SourceType.isKeyName(string);
228 }
229
230
231 private boolean handleBaseColumn(KeyValue keyValue, ExcelRowBase row) {
232 String key = keyValue.key;
233 String value = keyValue.value;
234 if (key.matches(CDM_UUID_COLUMN)) {
235 row.setCdmUuid(UUID.fromString(value)); //VALIDATE UUID
236 }
237 return true;
238 }
239
240 private boolean isBaseColumn(KeyValue keyValue) {
241 String key = keyValue.key;
242 if (key.matches(CDM_UUID_COLUMN)){
243 return true;
244 }
245 return false;
246 }
247
248 protected boolean isInteger(String value){
249 try {
250 Integer.valueOf(value);
251 return true;
252 } catch (NumberFormatException e) {
253 return false;
254 }
255 }
256
257
258 protected boolean analyzeFeatures(STATE state, KeyValue keyValue) {
259 String key = keyValue.key;
260 Pager<DefinedTermBase> features = getTermService().findByTitle(Feature.class, key, null, null, null, null, null, null);
261 if (features.getCount() > 1){
262 String message = "More than one feature found matching key " + key;
263 fireWarningEvent(message, state, 4);
264 return false;
265 }else if (features.getCount() == 0){
266 return false;
267 }else{
268 Feature feature = CdmBase.deproxy(features.getRecords().get(0), Feature.class);
269 ROW row = state.getCurrentRow();
270 if ( keyValue.isKeyData()){
271 row.putFeature(feature.getUuid(), keyValue.index, keyValue.value);
272 }else if (keyValue.isLanguage()){
273 row.putFeatureLanguage(feature.getUuid(), keyValue.index, keyValue.value);
274 }else{
275 row.putFeatureSource(feature.getUuid(), keyValue.index, keyValue.refType, keyValue.value, keyValue.refIndex);
276 }
277 return true;
278 }
279 }
280
281
282 protected void handleExtensions(IdentifiableEntity<?> identifiable, SpecimenRow row, SpecimenCdmExcelImportState state) {
283 List<PostfixTerm> extensions = row.getExtensions();
284
285 for (PostfixTerm exType : extensions){
286 ExtensionType extensionType = state.getPostfixExtensionType(exType.postfix);
287
288 Extension extension = Extension.NewInstance();
289 extension.setType(extensionType);
290 extension.setValue(exType.term);
291 identifiable.addExtension(extension);
292 }
293
294 }
295
296
297 protected void fireWarningEvent(String message, STATE state, int severity) {
298 fireWarningEvent(message, "Record" + state.getCurrentLine(), severity, 1);
299 }
300 }