2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.excel
.common
;
12 import java
.util
.HashMap
;
13 import java
.util
.List
;
15 import java
.util
.UUID
;
17 import org
.apache
.commons
.lang
.StringUtils
;
18 import org
.apache
.log4j
.Logger
;
20 import eu
.etaxonomy
.cdm
.api
.service
.pager
.Pager
;
21 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
22 import eu
.etaxonomy
.cdm
.io
.excel
.common
.ExcelRowBase
.PostfixTerm
;
23 import eu
.etaxonomy
.cdm
.io
.specimen
.excel
.in
.SpecimenCdmExcelImportState
;
24 import eu
.etaxonomy
.cdm
.io
.specimen
.excel
.in
.SpecimenRow
;
25 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
26 import eu
.etaxonomy
.cdm
.model
.common
.DefinedTermBase
;
27 import eu
.etaxonomy
.cdm
.model
.common
.Extension
;
28 import eu
.etaxonomy
.cdm
.model
.common
.ExtensionType
;
29 import eu
.etaxonomy
.cdm
.model
.common
.IdentifiableEntity
;
30 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
36 public abstract class ExcelTaxonOrSpecimenImportBase
<STATE
extends ExcelImportState
<?
extends ExcelImportConfiguratorBase
, ROW
>, ROW
extends ExcelRowBase
> extends ExcelImporterBase
<STATE
> {
37 private static final Logger logger
= Logger
.getLogger(ExcelTaxonOrSpecimenImportBase
.class);
40 protected static final String CDM_UUID_COLUMN
= "(?i)(CdmUuid)";
41 protected static final String IGNORE_COLUMN
= "(?i)(Ignore|Not)";
44 protected static final String RANK_COLUMN
= "(?i)(Rank)";
45 protected static final String FULL_NAME_COLUMN
= "(?i)(FullName)";
46 protected static final String TAXON_UUID_COLUMN
= "(?i)(taxonUuid)";
47 protected static final String FAMILY_COLUMN
= "(?i)(Family)";
48 protected static final String GENUS_COLUMN
= "(?i)(Genus)";
49 protected static final String SPECIFIC_EPITHET_COLUMN
= "(?i)(SpecificEpi(thet)?)";
50 protected static final String INFRASPECIFIC_EPITHET_COLUMN
= "(?i)(InfraSpecificEpi(thet)?)";
52 protected static final String LANGUAGE
= "(?i)(Language)";
55 protected void analyzeRecord(HashMap
<String
, String
> record
, STATE state
) {
56 Set
<String
> keys
= record
.keySet();
58 ROW row
= createDataHolderRow();
59 state
.setCurrentRow(row
);
61 for (String originalKey
: keys
) {
62 KeyValue keyValue
= makeKeyValue(record
, originalKey
, state
);
63 if (StringUtils
.isBlank(keyValue
.value
)){
66 if (isBaseColumn(keyValue
)){
67 handleBaseColumn(keyValue
, row
);
69 analyzeSingleValue(keyValue
, state
);
75 protected abstract ROW
createDataHolderRow();
78 * Analyzes a single record value and fills the row instance accordingly.
83 protected abstract void analyzeSingleValue(KeyValue keyValue
, STATE state
);
86 * DataHolder class for all key and value information for a cell.
87 * Value is the value of the cell (as String). Key is the main attribute, further defined by postfix,
88 * and in case of multiple values indexed.
91 protected class KeyValue
{
95 public String originalKey
;
100 public String postfix
;
101 public int index
= 0;
102 public SourceType refType
;
103 public int refIndex
= 0;
104 public boolean hasError
= false;
105 public boolean isKeyData() {
106 return (refType
== null);
108 public boolean isLanguage(){
109 return (refType
.isLanguage());
/**
 * Kinds of additional information a column key may refer to instead of the key
 * data itself. KeyValue.refType holds one of these constants; null there means
 * the column holds plain key data (see KeyValue.isKeyData()).
 */
// NOTE(review): only the last two constants are visible in this excerpt; any
// constants declared before RefExtension must be confirmed against the full file.
113 public enum SourceType
{
117 RefExtension("RefExt(ension)?"),
118 Language("Lang") //strictly not a reference, so some refactoring/renaming is needed
// Regular-expression fragment that identifies this type in a part of a column key.
// byKeyName embeds it as "(?i)(" + keyMatch + ")".
121 String keyMatch
= null;
// Stores the given regular-expression fragment as keyMatch.
122 private SourceType(String keyName
){
123 this.keyMatch
= keyName
;
// True only for the Language constant.
127 boolean isLanguage(){
128 return (this.equals(Language
));
// Looks for the constant whose keyMatch pattern matches the complete string str,
// ignoring case. Blank input is tested first, before any pattern is tried.
// NOTE(review): the return statements are not visible in this excerpt; isKeyName
// relies on null meaning "no matching type" - confirm against the full file.
131 static SourceType
byKeyName(String str
){
132 if (StringUtils
.isBlank(str
)){
135 for (SourceType type
: SourceType
.values()){
136 if (str
.matches("(?i)(" + type
.keyMatch
+ ")")){
// True if byKeyName finds a type for str, i.e. returns non-null.
143 static boolean isKeyName(String str
){
144 return (byKeyName(str
) != null);
157 protected KeyValue
makeKeyValue(HashMap
<String
, String
> record
, String originalKey
, STATE state
) {
158 KeyValue keyValue
= new KeyValue();
159 keyValue
.originalKey
= originalKey
;
160 String indexedKey
= CdmUtils
.removeDuplicateWhitespace(originalKey
.trim()).toString();
161 String
[] split
= indexedKey
.split("_");
164 keyValue
.key
= split
[current
++];
166 if (split
.length
> current
&& ! isRefType(split
[current
]) && ! isInteger(split
[current
]) ){
167 keyValue
.postfix
= split
[current
++];
170 if (split
.length
> current
&& isInteger(split
[current
]) ){
171 keyValue
.index
= Integer
.valueOf(split
[current
++]);
176 if (split
.length
> current
&& ! isIgnore(keyValue
.key
)){
178 if (isRefType(split
[current
])){
179 String refTypeStr
= split
[current
++];
180 keyValue
.refType
= SourceType
.byKeyName(refTypeStr
);
181 if (keyValue
.refType
== null){
182 String message
= "Unmatched source key: " + refTypeStr
;
183 fireWarningEvent(message
, state
, 10);
184 logger
.warn(message
);
187 String message
= "RefType expected at %d position of key. But %s is no valid reftype";
188 message
= String
.format(message
, current
, split
[current
]);
189 fireWarningEvent(message
, state
, 10);
190 logger
.warn(message
);
191 keyValue
.hasError
= true;
194 if (split
.length
> current
){
195 if (isInteger(split
[current
])){
196 keyValue
.refIndex
= Integer
.valueOf(split
[current
++]);
198 String message
= "Ref index expected at position %d of key. But %s is no valid reftype";
199 message
= String
.format(message
, current
, split
[current
]);
200 fireWarningEvent(message
, state
, 10);
201 logger
.warn(message
);
202 keyValue
.hasError
= true;
205 keyValue
.refIndex
= 0;
209 if (split
.length
> current
&& ! isIgnore(keyValue
.key
)){
210 String message
= "Key has unexpected part at position %d of key. %s (and following parts) can not be handled";
211 message
= String
.format(message
, current
, split
[current
]);
212 fireWarningEvent(message
, state
, 10);
213 logger
.warn(message
);
214 keyValue
.hasError
= true;
217 //TODO shouldn't we use originalKey here??
218 String value
= (String
) record
.get(indexedKey
);
219 if (! StringUtils
.isBlank(value
)) {
220 if (logger
.isDebugEnabled()) { logger
.debug(keyValue
.key
+ ": " + value
); }
221 value
= CdmUtils
.removeDuplicateWhitespace(value
.trim()).toString();
222 keyValue
.value
= value
;
224 keyValue
.value
= null;
230 private boolean isIgnore(String key
) {
231 return key
.matches(IGNORE_COLUMN
);
234 private boolean isRefType(String string
) {
235 return SourceType
.isKeyName(string
);
/**
 * Transfers the value of a base column to the row. The only column handled in
 * the visible code is the CdmUuid column: its value is parsed as a UUID and
 * stored via row.setCdmUuid.
 */
// NOTE(review): the return statements are not visible in this excerpt;
// analyzeRecord ignores the returned boolean.
239 private boolean handleBaseColumn(KeyValue keyValue
, ExcelRowBase row
) {
240 String key
= keyValue
.key
;
241 String value
= keyValue
.value
;
242 if (key
.matches(CDM_UUID_COLUMN
)) {
// UUID.fromString throws an IllegalArgumentException for a string that is not a
// valid UUID representation; it is not caught here (cf. the VALIDATE UUID marker).
243 row
.setCdmUuid(UUID
.fromString(value
)); //VALIDATE UUID
/**
 * Tests whether a column is a base column, i.e. whether its key matches the
 * CdmUuid pattern or the ignore pattern. analyzeRecord passes base columns to
 * handleBaseColumn instead of analyzeSingleValue.
 */
// NOTE(review): the return statements are not visible in this excerpt;
// presumably both visible branches return true - confirm against the full file.
248 private boolean isBaseColumn(KeyValue keyValue
) {
249 String key
= keyValue
.key
;
250 if (key
.matches(CDM_UUID_COLUMN
)){
252 } else if(isIgnore(keyValue
.key
)) {
// ignored columns are only reported at debug level, using the original header
253 logger
.debug("Ignored column" + keyValue
.originalKey
);
259 protected boolean isInteger(String value
){
261 Integer
.valueOf(value
);
263 } catch (NumberFormatException e
) {
/**
 * Tries to interpret the key of a column as the title of a Feature term and, if
 * exactly one such feature is found, stores the cell value in the current row
 * of the state under the uuid of that feature.
 */
// NOTE(review): the return statements and the body of the "no feature found"
// branch are not visible in this excerpt; the meaning of the returned boolean
// must be confirmed against the full file.
269 protected boolean analyzeFeatures(STATE state
, KeyValue keyValue
) {
270 String key
= keyValue
.key
;
// look up Feature terms by title via the term service, using the key as title
271 Pager
<DefinedTermBase
> features
= getTermService().findByTitle(Feature
.class, key
, null, null, null, null, null, null);
// an ambiguous key is reported as a warning of severity 4
273 if (features
.getCount() > 1){
274 String message
= "More than one feature found matching key " + key
;
275 fireWarningEvent(message
, state
, 4);
277 }else if (features
.getCount() == 0){
// exactly one match: take the first record and cast it to Feature via CdmBase.deproxy
280 Feature feature
= CdmBase
.deproxy(features
.getRecords().get(0), Feature
.class);
281 ROW row
= state
.getCurrentRow();
// plain key data -> feature value; language column -> language of the feature value
282 if ( keyValue
.isKeyData()){
283 row
.putFeature(feature
.getUuid(), keyValue
.index
, keyValue
.value
);
284 }else if (keyValue
.isLanguage()){
285 row
.putFeatureLanguage(feature
.getUuid(), keyValue
.index
, keyValue
.value
);
// remaining case: the value is stored as feature source data together with refType and refIndex
287 row
.putFeatureSource(feature
.getUuid(), keyValue
.index
, keyValue
.refType
, keyValue
.value
, keyValue
.refIndex
);
294 protected void handleExtensions(IdentifiableEntity
<?
> identifiable
, SpecimenRow row
, SpecimenCdmExcelImportState state
) {
295 List
<PostfixTerm
> extensions
= row
.getExtensions();
297 for (PostfixTerm exType
: extensions
){
298 ExtensionType extensionType
= state
.getPostfixExtensionType(exType
.postfix
);
300 Extension extension
= Extension
.NewInstance();
301 extension
.setType(extensionType
);
302 extension
.setValue(exType
.term
);
303 identifiable
.addExtension(extension
);
309 protected void fireWarningEvent(String message
, STATE state
, int severity
) {
310 fireWarningEvent(message
, "Record" + state
.getCurrentLine(), severity
, 1);