/**
 * Copyright (C) 2007 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
10 package eu
.etaxonomy
.cdm
.io
.excel
.common
;
12 import java
.util
.HashMap
;
13 import java
.util
.List
;
15 import java
.util
.UUID
;
17 import org
.apache
.commons
.lang
.StringUtils
;
18 import org
.apache
.log4j
.Logger
;
20 import eu
.etaxonomy
.cdm
.api
.service
.pager
.Pager
;
21 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
22 import eu
.etaxonomy
.cdm
.io
.excel
.common
.ExcelRowBase
.PostfixTerm
;
23 import eu
.etaxonomy
.cdm
.io
.specimen
.excel
.in
.SpecimenCdmExcelImportState
;
24 import eu
.etaxonomy
.cdm
.io
.specimen
.excel
.in
.SpecimenRow
;
25 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
26 import eu
.etaxonomy
.cdm
.model
.common
.DefinedTermBase
;
27 import eu
.etaxonomy
.cdm
.model
.common
.Extension
;
28 import eu
.etaxonomy
.cdm
.model
.common
.ExtensionType
;
29 import eu
.etaxonomy
.cdm
.model
.common
.IdentifiableEntity
;
30 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
36 public abstract class ExcelTaxonOrSpecimenImportBase
<STATE
extends ExcelImportState
<?
extends ExcelImportConfiguratorBase
, ROW
>, ROW
extends ExcelRowBase
> extends ExcelImporterBase
<STATE
> {
37 private static final Logger logger
= Logger
.getLogger(ExcelTaxonOrSpecimenImportBase
.class);
40 protected static final String CDM_UUID_COLUMN
= "(?i)(CdmUuid)";
41 protected static final String IGNORE_COLUMN
= "(?i)(Ignore|Not)";
44 protected static final String RANK_COLUMN
= "(?i)(Rank)";
45 protected static final String FULL_NAME_COLUMN
= "(?i)(FullName)";
46 protected static final String TAXON_UUID_COLUMN
= "(?i)(taxonUuid)";
47 protected static final String FAMILY_COLUMN
= "(?i)(Family)";
48 protected static final String GENUS_COLUMN
= "(?i)(Genus)";
49 protected static final String SPECIFIC_EPITHET_COLUMN
= "(?i)(SpecificEpi(thet)?)";
50 protected static final String INFRASPECIFIC_EPITHET_COLUMN
= "(?i)(InfraSpecificEpi(thet)?)";
53 protected void analyzeRecord(HashMap
<String
, String
> record
, STATE state
) {
54 Set
<String
> keys
= record
.keySet();
56 ROW row
= createDataHolderRow();
57 state
.setCurrentRow(row
);
59 for (String originalKey
: keys
) {
60 KeyValue keyValue
= makeKeyValue(record
, originalKey
, state
);
61 if (StringUtils
.isBlank(keyValue
.value
)){
64 if (isBaseColumn(keyValue
)){
65 handleBaseColumn(keyValue
, row
);
67 analyzeSingleValue(keyValue
, state
);
73 protected abstract ROW
createDataHolderRow();
76 * Analyzes a single record value and fills the row instance accordingly.
81 protected abstract void analyzeSingleValue(KeyValue keyValue
, STATE state
);
/**
 * Data holder for all key and value information of a single cell.
 * The value is the content of the cell (as String). The key is the main attribute; it may be
 * further qualified by a postfix and, in case of multiple values, by an index.
 */
89 protected class KeyValue
{
93 public String originalKey
;
98 public String postfix
;
100 public SourceType refType
;
101 public int refIndex
= 0;
102 public boolean hasError
= false;
103 public boolean isKeyData() {
104 return (refType
== null);
106 public boolean isLanguage(){
107 return (refType
.isLanguage());
111 public enum SourceType
{
115 RefExtension("RefExt(ension)?"),
116 Language("Lang") //strictly not a reference, so some refactoring/renaming is needed
119 String keyMatch
= null;
/**
 * Creates a source type.
 *
 * @param keyName regular expression fragment that the corresponding part of a column header
 *                has to match (see <code>byKeyName</code>)
 */
SourceType(String keyName) {
    // enum constructors are implicitly private
    keyMatch = keyName;
}
125 boolean isLanguage(){
126 return (this.equals(Language
));
129 static SourceType
byKeyName(String str
){
130 if (StringUtils
.isBlank(str
)){
133 for (SourceType type
: SourceType
.values()){
134 if (str
.matches("(?i)(" + type
.keyMatch
+ ")")){
141 static boolean isKeyName(String str
){
142 return (byKeyName(str
) != null);
155 protected KeyValue
makeKeyValue(HashMap
<String
, String
> record
, String originalKey
, STATE state
) {
156 KeyValue keyValue
= new KeyValue();
157 keyValue
.originalKey
= originalKey
;
158 String indexedKey
= CdmUtils
.removeDuplicateWhitespace(originalKey
.trim()).toString();
159 String
[] split
= indexedKey
.split("_");
162 keyValue
.key
= split
[current
++];
164 if (split
.length
> current
&& ! isRefType(split
[current
]) && ! isInteger(split
[current
]) ){
165 keyValue
.postfix
= split
[current
++];
168 if (split
.length
> current
&& isInteger(split
[current
]) ){
169 keyValue
.index
= Integer
.valueOf(split
[current
++]);
174 if (split
.length
> current
){
176 if (isRefType(split
[current
])){
177 String refTypeStr
= split
[current
++];
178 keyValue
.refType
= SourceType
.byKeyName(refTypeStr
);
179 if (keyValue
.refType
== null){
180 String message
= "Unmatched source key: " + refTypeStr
;
181 fireWarningEvent(message
, state
, 10);
182 logger
.warn(message
);
185 String message
= "RefType expected at %d position of key. But %s is no valid reftype";
186 message
= String
.format(message
, current
, split
[current
]);
187 fireWarningEvent(message
, state
, 10);
188 logger
.warn(message
);
189 keyValue
.hasError
= true;
192 if (split
.length
> current
){
193 if (isInteger(split
[current
])){
194 keyValue
.refIndex
= Integer
.valueOf(split
[current
++]);
196 String message
= "Ref index expected at position %d of key. But %s is no valid reftype";
197 message
= String
.format(message
, current
, split
[current
]);
198 fireWarningEvent(message
, state
, 10);
199 logger
.warn(message
);
200 keyValue
.hasError
= true;
203 keyValue
.refIndex
= 0;
207 if (split
.length
> current
){
208 String message
= "Key has unexpected part at position %d of key. %s (and following parts) can not be handled";
209 message
= String
.format(message
, current
, split
[current
]);
210 fireWarningEvent(message
, state
, 10);
211 logger
.warn(message
);
212 keyValue
.hasError
= true;
215 //TODO shouldn't we use originalKey here??
216 String value
= (String
) record
.get(indexedKey
);
217 if (! StringUtils
.isBlank(value
)) {
218 if (logger
.isDebugEnabled()) { logger
.debug(keyValue
.key
+ ": " + value
); }
219 value
= CdmUtils
.removeDuplicateWhitespace(value
.trim()).toString();
220 keyValue
.value
= value
;
222 keyValue
.value
= null;
228 private boolean isRefType(String string
) {
229 return SourceType
.isKeyName(string
);
233 private boolean handleBaseColumn(KeyValue keyValue
, ExcelRowBase row
) {
234 String key
= keyValue
.key
;
235 String value
= keyValue
.value
;
236 if (key
.matches(CDM_UUID_COLUMN
)) {
237 row
.setCdmUuid(UUID
.fromString(value
)); //VALIDATE UUID
242 private boolean isBaseColumn(KeyValue keyValue
) {
243 String key
= keyValue
.key
;
244 if (key
.matches(CDM_UUID_COLUMN
)){
246 } else if(keyValue
.key
.matches(IGNORE_COLUMN
)) {
247 logger
.debug("Ignored column" + keyValue
.originalKey
);
253 protected boolean isInteger(String value
){
255 Integer
.valueOf(value
);
257 } catch (NumberFormatException e
) {
263 protected boolean analyzeFeatures(STATE state
, KeyValue keyValue
) {
264 String key
= keyValue
.key
;
265 Pager
<DefinedTermBase
> features
= getTermService().findByTitle(Feature
.class, key
, null, null, null, null, null, null);
266 if (features
.getCount() > 1){
267 String message
= "More than one feature found matching key " + key
;
268 fireWarningEvent(message
, state
, 4);
270 }else if (features
.getCount() == 0){
273 Feature feature
= CdmBase
.deproxy(features
.getRecords().get(0), Feature
.class);
274 ROW row
= state
.getCurrentRow();
275 if ( keyValue
.isKeyData()){
276 row
.putFeature(feature
.getUuid(), keyValue
.index
, keyValue
.value
);
277 }else if (keyValue
.isLanguage()){
278 row
.putFeatureLanguage(feature
.getUuid(), keyValue
.index
, keyValue
.value
);
280 row
.putFeatureSource(feature
.getUuid(), keyValue
.index
, keyValue
.refType
, keyValue
.value
, keyValue
.refIndex
);
287 protected void handleExtensions(IdentifiableEntity
<?
> identifiable
, SpecimenRow row
, SpecimenCdmExcelImportState state
) {
288 List
<PostfixTerm
> extensions
= row
.getExtensions();
290 for (PostfixTerm exType
: extensions
){
291 ExtensionType extensionType
= state
.getPostfixExtensionType(exType
.postfix
);
293 Extension extension
= Extension
.NewInstance();
294 extension
.setType(extensionType
);
295 extension
.setValue(exType
.term
);
296 identifiable
.addExtension(extension
);
302 protected void fireWarningEvent(String message
, STATE state
, int severity
) {
303 fireWarningEvent(message
, "Record" + state
.getCurrentLine(), severity
, 1);