cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/excel/common/ExcelTaxonOrSpecimenImportBase.java

   1 /**
   2  * Copyright (C) 2007 EDIT
   3  * European Distributed Institute of Taxonomy
   4  * http://www.e-taxonomy.eu
   5  *
   6  * The contents of this file are subject to the Mozilla Public License Version 1.1
   7  * See LICENSE.TXT at the top of this package for the full license terms.
   8  */
   9
  10 package eu.etaxonomy.cdm.io.excel.common;
  11
  12 import java.util.HashMap;
  13 import java.util.List;
  14 import java.util.Set;
  15 import java.util.UUID;
  16
  17 import org.apache.commons.lang.StringUtils;
  18 import org.apache.log4j.Logger;
  19
  20 import eu.etaxonomy.cdm.api.service.pager.Pager;
  21 import eu.etaxonomy.cdm.common.CdmUtils;
  22 import eu.etaxonomy.cdm.io.excel.common.ExcelRowBase.PostfixTerm;
  23 import eu.etaxonomy.cdm.io.specimen.excel.in.SpecimenCdmExcelImportState;
  24 import eu.etaxonomy.cdm.io.specimen.excel.in.SpecimenRow;
  25 import eu.etaxonomy.cdm.model.common.CdmBase;
  26 import eu.etaxonomy.cdm.model.common.DefinedTermBase;
  27 import eu.etaxonomy.cdm.model.common.Extension;
  28 import eu.etaxonomy.cdm.model.common.ExtensionType;
  29 import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
  30 import eu.etaxonomy.cdm.model.description.Feature;
  31
  32 /**
  33  * @author a.mueller
  34  * @date 12.07.2011
  35  */
  36 public abstract class ExcelTaxonOrSpecimenImportBase<STATE extends ExcelImportState<? extends ExcelImportConfiguratorBase, ROW>, ROW extends ExcelRowBase> extends ExcelImporterBase<STATE> {
  37         private static final Logger logger = Logger.getLogger(ExcelTaxonOrSpecimenImportBase.class);
  38
  39
  40         protected static final String CDM_UUID_COLUMN = "(?i)(CdmUuid)";
  41         protected static final String IGNORE_COLUMN = "(?i)(Ignore|Not)";
  42
  43
  44         protected static final String RANK_COLUMN = "(?i)(Rank)";
  45         protected static final String FULL_NAME_COLUMN = "(?i)(FullName)";
  46         protected static final String TAXON_UUID_COLUMN = "(?i)(taxonUuid)";
  47         protected static final String FAMILY_COLUMN = "(?i)(Family)";
  48         protected static final String GENUS_COLUMN = "(?i)(Genus)";
  49         protected static final String SPECIFIC_EPITHET_COLUMN = "(?i)(SpecificEpi(thet)?)";
  50         protected static final String INFRASPECIFIC_EPITHET_COLUMN = "(?i)(InfraSpecificEpi(thet)?)";
  51
  52         protected static final String LANGUAGE = "(?i)(Language)";
  53
  54         @Override
  55         protected void analyzeRecord(HashMap<String, String> record, STATE state) {
  56                 Set<String> keys = record.keySet();
  57
  58         ROW row = createDataHolderRow();
  59         state.setCurrentRow(row);
  60
  61         for (String originalKey: keys) {
  62                 KeyValue keyValue = makeKeyValue(record, originalKey, state);
  63                 if (StringUtils.isBlank(keyValue.value)){
  64                         continue;
  65                 }
  66                 if (isBaseColumn(keyValue)){
  67                         handleBaseColumn(keyValue, row);
  68                 }else{
  69                         analyzeSingleValue(keyValue, state);
  70                 }
  71         }
  72         return;
  73         }
  74
  75         protected abstract ROW createDataHolderRow();
  76
  77         /**
  78          * Analyzes a single record value and fills the row instance accordingly.
  79          * @param keyValue
  80          * @param state
  81          * @return
  82          */
  83         protected abstract void analyzeSingleValue(KeyValue keyValue, STATE state);
  84
  85         /**
  86          *      DataHolder class for all key and value information for a cell.
  87          * Value is the value of the cell (as String). Key is the main attribute, further defined by postfix,
  88          * and in case of multiple values indexed.
  89          * TODO doc for refXXX
  90          */
  91         protected class KeyValue{
  92                 public KeyValue() {}
  93
  94                 //original Key
  95                 public String originalKey;
  96                 //value
  97                 public String value;
  98                 //atomized key
  99                 public String key;
 100                 public String postfix;
 101                 public int index = 0;
 102                 public SourceType refType;
 103                 public int refIndex = 0;
 104                 public boolean hasError = false;
 105                 public boolean isKeyData() {
 106                         return (refType == null);
 107                 }
 108                 public boolean isLanguage(){
 109                         return (refType.isLanguage());
 110                 }
 111         }
 112
 /**
  * The kinds of additional information a column may hold for a key. The kind is
  * recognized from a part of the column header which must match the pattern of
  * the according constant completely (case-insensitive).
  */
 public enum SourceType{
     Author("RefAuthor"),
     Title("RefTitle"),
     Year("RefYear"),
     RefExtension("RefExt(ension)?"),
     Language("Lang") //strictly not a reference, so some refactoring/renaming is needed
     ;

     /** The regular expression defining the header parts which represent this type. */
     final String keyMatch;

     /**
      * {@link #keyMatch} as case-insensitive pattern. Compiled once per constant as
      * {@link #byKeyName(String)} is called repeatedly for each cell of an import.
      */
     private final java.util.regex.Pattern keyPattern;

     private SourceType(String keyName){
         this.keyMatch = keyName;
         this.keyPattern = java.util.regex.Pattern.compile("(?i)(" + keyName + ")");
     }


     /**
      * @return <code>true</code> if this type is {@link #Language}
      */
     boolean isLanguage(){
         return this == Language;
     }

     /**
      * Returns the first type whose pattern is matched completely by the given string.
      *
      * @param str the header part to test, may be <code>null</code>
      * @return the matching type or <code>null</code> if none matches
      */
     static SourceType byKeyName(String str){
         //empty and whitespace-only strings match none of the patterns,
         //therefore only null needs to be handled explicitly
         if (str == null){
             return null;
         }
         for (SourceType type : values()){
             if (type.keyPattern.matcher(str).matches()){
                 return type;
             }
         }
         return null;
     }

     /**
      * @param str the header part to test, may be <code>null</code>
      * @return <code>true</code> if the given string represents any source type
      */
     static boolean isKeyName(String str){
         return (byKeyName(str) != null);
     }

 }
 148
 149
 150         /**
 151          * @param record
 152          * @param originalKey
 153          * @param state
 154          * @param keyValue
 155          * @return
 156          */
 157         protected KeyValue makeKeyValue(HashMap<String, String> record, String originalKey, STATE state) {
 158                 KeyValue keyValue = new KeyValue();
 159                 keyValue.originalKey = originalKey;
 160                 String indexedKey = CdmUtils.removeDuplicateWhitespace(originalKey.trim()).toString();
 161                 String[] split = indexedKey.split("_");
 162                 int current = 0;
 163                 //key
 164                 keyValue.key = split[current++];
 165                 //postfix
 166                 if (split.length > current && ! isRefType(split[current]) && ! isInteger(split[current]) ){
 167                         keyValue.postfix = split[current++];
 168                 }
 169                 //index
 170                 if (split.length > current && isInteger(split[current]) ){
 171                         keyValue.index = Integer.valueOf(split[current++]);
 172                 }else{
 173                         keyValue.index = 0;
 174                 }
 175                 //source
 176                 if (split.length > current && ! isIgnore(keyValue.key)){
 177                         //refType
 178                         if (isRefType(split[current])){
 179                                 String refTypeStr = split[current++];
 180                                 keyValue.refType = SourceType.byKeyName(refTypeStr);
 181                                 if (keyValue.refType == null){
 182                                         String message = "Unmatched source key: " + refTypeStr;
 183                                         fireWarningEvent(message, state, 10);
 184                                         logger.warn(message);
 185                                 }
 186                         }else {
 187                                 String message = "RefType expected at %d position of key. But %s is no valid reftype";
 188                                 message = String.format(message, current, split[current]);
 189                                 fireWarningEvent(message, state, 10);
 190                                 logger.warn(message);
 191                                 keyValue.hasError  = true;
 192                         }
 193                         //ref index
 194                         if (split.length > current){
 195                                  if (isInteger(split[current])){
 196                                          keyValue.refIndex = Integer.valueOf(split[current++]);
 197                                  }else{
 198                                         String message = "Ref index expected at position %d of key. But %s is no valid reftype";
 199                                         message = String.format(message, current, split[current]);
 200                                         fireWarningEvent(message, state, 10);
 201                                         logger.warn(message);
 202                                         keyValue.hasError = true;
 203                                  }
 204                         }else {
 205                                 keyValue.refIndex = 0;
 206                         }
 207
 208                 }
 209                 if (split.length > current  && ! isIgnore(keyValue.key)){
 210                         String message = "Key has unexpected part at position %d of key. %s (and following parts) can not be handled";
 211                         message = String.format(message, current, split[current]);
 212                         fireWarningEvent(message, state, 10);
 213                         logger.warn(message);
 214                         keyValue.hasError = true;
 215                 }
 216
 217                 //TODO shouldn't we use originalKey here??
 218                 String value = (String) record.get(indexedKey);
 219                 if (! StringUtils.isBlank(value)) {
 220                         if (logger.isDebugEnabled()) { logger.debug(keyValue.key + ": " + value); }
 221                         value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
 222                         keyValue.value = value;
 223                 }else{
 224                         keyValue.value = null;
 225                 }
 226                 return keyValue;
 227         }
 228
 229
 230         private boolean isIgnore(String key) {
 231                 return key.matches(IGNORE_COLUMN);
 232         }
 233
 234         private boolean isRefType(String string) {
 235                 return SourceType.isKeyName(string);
 236         }
 237
 238
 239         private boolean handleBaseColumn(KeyValue keyValue, ExcelRowBase row) {
 240                 String key = keyValue.key;
 241                 String value = keyValue.value;
 242                 if (key.matches(CDM_UUID_COLUMN)) {
 243                         row.setCdmUuid(UUID.fromString(value)); //VALIDATE UUID
 244                 }
 245                 return true;
 246         }
 247
 248         private boolean isBaseColumn(KeyValue keyValue) {
 249                 String key = keyValue.key;
 250                 if (key.matches(CDM_UUID_COLUMN)){
 251                         return true;
 252                 } else if(isIgnore(keyValue.key)) {
 253                         logger.debug("Ignored column" + keyValue.originalKey);
 254                         return true;
 255                 }
 256                 return false;
 257         }
 258
 259         protected boolean isInteger(String value){
 260                 try {
 261                         Integer.valueOf(value);
 262                         return true;
 263                 } catch (NumberFormatException e) {
 264                         return false;
 265                 }
 266         }
 267
 268
 269         protected boolean analyzeFeatures(STATE state, KeyValue keyValue) {
 270                 String key = keyValue.key;
 271                 Pager<DefinedTermBase> features = getTermService().findByTitle(Feature.class, key, null, null, null, null, null, null);
 272
 273                 if (features.getCount() > 1){
 274                         String message = "More than one feature found matching key " + key;
 275                         fireWarningEvent(message, state, 4);
 276                         return false;
 277                 }else if (features.getCount() == 0){
 278                         return false;
 279                 }else{
 280                         Feature feature = CdmBase.deproxy(features.getRecords().get(0), Feature.class);
 281                         ROW row = state.getCurrentRow();
 282                         if ( keyValue.isKeyData()){
 283                                 row.putFeature(feature.getUuid(), keyValue.index, keyValue.value);
 284                         }else if (keyValue.isLanguage()){
 285                                 row.putFeatureLanguage(feature.getUuid(), keyValue.index, keyValue.value);
 286                         }else{
 287                                 row.putFeatureSource(feature.getUuid(), keyValue.index, keyValue.refType, keyValue.value, keyValue.refIndex);
 288                         }
 289                         return true;
 290                 }
 291         }
 292
 293
 294         protected void handleExtensions(IdentifiableEntity<?> identifiable, SpecimenRow row, SpecimenCdmExcelImportState state) {
 295                 List<PostfixTerm> extensions = row.getExtensions();
 296
 297                 for (PostfixTerm exType : extensions){
 298                         ExtensionType extensionType = state.getPostfixExtensionType(exType.postfix);
 299
 300                         Extension extension = Extension.NewInstance();
 301                         extension.setType(extensionType);
 302                         extension.setValue(exType.term);
 303                         identifiable.addExtension(extension);
 304                 }
 305
 306         }
 307
 308
 309         protected void fireWarningEvent(String message, STATE state, int severity) {
 310                 fireWarningEvent(message, "Record" + state.getCurrentLine(), severity, 1);
 311         }
 312 }