cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/DwcTaxonCsv2CdmTaxonConverter.java

   1 // $Id$
   2 /**
   3 * Copyright (C) 2009 EDIT
   4 * European Distributed Institute of Taxonomy
   5 * http://www.e-taxonomy.eu
   6 *
   7 * The contents of this file are subject to the Mozilla Public License Version 1.1
   8 * See LICENSE.TXT at the top of this package for the full license terms.
   9 */
  10 package eu.etaxonomy.cdm.io.dwca.in;
  11
  12 import java.util.ArrayList;
  13 import java.util.HashSet;
  14 import java.util.List;
  15 import java.util.Map;
  16 import java.util.Set;
  17
  18 import org.apache.commons.lang.StringUtils;
  19 import org.apache.log4j.Logger;
  20
  21 import com.ibm.lsid.MalformedLSIDException;
  22
  23 import eu.etaxonomy.cdm.common.CdmUtils;
  24 import eu.etaxonomy.cdm.io.dwca.TermUri;
  25 import eu.etaxonomy.cdm.model.common.CdmBase;
  26 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
  27 import eu.etaxonomy.cdm.model.common.LSID;
  28 import eu.etaxonomy.cdm.model.name.BotanicalName;
  29 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
  30 import eu.etaxonomy.cdm.model.name.NonViralName;
  31 import eu.etaxonomy.cdm.model.name.Rank;
  32 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
  33 import eu.etaxonomy.cdm.model.name.ZoologicalName;
  34 import eu.etaxonomy.cdm.model.reference.Reference;
  35 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
  36 import eu.etaxonomy.cdm.model.taxon.Classification;
  37 import eu.etaxonomy.cdm.model.taxon.Synonym;
  38 import eu.etaxonomy.cdm.model.taxon.Taxon;
  39 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
  40 import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
  41 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
  42 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
  43
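/**
 * Converter that maps the core taxon records of a Darwin Core Archive CSV stream
 * to CDM objects (taxa, names, references, classifications and their sources).
 *
 * @author a.mueller
 * @date 22.11.2011
 */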
  49 public class DwcTaxonCsv2CdmTaxonConverter extends PartitionableConverterBase<DwcaImportState> implements IPartitionableConverter<CsvStreamItem, IReader<CdmBase>, String>{
  50         @SuppressWarnings("unused")
  51         private static Logger logger = Logger.getLogger(DwcTaxonCsv2CdmTaxonConverter.class);
  52
  53         private static final String ID = "id";
  54         // temporary key for the case that no dataset information is supplied, TODO use something better
  55         public static final String NO_DATASET = "no_dataset_jli773oebhjklw";
  56
  57         private NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
  58
  59         /**
  60          * @param state
  61          */
  62         public DwcTaxonCsv2CdmTaxonConverter(DwcaImportState state) {
  63                 super(state);
  64         }
  65
  66
  67         public IReader<MappedCdmBase> map(CsvStreamItem csvTaxonRecord){
  68                 List<MappedCdmBase> resultList = new ArrayList<MappedCdmBase>();
  69
  70                 //TODO what if not transactional?
  71                 Reference<?> sourceReference = state.getTransactionalSourceReference();
  72                 String sourceReferenceDetail = null;
  73
  74                 //taxon
  75                 TaxonBase<?> taxonBase = getTaxonBase(csvTaxonRecord);
  76                 MappedCdmBase  mcb = new MappedCdmBase(csvTaxonRecord.term, csvTaxonRecord.get(ID), taxonBase);
  77                 resultList.add(mcb);
  78
  79                 //original source
  80                 String id = csvTaxonRecord.get(ID);
  81                 IdentifiableSource source = taxonBase.addSource(id, "Taxon", sourceReference, sourceReferenceDetail);
  82                 MappedCdmBase mappedSource = new MappedCdmBase(csvTaxonRecord.get(ID), source);
  83                 resultList.add(mappedSource);
  84                 csvTaxonRecord.remove(ID);
  85
  86                 //rank
  87                 NomenclaturalCode nomCode = getNomCode(csvTaxonRecord);
  88                 Rank rank = getRank(csvTaxonRecord, nomCode);
  89
  90                 //name && name published in
  91                 TaxonNameBase<?,?> name = getScientificName(csvTaxonRecord, nomCode, rank, resultList, sourceReference);
  92                 taxonBase.setName(name);
  93
  94                 //nameAccordingTo
  95                 MappedCdmBase<Reference> sec = getNameAccordingTo(csvTaxonRecord, resultList);
  96                 if (sec == null && state.getConfig().isUseSourceReferenceAsSec()){
  97                         sec = new MappedCdmBase<Reference>(state.getTransactionalSourceReference());
  98                 }
  99                 if (sec != null){
 100                         taxonBase.setSec(sec.getCdmBase());
 101                 }
 102
 103                 //classification
 104                 handleDataset(csvTaxonRecord, taxonBase, resultList, sourceReference, sourceReferenceDetail);
 105
 106                 //NON core
 107             //term="http://purl.org/dc/terms/identifier"
 108                 //currently only LSIDs
 109                 handleIdentifier(csvTaxonRecord, taxonBase);
 110
 111
 112
 113                 //                  <!-- Top level group; listed as kingdom but may be interpreted as domain or superkingdom
 114 //                       The following eight groups are recognized: Animalia, Archaea, Bacteria, Chromista,
 115 //                       Fungi, Plantae, Protozoa, Viruses -->
 116 //                  <field index='10' term='http://rs.tdwg.org/dwc/terms/kingdom'/>
 117
 118 //                  <!-- Phylum in which the taxon has been classified -->
 119 //                  <field index='11' term='http://rs.tdwg.org/dwc/terms/phylum'/>
 120
 121                 //                  <!-- Class in which the taxon has been classified -->
 122 //                  <field index='12' term='http://rs.tdwg.org/dwc/terms/class'/>
 123
 124                 //                  <!-- Order in which the taxon has been classified -->
 125 //                  <field index='13' term='http://rs.tdwg.org/dwc/terms/order'/>
 126
 127                 //                  <!-- Family in which the taxon has been classified -->
 128 //                  <field index='14' term='http://rs.tdwg.org/dwc/terms/family'/>
 129
 130                 //                  <!-- Genus in which the taxon has been classified -->
 131 //                  <field index='15' term='http://rs.tdwg.org/dwc/terms/genus'/>
 132
 133                 //                  <!-- Subgenus in which the taxon has been classified -->
 134 //                  <field index='16' term='http://rs.tdwg.org/dwc/terms/subgenus'/>
 135 //                  <!-- Specific epithet; for hybrids, the multiplication symbol is included in the epithet -->
 136
 137 //                  <field index='17' term='http://rs.tdwg.org/dwc/terms/specificEpithet'/>
 138 //                  <!-- Infraspecific epithet -->
 139
 140 //                  <field index='18' term='http://rs.tdwg.org/dwc/terms/infraspecificEpithet'/>
 141 //                  <!-- Authorship -->
 142
 143 //                  <field index='19' term='http://rs.tdwg.org/dwc/terms/scientificNameAuthorship'/>
 144 //              ==> see scientific name
 145 //
 146 //              <!-- Acceptance status published in -->
 147 //                  <field index='20' term='http://purl.org/dc/terms/source'/>
 148 //                  <!-- Reference in which the scientific name was first published -->
 149 //                  <field index='21' term='http://rs.tdwg.org/dwc/terms/namePublishedIn'/>
 150 //                  <!-- Taxon scrutinized by -->
 151 //                  <field index='22' term='http://rs.tdwg.org/dwc/terms/nameAccordingTo'/>
 152 //                  <!-- Scrutiny date -->
 153 //                  <field index='23' term='http://purl.org/dc/terms/modified'/>
 154 //                  <!-- Additional data for the taxon -->
 155 //                  <field index='24' term='http://purl.org/dc/terms/description'/>
 156 //                  </core>
 157
 158                 return new ListReader<MappedCdmBase>(resultList);
 159         }
 160
 161
 162
 163         //TODO handle non LSIDs
 164         //TODO handle LSIDs for names
 165         private void handleIdentifier(CsvStreamItem csvTaxonRecord, TaxonBase<?> taxonBase) {
 166                 String identifier = csvTaxonRecord.get(TermUri.DC_IDENTIFIER);
 167                 if (StringUtils.isNotBlank(identifier)){
 168                         if (identifier.trim().startsWith("urn:lsid")){
 169                                 try {
 170                                         LSID lsid = new LSID(identifier);
 171                                         taxonBase.setLsid(lsid);
 172                                 } catch (MalformedLSIDException e) {
 173                                         String message = "LSID is malformed and can't be handled as LSID: %s";
 174                                         message = String.format(message, identifier);
 175                                         fireWarningEvent(message, csvTaxonRecord, 4);
 176                                 }
 177                         }else{
 178                                 String message = "Identifier type not supported: %s";
 179                                 message = String.format(message, identifier);
 180                                 fireWarningEvent(message, csvTaxonRecord, 4);
 181                         }
 182                 }
 183
 184         }
 185
 186
 187         private void handleDataset(CsvStreamItem item, TaxonBase<?> taxonBase, List<MappedCdmBase> resultList, Reference<?> sourceReference, String sourceReferecenDetail) {
 188                 TermUri idTerm = TermUri.DWC_DATASET_ID;
 189                 TermUri strTerm = TermUri.DWC_DATASET_NAME;
 190
 191                 if (config.isDatasetsAsClassifications()){
 192                         String datasetId = CdmUtils.Nz(item.get(idTerm)).trim();
 193                         String datasetName = CdmUtils.Nz(item.get(strTerm)).trim();
 194                                 if (CdmUtils.areBlank(datasetId, datasetName) ){
 195                                 datasetId = NO_DATASET;
 196                         }
 197
 198                         //check id
 199                         boolean classificationExists = state.exists(idTerm.toString() , datasetId, Classification.class);
 200
 201                         //check name
 202                         if (!classificationExists){
 203                                 classificationExists = state.exists(strTerm.toString() , datasetName, Classification.class);
 204                         }
 205
 206                         //if not exists, create new
 207                         if (! classificationExists){
 208                                 String classificationName = StringUtils.isBlank(datasetName)? datasetId : datasetName;
 209                                 if (classificationName.equals(NO_DATASET)){
 210                                         classificationName = "Classification (no name)";  //TODO define by config or zipfile or metadata
 211                                 }
 212
 213                                 String classificationId = StringUtils.isBlank(datasetId)? datasetName : datasetId;
 214                                 Classification classification = Classification.NewInstance(classificationName);
 215                                 //source
 216                                 IdentifiableSource source = classification.addSource(classificationId, "Dataset", sourceReference, sourceReferecenDetail);
 217                                 //add to result
 218                                 resultList.add(new MappedCdmBase(idTerm, datasetId, classification));
 219                                 resultList.add(new MappedCdmBase(strTerm, datasetName, classification));
 220                                 resultList.add(new MappedCdmBase(source));
 221                                 //TODO this is not so nice but currently necessary as classifications are requested in the same partition
 222                                 state.putMapping(idTerm.toString(), classificationId, classification);
 223                                 state.putMapping(strTerm.toString(), classificationName, classification);
 224                         }
 225                 }else if (config.isDatasetsAsSecundumReference() || config.isDatasetsAsOriginalSource()){
 226                         MappedCdmBase<Reference> mappedCitation = getReference(item, resultList, idTerm, strTerm, true);
 227                         if (mappedCitation != null){
 228                                 Reference<?> ref = mappedCitation.getCdmBase();
 229                                 if (config.isDatasetsAsSecundumReference()){
 230                                         //dataset as secundum reference
 231                                         taxonBase.setSec(ref);
 232                                 }else{
 233                                         //dataset as original source
 234                                         taxonBase.addSource(null, null, ref, null);
 235                                 }
 236                         }
 237                 }else{
 238                         String message = "DatasetUse type not yet implemented. Can't import dataset information.";
 239                         fireWarningEvent(message, item, 4);
 240                 }
 241
 242                 //remove to later check if all attributes were used
 243                 item.remove(idTerm);
 244                 item.remove(strTerm);
 245
 246         }
 247
 248
 249         @Override
 250         public String getSourceId(CsvStreamItem item) {
 251                 String id = item.get(ID);
 252                 return id;
 253         }
 254
 255         private MappedCdmBase<Reference> getNameAccordingTo(CsvStreamItem item, List<MappedCdmBase> resultList) {
 256                 if (config.isDatasetsAsSecundumReference()){
 257                         //TODO store nameAccordingTo info some where else or let the user define where to store it.
 258                         return null;
 259                 }else{
 260                         TermUri idTerm = TermUri.DWC_NAME_ACCORDING_TO_ID;
 261                         TermUri strTerm = TermUri.DWC_NAME_ACCORDING_TO;
 262                         MappedCdmBase<Reference> secRef = getReference(item, resultList, idTerm, strTerm, false);
 263                         return secRef;
 264                 }
 265         }
 266
 267         private NomenclaturalCode getNomCode(CsvStreamItem item) {
 268                 String strNomCode = getValue(item, TermUri.DWC_NOMENCLATURAL_CODE);
 269                 NomenclaturalCode nomCode = null;
 270                 // by Nomcenclatural Code
 271                 if (strNomCode != null){
 272                         nomCode = NomenclaturalCode.fromString(strNomCode);
 273                         if (nomCode == null){
 274                                 String message = "NomCode '%s' not recognized";
 275                                 message = String.format(message, strNomCode);
 276                                 fireWarningEvent(message, item, 4);
 277                         }else{
 278                                 return nomCode;
 279                         }
 280                 }
 281                 // by Kingdom
 282                 String strKingdom = getValue(item, TermUri.DWC_KINGDOM);
 283                 if (strKingdom != null){
 284                         if (strKingdom.equalsIgnoreCase("Plantae")){
 285                                 nomCode = NomenclaturalCode.ICNAFP;
 286                         }else if (strKingdom.equalsIgnoreCase("Fungi")){
 287                                 nomCode = NomenclaturalCode.ICNAFP;
 288                         }else if (strKingdom.equalsIgnoreCase("Animalia")){
 289                                 nomCode = NomenclaturalCode.ICZN;
 290                         }else if (strKingdom.equalsIgnoreCase("Protozoa")){
 291                                 nomCode = NomenclaturalCode.ICZN;
 292                         }
 293                 }
 294
 295                 //TODO further kingdoms
 296                 if (nomCode == null){
 297                         //TODO warning
 298                         if (config.getNomenclaturalCode() != null){
 299                                 nomCode = config.getNomenclaturalCode();
 300                         }
 301                 }
 302                 return nomCode;
 303         }
 304
 305
 306         private TaxonNameBase<?,?> getScientificName(CsvStreamItem item, NomenclaturalCode nomCode, Rank rank, List<MappedCdmBase> resultList, Reference sourceReference) {
 307                 TaxonNameBase<?,?> name = null;
 308                 String strScientificName = getValue(item, TermUri.DWC_SCIENTIFIC_NAME);
 309                 //Name
 310                 if (strScientificName != null){
 311                         name = parser.parseFullName(strScientificName, nomCode, rank);
 312                         if ( rank != null && name != null && name.getRank() != null &&  ! rank.equals(name.getRank())){
 313                                 if (config.isValidateRankConsistency()){
 314                                         String message = "Parsed rank %s (%s) differs from rank %s given by fields 'taxonRank' or 'verbatimTaxonRank'";
 315                                         message = String.format(message, name.getRank().getTitleCache(), strScientificName, rank.getTitleCache());
 316                                         fireWarningEvent(message, item, 4);
 317                                 }
 318                         }
 319                         checkAuthorship(name, item);
 320                         resultList.add(new MappedCdmBase(TermUri.DWC_SCIENTIFIC_NAME, strScientificName, name));
 321                 }
 322                 //By ID
 323                 String strScientificNameId = getValue(item, TermUri.DWC_SCIENTIFIC_NAME_ID);
 324                 if (strScientificNameId != null){
 325                         if (config.isScientificNameIdAsOriginalSourceId()){
 326                                 if (name != null){
 327                                         IdentifiableSource source = IdentifiableSource.NewInstance(strScientificNameId, TermUri.DWC_SCIENTIFIC_NAME_ID.toString(), sourceReference, null);
 328                                         name.addSource(source);
 329                                 }
 330                         }else{
 331                                 String message = "ScientificNameId not yet implemented: '%s'";
 332                                 message = String.format(message, strScientificNameId);
 333                                 fireWarningEvent(message, item, 4);
 334                         }
 335                 }
 336
 337                 //namePublishedIn
 338                 TermUri idTerm = TermUri.DWC_NAME_PUBLISHED_IN_ID;
 339                 TermUri strTerm = TermUri.DWC_NAME_PUBLISHED_IN;
 340                 MappedCdmBase<Reference> nomRef = getReference(item, resultList, idTerm, strTerm, false);
 341
 342                 if (name != null){
 343                         if (nomRef != null){
 344                                 name.setNomenclaturalReference(nomRef.getCdmBase());  //check if name already has a nomRef, shouldn't be the case usually
 345                         }
 346                 }else{
 347                         if (nomRef != null){
 348                                 String message = "NamePublishedIn information available but no name exists";
 349                                 fireWarningEvent(message, item, 4);
 350                         }
 351                 }
 352                 return name;
 353         }
 354
 355
 356         /**
 357          * General method to handle references used for multiple attributes.
 358          * @param item
 359          * @param resultList
 360          * @param idTerm
 361          * @param strTerm
 362          * @param idIsInternal
 363          * @return
 364          */
 365         private MappedCdmBase<Reference> getReference(CsvStreamItem item, List<MappedCdmBase> resultList, TermUri idTerm, TermUri strTerm, boolean idIsInternal) {
 366                 Reference<?> newRef = null;
 367                 Reference<?> sourceCitation = null;
 368
 369                 MappedCdmBase<Reference> result = null;
 370                 if (exists(idTerm, item) || exists(strTerm, item)){
 371                         String refId = CdmUtils.Nz(item.get(idTerm)).trim();
 372                         String refStr = CdmUtils.Nz(item.get(strTerm)).trim();
 373                         if (StringUtils.isNotBlank(refId)){
 374                                 List<Reference> references = state.get(idTerm.toString(), refId, Reference.class);
 375                                 if (references.size() == 0){
 376                                         if (! idIsInternal){
 377                                                 //references should already exist in store if not linking to external links like URLs
 378                                                 String message = "External namePublishedInIDs are not yet supported";
 379                                                 fireWarningEvent(message, item, 4);
 380                                         }else{
 381                                                 newRef = ReferenceFactory.newGeneric();  //TODO handle other types if possible
 382                                                 newRef.addSource(refId, idTerm.toString(), sourceCitation, null);
 383                                                 MappedCdmBase<Reference> idResult = new MappedCdmBase<Reference>(idTerm, refId, newRef);
 384                                                 resultList.add(idResult);
 385                                         }
 386                                 }else{
 387                                         //TODO handle list.size > 1 , do we need a list here ?
 388                                         result = new MappedCdmBase<Reference>(idTerm, refId , references.get(0));
 389                                 }
 390                         }
 391                         if (result == null){
 392                                 List<Reference> nomRefs = state.get(strTerm.toString(), refStr, Reference.class);
 393                                 if (nomRefs.size() > 0){
 394                                         //TODO handle list.size > 1 , do we need a list here ?
 395                                         result = new MappedCdmBase<Reference>(strTerm, refStr , nomRefs.get(0));
 396                                 }else{
 397                                         // new Reference
 398                                         if (newRef == null){
 399                                                 newRef = ReferenceFactory.newGeneric();  //TODO handle other types if possible
 400                                         }
 401                                         newRef.setTitleCache(refStr, true);
 402                                         //TODO distinguish available year, authorship, etc. if
 403                                         result = new MappedCdmBase<Reference>(strTerm, refStr, newRef);
 404                                         resultList.add(result);
 405                                 }
 406                         }
 407                 }
 408                 return result;
 409         }
 410
 411
 412         //TODO we may configure in configuration that scientific name never includes Authorship
 413         private void checkAuthorship(TaxonNameBase nameBase, CsvStreamItem item) {
 414                 if (!nameBase.isInstanceOf(NonViralName.class)){
 415                         return;
 416                 }
 417                 NonViralName<?> nvName = CdmBase.deproxy(nameBase, NonViralName.class);
 418                 String strAuthors = getValue(item, TermUri.DWC_SCIENTIFIC_NAME_AUTHORS);
 419
 420                 if (! nvName.isProtectedTitleCache()){
 421                         if (StringUtils.isBlank(nvName.getAuthorshipCache())){
 422                                 if (nvName.isInstanceOf(BotanicalName.class) || nvName.isInstanceOf(ZoologicalName.class)){
 423                                         //TODO can't we also parse NonViralNames correctly ?
 424                                         try {
 425                                                 parser.parseAuthors(nvName, strAuthors);
 426                                         } catch (StringNotParsableException e) {
 427                                                 nvName.setAuthorshipCache(strAuthors);
 428                                         }
 429                                 }else{
 430                                         nvName.setAuthorshipCache(strAuthors);
 431                                 }
 432                                 //TODO throw warning (scientific name should always include authorship) by DwC definition
 433                         }
 434                 }
 435
 436         }
 437
 438
 439         private Rank getRank(CsvStreamItem csvTaxonRecord, NomenclaturalCode nomCode) {
 440                 boolean USE_UNKNOWN = true;
 441                 Rank rank = null;
 442                 String strRank = getValue(csvTaxonRecord,TermUri.DWC_TAXON_RANK);
 443                 String strVerbatimRank = getValue(csvTaxonRecord,TermUri.DWC_VERBATIM_TAXON_RANK);
 444                 if (strRank != null){
 445                         try {
 446                                 rank = Rank.getRankByEnglishName(strRank, nomCode, USE_UNKNOWN);
 447                                 if (rank.equals(Rank.UNKNOWN_RANK())){
 448                                         rank = Rank.getRankByNameOrAbbreviation(strRank, USE_UNKNOWN);
 449                                         if (rank.equals(Rank.UNKNOWN_RANK())){
 450                                                 String message = "Rank can not be defined for '%s'";
 451                                                 message = String.format(message, strRank);
 452                                                 fireWarningEvent(message, csvTaxonRecord, 4);
 453                                         }
 454                                 }
 455                         } catch (UnknownCdmTypeException e) {
 456                                 //should not happen as USE_UNKNOWN is used
 457                                 rank = Rank.UNKNOWN_RANK();
 458                         }
 459                 }
 460                 if ( (rank == null || rank.equals(Rank.UNKNOWN_RANK())) && strVerbatimRank != null){
 461                         try {
 462                                 rank = Rank.getRankByNameOrAbbreviation(strVerbatimRank, USE_UNKNOWN);
 463                                 if (rank.equals(Rank.UNKNOWN_RANK())){
 464                                         String message = "Rank can not be defined for '%s'";
 465                                         message = String.format(message, strVerbatimRank);
 466                                         fireWarningEvent(message, csvTaxonRecord, 4);
 467                                 }
 468                         } catch (UnknownCdmTypeException e) {
 469                                 //should not happen as USE_UNKNOWN is used
 470                                 rank = Rank.UNKNOWN_RANK();
 471                         }
 472                 }
 473                 return rank;
 474         }
 475
 476
 477         /**
 478          * Creates an empty taxon object with a given status.
 479          * @param item
 480          * @return
 481          */
 482         private TaxonBase<?> getTaxonBase(CsvStreamItem item) {
 483                 TaxonNameBase<?,?> name = null;
 484                 Reference<?> sec = null;
 485                 TaxonBase<?> result;
 486                 String taxStatus = item.get(TermUri.DWC_TAXONOMIC_STATUS);
 487                 String status = "";
 488
 489                 if (taxStatus != null){
 490                         if (taxStatus.matches("accepted.*|valid")){
 491                                 status += "A";
 492                         } else if (taxStatus.matches(".*synonym|invalid|not accepted")){   //not accepted comes from scratchpads
 493                                 status += "S";
 494                         } else if (taxStatus.matches("misapplied.*")){
 495                                 status += "M";
 496                         } else{
 497                                 status += "?";
 498                         }
 499                         item.remove(TermUri.DWC_TAXONOMIC_STATUS);
 500                 }
 501                 if (! CdmUtils.isBlank(item.get(TermUri.DWC_ACCEPTED_NAME_USAGE_ID))){
 502                         // acceptedNameUsageId = id
 503                         if (getSourceId(item).equals(item.get(TermUri.DWC_ACCEPTED_NAME_USAGE_ID))){
 504                                 status += "A";
 505                         }else{
 506                                 status += "S";
 507                         }
 508                 }
 509                 if (status.contains("A") || status.contains("M")){
 510                         result = Taxon.NewInstance(name, sec);
 511                         if (status.contains("S") && ! status.contains("M") ){
 512                                 String message = "Ambigous taxon status (%s)";
 513                                 message = String.format(message, status);
 514                                 fireWarningEvent(message, item, 6);
 515                         }
 516                 } else if (status.contains("S")){
 517                         result = Synonym.NewInstance(name, sec);
 518                 } else{
 519                         result = Taxon.NewUnknownStatusInstance(name, sec);
 520                 }
 521
 522                 return result;
 523
 524         }
 525
 526 // ********************** PARTITIONABLE ****************************************/
 527
 528
 529         @Override
 530         protected void makeForeignKeysForItem(CsvStreamItem item, Map<String, Set<String>> fkMap) {
 531                 String value;
 532                 String key;
 533
 534                 //namePublishedIn
 535                 if ( hasValue(value = item.get(key = TermUri.DWC_NAME_PUBLISHED_IN_ID.toString()))){
 536                         Set<String> keySet = getKeySet(key, fkMap);
 537                         keySet.add(value);
 538                 }
 539                 if (config.isDeduplicateNamePublishedIn()){
 540                         if ( hasValue(value = item.get(key = TermUri.DWC_NAME_PUBLISHED_IN.toString()))){
 541                                 Set<String> keySet = getKeySet(key, fkMap);
 542                                 keySet.add(value);
 543                         }
 544                 }
 545
 546                 //nameAccordingTo
 547                 if (! config.isDatasetsAsSecundumReference()){
 548                         if ( hasValue(value = item.get(key = TermUri.DWC_NAME_ACCORDING_TO_ID.toString()))){
 549                                 Set<String> keySet = getKeySet(key, fkMap);
 550                                 keySet.add(value);
 551                         }
 552                         if ( hasValue(value = item.get(key = TermUri.DWC_NAME_ACCORDING_TO.toString()))){
 553                                 Set<String> keySet = getKeySet(key, fkMap);
 554                                 keySet.add(value);
 555                         }
 556                 }
 557
 558                 //dataset
 559                 if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_ID.toString()))){
 560                         Set<String> keySet = getKeySet(key, fkMap);
 561                         keySet.add(value);
 562                 }
 563                 if ( hasValue(value = item.get(key = TermUri.DWC_DATASET_NAME.toString()))){
 564                         Set<String> keySet = getKeySet(key, fkMap);
 565                         keySet.add(value);
 566                 }
 567
 568         }
 569
 570
 571         @Override
 572         public Set<String> requiredSourceNamespaces() {
 573                 Set<String> result = new HashSet<String>();
 574                 result.add(TermUri.DWC_NAME_PUBLISHED_IN_ID.toString());
 575                 result.add(TermUri.DWC_NAME_PUBLISHED_IN.toString());
 576                 if (!config.isDatasetsAsSecundumReference()){
 577                         result.add(TermUri.DWC_NAME_ACCORDING_TO_ID.toString());
 578                         result.add(TermUri.DWC_NAME_ACCORDING_TO.toString());
 579                 }
 580                 result.add(TermUri.DWC_DATASET_ID.toString());
 581                 result.add(TermUri.DWC_DATASET_NAME.toString());
 582                 return result;
 583         }
 584
 585 //** ***************************** TO STRING *********************************************/
 586
 587         @Override
 588         public String toString(){
 589                 return this.getClass().getName();
 590         }
 591
 592
 593
 594 }