2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.excel
.taxa
;
12 import java
.net
.MalformedURLException
;
14 import java
.net
.URISyntaxException
;
15 import java
.util
.Arrays
;
16 import java
.util
.HashSet
;
19 import org
.apache
.log4j
.Logger
;
20 import org
.springframework
.stereotype
.Component
;
22 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
23 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
24 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
25 import eu
.etaxonomy
.cdm
.model
.description
.CommonTaxonName
;
26 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
27 import eu
.etaxonomy
.cdm
.model
.description
.Distribution
;
28 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
29 import eu
.etaxonomy
.cdm
.model
.description
.PresenceTerm
;
30 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
31 import eu
.etaxonomy
.cdm
.model
.description
.TaxonNameDescription
;
32 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
33 import eu
.etaxonomy
.cdm
.model
.location
.NamedArea
;
34 import eu
.etaxonomy
.cdm
.model
.location
.TdwgArea
;
35 import eu
.etaxonomy
.cdm
.model
.media
.Media
;
36 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
37 import eu
.etaxonomy
.cdm
.model
.name
.NonViralName
;
38 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
39 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
40 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
41 import eu
.etaxonomy
.cdm
.model
.taxon
.Classification
;
42 import eu
.etaxonomy
.cdm
.model
.taxon
.Synonym
;
43 import eu
.etaxonomy
.cdm
.model
.taxon
.SynonymRelationshipType
;
44 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
45 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
46 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.StringNotParsableException
;
47 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
48 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImpl
;
51 * @author a.babadshanjan
57 public class NormalExplicitImport
extends TaxonExcelImporterBase
{
58 private static final Logger logger
= Logger
.getLogger(NormalExplicitImport
.class);
60 public static Set
<String
> validMarkers
= new HashSet
<String
>(Arrays
.asList(new String
[]{"", "valid", "accepted", "a", "v", "t"}));
61 public static Set
<String
> synonymMarkers
= new HashSet
<String
>(Arrays
.asList(new String
[]{"", "invalid", "synonym", "s", "i"}));
65 * @see eu.etaxonomy.cdm.io.excel.common.ExcelTaxonOrSpecimenImportBase#analyzeSingleValue(eu.etaxonomy.cdm.io.excel.common.ExcelTaxonOrSpecimenImportBase.KeyValue, eu.etaxonomy.cdm.io.excel.common.ExcelImportState)
68 protected boolean analyzeSingleValue(KeyValue keyValue
, TaxonExcelImportState state
) {
69 boolean success
= true;
71 NormalExplicitRow normalExplicitRow
= state
.getCurrentRow();
72 String key
= keyValue
.key
;
73 String value
= keyValue
.value
;
74 Integer index
= keyValue
.index
;
75 if (key
.equalsIgnoreCase(ID_COLUMN
)) {
76 int ivalue
= floatString2IntValue(value
);
77 normalExplicitRow
.setId(ivalue
);
79 } else if(key
.equalsIgnoreCase(PARENT_ID_COLUMN
)) {
80 int ivalue
= floatString2IntValue(value
);
81 normalExplicitRow
.setParentId(ivalue
);
83 } else if(key
.equalsIgnoreCase(RANK_COLUMN
)) {
84 normalExplicitRow
.setRank(value
);
86 } else if(key
.equalsIgnoreCase(SCIENTIFIC_NAME_COLUMN
)) {
87 normalExplicitRow
.setScientificName(value
);
89 } else if(key
.equalsIgnoreCase(AUTHOR_COLUMN
)) {
90 normalExplicitRow
.setAuthor(value
);
92 } else if(key
.equalsIgnoreCase(NAME_STATUS_COLUMN
)) {
93 normalExplicitRow
.setNameStatus(value
);
95 } else if(key
.equalsIgnoreCase(VERNACULAR_NAME_COLUMN
)) {
96 normalExplicitRow
.setCommonName(value
);
98 } else if(key
.equalsIgnoreCase(LANGUAGE_COLUMN
)) {
99 normalExplicitRow
.setLanguage(value
);
101 } else if(key
.equalsIgnoreCase(TDWG_COLUMN
)) {
102 value
= value
.replace(".0", "");
103 normalExplicitRow
.putDistribution(index
, value
);
105 } else if(key
.equalsIgnoreCase(PROTOLOGUE_COLUMN
)) {
106 normalExplicitRow
.putProtologue(index
, value
);
108 } else if(key
.equalsIgnoreCase(IMAGE_COLUMN
)) {
109 normalExplicitRow
.putImage(index
, value
);
113 logger
.error("Unexpected column header " + key
);
119 * @see eu.etaxonomy.cdm.io.excel.common.ExcelTaxonOrSpecimenImportBase#createDataHolderRow()
122 protected NormalExplicitRow
createDataHolderRow() {
123 return new NormalExplicitRow();
128 * Stores taxa records in DB
131 protected boolean firstPass(TaxonExcelImportState state
) {
132 boolean success
= true;
134 NormalExplicitRow taxonDataHolder
= state
.getCurrentRow();
136 String rankStr
= taxonDataHolder
.getRank();
137 String taxonNameStr
= taxonDataHolder
.getScientificName();
138 String authorStr
= taxonDataHolder
.getAuthor();
139 String nameStatus
= taxonDataHolder
.getNameStatus();
140 Integer id
= taxonDataHolder
.getId();
142 if (CdmUtils
.isNotEmpty(taxonNameStr
)) {
144 TaxonBase taxonBase
= null;
145 if (taxonDataHolder
.getCdmUuid() != null){
146 taxonBase
= getTaxonService().find(taxonDataHolder
.getCdmUuid());
151 rank
= Rank
.getRankByNameOrAbbreviation(rankStr
);
152 } catch (UnknownCdmTypeException ex
) {
154 rank
= Rank
.getRankByEnglishName(rankStr
, state
.getConfig().getNomenclaturalCode(), false);
155 } catch (UnknownCdmTypeException e
) {
157 logger
.error(rankStr
+ " is not a valid rank.");
162 taxonBase
= createTaxon(state
, rank
, taxonNameStr
, authorStr
, nameStatus
);
164 if (taxonBase
== null){
165 String message
= "Taxon could not be created. Record will not be handled";
166 fireWarningEvent(message
, "Record: " + state
.getCurrentLine(), 6);
171 for (String protologue
: taxonDataHolder
.getProtologues()){
172 TextData textData
= TextData
.NewInstance(Feature
.PROTOLOGUE());
173 this.getNameDescription(taxonBase
.getName()).addElement(textData
);
176 uri
= new URI(protologue
);
177 textData
.addMedia(Media
.NewInstance(uri
, null, null, null));
178 } catch (URISyntaxException e
) {
179 String warning
= "URISyntaxException when trying to convert to URI: " + protologue
;
180 logger
.error(warning
);
185 for (String imageUrl
: taxonDataHolder
.getImages()){
187 Taxon taxon
= CdmBase
.deproxy(taxonBase
, Taxon
.class);
188 TaxonDescription td
= taxon
.getImageGallery(true);
189 DescriptionElementBase mediaHolder
;
190 if (td
.getElements().size() != 0){
191 mediaHolder
= td
.getElements().iterator().next();
193 mediaHolder
= TextData
.NewInstance(Feature
.IMAGE());
194 td
.addElement(mediaHolder
);
197 Media media
= getImageMedia(imageUrl
, true);
198 mediaHolder
.addMedia(media
);
199 } catch (MalformedURLException e
) {
200 logger
.warn("Can't add media: " + e
.getMessage());
205 for (String tdwg
: taxonDataHolder
.getDistributions()){
207 Taxon taxon
= CdmBase
.deproxy(taxonBase
, Taxon
.class);
208 TaxonDescription td
= this.getTaxonDescription(taxon
, false, true);
209 NamedArea area
= TdwgArea
.getAreaByTdwgAbbreviation(tdwg
);
211 area
= TdwgArea
.getAreaByTdwgLabel(tdwg
);
214 Distribution distribution
= Distribution
.NewInstance(area
, PresenceTerm
.PRESENT());
215 td
.addElement(distribution
);
217 String message
= "TDWG area could not be recognized: " + tdwg
;
218 logger
.warn(message
);
224 state
.putTaxon(id
, taxonBase
);
225 getTaxonService().save(taxonBase
);
233 * @param taxonNameStr
238 private TaxonBase
createTaxon(TaxonExcelImportState state
, Rank rank
,
239 String taxonNameStr
, String authorStr
, String nameStatus
) {
240 // Create the taxon name object depending on the setting of the nomenclatural code
241 // in the configurator (botanical code, zoological code, etc.)
242 NomenclaturalCode nc
= getConfigurator().getNomenclaturalCode();
244 TaxonBase taxonBase
= null;
246 String titleCache
= CdmUtils
.concat(" ", taxonNameStr
, authorStr
);
247 if (! synonymMarkers
.contains(nameStatus
) && state
.getConfig().isDoMatchTaxa()){
248 titleCache
= CdmUtils
.concat(" ", taxonNameStr
, authorStr
);
249 taxonBase
= getTaxonService().findBestMatchingTaxon(titleCache
);
251 taxonBase
= getTaxonService().findBestMatchingSynonym(titleCache
);
252 if (taxonBase
!= null){
253 logger
.info("Matching taxon/synonym found for " + titleCache
);
256 if (taxonBase
!= null){
257 logger
.info("Matching taxon/synonym found for " + titleCache
);
259 taxonBase
= createTaxon(state
, rank
, taxonNameStr
, authorStr
, nameStatus
, nc
);
268 * Stores parent-child, synonym and common name relationships
271 protected boolean secondPass(TaxonExcelImportState state
) {
272 boolean success
= true;
274 String taxonNameStr
= state
.getCurrentRow().getScientificName();
275 String nameStatus
= state
.getCurrentRow().getNameStatus();
276 String commonNameStr
= state
.getCurrentRow().getCommonName();
277 Integer parentId
= state
.getCurrentRow().getParentId();
278 Integer childId
= state
.getCurrentRow().getId();
280 Taxon parentTaxon
= (Taxon
)state
.getTaxonBase(parentId
);
281 if (CdmUtils
.isNotEmpty(taxonNameStr
)) {
282 nameStatus
= CdmUtils
.Nz(nameStatus
).trim().toLowerCase();
283 if (validMarkers
.contains(nameStatus
)){
284 Taxon taxon
= (Taxon
)state
.getTaxonBase(childId
);
285 // Add the parent relationship
286 if (state
.getCurrentRow().getParentId() != 0) {
287 if (parentTaxon
!= null) {
288 //Taxon taxon = (Taxon)state.getTaxonBase(childId);
290 Reference citation
= state
.getConfig().getSourceReference();
291 String microCitation
= null;
292 Taxon childTaxon
= taxon
;
293 success
&= makeParent(state
, parentTaxon
, childTaxon
, citation
, microCitation
);
294 getTaxonService().saveOrUpdate(parentTaxon
);
296 logger
.warn("Taxonomic parent not found for " + taxonNameStr
);
300 //do nothing (parent == 0) no parent exists
302 }else if (synonymMarkers
.contains(nameStatus
)){
303 //add synonym relationship
305 TaxonBase taxonBase
= state
.getTaxonBase(childId
);
306 Synonym synonym
= CdmBase
.deproxy(taxonBase
,Synonym
.class);
307 parentTaxon
.addSynonym(synonym
, SynonymRelationshipType
.SYNONYM_OF());
308 getTaxonService().saveOrUpdate(parentTaxon
);
309 } catch (Exception e
) {
310 logger
.warn("Child id = " + childId
);
315 if (CdmUtils
.isNotEmpty(commonNameStr
)){ // add common name to taxon
316 handleCommonName(state
, taxonNameStr
, commonNameStr
, parentId
);
318 } catch (Exception e
) {
327 * @param taxonNameStr
328 * @param commonNameStr
331 private void handleCommonName(TaxonExcelImportState state
,
332 String taxonNameStr
, String commonNameStr
, Integer parentId
) {
333 Language language
= getTermService().getLanguageByIso(state
.getCurrentRow().getLanguage());
334 if (language
== null && CdmUtils
.isNotEmpty(state
.getCurrentRow().getLanguage()) ){
335 String error
="Language is null but shouldn't";
337 throw new IllegalArgumentException(error
);
339 CommonTaxonName commonTaxonName
= CommonTaxonName
.NewInstance(commonNameStr
, language
);
341 Taxon taxon
= (Taxon
)state
.getTaxonBase(parentId
);
342 TaxonDescription taxonDescription
= getTaxonDescription(taxon
, false, true);
343 taxonDescription
.addElement(commonTaxonName
);
344 logger
.info("Common name " + commonNameStr
+ " added to " + taxon
.getTitleCache());
345 } catch (ClassCastException ex
) {
346 logger
.error(taxonNameStr
+ " is not a taxon instance.");
354 * @param taxonNameStr
360 private TaxonBase
createTaxon(TaxonExcelImportState state
, Rank rank
, String taxonNameStr
,
361 String authorStr
, String nameStatus
, NomenclaturalCode nc
) {
363 NonViralName taxonNameBase
= null;
364 if (nc
== NomenclaturalCode
.ICVCN
){
365 logger
.warn("ICVCN not yet supported");
368 taxonNameBase
=(NonViralName
) nc
.getNewTaxonNameInstance(rank
);
369 //NonViralName nonViralName = (NonViralName)taxonNameBase;
370 NonViralNameParserImpl parser
= NonViralNameParserImpl
.NewInstance();
371 taxonNameBase
= parser
.parseFullName(taxonNameStr
, nc
, rank
);
373 taxonNameBase
.setNameCache(taxonNameStr
);
376 if (CdmUtils
.isNotEmpty(authorStr
)) {
378 parser
.parseAuthors(taxonNameBase
, authorStr
);
379 } catch (StringNotParsableException e
) {
380 taxonNameBase
.setAuthorshipCache(authorStr
);
386 Reference sec
= state
.getConfig().getSourceReference();
388 nameStatus
= CdmUtils
.Nz(nameStatus
).trim().toLowerCase();
389 if (validMarkers
.contains(nameStatus
)){
390 taxonBase
= Taxon
.NewInstance(taxonNameBase
, sec
);
391 }else if (synonymMarkers
.contains(nameStatus
)){
392 taxonBase
= Synonym
.NewInstance(taxonNameBase
, sec
);
394 Taxon taxon
= Taxon
.NewInstance(taxonNameBase
, sec
);
395 taxon
.setTaxonStatusUnknown(true);
405 //TODO implementation must be improved when matching of taxon names with existing names is implemented
406 //=> the assumption that the only description is the description added by this import
408 private TaxonNameDescription
getNameDescription(TaxonNameBase name
) {
409 Set
<TaxonNameDescription
> descriptions
= name
.getDescriptions();
410 if (descriptions
.size()>1){
411 throw new IllegalStateException("Implementation does not yet support names with multiple descriptions");
412 }else if (descriptions
.size()==1){
413 return descriptions
.iterator().next();
415 return TaxonNameDescription
.NewInstance(name
);
419 private boolean makeParent(TaxonExcelImportState state
, Taxon parentTaxon
, Taxon childTaxon
, Reference citation
, String microCitation
){
420 boolean success
= true;
421 Reference sec
= state
.getConfig().getSourceReference();
423 // Reference sec = parentTaxon.getSec();
424 Classification tree
= state
.getTree(sec
);
426 tree
= makeTree(state
, sec
);
428 if (sec
.equals(childTaxon
.getSec())){
429 success
&= (null != tree
.addParentChild(parentTaxon
, childTaxon
, citation
, microCitation
));
431 logger
.warn("No relationship added for child " + childTaxon
.getTitleCache());
438 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
441 protected boolean doCheck(TaxonExcelImportState state
) {
442 logger
.warn("DoCheck not yet implemented for NormalExplicitImport");
447 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IoStateBase)
450 protected boolean isIgnore(TaxonExcelImportState state
) {