/**
 * Copyright (C) 2007 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
package eu.etaxonomy.cdm.io.excel.taxa;
import java.net.MalformedURLException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.springframework.stereotype.Component;

import eu.etaxonomy.cdm.common.CdmUtils;
import eu.etaxonomy.cdm.model.common.CdmBase;
import eu.etaxonomy.cdm.model.common.Language;
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
import eu.etaxonomy.cdm.model.description.Distribution;
import eu.etaxonomy.cdm.model.description.Feature;
import eu.etaxonomy.cdm.model.description.PresenceTerm;
import eu.etaxonomy.cdm.model.description.TaxonDescription;
import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
import eu.etaxonomy.cdm.model.description.TextData;
import eu.etaxonomy.cdm.model.location.NamedArea;
import eu.etaxonomy.cdm.model.location.TdwgArea;
import eu.etaxonomy.cdm.model.media.Media;
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
import eu.etaxonomy.cdm.model.name.NonViralName;
import eu.etaxonomy.cdm.model.name.Rank;
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
import eu.etaxonomy.cdm.model.reference.ReferenceBase;
import eu.etaxonomy.cdm.model.taxon.Synonym;
import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
import eu.etaxonomy.cdm.model.taxon.Taxon;
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
import eu.etaxonomy.cdm.model.taxon.TaxonomicTree;
import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
51 * @author a.babadshanjan
57 public class NormalExplicitImport
extends TaxonExcelImporterBase
{
58 private static final Logger logger
= Logger
.getLogger(NormalExplicitImport
.class);
60 public static Set
<String
> validMarkers
= new HashSet
<String
>(Arrays
.asList(new String
[]{"", "valid", "accepted", "a", "v", "t"}));
61 public static Set
<String
> synonymMarkers
= new HashSet
<String
>(Arrays
.asList(new String
[]{"", "invalid", "synonym", "s", "i"}));
65 protected boolean isIgnore(TaxonExcelImportState state
) {
71 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
74 protected boolean doCheck(TaxonExcelImportState state
) {
75 logger
.warn("DoCheck not yet implemented for NormalExplicitImport");
80 protected boolean analyzeRecord(HashMap
<String
, String
> record
, TaxonExcelImportState state
) {
82 boolean success
= true;
83 Set
<String
> keys
= record
.keySet();
85 NormalExplicitRow normalExplicitRow
= new NormalExplicitRow();
86 state
.setTaxonLight(normalExplicitRow
);
88 for (String originalKey
: keys
) {
90 String indexedKey
= CdmUtils
.removeDuplicateWhitespace(originalKey
.trim()).toString();
91 String
[] split
= indexedKey
.split("_");
92 String key
= split
[0];
93 if (split
.length
> 1){
94 String indexString
= split
[1];
96 index
= Integer
.valueOf(indexString
);
97 } catch (NumberFormatException e
) {
98 String message
= "Index must be integer";
99 logger
.error(message
);
104 String value
= (String
) record
.get(indexedKey
);
105 if (! StringUtils
.isBlank(value
)) {
106 if (logger
.isDebugEnabled()) { logger
.debug(key
+ ": " + value
); }
107 value
= CdmUtils
.removeDuplicateWhitespace(value
.trim()).toString();
113 if (key
.equalsIgnoreCase(ID_COLUMN
)) {
114 int ivalue
= floatString2IntValue(value
);
115 normalExplicitRow
.setId(ivalue
);
117 } else if(key
.equalsIgnoreCase(PARENT_ID_COLUMN
)) {
118 int ivalue
= floatString2IntValue(value
);
119 normalExplicitRow
.setParentId(ivalue
);
121 } else if(key
.equalsIgnoreCase(RANK_COLUMN
)) {
122 normalExplicitRow
.setRank(value
);
124 } else if(key
.equalsIgnoreCase(SCIENTIFIC_NAME_COLUMN
)) {
125 normalExplicitRow
.setScientificName(value
);
127 } else if(key
.equalsIgnoreCase(AUTHOR_COLUMN
)) {
128 normalExplicitRow
.setAuthor(value
);
130 } else if(key
.equalsIgnoreCase(NAME_STATUS_COLUMN
)) {
131 normalExplicitRow
.setNameStatus(value
);
133 } else if(key
.equalsIgnoreCase(VERNACULAR_NAME_COLUMN
)) {
134 normalExplicitRow
.setCommonName(value
);
136 } else if(key
.equalsIgnoreCase(LANGUAGE_COLUMN
)) {
137 normalExplicitRow
.setLanguage(value
);
139 } else if(key
.equalsIgnoreCase(TDWG_COLUMN
)) {
140 value
= value
.replace(".0", "");
141 normalExplicitRow
.putDistribution(index
, value
);
143 } else if(key
.equalsIgnoreCase(PROTOLOGUE_COLUMN
)) {
144 normalExplicitRow
.putProtologue(index
, value
);
146 } else if(key
.equalsIgnoreCase(IMAGE_COLUMN
)) {
147 normalExplicitRow
.putImage(index
, value
);
151 logger
.error("Unexpected column header " + key
);
159 * Stores taxa records in DB
162 protected boolean firstPass(TaxonExcelImportState state
) {
163 boolean success
= true;
165 NormalExplicitRow taxonLight
= state
.getTaxonLight();
167 String rankStr
= taxonLight
.getRank();
168 String taxonNameStr
= taxonLight
.getScientificName();
169 String authorStr
= taxonLight
.getAuthor();
170 String nameStatus
= taxonLight
.getNameStatus();
171 Integer id
= taxonLight
.getId();
173 if (CdmUtils
.isNotEmpty(taxonNameStr
)) {
175 // Determine the rank
177 rank
= Rank
.getRankByNameOrAbbreviation(rankStr
);
178 } catch (UnknownCdmTypeException ex
) {
180 rank
= Rank
.getRankByEnglishName(rankStr
, state
.getConfig().getNomenclaturalCode(), false);
181 } catch (UnknownCdmTypeException e
) {
183 logger
.error(rankStr
+ " is not a valid rank.");
187 // Create the taxon name object depending on the setting of the nomenclatural code
188 // in the configurator (botanical code, zoological code, etc.)
189 NomenclaturalCode nc
= getConfigurator().getNomenclaturalCode();
191 TaxonBase taxonBase
= null;
193 if (! synonymMarkers
.contains(nameStatus
) && state
.getConfig().isDoMatchTaxa()){
194 String titleCache
= CdmUtils
.concat(" ", taxonNameStr
, authorStr
);
195 taxonBase
= getTaxonService().findBestMatchingTaxon(titleCache
);
196 if (taxonBase
!= null){
197 logger
.warn("Matching taxon found for " + titleCache
);
200 if (taxonBase
== null){
201 taxonBase
= createTaxon(state
, rank
, taxonNameStr
, authorStr
, nameStatus
, nc
);
203 if (taxonBase
== null){
209 for (String protologue
: taxonLight
.getProtologues()){
210 TextData textData
= TextData
.NewInstance(Feature
.PROTOLOGUE());
211 this.getNameDescription(taxonBase
.getName()).addElement(textData
);
212 textData
.addMedia(Media
.NewInstance(protologue
, null, null, null));
216 for (String imageUrl
: taxonLight
.getImages()){
218 Taxon taxon
= CdmBase
.deproxy(taxonBase
, Taxon
.class);
219 TaxonDescription td
= taxon
.getImageGallery(true);
220 DescriptionElementBase mediaHolder
;
221 if (td
.getElements().size() != 0){
222 mediaHolder
= td
.getElements().iterator().next();
224 mediaHolder
= TextData
.NewInstance(Feature
.IMAGE());
225 td
.addElement(mediaHolder
);
228 Media media
= getImageMedia(imageUrl
, true);
229 mediaHolder
.addMedia(media
);
230 } catch (MalformedURLException e
) {
231 logger
.warn("Can't add media: " + e
.getMessage());
236 for (String tdwg
: taxonLight
.getDistributions()){
238 Taxon taxon
= CdmBase
.deproxy(taxonBase
, Taxon
.class);
239 TaxonDescription td
= this.getTaxonDescription(taxon
, false, true);
240 NamedArea area
= TdwgArea
.getAreaByTdwgAbbreviation(tdwg
);
242 area
= TdwgArea
.getAreaByTdwgLabel(tdwg
);
245 Distribution distribution
= Distribution
.NewInstance(area
, PresenceTerm
.PRESENT());
246 td
.addElement(distribution
);
248 String message
= "TDWG area could not be recognized: " + tdwg
;
249 logger
.warn(message
);
255 state
.putTaxon(id
, taxonBase
);
256 getTaxonService().save(taxonBase
);
265 * Stores parent-child, synonym and common name relationships
268 protected boolean secondPass(TaxonExcelImportState state
) {
269 boolean success
= true;
271 String taxonNameStr
= state
.getTaxonLight().getScientificName();
272 String nameStatus
= state
.getTaxonLight().getNameStatus();
273 String commonNameStr
= state
.getTaxonLight().getCommonName();
274 Integer parentId
= state
.getTaxonLight().getParentId();
275 Integer childId
= state
.getTaxonLight().getId();
277 Taxon parentTaxon
= (Taxon
)state
.getTaxonBase(parentId
);
278 if (CdmUtils
.isNotEmpty(taxonNameStr
)) {
279 nameStatus
= CdmUtils
.Nz(nameStatus
).trim().toLowerCase();
280 if (validMarkers
.contains(nameStatus
)){
281 Taxon taxon
= (Taxon
)state
.getTaxonBase(childId
);
282 // Add the parent relationship
283 if (state
.getTaxonLight().getParentId() != 0) {
284 if (parentTaxon
!= null) {
285 //Taxon taxon = (Taxon)state.getTaxonBase(childId);
287 ReferenceBase citation
= state
.getConfig().getSourceReference();
288 String microCitation
= null;
289 Taxon childTaxon
= taxon
;
290 success
&= makeParent(state
, parentTaxon
, childTaxon
, citation
, microCitation
);
291 getTaxonService().saveOrUpdate(parentTaxon
);
293 logger
.warn("Taxonomic parent not found for " + taxonNameStr
);
297 //do nothing (parent == 0) no parent exists
299 }else if (synonymMarkers
.contains(nameStatus
)){
300 //add synonym relationship
302 TaxonBase taxonBase
= state
.getTaxonBase(childId
);
303 Synonym synonym
= CdmBase
.deproxy(taxonBase
,Synonym
.class);
304 parentTaxon
.addSynonym(synonym
, SynonymRelationshipType
.SYNONYM_OF());
305 getTaxonService().saveOrUpdate(parentTaxon
);
306 } catch (Exception e
) {
307 logger
.warn("Child id = " + childId
);
312 if (CdmUtils
.isNotEmpty(commonNameStr
)){ // add common name to taxon
313 handleCommonName(state
, taxonNameStr
, commonNameStr
, parentId
);
315 } catch (Exception e
) {
324 * @param taxonNameStr
325 * @param commonNameStr
328 private void handleCommonName(TaxonExcelImportState state
,
329 String taxonNameStr
, String commonNameStr
, Integer parentId
) {
330 Language language
= getTermService().getLanguageByIso(state
.getTaxonLight().getLanguage());
331 if (language
== null && CdmUtils
.isNotEmpty(state
.getTaxonLight().getLanguage()) ){
332 String error
="Language is null but shouldn't";
334 throw new IllegalArgumentException(error
);
336 CommonTaxonName commonTaxonName
= CommonTaxonName
.NewInstance(commonNameStr
, language
);
338 Taxon taxon
= (Taxon
)state
.getTaxonBase(parentId
);
339 TaxonDescription taxonDescription
= getTaxonDescription(taxon
, false, true);
340 taxonDescription
.addElement(commonTaxonName
);
341 logger
.info("Common name " + commonNameStr
+ " added to " + taxon
.getTitleCache());
342 } catch (ClassCastException ex
) {
343 logger
.error(taxonNameStr
+ " is not a taxon instance.");
351 * @param taxonNameStr
357 private TaxonBase
createTaxon(TaxonExcelImportState state
, Rank rank
, String taxonNameStr
,
358 String authorStr
, String nameStatus
, NomenclaturalCode nc
) {
360 NonViralName taxonNameBase
= null;
361 if (nc
== NomenclaturalCode
.ICVCN
){
362 logger
.warn("ICVCN not yet supported");
365 taxonNameBase
=(NonViralName
) nc
.getNewTaxonNameInstance(rank
);
366 //NonViralName nonViralName = (NonViralName)taxonNameBase;
367 NonViralNameParserImpl parser
= NonViralNameParserImpl
.NewInstance();
368 taxonNameBase
= parser
.parseFullName(taxonNameStr
, nc
, rank
);
370 taxonNameBase
.setNameCache(taxonNameStr
);
373 if (CdmUtils
.isNotEmpty(authorStr
)) {
375 parser
.parseAuthors(taxonNameBase
, authorStr
);
376 } catch (StringNotParsableException e
) {
377 taxonNameBase
.setAuthorshipCache(authorStr
);
383 ReferenceBase sec
= state
.getConfig().getSourceReference();
385 nameStatus
= CdmUtils
.Nz(nameStatus
).trim().toLowerCase();
386 if (validMarkers
.contains(nameStatus
)){
387 taxonBase
= Taxon
.NewInstance(taxonNameBase
, sec
);
388 }else if (synonymMarkers
.contains(nameStatus
)){
389 taxonBase
= Synonym
.NewInstance(taxonNameBase
, sec
);
391 Taxon taxon
= Taxon
.NewInstance(taxonNameBase
, sec
);
392 taxon
.setTaxonStatusUnknown(true);
402 //TODO implementation must be improved when matching of taxon names with existing names is implemented
403 //=> the assumption that the only description is the description added by this import
405 private TaxonNameDescription
getNameDescription(TaxonNameBase name
) {
406 Set
<TaxonNameDescription
> descriptions
= name
.getDescriptions();
407 if (descriptions
.size()>1){
408 throw new IllegalStateException("Implementation does not yet support names with multiple descriptions");
409 }else if (descriptions
.size()==1){
410 return descriptions
.iterator().next();
412 return TaxonNameDescription
.NewInstance(name
);
416 private boolean makeParent(TaxonExcelImportState state
, Taxon parentTaxon
, Taxon childTaxon
, ReferenceBase citation
, String microCitation
){
417 boolean success
= true;
418 ReferenceBase sec
= state
.getConfig().getSourceReference();
420 // ReferenceBase sec = parentTaxon.getSec();
421 TaxonomicTree tree
= state
.getTree(sec
);
423 tree
= makeTree(state
, sec
);
425 if (sec
.equals(childTaxon
.getSec())){
426 success
&= (null != tree
.addParentChild(parentTaxon
, childTaxon
, citation
, microCitation
));
428 logger
.warn("No relationship added for child " + childTaxon
.getTitleCache());