/**
 * Copyright (C) 2009 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
10 package eu
.etaxonomy
.cdm
.io
.markup
;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.xml.stream.Location;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.XMLEvent;

import org.apache.log4j.Logger;

import eu.etaxonomy.cdm.common.CdmUtils;
import eu.etaxonomy.cdm.io.markup.UnmatchedLeads.UnmatchedLeadsKey;
import eu.etaxonomy.cdm.model.common.CdmBase;
import eu.etaxonomy.cdm.model.common.Language;
import eu.etaxonomy.cdm.model.description.KeyStatement;
import eu.etaxonomy.cdm.model.description.PolytomousKey;
import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
import eu.etaxonomy.cdm.model.name.NonViralName;
import eu.etaxonomy.cdm.model.name.Rank;
import eu.etaxonomy.cdm.model.taxon.Taxon;
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;

41 public class MarkupKeyImport
extends MarkupImportBase
{
42 @SuppressWarnings("unused")
43 private static final Logger logger
= Logger
.getLogger(MarkupKeyImport
.class);
/**
 * Creates the key import for the given document import.
 *
 * @param docImport the document import this key import belongs to
 */
public MarkupKeyImport(MarkupDocumentImport docImport) {
    // NOTE(review): the constructor body is not present in the source at hand; delegating
    // to the base class constructor is assumed - confirm against MarkupImportBase.
    super(docImport);
}

50 public void handleKey(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
52 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
53 String isSpotcharacters
= getAndRemoveAttributeValue(attributes
, IS_SPOTCHARACTERS
);
54 if (isNotBlank(isSpotcharacters
) ) {
55 //TODO isSpotcharacters
56 String message
= "Attribute isSpotcharacters not yet implemented for <key>";
57 fireWarningEvent(message
, parentEvent
, 4);
59 boolean onlyNumberedTaxaExist
= checkAndRemoveAttributeValue(attributes
, ONLY_NUMBERED_TAXA_EXIST
, "true");
60 state
.setOnlyNumberedTaxaExist(onlyNumberedTaxaExist
);
62 PolytomousKey key
= PolytomousKey
.NewInstance();
63 key
.addTaxonomicScope(state
.getCurrentTaxon());
64 state
.setCurrentKey(key
);
66 boolean isFirstCouplet
= true;
67 while (reader
.hasNext()) {
68 XMLEvent next
= readNoWhitespace(reader
);
69 if (isMyEndingElement(next
, parentEvent
)) {
72 state
.setCurrentKey(null);
73 state
.setOnlyNumberedTaxaExist(false);
75 } else if (isEndingElement(next
, KEYNOTES
)){
76 popUnimplemented(next
.asEndElement());
77 } else if (isStartingElement(next
, KEY_TITLE
)) {
78 handleKeyTitle(state
, reader
, next
);
79 } else if (isStartingElement(next
, KEYNOTES
)) {
81 handleNotYetImplementedElement(next
);
82 } else if (isStartingElement(next
, COUPLET
)) {
83 PolytomousKeyNode node
= null;
86 isFirstCouplet
= false;
88 handleCouplet(state
, reader
, next
, node
);
90 handleUnexpectedElement(next
);
93 throw new IllegalStateException("<key> has no closing tag");
102 * @throws XMLStreamException
104 private void handleKeyTitle(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
105 PolytomousKey key
= state
.getCurrentKey();
106 String keyTitle
= getCData(state
, reader
, parentEvent
);
107 String standardTitlesEngl
= "(?i)(Key\\sto\\sthe\\s(genera|species|varieties|forms))";
108 String standardTitlesFrench
= "(?i)(Cl\u00e9\\sdes\\s(genres|esp\u00e8ces))";
109 String standardTitles
= standardTitlesEngl
;
110 if (state
.getDefaultLanguage() != null && state
.getDefaultLanguage().equals(Language
.FRENCH())){
111 standardTitles
= standardTitlesFrench
;
114 if (isNotBlank(keyTitle
) ){
115 if (!state
.getConfig().isReplaceStandardKeyTitles() || ! keyTitle
.matches(standardTitles
)){
116 key
.setTitleCache(keyTitle
, true);
122 private void handleCouplet(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
, PolytomousKeyNode parentNode
) throws XMLStreamException
{
123 String num
= getOnlyAttribute(parentEvent
, NUM
, true);
124 List
<PolytomousKeyNode
> childList
= new ArrayList
<PolytomousKeyNode
>();
126 while (reader
.hasNext()) {
127 XMLEvent next
= readNoWhitespace(reader
);
128 if (isMyEndingElement(next
, parentEvent
)) {
129 completeCouplet(state
, parentEvent
, parentNode
, num
, childList
);
131 } else if (next
.isCharacters()){
132 handleNotYetImplementedCharacters(next
);
133 //work in progress from pesiimport2, not sure if this works
134 // String mainQuestion = next.asCharacters().getData();
135 // mainQuestion = mainQuestion.replaceAll("\\s+", " ").trim();
136 // KeyStatement question = KeyStatement.NewInstance(mainQuestion);
137 // if (parentNode != null){ parentNode.setStatement(question);} //work in progress
138 } else if (isStartingElement(next
, QUESTION
)) {
139 handleQuestion(state
, reader
, next
, childList
);
140 } else if (isStartingElement(next
, KEYNOTES
)) {
142 handleNotYetImplementedElement(next
);
143 } else if (isEndingElement(next
, KEYNOTES
)) {
145 popUnimplemented(next
.asEndElement());
147 handleUnexpectedElement(next
);
150 throw new IllegalStateException("<couplet> has no closing tag");
161 private void completeCouplet(MarkupImportState state
, XMLEvent parentEvent
,
162 PolytomousKeyNode parentNode
, String num
, List
<PolytomousKeyNode
> childList
) {
163 if (parentNode
!= null){
164 for (PolytomousKeyNode childNode
: childList
){
165 parentNode
.addChild(childNode
);
166 //just to be on the save side
167 parentNode
.refreshNodeNumbering();
169 }else if (isNotBlank(num
)){
170 UnmatchedLeadsKey unmatchedKey
= UnmatchedLeadsKey
.NewInstance(state
.getCurrentKey(), num
);
171 Set
<PolytomousKeyNode
> nodes
= state
.getUnmatchedLeads().getNodes(unmatchedKey
);
172 for(PolytomousKeyNode nodeToMatch
: nodes
){
173 for (PolytomousKeyNode childNode
: childList
){
175 nodeToMatch
.addChild(childNode
);
176 //just to be on the save side
177 nodeToMatch
.refreshNodeNumbering();
178 } catch (Exception e
) {
179 String message
= "An exception occurred when trying to add a key node child or to referesh the node numbering: " + e
.getMessage();
180 fireWarningEvent(message
, parentEvent
, 6);
183 state
.getUnmatchedLeads().removeNode(unmatchedKey
, nodeToMatch
);
186 String message
= "Parent num could not be matched. Please check if num (%s) is correct";
187 message
= String
.format(message
, num
);
188 fireWarningEvent(message
, parentEvent
, 6);
192 private void handleQuestion(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
, List
<PolytomousKeyNode
> nodesList
) throws XMLStreamException
{
194 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
195 //TODO needed only for data lineage
196 String questionNum
= getAndRemoveRequiredAttributeValue(parentEvent
, attributes
, NUM
);
198 PolytomousKeyNode myNode
= PolytomousKeyNode
.NewInstance();
199 myNode
.setKey(state
.getCurrentKey()); //to avoid NPE while computing num in PolytomousKeyNode in case this node is not matched correctly with a parent
200 nodesList
.add(myNode
);
202 while (reader
.hasNext()) {
203 XMLEvent next
= readNoWhitespace(reader
);
204 if (isMyEndingElement(next
, parentEvent
)) {
206 } else if (isStartingElement(next
, TEXT
)) {
207 String text
= getCData(state
, reader
, next
);
208 KeyStatement statement
= KeyStatement
.NewInstance(getDefaultLanguage(state
), text
);
209 myNode
.setStatement(statement
);
210 } else if (isStartingElement(next
, COUPLET
)) {
212 handleCouplet(state
, reader
, next
, myNode
);
213 } else if (isStartingElement(next
, TO_COUPLET
)) {
214 handleToCouplet(state
, reader
, next
, myNode
);
215 } else if (isStartingElement(next
, TO_TAXON
)) {
216 handleToTaxon(state
, reader
, next
, myNode
);
217 } else if (isStartingElement(next
, TO_KEY
)) {
219 handleNotYetImplementedElement(next
);
220 } else if (isEndingElement(next
, TO_KEY
)){
222 popUnimplemented(next
.asEndElement());
223 } else if (isStartingElement(next
, KEYNOTES
)) {
225 handleNotYetImplementedElement(next
);
226 } else if (isEndingElement(next
, KEYNOTES
)){
228 popUnimplemented(next
.asEndElement());
230 handleUnexpectedElement(next
);
233 throw new IllegalStateException("<question> has no closing tag");
236 private void handleToCouplet(MarkupImportState state
, XMLEventReader reader
, XMLEvent next
, PolytomousKeyNode node
) throws XMLStreamException
{
237 String num
= getOnlyAttribute(next
, NUM
, true);
238 String cData
= getCData(state
, reader
, next
, false);
239 if (isNotBlank(cData
) && ! cData
.equals(num
)){
240 String message
= "CData ('%s') not handled in <toCouplet>";
241 message
= String
.format(message
, cData
);
242 fireWarningEvent(message
, next
, 4);
244 UnmatchedLeadsKey unmatched
= UnmatchedLeadsKey
.NewInstance(state
.getCurrentKey(), num
);
245 state
.getUnmatchedLeads().addKey(unmatched
, node
);
248 private void handleToTaxon(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
, PolytomousKeyNode node
) throws XMLStreamException
{
249 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
250 String num
= getAndRemoveAttributeValue(attributes
, NUM
);
251 boolean taxonNotExists
= checkAndRemoveAttributeValue(attributes
, EXISTS
, "false");
253 String taxonCData
= handleInnerToTaxon(state
, reader
, parentEvent
, node
).trim();
255 String taxonKeyStr
= makeTaxonKey(taxonCData
, state
.getCurrentTaxon(), parentEvent
.getLocation());
256 taxonNotExists
= taxonNotExists
|| (isBlank(num
) && state
.isOnlyNumberedTaxaExist());
258 NonViralName
<?
> name
= createNameByCode(state
, Rank
.UNKNOWN_RANK());
259 Taxon taxon
= Taxon
.NewInstance(name
, null);
260 taxon
.getName().setTitleCache(taxonKeyStr
, true);
261 node
.setTaxon(taxon
);
263 UnmatchedLeadsKey unmatched
= UnmatchedLeadsKey
.NewInstance(num
, taxonKeyStr
);
264 state
.getUnmatchedLeads().addKey(unmatched
, node
);
265 // String message = "The following key leads are unmatched: %s";
266 // message = String.format(message, state.getUnmatchedLeads().toString());
267 // fireWarningEvent(message, parentEvent, 6);
273 * Returns the taxon text of the toTaxon element and handles all annotations as ';'-concatenated modifying text.
274 * Footnote refs are not yet handled.
280 * @throws XMLStreamException
282 private String
handleInnerToTaxon(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
, PolytomousKeyNode node
) throws XMLStreamException
{
283 String taxonText
= "";
284 String modifyingText
= null;
285 while (reader
.hasNext()) {
286 XMLEvent next
= readNoWhitespace(reader
);
287 if (isMyEndingElement(next
, parentEvent
)) {
288 if (isNotBlank(modifyingText
)){
289 node
.putModifyingText(getDefaultLanguage(state
), modifyingText
);
292 } else if (next
.isCharacters()) {
293 taxonText
+= next
.asCharacters().getData();
294 } else if (isStartingElement(next
, ANNOTATION
)) {
295 String annotation
= handleSimpleAnnotation(state
, reader
, next
);
296 modifyingText
= CdmUtils
.concat("; ", modifyingText
, annotation
);
297 } else if (isStartingElement(next
, FOOTNOTE_REF
)) {
298 handleNotYetImplementedElement(next
);
300 handleUnexpectedElement(next
);
303 throw new IllegalStateException("Event has no closing tag");
308 * Creates a string that represents the given taxon. The string will try to replace e.g.
309 * abbreviated genus epithets by its full name etc.
315 private String
makeTaxonKey(String strGoto
, Taxon taxon
, Location location
) {
317 if (strGoto
== null){
321 NonViralName
<?
> name
= CdmBase
.deproxy(taxon
.getName(), NonViralName
.class);
322 String strGenusName
= name
.getGenusOrUninomial();
324 final String bracketPattern
= "\\([^\\(\\)]*\\)";
325 final String bracketPatternSomewhere
= String
.format(".*%s.*", bracketPattern
);
326 if (strGoto
.matches(bracketPatternSomewhere
)){
327 fireWarningEvent("toTaxon has bracket: " + strGoto
, makeLocationStr(location
), 4);
328 strGoto
= strGoto
.replaceAll(bracketPattern
, ""); //replace all brackets
330 strGoto
= strGoto
.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace
332 strGoto
= strGoto
.trim();
333 strGoto
= strGoto
.replaceAll("\\s+\\.", "\\."); // " ." may be created by bracket replacement
334 strGoto
= strGoto
.replaceAll("\\.\\.", "\\."); //replace
336 String
[] split
= strGoto
.split("\\s");
337 //handle single epithets and markers
338 for (int i
= 0; i
<split
.length
; i
++){
339 String single
= split
[i
];
340 if (isGenusAbbrev(single
, strGenusName
)){
341 split
[i
] = strGenusName
;
343 if (isInfraSpecificMarker(single
)){
344 String strSpeciesEpi
= name
.getSpecificEpithet();
345 if (isBlank(result
) && isNotBlank(strSpeciesEpi
)){
346 result
+= strGenusName
+ " " + strSpeciesEpi
;
349 result
= (result
+ " " + split
[i
]).trim();
351 //remove trailing "." except for "sp."
352 while (result
.matches(".*(?<!sp)\\.$")){
353 result
= result
.substring(0, result
.length()-1).trim();
359 private boolean isInfraSpecificMarker(String single
) {
361 if (Rank
.getRankByIdInVoc(single
).isInfraSpecific()){
366 } catch (UnknownCdmTypeException e
) {
371 //******************************** recognize nodes ***********/
373 public void makeKeyNodes(MarkupImportState state
, XMLEvent event
, String taxonTitle
) {
374 Taxon taxon
= state
.getCurrentTaxon();
375 String num
= state
.getCurrentTaxonNum();
377 String nameString
= CdmBase
.deproxy(taxon
.getName(), NonViralName
.class).getNameCache();
378 // String nameString = taxonTitle;
380 //try to find matching lead nodes
381 UnmatchedLeadsKey leadsKey
= UnmatchedLeadsKey
.NewInstance(num
, nameString
);
382 Set
<PolytomousKeyNode
> matchingNodes
= handleMatchingNodes(state
, event
, taxon
, leadsKey
);
384 if (num
!= null){//same without using the num
385 UnmatchedLeadsKey noNumLeadsKey
= UnmatchedLeadsKey
.NewInstance("", nameString
);
386 Set
<PolytomousKeyNode
> noNumMatchingNodes
= handleMatchingNodes(state
, event
, taxon
, noNumLeadsKey
);
387 if(noNumMatchingNodes
.size() > 0){
388 String message
="Taxon matches additional key node when not considering <num> attribute in taxontitle. This may be correct but may also indicate an error.";
389 fireWarningEvent(message
, event
, 1);
392 //report missing match, if num exists
393 if (matchingNodes
.isEmpty() /* TODO redo comment && num != null */){
394 String message
= "Taxon has <num> attribute in taxontitle but no matching key nodes exist: %s, Key: %s";
395 message
= String
.format(message
, num
, leadsKey
.toString());
396 fireWarningEvent(message
, event
, 1);
401 private Set
<PolytomousKeyNode
> handleMatchingNodes(MarkupImportState state
, XMLEvent event
, Taxon taxon
, UnmatchedLeadsKey leadsKey
) {
402 Set
<PolytomousKeyNode
> matchingNodes
= state
.getUnmatchedLeads().getNodes(leadsKey
);
403 for (PolytomousKeyNode matchingNode
: matchingNodes
){
404 state
.getUnmatchedLeads().removeNode(leadsKey
, matchingNode
);
405 matchingNode
.setTaxon(taxon
);
406 //just to be on the save side
408 matchingNode
.refreshNodeNumbering();
409 } catch (Exception e
) {
410 String message
= "An exception occurred when trying to referesh the node numbering: " + e
.getMessage();
411 fireWarningEvent(message
, event
, 6);
413 state
.getPolytomousKeyNodesToSave().add(matchingNode
);
415 return matchingNodes
;