2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.markup
;
12 import java
.util
.ArrayList
;
13 import java
.util
.List
;
17 import javax
.xml
.stream
.Location
;
18 import javax
.xml
.stream
.XMLEventReader
;
19 import javax
.xml
.stream
.XMLStreamException
;
20 import javax
.xml
.stream
.events
.Attribute
;
21 import javax
.xml
.stream
.events
.XMLEvent
;
23 import org
.apache
.log4j
.Logger
;
25 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
26 import eu
.etaxonomy
.cdm
.io
.markup
.UnmatchedLeads
.UnmatchedLeadsKey
;
27 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
28 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
29 import eu
.etaxonomy
.cdm
.model
.description
.KeyStatement
;
30 import eu
.etaxonomy
.cdm
.model
.description
.PolytomousKey
;
31 import eu
.etaxonomy
.cdm
.model
.description
.PolytomousKeyNode
;
32 import eu
.etaxonomy
.cdm
.model
.name
.NonViralName
;
33 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
34 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
35 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
/**
 * Part of the markup import that deals with {@code <key>} elements.
 * <p>
 * The methods of this class build a {@link PolytomousKey} and its
 * {@link PolytomousKeyNode}s from the key / couplet / question events of the
 * XML stream and try to connect key leads with taxa and with other couplets
 * via the unmatched-leads registry held by the import state.
 * <p>
 * NOTE(review): this listing carries leftover line numbers at the start of
 * lines and several lines (closing braces, else branches, returns) are not
 * visible - restore the file from version control before compiling.
 */
41 public class MarkupKeyImport
extends MarkupImportBase
{
// log4j logger of this class; marked as unused, so it is not called in the visible code
42 @SuppressWarnings("unused")
43 private static final Logger logger
= Logger
.getLogger(MarkupKeyImport
.class);
/**
 * Creates a key import bound to the given document import.
 * <p>
 * NOTE(review): the constructor body is not visible in this listing;
 * presumably it only delegates to the MarkupImportBase constructor - confirm.
 *
 * @param docImport the document import this key import belongs to
 */
46 public MarkupKeyImport(MarkupDocumentImport docImport
) {
/**
 * Imports one {@code <key>} element.
 * <p>
 * Reads the attributes of the key element, creates a new {@link PolytomousKey}
 * with the state's current taxon as taxonomic scope, registers it as the
 * state's current key and then loops over the following events, dispatching
 * key title, keynotes and couplet elements to their handlers.
 * When the end element belonging to <code>parentEvent</code> is found, the
 * current key and the "only numbered taxa exist" flag are reset on the state.
 * <p>
 * NOTE(review): parts of this method are not visible in this listing (e.g. what
 * happens to <code>isFirstCouplet</code> before it is set to false, the else
 * branch around handleUnexpectedElement and all closing braces). As far as
 * visible here <code>node</code> is still null when passed to handleCouplet -
 * confirm against the complete source.
 *
 * @param state the import state; receives the current key and the
 *              onlyNumberedTaxaExist flag
 * @param reader the event reader the child events are read from
 * @param parentEvent the event that opened the key element
 * @throws XMLStreamException declared; presumably propagated from reading the event stream
 * @throws IllegalStateException with message "&lt;key&gt; has no closing tag";
 *              presumably when the events run out before the end element is seen
 */
50 public void handleKey(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
52 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
53 String isSpotcharacters
= getAndRemoveAttributeValue(attributes
, IS_SPOTCHARACTERS
);
// a non-blank isSpotcharacters attribute is only reported as warning, not evaluated
54 if (isNotBlank(isSpotcharacters
) ) {
55 //TODO isSpotcharacters
56 String message
= "Attribute isSpotcharacters not yet implemented for <key>";
57 fireWarningEvent(message
, parentEvent
, 4);
// store the result of comparing the ONLY_NUMBERED_TAXA_EXIST attribute with "true" on the state
59 boolean onlyNumberedTaxaExist
= checkAndRemoveAttributeValue(attributes
, ONLY_NUMBERED_TAXA_EXIST
, "true");
60 state
.setOnlyNumberedTaxaExist(onlyNumberedTaxaExist
);
// new key for the current taxon, made available to the nested handlers via the state
62 PolytomousKey key
= PolytomousKey
.NewInstance();
63 key
.addTaxonomicScope(state
.getCurrentTaxon());
64 state
.setCurrentKey(key
);
66 boolean isFirstCouplet
= true;
67 while (reader
.hasNext()) {
68 XMLEvent next
= readNoWhitespace(reader
);
69 if (isMyEndingElement(next
, parentEvent
)) {
// end of the key element: clear the key specific data on the state
72 state
.setCurrentKey(null);
73 state
.setOnlyNumberedTaxaExist(false);
75 } else if (isEndingElement(next
, KEYNOTES
)){
76 popUnimplemented(next
.asEndElement());
77 } else if (isStartingElement(next
, KEY_TITLE
)) {
78 handleKeyTitle(state
, reader
, next
);
79 } else if (isStartingElement(next
, KEYNOTES
)) {
// keynotes are not imported yet
81 handleNotYetImplementedElement(next
);
82 } else if (isStartingElement(next
, COUPLET
)) {
83 PolytomousKeyNode node
= null;
86 isFirstCouplet
= false;
88 handleCouplet(state
, reader
, next
, node
);
90 handleUnexpectedElement(next
);
93 throw new IllegalStateException("<key> has no closing tag");
102 * @throws XMLStreamException
/**
 * Reads the text of the key title element and, if appropriate, stores it as
 * title cache of the state's current key.
 * <p>
 * A non-blank title is set (with <code>true</code> as second argument of
 * setTitleCache) unless the configuration asks to replace standard key titles
 * AND the complete title matches the standard title pattern. The French
 * pattern is used when the state's default language equals
 * <code>Language.FRENCH()</code>, otherwise the English one.
 * <p>
 * NOTE(review): closing braces of this method are not visible in this listing.
 *
 * @param state the import state providing current key, default language and configuration
 * @param reader the event reader the title text is read from
 * @param parentEvent the event that opened the key title element
 * @throws XMLStreamException declared; presumably propagated from getCData
 */
104 private void handleKeyTitle(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
105 PolytomousKey key
= state
.getCurrentKey();
106 String keyTitle
= getCData(state
, reader
, parentEvent
);
// case insensitive patterns for titles like "Key to the species" / "Cl\u00e9 des genres"
107 String standardTitlesEngl
= "(?i)(Key\\sto\\sthe\\s(genera|species|varieties|forms))";
108 String standardTitlesFrench
= "(?i)(Cl\u00e9\\sdes\\s(genres|esp\u00e8ces))";
// English is the default, French only if it is the default language of the import
109 String standardTitles
= standardTitlesEngl
;
110 if (state
.getDefaultLanguage() != null && state
.getDefaultLanguage().equals(Language
.FRENCH())){
111 standardTitles
= standardTitlesFrench
;
114 if (isNotBlank(keyTitle
) ){
// String.matches tests the whole title, so only titles consisting of nothing but the standard phrase are skipped
115 if (!state
.getConfig().isReplaceStandardKeyTitles() || ! keyTitle
.matches(standardTitles
)){
116 key
.setTitleCache(keyTitle
, true);
/**
 * Imports one couplet element.
 * <p>
 * Reads the NUM attribute of the couplet, collects the nodes created for the
 * contained question elements in a list and, once the end element belonging
 * to <code>parentEvent</code> is reached, passes that list to
 * {@link #completeCouplet}. Character data and keynotes inside the couplet
 * are reported as not yet implemented.
 * <p>
 * NOTE(review): the return after completeCouplet, the else branch around
 * handleUnexpectedElement and the closing braces are not visible in this listing.
 *
 * @param state the import state
 * @param reader the event reader the child events are read from
 * @param parentEvent the event that opened the couplet element
 * @param parentNode the node the questions of this couplet belong to, may be null
 *              (completeCouplet then tries to find the parent via the couplet number)
 * @throws XMLStreamException declared; presumably propagated from reading the event stream
 * @throws IllegalStateException with message "&lt;couplet&gt; has no closing tag";
 *              presumably when the events run out before the end element is seen
 */
122 private void handleCouplet(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
, PolytomousKeyNode parentNode
) throws XMLStreamException
{
123 String num
= getOnlyAttribute(parentEvent
, NUM
, true);
// filled by handleQuestion with one node per question
124 List
<PolytomousKeyNode
> childList
= new ArrayList
<PolytomousKeyNode
>();
126 while (reader
.hasNext()) {
127 XMLEvent next
= readNoWhitespace(reader
);
128 if (isMyEndingElement(next
, parentEvent
)) {
129 completeCouplet(state
, parentEvent
, parentNode
, num
, childList
);
131 } else if (next
.isCharacters()){
132 handleNotYetImplementedCharacters(next
);
133 //work in progress from pesiimport2, not sure if this works
134 // String mainQuestion = next.asCharacters().getData();
135 // mainQuestion = mainQuestion.replaceAll("\\s+", " ").trim();
136 // KeyStatement question = KeyStatement.NewInstance(mainQuestion);
137 // if (parentNode != null){ parentNode.setStatement(question);} //work in progress
138 } else if (isStartingElement(next
, QUESTION
)) {
139 handleQuestion(state
, reader
, next
, childList
);
140 } else if (isStartingElement(next
, KEYNOTES
)) {
// keynotes are not imported yet
142 handleNotYetImplementedElement(next
);
143 } else if (isEndingElement(next
, KEYNOTES
)) {
145 popUnimplemented(next
.asEndElement());
147 handleUnexpectedElement(next
);
150 throw new IllegalStateException("<couplet> has no closing tag");
/**
 * Attaches the nodes collected for a couplet to their parent node(s).
 * <p>
 * If <code>parentNode</code> is not null, the children are added to it.
 * Otherwise, if the couplet number is not blank, the nodes registered in the
 * unmatched-leads registry under (current key, num) are looked up; the
 * children are added to each of them and the node is removed from the
 * registry. Node numbering is refreshed on the nodes that received children.
 * <p>
 * NOTE(review): closing braces are not visible in this listing, so the exact
 * nesting (e.g. whether refreshNodeNumbering runs inside or after the inner
 * loop) and the condition under which the "Parent num could not be matched"
 * warning is fired cannot be told from here - confirm against the complete source.
 *
 * @param state the import state providing current key and unmatched leads
 * @param parentEvent the couplet event, used as location of the warning
 * @param parentNode the known parent node, may be null
 * @param num the number of the couplet, used for matching if parentNode is null
 * @param childList the nodes created for the questions of the couplet
 */
161 private void completeCouplet(MarkupImportState state
, XMLEvent parentEvent
,
162 PolytomousKeyNode parentNode
, String num
, List
<PolytomousKeyNode
> childList
) {
163 if (parentNode
!= null){
164 for (PolytomousKeyNode childNode
: childList
){
165 parentNode
.addChild(childNode
);
166 //just to be on the safe side
167 parentNode
.refreshNodeNumbering();
169 }else if (isNotBlank(num
)){
// parent not known directly: look for leads that pointed to this couplet number within the current key
170 UnmatchedLeadsKey unmatchedKey
= UnmatchedLeadsKey
.NewInstance(state
.getCurrentKey(), num
);
171 Set
<PolytomousKeyNode
> nodes
= state
.getUnmatchedLeads().getNodes(unmatchedKey
);
172 for(PolytomousKeyNode nodeToMatch
: nodes
){
173 for (PolytomousKeyNode childNode
: childList
){
174 nodeToMatch
.addChild(childNode
);
175 //just to be on the safe side
176 nodeToMatch
.refreshNodeNumbering();
// the lead is matched now, so it is taken out of the registry
178 state
.getUnmatchedLeads().removeNode(unmatchedKey
, nodeToMatch
);
181 String message
= "Parent num could not be matched. Please check if num (%s) is correct";
182 message
= String
.format(message
, num
);
183 fireWarningEvent(message
, parentEvent
, 6);
/**
 * Imports one question element of a couplet.
 * <p>
 * Creates a new {@link PolytomousKeyNode}, sets the state's current key on it,
 * appends it to <code>nodesList</code> and then handles the child events:
 * <ul>
 *  <li>TEXT: the text becomes the node's {@link KeyStatement} in the default language</li>
 *  <li>COUPLET: a nested couplet is handled with the new node as parent</li>
 *  <li>TO_COUPLET / TO_TAXON: delegated to handleToCouplet / handleToTaxon</li>
 *  <li>TO_KEY and KEYNOTES: reported as not yet implemented</li>
 * </ul>
 * The required NUM attribute is read (and thereby removed from the attribute
 * map) but the value is not used further in the visible code.
 * <p>
 * NOTE(review): the body of the end element branch, the else branch around
 * handleUnexpectedElement and the closing braces are not visible in this listing.
 *
 * @param state the import state
 * @param reader the event reader the child events are read from
 * @param parentEvent the event that opened the question element
 * @param nodesList the list of the enclosing couplet the new node is added to
 * @throws XMLStreamException declared; presumably propagated from reading the event stream
 * @throws IllegalStateException with message "&lt;question&gt; has no closing tag";
 *              presumably when the events run out before the end element is seen
 */
187 private void handleQuestion(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
, List
<PolytomousKeyNode
> nodesList
) throws XMLStreamException
{
189 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
190 //TODO needed only for data lineage
191 String questionNum
= getAndRemoveRequiredAttributeValue(parentEvent
, attributes
, NUM
);
193 PolytomousKeyNode myNode
= PolytomousKeyNode
.NewInstance();
194 myNode
.setKey(state
.getCurrentKey()); //to avoid NPE while computing num in PolytomousKeyNode in case this node is not matched correctly with a parent
195 nodesList
.add(myNode
);
197 while (reader
.hasNext()) {
198 XMLEvent next
= readNoWhitespace(reader
);
199 if (isMyEndingElement(next
, parentEvent
)) {
201 } else if (isStartingElement(next
, TEXT
)) {
// the question text becomes the statement of the node
202 String text
= getCData(state
, reader
, next
);
203 KeyStatement statement
= KeyStatement
.NewInstance(getDefaultLanguage(state
), text
);
204 myNode
.setStatement(statement
);
205 } else if (isStartingElement(next
, COUPLET
)) {
// nested couplet: this node is the parent of its questions
207 handleCouplet(state
, reader
, next
, myNode
);
208 } else if (isStartingElement(next
, TO_COUPLET
)) {
209 handleToCouplet(state
, reader
, next
, myNode
);
210 } else if (isStartingElement(next
, TO_TAXON
)) {
211 handleToTaxon(state
, reader
, next
, myNode
);
212 } else if (isStartingElement(next
, TO_KEY
)) {
// links to other keys are not imported yet
214 handleNotYetImplementedElement(next
);
215 } else if (isEndingElement(next
, TO_KEY
)){
217 popUnimplemented(next
.asEndElement());
218 } else if (isStartingElement(next
, KEYNOTES
)) {
// keynotes are not imported yet
220 handleNotYetImplementedElement(next
);
221 } else if (isEndingElement(next
, KEYNOTES
)){
223 popUnimplemented(next
.asEndElement());
225 handleUnexpectedElement(next
);
228 throw new IllegalStateException("<question> has no closing tag");
/**
 * Handles a lead that points to another couplet.
 * <p>
 * Reads the NUM attribute and the text content of the element. Text that is
 * not blank and differs from the number is only reported as warning. The node
 * is then registered in the unmatched-leads registry under
 * (current key, num), which is the same kind of key completeCouplet uses to
 * look up parents of a couplet.
 * <p>
 * NOTE(review): closing braces are not visible in this listing.
 *
 * @param state the import state providing current key and unmatched leads
 * @param reader the event reader the text content is read from
 * @param next the event that opened the element
 * @param node the key node of the question containing this lead
 * @throws XMLStreamException declared; presumably propagated from getCData
 */
231 private void handleToCouplet(MarkupImportState state
, XMLEventReader reader
, XMLEvent next
, PolytomousKeyNode node
) throws XMLStreamException
{
232 String num
= getOnlyAttribute(next
, NUM
, true);
233 String cData
= getCData(state
, reader
, next
, false);
// text repeating the couplet number is accepted silently, anything else is reported
234 if (isNotBlank(cData
) && ! cData
.equals(num
)){
235 String message
= "CData ('%s') not handled in <toCouplet>";
236 message
= String
.format(message
, cData
);
237 fireWarningEvent(message
, next
, 4);
// remember the node until the couplet with this number is completed
239 UnmatchedLeadsKey unmatched
= UnmatchedLeadsKey
.NewInstance(state
.getCurrentKey(), num
);
240 state
.getUnmatchedLeads().addKey(unmatched
, node
);
/**
 * Handles a lead that points to a taxon.
 * <p>
 * Reads the optional NUM attribute and checks the EXISTS attribute against
 * "false", reads the taxon text via handleInnerToTaxon (trimmed) and
 * normalizes it with makeTaxonKey. The taxon is additionally treated as not
 * existing if no number is given while the state says that only numbered
 * taxa exist.
 * <p>
 * Two outcomes are visible below: (a) a new {@link Taxon} with a name of
 * unknown rank and the taxon key string as title cache is set on the node,
 * (b) the node is registered in the unmatched-leads registry under
 * (num, taxonKeyStr) for later matching in makeKeyNodes.
 * <p>
 * NOTE(review): the condition choosing between (a) and (b) is not visible in
 * this listing; presumably (a) applies when <code>taxonNotExists</code> is
 * true and (b) otherwise - confirm against the complete source.
 *
 * @param state the import state
 * @param reader the event reader the taxon text is read from
 * @param parentEvent the event that opened the element
 * @param node the key node of the question containing this lead
 * @throws XMLStreamException declared; presumably propagated from reading the event stream
 */
243 private void handleToTaxon(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
, PolytomousKeyNode node
) throws XMLStreamException
{
244 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
245 String num
= getAndRemoveAttributeValue(attributes
, NUM
);
246 boolean taxonNotExists
= checkAndRemoveAttributeValue(attributes
, EXISTS
, "false");
248 String taxonCData
= handleInnerToTaxon(state
, reader
, parentEvent
, node
).trim();
250 String taxonKeyStr
= makeTaxonKey(taxonCData
, state
.getCurrentTaxon(), parentEvent
.getLocation());
// unnumbered leads also count as "taxon does not exist" if only numbered taxa exist
251 taxonNotExists
= taxonNotExists
|| (isBlank(num
) && state
.isOnlyNumberedTaxaExist());
// outcome (a): placeholder taxon carrying only the title
253 NonViralName
<?
> name
= createNameByCode(state
, Rank
.UNKNOWN_RANK());
254 Taxon taxon
= Taxon
.NewInstance(name
, null);
255 taxon
.getName().setTitleCache(taxonKeyStr
, true);
256 node
.setTaxon(taxon
);
// outcome (b): remember the lead until a taxon with matching num / name is found
258 UnmatchedLeadsKey unmatched
= UnmatchedLeadsKey
.NewInstance(num
, taxonKeyStr
);
259 state
.getUnmatchedLeads().addKey(unmatched
, node
);
260 // String message = "The following key leads are unmatched: %s";
261 // message = String.format(message, state.getUnmatchedLeads().toString());
262 // fireWarningEvent(message, parentEvent, 6);
268 * Returns the taxon text of the toTaxon element and handles all annotations as ';'-concatenated modifying text.
269 * Footnote refs are not yet handled.
275 * @throws XMLStreamException
/**
 * Reads the content of a to-taxon element.
 * <p>
 * Character data is appended to the taxon text. The text of each ANNOTATION
 * child is appended to a modifying text, using "; " as separator; when the
 * end element belonging to <code>parentEvent</code> is reached a non-blank
 * modifying text is put on the node for the default language.
 * FOOTNOTE_REF children are reported as not yet implemented.
 * <p>
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the collected taxon text is returned at the end element. The
 * else branch around handleUnexpectedElement and the closing braces are
 * missing as well.
 *
 * @param state the import state
 * @param reader the event reader the content is read from
 * @param parentEvent the event that opened the element
 * @param node the key node that receives the modifying text
 * @return presumably the concatenated character data of the element - confirm
 * @throws XMLStreamException declared; presumably propagated from reading the event stream
 * @throws IllegalStateException with message "Event has no closing tag";
 *              presumably when the events run out before the end element is seen
 */
277 private String
handleInnerToTaxon(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
, PolytomousKeyNode node
) throws XMLStreamException
{
278 String taxonText
= "";
279 String modifyingText
= null;
280 while (reader
.hasNext()) {
281 XMLEvent next
= readNoWhitespace(reader
);
282 if (isMyEndingElement(next
, parentEvent
)) {
283 if (isNotBlank(modifyingText
)){
284 node
.putModifyingText(getDefaultLanguage(state
), modifyingText
);
287 } else if (next
.isCharacters()) {
288 taxonText
+= next
.asCharacters().getData();
289 } else if (isStartingElement(next
, ANNOTATION
)) {
// annotations are collected and joined with "; "
290 String annotation
= handleSimpleAnnotation(state
, reader
, next
);
291 modifyingText
= CdmUtils
.concat("; ", modifyingText
, annotation
);
292 } else if (isStartingElement(next
, FOOTNOTE_REF
)) {
293 handleNotYetImplementedElement(next
);
295 handleUnexpectedElement(next
);
298 throw new IllegalStateException("Event has no closing tag");
303 * Creates a string that represents the given taxon. Where possible the string is normalized,
304 * e.g. an abbreviated genus name is replaced by the full genus name.
/**
 * Normalizes the text of a to-taxon lead so that it can be compared with taxon names.
 * <p>
 * Steps visible in the code:
 * <ol>
 *  <li>bracketed parts "(...)" without nested brackets are removed (a warning is fired)</li>
 *  <li>whitespace runs are collapsed to one blank and the text is trimmed</li>
 *  <li>whitespace before "." is removed and ".." becomes "."</li>
 *  <li>the text is split at whitespace; a token recognized by isGenusAbbrev is
 *      replaced by the genus name of the given taxon's name</li>
 *  <li>if an infraspecific marker is found while the result is still blank and
 *      the name has a specific epithet, genus name and specific epithet are
 *      put in front</li>
 *  <li>trailing "." are removed unless directly preceded by "sp"</li>
 * </ol>
 * NOTE(review): the body of the null check, the declaration of
 * <code>result</code>, the return statement and the closing braces are not
 * visible in this listing; note that <code>split[i]</code> (not
 * <code>single</code>) is appended, so a replaced genus abbreviation is used.
 *
 * @param strGoto the raw text of the lead, may be null
 * @param taxon the current taxon whose name supplies genus and specific epithet
 * @param location the location in the XML source, used for the warning
 * @return presumably the normalized string held in <code>result</code> - confirm
 */
310 private String
makeTaxonKey(String strGoto
, Taxon taxon
, Location location
) {
312 if (strGoto
== null){
316 NonViralName
<?
> name
= CdmBase
.deproxy(taxon
.getName(), NonViralName
.class);
317 String strGenusName
= name
.getGenusOrUninomial();
// matches one bracket pair that contains no further brackets
319 final String bracketPattern
= "\\([^\\(\\)]*\\)";
320 final String bracketPatternSomewhere
= String
.format(".*%s.*", bracketPattern
);
321 if (strGoto
.matches(bracketPatternSomewhere
)){
322 fireWarningEvent("toTaxon has bracket: " + strGoto
, makeLocationStr(location
), 4);
323 strGoto
= strGoto
.replaceAll(bracketPattern
, ""); //replace all brackets
325 strGoto
= strGoto
.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace
327 strGoto
= strGoto
.trim();
328 strGoto
= strGoto
.replaceAll("\\s+\\.", "\\."); // " ." may be created by bracket replacement
329 strGoto
= strGoto
.replaceAll("\\.\\.", "\\."); //replace
331 String
[] split
= strGoto
.split("\\s");
332 //handle single epithets and markers
333 for (int i
= 0; i
<split
.length
; i
++){
334 String single
= split
[i
];
335 if (isGenusAbbrev(single
, strGenusName
)){
336 split
[i
] = strGenusName
;
338 if (isInfraSpecificMarker(single
)){
// text starting with a marker (result still blank): complete it with genus and species of the current taxon
339 String strSpeciesEpi
= name
.getSpecificEpithet();
340 if (isBlank(result
) && isNotBlank(strSpeciesEpi
)){
341 result
+= strGenusName
+ " " + strSpeciesEpi
;
344 result
= (result
+ " " + split
[i
]).trim();
346 //remove trailing "." except for "sp."
// the negative lookbehind (?<!sp) keeps a final "." that directly follows "sp"
347 while (result
.matches(".*(?<!sp)\\.$")){
348 result
= result
.substring(0, result
.length()-1).trim();
/**
 * Tests whether the given token is the marker of an infraspecific rank.
 * <p>
 * The token is looked up via <code>Rank.getRankByIdInVoc</code> and the
 * found rank is asked for <code>isInfraSpecific()</code>;
 * an {@link UnknownCdmTypeException} is caught.
 * <p>
 * NOTE(review): the try line, all return statements and the closing braces
 * are not visible in this listing; presumably true is returned for an
 * infraspecific rank and false otherwise, including the exception case - confirm.
 *
 * @param single one whitespace separated token of the taxon text
 * @return presumably true if the token denotes an infraspecific rank - confirm
 */
354 private boolean isInfraSpecificMarker(String single
) {
356 if (Rank
.getRankByIdInVoc(single
).isInfraSpecific()){
361 } catch (UnknownCdmTypeException e
) {
366 //******************************** recognize nodes ***********/
/**
 * Connects the state's current taxon with key leads that were registered as
 * unmatched for it.
 * <p>
 * The lookup key is built from the current taxon number and the name cache
 * of the taxon's name; matching nodes are processed by handleMatchingNodes.
 * If a number exists, the lookup is repeated with an empty number and a
 * warning is fired if this finds additional nodes. A warning is also fired
 * if the first lookup found no node.
 * <p>
 * NOTE(review): closing braces are not visible in this listing, so the nesting
 * of the last check is not certain. As written the "no matching key nodes"
 * warning does not test <code>num != null</code> (see the TODO in the
 * condition) although its message says that a num attribute exists.
 *
 * @param state the import state providing current taxon, taxon number and unmatched leads
 * @param event the event used as location of the warnings
 * @param taxonTitle not used in the visible code (only in a commented-out line)
 */
368 public void makeKeyNodes(MarkupImportState state
, XMLEvent event
, String taxonTitle
) {
369 Taxon taxon
= state
.getCurrentTaxon();
370 String num
= state
.getCurrentTaxonNum();
372 String nameString
= CdmBase
.deproxy(taxon
.getName(), NonViralName
.class).getNameCache();
373 // String nameString = taxonTitle;
375 //try to find matching lead nodes
376 UnmatchedLeadsKey leadsKey
= UnmatchedLeadsKey
.NewInstance(num
, nameString
);
377 Set
<PolytomousKeyNode
> matchingNodes
= handleMatchingNodes(state
, taxon
, leadsKey
);
379 if (num
!= null){//same without using the num
380 UnmatchedLeadsKey noNumLeadsKey
= UnmatchedLeadsKey
.NewInstance("", nameString
);
381 Set
<PolytomousKeyNode
> noNumMatchingNodes
= handleMatchingNodes(state
, taxon
, noNumLeadsKey
);
382 if(noNumMatchingNodes
.size() > 0){
383 String message
="Taxon matches additional key node when not considering <num> attribute in taxontitle. This may be correct but may also indicate an error.";
384 fireWarningEvent(message
, event
, 1);
387 //report missing match, if num exists
388 if (matchingNodes
.isEmpty() /* TODO redo comment && num != null */){
389 String message
= "Taxon has <num> attribute in taxontitle but no matching key nodes exist: %s, Key: %s";
390 message
= String
.format(message
, num
, leadsKey
.toString());
391 fireWarningEvent(message
, event
, 1);
/**
 * Assigns the given taxon to all key nodes registered as unmatched under the given key.
 * <p>
 * Each node found is removed from the unmatched-leads registry, gets the
 * taxon set, has its node numbering refreshed and is added to the state's
 * collection of polytomous key nodes to save.
 * <p>
 * NOTE(review): nodes are removed from the registry while iterating over the
 * set returned by getNodes; this is only safe if getNodes returns a copy -
 * confirm in UnmatchedLeads. Closing braces are not visible in this listing.
 *
 * @param state the import state providing the unmatched leads and the nodes to save
 * @param taxon the taxon to set on the matching nodes
 * @param leadsKey the key the nodes were registered under
 * @return the set of nodes returned by the registry for <code>leadsKey</code>
 */
396 private Set
<PolytomousKeyNode
> handleMatchingNodes(MarkupImportState state
, Taxon taxon
, UnmatchedLeadsKey leadsKey
) {
397 Set
<PolytomousKeyNode
> matchingNodes
= state
.getUnmatchedLeads().getNodes(leadsKey
);
398 for (PolytomousKeyNode matchingNode
: matchingNodes
){
399 state
.getUnmatchedLeads().removeNode(leadsKey
, matchingNode
);
400 matchingNode
.setTaxon(taxon
);
401 //just to be on the safe side
402 matchingNode
.refreshNodeNumbering();
403 state
.getPolytomousKeyNodesToSave().add(matchingNode
);
405 return matchingNodes
;