2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.markup
;
12 import java
.util
.ArrayList
;
13 import java
.util
.List
;
16 import java
.util
.UUID
;
18 import javax
.xml
.stream
.Location
;
19 import javax
.xml
.stream
.XMLEventReader
;
20 import javax
.xml
.stream
.XMLStreamException
;
21 import javax
.xml
.stream
.events
.Attribute
;
22 import javax
.xml
.stream
.events
.XMLEvent
;
24 import org
.apache
.log4j
.Logger
;
26 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
27 import eu
.etaxonomy
.cdm
.common
.UTF8
;
28 import eu
.etaxonomy
.cdm
.io
.markup
.UnmatchedLeads
.UnmatchedLeadsKey
;
29 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
30 import eu
.etaxonomy
.cdm
.model
.description
.KeyStatement
;
31 import eu
.etaxonomy
.cdm
.model
.description
.PolytomousKey
;
32 import eu
.etaxonomy
.cdm
.model
.description
.PolytomousKeyNode
;
33 import eu
.etaxonomy
.cdm
.model
.name
.INonViralName
;
34 import eu
.etaxonomy
.cdm
.model
.name
.NonViralName
;
35 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
36 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
37 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
/**
 * Markup import part for key elements: the methods of this class build a
 * {@link PolytomousKey} and its {@link PolytomousKeyNode}s from the XML event stream
 * and match key leads against taxa via the state's unmatched-leads registry.
 *
 * NOTE(review): this listing carries embedded source line numbers and skips some
 * original lines (e.g. closing braces) - confirm details against the full file.
 */
43 public class MarkupKeyImport
extends MarkupImportBase
{
44 @SuppressWarnings("unused")
45 private static final Logger logger
= Logger
.getLogger(MarkupKeyImport
.class);
// Constructor taking the owning document import. The body is not visible in this
// listing (presumably it passes docImport on to the base class - TODO confirm).
48 public MarkupKeyImport(MarkupDocumentImport docImport
) {
/**
 * Handles a key element: evaluates its attributes, creates a new
 * {@link PolytomousKey} with the state's current taxon as taxonomic scope, registers
 * it as the state's current key and then processes the child events until the key's
 * own end element is reached.
 *
 * Child handling visible here: key title -> handleKeyTitle, couplet -> handleCouplet,
 * keynotes -> reported as not yet implemented, anything else -> handleUnexpectedElement.
 *
 * @param state the import state; its current key and its "only numbered taxa exist"
 *        flag are set here and reset again at the key's end element
 * @param reader the XML event reader the child events are read from
 * @param parentEvent the event of the key element itself
 * @throws XMLStreamException as declared; presumably raised while reading events
 * @throws IllegalStateException with message "<key> has no closing tag" if the events
 *         run out before the key's end element was seen
 */
52 public void handleKey(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
54 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
// isSpotcharacters is read and removed from the attribute map, but a non-blank value
// only results in a warning.
55 String isSpotcharacters
= getAndRemoveAttributeValue(attributes
, IS_SPOTCHARACTERS
);
56 if (isNotBlank(isSpotcharacters
) ) {
57 //TODO isSpotcharacters
58 String message
= "Attribute isSpotcharacters not yet implemented for <key>";
59 fireWarningEvent(message
, parentEvent
, 4);
// presumably true only if the attribute is present with value "true" - TODO confirm
// against checkAndRemoveAttributeValue
61 boolean onlyNumberedTaxaExist
= checkAndRemoveAttributeValue(attributes
, ONLY_NUMBERED_TAXA_EXIST
, "true");
state
.setOnlyNumberedTaxaExist(onlyNumberedTaxaExist
);
64 PolytomousKey key
= PolytomousKey
.NewInstance();
65 key
.addTaxonomicScope(state
.getCurrentTaxon());
66 state
.setCurrentKey(key
);
68 boolean isFirstCouplet
= true;
69 while (reader
.hasNext()) {
70 XMLEvent next
= readNoWhitespace(reader
);
71 if (isMyEndingElement(next
, parentEvent
)) {
// End of this key: the key-specific state is reset.
// NOTE(review): source lines 72-73 are not part of this listing.
74 state
.setCurrentKey(null);
75 state
.setOnlyNumberedTaxaExist(false);
77 } else if (isEndingElement(next
, KEYNOTES
)){
78 popUnimplemented(next
.asEndElement());
79 } else if (isStartingElement(next
, KEY_TITLE
)) {
80 handleKeyTitle(state
, reader
, next
);
81 } else if (isStartingElement(next
, KEYNOTES
)) {
83 handleNotYetImplementedElement(next
);
84 } else if (isStartingElement(next
, COUPLET
)) {
// node is passed to handleCouplet as the parent node of the couplet.
// NOTE(review): source lines 86-87 are missing from this listing; presumably the
// first couplet gets a non-null node assigned there - TODO confirm.
85 PolytomousKeyNode node
= null;
88 isFirstCouplet
= false;
90 handleCouplet(state
, reader
, next
, node
);
92 handleUnexpectedElement(next
);
95 throw new IllegalStateException("<key> has no closing tag");
104 * @throws XMLStreamException
/**
 * Reads the text of a key title element and, if it is not blank, sets it as title
 * cache of the state's current key. The title is not set when the config asks for
 * replacing standard key titles and the text completely matches the standard title
 * pattern. The French pattern is used when the state's default language is French,
 * otherwise the English one; both patterns are case-insensitive.
 *
 * @param state the import state providing the current key, the default language and the config
 * @param reader the XML event reader the title text is read from
 * @param parentEvent the event of the key title element
 * @throws XMLStreamException as declared; presumably raised while reading the text
 */
106 private void handleKeyTitle(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
107 PolytomousKey key
= state
.getCurrentKey();
108 String keyTitle
= getCData(state
, reader
, parentEvent
);
// Titles like "Key to the species" / "Clé des genres" are considered standard titles.
109 String standardTitlesEngl
= "(?i)(Key\\sto\\sthe\\s(genera|species|varieties|forms))";
110 String standardTitlesFrench
= "(?i)(Cl\u00e9\\sdes\\s(genres|esp\u00e8ces))";
111 String standardTitles
= standardTitlesEngl
;
112 if (state
.getDefaultLanguage() != null && state
.getDefaultLanguage().equals(Language
.FRENCH())){
113 standardTitles
= standardTitlesFrench
;
116 if (isNotBlank(keyTitle
) ){
// keep the title unless standard titles are to be replaced AND this is a standard title
117 if (!state
.getConfig().isReplaceStandardKeyTitles() || ! keyTitle
.matches(standardTitles
)){
118 key
.setTitleCache(keyTitle
, true);
/**
 * Handles a couplet element. Reads its NUM attribute, lets handleQuestion collect the
 * key nodes of the contained questions in a list and, at the couplet's own end
 * element, passes parent node, num and the collected nodes on to completeCouplet.
 * Character content and keynotes are only reported as not yet implemented.
 *
 * @param state the import state
 * @param reader the XML event reader the child events are read from
 * @param parentEvent the event of the couplet element
 * @param parentNode the node the question nodes are to be attached to; may be null
 *        (handleKey passes a variable initialised with null), in which case
 *        completeCouplet tries to find the parent via the couplet's num
 * @throws XMLStreamException as declared; presumably raised while reading events
 * @throws IllegalStateException with message "<couplet> has no closing tag" if the
 *         events run out before the couplet's end element was seen
 */
124 private void handleCouplet(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
, PolytomousKeyNode parentNode
) throws XMLStreamException
{
125 String num
= getOnlyAttribute(parentEvent
, NUM
, true);
// filled by handleQuestion with one node per question of this couplet
126 List
<PolytomousKeyNode
> childList
= new ArrayList
<PolytomousKeyNode
>();
128 while (reader
.hasNext()) {
129 XMLEvent next
= readNoWhitespace(reader
);
130 if (isMyEndingElement(next
, parentEvent
)) {
131 completeCouplet(state
, parentEvent
, parentNode
, num
, childList
);
133 } else if (next
.isCharacters()){
134 handleNotYetImplementedCharacters(next
);
135 //work in progress from pesiimport2, not sure if this works
136 // String mainQuestion = next.asCharacters().getData();
137 // mainQuestion = mainQuestion.replaceAll("\\s+", " ").trim();
138 // KeyStatement question = KeyStatement.NewInstance(mainQuestion);
139 // if (parentNode != null){ parentNode.setStatement(question);} //work in progress
140 } else if (isStartingElement(next
, QUESTION
)) {
141 handleQuestion(state
, reader
, next
, childList
);
142 } else if (isStartingElement(next
, KEYNOTES
)) {
144 handleNotYetImplementedElement(next
);
145 } else if (isEndingElement(next
, KEYNOTES
)) {
147 popUnimplemented(next
.asEndElement());
149 handleUnexpectedElement(next
);
152 throw new IllegalStateException("<couplet> has no closing tag");
/**
 * Attaches the question nodes of a finished couplet to their parent.
 *
 * If parentNode is not null, every node of childList is added to it as child. Otherwise,
 * if num is not blank, the nodes registered in the state's unmatched leads under
 * (current key, num) are looked up, every child is added to each of them, and the
 * matched node is removed from the unmatched leads. Exceptions raised while adding
 * children or refreshing the numbering are reported as warning.
 *
 * NOTE(review): the "try" line and the condition guarding the final
 * "Parent num could not be matched" warning are not part of this listing.
 *
 * @param state the import state providing the current key and the unmatched leads
 * @param parentEvent the couplet event, used as location for warnings
 * @param parentNode the parent node, may be null
 * @param num the couplet's num attribute
 * @param childList the nodes created for the couplet's questions
 */
163 private void completeCouplet(MarkupImportState state
, XMLEvent parentEvent
,
164 PolytomousKeyNode parentNode
, String num
, List
<PolytomousKeyNode
> childList
) {
165 if (parentNode
!= null){
166 for (PolytomousKeyNode childNode
: childList
){
167 parentNode
.addChild(childNode
);
168 //just to be on the safe side
169 parentNode
.refreshNodeNumbering();
171 }else if (isNotBlank(num
)){
// same key form as used by handleToCouplet when registering a lead
172 UnmatchedLeadsKey unmatchedKey
= UnmatchedLeadsKey
.NewInstance(state
.getCurrentKey(), num
);
173 Set
<PolytomousKeyNode
> nodes
= state
.getUnmatchedLeads().getNodes(unmatchedKey
);
174 for(PolytomousKeyNode nodeToMatch
: nodes
){
175 for (PolytomousKeyNode childNode
: childList
){
177 nodeToMatch
.addChild(childNode
);
178 //just to be on the safe side
179 nodeToMatch
.refreshNodeNumbering();
180 } catch (Exception e
) {
181 String message
= "An exception occurred when trying to add a key node child or to referesh the node numbering: " + e
.getMessage();
182 fireWarningEvent(message
, parentEvent
, 6);
// NOTE(review): removing while iterating over "nodes" is only safe if getNodes
// returns a copy - TODO confirm.
185 state
.getUnmatchedLeads().removeNode(unmatchedKey
, nodeToMatch
);
188 String message
= "Parent num could not be matched. Please check if num (%s) is correct";
189 message
= String
.format(message
, num
);
190 fireWarningEvent(message
, parentEvent
, 6);
/**
 * Handles a question element. Creates a new {@link PolytomousKeyNode} belonging to
 * the state's current key, appends it to nodesList and fills it from the child events:
 * text -> statement of the node (in the default language), nested couplet -> handleCouplet
 * with this node as parent, toCouplet -> handleToCouplet, toTaxon -> handleToTaxon,
 * toKey -> reported as not yet implemented, keynotes -> handleAmbigousManually,
 * anything else -> handleUnexpectedElement.
 *
 * @param state the import state
 * @param reader the XML event reader the child events are read from
 * @param parentEvent the event of the question element
 * @param nodesList list of the enclosing couplet; the new node is added to it
 * @throws XMLStreamException as declared; presumably raised while reading events
 * @throws IllegalStateException with message "<question> has no closing tag" if the
 *         events run out before the question's end element was seen
 */
194 private void handleQuestion(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
, List
<PolytomousKeyNode
> nodesList
) throws XMLStreamException
{
196 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
197 //TODO needed only for data lineage
// questionNum is read as required attribute but not used further in the visible lines
198 String questionNum
= getAndRemoveRequiredAttributeValue(parentEvent
, attributes
, NUM
);
200 PolytomousKeyNode myNode
= PolytomousKeyNode
.NewInstance();
201 myNode
.setKey(state
.getCurrentKey()); //to avoid NPE while computing num in PolytomousKeyNode in case this node is not matched correctly with a parent
202 nodesList
.add(myNode
);
204 while (reader
.hasNext()) {
205 XMLEvent next
= readNoWhitespace(reader
);
206 if (isMyEndingElement(next
, parentEvent
)) {
208 } else if (isStartingElement(next
, TEXT
)) {
209 String text
= getCData(state
, reader
, next
);
210 KeyStatement statement
= KeyStatement
.NewInstance(getDefaultLanguage(state
), text
);
211 myNode
.setStatement(statement
);
212 } else if (isStartingElement(next
, COUPLET
)) {
// nested couplet: its questions become children of this node
214 handleCouplet(state
, reader
, next
, myNode
);
215 } else if (isStartingElement(next
, TO_COUPLET
)) {
216 handleToCouplet(state
, reader
, next
, myNode
);
217 } else if (isStartingElement(next
, TO_TAXON
)) {
218 handleToTaxon(state
, reader
, next
, myNode
);
219 } else if (isStartingElement(next
, TO_KEY
)) {
221 handleNotYetImplementedElement(next
);
222 } else if (isStartingElement(next
, KEYNOTES
)) {
223 handleAmbigousManually(state
, reader
, next
.asStartElement());
225 handleUnexpectedElement(next
);
228 throw new IllegalStateException("<question> has no closing tag");
/**
 * Handles a toCouplet element of a question: registers the question's node in the
 * state's unmatched leads under (current key, num), the same key form completeCouplet
 * uses to find the parent of a couplet with that num. Text content that is not blank
 * and differs from num is reported with a warning.
 *
 * @param state the import state providing the current key and the unmatched leads
 * @param reader the XML event reader the element text is read from
 * @param next the event of the toCouplet element
 * @param node the key node of the question that points to the couplet
 * @throws XMLStreamException as declared; presumably raised while reading the text
 */
231 private void handleToCouplet(MarkupImportState state
, XMLEventReader reader
, XMLEvent next
, PolytomousKeyNode node
) throws XMLStreamException
{
232 String num
= getOnlyAttribute(next
, NUM
, true);
233 String cData
= getCData(state
, reader
, next
, false);
// text equal to num carries no extra information, so no warning in that case
234 if (isNotBlank(cData
) && ! cData
.equals(num
)){
235 String message
= "CData ('%s') not handled in <toCouplet>";
236 message
= String
.format(message
, cData
);
237 fireWarningEvent(message
, next
, 4);
239 UnmatchedLeadsKey unmatched
= UnmatchedLeadsKey
.NewInstance(state
.getCurrentKey(), num
);
240 state
.getUnmatchedLeads().addKey(unmatched
, node
);
/**
 * Handles a toTaxon element of a question. Reads the num and exists attributes,
 * obtains the (trimmed) taxon text via handleInnerToTaxon and turns it into a taxon key
 * string with makeTaxonKey. Two outcomes are visible below: a new {@link Taxon} with a
 * name of unknown rank, whose title cache is the key string, is set on the node; or
 * the node is registered in the state's unmatched leads under (num, key string) for
 * later matching (makeKeyNodes looks leads up with the same key form).
 *
 * NOTE(review): the lines selecting between the two outcomes are not part of this
 * listing; presumably the taxon is created when taxonNotExists is true and the lead
 * is registered otherwise - TODO confirm.
 *
 * @param state the import state
 * @param reader the XML event reader the element content is read from
 * @param parentEvent the event of the toTaxon element
 * @param node the key node of the question that points to the taxon
 * @throws XMLStreamException as declared; presumably raised while reading events
 */
243 private void handleToTaxon(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
, PolytomousKeyNode node
) throws XMLStreamException
{
244 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
245 String num
= getAndRemoveAttributeValue(attributes
, NUM
);
// presumably true if the exists attribute has value "false" - TODO confirm
246 boolean taxonNotExists
= checkAndRemoveAttributeValue(attributes
, EXISTS
, "false");
248 String taxonCData
= handleInnerToTaxon(state
, reader
, parentEvent
, node
).trim();
250 String taxonKeyStr
= makeTaxonKey(taxonCData
, state
.getCurrentTaxon(), parentEvent
.getLocation());
// NOTE(review): debugging output written directly to stdout; the result of
// UUID.fromString is discarded. Consider using the logger instead.
252 if (taxonKeyStr
.contains(":")){
253 System
.out
.println(":");
254 UUID
.fromString(taxonKeyStr
);
255 System
.out
.println("Here we have a uuid: " + taxonKeyStr
);
// an unnumbered taxon is also treated as not existing if the key declares that only
// numbered taxa exist
260 taxonNotExists
= taxonNotExists
|| (isBlank(num
) && state
.isOnlyNumberedTaxaExist());
262 NonViralName
<?
> name
= createNameByCode(state
, Rank
.UNKNOWN_RANK());
263 Taxon taxon
= Taxon
.NewInstance(name
, null);
264 taxon
.getName().setTitleCache(taxonKeyStr
, true);
265 node
.setTaxon(taxon
);
267 UnmatchedLeadsKey unmatched
= UnmatchedLeadsKey
.NewInstance(num
, taxonKeyStr
);
268 state
.getUnmatchedLeads().addKey(unmatched
, node
);
269 // String message = "The following key leads are unmatched: %s";
270 // message = String.format(message, state.getUnmatchedLeads().toString());
271 // fireWarningEvent(message, parentEvent, 6);
277 * Returns the taxon text of the toTaxon element and handles all annotations as ';'-concatenated modifying text.
278 * Footnote refs are not yet handled.
284 * @throws XMLStreamException
/**
 * Processes the content of a toTaxon element. Character data is appended to the taxon
 * text; the texts of annotation elements are concatenated with "; " and, at the end
 * element, stored (if not blank) as modifying text of the node in the default language.
 * Footnote references are only reported as not yet implemented.
 *
 * NOTE(review): the return statement is not part of this listing; presumably the
 * collected taxon text is returned - TODO confirm.
 *
 * @param state the import state
 * @param reader the XML event reader the content is read from
 * @param parentEvent the event of the toTaxon element
 * @param node the key node that receives the modifying text
 * @throws XMLStreamException as declared; presumably raised while reading events
 * @throws IllegalStateException with message "Event has no closing tag" if the events
 *         run out before the end element was seen
 */
286 private String
handleInnerToTaxon(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
, PolytomousKeyNode node
) throws XMLStreamException
{
287 String taxonText
= "";
288 String modifyingText
= null;
289 while (reader
.hasNext()) {
290 XMLEvent next
= readNoWhitespace(reader
);
291 if (isMyEndingElement(next
, parentEvent
)) {
292 if (isNotBlank(modifyingText
)){
293 node
.putModifyingText(getDefaultLanguage(state
), modifyingText
);
296 } else if (next
.isCharacters()) {
297 taxonText
+= next
.asCharacters().getData();
298 } else if (isStartingElement(next
, ANNOTATION
)) {
299 String annotation
= handleSimpleAnnotation(state
, reader
, next
);
300 modifyingText
= CdmUtils
.concat("; ", modifyingText
, annotation
);
301 } else if (isStartingElement(next
, FOOTNOTE_REF
)) {
302 handleNotYetImplementedElement(next
);
304 handleUnexpectedElement(next
);
307 throw new IllegalStateException("Event has no closing tag");
312 * Creates a string that represents the given taxon. The string will try to replace e.g.
313 * abbreviated genus epithets by its full name etc.
/**
 * Builds the string used to match a key lead with a taxon. The goto text is normalized
 * (normalizeKeyString) and split at whitespace; tokens recognized as abbreviation of
 * the taxon's genus name are replaced by the full genus name. If a token is an
 * infraspecific rank marker while the result is still blank and the taxon's name has a
 * species epithet, genus name and species epithet are put in front. Tokens are joined
 * with single blanks and finally removeTrailingDot is applied.
 *
 * NOTE(review): the declaration of "result", the body of the null check on strGoto and
 * the return statement are not part of this listing.
 *
 * @param strGoto the text of the toTaxon element
 * @param taxon the taxon providing genus name and species epithet; its name is
 *        dereferenced without null check in the visible lines
 * @param location the location used for warnings fired during normalization
 */
319 private String
makeTaxonKey(String strGoto
, Taxon taxon
, Location location
) {
321 if (strGoto
== null){
325 INonViralName name
= taxon
.getName();
326 String strGenusName
= name
.getGenusOrUninomial();
328 String normalized
= normalizeKeyString(strGoto
, location
);
330 String
[] split
= normalized
.split("\\s");
331 //handle single epithets and markers
332 for (int i
= 0; i
<split
.length
; i
++){
333 String single
= split
[i
];
334 if (isGenusAbbrev(single
, strGenusName
)){
335 split
[i
] = strGenusName
;
337 if (isInfraSpecificMarker(single
)){
338 String strSpeciesEpi
= name
.getSpecificEpithet();
// text starting directly with a rank marker: complete it with genus and species
339 if (isBlank(result
) && isNotBlank(strSpeciesEpi
)){
340 result
+= strGenusName
+ " " + strSpeciesEpi
;
343 result
= (result
+ " " + split
[i
]).trim();
345 result
= removeTrailingDot(result
);
// Regex for one pair of round brackets whose content contains no further round bracket.
350 final static String bracketPattern
= "\\([^\\(\\)]*\\)";
// Same pattern surrounded by ".*" so that String.matches() succeeds if such a bracket
// occurs anywhere in the (single line) string.
351 final static String bracketPatternSomewhere
= String
.format(".*%s.*", bracketPattern
);
/**
 * Normalizes a key string for matching: bracketed parts are removed (with a warning),
 * whitespace runs are reduced to one blank, the string is trimmed, whitespace in front
 * of a dot is removed, a double dot is reduced to a single dot and the hybrid sign
 * (UTF8.HYBRID) is replaced by "x ".
 *
 * NOTE(review): strGoto is dereferenced without null check, and the return statement
 * is not part of this listing (presumably "result" is returned - TODO confirm).
 *
 * @param strGoto the string to normalize
 * @param location the location reported with the bracket warning
 */
358 private String
normalizeKeyString(String strGoto
, Location location
) {
359 String result
= strGoto
;
360 if (result
.matches(bracketPatternSomewhere
)){
361 fireWarningEvent("keyString has bracket (uncritical for fullname matching): " + result
, makeLocationStr(location
), 4);
362 result
= result
.replaceAll(bracketPattern
, ""); //replace all brackets
364 result
= result
.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace
366 result
= result
.trim();
367 result
= result
.replaceAll("\\s+\\.", "\\."); // " ." may be created by bracket replacement
368 result
= result
.replaceAll("\\.\\.", "\\."); //replace
369 result
= result
.replace(UTF8
.HYBRID
.toString(), "x ");
/**
 * Checks whether the given token denotes an infraspecific rank, by resolving it with
 * Rank.getRankByIdInVoc and asking the rank whether it is infraspecific. An
 * UnknownCdmTypeException raised by the lookup is caught here.
 *
 * NOTE(review): the return statements are not part of this listing; presumably true is
 * returned for an infraspecific rank and false otherwise, including the exception
 * case - TODO confirm.
 *
 * @param single a single whitespace separated token of a key string
 */
374 private boolean isInfraSpecificMarker(String single
) {
376 if (Rank
.getRankByIdInVoc(single
).isInfraSpecific()){
381 } catch (UnknownCdmTypeException e
) {
386 //******************************** recognize nodes ***********/
/**
 * Matches the state's current taxon against the key leads that are still unmatched.
 * The taxon's name cache is normalized (normalizeKeyString, removeTrailingDot) and used
 * together with the current taxon num to look up leads; handleMatchingNodes assigns
 * the taxon to all matching nodes. If a num exists the lookup is repeated with an empty
 * num, and a warning is fired if this yields additional matches. A further warning is
 * fired if a num exists but the first lookup found no node.
 *
 * NOTE(review): taxonTitle is not used in the visible lines, and the taxon's name and
 * name cache are dereferenced without null check.
 *
 * @param state the import state providing the current taxon, its num and the unmatched leads
 * @param event the event used as location for normalization and warnings
 * @param taxonTitle not used in the visible lines
 */
388 public void makeKeyNodes(MarkupImportState state
, XMLEvent event
, String taxonTitle
) {
389 Taxon taxon
= state
.getCurrentTaxon();
390 String num
= state
.getCurrentTaxonNum();
392 INonViralName nvn
= taxon
.getName();
393 String nameString
= nvn
.getNameCache();
394 nameString
= normalizeKeyString(nameString
, event
.getLocation());
395 nameString
= removeTrailingDot(nameString
);
// NOTE(review): debugging output written directly to stdout; the result of
// UUID.fromString is discarded. Consider using the logger instead.
397 if (nameString
.contains(":")){
398 System
.out
.println(":");
399 UUID
.fromString(nameString
);
400 System
.out
.println("Here we have a uuid: " + nameString
+ "for" + nvn
.getTitleCache());
405 //try to find matching lead nodes
406 UnmatchedLeadsKey leadsKey
= UnmatchedLeadsKey
.NewInstance(num
, nameString
);
407 Set
<PolytomousKeyNode
> matchingNodes
= handleMatchingNodes(state
, event
, taxon
, leadsKey
);
409 if (num
!= null){//same without using the num
410 UnmatchedLeadsKey noNumLeadsKey
= UnmatchedLeadsKey
.NewInstance("", nameString
);
411 Set
<PolytomousKeyNode
> noNumMatchingNodes
= handleMatchingNodes(state
, event
, taxon
, noNumLeadsKey
);
412 if(noNumMatchingNodes
.size() > 0){
413 String message
="Taxon matches additional key node when not considering <num> attribute in taxontitle. This may be correct but may also indicate an error.";
414 fireWarningEvent(message
, event
, 1);
417 //report missing match, if num exists
418 if (num
!= null && matchingNodes
.isEmpty() /* TODO redo comment && num != null (later DONE) */){
419 String message
= "Taxon has <num> attribute in taxontitle but no matching key nodes exist: %s, Key: %s";
420 message
= String
.format(message
, num
, leadsKey
.toString());
421 fireWarningEvent(message
, event
, 1);
426 * remove trailing "." except for "sp."
/**
 * Removes trailing dots from the given string. As long as the string ends with a "."
 * that is not directly preceded by "sp", the last character is cut off and the rest is
 * trimmed. Because of the lookbehind every string ending with "sp." is kept as is, so
 * e.g. "subsp." is kept as well.
 *
 * NOTE(review): str is dereferenced without null check, and the return statement is not
 * part of this listing (presumably the shortened string is returned - TODO confirm).
 *
 * @param str the string to strip
 */
430 private String
removeTrailingDot(String str
) {
431 while (str
.matches(".*(?<!sp)\\.$")){
432 str
= str
.substring(0, str
.length()-1).trim();
/**
 * Assigns the given taxon to all key nodes registered in the state's unmatched leads
 * under leadsKey. Each such node is removed from the unmatched leads, gets the taxon
 * set, has its node numbering refreshed (an exception raised there is reported as
 * warning) and is added to the state's collection of key nodes to save.
 *
 * @param state the import state providing the unmatched leads and the nodes to save
 * @param event the event used as location for warnings
 * @param taxon the taxon to set on the matching nodes
 * @param leadsKey the key the leads were registered under
 * @return the nodes found for leadsKey, empty if there was no match (makeKeyNodes
 *         relies on isEmpty()/size() of the result)
 */
437 private Set
<PolytomousKeyNode
> handleMatchingNodes(MarkupImportState state
, XMLEvent event
, Taxon taxon
, UnmatchedLeadsKey leadsKey
) {
438 Set
<PolytomousKeyNode
> matchingNodes
= state
.getUnmatchedLeads().getNodes(leadsKey
);
439 for (PolytomousKeyNode matchingNode
: matchingNodes
){
// NOTE(review): removing while iterating over "matchingNodes" is only safe if getNodes
// returns a copy - TODO confirm.
440 state
.getUnmatchedLeads().removeNode(leadsKey
, matchingNode
);
441 matchingNode
.setTaxon(taxon
);
442 //just to be on the safe side
444 matchingNode
.refreshNodeNumbering();
445 } catch (Exception e
) {
446 String message
= "An exception occurred when trying to referesh the node numbering: " + e
.getMessage();
447 fireWarningEvent(message
, event
, 6);
449 state
.getPolytomousKeyNodesToSave().add(matchingNode
);
451 return matchingNodes
;