1
|
/**
|
2
|
* Copyright (C) 2009 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
|
10
|
package eu.etaxonomy.cdm.io.markup;
|
11
|
|
12
|
import java.util.ArrayList;
|
13
|
import java.util.List;
|
14
|
import java.util.Map;
|
15
|
import java.util.Set;
|
16
|
|
17
|
import javax.xml.stream.Location;
|
18
|
import javax.xml.stream.XMLEventReader;
|
19
|
import javax.xml.stream.XMLStreamException;
|
20
|
import javax.xml.stream.events.Attribute;
|
21
|
import javax.xml.stream.events.XMLEvent;
|
22
|
|
23
|
import org.apache.log4j.Logger;
|
24
|
|
25
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
26
|
import eu.etaxonomy.cdm.io.markup.UnmatchedLeads.UnmatchedLeadsKey;
|
27
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
28
|
import eu.etaxonomy.cdm.model.common.Language;
|
29
|
import eu.etaxonomy.cdm.model.description.KeyStatement;
|
30
|
import eu.etaxonomy.cdm.model.description.PolytomousKey;
|
31
|
import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
|
32
|
import eu.etaxonomy.cdm.model.name.NonViralName;
|
33
|
import eu.etaxonomy.cdm.model.name.Rank;
|
34
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
35
|
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
|
36
|
|
37
|
/**
|
38
|
* @author a.mueller
|
39
|
* @created 26.04.2013
|
40
|
*/
|
41
|
public class MarkupKeyImport extends MarkupImportBase {
|
42
|
/** Class logger; not referenced anywhere in this class, hence the suppressed "unused" warning. */
@SuppressWarnings("unused")
private static final Logger logger = Logger.getLogger(MarkupKeyImport.class);

/**
 * Creates a key import handler for the given document import.
 *
 * @param docImport the document import, handed on unchanged to the {@link MarkupImportBase} constructor
 */
public MarkupKeyImport(MarkupDocumentImport docImport) {
    super(docImport);
}
|
49
|
|
50
|
public void handleKey(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
|
51
|
// attributes
|
52
|
Map<String, Attribute> attributes = getAttributes(parentEvent);
|
53
|
String isSpotcharacters = getAndRemoveAttributeValue(attributes, IS_SPOTCHARACTERS);
|
54
|
if (isNotBlank(isSpotcharacters) ) {
|
55
|
//TODO isSpotcharacters
|
56
|
String message = "Attribute isSpotcharacters not yet implemented for <key>";
|
57
|
fireWarningEvent(message, parentEvent, 4);
|
58
|
}
|
59
|
boolean onlyNumberedTaxaExist = checkAndRemoveAttributeValue(attributes, ONLY_NUMBERED_TAXA_EXIST, "true");
|
60
|
state.setOnlyNumberedTaxaExist(onlyNumberedTaxaExist);
|
61
|
|
62
|
PolytomousKey key = PolytomousKey.NewInstance();
|
63
|
key.addTaxonomicScope(state.getCurrentTaxon());
|
64
|
state.setCurrentKey(key);
|
65
|
|
66
|
boolean isFirstCouplet = true;
|
67
|
while (reader.hasNext()) {
|
68
|
XMLEvent next = readNoWhitespace(reader);
|
69
|
if (isMyEndingElement(next, parentEvent)) {
|
70
|
save(key, state);
|
71
|
//reset state
|
72
|
state.setCurrentKey(null);
|
73
|
state.setOnlyNumberedTaxaExist(false);
|
74
|
return;
|
75
|
} else if (isEndingElement(next, KEYNOTES)){
|
76
|
popUnimplemented(next.asEndElement());
|
77
|
} else if (isStartingElement(next, KEY_TITLE)) {
|
78
|
handleKeyTitle(state, reader, next);
|
79
|
} else if (isStartingElement(next, KEYNOTES)) {
|
80
|
//TODO
|
81
|
handleNotYetImplementedElement(next);
|
82
|
} else if (isStartingElement(next, COUPLET)) {
|
83
|
PolytomousKeyNode node = null;
|
84
|
if (isFirstCouplet){
|
85
|
node = key.getRoot();
|
86
|
isFirstCouplet = false;
|
87
|
}
|
88
|
handleCouplet(state, reader, next, node);
|
89
|
} else {
|
90
|
handleUnexpectedElement(next);
|
91
|
}
|
92
|
}
|
93
|
throw new IllegalStateException("<key> has no closing tag");
|
94
|
}
|
95
|
|
96
|
|
97
|
/**
|
98
|
* @param state
|
99
|
* @param reader
|
100
|
* @param key
|
101
|
* @param next
|
102
|
* @throws XMLStreamException
|
103
|
*/
|
104
|
private void handleKeyTitle(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
|
105
|
PolytomousKey key = state.getCurrentKey();
|
106
|
String keyTitle = getCData(state, reader, parentEvent);
|
107
|
String standardTitlesEngl = "(?i)(Key\\sto\\sthe\\s(genera|species|varieties|forms))";
|
108
|
String standardTitlesFrench = "(?i)(Cl\u00e9\\sdes\\s(genres|esp\u00e8ces))";
|
109
|
String standardTitles = standardTitlesEngl;
|
110
|
if (state.getDefaultLanguage() != null && state.getDefaultLanguage().equals(Language.FRENCH())){
|
111
|
standardTitles = standardTitlesFrench;
|
112
|
}
|
113
|
|
114
|
if (isNotBlank(keyTitle) ){
|
115
|
if (!state.getConfig().isReplaceStandardKeyTitles() || ! keyTitle.matches(standardTitles)){
|
116
|
key.setTitleCache(keyTitle, true);
|
117
|
}
|
118
|
}
|
119
|
}
|
120
|
|
121
|
|
122
|
private void handleCouplet(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode parentNode) throws XMLStreamException {
|
123
|
String num = getOnlyAttribute(parentEvent, NUM, true);
|
124
|
List<PolytomousKeyNode> childList = new ArrayList<PolytomousKeyNode>();
|
125
|
|
126
|
while (reader.hasNext()) {
|
127
|
XMLEvent next = readNoWhitespace(reader);
|
128
|
if (isMyEndingElement(next, parentEvent)) {
|
129
|
completeCouplet(state, parentEvent, parentNode, num, childList);
|
130
|
return;
|
131
|
} else if (isStartingElement(next, QUESTION)) {
|
132
|
handleQuestion(state, reader, next, childList);
|
133
|
} else if (isStartingElement(next, KEYNOTES)) {
|
134
|
//TODO
|
135
|
handleNotYetImplementedElement(next);
|
136
|
} else if (isEndingElement(next, KEYNOTES)) {
|
137
|
//TODO
|
138
|
popUnimplemented(next.asEndElement());
|
139
|
} else {
|
140
|
handleUnexpectedElement(next);
|
141
|
}
|
142
|
}
|
143
|
throw new IllegalStateException("<couplet> has no closing tag");
|
144
|
}
|
145
|
|
146
|
|
147
|
/**
|
148
|
* @param state
|
149
|
* @param parentEvent
|
150
|
* @param parentNode
|
151
|
* @param num
|
152
|
* @param childList
|
153
|
*/
|
154
|
private void completeCouplet(MarkupImportState state, XMLEvent parentEvent,
|
155
|
PolytomousKeyNode parentNode, String num, List<PolytomousKeyNode> childList) {
|
156
|
if (parentNode != null){
|
157
|
for (PolytomousKeyNode childNode : childList){
|
158
|
parentNode.addChild(childNode);
|
159
|
//just to be on the save side
|
160
|
parentNode.refreshNodeNumbering();
|
161
|
}
|
162
|
}else if (isNotBlank(num)){
|
163
|
UnmatchedLeadsKey unmatchedKey = UnmatchedLeadsKey.NewInstance(state.getCurrentKey(), num);
|
164
|
Set<PolytomousKeyNode> nodes = state.getUnmatchedLeads().getNodes(unmatchedKey);
|
165
|
for(PolytomousKeyNode nodeToMatch: nodes){
|
166
|
for (PolytomousKeyNode childNode : childList){
|
167
|
nodeToMatch.addChild(childNode);
|
168
|
//just to be on the save side
|
169
|
nodeToMatch.refreshNodeNumbering();
|
170
|
}
|
171
|
state.getUnmatchedLeads().removeNode(unmatchedKey, nodeToMatch);
|
172
|
}
|
173
|
}else{
|
174
|
String message = "Parent num could not be matched. Please check if num (%s) is correct";
|
175
|
message = String.format(message, num);
|
176
|
fireWarningEvent(message, parentEvent, 6);
|
177
|
}
|
178
|
}
|
179
|
|
180
|
private void handleQuestion(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, List<PolytomousKeyNode> nodesList) throws XMLStreamException {
|
181
|
// attributes
|
182
|
Map<String, Attribute> attributes = getAttributes(parentEvent);
|
183
|
//TODO needed only for data lineage
|
184
|
String questionNum = getAndRemoveRequiredAttributeValue(parentEvent, attributes, NUM);
|
185
|
|
186
|
PolytomousKeyNode myNode = PolytomousKeyNode.NewInstance();
|
187
|
myNode.setKey(state.getCurrentKey()); //to avoid NPE while computing num in PolytomousKeyNode in case this node is not matched correctly with a parent
|
188
|
nodesList.add(myNode);
|
189
|
|
190
|
while (reader.hasNext()) {
|
191
|
XMLEvent next = readNoWhitespace(reader);
|
192
|
if (isMyEndingElement(next, parentEvent)) {
|
193
|
return;
|
194
|
} else if (isStartingElement(next, TEXT)) {
|
195
|
String text = getCData(state, reader, next);
|
196
|
KeyStatement statement = KeyStatement.NewInstance(getDefaultLanguage(state), text);
|
197
|
myNode.setStatement(statement);
|
198
|
} else if (isStartingElement(next, COUPLET)) {
|
199
|
//TODO test
|
200
|
handleCouplet(state, reader, next, myNode);
|
201
|
} else if (isStartingElement(next, TO_COUPLET)) {
|
202
|
handleToCouplet(state, reader, next, myNode);
|
203
|
} else if (isStartingElement(next, TO_TAXON)) {
|
204
|
handleToTaxon(state, reader, next, myNode);
|
205
|
} else if (isStartingElement(next, TO_KEY)) {
|
206
|
//TODO
|
207
|
handleNotYetImplementedElement(next);
|
208
|
} else if (isEndingElement(next, TO_KEY)){
|
209
|
//TODO
|
210
|
popUnimplemented(next.asEndElement());
|
211
|
} else if (isStartingElement(next, KEYNOTES)) {
|
212
|
//TODO
|
213
|
handleNotYetImplementedElement(next);
|
214
|
} else if (isEndingElement(next, KEYNOTES)){
|
215
|
//TODO
|
216
|
popUnimplemented(next.asEndElement());
|
217
|
} else {
|
218
|
handleUnexpectedElement(next);
|
219
|
}
|
220
|
}
|
221
|
throw new IllegalStateException("<question> has no closing tag");
|
222
|
}
|
223
|
|
224
|
private void handleToCouplet(MarkupImportState state, XMLEventReader reader, XMLEvent next, PolytomousKeyNode node) throws XMLStreamException {
|
225
|
String num = getOnlyAttribute(next, NUM, true);
|
226
|
String cData = getCData(state, reader, next, false);
|
227
|
if (isNotBlank(cData) && ! cData.equals(num)){
|
228
|
String message = "CData ('%s') not be handled in <toCouplet>";
|
229
|
message = String.format(message, cData);
|
230
|
fireWarningEvent(message, next, 4);
|
231
|
}
|
232
|
UnmatchedLeadsKey unmatched = UnmatchedLeadsKey.NewInstance(state.getCurrentKey(), num);
|
233
|
state.getUnmatchedLeads().addKey(unmatched, node);
|
234
|
}
|
235
|
|
236
|
private void handleToTaxon(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode node) throws XMLStreamException {
|
237
|
Map<String, Attribute> attributes = getAttributes(parentEvent);
|
238
|
String num = getAndRemoveAttributeValue(attributes, NUM);
|
239
|
boolean taxonNotExists = checkAndRemoveAttributeValue(attributes, EXISTS, "false");
|
240
|
|
241
|
String taxonCData = handleInnerToTaxon(state, reader, parentEvent, node).trim();
|
242
|
|
243
|
String taxonKeyStr = makeTaxonKey(taxonCData, state.getCurrentTaxon(), parentEvent.getLocation());
|
244
|
taxonNotExists = taxonNotExists || (isBlank(num) && state.isOnlyNumberedTaxaExist());
|
245
|
if (taxonNotExists){
|
246
|
NonViralName<?> name = createNameByCode(state, Rank.UNKNOWN_RANK());
|
247
|
Taxon taxon = Taxon.NewInstance(name, null);
|
248
|
taxon.getName().setTitleCache(taxonKeyStr, true);
|
249
|
node.setTaxon(taxon);
|
250
|
}else{
|
251
|
UnmatchedLeadsKey unmatched = UnmatchedLeadsKey.NewInstance(num, taxonKeyStr);
|
252
|
state.getUnmatchedLeads().addKey(unmatched, node);
|
253
|
// String message = "The following key leads are unmatched: %s";
|
254
|
// message = String.format(message, state.getUnmatchedLeads().toString());
|
255
|
// fireWarningEvent(message, parentEvent, 6);
|
256
|
}
|
257
|
return;
|
258
|
}
|
259
|
|
260
|
|
261
|
/**
|
262
|
* Returns the taxon text of the toTaxon element and handles all annotations as ';'-concatenated modifying text.
|
263
|
* Footnote refs are not yet handled.
|
264
|
* @param state
|
265
|
* @param reader
|
266
|
* @param parentEvent
|
267
|
* @param node
|
268
|
* @return
|
269
|
* @throws XMLStreamException
|
270
|
*/
|
271
|
private String handleInnerToTaxon(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode node) throws XMLStreamException {
|
272
|
String taxonText = "";
|
273
|
String modifyingText = null;
|
274
|
while (reader.hasNext()) {
|
275
|
XMLEvent next = readNoWhitespace(reader);
|
276
|
if (isMyEndingElement(next, parentEvent)) {
|
277
|
if (isNotBlank(modifyingText)){
|
278
|
node.putModifyingText(getDefaultLanguage(state), modifyingText);
|
279
|
}
|
280
|
return taxonText;
|
281
|
} else if (next.isCharacters()) {
|
282
|
taxonText += next.asCharacters().getData();
|
283
|
} else if (isStartingElement(next, ANNOTATION)) {
|
284
|
String annotation = handleSimpleAnnotation(state, reader, next);
|
285
|
modifyingText = CdmUtils.concat("; ", modifyingText, annotation);
|
286
|
} else if (isStartingElement(next, FOOTNOTE_REF)) {
|
287
|
handleNotYetImplementedElement(next);
|
288
|
} else {
|
289
|
handleUnexpectedElement(next);
|
290
|
}
|
291
|
}
|
292
|
throw new IllegalStateException("Event has no closing tag");
|
293
|
|
294
|
}
|
295
|
|
296
|
/**
|
297
|
* Creates a string that represents the given taxon. The string will try to replace e.g.
|
298
|
* abbreviated genus epithets by its full name etc.
|
299
|
* @param strGoto
|
300
|
* @param taxon
|
301
|
* @param location
|
302
|
* @return
|
303
|
*/
|
304
|
private String makeTaxonKey(String strGoto, Taxon taxon, Location location) {
|
305
|
String result = "";
|
306
|
if (strGoto == null){
|
307
|
return "";
|
308
|
}
|
309
|
|
310
|
NonViralName<?> name = CdmBase.deproxy(taxon.getName(), NonViralName.class);
|
311
|
String strGenusName = name.getGenusOrUninomial();
|
312
|
|
313
|
final String bracketPattern = "\\([^\\(\\)]*\\)";
|
314
|
final String bracketPatternSomewhere = String.format(".*%s.*", bracketPattern);
|
315
|
if (strGoto.matches(bracketPatternSomewhere)){
|
316
|
fireWarningEvent("toTaxon has bracket: " + strGoto, makeLocationStr(location), 4);
|
317
|
strGoto = strGoto.replaceAll(bracketPattern, ""); //replace all brackets
|
318
|
}
|
319
|
strGoto = strGoto.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace
|
320
|
|
321
|
strGoto = strGoto.trim();
|
322
|
strGoto = strGoto.replaceAll("\\s+\\.", "\\."); // " ." may be created by bracket replacement
|
323
|
strGoto = strGoto.replaceAll("\\.\\.", "\\."); //replace
|
324
|
|
325
|
String[] split = strGoto.split("\\s");
|
326
|
//handle single epithets and markers
|
327
|
for (int i = 0; i<split.length; i++){
|
328
|
String single = split[i];
|
329
|
if (isGenusAbbrev(single, strGenusName)){
|
330
|
split[i] = strGenusName;
|
331
|
}
|
332
|
if (isInfraSpecificMarker(single)){
|
333
|
String strSpeciesEpi = name.getSpecificEpithet();
|
334
|
if (isBlank(result) && isNotBlank(strSpeciesEpi)){
|
335
|
result += strGenusName + " " + strSpeciesEpi;
|
336
|
}
|
337
|
}
|
338
|
result = (result + " " + split[i]).trim();
|
339
|
}
|
340
|
//remove trailing "." except for "sp."
|
341
|
while (result.matches(".*(?<!sp)\\.$")){
|
342
|
result = result.substring(0, result.length()-1).trim();
|
343
|
}
|
344
|
return result;
|
345
|
}
|
346
|
|
347
|
|
348
|
private boolean isInfraSpecificMarker(String single) {
|
349
|
try {
|
350
|
if (Rank.getRankByAbbreviation(single).isInfraSpecific()){
|
351
|
return true;
|
352
|
}else{
|
353
|
return false;
|
354
|
}
|
355
|
} catch (UnknownCdmTypeException e) {
|
356
|
return false;
|
357
|
}
|
358
|
}
|
359
|
|
360
|
|
361
|
|
362
|
|
363
|
//******************************** recognize nodes ***********/
|
364
|
|
365
|
public void makeKeyNodes(MarkupImportState state, XMLEvent event, String taxonTitle) {
|
366
|
Taxon taxon = state.getCurrentTaxon();
|
367
|
String num = state.getCurrentTaxonNum();
|
368
|
|
369
|
String nameString = CdmBase.deproxy(taxon.getName(), NonViralName.class).getNameCache();
|
370
|
// String nameString = taxonTitle;
|
371
|
|
372
|
//try to find matching lead nodes
|
373
|
UnmatchedLeadsKey leadsKey = UnmatchedLeadsKey.NewInstance(num, nameString);
|
374
|
Set<PolytomousKeyNode> matchingNodes = handleMatchingNodes(state, taxon, leadsKey);
|
375
|
|
376
|
if (num != null){//same without using the num
|
377
|
UnmatchedLeadsKey noNumLeadsKey = UnmatchedLeadsKey.NewInstance("", nameString);
|
378
|
Set<PolytomousKeyNode> noNumMatchingNodes = handleMatchingNodes(state, taxon, noNumLeadsKey);
|
379
|
if(noNumMatchingNodes.size() > 0){
|
380
|
String message ="Taxon matches additional key node when not considering <num> attribute in taxontitle. This may be correct but may also indicate an error.";
|
381
|
fireWarningEvent(message, event, 1);
|
382
|
}
|
383
|
}
|
384
|
//report missing match, if num exists
|
385
|
if (matchingNodes.isEmpty() /* TODO redo comment && num != null */){
|
386
|
String message = "Taxon has <num> attribute in taxontitle but no matching key nodes exist: %s, Key: %s";
|
387
|
message = String.format(message, num, leadsKey.toString());
|
388
|
fireWarningEvent(message, event, 1);
|
389
|
}
|
390
|
|
391
|
}
|
392
|
|
393
|
private Set<PolytomousKeyNode> handleMatchingNodes(MarkupImportState state, Taxon taxon, UnmatchedLeadsKey leadsKey) {
|
394
|
Set<PolytomousKeyNode> matchingNodes = state.getUnmatchedLeads().getNodes(leadsKey);
|
395
|
for (PolytomousKeyNode matchingNode : matchingNodes){
|
396
|
state.getUnmatchedLeads().removeNode(leadsKey, matchingNode);
|
397
|
matchingNode.setTaxon(taxon);
|
398
|
//just to be on the save side
|
399
|
matchingNode.refreshNodeNumbering();
|
400
|
state.getPolytomousKeyNodesToSave().add(matchingNode);
|
401
|
}
|
402
|
return matchingNodes;
|
403
|
}
|
404
|
|
405
|
}
|