1
|
/**
|
2
|
* Copyright (C) 2009 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
|
10
|
package eu.etaxonomy.cdm.io.markup;
|
11
|
|
12
|
import java.util.ArrayList;
|
13
|
import java.util.List;
|
14
|
import java.util.Map;
|
15
|
import java.util.Set;
|
16
|
|
17
|
import javax.xml.stream.Location;
|
18
|
import javax.xml.stream.XMLEventReader;
|
19
|
import javax.xml.stream.XMLStreamException;
|
20
|
import javax.xml.stream.events.Attribute;
|
21
|
import javax.xml.stream.events.XMLEvent;
|
22
|
|
23
|
import org.apache.log4j.Logger;
|
24
|
|
25
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
26
|
import eu.etaxonomy.cdm.io.markup.UnmatchedLeads.UnmatchedLeadsKey;
|
27
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
28
|
import eu.etaxonomy.cdm.model.common.Language;
|
29
|
import eu.etaxonomy.cdm.model.description.KeyStatement;
|
30
|
import eu.etaxonomy.cdm.model.description.PolytomousKey;
|
31
|
import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
|
32
|
import eu.etaxonomy.cdm.model.name.NonViralName;
|
33
|
import eu.etaxonomy.cdm.model.name.Rank;
|
34
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
35
|
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
|
36
|
|
37
|
/**
|
38
|
* @author a.mueller
|
39
|
* @created 26.04.2013
|
40
|
*/
|
41
|
public class MarkupKeyImport extends MarkupImportBase {
|
42
|
@SuppressWarnings("unused")
|
43
|
private static final Logger logger = Logger.getLogger(MarkupKeyImport.class);
|
44
|
|
45
|
|
46
|
/**
 * Creates a key import handler for the given document import.
 *
 * @param docImport the document import handed over to the {@link MarkupImportBase} constructor
 */
public MarkupKeyImport(MarkupDocumentImport docImport) {
    super(docImport);
}
|
49
|
|
50
|
public void handleKey(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
|
51
|
// attributes
|
52
|
Map<String, Attribute> attributes = getAttributes(parentEvent);
|
53
|
String isSpotcharacters = getAndRemoveAttributeValue(attributes, IS_SPOTCHARACTERS);
|
54
|
if (isNotBlank(isSpotcharacters) ) {
|
55
|
//TODO isSpotcharacters
|
56
|
String message = "Attribute isSpotcharacters not yet implemented for <key>";
|
57
|
fireWarningEvent(message, parentEvent, 4);
|
58
|
}
|
59
|
boolean onlyNumberedTaxaExist = checkAndRemoveAttributeValue(attributes, ONLY_NUMBERED_TAXA_EXIST, "true");
|
60
|
state.setOnlyNumberedTaxaExist(onlyNumberedTaxaExist);
|
61
|
|
62
|
PolytomousKey key = PolytomousKey.NewInstance();
|
63
|
key.addTaxonomicScope(state.getCurrentTaxon());
|
64
|
state.setCurrentKey(key);
|
65
|
|
66
|
boolean isFirstCouplet = true;
|
67
|
while (reader.hasNext()) {
|
68
|
XMLEvent next = readNoWhitespace(reader);
|
69
|
if (isMyEndingElement(next, parentEvent)) {
|
70
|
save(key, state);
|
71
|
//reset state
|
72
|
state.setCurrentKey(null);
|
73
|
state.setOnlyNumberedTaxaExist(false);
|
74
|
return;
|
75
|
} else if (isEndingElement(next, KEYNOTES)){
|
76
|
popUnimplemented(next.asEndElement());
|
77
|
} else if (isStartingElement(next, KEY_TITLE)) {
|
78
|
handleKeyTitle(state, reader, next);
|
79
|
} else if (isStartingElement(next, KEYNOTES)) {
|
80
|
//TODO
|
81
|
handleNotYetImplementedElement(next);
|
82
|
} else if (isStartingElement(next, COUPLET)) {
|
83
|
PolytomousKeyNode node = null;
|
84
|
if (isFirstCouplet){
|
85
|
node = key.getRoot();
|
86
|
isFirstCouplet = false;
|
87
|
}
|
88
|
handleCouplet(state, reader, next, node);
|
89
|
} else {
|
90
|
handleUnexpectedElement(next);
|
91
|
}
|
92
|
}
|
93
|
throw new IllegalStateException("<key> has no closing tag");
|
94
|
}
|
95
|
|
96
|
|
97
|
/**
|
98
|
* @param state
|
99
|
* @param reader
|
100
|
* @param key
|
101
|
* @param next
|
102
|
* @throws XMLStreamException
|
103
|
*/
|
104
|
private void handleKeyTitle(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
|
105
|
PolytomousKey key = state.getCurrentKey();
|
106
|
String keyTitle = getCData(state, reader, parentEvent);
|
107
|
String standardTitlesEngl = "(?i)(Key\\sto\\sthe\\s(genera|species|varieties|forms))";
|
108
|
String standardTitlesFrench = "(?i)(Cl\u00e9\\sdes\\s(genres|esp\u00e8ces))";
|
109
|
String standardTitles = standardTitlesEngl;
|
110
|
if (state.getDefaultLanguage() != null && state.getDefaultLanguage().equals(Language.FRENCH())){
|
111
|
standardTitles = standardTitlesFrench;
|
112
|
}
|
113
|
|
114
|
if (isNotBlank(keyTitle) ){
|
115
|
if (!state.getConfig().isReplaceStandardKeyTitles() || ! keyTitle.matches(standardTitles)){
|
116
|
key.setTitleCache(keyTitle, true);
|
117
|
}
|
118
|
}
|
119
|
}
|
120
|
|
121
|
|
122
|
private void handleCouplet(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode parentNode) throws XMLStreamException {
|
123
|
String num = getOnlyAttribute(parentEvent, NUM, true);
|
124
|
List<PolytomousKeyNode> childList = new ArrayList<PolytomousKeyNode>();
|
125
|
|
126
|
while (reader.hasNext()) {
|
127
|
XMLEvent next = readNoWhitespace(reader);
|
128
|
if (isMyEndingElement(next, parentEvent)) {
|
129
|
completeCouplet(state, parentEvent, parentNode, num, childList);
|
130
|
return;
|
131
|
} else if (next.isCharacters()){
|
132
|
handleNotYetImplementedCharacters(next);
|
133
|
//work in progress from pesiimport2, not sure if this works
|
134
|
// String mainQuestion = next.asCharacters().getData();
|
135
|
// mainQuestion = mainQuestion.replaceAll("\\s+", " ").trim();
|
136
|
// KeyStatement question = KeyStatement.NewInstance(mainQuestion);
|
137
|
// if (parentNode != null){ parentNode.setStatement(question);} //work in progress
|
138
|
} else if (isStartingElement(next, QUESTION)) {
|
139
|
handleQuestion(state, reader, next, childList);
|
140
|
} else if (isStartingElement(next, KEYNOTES)) {
|
141
|
//TODO
|
142
|
handleNotYetImplementedElement(next);
|
143
|
} else if (isEndingElement(next, KEYNOTES)) {
|
144
|
//TODO
|
145
|
popUnimplemented(next.asEndElement());
|
146
|
} else {
|
147
|
handleUnexpectedElement(next);
|
148
|
}
|
149
|
}
|
150
|
throw new IllegalStateException("<couplet> has no closing tag");
|
151
|
}
|
152
|
|
153
|
|
154
|
/**
|
155
|
* @param state
|
156
|
* @param parentEvent
|
157
|
* @param parentNode
|
158
|
* @param num
|
159
|
* @param childList
|
160
|
*/
|
161
|
private void completeCouplet(MarkupImportState state, XMLEvent parentEvent,
|
162
|
PolytomousKeyNode parentNode, String num, List<PolytomousKeyNode> childList) {
|
163
|
if (parentNode != null){
|
164
|
for (PolytomousKeyNode childNode : childList){
|
165
|
parentNode.addChild(childNode);
|
166
|
//just to be on the save side
|
167
|
parentNode.refreshNodeNumbering();
|
168
|
}
|
169
|
}else if (isNotBlank(num)){
|
170
|
UnmatchedLeadsKey unmatchedKey = UnmatchedLeadsKey.NewInstance(state.getCurrentKey(), num);
|
171
|
Set<PolytomousKeyNode> nodes = state.getUnmatchedLeads().getNodes(unmatchedKey);
|
172
|
for(PolytomousKeyNode nodeToMatch: nodes){
|
173
|
for (PolytomousKeyNode childNode : childList){
|
174
|
try {
|
175
|
nodeToMatch.addChild(childNode);
|
176
|
//just to be on the save side
|
177
|
nodeToMatch.refreshNodeNumbering();
|
178
|
} catch (Exception e) {
|
179
|
String message = "An exception occurred when trying to add a key node child or to referesh the node numbering: " + e.getMessage();
|
180
|
fireWarningEvent(message, parentEvent, 6);
|
181
|
}
|
182
|
}
|
183
|
state.getUnmatchedLeads().removeNode(unmatchedKey, nodeToMatch);
|
184
|
}
|
185
|
}else{
|
186
|
String message = "Parent num could not be matched. Please check if num (%s) is correct";
|
187
|
message = String.format(message, num);
|
188
|
fireWarningEvent(message, parentEvent, 6);
|
189
|
}
|
190
|
}
|
191
|
|
192
|
private void handleQuestion(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, List<PolytomousKeyNode> nodesList) throws XMLStreamException {
|
193
|
// attributes
|
194
|
Map<String, Attribute> attributes = getAttributes(parentEvent);
|
195
|
//TODO needed only for data lineage
|
196
|
String questionNum = getAndRemoveRequiredAttributeValue(parentEvent, attributes, NUM);
|
197
|
|
198
|
PolytomousKeyNode myNode = PolytomousKeyNode.NewInstance();
|
199
|
myNode.setKey(state.getCurrentKey()); //to avoid NPE while computing num in PolytomousKeyNode in case this node is not matched correctly with a parent
|
200
|
nodesList.add(myNode);
|
201
|
|
202
|
while (reader.hasNext()) {
|
203
|
XMLEvent next = readNoWhitespace(reader);
|
204
|
if (isMyEndingElement(next, parentEvent)) {
|
205
|
return;
|
206
|
} else if (isStartingElement(next, TEXT)) {
|
207
|
String text = getCData(state, reader, next);
|
208
|
KeyStatement statement = KeyStatement.NewInstance(getDefaultLanguage(state), text);
|
209
|
myNode.setStatement(statement);
|
210
|
} else if (isStartingElement(next, COUPLET)) {
|
211
|
//TODO test
|
212
|
handleCouplet(state, reader, next, myNode);
|
213
|
} else if (isStartingElement(next, TO_COUPLET)) {
|
214
|
handleToCouplet(state, reader, next, myNode);
|
215
|
} else if (isStartingElement(next, TO_TAXON)) {
|
216
|
handleToTaxon(state, reader, next, myNode);
|
217
|
} else if (isStartingElement(next, TO_KEY)) {
|
218
|
//TODO
|
219
|
handleNotYetImplementedElement(next);
|
220
|
} else if (isStartingElement(next, KEYNOTES)) {
|
221
|
handleAmbigousManually(state, reader, next.asStartElement());
|
222
|
} else {
|
223
|
handleUnexpectedElement(next);
|
224
|
}
|
225
|
}
|
226
|
throw new IllegalStateException("<question> has no closing tag");
|
227
|
}
|
228
|
|
229
|
private void handleToCouplet(MarkupImportState state, XMLEventReader reader, XMLEvent next, PolytomousKeyNode node) throws XMLStreamException {
|
230
|
String num = getOnlyAttribute(next, NUM, true);
|
231
|
String cData = getCData(state, reader, next, false);
|
232
|
if (isNotBlank(cData) && ! cData.equals(num)){
|
233
|
String message = "CData ('%s') not handled in <toCouplet>";
|
234
|
message = String.format(message, cData);
|
235
|
fireWarningEvent(message, next, 4);
|
236
|
}
|
237
|
UnmatchedLeadsKey unmatched = UnmatchedLeadsKey.NewInstance(state.getCurrentKey(), num);
|
238
|
state.getUnmatchedLeads().addKey(unmatched, node);
|
239
|
}
|
240
|
|
241
|
private void handleToTaxon(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode node) throws XMLStreamException {
|
242
|
Map<String, Attribute> attributes = getAttributes(parentEvent);
|
243
|
String num = getAndRemoveAttributeValue(attributes, NUM);
|
244
|
boolean taxonNotExists = checkAndRemoveAttributeValue(attributes, EXISTS, "false");
|
245
|
|
246
|
String taxonCData = handleInnerToTaxon(state, reader, parentEvent, node).trim();
|
247
|
|
248
|
String taxonKeyStr = makeTaxonKey(taxonCData, state.getCurrentTaxon(), parentEvent.getLocation());
|
249
|
taxonNotExists = taxonNotExists || (isBlank(num) && state.isOnlyNumberedTaxaExist());
|
250
|
if (taxonNotExists){
|
251
|
NonViralName<?> name = createNameByCode(state, Rank.UNKNOWN_RANK());
|
252
|
Taxon taxon = Taxon.NewInstance(name, null);
|
253
|
taxon.getName().setTitleCache(taxonKeyStr, true);
|
254
|
node.setTaxon(taxon);
|
255
|
}else{
|
256
|
UnmatchedLeadsKey unmatched = UnmatchedLeadsKey.NewInstance(num, taxonKeyStr);
|
257
|
state.getUnmatchedLeads().addKey(unmatched, node);
|
258
|
// String message = "The following key leads are unmatched: %s";
|
259
|
// message = String.format(message, state.getUnmatchedLeads().toString());
|
260
|
// fireWarningEvent(message, parentEvent, 6);
|
261
|
}
|
262
|
return;
|
263
|
}
|
264
|
|
265
|
/**
|
266
|
* Returns the taxon text of the toTaxon element and handles all annotations as ';'-concatenated modifying text.
|
267
|
* Footnote refs are not yet handled.
|
268
|
* @param state
|
269
|
* @param reader
|
270
|
* @param parentEvent
|
271
|
* @param node
|
272
|
* @return
|
273
|
* @throws XMLStreamException
|
274
|
*/
|
275
|
private String handleInnerToTaxon(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode node) throws XMLStreamException {
|
276
|
String taxonText = "";
|
277
|
String modifyingText = null;
|
278
|
while (reader.hasNext()) {
|
279
|
XMLEvent next = readNoWhitespace(reader);
|
280
|
if (isMyEndingElement(next, parentEvent)) {
|
281
|
if (isNotBlank(modifyingText)){
|
282
|
node.putModifyingText(getDefaultLanguage(state), modifyingText);
|
283
|
}
|
284
|
return taxonText;
|
285
|
} else if (next.isCharacters()) {
|
286
|
taxonText += next.asCharacters().getData();
|
287
|
} else if (isStartingElement(next, ANNOTATION)) {
|
288
|
String annotation = handleSimpleAnnotation(state, reader, next);
|
289
|
modifyingText = CdmUtils.concat("; ", modifyingText, annotation);
|
290
|
} else if (isStartingElement(next, FOOTNOTE_REF)) {
|
291
|
handleNotYetImplementedElement(next);
|
292
|
} else {
|
293
|
handleUnexpectedElement(next);
|
294
|
}
|
295
|
}
|
296
|
throw new IllegalStateException("Event has no closing tag");
|
297
|
|
298
|
}
|
299
|
|
300
|
/**
|
301
|
* Creates a string that represents the given taxon. The string will try to replace e.g.
|
302
|
* abbreviated genus epithets by its full name etc.
|
303
|
* @param strGoto
|
304
|
* @param taxon
|
305
|
* @param location
|
306
|
* @return
|
307
|
*/
|
308
|
private String makeTaxonKey(String strGoto, Taxon taxon, Location location) {
|
309
|
String result = "";
|
310
|
if (strGoto == null){
|
311
|
return "";
|
312
|
}
|
313
|
|
314
|
NonViralName<?> name = CdmBase.deproxy(taxon.getName(), NonViralName.class);
|
315
|
String strGenusName = name.getGenusOrUninomial();
|
316
|
|
317
|
final String bracketPattern = "\\([^\\(\\)]*\\)";
|
318
|
final String bracketPatternSomewhere = String.format(".*%s.*", bracketPattern);
|
319
|
if (strGoto.matches(bracketPatternSomewhere)){
|
320
|
fireWarningEvent("toTaxon has bracket: " + strGoto, makeLocationStr(location), 4);
|
321
|
strGoto = strGoto.replaceAll(bracketPattern, ""); //replace all brackets
|
322
|
}
|
323
|
strGoto = strGoto.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace
|
324
|
|
325
|
strGoto = strGoto.trim();
|
326
|
strGoto = strGoto.replaceAll("\\s+\\.", "\\."); // " ." may be created by bracket replacement
|
327
|
strGoto = strGoto.replaceAll("\\.\\.", "\\."); //replace
|
328
|
|
329
|
String[] split = strGoto.split("\\s");
|
330
|
//handle single epithets and markers
|
331
|
for (int i = 0; i<split.length; i++){
|
332
|
String single = split[i];
|
333
|
if (isGenusAbbrev(single, strGenusName)){
|
334
|
split[i] = strGenusName;
|
335
|
}
|
336
|
if (isInfraSpecificMarker(single)){
|
337
|
String strSpeciesEpi = name.getSpecificEpithet();
|
338
|
if (isBlank(result) && isNotBlank(strSpeciesEpi)){
|
339
|
result += strGenusName + " " + strSpeciesEpi;
|
340
|
}
|
341
|
}
|
342
|
result = (result + " " + split[i]).trim();
|
343
|
}
|
344
|
//remove trailing "." except for "sp."
|
345
|
while (result.matches(".*(?<!sp)\\.$")){
|
346
|
result = result.substring(0, result.length()-1).trim();
|
347
|
}
|
348
|
return result;
|
349
|
}
|
350
|
|
351
|
|
352
|
private boolean isInfraSpecificMarker(String single) {
|
353
|
try {
|
354
|
if (Rank.getRankByIdInVoc(single).isInfraSpecific()){
|
355
|
return true;
|
356
|
}else{
|
357
|
return false;
|
358
|
}
|
359
|
} catch (UnknownCdmTypeException e) {
|
360
|
return false;
|
361
|
}
|
362
|
}
|
363
|
|
364
|
//******************************** recognize nodes ***********/
|
365
|
|
366
|
public void makeKeyNodes(MarkupImportState state, XMLEvent event, String taxonTitle) {
|
367
|
Taxon taxon = state.getCurrentTaxon();
|
368
|
String num = state.getCurrentTaxonNum();
|
369
|
|
370
|
String nameString = CdmBase.deproxy(taxon.getName(), NonViralName.class).getNameCache();
|
371
|
// String nameString = taxonTitle;
|
372
|
|
373
|
//try to find matching lead nodes
|
374
|
UnmatchedLeadsKey leadsKey = UnmatchedLeadsKey.NewInstance(num, nameString);
|
375
|
Set<PolytomousKeyNode> matchingNodes = handleMatchingNodes(state, event, taxon, leadsKey);
|
376
|
|
377
|
if (num != null){//same without using the num
|
378
|
UnmatchedLeadsKey noNumLeadsKey = UnmatchedLeadsKey.NewInstance("", nameString);
|
379
|
Set<PolytomousKeyNode> noNumMatchingNodes = handleMatchingNodes(state, event, taxon, noNumLeadsKey);
|
380
|
if(noNumMatchingNodes.size() > 0){
|
381
|
String message ="Taxon matches additional key node when not considering <num> attribute in taxontitle. This may be correct but may also indicate an error.";
|
382
|
fireWarningEvent(message, event, 1);
|
383
|
}
|
384
|
}
|
385
|
//report missing match, if num exists
|
386
|
if (num != null && matchingNodes.isEmpty() /* TODO redo comment && num != null (later DONE) */){
|
387
|
String message = "Taxon has <num> attribute in taxontitle but no matching key nodes exist: %s, Key: %s";
|
388
|
message = String.format(message, num, leadsKey.toString());
|
389
|
fireWarningEvent(message, event, 1);
|
390
|
}
|
391
|
}
|
392
|
|
393
|
private Set<PolytomousKeyNode> handleMatchingNodes(MarkupImportState state, XMLEvent event, Taxon taxon, UnmatchedLeadsKey leadsKey) {
|
394
|
Set<PolytomousKeyNode> matchingNodes = state.getUnmatchedLeads().getNodes(leadsKey);
|
395
|
for (PolytomousKeyNode matchingNode : matchingNodes){
|
396
|
state.getUnmatchedLeads().removeNode(leadsKey, matchingNode);
|
397
|
matchingNode.setTaxon(taxon);
|
398
|
//just to be on the save side
|
399
|
try{
|
400
|
matchingNode.refreshNodeNumbering();
|
401
|
} catch (Exception e) {
|
402
|
String message = "An exception occurred when trying to referesh the node numbering: " + e.getMessage();
|
403
|
fireWarningEvent(message, event, 6);
|
404
|
}
|
405
|
state.getPolytomousKeyNodesToSave().add(matchingNode);
|
406
|
}
|
407
|
return matchingNodes;
|
408
|
}
|
409
|
}
|