ref #6369 adapt existing occurrences of interface to removed generics
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / markup / MarkupKeyImport.java
1 /**
2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.markup;
11
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Map;
15 import java.util.Set;
16 import java.util.UUID;
17
18 import javax.xml.stream.Location;
19 import javax.xml.stream.XMLEventReader;
20 import javax.xml.stream.XMLStreamException;
21 import javax.xml.stream.events.Attribute;
22 import javax.xml.stream.events.XMLEvent;
23
24 import org.apache.log4j.Logger;
25
26 import eu.etaxonomy.cdm.common.CdmUtils;
27 import eu.etaxonomy.cdm.common.UTF8;
28 import eu.etaxonomy.cdm.io.markup.UnmatchedLeads.UnmatchedLeadsKey;
29 import eu.etaxonomy.cdm.model.common.Language;
30 import eu.etaxonomy.cdm.model.description.KeyStatement;
31 import eu.etaxonomy.cdm.model.description.PolytomousKey;
32 import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
33 import eu.etaxonomy.cdm.model.name.INonViralName;
34 import eu.etaxonomy.cdm.model.name.NonViralName;
35 import eu.etaxonomy.cdm.model.name.Rank;
36 import eu.etaxonomy.cdm.model.taxon.Taxon;
37 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
38
39 /**
40 * @author a.mueller
41 * @created 26.04.2013
42 */
43 public class MarkupKeyImport extends MarkupImportBase {
/** Class logger; not referenced by any method of this class so far. */
@SuppressWarnings("unused")
private static final Logger logger = Logger.getLogger(MarkupKeyImport.class);


/**
 * Creates the import handler for {@code <key>} elements.
 *
 * @param docImport the document import this handler belongs to; it is passed on
 *        unchanged to the {@link MarkupImportBase} constructor
 */
public MarkupKeyImport(MarkupDocumentImport docImport) {
    super(docImport);
}
51
52 public void handleKey(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
53 // attributes
54 Map<String, Attribute> attributes = getAttributes(parentEvent);
55 String isSpotcharacters = getAndRemoveAttributeValue(attributes, IS_SPOTCHARACTERS);
56 if (isNotBlank(isSpotcharacters) ) {
57 //TODO isSpotcharacters
58 String message = "Attribute isSpotcharacters not yet implemented for <key>";
59 fireWarningEvent(message, parentEvent, 4);
60 }
61 boolean onlyNumberedTaxaExist = checkAndRemoveAttributeValue(attributes, ONLY_NUMBERED_TAXA_EXIST, "true");
62 state.setOnlyNumberedTaxaExist(onlyNumberedTaxaExist);
63
64 PolytomousKey key = PolytomousKey.NewInstance();
65 key.addTaxonomicScope(state.getCurrentTaxon());
66 state.setCurrentKey(key);
67
68 boolean isFirstCouplet = true;
69 while (reader.hasNext()) {
70 XMLEvent next = readNoWhitespace(reader);
71 if (isMyEndingElement(next, parentEvent)) {
72 save(key, state);
73 //reset state
74 state.setCurrentKey(null);
75 state.setOnlyNumberedTaxaExist(false);
76 return;
77 } else if (isEndingElement(next, KEYNOTES)){
78 popUnimplemented(next.asEndElement());
79 } else if (isStartingElement(next, KEY_TITLE)) {
80 handleKeyTitle(state, reader, next);
81 } else if (isStartingElement(next, KEYNOTES)) {
82 //TODO
83 handleNotYetImplementedElement(next);
84 } else if (isStartingElement(next, COUPLET)) {
85 PolytomousKeyNode node = null;
86 if (isFirstCouplet){
87 node = key.getRoot();
88 isFirstCouplet = false;
89 }
90 handleCouplet(state, reader, next, node);
91 } else {
92 handleUnexpectedElement(next);
93 }
94 }
95 throw new IllegalStateException("<key> has no closing tag");
96 }
97
98
99 /**
100 * @param state
101 * @param reader
102 * @param key
103 * @param next
104 * @throws XMLStreamException
105 */
106 private void handleKeyTitle(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
107 PolytomousKey key = state.getCurrentKey();
108 String keyTitle = getCData(state, reader, parentEvent);
109 String standardTitlesEngl = "(?i)(Key\\sto\\sthe\\s(genera|species|varieties|forms))";
110 String standardTitlesFrench = "(?i)(Cl\u00e9\\sdes\\s(genres|esp\u00e8ces))";
111 String standardTitles = standardTitlesEngl;
112 if (state.getDefaultLanguage() != null && state.getDefaultLanguage().equals(Language.FRENCH())){
113 standardTitles = standardTitlesFrench;
114 }
115
116 if (isNotBlank(keyTitle) ){
117 if (!state.getConfig().isReplaceStandardKeyTitles() || ! keyTitle.matches(standardTitles)){
118 key.setTitleCache(keyTitle, true);
119 }
120 }
121 }
122
123
124 private void handleCouplet(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode parentNode) throws XMLStreamException {
125 String num = getOnlyAttribute(parentEvent, NUM, true);
126 List<PolytomousKeyNode> childList = new ArrayList<PolytomousKeyNode>();
127
128 while (reader.hasNext()) {
129 XMLEvent next = readNoWhitespace(reader);
130 if (isMyEndingElement(next, parentEvent)) {
131 completeCouplet(state, parentEvent, parentNode, num, childList);
132 return;
133 } else if (next.isCharacters()){
134 handleNotYetImplementedCharacters(next);
135 //work in progress from pesiimport2, not sure if this works
136 // String mainQuestion = next.asCharacters().getData();
137 // mainQuestion = mainQuestion.replaceAll("\\s+", " ").trim();
138 // KeyStatement question = KeyStatement.NewInstance(mainQuestion);
139 // if (parentNode != null){ parentNode.setStatement(question);} //work in progress
140 } else if (isStartingElement(next, QUESTION)) {
141 handleQuestion(state, reader, next, childList);
142 } else if (isStartingElement(next, KEYNOTES)) {
143 //TODO
144 handleNotYetImplementedElement(next);
145 } else if (isEndingElement(next, KEYNOTES)) {
146 //TODO
147 popUnimplemented(next.asEndElement());
148 } else {
149 handleUnexpectedElement(next);
150 }
151 }
152 throw new IllegalStateException("<couplet> has no closing tag");
153 }
154
155
156 /**
157 * @param state
158 * @param parentEvent
159 * @param parentNode
160 * @param num
161 * @param childList
162 */
163 private void completeCouplet(MarkupImportState state, XMLEvent parentEvent,
164 PolytomousKeyNode parentNode, String num, List<PolytomousKeyNode> childList) {
165 if (parentNode != null){
166 for (PolytomousKeyNode childNode : childList){
167 parentNode.addChild(childNode);
168 //just to be on the save side
169 parentNode.refreshNodeNumbering();
170 }
171 }else if (isNotBlank(num)){
172 UnmatchedLeadsKey unmatchedKey = UnmatchedLeadsKey.NewInstance(state.getCurrentKey(), num);
173 Set<PolytomousKeyNode> nodes = state.getUnmatchedLeads().getNodes(unmatchedKey);
174 for(PolytomousKeyNode nodeToMatch: nodes){
175 for (PolytomousKeyNode childNode : childList){
176 try {
177 nodeToMatch.addChild(childNode);
178 //just to be on the save side
179 nodeToMatch.refreshNodeNumbering();
180 } catch (Exception e) {
181 String message = "An exception occurred when trying to add a key node child or to referesh the node numbering: " + e.getMessage();
182 fireWarningEvent(message, parentEvent, 6);
183 }
184 }
185 state.getUnmatchedLeads().removeNode(unmatchedKey, nodeToMatch);
186 }
187 }else{
188 String message = "Parent num could not be matched. Please check if num (%s) is correct";
189 message = String.format(message, num);
190 fireWarningEvent(message, parentEvent, 6);
191 }
192 }
193
194 private void handleQuestion(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, List<PolytomousKeyNode> nodesList) throws XMLStreamException {
195 // attributes
196 Map<String, Attribute> attributes = getAttributes(parentEvent);
197 //TODO needed only for data lineage
198 String questionNum = getAndRemoveRequiredAttributeValue(parentEvent, attributes, NUM);
199
200 PolytomousKeyNode myNode = PolytomousKeyNode.NewInstance();
201 myNode.setKey(state.getCurrentKey()); //to avoid NPE while computing num in PolytomousKeyNode in case this node is not matched correctly with a parent
202 nodesList.add(myNode);
203
204 while (reader.hasNext()) {
205 XMLEvent next = readNoWhitespace(reader);
206 if (isMyEndingElement(next, parentEvent)) {
207 return;
208 } else if (isStartingElement(next, TEXT)) {
209 String text = getCData(state, reader, next);
210 KeyStatement statement = KeyStatement.NewInstance(getDefaultLanguage(state), text);
211 myNode.setStatement(statement);
212 } else if (isStartingElement(next, COUPLET)) {
213 //TODO test
214 handleCouplet(state, reader, next, myNode);
215 } else if (isStartingElement(next, TO_COUPLET)) {
216 handleToCouplet(state, reader, next, myNode);
217 } else if (isStartingElement(next, TO_TAXON)) {
218 handleToTaxon(state, reader, next, myNode);
219 } else if (isStartingElement(next, TO_KEY)) {
220 //TODO
221 handleNotYetImplementedElement(next);
222 } else if (isStartingElement(next, KEYNOTES)) {
223 handleAmbigousManually(state, reader, next.asStartElement());
224 } else {
225 handleUnexpectedElement(next);
226 }
227 }
228 throw new IllegalStateException("<question> has no closing tag");
229 }
230
231 private void handleToCouplet(MarkupImportState state, XMLEventReader reader, XMLEvent next, PolytomousKeyNode node) throws XMLStreamException {
232 String num = getOnlyAttribute(next, NUM, true);
233 String cData = getCData(state, reader, next, false);
234 if (isNotBlank(cData) && ! cData.equals(num)){
235 String message = "CData ('%s') not handled in <toCouplet>";
236 message = String.format(message, cData);
237 fireWarningEvent(message, next, 4);
238 }
239 UnmatchedLeadsKey unmatched = UnmatchedLeadsKey.NewInstance(state.getCurrentKey(), num);
240 state.getUnmatchedLeads().addKey(unmatched, node);
241 }
242
243 private void handleToTaxon(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode node) throws XMLStreamException {
244 Map<String, Attribute> attributes = getAttributes(parentEvent);
245 String num = getAndRemoveAttributeValue(attributes, NUM);
246 boolean taxonNotExists = checkAndRemoveAttributeValue(attributes, EXISTS, "false");
247
248 String taxonCData = handleInnerToTaxon(state, reader, parentEvent, node).trim();
249
250 String taxonKeyStr = makeTaxonKey(taxonCData, state.getCurrentTaxon(), parentEvent.getLocation());
251 try{
252 if (taxonKeyStr.contains(":")){
253 System.out.println(":");
254 UUID.fromString(taxonKeyStr);
255 System.out.println("Here we have a uuid: " + taxonKeyStr );
256 }
257 }catch(Exception e){
258 }
259
260 taxonNotExists = taxonNotExists || (isBlank(num) && state.isOnlyNumberedTaxaExist());
261 if (taxonNotExists){
262 NonViralName<?> name = createNameByCode(state, Rank.UNKNOWN_RANK());
263 Taxon taxon = Taxon.NewInstance(name, null);
264 taxon.getName().setTitleCache(taxonKeyStr, true);
265 node.setTaxon(taxon);
266 }else{
267 UnmatchedLeadsKey unmatched = UnmatchedLeadsKey.NewInstance(num, taxonKeyStr);
268 state.getUnmatchedLeads().addKey(unmatched, node);
269 // String message = "The following key leads are unmatched: %s";
270 // message = String.format(message, state.getUnmatchedLeads().toString());
271 // fireWarningEvent(message, parentEvent, 6);
272 }
273 return;
274 }
275
276 /**
277 * Returns the taxon text of the toTaxon element and handles all annotations as ';'-concatenated modifying text.
278 * Footnote refs are not yet handled.
279 * @param state
280 * @param reader
281 * @param parentEvent
282 * @param node
283 * @return
284 * @throws XMLStreamException
285 */
286 private String handleInnerToTaxon(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode node) throws XMLStreamException {
287 String taxonText = "";
288 String modifyingText = null;
289 while (reader.hasNext()) {
290 XMLEvent next = readNoWhitespace(reader);
291 if (isMyEndingElement(next, parentEvent)) {
292 if (isNotBlank(modifyingText)){
293 node.putModifyingText(getDefaultLanguage(state), modifyingText);
294 }
295 return taxonText;
296 } else if (next.isCharacters()) {
297 taxonText += next.asCharacters().getData();
298 } else if (isStartingElement(next, ANNOTATION)) {
299 String annotation = handleSimpleAnnotation(state, reader, next);
300 modifyingText = CdmUtils.concat("; ", modifyingText, annotation);
301 } else if (isStartingElement(next, FOOTNOTE_REF)) {
302 handleNotYetImplementedElement(next);
303 } else {
304 handleUnexpectedElement(next);
305 }
306 }
307 throw new IllegalStateException("Event has no closing tag");
308
309 }
310
311 /**
312 * Creates a string that represents the given taxon. The string will try to replace e.g.
313 * abbreviated genus epithets by its full name etc.
314 * @param strGoto
315 * @param taxon
316 * @param location
317 * @return
318 */
319 private String makeTaxonKey(String strGoto, Taxon taxon, Location location) {
320 String result = "";
321 if (strGoto == null){
322 return "";
323 }
324
325 INonViralName name = taxon.getName();
326 String strGenusName = name.getGenusOrUninomial();
327
328 String normalized = normalizeKeyString(strGoto, location);
329
330 String[] split = normalized.split("\\s");
331 //handle single epithets and markers
332 for (int i = 0; i<split.length; i++){
333 String single = split[i];
334 if (isGenusAbbrev(single, strGenusName)){
335 split[i] = strGenusName;
336 }
337 if (isInfraSpecificMarker(single)){
338 String strSpeciesEpi = name.getSpecificEpithet();
339 if (isBlank(result) && isNotBlank(strSpeciesEpi)){
340 result += strGenusName + " " + strSpeciesEpi;
341 }
342 }
343 result = (result + " " + split[i]).trim();
344 }
345 result = removeTrailingDot(result);
346 return result;
347 }
348
349
350 final static String bracketPattern = "\\([^\\(\\)]*\\)";
351 final static String bracketPatternSomewhere = String.format(".*%s.*", bracketPattern);
352
353 /**
354 * @param strGoto
355 * @param location
356 * @return
357 */
358 private String normalizeKeyString(String strGoto, Location location) {
359 String result = strGoto;
360 if (result.matches(bracketPatternSomewhere)){
361 fireWarningEvent("keyString has bracket (uncritical for fullname matching): " + result, makeLocationStr(location), 4);
362 result = result.replaceAll(bracketPattern, ""); //replace all brackets
363 }
364 result = result.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace
365
366 result = result.trim();
367 result = result.replaceAll("\\s+\\.", "\\."); // " ." may be created by bracket replacement
368 result = result.replaceAll("\\.\\.", "\\."); //replace
369 result = result.replace(UTF8.HYBRID.toString(), "x ");
370 return result;
371 }
372
373
374 private boolean isInfraSpecificMarker(String single) {
375 try {
376 if (Rank.getRankByIdInVoc(single).isInfraSpecific()){
377 return true;
378 }else{
379 return false;
380 }
381 } catch (UnknownCdmTypeException e) {
382 return false;
383 }
384 }
385
386 //******************************** recognize nodes ***********/
387
388 public void makeKeyNodes(MarkupImportState state, XMLEvent event, String taxonTitle) {
389 Taxon taxon = state.getCurrentTaxon();
390 String num = state.getCurrentTaxonNum();
391
392 INonViralName nvn = taxon.getName();
393 String nameString = nvn.getNameCache();
394 nameString = normalizeKeyString(nameString, event.getLocation());
395 nameString = removeTrailingDot(nameString);
396 try{
397 if (nameString.contains(":")){
398 System.out.println(":");
399 UUID.fromString(nameString);
400 System.out.println("Here we have a uuid: " + nameString + "for" + nvn.getTitleCache());
401 }
402 }catch(Exception e){
403 }
404
405 //try to find matching lead nodes
406 UnmatchedLeadsKey leadsKey = UnmatchedLeadsKey.NewInstance(num, nameString);
407 Set<PolytomousKeyNode> matchingNodes = handleMatchingNodes(state, event, taxon, leadsKey);
408
409 if (num != null){//same without using the num
410 UnmatchedLeadsKey noNumLeadsKey = UnmatchedLeadsKey.NewInstance("", nameString);
411 Set<PolytomousKeyNode> noNumMatchingNodes = handleMatchingNodes(state, event, taxon, noNumLeadsKey);
412 if(noNumMatchingNodes.size() > 0){
413 String message ="Taxon matches additional key node when not considering <num> attribute in taxontitle. This may be correct but may also indicate an error.";
414 fireWarningEvent(message, event, 1);
415 }
416 }
417 //report missing match, if num exists
418 if (num != null && matchingNodes.isEmpty() /* TODO redo comment && num != null (later DONE) */){
419 String message = "Taxon has <num> attribute in taxontitle but no matching key nodes exist: %s, Key: %s";
420 message = String.format(message, num, leadsKey.toString());
421 fireWarningEvent(message, event, 1);
422 }
423 }
424
425 /**
426 * remove trailing "." except for "sp."
427 * @param str
428 * @return
429 */
430 private String removeTrailingDot(String str) {
431 while (str.matches(".*(?<!sp)\\.$")){
432 str = str.substring(0, str.length()-1).trim();
433 }
434 return str;
435 }
436
437 private Set<PolytomousKeyNode> handleMatchingNodes(MarkupImportState state, XMLEvent event, Taxon taxon, UnmatchedLeadsKey leadsKey) {
438 Set<PolytomousKeyNode> matchingNodes = state.getUnmatchedLeads().getNodes(leadsKey);
439 for (PolytomousKeyNode matchingNode : matchingNodes){
440 state.getUnmatchedLeads().removeNode(leadsKey, matchingNode);
441 matchingNode.setTaxon(taxon);
442 //just to be on the save side
443 try{
444 matchingNode.refreshNodeNumbering();
445 } catch (Exception e) {
446 String message = "An exception occurred when trying to referesh the node numbering: " + e.getMessage();
447 fireWarningEvent(message, event, 6);
448 }
449 state.getPolytomousKeyNodesToSave().add(matchingNode);
450 }
451 return matchingNodes;
452 }
453 }