updated to trunk
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / markup / MarkupKeyImport.java
1 /**
2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.markup;
11
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Map;
15 import java.util.Set;
16
17 import javax.xml.stream.Location;
18 import javax.xml.stream.XMLEventReader;
19 import javax.xml.stream.XMLStreamException;
20 import javax.xml.stream.events.Attribute;
21 import javax.xml.stream.events.XMLEvent;
22
23 import org.apache.log4j.Logger;
24
25 import eu.etaxonomy.cdm.common.CdmUtils;
26 import eu.etaxonomy.cdm.io.markup.UnmatchedLeads.UnmatchedLeadsKey;
27 import eu.etaxonomy.cdm.model.common.CdmBase;
28 import eu.etaxonomy.cdm.model.common.Language;
29 import eu.etaxonomy.cdm.model.description.KeyStatement;
30 import eu.etaxonomy.cdm.model.description.PolytomousKey;
31 import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
32 import eu.etaxonomy.cdm.model.name.NonViralName;
33 import eu.etaxonomy.cdm.model.name.Rank;
34 import eu.etaxonomy.cdm.model.taxon.Taxon;
35 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
36
37 /**
38 * @author a.mueller
39 * @created 26.04.2013
40 */
41 public class MarkupKeyImport extends MarkupImportBase {
42 @SuppressWarnings("unused")
43 private static final Logger logger = Logger.getLogger(MarkupKeyImport.class);
44
45
/**
 * Creates a key import handler.
 *
 * @param docImport the document import; it is handed over unchanged to the
 *                  {@link MarkupImportBase} constructor
 */
public MarkupKeyImport(MarkupDocumentImport docImport) {
    super(docImport);
}
49
50 public void handleKey(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
51 // attributes
52 Map<String, Attribute> attributes = getAttributes(parentEvent);
53 String isSpotcharacters = getAndRemoveAttributeValue(attributes, IS_SPOTCHARACTERS);
54 if (isNotBlank(isSpotcharacters) ) {
55 //TODO isSpotcharacters
56 String message = "Attribute isSpotcharacters not yet implemented for <key>";
57 fireWarningEvent(message, parentEvent, 4);
58 }
59 boolean onlyNumberedTaxaExist = checkAndRemoveAttributeValue(attributes, ONLY_NUMBERED_TAXA_EXIST, "true");
60 state.setOnlyNumberedTaxaExist(onlyNumberedTaxaExist);
61
62 PolytomousKey key = PolytomousKey.NewInstance();
63 key.addTaxonomicScope(state.getCurrentTaxon());
64 state.setCurrentKey(key);
65
66 boolean isFirstCouplet = true;
67 while (reader.hasNext()) {
68 XMLEvent next = readNoWhitespace(reader);
69 if (isMyEndingElement(next, parentEvent)) {
70 save(key, state);
71 //reset state
72 state.setCurrentKey(null);
73 state.setOnlyNumberedTaxaExist(false);
74 return;
75 } else if (isEndingElement(next, KEYNOTES)){
76 popUnimplemented(next.asEndElement());
77 } else if (isStartingElement(next, KEY_TITLE)) {
78 handleKeyTitle(state, reader, next);
79 } else if (isStartingElement(next, KEYNOTES)) {
80 //TODO
81 handleNotYetImplementedElement(next);
82 } else if (isStartingElement(next, COUPLET)) {
83 PolytomousKeyNode node = null;
84 if (isFirstCouplet){
85 node = key.getRoot();
86 isFirstCouplet = false;
87 }
88 handleCouplet(state, reader, next, node);
89 } else {
90 handleUnexpectedElement(next);
91 }
92 }
93 throw new IllegalStateException("<key> has no closing tag");
94 }
95
96
97 /**
98 * @param state
99 * @param reader
100 * @param key
101 * @param next
102 * @throws XMLStreamException
103 */
104 private void handleKeyTitle(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
105 PolytomousKey key = state.getCurrentKey();
106 String keyTitle = getCData(state, reader, parentEvent);
107 String standardTitlesEngl = "(?i)(Key\\sto\\sthe\\s(genera|species|varieties|forms))";
108 String standardTitlesFrench = "(?i)(Cl\u00e9\\sdes\\s(genres|esp\u00e8ces))";
109 String standardTitles = standardTitlesEngl;
110 if (state.getDefaultLanguage() != null && state.getDefaultLanguage().equals(Language.FRENCH())){
111 standardTitles = standardTitlesFrench;
112 }
113
114 if (isNotBlank(keyTitle) ){
115 if (!state.getConfig().isReplaceStandardKeyTitles() || ! keyTitle.matches(standardTitles)){
116 key.setTitleCache(keyTitle, true);
117 }
118 }
119 }
120
121
122 private void handleCouplet(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode parentNode) throws XMLStreamException {
123 String num = getOnlyAttribute(parentEvent, NUM, true);
124 List<PolytomousKeyNode> childList = new ArrayList<PolytomousKeyNode>();
125
126 while (reader.hasNext()) {
127 XMLEvent next = readNoWhitespace(reader);
128 if (isMyEndingElement(next, parentEvent)) {
129 completeCouplet(state, parentEvent, parentNode, num, childList);
130 return;
131 } else if (next.isCharacters()){
132 handleNotYetImplementedCharacters(next);
133 //work in progress from pesiimport2, not sure if this works
134 // String mainQuestion = next.asCharacters().getData();
135 // mainQuestion = mainQuestion.replaceAll("\\s+", " ").trim();
136 // KeyStatement question = KeyStatement.NewInstance(mainQuestion);
137 // if (parentNode != null){ parentNode.setStatement(question);} //work in progress
138 } else if (isStartingElement(next, QUESTION)) {
139 handleQuestion(state, reader, next, childList);
140 } else if (isStartingElement(next, KEYNOTES)) {
141 //TODO
142 handleNotYetImplementedElement(next);
143 } else if (isEndingElement(next, KEYNOTES)) {
144 //TODO
145 popUnimplemented(next.asEndElement());
146 } else {
147 handleUnexpectedElement(next);
148 }
149 }
150 throw new IllegalStateException("<couplet> has no closing tag");
151 }
152
153
154 /**
155 * @param state
156 * @param parentEvent
157 * @param parentNode
158 * @param num
159 * @param childList
160 */
161 private void completeCouplet(MarkupImportState state, XMLEvent parentEvent,
162 PolytomousKeyNode parentNode, String num, List<PolytomousKeyNode> childList) {
163 if (parentNode != null){
164 for (PolytomousKeyNode childNode : childList){
165 parentNode.addChild(childNode);
166 //just to be on the save side
167 parentNode.refreshNodeNumbering();
168 }
169 }else if (isNotBlank(num)){
170 UnmatchedLeadsKey unmatchedKey = UnmatchedLeadsKey.NewInstance(state.getCurrentKey(), num);
171 Set<PolytomousKeyNode> nodes = state.getUnmatchedLeads().getNodes(unmatchedKey);
172 for(PolytomousKeyNode nodeToMatch: nodes){
173 for (PolytomousKeyNode childNode : childList){
174 try {
175 nodeToMatch.addChild(childNode);
176 //just to be on the save side
177 nodeToMatch.refreshNodeNumbering();
178 } catch (Exception e) {
179 String message = "An exception occurred when trying to add a key node child or to referesh the node numbering: " + e.getMessage();
180 fireWarningEvent(message, parentEvent, 6);
181 }
182 }
183 state.getUnmatchedLeads().removeNode(unmatchedKey, nodeToMatch);
184 }
185 }else{
186 String message = "Parent num could not be matched. Please check if num (%s) is correct";
187 message = String.format(message, num);
188 fireWarningEvent(message, parentEvent, 6);
189 }
190 }
191
192 private void handleQuestion(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, List<PolytomousKeyNode> nodesList) throws XMLStreamException {
193 // attributes
194 Map<String, Attribute> attributes = getAttributes(parentEvent);
195 //TODO needed only for data lineage
196 String questionNum = getAndRemoveRequiredAttributeValue(parentEvent, attributes, NUM);
197
198 PolytomousKeyNode myNode = PolytomousKeyNode.NewInstance();
199 myNode.setKey(state.getCurrentKey()); //to avoid NPE while computing num in PolytomousKeyNode in case this node is not matched correctly with a parent
200 nodesList.add(myNode);
201
202 while (reader.hasNext()) {
203 XMLEvent next = readNoWhitespace(reader);
204 if (isMyEndingElement(next, parentEvent)) {
205 return;
206 } else if (isStartingElement(next, TEXT)) {
207 String text = getCData(state, reader, next);
208 KeyStatement statement = KeyStatement.NewInstance(getDefaultLanguage(state), text);
209 myNode.setStatement(statement);
210 } else if (isStartingElement(next, COUPLET)) {
211 //TODO test
212 handleCouplet(state, reader, next, myNode);
213 } else if (isStartingElement(next, TO_COUPLET)) {
214 handleToCouplet(state, reader, next, myNode);
215 } else if (isStartingElement(next, TO_TAXON)) {
216 handleToTaxon(state, reader, next, myNode);
217 } else if (isStartingElement(next, TO_KEY)) {
218 //TODO
219 handleNotYetImplementedElement(next);
220 } else if (isEndingElement(next, TO_KEY)){
221 //TODO
222 popUnimplemented(next.asEndElement());
223 } else if (isStartingElement(next, KEYNOTES)) {
224 //TODO
225 handleNotYetImplementedElement(next);
226 } else if (isEndingElement(next, KEYNOTES)){
227 //TODO
228 popUnimplemented(next.asEndElement());
229 } else {
230 handleUnexpectedElement(next);
231 }
232 }
233 throw new IllegalStateException("<question> has no closing tag");
234 }
235
236 private void handleToCouplet(MarkupImportState state, XMLEventReader reader, XMLEvent next, PolytomousKeyNode node) throws XMLStreamException {
237 String num = getOnlyAttribute(next, NUM, true);
238 String cData = getCData(state, reader, next, false);
239 if (isNotBlank(cData) && ! cData.equals(num)){
240 String message = "CData ('%s') not handled in <toCouplet>";
241 message = String.format(message, cData);
242 fireWarningEvent(message, next, 4);
243 }
244 UnmatchedLeadsKey unmatched = UnmatchedLeadsKey.NewInstance(state.getCurrentKey(), num);
245 state.getUnmatchedLeads().addKey(unmatched, node);
246 }
247
248 private void handleToTaxon(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode node) throws XMLStreamException {
249 Map<String, Attribute> attributes = getAttributes(parentEvent);
250 String num = getAndRemoveAttributeValue(attributes, NUM);
251 boolean taxonNotExists = checkAndRemoveAttributeValue(attributes, EXISTS, "false");
252
253 String taxonCData = handleInnerToTaxon(state, reader, parentEvent, node).trim();
254
255 String taxonKeyStr = makeTaxonKey(taxonCData, state.getCurrentTaxon(), parentEvent.getLocation());
256 taxonNotExists = taxonNotExists || (isBlank(num) && state.isOnlyNumberedTaxaExist());
257 if (taxonNotExists){
258 NonViralName<?> name = createNameByCode(state, Rank.UNKNOWN_RANK());
259 Taxon taxon = Taxon.NewInstance(name, null);
260 taxon.getName().setTitleCache(taxonKeyStr, true);
261 node.setTaxon(taxon);
262 }else{
263 UnmatchedLeadsKey unmatched = UnmatchedLeadsKey.NewInstance(num, taxonKeyStr);
264 state.getUnmatchedLeads().addKey(unmatched, node);
265 // String message = "The following key leads are unmatched: %s";
266 // message = String.format(message, state.getUnmatchedLeads().toString());
267 // fireWarningEvent(message, parentEvent, 6);
268 }
269 return;
270 }
271
272 /**
273 * Returns the taxon text of the toTaxon element and handles all annotations as ';'-concatenated modifying text.
274 * Footnote refs are not yet handled.
275 * @param state
276 * @param reader
277 * @param parentEvent
278 * @param node
279 * @return
280 * @throws XMLStreamException
281 */
282 private String handleInnerToTaxon(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode node) throws XMLStreamException {
283 String taxonText = "";
284 String modifyingText = null;
285 while (reader.hasNext()) {
286 XMLEvent next = readNoWhitespace(reader);
287 if (isMyEndingElement(next, parentEvent)) {
288 if (isNotBlank(modifyingText)){
289 node.putModifyingText(getDefaultLanguage(state), modifyingText);
290 }
291 return taxonText;
292 } else if (next.isCharacters()) {
293 taxonText += next.asCharacters().getData();
294 } else if (isStartingElement(next, ANNOTATION)) {
295 String annotation = handleSimpleAnnotation(state, reader, next);
296 modifyingText = CdmUtils.concat("; ", modifyingText, annotation);
297 } else if (isStartingElement(next, FOOTNOTE_REF)) {
298 handleNotYetImplementedElement(next);
299 } else {
300 handleUnexpectedElement(next);
301 }
302 }
303 throw new IllegalStateException("Event has no closing tag");
304
305 }
306
307 /**
308 * Creates a string that represents the given taxon. The string will try to replace e.g.
309 * abbreviated genus epithets by its full name etc.
310 * @param strGoto
311 * @param taxon
312 * @param location
313 * @return
314 */
315 private String makeTaxonKey(String strGoto, Taxon taxon, Location location) {
316 String result = "";
317 if (strGoto == null){
318 return "";
319 }
320
321 NonViralName<?> name = CdmBase.deproxy(taxon.getName(), NonViralName.class);
322 String strGenusName = name.getGenusOrUninomial();
323
324 final String bracketPattern = "\\([^\\(\\)]*\\)";
325 final String bracketPatternSomewhere = String.format(".*%s.*", bracketPattern);
326 if (strGoto.matches(bracketPatternSomewhere)){
327 fireWarningEvent("toTaxon has bracket: " + strGoto, makeLocationStr(location), 4);
328 strGoto = strGoto.replaceAll(bracketPattern, ""); //replace all brackets
329 }
330 strGoto = strGoto.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace
331
332 strGoto = strGoto.trim();
333 strGoto = strGoto.replaceAll("\\s+\\.", "\\."); // " ." may be created by bracket replacement
334 strGoto = strGoto.replaceAll("\\.\\.", "\\."); //replace
335
336 String[] split = strGoto.split("\\s");
337 //handle single epithets and markers
338 for (int i = 0; i<split.length; i++){
339 String single = split[i];
340 if (isGenusAbbrev(single, strGenusName)){
341 split[i] = strGenusName;
342 }
343 if (isInfraSpecificMarker(single)){
344 String strSpeciesEpi = name.getSpecificEpithet();
345 if (isBlank(result) && isNotBlank(strSpeciesEpi)){
346 result += strGenusName + " " + strSpeciesEpi;
347 }
348 }
349 result = (result + " " + split[i]).trim();
350 }
351 //remove trailing "." except for "sp."
352 while (result.matches(".*(?<!sp)\\.$")){
353 result = result.substring(0, result.length()-1).trim();
354 }
355 return result;
356 }
357
358
359 private boolean isInfraSpecificMarker(String single) {
360 try {
361 if (Rank.getRankByIdInVoc(single).isInfraSpecific()){
362 return true;
363 }else{
364 return false;
365 }
366 } catch (UnknownCdmTypeException e) {
367 return false;
368 }
369 }
370
371 //******************************** recognize nodes ***********/
372
373 public void makeKeyNodes(MarkupImportState state, XMLEvent event, String taxonTitle) {
374 Taxon taxon = state.getCurrentTaxon();
375 String num = state.getCurrentTaxonNum();
376
377 String nameString = CdmBase.deproxy(taxon.getName(), NonViralName.class).getNameCache();
378 // String nameString = taxonTitle;
379
380 //try to find matching lead nodes
381 UnmatchedLeadsKey leadsKey = UnmatchedLeadsKey.NewInstance(num, nameString);
382 Set<PolytomousKeyNode> matchingNodes = handleMatchingNodes(state, event, taxon, leadsKey);
383
384 if (num != null){//same without using the num
385 UnmatchedLeadsKey noNumLeadsKey = UnmatchedLeadsKey.NewInstance("", nameString);
386 Set<PolytomousKeyNode> noNumMatchingNodes = handleMatchingNodes(state, event, taxon, noNumLeadsKey);
387 if(noNumMatchingNodes.size() > 0){
388 String message ="Taxon matches additional key node when not considering <num> attribute in taxontitle. This may be correct but may also indicate an error.";
389 fireWarningEvent(message, event, 1);
390 }
391 }
392 //report missing match, if num exists
393 if (matchingNodes.isEmpty() /* TODO redo comment && num != null */){
394 String message = "Taxon has <num> attribute in taxontitle but no matching key nodes exist: %s, Key: %s";
395 message = String.format(message, num, leadsKey.toString());
396 fireWarningEvent(message, event, 1);
397 }
398
399 }
400
401 private Set<PolytomousKeyNode> handleMatchingNodes(MarkupImportState state, XMLEvent event, Taxon taxon, UnmatchedLeadsKey leadsKey) {
402 Set<PolytomousKeyNode> matchingNodes = state.getUnmatchedLeads().getNodes(leadsKey);
403 for (PolytomousKeyNode matchingNode : matchingNodes){
404 state.getUnmatchedLeads().removeNode(leadsKey, matchingNode);
405 matchingNode.setTaxon(taxon);
406 //just to be on the save side
407 try{
408 matchingNode.refreshNodeNumbering();
409 } catch (Exception e) {
410 String message = "An exception occurred when trying to referesh the node numbering: " + e.getMessage();
411 fireWarningEvent(message, event, 6);
412 }
413 state.getPolytomousKeyNodesToSave().add(matchingNode);
414 }
415 return matchingNodes;
416 }
417 }