Project

General

Profile

Download (15.8 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
 * Copyright (C) 2009 EDIT
3
 * European Distributed Institute of Taxonomy
4
 * http://www.e-taxonomy.eu
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version 1.1
7
 * See LICENSE.TXT at the top of this package for the full license terms.
8
 */
9

    
10
package eu.etaxonomy.cdm.io.markup;
11

    
12
import java.util.ArrayList;
13
import java.util.List;
14
import java.util.Map;
15
import java.util.Set;
16

    
17
import javax.xml.stream.Location;
18
import javax.xml.stream.XMLEventReader;
19
import javax.xml.stream.XMLStreamException;
20
import javax.xml.stream.events.Attribute;
21
import javax.xml.stream.events.XMLEvent;
22

    
23
import org.apache.log4j.Logger;
24

    
25
import eu.etaxonomy.cdm.common.CdmUtils;
26
import eu.etaxonomy.cdm.io.markup.UnmatchedLeads.UnmatchedLeadsKey;
27
import eu.etaxonomy.cdm.model.common.CdmBase;
28
import eu.etaxonomy.cdm.model.common.Language;
29
import eu.etaxonomy.cdm.model.description.KeyStatement;
30
import eu.etaxonomy.cdm.model.description.PolytomousKey;
31
import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
32
import eu.etaxonomy.cdm.model.name.NonViralName;
33
import eu.etaxonomy.cdm.model.name.Rank;
34
import eu.etaxonomy.cdm.model.taxon.Taxon;
35
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
36

    
37
/**
38
 * @author a.mueller
39
 * @created 26.04.2013
40
 */
41
public class MarkupKeyImport  extends MarkupImportBase  {
42
	@SuppressWarnings("unused")
43
	private static final Logger logger = Logger.getLogger(MarkupKeyImport.class);
44
	
45
	
46
	/**
	 * Creates the key import handler and passes the owning document import on to the base class.
	 *
	 * @param docImport the document import handed to the {@link MarkupImportBase} constructor
	 */
	public MarkupKeyImport(MarkupDocumentImport docImport) {
		super(docImport);
	}
49
	
50
	public void handleKey(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
51
		// attributes
52
		Map<String, Attribute> attributes = getAttributes(parentEvent);
53
		String isSpotcharacters = getAndRemoveAttributeValue(attributes, IS_SPOTCHARACTERS);
54
		if (isNotBlank(isSpotcharacters) ) {
55
			//TODO isSpotcharacters
56
			String message = "Attribute isSpotcharacters not yet implemented for <key>";
57
			fireWarningEvent(message, parentEvent, 4);
58
		}
59
		boolean onlyNumberedTaxaExist = checkAndRemoveAttributeValue(attributes, ONLY_NUMBERED_TAXA_EXIST, "true");
60
		state.setOnlyNumberedTaxaExist(onlyNumberedTaxaExist);
61
		
62
		PolytomousKey key = PolytomousKey.NewInstance();
63
		key.addTaxonomicScope(state.getCurrentTaxon());
64
		state.setCurrentKey(key);
65
		
66
		boolean isFirstCouplet = true;
67
		while (reader.hasNext()) {
68
			XMLEvent next = readNoWhitespace(reader);
69
			if (isMyEndingElement(next, parentEvent)) {
70
				save(key, state);
71
				//reset state
72
				state.setCurrentKey(null);
73
				state.setOnlyNumberedTaxaExist(false);
74
				return;
75
			} else if (isEndingElement(next, KEYNOTES)){
76
				popUnimplemented(next.asEndElement());
77
			} else if (isStartingElement(next, KEY_TITLE)) {
78
				handleKeyTitle(state, reader, next);
79
			} else if (isStartingElement(next, KEYNOTES)) {
80
				//TODO
81
				handleNotYetImplementedElement(next);
82
			} else if (isStartingElement(next, COUPLET)) {
83
				PolytomousKeyNode node = null;
84
				if (isFirstCouplet){
85
					node = key.getRoot();
86
					isFirstCouplet = false;
87
				}
88
				handleCouplet(state, reader, next, node);
89
			} else {
90
				handleUnexpectedElement(next);
91
			}
92
		}
93
		throw new IllegalStateException("<key> has no closing tag");
94
	}
95

    
96

    
97
	/**
98
	 * @param state
99
	 * @param reader
100
	 * @param key
101
	 * @param next
102
	 * @throws XMLStreamException
103
	 */
104
	private void handleKeyTitle(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
105
		PolytomousKey key = state.getCurrentKey();
106
		String keyTitle = getCData(state, reader, parentEvent);
107
		String standardTitlesEngl = "(?i)(Key\\sto\\sthe\\s(genera|species|varieties|forms))";
108
		String standardTitlesFrench = "(?i)(Cl\u00e9\\sdes\\s(genres|esp\u00e8ces))";
109
		String standardTitles = standardTitlesEngl;
110
		if (state.getDefaultLanguage() != null && state.getDefaultLanguage().equals(Language.FRENCH())){
111
			standardTitles = standardTitlesFrench;
112
		}
113
		
114
		if (isNotBlank(keyTitle) ){
115
			if (!state.getConfig().isReplaceStandardKeyTitles() || ! keyTitle.matches(standardTitles)){
116
				key.setTitleCache(keyTitle, true);
117
			}
118
		}
119
	}
120
	
121

    
122
	private void handleCouplet(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode parentNode) throws XMLStreamException {
123
		String num = getOnlyAttribute(parentEvent, NUM, true);
124
		List<PolytomousKeyNode> childList = new ArrayList<PolytomousKeyNode>(); 
125
		
126
		while (reader.hasNext()) {
127
			XMLEvent next = readNoWhitespace(reader);
128
			if (isMyEndingElement(next, parentEvent)) {
129
				completeCouplet(state, parentEvent, parentNode, num, childList);
130
				return;
131
			} else if (next.isCharacters()){
132
				handleNotYetImplementedCharacters(next);
133
				//work in progress from pesiimport2, not sure if this works
134
//				String mainQuestion = next.asCharacters().getData();
135
//				mainQuestion = mainQuestion.replaceAll("\\s+", " ").trim();
136
//				KeyStatement question = KeyStatement.NewInstance(mainQuestion);
137
//				if (parentNode != null){ parentNode.setStatement(question);}  //work in progress
138
			} else if (isStartingElement(next, QUESTION)) {
139
				handleQuestion(state, reader, next, childList);
140
			} else if (isStartingElement(next, KEYNOTES)) {
141
				//TODO
142
				handleNotYetImplementedElement(next);
143
			} else if (isEndingElement(next, KEYNOTES)) {
144
				//TODO
145
				popUnimplemented(next.asEndElement());
146
			} else {
147
				handleUnexpectedElement(next);
148
			}
149
		}
150
		throw new IllegalStateException("<couplet> has no closing tag");
151
	}
152
	
153

    
154
	/**
155
	 * @param state
156
	 * @param parentEvent
157
	 * @param parentNode
158
	 * @param num
159
	 * @param childList
160
	 */
161
	private void completeCouplet(MarkupImportState state, XMLEvent parentEvent,
162
			PolytomousKeyNode parentNode, String num, List<PolytomousKeyNode> childList) {
163
		if (parentNode != null){
164
			for (PolytomousKeyNode childNode : childList){
165
				parentNode.addChild(childNode);
166
				//just to be on the save side
167
				parentNode.refreshNodeNumbering();
168
			}
169
		}else if (isNotBlank(num)){
170
			UnmatchedLeadsKey unmatchedKey = UnmatchedLeadsKey.NewInstance(state.getCurrentKey(), num);
171
			Set<PolytomousKeyNode> nodes = state.getUnmatchedLeads().getNodes(unmatchedKey);
172
			for(PolytomousKeyNode nodeToMatch: nodes){
173
				for (PolytomousKeyNode childNode : childList){
174
					try {
175
						nodeToMatch.addChild(childNode);
176
						//just to be on the save side
177
						nodeToMatch.refreshNodeNumbering();
178
					} catch (Exception e) {
179
						String message = "An exception occurred when trying to add a key node child or to referesh the node numbering: " + e.getMessage();
180
						fireWarningEvent(message, parentEvent, 6);
181
					}
182
				}
183
				state.getUnmatchedLeads().removeNode(unmatchedKey, nodeToMatch);
184
			}
185
		}else{
186
			String message = "Parent num could not be matched. Please check if num (%s) is correct";
187
			message = String.format(message, num);
188
			fireWarningEvent(message, parentEvent, 6);
189
		}
190
	}
191

    
192
	private void handleQuestion(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, List<PolytomousKeyNode> nodesList) throws XMLStreamException {
193
		// attributes
194
		Map<String, Attribute> attributes = getAttributes(parentEvent);
195
		//TODO needed only for data lineage
196
		String questionNum = getAndRemoveRequiredAttributeValue(parentEvent, attributes, NUM);
197
		
198
		PolytomousKeyNode myNode = PolytomousKeyNode.NewInstance();
199
		myNode.setKey(state.getCurrentKey());  //to avoid NPE while computing num in PolytomousKeyNode in case this node is not matched correctly with a parent
200
		nodesList.add(myNode);
201
		
202
		while (reader.hasNext()) {
203
			XMLEvent next = readNoWhitespace(reader);
204
			if (isMyEndingElement(next, parentEvent)) {
205
				return;
206
			} else if (isStartingElement(next, TEXT)) {
207
				String text = getCData(state, reader, next);
208
				KeyStatement statement = KeyStatement.NewInstance(getDefaultLanguage(state), text);
209
				myNode.setStatement(statement);
210
			} else if (isStartingElement(next, COUPLET)) {
211
				//TODO test
212
				handleCouplet(state, reader, next, myNode);
213
			} else if (isStartingElement(next, TO_COUPLET)) {
214
				handleToCouplet(state, reader, next, myNode);
215
			} else if (isStartingElement(next, TO_TAXON)) {
216
				handleToTaxon(state, reader, next, myNode);
217
			} else if (isStartingElement(next, TO_KEY)) {
218
				//TODO
219
				handleNotYetImplementedElement(next);
220
			} else if (isStartingElement(next, KEYNOTES)) {
221
				handleAmbigousManually(state, reader, next.asStartElement());
222
			} else {
223
				handleUnexpectedElement(next);
224
			}
225
		}
226
		throw new IllegalStateException("<question> has no closing tag");
227
	}
228

    
229
	private void handleToCouplet(MarkupImportState state, XMLEventReader reader, XMLEvent next, PolytomousKeyNode node) throws XMLStreamException {
230
		String num = getOnlyAttribute(next, NUM, true);
231
		String cData = getCData(state, reader, next, false);
232
		if (isNotBlank(cData) && ! cData.equals(num)){
233
			String message = "CData ('%s') not handled in <toCouplet>";
234
			message = String.format(message, cData);
235
			fireWarningEvent(message, next, 4);
236
		}
237
		UnmatchedLeadsKey unmatched = UnmatchedLeadsKey.NewInstance(state.getCurrentKey(), num);
238
		state.getUnmatchedLeads().addKey(unmatched, node);
239
	}
240

    
241
	private void handleToTaxon(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode node) throws XMLStreamException {
242
		Map<String, Attribute> attributes = getAttributes(parentEvent);
243
		String num = getAndRemoveAttributeValue(attributes, NUM);
244
		boolean taxonNotExists = checkAndRemoveAttributeValue(attributes, EXISTS, "false");
245
		
246
		String taxonCData = handleInnerToTaxon(state, reader, parentEvent, node).trim();
247
		
248
		String taxonKeyStr = makeTaxonKey(taxonCData, state.getCurrentTaxon(), parentEvent.getLocation());
249
		taxonNotExists = taxonNotExists || (isBlank(num) && state.isOnlyNumberedTaxaExist());
250
		if (taxonNotExists){
251
			NonViralName<?> name = createNameByCode(state, Rank.UNKNOWN_RANK());
252
			Taxon taxon = Taxon.NewInstance(name, null);
253
			taxon.getName().setTitleCache(taxonKeyStr, true);
254
			node.setTaxon(taxon);
255
		}else{
256
			UnmatchedLeadsKey unmatched = UnmatchedLeadsKey.NewInstance(num, taxonKeyStr);
257
			state.getUnmatchedLeads().addKey(unmatched, node);
258
//			String message = "The following key leads are unmatched: %s";
259
//			message = String.format(message, state.getUnmatchedLeads().toString());
260
//			fireWarningEvent(message, parentEvent, 6);
261
		}
262
		return;
263
	}
264

    
265
		/**
266
	 * Returns the taxon text of the toTaxon element and handles all annotations as ';'-concatenated modifying text.
267
	 * Footnote refs are not yet handled.
268
	 * @param state
269
	 * @param reader
270
	 * @param parentEvent
271
	 * @param node
272
	 * @return
273
	 * @throws XMLStreamException
274
	 */
275
	private String handleInnerToTaxon(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode node) throws XMLStreamException {
276
		String taxonText = "";
277
		String modifyingText = null;
278
		while (reader.hasNext()) {
279
			XMLEvent next = readNoWhitespace(reader);
280
			if (isMyEndingElement(next, parentEvent)) {
281
				if (isNotBlank(modifyingText)){
282
					node.putModifyingText(getDefaultLanguage(state), modifyingText);
283
				}
284
				return taxonText;
285
			} else if (next.isCharacters()) {
286
				taxonText += next.asCharacters().getData();
287
			} else if (isStartingElement(next, ANNOTATION)) {
288
				String annotation = handleSimpleAnnotation(state, reader, next);
289
				modifyingText = CdmUtils.concat("; ", modifyingText, annotation);
290
			} else if (isStartingElement(next, FOOTNOTE_REF)) {
291
				handleNotYetImplementedElement(next);
292
			} else {
293
				handleUnexpectedElement(next);
294
			}
295
		}
296
		throw new IllegalStateException("Event has no closing tag");
297

    
298
	}
299

    
300
	/**
301
	 * Creates a string that represents the given taxon. The string will try to replace e.g.
302
	 * abbreviated genus epithets by its full name etc.
303
	 * @param strGoto
304
	 * @param taxon
305
	 * @param location 
306
	 * @return
307
	 */
308
	private String makeTaxonKey(String strGoto, Taxon taxon, Location location) {
309
		String result = "";
310
		if (strGoto == null){
311
			return "";
312
		}
313
		
314
		NonViralName<?> name = CdmBase.deproxy(taxon.getName(), NonViralName.class);
315
		String strGenusName = name.getGenusOrUninomial();
316
		
317
		final String bracketPattern = "\\([^\\(\\)]*\\)";
318
		final String bracketPatternSomewhere = String.format(".*%s.*", bracketPattern);
319
		if (strGoto.matches(bracketPatternSomewhere)){
320
			fireWarningEvent("toTaxon has bracket: " + strGoto, makeLocationStr(location), 4);
321
			strGoto = strGoto.replaceAll(bracketPattern, "");  //replace all brackets
322
		}
323
		strGoto = strGoto.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace
324
		
325
		strGoto = strGoto.trim();
326
		strGoto = strGoto.replaceAll("\\s+\\.", "\\.");   // " ." may be created by bracket replacement
327
		strGoto = strGoto.replaceAll("\\.\\.", "\\.");   //replace
328
		
329
		String[] split = strGoto.split("\\s");
330
		//handle single epithets and markers
331
		for (int i = 0; i<split.length; i++){
332
			String single = split[i];
333
			if (isGenusAbbrev(single, strGenusName)){
334
				split[i] = strGenusName;
335
			}
336
			if (isInfraSpecificMarker(single)){
337
				String strSpeciesEpi = name.getSpecificEpithet();
338
				if (isBlank(result) && isNotBlank(strSpeciesEpi)){
339
					result += strGenusName + " " + strSpeciesEpi;
340
				}
341
			}
342
			result = (result + " " + split[i]).trim();
343
		}
344
		//remove trailing "." except for "sp."
345
		while (result.matches(".*(?<!sp)\\.$")){
346
			result = result.substring(0, result.length()-1).trim();
347
		}
348
		return result;
349
	}
350
	
351

    
352
	private boolean isInfraSpecificMarker(String single) {
353
		try {
354
			if (Rank.getRankByIdInVoc(single).isInfraSpecific()){
355
				return true;
356
			}else{
357
				return false;
358
			}
359
		} catch (UnknownCdmTypeException e) {
360
			return false;
361
		}
362
	}
363

    
364
//******************************** recognize nodes ***********/
365

    
366
	public void makeKeyNodes(MarkupImportState state, XMLEvent event, String taxonTitle) {
367
		Taxon taxon = state.getCurrentTaxon();
368
		String num = state.getCurrentTaxonNum();
369
		
370
		String nameString = CdmBase.deproxy(taxon.getName(), NonViralName.class).getNameCache();
371
//		String nameString = taxonTitle;
372
		
373
		//try to find matching lead nodes 
374
		UnmatchedLeadsKey leadsKey = UnmatchedLeadsKey.NewInstance(num, nameString);
375
		Set<PolytomousKeyNode> matchingNodes = handleMatchingNodes(state, event, taxon, leadsKey);
376
		
377
		if (num != null){//same without using the num
378
			UnmatchedLeadsKey noNumLeadsKey = UnmatchedLeadsKey.NewInstance("", nameString);
379
			Set<PolytomousKeyNode> noNumMatchingNodes = handleMatchingNodes(state, event, taxon, noNumLeadsKey);
380
			if(noNumMatchingNodes.size() > 0){
381
				String message ="Taxon matches additional key node when not considering <num> attribute in taxontitle. This may be correct but may also indicate an error.";
382
				fireWarningEvent(message, event, 1);
383
			}
384
		}
385
		//report missing match, if num exists
386
		if (num != null && matchingNodes.isEmpty() /* TODO redo comment && num != null (later DONE) */){
387
			String message = "Taxon has <num> attribute in taxontitle but no matching key nodes exist: %s, Key: %s";
388
			message = String.format(message, num, leadsKey.toString());
389
			fireWarningEvent(message, event, 1);
390
		}	
391
	}
392
	
393
	private Set<PolytomousKeyNode> handleMatchingNodes(MarkupImportState state, XMLEvent event, Taxon taxon, UnmatchedLeadsKey leadsKey) {
394
		Set<PolytomousKeyNode> matchingNodes = state.getUnmatchedLeads().getNodes(leadsKey);
395
		for (PolytomousKeyNode matchingNode : matchingNodes){
396
			state.getUnmatchedLeads().removeNode(leadsKey, matchingNode);
397
			matchingNode.setTaxon(taxon);
398
			//just to be on the save side
399
			try{	
400
				matchingNode.refreshNodeNumbering();
401
			} catch (Exception e) {
402
				String message = "An exception occurred when trying to referesh the node numbering: " + e.getMessage();
403
				fireWarningEvent(message, event, 6);
404
			}
405
			state.getPolytomousKeyNodesToSave().add(matchingNode);
406
		}
407
		return matchingNodes;
408
	}
409
}
(13-13/19)