Project

General

Profile

Download (15.1 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
 * Copyright (C) 2009 EDIT
3
 * European Distributed Institute of Taxonomy
4
 * http://www.e-taxonomy.eu
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version 1.1
7
 * See LICENSE.TXT at the top of this package for the full license terms.
8
 */
9

    
10
package eu.etaxonomy.cdm.io.markup;
11

    
12
import java.util.ArrayList;
13
import java.util.List;
14
import java.util.Map;
15
import java.util.Set;
16

    
17
import javax.xml.stream.Location;
18
import javax.xml.stream.XMLEventReader;
19
import javax.xml.stream.XMLStreamException;
20
import javax.xml.stream.events.Attribute;
21
import javax.xml.stream.events.XMLEvent;
22

    
23
import org.apache.log4j.Logger;
24

    
25
import eu.etaxonomy.cdm.common.CdmUtils;
26
import eu.etaxonomy.cdm.io.markup.UnmatchedLeads.UnmatchedLeadsKey;
27
import eu.etaxonomy.cdm.model.common.CdmBase;
28
import eu.etaxonomy.cdm.model.common.Language;
29
import eu.etaxonomy.cdm.model.description.KeyStatement;
30
import eu.etaxonomy.cdm.model.description.PolytomousKey;
31
import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
32
import eu.etaxonomy.cdm.model.name.NonViralName;
33
import eu.etaxonomy.cdm.model.name.Rank;
34
import eu.etaxonomy.cdm.model.taxon.Taxon;
35
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
36

    
37
/**
38
 * @author a.mueller
39
 * @created 26.04.2013
40
 */
41
public class MarkupKeyImport  extends MarkupImportBase  {
42
	/** Logger of this class; currently not referenced, hence the suppressed warning. */
	@SuppressWarnings("unused")
	private static final Logger logger = Logger.getLogger(MarkupKeyImport.class);

	/**
	 * Creates a key import that passes the given document import on to its base class.
	 * @param docImport the document import handed to the super constructor
	 */
	public MarkupKeyImport(MarkupDocumentImport docImport) {
		super(docImport);
	}
49
	
50
	public void handleKey(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
51
		// attributes
52
		Map<String, Attribute> attributes = getAttributes(parentEvent);
53
		String isSpotcharacters = getAndRemoveAttributeValue(attributes, IS_SPOTCHARACTERS);
54
		if (isNotBlank(isSpotcharacters) ) {
55
			//TODO isSpotcharacters
56
			String message = "Attribute isSpotcharacters not yet implemented for <key>";
57
			fireWarningEvent(message, parentEvent, 4);
58
		}
59
		boolean onlyNumberedTaxaExist = checkAndRemoveAttributeValue(attributes, ONLY_NUMBERED_TAXA_EXIST, "true");
60
		state.setOnlyNumberedTaxaExist(onlyNumberedTaxaExist);
61
		
62
		PolytomousKey key = PolytomousKey.NewInstance();
63
		key.addTaxonomicScope(state.getCurrentTaxon());
64
		state.setCurrentKey(key);
65
		
66
		boolean isFirstCouplet = true;
67
		while (reader.hasNext()) {
68
			XMLEvent next = readNoWhitespace(reader);
69
			if (isMyEndingElement(next, parentEvent)) {
70
				save(key, state);
71
				//reset state
72
				state.setCurrentKey(null);
73
				state.setOnlyNumberedTaxaExist(false);
74
				return;
75
			} else if (isEndingElement(next, KEYNOTES)){
76
				popUnimplemented(next.asEndElement());
77
			} else if (isStartingElement(next, KEY_TITLE)) {
78
				handleKeyTitle(state, reader, next);
79
			} else if (isStartingElement(next, KEYNOTES)) {
80
				//TODO
81
				handleNotYetImplementedElement(next);
82
			} else if (isStartingElement(next, COUPLET)) {
83
				PolytomousKeyNode node = null;
84
				if (isFirstCouplet){
85
					node = key.getRoot();
86
					isFirstCouplet = false;
87
				}
88
				handleCouplet(state, reader, next, node);
89
			} else {
90
				handleUnexpectedElement(next);
91
			}
92
		}
93
		throw new IllegalStateException("<key> has no closing tag");
94
	}
95

    
96

    
97
	/**
98
	 * @param state
99
	 * @param reader
100
	 * @param key
101
	 * @param next
102
	 * @throws XMLStreamException
103
	 */
104
	private void handleKeyTitle(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
105
		PolytomousKey key = state.getCurrentKey();
106
		String keyTitle = getCData(state, reader, parentEvent);
107
		String standardTitlesEngl = "(?i)(Key\\sto\\sthe\\s(genera|species|varieties|forms))";
108
		String standardTitlesFrench = "(?i)(Cl\u00e9\\sdes\\s(genres|esp\u00e8ces))";
109
		String standardTitles = standardTitlesEngl;
110
		if (state.getDefaultLanguage() != null && state.getDefaultLanguage().equals(Language.FRENCH())){
111
			standardTitles = standardTitlesFrench;
112
		}
113
		
114
		if (isNotBlank(keyTitle) ){
115
			if (!state.getConfig().isReplaceStandardKeyTitles() || ! keyTitle.matches(standardTitles)){
116
				key.setTitleCache(keyTitle, true);
117
			}
118
		}
119
	}
120
	
121

    
122
	private void handleCouplet(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode parentNode) throws XMLStreamException {
123
		String num = getOnlyAttribute(parentEvent, NUM, true);
124
		List<PolytomousKeyNode> childList = new ArrayList<PolytomousKeyNode>(); 
125
		
126
		while (reader.hasNext()) {
127
			XMLEvent next = readNoWhitespace(reader);
128
			if (isMyEndingElement(next, parentEvent)) {
129
				completeCouplet(state, parentEvent, parentNode, num, childList);
130
				return;
131
			} else if (isStartingElement(next, QUESTION)) {
132
				handleQuestion(state, reader, next, childList);
133
			} else if (isStartingElement(next, KEYNOTES)) {
134
				//TODO
135
				handleNotYetImplementedElement(next);
136
			} else if (isEndingElement(next, KEYNOTES)) {
137
				//TODO
138
				popUnimplemented(next.asEndElement());
139
			} else {
140
				handleUnexpectedElement(next);
141
			}
142
		}
143
		throw new IllegalStateException("<couplet> has no closing tag");
144
	}
145
	
146

    
147
	/**
148
	 * @param state
149
	 * @param parentEvent
150
	 * @param parentNode
151
	 * @param num
152
	 * @param childList
153
	 */
154
	private void completeCouplet(MarkupImportState state, XMLEvent parentEvent,
155
			PolytomousKeyNode parentNode, String num, List<PolytomousKeyNode> childList) {
156
		if (parentNode != null){
157
			for (PolytomousKeyNode childNode : childList){
158
				parentNode.addChild(childNode);
159
				//just to be on the save side
160
				parentNode.refreshNodeNumbering();
161
			}
162
		}else if (isNotBlank(num)){
163
			UnmatchedLeadsKey unmatchedKey = UnmatchedLeadsKey.NewInstance(state.getCurrentKey(), num);
164
			Set<PolytomousKeyNode> nodes = state.getUnmatchedLeads().getNodes(unmatchedKey);
165
			for(PolytomousKeyNode nodeToMatch: nodes){
166
				for (PolytomousKeyNode childNode : childList){
167
					nodeToMatch.addChild(childNode);
168
					//just to be on the save side
169
					nodeToMatch.refreshNodeNumbering();
170
				}
171
				state.getUnmatchedLeads().removeNode(unmatchedKey, nodeToMatch);
172
			}
173
		}else{
174
			String message = "Parent num could not be matched. Please check if num (%s) is correct";
175
			message = String.format(message, num);
176
			fireWarningEvent(message, parentEvent, 6);
177
		}
178
	}
179

    
180
	private void handleQuestion(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, List<PolytomousKeyNode> nodesList) throws XMLStreamException {
181
		// attributes
182
		Map<String, Attribute> attributes = getAttributes(parentEvent);
183
		//TODO needed only for data lineage
184
		String questionNum = getAndRemoveRequiredAttributeValue(parentEvent, attributes, NUM);
185
		
186
		PolytomousKeyNode myNode = PolytomousKeyNode.NewInstance();
187
		myNode.setKey(state.getCurrentKey());  //to avoid NPE while computing num in PolytomousKeyNode in case this node is not matched correctly with a parent
188
		nodesList.add(myNode);
189
		
190
		while (reader.hasNext()) {
191
			XMLEvent next = readNoWhitespace(reader);
192
			if (isMyEndingElement(next, parentEvent)) {
193
				return;
194
			} else if (isStartingElement(next, TEXT)) {
195
				String text = getCData(state, reader, next);
196
				KeyStatement statement = KeyStatement.NewInstance(getDefaultLanguage(state), text);
197
				myNode.setStatement(statement);
198
			} else if (isStartingElement(next, COUPLET)) {
199
				//TODO test
200
				handleCouplet(state, reader, next, myNode);
201
			} else if (isStartingElement(next, TO_COUPLET)) {
202
				handleToCouplet(state, reader, next, myNode);
203
			} else if (isStartingElement(next, TO_TAXON)) {
204
				handleToTaxon(state, reader, next, myNode);
205
			} else if (isStartingElement(next, TO_KEY)) {
206
				//TODO
207
				handleNotYetImplementedElement(next);
208
			} else if (isEndingElement(next, TO_KEY)){
209
				//TODO
210
				popUnimplemented(next.asEndElement());
211
			} else if (isStartingElement(next, KEYNOTES)) {
212
				//TODO
213
				handleNotYetImplementedElement(next);
214
			} else if (isEndingElement(next, KEYNOTES)){
215
				//TODO
216
				popUnimplemented(next.asEndElement());
217
			} else {
218
				handleUnexpectedElement(next);
219
			}
220
		}
221
		throw new IllegalStateException("<question> has no closing tag");
222
	}
223

    
224
	private void handleToCouplet(MarkupImportState state, XMLEventReader reader, XMLEvent next, PolytomousKeyNode node) throws XMLStreamException {
225
		String num = getOnlyAttribute(next, NUM, true);
226
		String cData = getCData(state, reader, next, false);
227
		if (isNotBlank(cData) && ! cData.equals(num)){
228
			String message = "CData ('%s') not be handled in <toCouplet>";
229
			message = String.format(message, cData);
230
			fireWarningEvent(message, next, 4);
231
		}
232
		UnmatchedLeadsKey unmatched = UnmatchedLeadsKey.NewInstance(state.getCurrentKey(), num);
233
		state.getUnmatchedLeads().addKey(unmatched, node);
234
	}
235

    
236
	private void handleToTaxon(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode node) throws XMLStreamException {
237
		Map<String, Attribute> attributes = getAttributes(parentEvent);
238
		String num = getAndRemoveAttributeValue(attributes, NUM);
239
		boolean taxonNotExists = checkAndRemoveAttributeValue(attributes, EXISTS, "false");
240
		
241
		String taxonCData = handleInnerToTaxon(state, reader, parentEvent, node).trim();
242
		
243
		String taxonKeyStr = makeTaxonKey(taxonCData, state.getCurrentTaxon(), parentEvent.getLocation());
244
		taxonNotExists = taxonNotExists || (isBlank(num) && state.isOnlyNumberedTaxaExist());
245
		if (taxonNotExists){
246
			NonViralName<?> name = createNameByCode(state, Rank.UNKNOWN_RANK());
247
			Taxon taxon = Taxon.NewInstance(name, null);
248
			taxon.getName().setTitleCache(taxonKeyStr, true);
249
			node.setTaxon(taxon);
250
		}else{
251
			UnmatchedLeadsKey unmatched = UnmatchedLeadsKey.NewInstance(num, taxonKeyStr);
252
			state.getUnmatchedLeads().addKey(unmatched, node);
253
//			String message = "The following key leads are unmatched: %s";
254
//			message = String.format(message, state.getUnmatchedLeads().toString());
255
//			fireWarningEvent(message, parentEvent, 6);
256
		}
257
		return;
258
	}
259
	
260
	
261
	/**
262
	 * Returns the taxon text of the toTaxon element and handles all annotations as ';'-concatenated modifying text.
263
	 * Footnote refs are not yet handled.
264
	 * @param state
265
	 * @param reader
266
	 * @param parentEvent
267
	 * @param node
268
	 * @return
269
	 * @throws XMLStreamException
270
	 */
271
	private String handleInnerToTaxon(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode node) throws XMLStreamException {
272
		String taxonText = "";
273
		String modifyingText = null;
274
		while (reader.hasNext()) {
275
			XMLEvent next = readNoWhitespace(reader);
276
			if (isMyEndingElement(next, parentEvent)) {
277
				if (isNotBlank(modifyingText)){
278
					node.putModifyingText(getDefaultLanguage(state), modifyingText);
279
				}
280
				return taxonText;
281
			} else if (next.isCharacters()) {
282
				taxonText += next.asCharacters().getData();
283
			} else if (isStartingElement(next, ANNOTATION)) {
284
				String annotation = handleSimpleAnnotation(state, reader, next);
285
				modifyingText = CdmUtils.concat("; ", modifyingText, annotation);
286
			} else if (isStartingElement(next, FOOTNOTE_REF)) {
287
				handleNotYetImplementedElement(next);
288
			} else {
289
				handleUnexpectedElement(next);
290
			}
291
		}
292
		throw new IllegalStateException("Event has no closing tag");
293

    
294
	}
295

    
296
	/**
297
	 * Creates a string that represents the given taxon. The string will try to replace e.g.
298
	 * abbreviated genus epithets by its full name etc.
299
	 * @param strGoto
300
	 * @param taxon
301
	 * @param location 
302
	 * @return
303
	 */
304
	private String makeTaxonKey(String strGoto, Taxon taxon, Location location) {
305
		String result = "";
306
		if (strGoto == null){
307
			return "";
308
		}
309
		
310
		NonViralName<?> name = CdmBase.deproxy(taxon.getName(), NonViralName.class);
311
		String strGenusName = name.getGenusOrUninomial();
312
		
313
		final String bracketPattern = "\\([^\\(\\)]*\\)";
314
		final String bracketPatternSomewhere = String.format(".*%s.*", bracketPattern);
315
		if (strGoto.matches(bracketPatternSomewhere)){
316
			fireWarningEvent("toTaxon has bracket: " + strGoto, makeLocationStr(location), 4);
317
			strGoto = strGoto.replaceAll(bracketPattern, "");  //replace all brackets
318
		}
319
		strGoto = strGoto.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace
320
		
321
		strGoto = strGoto.trim();
322
		strGoto = strGoto.replaceAll("\\s+\\.", "\\.");   // " ." may be created by bracket replacement
323
		strGoto = strGoto.replaceAll("\\.\\.", "\\.");   //replace
324
		
325
		String[] split = strGoto.split("\\s");
326
		//handle single epithets and markers
327
		for (int i = 0; i<split.length; i++){
328
			String single = split[i];
329
			if (isGenusAbbrev(single, strGenusName)){
330
				split[i] = strGenusName;
331
			}
332
			if (isInfraSpecificMarker(single)){
333
				String strSpeciesEpi = name.getSpecificEpithet();
334
				if (isBlank(result) && isNotBlank(strSpeciesEpi)){
335
					result += strGenusName + " " + strSpeciesEpi;
336
				}
337
			}
338
			result = (result + " " + split[i]).trim();
339
		}
340
		//remove trailing "." except for "sp."
341
		while (result.matches(".*(?<!sp)\\.$")){
342
			result = result.substring(0, result.length()-1).trim();
343
		}
344
		return result;
345
	}
346
	
347

    
348
	private boolean isInfraSpecificMarker(String single) {
349
		try {
350
			if (Rank.getRankByAbbreviation(single).isInfraSpecific()){
351
				return true;
352
			}else{
353
				return false;
354
			}
355
		} catch (UnknownCdmTypeException e) {
356
			return false;
357
		}
358
	}
359
	
360

    
361
	
362
	
363
//******************************** recognize nodes ***********/
364

    
365
	public void makeKeyNodes(MarkupImportState state, XMLEvent event, String taxonTitle) {
366
		Taxon taxon = state.getCurrentTaxon();
367
		String num = state.getCurrentTaxonNum();
368
		
369
		String nameString = CdmBase.deproxy(taxon.getName(), NonViralName.class).getNameCache();
370
//		String nameString = taxonTitle;
371
		
372
		//try to find matching lead nodes 
373
		UnmatchedLeadsKey leadsKey = UnmatchedLeadsKey.NewInstance(num, nameString);
374
		Set<PolytomousKeyNode> matchingNodes = handleMatchingNodes(state, taxon, leadsKey);
375
		
376
		if (num != null){//same without using the num
377
			UnmatchedLeadsKey noNumLeadsKey = UnmatchedLeadsKey.NewInstance("", nameString);
378
			Set<PolytomousKeyNode> noNumMatchingNodes = handleMatchingNodes(state, taxon, noNumLeadsKey);
379
			if(noNumMatchingNodes.size() > 0){
380
				String message ="Taxon matches additional key node when not considering <num> attribute in taxontitle. This may be correct but may also indicate an error.";
381
				fireWarningEvent(message, event, 1);
382
			}
383
		}
384
		//report missing match, if num exists
385
		if (matchingNodes.isEmpty() /* TODO redo comment && num != null  */){
386
			String message = "Taxon has <num> attribute in taxontitle but no matching key nodes exist: %s, Key: %s";
387
			message = String.format(message, num, leadsKey.toString());
388
			fireWarningEvent(message, event, 1);
389
		}
390
		
391
	}
392
	
393
	private Set<PolytomousKeyNode> handleMatchingNodes(MarkupImportState state, Taxon taxon, UnmatchedLeadsKey leadsKey) {
394
		Set<PolytomousKeyNode> matchingNodes = state.getUnmatchedLeads().getNodes(leadsKey);
395
		for (PolytomousKeyNode matchingNode : matchingNodes){
396
			state.getUnmatchedLeads().removeNode(leadsKey, matchingNode);
397
			matchingNode.setTaxon(taxon);
398
			//just to be on the save side
399
			matchingNode.refreshNodeNumbering();
400
			state.getPolytomousKeyNodesToSave().add(matchingNode);
401
		}
402
		return matchingNodes;
403
	}
404

    
405
}
(12-12/17)