Project

General

Profile

Download (73.5 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2009 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/ 
9

    
10
package eu.etaxonomy.cdm.io.eflora;
11

    
12
import java.util.ArrayList;
13
import java.util.HashMap;
14
import java.util.HashSet;
15
import java.util.List;
16
import java.util.Map;
17
import java.util.Set;
18
import java.util.UUID;
19
import java.util.regex.Matcher;
20
import java.util.regex.Pattern;
21

    
22
import org.apache.commons.lang.CharUtils;
23
import org.apache.commons.lang.StringUtils;
24
import org.apache.log4j.Logger;
25
import org.jdom.Attribute;
26
import org.jdom.Element;
27
import org.springframework.stereotype.Component;
28
import org.springframework.transaction.TransactionStatus;
29

    
30
import eu.etaxonomy.cdm.common.CdmUtils;
31
import eu.etaxonomy.cdm.common.ResultWrapper;
32
import eu.etaxonomy.cdm.common.XmlHelp;
33
import eu.etaxonomy.cdm.io.common.ICdmIO;
34
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
35
import eu.etaxonomy.cdm.io.eflora.UnmatchedLeads.UnmatchedLeadsKey;
36
import eu.etaxonomy.cdm.model.agent.Person;
37
import eu.etaxonomy.cdm.model.agent.Team;
38
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
39
import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
40
import eu.etaxonomy.cdm.model.common.Annotation;
41
import eu.etaxonomy.cdm.model.common.AnnotationType;
42
import eu.etaxonomy.cdm.model.common.CdmBase;
43
import eu.etaxonomy.cdm.model.common.Credit;
44
import eu.etaxonomy.cdm.model.common.ExtensionType;
45
import eu.etaxonomy.cdm.model.common.ISourceable;
46
import eu.etaxonomy.cdm.model.common.Language;
47
import eu.etaxonomy.cdm.model.common.Marker;
48
import eu.etaxonomy.cdm.model.common.MarkerType;
49
import eu.etaxonomy.cdm.model.common.TimePeriod;
50
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
51
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
52
import eu.etaxonomy.cdm.model.description.Feature;
53
import eu.etaxonomy.cdm.model.description.KeyStatement;
54
import eu.etaxonomy.cdm.model.description.PolytomousKey;
55
import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
56
import eu.etaxonomy.cdm.model.description.TaxonDescription;
57
import eu.etaxonomy.cdm.model.description.TextData;
58
import eu.etaxonomy.cdm.model.name.BotanicalName;
59
import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
60
import eu.etaxonomy.cdm.model.name.NameRelationshipType;
61
import eu.etaxonomy.cdm.model.name.NameTypeDesignation;
62
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
63
import eu.etaxonomy.cdm.model.name.NonViralName;
64
import eu.etaxonomy.cdm.model.name.Rank;
65
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
66
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
67
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
68
import eu.etaxonomy.cdm.model.name.TypeDesignationBase;
69
import eu.etaxonomy.cdm.model.occurrence.Specimen;
70
import eu.etaxonomy.cdm.model.reference.IBook;
71
import eu.etaxonomy.cdm.model.reference.IJournal;
72
import eu.etaxonomy.cdm.model.reference.Reference;
73
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
74
import eu.etaxonomy.cdm.model.reference.ReferenceType;
75
import eu.etaxonomy.cdm.model.taxon.Classification;
76
import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
77
import eu.etaxonomy.cdm.model.taxon.Taxon;
78
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
79
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
80
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
81
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
82

    
83

    
84
/**
85
 * @author a.mueller
86
 *
87
 */
88
@Component
89
public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<EfloraImportState> {
90
	private static final Logger logger = Logger.getLogger(EfloraTaxonImport.class);
91

    
92
	private static int modCount = 30000;
93
	private NonViralNameParserImpl parser = new NonViralNameParserImpl();
94

    
95
	public EfloraTaxonImport(){
96
		super();
97
	}
98
	
99
	
100
	@Override
101
	public boolean doCheck(EfloraImportState state){
102
		boolean result = true;
103
		return result;
104
	}
105
	
106
	//TODO make part of state, but state is renewed when invoking the import a second time 
107
	private UnmatchedLeads unmatchedLeads;
108
	
109
	@Override
110
	public void doInvoke(EfloraImportState state){
111
		logger.info("start make Taxa ...");
112
		
113
		//FIXME reset state
114
		state.putTree(null, null);
115
//		UnmatchedLeads unmatchedLeads = state.getOpenKeys();
116
		if (unmatchedLeads == null){
117
			unmatchedLeads = UnmatchedLeads.NewInstance();
118
		}
119
		state.setUnmatchedLeads(unmatchedLeads);
120
		
121
		TransactionStatus tx = startTransaction();
122
		unmatchedLeads.saveToSession(getPolytomousKeyNodeService());
123
		
124
		
125
		//TODO generally do not store the reference object in the config
126
		Reference sourceReference = state.getConfig().getSourceReference();
127
		getReferenceService().saveOrUpdate(sourceReference);
128
		
129
		Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
130
		ResultWrapper<Boolean> success = ResultWrapper.NewInstance(true);
131

    
132
		Element elbody= getBodyElement(state.getConfig());
133
		List<Element> elTaxonList = elbody.getChildren();
134
		
135
		int i = 0;
136
		
137
		Set<String> unhandledTitleClassess = new HashSet<String>();
138
		Set<String> unhandledNomeclatureChildren = new HashSet<String>();
139
		Set<String> unhandledDescriptionChildren = new HashSet<String>();
140
		
141
		Taxon lastTaxon = getLastTaxon(state);
142
		
143
		//for each taxon
144
		for (Element elTaxon : elTaxonList){
145
			try {
146
				if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
147
				if (! elTaxon.getName().equalsIgnoreCase("taxon")){
148
					logger.warn("body has element other than 'taxon'");
149
				}
150
				
151
				BotanicalName botanicalName = BotanicalName.NewInstance(Rank.SPECIES());
152
				Taxon taxon = Taxon.NewInstance(botanicalName, state.getConfig().getSourceReference());
153
				
154
				handleTaxonAttributes(elTaxon, taxon, state);
155

    
156
				
157
				List<Element> children = elTaxon.getChildren();
158
				handleTaxonElement(state, unhandledTitleClassess, unhandledNomeclatureChildren,	unhandledDescriptionChildren, taxon, children);
159
				handleTaxonRelation(state, taxon, lastTaxon);
160
				lastTaxon = taxon;
161
				taxaToSave.add(taxon);
162
				state.getConfig().setLastTaxonUuid(lastTaxon.getUuid());
163
				
164
			} catch (Exception e) {
165
				logger.warn("Exception occurred in Sapindacea taxon import: " + e);
166
				e.printStackTrace();
167
			}
168
			
169
		}
170
		
171
		System.out.println(state.getUnmatchedLeads().toString());
172
		logger.warn("There are taxa with attributes 'excluded' and 'dubious'");
173
		
174
		logger.info("Children for nomenclature are: " + unhandledNomeclatureChildren);
175
		logger.info("Children for description are: " + unhandledDescriptionChildren);
176
		logger.info("Children for homotypes are: " + unhandledHomotypeChildren);
177
		logger.info("Children for nom are: " + unhandledNomChildren);
178
		
179
		
180
		//invokeRelations(source, cdmApp, deleteAll, taxonMap, referenceMap);
181
		logger.info(i + " taxa handled. Saving ...");
182
		getTaxonService().saveOrUpdate(taxaToSave);
183
		getFeatureTreeService().saveOrUpdateFeatureNodesAll(state.getFeatureNodesToSave());
184
		state.getFeatureNodesToSave().clear();
185
		commitTransaction(tx);
186
		
187
		logger.info("end makeTaxa ...");
188
		logger.info("start makeKey ...");
189
//		invokeDoKey(state);
190
		logger.info("end makeKey ...");
191
		
192
		if (! success.getValue()){
193
			state.setUnsuccessfull();
194
		}
195
		return ;
196
	}
197

    
198

    
199
	private void handleTaxonAttributes(Element elTaxon, Taxon taxon, EfloraImportState state) {
200
		List<Attribute> attrList = elTaxon.getAttributes();
201
		for (Attribute attr : attrList){
202
			String attrName = attr.getName();
203
			String attrValue = attr.getValue();
204
			if ("class".equals(attrName)){
205
				if (attrValue.equalsIgnoreCase("dubious") || attrValue.equalsIgnoreCase("DUBIOUS GENUS") || attrValue.equalsIgnoreCase("DOUBTFUL SPECIES")  ){
206
					taxon.setDoubtful(true);
207
				}else{
208
					MarkerType markerType = getMarkerType(state, attrValue);
209
					if (markerType == null){
210
						logger.warn("Class attribute value for taxon not yet supported: " + attrValue);
211
					}else{
212
						taxon.addMarker(Marker.NewInstance(markerType, true));
213
					}
214
				}
215
			}else if ("num".equals(attrName)){
216
				logger.warn("num not yet supported");
217
			}else{
218
				logger.warn("Attribute " + attrName + " not yet supported for element taxon");
219
			}
220
		}
221

    
222
	}
223

    
224

    
225
	private Taxon getLastTaxon(EfloraImportState state) {
226
		if (state.getConfig().getLastTaxonUuid() == null){
227
			return null;
228
		}else{
229
			return (Taxon)getTaxonService().find(state.getConfig().getLastTaxonUuid());
230
		}
231
	}
232

    
233

    
234
//	private void invokeDoKey(SapindaceaeImportState state) {
235
//		TransactionStatus tx = startTransaction();
236
//		
237
//		Set<FeatureNode> nodesToSave = new HashSet<FeatureNode>();
238
//		ITaxonService taxonService = getTaxonService();
239
//		ResultWrapper<Boolean> success = ResultWrapper.NewInstance(true);
240
//
241
//		Element elbody= getBodyElement(state.getConfig());
242
//		List<Element> elTaxonList = elbody.getChildren();
243
//		
244
//		int i = 0;
245
//		
246
//		//for each taxon
247
//		for (Element elTaxon : elTaxonList){
248
//			if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
249
//			if (! elTaxon.getName().equalsIgnoreCase("taxon")){
250
//				continue;
251
//			}
252
//			
253
//			List<Element> children = elTaxon.getChildren("key");
254
//			for (Element element : children){
255
//				handleKeys(state, element, null);
256
//			}
257
//			nodesToSave.add(taxon);
258
//
259
//		}
260
//		
261
//	}
262

    
263

    
264
	// body/taxon/*
265
	private void handleTaxonElement(EfloraImportState state, Set<String> unhandledTitleClassess, Set<String> unhandledNomeclatureChildren, Set<String> unhandledDescriptionChildren, Taxon taxon, List<Element> children) {
266
		AnnotatableEntity lastEntity = null;
267
		for (Element element : children){
268
			String elName = element.getName();
269
			
270
			if (elName.equalsIgnoreCase("title")){
271
				handleTitle(state, element, taxon, unhandledTitleClassess);
272
				lastEntity = null;
273
			}else if(elName.equalsIgnoreCase("nomenclature")){
274
				handleNomenclature(state, element, taxon, unhandledNomeclatureChildren);
275
				lastEntity = null;
276
			}else if(elName.equalsIgnoreCase("description")){
277
				handleDescription(state, element, taxon, unhandledDescriptionChildren);
278
				lastEntity = null;
279
			}else if(elName.equalsIgnoreCase("habitatecology")){
280
				lastEntity = handleEcology(state, element, taxon);
281
			}else if(elName.equalsIgnoreCase("distribution")){
282
				lastEntity = handleDistribution(state, element, taxon);
283
			}else if(elName.equalsIgnoreCase("uses")){
284
				lastEntity = handleUses(state, element, taxon);
285
			}else if(elName.equalsIgnoreCase("notes")){
286
				lastEntity = handleTaxonNotes(state, element, taxon);
287
			}else if(elName.equalsIgnoreCase("chromosomes")){
288
				lastEntity = handleChromosomes(state, element, taxon);
289
			}else if(elName.equalsIgnoreCase("vernacularnames")){
290
				handleVernaculars(state, element, taxon);
291
			}else if(elName.equalsIgnoreCase("key")){
292
				lastEntity = handleKeys(state, element, taxon);
293
			}else if(elName.equalsIgnoreCase("references")){
294
				handleReferences(state, element, taxon, lastEntity);
295
				lastEntity = null;
296
			}else if(elName.equalsIgnoreCase("taxon")){
297
				logger.warn("A taxon should not be part of a taxon");
298
			}else if(elName.equalsIgnoreCase("homotypes")){
299
				logger.warn("Homotypes should be included in the nomenclature flag but is child of taxon [XPath: body/taxon/homotypes]");
300
			}else{
301
				logger.warn("Unexpected child for taxon: " + elName);
302
			}
303
		}
304
	}
305
	
306
	
307
	private void handleVernaculars(EfloraImportState state, Element elVernacular, Taxon taxon) {
308
		verifyNoAttribute(elVernacular);
309
		verifyNoChildren(elVernacular, false);
310
		String value = elVernacular.getTextNormalize();
311
		Feature feature = Feature.COMMON_NAME();
312
		value = replaceStart(value, "Noms vernaculaires");
313
		String[] dialects = value.split(";");
314
		for (String singleDialect : dialects){
315
			handleSingleDialect(taxon, singleDialect, feature, state);
316
		}
317
		return;
318
	}
319

    
320

    
321
	private void handleSingleDialect(Taxon taxon, String singleDialect, Feature feature, EfloraImportState state) {
322
		singleDialect = singleDialect.trim();
323
		TaxonDescription description = getDescription(taxon);
324
		String reDialect = "\\(dial\\.\\s.*\\)";
325
//		String reDialect = "\\(.*\\)";
326
		Pattern patDialect = Pattern.compile(reDialect);
327
		Matcher matcher = patDialect.matcher(singleDialect);
328
		if (matcher.find()){
329
			String dialect = singleDialect.substring(matcher.start(), matcher.end());
330
			dialect = dialect.replace("(dial. ", "").replace(")", "");
331
			
332
			Language language = null;
333
			try {
334
				language = this.getLanguage(state, state.getTransformer().getLanguageUuid(dialect), dialect, dialect, dialect);
335
			} catch (UndefinedTransformerMethodException e) {
336
				logger.error(e.getMessage());
337
			}
338
			
339
			String commonNames = singleDialect.substring(0, matcher.start());
340
			String[] splitNames = commonNames.split(",");
341
			for (String commonNameString : splitNames){
342
				commonNameString = commonNameString.trim();
343
				CommonTaxonName commonName = CommonTaxonName.NewInstance(commonNameString, language);
344
				description.addElement(commonName);
345
			}
346
		}else{
347
			logger.warn("No dialect match: " +  singleDialect);
348
		}
349
	}
350

    
351

    
352
	private void handleReferences(EfloraImportState state, Element elReferences, Taxon taxon, AnnotatableEntity lastEntity) {
353
		verifyNoAttribute(elReferences);
354
		verifyNoChildren(elReferences, true);
355
		String refString = elReferences.getTextNormalize(); 
356
		if (lastEntity == null){
357
			logger.warn("No last entity defined: " + refString);
358
			return;
359
		}
360
		
361
		Annotation annotation = Annotation.NewInstance(refString, AnnotationType.EDITORIAL(), Language.DEFAULT());
362
		lastEntity.addAnnotation(annotation);
363
	}
364

    
365

    
366
	private PolytomousKey handleKeys(EfloraImportState state, Element elKey, Taxon taxon) {
367
		UnmatchedLeads openKeys = state.getUnmatchedLeads();
368
		
369
		//title
370
		String title = makeKeyTitle(elKey);
371
		
372
		//key
373
		PolytomousKey key = PolytomousKey.NewTitledInstance(title);
374
		
375
		//TODO add covered taxa etc.
376
		verifyNoAttribute(elKey);
377
		
378
		//notes
379
		makeKeyNotes(elKey, key);
380
		
381
		//keycouplets
382
		List<Element> keychoices = new ArrayList<Element>();
383
		keychoices.addAll(elKey.getChildren("keycouplet"));
384
		keychoices.addAll(elKey.getChildren("keychoice"));
385
		
386
		
387
		for (Element elKeychoice : keychoices){
388
			handleKeyChoices(state, openKeys, key, elKeychoice, taxon);
389
			elKey.removeContent(elKeychoice);
390
		}
391
		
392
		//
393
		verifyNoChildren(elKey);
394
		logger.info("Unmatched leads after key handling:" + openKeys.toString());
395
		
396

    
397
		if (state.getConfig().isDoPrintKeys()){
398
			key.print(System.err);
399
		}
400
		getPolytomousKeyService().save(key);
401
		return key;
402
	}
403

    
404

    
405
	/**
406
	 * @param state
407
	 * @param elKey
408
	 * @param openKeys
409
	 * @param key
410
	 * @param elKeychoice
411
	 * @param taxon 
412
	 */
413
	private void handleKeyChoices(EfloraImportState state, UnmatchedLeads openKeys, PolytomousKey key, Element elKeychoice, Taxon taxon) {
414
		
415
		//char Attribute
416
		//TODO it's still unclear if char is a feature and needs to be a new attribute 
417
		//or if it is handled as question. Therefore both cases are handled but feature
418
		//is finally not yet set
419
		KeyStatement question = handleKeychoiceChar(state, elKeychoice);
420
		Feature feature = handleKeychoiceCharAsFeature(state, elKeychoice);
421
		
422
		//lead
423
		List<PolytomousKeyNode> childNodes = handleKeychoiceLeads(state, key, elKeychoice, taxon, question, feature);
424
		
425
		//num -> match with unmatched leads
426
		handleKeychoiceNum(openKeys, key, elKeychoice, childNodes);
427

    
428
		//others
429
		verifyNoAttribute(elKeychoice);
430
	}
431

    
432

    
433
	/**
434
	 * @param openKeys
435
	 * @param key
436
	 * @param elKeychoice
437
	 * @param childNodes
438
	 */
439
	private void handleKeychoiceNum(UnmatchedLeads openKeys, PolytomousKey key, Element elKeychoice, List<PolytomousKeyNode> childNodes) {
440
		Attribute numAttr = elKeychoice.getAttribute("num");
441
		String num = CdmUtils.removeTrailingDot(numAttr == null? "":numAttr.getValue());
442
		UnmatchedLeadsKey okk = UnmatchedLeadsKey.NewInstance(key, num);
443
		Set<PolytomousKeyNode> matchingNodes = openKeys.getNodes(okk);
444
		for (PolytomousKeyNode matchingNode : matchingNodes){
445
			for (PolytomousKeyNode childNode : childNodes){
446
				matchingNode.addChild(childNode);
447
			}
448
			openKeys.removeNode(okk, matchingNode);
449
		}
450
		if (matchingNodes.isEmpty()){
451
			for (PolytomousKeyNode childNode : childNodes){
452
				key.getRoot().addChild(childNode);
453
			}
454
		}
455
		
456
		elKeychoice.removeAttribute("num");
457
	}
458

    
459

    
460
	/**
461
	 * @param state
462
	 * @param key
463
	 * @param elKeychoice
464
	 * @param taxon
465
	 * @param feature
466
	 * @return
467
	 */
468
	private List<PolytomousKeyNode> handleKeychoiceLeads(	EfloraImportState state, PolytomousKey key,	Element elKeychoice, Taxon taxon, KeyStatement question, Feature feature) {
469
		List<PolytomousKeyNode> childNodes = new ArrayList<PolytomousKeyNode>();
470
		List<Element> leads = elKeychoice.getChildren("lead");
471
		for(Element elLead : leads){
472
			PolytomousKeyNode childNode = handleLead(state, key, elLead, taxon, question, feature);
473
			childNodes.add(childNode);
474
		}
475
		return childNodes;
476
	}
477

    
478

    
479
	/**
480
	 * @param state
481
	 * @param elKeychoice
482
	 * @return
483
	 */
484
	private KeyStatement handleKeychoiceChar(EfloraImportState state, Element elKeychoice) {
485
		KeyStatement statement = null;
486
		Attribute charAttr = elKeychoice.getAttribute("char");
487
		if (charAttr != null){
488
			String charStr = charAttr.getValue();
489
			if (StringUtils.isNotBlank(charStr)){
490
				statement = KeyStatement.NewInstance(charStr);
491
			}
492
			elKeychoice.removeAttribute("char");
493
		}
494
		return statement;
495
	}
496
	
497
	/**
498
	 * @param state
499
	 * @param elKeychoice
500
	 * @return
501
	 */
502
	private Feature handleKeychoiceCharAsFeature(EfloraImportState state, Element elKeychoice) {
503
		Feature feature = null;
504
		Attribute charAttr = elKeychoice.getAttribute("char");
505
		if (charAttr != null){
506
			String charStr = charAttr.getValue();
507
			feature = getFeature(charStr, state);
508
			elKeychoice.removeAttribute("char");
509
		}
510
		return feature;
511
	}
512

    
513

    
514
	private PolytomousKeyNode handleLead(EfloraImportState state, PolytomousKey key, Element elLead, Taxon taxon, KeyStatement question, Feature feature) {
515
		PolytomousKeyNode node = PolytomousKeyNode.NewInstance();
516
		//TODO the char attribute in the keychoice is more a feature than a question
517
		//needs to be discussed on model side
518
		node.setQuestion(question);
519
//		node.setFeature(feature);
520
		
521
		//text
522
		String text = handleLeadText(elLead, node);
523
		
524
		//num
525
		handleLeadNum(elLead, text);
526
		
527
		//goto
528
		handleLeadGoto(state, key, elLead, taxon, node);
529
		
530
		//others
531
		verifyNoAttribute(elLead);
532
		
533
		return node;
534
	}
535

    
536

    
537
	/**
538
	 * @param elLead
539
	 * @param node
540
	 * @return
541
	 */
542
	private String handleLeadText(Element elLead, PolytomousKeyNode node) {
543
		String text = elLead.getAttributeValue("text").trim();
544
		if (StringUtils.isBlank(text)){
545
			logger.warn("Empty text in lead");
546
		}
547
		elLead.removeAttribute("text");
548
		KeyStatement statement = KeyStatement.NewInstance(text);
549
		node.setStatement(statement);
550
		return text;
551
	}
552

    
553

    
554
	/**
555
	 * @param state
556
	 * @param key
557
	 * @param elLead
558
	 * @param taxon
559
	 * @param node
560
	 */
561
	private void handleLeadGoto(EfloraImportState state, PolytomousKey key, Element elLead, Taxon taxon, PolytomousKeyNode node) {
562
		Attribute gotoAttr = elLead.getAttribute("goto");
563
		if (gotoAttr != null){
564
			String strGoto = gotoAttr.getValue().trim();
565
			//create key
566
			UnmatchedLeadsKey gotoKey = null;
567
			if (isInternalNode(strGoto)){
568
				gotoKey = UnmatchedLeadsKey.NewInstance(key, strGoto);
569
			}else{
570
				String taxonKey = makeTaxonKey(strGoto, taxon);
571
				gotoKey = UnmatchedLeadsKey.NewInstance(taxonKey);
572
			}
573
			//
574
			UnmatchedLeads openKeys = state.getUnmatchedLeads();
575
			if (gotoKey.isInnerLead()){
576
				Set<PolytomousKeyNode> existingNodes = openKeys.getNodes(gotoKey);
577
				for (PolytomousKeyNode existingNode : existingNodes){
578
					node.addChild(existingNode);
579
				}
580
			}
581
			openKeys.addKey(gotoKey, node);
582
			//remove attribute (need for consistency check)
583
			elLead.removeAttribute("goto");
584
		}else{
585
			logger.warn("lead has no goto attribute");
586
		}
587
	}
588

    
589

    
590
	/**
591
	 * @param elLead
592
	 * @param text
593
	 */
594
	private void handleLeadNum(Element elLead, String text) {
595
		Attribute numAttr = elLead.getAttribute("num");
596
		if (numAttr != null){
597
			//TODO num
598
			String num = numAttr.getValue();
599
			elLead.removeAttribute("num");
600
		}else{
601
			logger.info("Keychoice has no num attribute: " + text);
602
		}
603
	}
604

    
605

    
606
	private String makeTaxonKey(String strGoto, Taxon taxon) {
607
		String result = "";
608
		if (strGoto == null){
609
			return "";
610
		}
611
		String strGenusName = CdmBase.deproxy(taxon.getName(), NonViralName.class).getGenusOrUninomial();
612
		strGoto = strGoto.replaceAll("\\([^\\(\\)]*\\)", "");  //replace all brackets
613
		strGoto = strGoto.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace
614
		
615
		strGoto = strGoto.trim();  
616
		String[] split = strGoto.split("\\s");
617
		for (int i = 0; i<split.length; i++){
618
			String single = split[i];
619
			if (isGenusAbbrev(single, strGenusName)){
620
				split[i] = strGenusName;
621
			}
622
//			if (isInfraSpecificMarker(single)){
623
//				String strSpeciesName = CdmBase.deproxy(taxon.getName(), NonViralName.class).getSpecificEpithet();
624
//				split[i] = strGenusName + " " + strSpeciesName + " ";
625
//			}
626
			result = (result + " " + split[i]).trim();
627
		}
628
		return result;
629
	}
630

    
631

    
632
	private boolean isInfraSpecificMarker(String single) {
633
		try {
634
			if (Rank.getRankByAbbreviation(single).isInfraSpecific()){
635
				return true;
636
			}
637
		} catch (UnknownCdmTypeException e) {
638
			return false;
639
		}
640
		return false;
641
	}
642

    
643

    
644
	private boolean isGenusAbbrev(String single, String strGenusName) {
645
		if (! single.matches("[A-Z]\\.?")) {
646
			return false;
647
		}else if (single.length() == 0 || strGenusName == null || strGenusName.length() == 0){
648
			return false; 
649
		}else{
650
			return single.charAt(0) == strGenusName.charAt(0);
651
		}
652
	}
653

    
654

    
655
	private boolean isInternalNode(String strGoto) {
656
		return CdmUtils.isNumeric(strGoto);
657
	}
658

    
659

    
660
	private void makeKeyNotes(Element keyElement, PolytomousKey key) {
661
		Element elNotes = keyElement.getChild("notes");
662
		if (elNotes != null){
663
			keyElement.removeContent(elNotes);
664
			String notes = elNotes.getTextNormalize();
665
			if (StringUtils.isNotBlank(notes)){
666
				key.addAnnotation(Annotation.NewInstance(notes, AnnotationType.EDITORIAL(), Language.DEFAULT()));
667
			}
668
		}
669
	}
670

    
671

    
672
	private String makeKeyTitle(Element keyElement) {
673
		String title = "- no title - ";
674
		Attribute titleAttr = keyElement.getAttribute("title");
675
		keyElement.removeAttribute(titleAttr);
676
		if (titleAttr == null){
677
			Element elTitle = keyElement.getChild("keytitle");
678
			keyElement.removeContent(elTitle);
679
			if (elTitle != null){
680
				title = elTitle.getTextNormalize();
681
			}
682
		}else{
683
			title = titleAttr.getValue();
684
		}
685
		return title;
686
	}
687

    
688

    
689
	/**
690
	 * @param state
691
	 * @param element
692
	 * @param taxon
693
	 */
694
	private TextData handleChromosomes(EfloraImportState state, Element element, Taxon taxon) {
695
		Feature chromosomeFeature = getFeature("chromosomes", state);
696
		verifyNoAttribute(element);
697
		verifyNoChildren(element);
698
		String value = element.getTextNormalize();
699
		value = replaceStart(value, "Chromosomes");
700
		String chromosomesPart = getChromosomesPart(value);
701
		String references = value.replace(chromosomesPart, "").trim();
702
		chromosomesPart = chromosomesPart.replace(":", "").trim();
703
		return addDescriptionElement(state, taxon, chromosomesPart, chromosomeFeature, references);	
704
	}
705

    
706

    
707
	/**
708
	 * @param ref 
709
	 * @param string 
710
	 * @return
711
	 */
712
	private void makeOriginalSourceReferences(ISourceable sourcable, String splitter, String refAll) {
713
		String[] splits = refAll.split(splitter);
714
		for (String strRef: splits){
715
			Reference ref = ReferenceFactory.newGeneric();
716
			ref.setTitleCache(strRef, true);
717
			String refDetail = parseReferenceYearAndDetail(ref);
718
			sourcable.addSource(null, null, ref, refDetail);
719
		}
720
		
721
		
722
//TODO use regex instead
723
/*		String detailResult = null;
724
		String titleToParse = ref.getTitleCache();
725
		String reReference = "^\\.{1,}";
726
//		String reYear = "\\([1-2]{1}[0-9]{3}\\)";
727
		String reYear = "\\([1-2]{1}[0-9]{3}\\)";
728
		String reYearPeriod = reYear + "(-" + reYear + ")+";
729
		String reDetail = "\\.{1,10}$";
730
*/		
731
	}
732

    
733

    
734
	/**
735
	 * @param value
736
	 * @return
737
	 */
738
	private String getChromosomesPart(String str) {
739
		Pattern pattern = Pattern.compile("2n\\s*=\\s*\\d{1,2}:");
740
		Matcher matcher = pattern.matcher(str);
741
		if (matcher.find()){
742
			return matcher.group(0);
743
		}else{
744
			logger.warn("Chromosomes could not be parsed: " + str);
745
		}
746
		return str;
747
	}
748

    
749

    
750
	/**
751
	 * @param state
752
	 * @param element
753
	 * @param taxon
754
	 */
755
	private TextData handleTaxonNotes(EfloraImportState state, Element element, Taxon taxon) {
756
		TextData result = null;
757
		verifyNoChildren(element, true);
758
		//verifyNoAttribute(element);
759
		List<Attribute> attributes = element.getAttributes();
760
		for (Attribute attribute : attributes){
761
			if (! attribute.getName().equalsIgnoreCase("class")){
762
				logger.warn("Char has unhandled attribute " +  attribute.getName());
763
			}else{
764
				String classValue = attribute.getValue();
765
				result = handleDescriptiveElement(state, element, taxon, classValue);
766
			}
767
		}
768
		//if no class attribute exists, handle as note
769
		if (attributes.isEmpty()){
770
			result = handleDescriptiveElement(state, element, taxon, "Note");
771
		}
772

    
773
		//Annotation annotation = Annotation.NewInstance(value, AnnotationType.EDITORIAL(), Language.ENGLISH());
774
		//taxon.addAnnotation(annotation);
775
		return result; //annotation;
776
	}
777

    
778

    
779
	/**
780
	 * @param state
781
	 * @param element
782
	 * @param taxon
783
	 * @param result
784
	 * @param attribute
785
	 * @return
786
	 */
787
	private TextData handleDescriptiveElement(EfloraImportState state, Element element, Taxon taxon, String classValue) {
788
		TextData result = null;
789
		Feature feature = getFeature(classValue, state);
790
		if (feature == null){
791
			logger.warn("Unhandled feature: " + classValue);
792
		}else{
793
			String value = element.getValue();
794
			value = replaceStart(value, "Notes");
795
			value = replaceStart(value, "Note");
796
			result = addDescriptionElement(state, taxon, value, feature, null);
797
		}
798
		return result;
799
	}
800

    
801

    
802
	private void removeBr(Element element) {
803
		element.removeChildren("Br");
804
		element.removeChildren("br");
805
		element.removeChildren("BR");
806
	}
807

    
808

    
809
	/**
810
	 * @param state
811
	 * @param element
812
	 * @param taxon
813
	 */
814
	private TextData handleUses(EfloraImportState state, Element element, Taxon taxon) {
815
		verifyNoAttribute(element);
816
		verifyNoChildren(element, true);
817
		String value = element.getTextNormalize();
818
		value = replaceStart(value, "Uses");
819
		Feature feature = Feature.USES();
820
		return addDescriptionElement(state, taxon, value, feature, null);
821
		
822
	}
823

    
824

    
825
	/**
826
	 * @param state
827
	 * @param element
828
	 * @param taxon
829
	 * @param unhandledDescriptionChildren
830
	 */
831
	private DescriptionElementBase handleDistribution(EfloraImportState state, Element element, Taxon taxon) {
832
		verifyNoAttribute(element);
833
		verifyNoChildren(element, true);
834
		String value = element.getTextNormalize();
835
		value = replaceStart(value, "Distribution");
836
		Feature feature = Feature.DISTRIBUTION();
837
		//distribution parsing almost impossible as there is lots of freetext in the distribution tag
838
		return addDescriptionElement(state, taxon, value, feature, null);
839
	}
840

    
841

    
842
	/**
843
	 * @param state
844
	 * @param element
845
	 * @param taxon
846
	 * @param unhandledDescriptionChildren
847
	 */
848
	private TextData handleEcology(EfloraImportState state, Element elEcology, Taxon taxon) {
849
		verifyNoAttribute(elEcology);
850
		verifyNoChildren(elEcology, true);
851
		String value = elEcology.getTextNormalize();
852
		Feature feature = Feature.ECOLOGY();
853
		if (value.startsWith("Habitat & Ecology")){
854
			feature = getFeature("Habitat & Ecology", state);
855
			value = replaceStart(value, "Habitat & Ecology");
856
		}else if (value.startsWith("Habitat")){
857
			value = replaceStart(value, "Habitat");
858
			feature = getFeature("Habitat", state);
859
		}
860
		return addDescriptionElement(state, taxon, value, feature, null);
861
	}
862

    
863

    
864

    
865
	/**
866
	 * @param value
867
	 * @param replacementString
868
	 */
869
	private String replaceStart(String value, String replacementString) {
870
		if (value.startsWith(replacementString) ){
871
			value = value.substring(replacementString.length()).trim();
872
		}
873
		while (value.startsWith("-") || value.startsWith("–") ){
874
			value = value.substring("-".length()).trim();
875
		}
876
		return value;
877
	}
878

    
879

    
880
	/**
881
	 * @param value
882
	 * @param replacementString
883
	 */
884
	protected String removeTrailing(String value, String replacementString) {
885
		if (value == null){
886
			return null;
887
		}
888
		if (value.endsWith(replacementString) ){
889
			value = value.substring(0, value.length() - replacementString.length()).trim();
890
		}
891
		return value;
892
	}
893

    
894
	/**
895
	 * @param state
896
	 * @param element
897
	 * @param taxon
898
	 * @param unhandledNomeclatureChildren 
899
	 */
900
	private void handleNomenclature(EfloraImportState state, Element elNomenclature, Taxon taxon, Set<String> unhandledChildren) {
901
		verifyNoAttribute(elNomenclature);
902
		
903
		List<Element> elements = elNomenclature.getChildren();
904
		for (Element element : elements){
905
			if (element.getName().equals("homotypes")){
906
				handleHomotypes(state, element, taxon);
907
			}else if (element.getName().equals("notes")){
908
				handleNomenclatureNotes(state, element, taxon);
909
			}else{
910
				unhandledChildren.add(element.getName());
911
			}
912
		}
913
		
914
	}
915

    
916

    
917

    
918
	private void handleNomenclatureNotes(EfloraImportState state, Element elNotes, Taxon taxon) {
919
		verifyNoAttribute(elNotes);
920
		verifyNoChildren(elNotes);
921
		String notesText = elNotes.getTextNormalize();
922
		Annotation annotation = Annotation.NewInstance(notesText, AnnotationType.EDITORIAL(), Language.DEFAULT());
923
		taxon.addAnnotation(annotation);
924
	}
925

    
926

    
927

    
928
	private static Set<String> unhandledHomotypeChildren = new HashSet<String>();
929
	/**
930
	 * @param state
931
	 * @param element
932
	 * @param taxon
933
	 */
934
	private void handleHomotypes(EfloraImportState state, Element elHomotypes, Taxon taxon) {
935
		verifyNoAttribute(elHomotypes);
936
		
937
		List<Element> elements = elHomotypes.getChildren();
938
		HomotypicalGroup homotypicalGroup = null;
939
		for (Element element : elements){
940
			if (element.getName().equals("nom")){
941
				homotypicalGroup = handleNom(state, element, taxon, homotypicalGroup);
942
			}else{
943
				unhandledHomotypeChildren.add(element.getName());
944
			}
945
		}
946
		
947
	}
948

    
949
	private static Set<String> unhandledNomChildren = new HashSet<String>();
950

    
951
	/**
952
	 * @param state
953
	 * @param element
954
	 * @param taxon
955
	 */
956
	private HomotypicalGroup handleNom(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
957
		List<Attribute> attributes = elNom.getAttributes();
958
		
959
		boolean taxonBaseClassType = false;
960
		for (Attribute attribute : attributes){
961
			if (! attribute.getName().equalsIgnoreCase("class")){
962
				logger.warn("Nom has unhandled attribute " +  attribute.getName());
963
			}else{
964
				String classValue = attribute.getValue();
965
				if (classValue.equalsIgnoreCase("acceptedname")){
966
					homotypicalGroup = handleNomTaxon(state, elNom, taxon,homotypicalGroup, false);
967
					taxonBaseClassType = true;
968
				}else if (classValue.equalsIgnoreCase("synonym")){
969
					homotypicalGroup = handleNomTaxon(state, elNom, taxon, homotypicalGroup, true);
970
					taxonBaseClassType = true;
971
				}else if (classValue.equalsIgnoreCase("typeref")){
972
					handleTypeRef(state, elNom, taxon, homotypicalGroup);
973
				}else{
974
					logger.warn("Unhandled class value for nom: " + classValue);
975
				}
976
				
977
			}
978
		}
979
		
980
		List<Element> elements = elNom.getChildren();
981
		for (Element element : elements){
982
			if (element.getName().equals("name") || element.getName().equals("homonym") ){
983
				if (taxonBaseClassType == false){
984
					logger.warn("Name or homonym tag not allowed in non taxon nom tag");
985
				}
986
			}else{
987
				unhandledNomChildren.add(element.getName());
988
			}
989
		}
990
		
991
		return homotypicalGroup;
992
		
993
	}
994

    
995
	/**
996
	 * @param state
997
	 * @param elNom
998
	 * @param taxon
999
	 * @param homotypicalGroup 
1000
	 */
1001
	protected void handleTypeRef(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
1002
		verifyNoChildren(elNom);
1003
		String typeRef = elNom.getTextNormalize();
1004
		typeRef = removeStartingTypeRefMinus(typeRef);
1005
		
1006
		String[] split = typeRef.split(":");
1007
		if (split.length < 2){
1008
			logger.warn("typeRef has no ':' : " + typeRef);
1009
		}else if (split.length > 2){
1010
			logger.warn("typeRef has more than 1 ':' : " + typeRef);
1011
		}else{
1012
			StringBuffer typeType = new StringBuffer(split[0]);
1013
			String typeText = split[1].trim();
1014
			TypeDesignationBase typeDesignation = getTypeDesignationAndReference(typeType);
1015
			
1016
			//Name Type Desitnations
1017
			if (typeDesignation instanceof NameTypeDesignation){
1018
				makeNameTypeDesignations(typeType, typeText, typeDesignation);
1019
			}
1020
			//SpecimenTypeDesignations
1021
			else if (typeDesignation instanceof SpecimenTypeDesignation){
1022
				makeSpecimenTypeDesignation(typeType, typeText, typeDesignation);
1023
			}else{
1024
				logger.error("Unhandled type designation class" + typeDesignation.getClass().getName());
1025
			}
1026
			for (TaxonNameBase name : homotypicalGroup.getTypifiedNames()){
1027
				name.addTypeDesignation(typeDesignation, true);
1028
			}
1029
		}
1030
	}
1031

    
1032

    
1033
	/**
1034
	 * @param typeRef
1035
	 * @return
1036
	 */
1037
	protected String removeStartingTypeRefMinus(String typeRef) {
1038
		typeRef = replaceStart(typeRef, "-");
1039
		typeRef = replaceStart(typeRef, "—");
1040
		typeRef = replaceStart(typeRef, "\u002d");
1041
		typeRef = replaceStart(typeRef, "\u2013");
1042
		typeRef = replaceStart(typeRef, "--");
1043
		return typeRef;
1044
	}
1045

    
1046
	/**
1047
	 * @param typeType
1048
	 * @param typeText
1049
	 * @param typeDesignation
1050
	 */
1051
	private void makeNameTypeDesignations(StringBuffer typeType, String typeText, TypeDesignationBase typeDesignation) {
1052
		if (typeType.toString().trim().equalsIgnoreCase("Type")){
1053
			//do nothing
1054
		}else if (typeType.toString().trim().equalsIgnoreCase("Lectotype")){
1055
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.LECTOTYPE());
1056
		}else if (typeType.toString().trim().equalsIgnoreCase("Syntype")){
1057
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.SYNTYPE());
1058
		}else{
1059
			logger.warn("Unhandled type string: " + typeType + "(" + CharUtils.unicodeEscaped(typeType.charAt(0)) + ")");
1060
		}
1061
		//clean
1062
		typeText = cleanNameType(typeText);
1063
		//create name
1064
		BotanicalName nameType = (BotanicalName)parser.parseFullName(typeText, NomenclaturalCode.ICBN, Rank.SPECIES());
1065
		((NameTypeDesignation) typeDesignation).setTypeName(nameType);
1066
		//TODO wie können NameTypes den Namen zugeordnet werden? -  wird aber vom Portal via NameCache matching gemacht
1067
	}
1068

    
1069

    
1070
	private String cleanNameType(String typeText) {
1071
		String result;
1072
		String[] split = typeText.split("\\[.*\\].?");
1073
		result = split[0];
1074
		return result;
1075
	}
1076

    
1077

    
1078
	/**
1079
	 * @param typeType
1080
	 * @param typeText
1081
	 * @param typeDesignation
1082
	 */
1083
	protected void makeSpecimenTypeDesignation(StringBuffer typeType, String typeText, TypeDesignationBase typeDesignation) {
1084
		if (typeType.toString().trim().equalsIgnoreCase("Type")){
1085
			//do nothing
1086
		}else if (typeType.toString().trim().equalsIgnoreCase("Neotype") || typeType.toString().trim().equalsIgnoreCase("Neotypes")){
1087
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.NEOTYPE());
1088
		}else if (typeType.toString().trim().equalsIgnoreCase("Syntype") || typeType.toString().trim().equalsIgnoreCase("Syntypes")){
1089
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.SYNTYPE());
1090
		}else if (typeType.toString().trim().equalsIgnoreCase("Lectotype")){
1091
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.LECTOTYPE());
1092
		}else if (typeType.toString().trim().equalsIgnoreCase("Paratype")){
1093
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.PARATYPE());
1094
		}else{
1095
			logger.warn("Unhandled type string: " + typeType);
1096
		}
1097
		Specimen specimen = Specimen.NewInstance();
1098
		if (typeText.length() > 255){
1099
			specimen.setTitleCache(typeText.substring(0, 252) + "...", true);
1100
		}else{
1101
			specimen.setTitleCache(typeText, true);
1102
		}
1103
		specimen.addDefinition(typeText, Language.ENGLISH());
1104
		((SpecimenTypeDesignation) typeDesignation).setTypeSpecimen(specimen);
1105
	}
1106

    
1107
	private TypeDesignationBase getTypeDesignationAndReference(StringBuffer typeType) {
1108
		TypeDesignationBase result;
1109
		Reference ref = parseTypeDesignationReference(typeType);
1110
		if (typeType.indexOf(" species")>-1 || typeType.indexOf("genus")>-1){
1111
			if (typeType.indexOf(" species")>-1 ){
1112
				result = NameTypeDesignation.NewInstance();
1113
				int start = typeType.indexOf(" species");
1114
				typeType.replace(start, start + " species".length(), "");
1115
			}else {
1116
				result = NameTypeDesignation.NewInstance();
1117
				int start = typeType.indexOf(" genus");
1118
				typeType.replace(start, start + " genus".length(), "");
1119
			}
1120
		}else{
1121
			result = SpecimenTypeDesignation.NewInstance();
1122
		}
1123
		result.setCitation(ref);
1124
		return result;
1125
	}
1126

    
1127

    
1128
	private Reference parseTypeDesignationReference(StringBuffer typeType) {
1129
		Reference result = null;
1130
		String reBracketReference = "\\(.*\\)";
1131
		Pattern patBracketReference = Pattern.compile(reBracketReference);
1132
		Matcher matcher = patBracketReference.matcher(typeType);
1133
		if (matcher.find()){
1134
			String refString = matcher.group();
1135
			int start = typeType.indexOf(refString);
1136
			typeType.replace(start, start + refString.length(), "");
1137
			refString = refString.replace("(", "").replace(")", "").trim();
1138
			Reference ref = ReferenceFactory.newGeneric();
1139
			ref.setTitleCache(refString, true);
1140
			result = ref;
1141
		}
1142
		return result;
1143
	}
1144

    
1145

    
1146
	/**
1147
	 * @param state
1148
	 * @param elNom
1149
	 * @param taxon
1150
	 */
1151
	//body/taxon/
1152
	private HomotypicalGroup handleNomTaxon(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup, boolean isSynonym) {
1153
		NonViralName name = makeName(taxon, homotypicalGroup, isSynonym);
1154
		String num = null;
1155
		
1156
		boolean hasGenusInfo = false;
1157
		TeamOrPersonBase lastTeam = null;
1158
		
1159
		//genus
1160
		List<Element> elGenus = XmlHelp.getAttributedChildListWithValue(elNom, "name", "class", "genus");
1161
		if (elGenus.size() > 0){
1162
			hasGenusInfo = true;
1163
		}else{
1164
			logger.debug ("No Synonym Genus");
1165
		}
1166
		//infra rank -> needed to handle authors correctly
1167
		List<Element> elInfraRank = XmlHelp.getAttributedChildListWithValue(elNom, "name", "class", "infrank");
1168
		Rank infraRank = null;
1169
		infraRank = handleInfRank(name, elInfraRank, infraRank);
1170
		
1171
		//get left over elements
1172
		List<Element> elements = elNom.getChildren();
1173
		elements.removeAll(elInfraRank);
1174
		
1175
		for (Element element : elements){
1176
			if (element.getName().equals("name")){
1177
				String classValue = element.getAttributeValue("class");
1178
				String value = element.getValue().trim();
1179
				if (classValue.equalsIgnoreCase("genus") || classValue.equalsIgnoreCase("family") ){
1180
					name.setGenusOrUninomial(value);
1181
				}else if (classValue.equalsIgnoreCase("family") ){
1182
					name.setGenusOrUninomial(value);
1183
					name.setRank(Rank.FAMILY());
1184
				}else if (classValue.equalsIgnoreCase("subgenus")){
1185
					//name.setInfraGenericEpithet(value);
1186
					name.setNameCache(value.replace(":", "").trim());
1187
					name.setRank(Rank.SUBGENUS());
1188
				}else if (classValue.equalsIgnoreCase("epithet") ){
1189
					if (hasGenusInfo == true){
1190
						name.setSpecificEpithet(value);
1191
					}else{
1192
						handleInfraspecificEpithet(element, classValue, name);
1193
					}
1194
				}else if (classValue.equalsIgnoreCase("author")){
1195
					handleNameAuthors(element, name);
1196
				}else if (classValue.equalsIgnoreCase("paraut")){
1197
					handleBasionymAuthor(state, element, name, false);
1198
				}else if (classValue.equalsIgnoreCase("infrauthor") || classValue.equalsIgnoreCase("infraut")){
1199
					handleInfrAuthor(state, element, name, true);
1200
				}else if (classValue.equalsIgnoreCase("infrapar") || classValue.equalsIgnoreCase("infrpar") || classValue.equalsIgnoreCase("parauthor") ){
1201
					handleBasionymAuthor(state, element, name, true);
1202
				}else if (classValue.equalsIgnoreCase("infrepi")){
1203
					handleInfrEpi(name, infraRank, value);
1204
				}else if (classValue.equalsIgnoreCase("pub")){
1205
					lastTeam = handleNomenclaturalReference(name, value);
1206
				}else if (classValue.equalsIgnoreCase("usage")){
1207
					lastTeam = handleNameUsage(taxon, name, value, lastTeam);
1208
				}else if (classValue.equalsIgnoreCase("note")){
1209
					handleNameNote(name, value);
1210
				}else if (classValue.equalsIgnoreCase("num")){
1211
					if (num != null){
1212
						logger.warn("Duplicate num: " + value);
1213
					}else{
1214
						num = value;
1215
					}
1216
					if (isSynonym == true){
1217
						logger.warn("Synonym should not have a num");
1218
					}
1219
				}else if (classValue.equalsIgnoreCase("typification")){
1220
					logger.warn("Typification should not be a nom class");
1221
				}else{
1222
					logger.warn("Unhandled name class: " +  classValue);
1223
				}
1224
			}else if(element.getName().equals("homonym")){
1225
				handleHomonym(state, element, name);
1226
			}else{
1227
				// child element is not "name"
1228
				unhandledNomChildren.add(element.getName());
1229
			}
1230
		}
1231
		
1232
		//handle key
1233
		if (! isSynonym){
1234
			String taxonString = name.getNameCache();
1235
			//try to find matching lead nodes 
1236
			UnmatchedLeadsKey leadsKey = UnmatchedLeadsKey.NewInstance(num, taxonString);
1237
			Set<PolytomousKeyNode> matchingNodes = handleMatchingNodes(state, taxon, leadsKey);
1238
			//same without using the num
1239
			if (num != null){
1240
				UnmatchedLeadsKey noNumLeadsKey = UnmatchedLeadsKey.NewInstance("", taxonString);
1241
				handleMatchingNodes(state, taxon, noNumLeadsKey);
1242
			}
1243
			if (matchingNodes.isEmpty() && num != null){
1244
				logger.warn("Taxon has num but no matching nodes exist: " + num+ ", Key: " + leadsKey.toString());
1245
			}
1246
		}
1247
		
1248
		//test nom element has no text
1249
		if (StringUtils.isNotBlank(elNom.getTextNormalize().replace("—", "").replace("\u002d","").replace("\u2013", ""))){
1250
			String strElNom = elNom.getTextNormalize();
1251
			if ("?".equals(strElNom)){
1252
				handleQuestionMark(name, taxon);
1253
			}
1254
//			Character c = strElNom.charAt(0);
1255
			//System.out.println(CharUtils.unicodeEscaped(c));
1256
			logger.warn("Nom tag has text: " + strElNom);
1257
		}
1258
		
1259
		return name.getHomotypicalGroup();
1260
	}
1261

    
1262

    
1263
	private void handleQuestionMark(NonViralName name, Taxon taxon) {
1264
		int count = name.getTaxonBases().size();
1265
		if (count != 1){
1266
			logger.warn("Name has " + count + " taxa. This is not handled for question mark");
1267
		}else{
1268
			TaxonBase taxonBase = (TaxonBase)name.getTaxonBases().iterator().next();
1269
			taxonBase.setDoubtful(true);
1270
		}
1271
	}
1272

    
1273

    
1274
	//merge with handleNomTaxon	
1275
	private void handleHomonym(EfloraImportState state, Element elHomonym, NonViralName upperName) {
1276
		verifyNoAttribute(elHomonym);
1277
		
1278
		//hommonym name
1279
		BotanicalName homonymName = BotanicalName.NewInstance(upperName.getRank());
1280
		homonymName.setGenusOrUninomial(upperName.getGenusOrUninomial());
1281
		homonymName.setInfraGenericEpithet(upperName.getInfraGenericEpithet());
1282
		homonymName.setSpecificEpithet(upperName.getSpecificEpithet());
1283
		homonymName.setInfraSpecificEpithet(upperName.getInfraSpecificEpithet());
1284

    
1285
		for (Element elName : (List<Element>)elHomonym.getChildren("name")){
1286
			String classValue = elName.getAttributeValue("class");
1287
			String value = elName.getValue().trim();
1288
			if (classValue.equalsIgnoreCase("genus") ){
1289
				homonymName.setGenusOrUninomial(value);
1290
			}else if (classValue.equalsIgnoreCase("epithet") ){
1291
				homonymName.setSpecificEpithet(value);
1292
			}else if (classValue.equalsIgnoreCase("author")){
1293
				handleNameAuthors(elName, homonymName);
1294
			}else if (classValue.equalsIgnoreCase("paraut")){
1295
				handleBasionymAuthor(state, elName, homonymName, true);
1296
			}else if (classValue.equalsIgnoreCase("pub")){
1297
				handleNomenclaturalReference(homonymName, value);
1298
			}else if (classValue.equalsIgnoreCase("note")){
1299
				handleNameNote(homonymName, value);
1300
			}else{
1301
				logger.warn("Unhandled class value: " + classValue);
1302
			}
1303
		}
1304
		//TODO verify other information
1305
		
1306

    
1307
		//rel
1308
		boolean homonymIsLater = false;
1309
		NameRelationshipType relType = NameRelationshipType.LATER_HOMONYM();
1310
		if (upperName.getNomenclaturalReference() != null && homonymName.getNomenclaturalReference() != null){
1311
			TimePeriod homonymYear = homonymName.getNomenclaturalReference().getDatePublished();
1312
			TimePeriod nameYear = upperName.getNomenclaturalReference().getDatePublished();
1313
			homonymIsLater = homonymYear.getStart().compareTo(nameYear.getStart())  > 0;
1314
		}else{
1315
			if (upperName.getNomenclaturalReference() == null){
1316
				logger.warn("Homonym parent does not have a nomenclatural reference or year: " + upperName.getTitleCache());
1317
			}
1318
			if (homonymName.getNomenclaturalReference() == null){
1319
				logger.warn("Homonym does not have a nomenclatural reference or year: " + homonymName.getTitleCache());
1320
			}
1321
		}
1322
		if (homonymIsLater){
1323
			homonymName.addRelationshipToName(upperName, relType, null);
1324
		}else{
1325
			upperName.addRelationshipToName(homonymName, relType, null);
1326
		}
1327
		
1328
	}
1329

    
1330

    
1331
	/**
1332
	 * @param state
1333
	 * @param taxon
1334
	 * @param leadsKey
1335
	 * @return
1336
	 */
1337
	private Set<PolytomousKeyNode> handleMatchingNodes(EfloraImportState state, Taxon taxon, UnmatchedLeadsKey leadsKey) {
1338
		Set<PolytomousKeyNode> matchingNodes = state.getUnmatchedLeads().getNodes(leadsKey);
1339
		for (PolytomousKeyNode matchingNode : matchingNodes){
1340
			state.getUnmatchedLeads().removeNode(leadsKey, matchingNode);
1341
			matchingNode.setTaxon(taxon);
1342
			state.getPolytomousKeyNodesToSave().add(matchingNode);
1343
		}
1344
		return matchingNodes;
1345
	}
1346

    
1347

    
1348
	private void handleNameNote(NonViralName name, String value) {
1349
		logger.warn("Name note: " + value + ". Available in portal?");
1350
		Annotation annotation = Annotation.NewInstance(value, AnnotationType.EDITORIAL(), Language.DEFAULT());
1351
		name.addAnnotation(annotation);
1352
	}
1353

    
1354

    
1355
	/**
1356
	 * @param taxon
1357
	 * @param name
1358
	 * @param value
1359
	 */
1360
	protected TeamOrPersonBase handleNameUsage(Taxon taxon, NonViralName name, String referenceTitle, TeamOrPersonBase lastTeam) {
1361
		Reference ref = ReferenceFactory.newGeneric();
1362
		referenceTitle = removeStartingSymbols(referenceTitle, ref);
1363
		
1364
		ref.setTitleCache(referenceTitle, true);
1365
		String microReference = parseReferenceYearAndDetail(ref);
1366
		TeamOrPersonBase team = getReferenceAuthor(ref);
1367
		parseReferenceType(ref);
1368
		if (team == null){
1369
			team = lastTeam;
1370
		}
1371
		ref.setAuthorTeam(team);
1372
		
1373
		TaxonDescription description = getDescription(taxon);
1374
		TextData textData = TextData.NewInstance(Feature.CITATION());
1375
		textData.addSource(null, null, ref, microReference, name, null);
1376
		description.addElement(textData);
1377
		return team;
1378
	}
1379

    
1380

    
1381
	/**
1382
	 * @param referenceTitle
1383
	 * @param ref
1384
	 * @return
1385
	 */
1386
	private String removeStartingSymbols(String referenceTitle,	Reference ref) {
1387
		if (referenceTitle.startsWith(";") || referenceTitle.startsWith(",") || referenceTitle.startsWith(":")){
1388
			referenceTitle = referenceTitle.substring(1).trim();
1389
			ref.setTitleCache(referenceTitle);
1390
		}
1391
		return referenceTitle;
1392
	}
1393

    
1394

    
1395
	private void parseReferenceType(Reference ref) {
1396
		String title = ref.getTitle();
1397
		if (title == null){
1398
			return;
1399
		}
1400
		title = title.trim();
1401
		//no in reference
1402
		if (! title.startsWith("in ")){
1403
			ref.setType(ReferenceType.Book);
1404
			return;
1405
		}
1406
		
1407
		title = title.substring(3);
1408
		//in reference
1409
		//no ,
1410
		if (title.indexOf(",") == -1){
1411
			ref.setType(ReferenceType.Article);
1412
			IJournal journal = ReferenceFactory.newJournal();
1413
			journal.setTitle(title);
1414
			ref.setTitle(null);
1415
			ref.setInJournal(journal);
1416
			//return;
1417
		}else{
1418
			//,-references
1419
			ref.setType(ReferenceType.BookSection);
1420
			String[] split = (title).split(",\\s*[A-Z]");
1421
			if (split.length <= 1){
1422
				logger.warn("Can not fully decide what reference type. Guess it is a book section: " + title );
1423
			}
1424
			IBook book = ReferenceFactory.newBook();
1425
			Team bookTeam = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1426
			try {
1427
				title = title.substring(split[0].length() + 1).trim();
1428
			} catch (Exception e) {
1429
				logger.error("ERROR occurred when trying to split title: " +  title + "; split[0]: + " + split[0]);
1430
			}
1431
			book.setTitle(title);
1432
			book.setAuthorTeam(bookTeam);
1433
			book.setDatePublished(ref.getDatePublished());
1434
			ref.setTitle(null);
1435
			ref.setInBook(book);
1436
		}		
1437
	}
1438

    
1439

    
1440
	protected Team getReferenceAuthor (Reference ref) {
1441
		boolean isCache = false;
1442
		String referenceTitle = ref.getTitle();
1443
		if (referenceTitle == null){
1444
			isCache = true;
1445
			referenceTitle = ref.getTitleCache();
1446
		}
1447
		//in references
1448
		String[] split = (" " + referenceTitle).split(" in ");
1449
		if (split.length > 1){
1450
			if (StringUtils.isNotBlank(split[0])){
1451
				//' in ' is within the reference string, take the preceding string as the team
1452
				Team team = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1453
				if (! isCache){
1454
					ref.setTitle("in " + split[1]);
1455
				}
1456
				return team;
1457
			}else{
1458
				//string starts with in therefore no author is given
1459
				return null;
1460
			}
1461
		}
1462
		//no ,-reference
1463
		split = referenceTitle.split(",");
1464
		if (split.length < 2){
1465
			//no author is given
1466
			return null;
1467
		}
1468
		
1469
		//,-references
1470
		split = (referenceTitle).split(",\\s*[A-Z]");
1471
		if (split.length > 1){
1472
			Team team = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1473
			if (! isCache){
1474
				ref.setTitle(referenceTitle.substring(split[0].length()+1).trim());
1475
			}
1476
			return team;
1477
		}else{
1478
			logger.warn("Can't decide if a usage has an author: " + referenceTitle );
1479
			return null;
1480
		}
1481
	}
1482

    
1483

    
1484
	/**
1485
	 * Replaced by <homonym> tag but still in use for exceptions
1486
	 * @param detail
1487
	 * @param name
1488
	 * @return
1489
	 */
1490
	protected String parseHomonym(String detail, NonViralName name) {
1491
		String result;
1492
		if (detail == null){
1493
			return detail;
1494
		}
1495

    
1496
		
1497
		//non RE
1498
		String reNon = "(\\s|,)non\\s";
1499
		Pattern patReference = Pattern.compile(reNon);
1500
		Matcher matcher = patReference.matcher(detail);
1501
		if (matcher.find()){
1502
			int start = matcher.start();
1503
			int end = matcher.end();
1504
			
1505
			if (detail != null){
1506
				logger.warn("Unhandled non part: " + detail.substring(start));
1507
				return detail;
1508
			}
1509
			
1510
			result = detail.substring(0, start);
1511

    
1512
			//homonym string
1513
			String homonymString = detail.substring(end);
1514
			
1515
			//hommonym name
1516
			BotanicalName homonymName = BotanicalName.NewInstance(name.getRank());
1517
			homonymName.setGenusOrUninomial(name.getGenusOrUninomial());
1518
			homonymName.setInfraGenericEpithet(name.getInfraGenericEpithet());
1519
			homonymName.setSpecificEpithet(name.getSpecificEpithet());
1520
			homonymName.setInfraSpecificEpithet(name.getInfraSpecificEpithet());
1521
			Reference homonymNomRef = ReferenceFactory.newGeneric();
1522
			homonymNomRef.setTitleCache(homonymString);
1523
			String homonymNomRefDetail = parseReferenceYearAndDetail(homonymNomRef);
1524
			homonymName.setNomenclaturalMicroReference(homonymNomRefDetail);
1525
			String authorTitle = homonymNomRef.getTitleCache();
1526
			Team team = Team.NewTitledInstance(authorTitle, authorTitle);
1527
			homonymNomRef.setAuthorTeam(team);
1528
			homonymNomRef.setTitle("");
1529
			homonymNomRef.setProtectedTitleCache(false);
1530
			
1531
			//rel
1532
			boolean homonymIsLater = false;
1533
			NameRelationshipType relType = NameRelationshipType.LATER_HOMONYM();
1534
			TimePeriod homonymYear = homonymNomRef.getDatePublished();
1535
			if (name.getNomenclaturalReference() != null){
1536
				TimePeriod nameYear = name.getNomenclaturalReference().getDatePublished();
1537
				homonymIsLater = homonymYear.getStart().compareTo(nameYear.getStart())  > 0;
1538
			}else{
1539
				logger.warn("Classification name has no nomenclatural reference");
1540
			}
1541
			if (homonymIsLater){
1542
				homonymName.addRelationshipToName(name, relType, null);
1543
			}else{
1544
				name.addRelationshipToName(homonymName, relType, null);
1545
			}
1546
			
1547
		}else{
1548
			return detail;
1549
		}
1550
		return result;
1551
	}
1552

    
1553

    
1554
	/**
1555
	 * @Xpath body/taxon/nomenclature/homotypes/nom/name[@class="pub"]
1556
	 * @param name
1557
	 * @param value
1558
	 */
1559
	protected TeamOrPersonBase handleNomenclaturalReference(NonViralName name, String value) {
1560
		Reference nomRef = ReferenceFactory.newGeneric();
1561
		nomRef.setTitleCache(value, true);
1562
		parseNomStatus(nomRef, name);
1563
		String microReference = parseReferenceYearAndDetail(nomRef);
1564
		name.setNomenclaturalReference(nomRef);
1565
		microReference = parseHomonym(microReference, name);
1566
		name.setNomenclaturalMicroReference(microReference);
1567
		TeamOrPersonBase  team = (TeamOrPersonBase)name.getCombinationAuthorTeam();
1568
		if (team == null){
1569
			logger.warn("Name has nom. ref. but no author team. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
1570
		}else{
1571
			nomRef.setAuthorTeam(team);
1572
		}
1573
		return team;
1574
	}
1575

    
1576
	private void handleInfrAuthor(EfloraImportState state, Element elAuthor, NonViralName name, boolean overwrite) {
1577
		String strAuthor = elAuthor.getValue().trim();
1578
		if (strAuthor.endsWith(",")){
1579
			strAuthor = strAuthor.substring(0, strAuthor.length() -1);
1580
		}
1581
		TeamOrPersonBase[] team = getTeam(strAuthor);
1582
		if (name.getCombinationAuthorTeam() != null && overwrite == false){
1583
			logger.warn("Try to write combination author for a name that already has a combination author. Neglected.");
1584
		}else{
1585
			name.setCombinationAuthorTeam(team[0]);
1586
			name.setExCombinationAuthorTeam(team[1]);
1587
		}
1588
		
1589
		
1590
	}
1591

    
1592

    
1593
	/**
1594
	 * Sets the names rank according to the infrank value
1595
	 * @param name
1596
	 * @param elements
1597
	 * @param elInfraRank
1598
	 * @param infraRank
1599
	 * @return
1600
	 */
1601
	private Rank handleInfRank(NonViralName name, List<Element> elInfraRank, Rank infraRank) {
1602
		if (elInfraRank.size() == 1){
1603
			String strRank = elInfraRank.get(0).getTextNormalize();
1604
			try {
1605
				infraRank = Rank.getRankByNameOrAbbreviation(strRank);
1606
			} catch (UnknownCdmTypeException e) {
1607
				try{
1608
					infraRank = Rank.getRankByNameOrAbbreviation(strRank + ".");
1609
				} catch (UnknownCdmTypeException e2) {
1610
					logger.warn("Unknown infrank " + strRank + ". Set infraRank to (null).");
1611
				}
1612
			}
1613
		}else if (elInfraRank.size() > 1){
1614
			logger.warn ("There is more than 1 infrank");
1615
		}
1616
		if (infraRank != null){
1617
			name.setRank(infraRank);
1618
		}
1619
		return infraRank;
1620
	}
1621

    
1622

    
1623
	private void handleInfrEpi(NonViralName name, Rank infraRank, String value) {
1624
		if (infraRank != null && infraRank.isInfraSpecific()){
1625
			name.setInfraSpecificEpithet(value);
1626
			if (CdmUtils.isCapital(value)){
1627
				logger.warn("Infraspecific epithet starts with a capital letter: " + value);
1628
			}
1629
		}else if (infraRank != null && infraRank.isInfraGeneric()){
1630
			name.setInfraGenericEpithet(value);
1631
			if (! CdmUtils.isCapital(value)){
1632
				logger.warn("Infrageneric epithet does not start with a capital letter: " + value);
1633
			}
1634
		}else{
1635
			logger.warn("Infrepi could not be handled: " + value);
1636
		}
1637
	}
1638

    
1639

    
1640

    
1641
	/**
1642
	 * Returns the (empty) with the correct homotypical group depending on the taxon status
1643
	 * @param taxon
1644
	 * @param homotypicalGroup
1645
	 * @param isSynonym
1646
	 * @return
1647
	 */
1648
	private NonViralName makeName(Taxon taxon,HomotypicalGroup homotypicalGroup, boolean isSynonym) {
1649
		NonViralName name;
1650
		if (isSynonym){
1651
			name = BotanicalName.NewInstance(Rank.SPECIES(), homotypicalGroup);
1652
			SynonymRelationshipType synonymType = SynonymRelationshipType.HETEROTYPIC_SYNONYM_OF();
1653
			if (taxon.getHomotypicGroup().equals(homotypicalGroup)){
1654
				synonymType = SynonymRelationshipType.HOMOTYPIC_SYNONYM_OF();
1655
			}
1656
			taxon.addSynonymName(name, synonymType);
1657
		}else{
1658
			name = (NonViralName)taxon.getName();
1659
		}
1660
		return name;
1661
	}
1662

    
1663

    
1664
	/**
1665
	 * @param element
1666
	 * @param taxon
1667
	 */
1668
	private void handleInfraspecificEpithet(Element element, String attrValue, NonViralName name) {
1669
		String value = element.getTextNormalize();
1670
		if (value.indexOf("subsp.") != -1){
1671
			//TODO genus and species epi
1672
			String infrEpi = value.substring(value.indexOf("subsp.") + 6).trim();
1673
			name.setInfraSpecificEpithet(infrEpi);
1674
			name.setRank(Rank.SUBSPECIES());
1675
		}else if (value.indexOf("var.") != -1){
1676
			//TODO genus and species epi
1677
			String infrEpi = value.substring(value.indexOf("var.") + 4).trim();
1678
			name.setInfraSpecificEpithet(infrEpi);
1679
			name.setRank(Rank.VARIETY());
1680
		}else{
1681
			logger.warn("Unhandled infraspecific type: " + value);
1682
		}
1683
	}
1684

    
1685

    
1686
	/**
1687
	 * @param state
1688
	 * @param element
1689
	 * @param name
1690
	 */
1691
	private void handleBasionymAuthor(EfloraImportState state, Element elBasionymAuthor, NonViralName name, boolean overwrite) {
1692
		String strAuthor = elBasionymAuthor.getValue().trim();
1693
		Pattern reBasionymAuthor = Pattern.compile("^\\(.*\\)$");
1694
		if (reBasionymAuthor.matcher(strAuthor).matches()){
1695
			strAuthor = strAuthor.substring(1, strAuthor.length()-1);
1696
		}else{
1697
			logger.warn("Brackets are missing for original combination author " + strAuthor);
1698
		}
1699
		TeamOrPersonBase[] basionymTeam = getTeam(strAuthor);
1700
		if (name.getBasionymAuthorTeam() != null && overwrite == false){
1701
			logger.warn("Try to write basionym author for a name that already has a basionym author. Neglected.");
1702
		}else{
1703
			name.setBasionymAuthorTeam(basionymTeam[0]);
1704
			name.setExBasionymAuthorTeam(basionymTeam[1]);
1705

    
1706
		}
1707
	}
1708

    
1709
	/**
	 * Maps an author string to the uuid of the team or person that was created for it.
	 * Filled by getTeam() and read by getUuidTeam().
	 */
	private Map<String, UUID> teamMap = new HashMap<String, UUID>();
1710
	/**
1711
	 * @param elAuthors
1712
	 * @param name
1713
	 * @param elNom 
1714
	 */
1715
	private void handleNameAuthors(Element elAuthor, NonViralName name) {
1716
		if (name.getCombinationAuthorTeam() != null){
1717
			logger.warn("Name already has a combination author. Name: " +  name.getTitleCache() + ", Author: " + elAuthor.getTextNormalize());
1718
		}
1719
		String strAuthor = elAuthor.getValue().trim();
1720
		if (strAuthor.endsWith(",")){
1721
			strAuthor = strAuthor.substring(0, strAuthor.length() -1);
1722
		}
1723
		if (strAuthor.indexOf("(") > -1 || strAuthor.indexOf(")") > -1){
1724
			logger.warn("Author has brackets. Basionym authors should be handled in separate tags: " + strAuthor);
1725
		}
1726
		TeamOrPersonBase[] team = getTeam(strAuthor);
1727
		name.setCombinationAuthorTeam(team[0]);
1728
		name.setExCombinationAuthorTeam(team[1]);
1729
	}
1730

    
1731

    
1732
	/**
1733
	 * @param strAuthor
1734
	 * @return
1735
	 */
1736
	private TeamOrPersonBase[] getTeam(String strAuthor) {
1737
		TeamOrPersonBase[] result = new TeamOrPersonBase[2];
1738
		String[] split = strAuthor.split(" ex ");
1739
		String strBaseAuthor = null;
1740
		String strExAuthor = null;
1741
		
1742
		if (split.length == 2){
1743
			strBaseAuthor = split[1]; 
1744
			strExAuthor = split[0];	
1745
		}else if (split.length == 1){
1746
			strBaseAuthor = split[0];
1747
		}else{
1748
			logger.warn("Could not parse (ex) author: " + strAuthor);
1749
		}
1750
		result[0] = getUuidTeam(strBaseAuthor);
1751
		if (result[0] == null){
1752
			result[0] = parseSingleTeam(strBaseAuthor);
1753
			teamMap.put(strBaseAuthor, result[0].getUuid());
1754
		}
1755
		if (strExAuthor != null){
1756
			result[1] = getUuidTeam(strExAuthor);
1757
			if (result[1] == null){
1758
				result[1] = Team.NewInstance();
1759
				result[1].setTitleCache(strExAuthor, true);
1760
				teamMap.put(strExAuthor, result[1].getUuid());
1761
			}
1762
		
1763
		}	
1764
		return result;
1765
	}
1766

    
1767

    
1768
	protected TeamOrPersonBase parseSingleTeam(String strBaseAuthor) {
1769
		TeamOrPersonBase result;
1770
		String[] split = strBaseAuthor.split("&");
1771
		if (split.length > 1){
1772
			result = Team.NewInstance();
1773
			for (String personString : split){
1774
				Person person = makePerson(personString);
1775
				((Team)result).addTeamMember(person);
1776
			}
1777
		}else{
1778
			result = makePerson(strBaseAuthor.trim());
1779
		}
1780
		return result;
1781
	}
1782

    
1783

    
1784
	/**
1785
	 * @param personString
1786
	 * @return
1787
	 */
1788
	private Person makePerson(String personString) {
1789
		personString = personString.trim();
1790
		Person person = Person.NewTitledInstance(personString);
1791
		person.setNomenclaturalTitle(personString);
1792
		return person;
1793
	}
1794

    
1795

    
1796
	/**
1797
	 * @param result
1798
	 * @param strBaseAuthor
1799
	 */
1800
	private TeamOrPersonBase getUuidTeam(String strBaseAuthor) {
1801
		UUID uuidTeam = teamMap.get(strBaseAuthor);
1802
		return CdmBase.deproxy(getAgentService().find(uuidTeam), TeamOrPersonBase.class);
1803
	}
1804

    
1805

    
1806
	private void handleDescription(EfloraImportState state, Element elDescription, Taxon taxon, Set<String> unhandledChildren) {
1807
		verifyNoAttribute(elDescription);
1808
		
1809
		List<Element> elements = elDescription.getChildren();
1810
		for (Element element : elements){
1811
			if (element.getName().equalsIgnoreCase("char")){
1812
				handleChar(state, element, taxon);
1813
			}else{
1814
				logger.warn("Unhandled description child: " + element.getName());
1815
			}
1816
		}
1817
		
1818
	}
1819
	
1820
	
1821
	/**
1822
	 * @param state
1823
	 * @param element
1824
	 * @param taxon
1825
	 */
1826
	private void handleChar(EfloraImportState state, Element element, Taxon taxon) {
1827
		List<Attribute> attributes = element.getAttributes();
1828
		for (Attribute attribute : attributes){
1829
			if (! attribute.getName().equalsIgnoreCase("class")){
1830
				logger.warn("Char has unhandled attribute " +  attribute.getName());
1831
			}else{
1832
				String classValue = attribute.getValue();
1833
				Feature feature = getFeature(classValue, state);
1834
				if (feature == null){
1835
					logger.warn("Unhandled feature: " + classValue);
1836
				}else{
1837
					String value = element.getValue();
1838
					addDescriptionElement(state, taxon, value, feature, null);
1839
				}
1840
				
1841
			}
1842
		}
1843
		
1844
		List<Element> elements = element.getChildren();
1845
		if (! elements.isEmpty()){
1846
			logger.warn("Char has unhandled children");
1847
		}
1848
	}
1849

    
1850

    
1851
	/**
1852
	 * @param taxon
1853
	 * @return
1854
	 */
1855
	protected TaxonDescription getDescription(Taxon taxon) {
1856
		for (TaxonDescription description : taxon.getDescriptions()){
1857
			if (! description.isImageGallery()){
1858
				return description;
1859
			}
1860
		}
1861
		TaxonDescription newDescription = TaxonDescription.NewInstance(taxon);
1862
		return newDescription;
1863
	}
1864

    
1865

    
1866
	/**
1867
	 * @param classValue
1868
	 * @param state 
1869
	 * @return
1870
	 * @throws UndefinedTransformerMethodException 
1871
	 */
1872
	private Feature getFeature(String classValue, EfloraImportState state) {
1873
		UUID uuid;
1874
		try {
1875
			uuid = state.getTransformer().getFeatureUuid(classValue);
1876
			if (uuid == null){
1877
				logger.info("Uuid is null for " + classValue);
1878
			}
1879
			String featureText = StringUtils.capitalize(classValue);
1880
			Feature feature = getFeature(state, uuid, featureText, featureText, classValue);
1881
			if (feature == null){
1882
				throw new NullPointerException(classValue + " not recognized as a feature");
1883
			}
1884
			return feature;
1885
		} catch (Exception e) {
1886
			logger.warn("Could not create feature for " + classValue + ": " + e.getMessage()) ;
1887
			return Feature.UNKNOWN();
1888
		}
1889
	}
1890

    
1891

    
1892
	/**
1893
	 * @param state
1894
	 * @param element
1895
	 * @param taxon
1896
	 * @param unhandledTitleClassess 
1897
	 */
1898
	private void handleTitle(EfloraImportState state, Element element, Taxon taxon, Set<String> unhandledTitleClassess) {
1899
		// attributes
1900
		List<Attribute> attributes = element.getAttributes();
1901
		for (Attribute attribute : attributes){
1902
			if (! attribute.getName().equalsIgnoreCase("class") ){
1903
				if (! attribute.getName().equalsIgnoreCase("num")){
1904
					logger.warn("Title has unhandled attribute " +  attribute.getName());
1905
				}else{
1906
					//TODO num attribute in taxon
1907
				}
1908
			}else{
1909
				String classValue = attribute.getValue();
1910
				try {
1911
					Rank rank;
1912
					try {
1913
						rank = Rank.getRankByNameOrAbbreviation(classValue);
1914
					} catch (Exception e) {
1915
						//TODO nc
1916
						rank = Rank.getRankByEnglishName(classValue, NomenclaturalCode.ICBN, false);
1917
					}
1918
					taxon.getName().setRank(rank);
1919
					if (rank.equals(Rank.FAMILY()) || rank.equals(Rank.GENUS())){
1920
						handleGenus(element.getValue(), taxon.getName());
1921
					}else if (rank.equals(Rank.SUBGENUS())){
1922
						handleSubGenus(element.getValue(), taxon.getName());
1923
					}else if (rank.equals(Rank.SECTION_BOTANY())){
1924
						handleSection(element.getValue(), taxon.getName());
1925
					}else if (rank.equals(Rank.SPECIES())){
1926
						handleSpecies(element.getValue(), taxon.getName());
1927
					}else if (rank.equals(Rank.SUBSPECIES())){
1928
						handleSubSpecies(element.getValue(), taxon.getName());
1929
					}else if (rank.equals(Rank.VARIETY())){
1930
						handleVariety(element.getValue(), taxon.getName());
1931
					}else{
1932
						logger.warn("Unhandled rank: " + rank.getLabel());
1933
					}
1934
				} catch (UnknownCdmTypeException e) {
1935
					logger.warn("Unknown rank " + classValue);
1936
					unhandledTitleClassess.add(classValue);
1937
				}
1938
			}
1939
		}
1940
		List<Element> elements = element.getChildren();
1941
		if (! elements.isEmpty()){
1942
			logger.warn("Title has unexpected children");
1943
		}
1944
		UUID uuidTitle = EfloraTransformer.uuidTitle;
1945
		ExtensionType titleExtension = this.getExtensionType(state, uuidTitle, "title", "title", "title");
1946
		taxon.addExtension(element.getTextNormalize(), titleExtension);
1947
		
1948
	}
1949

    
1950

    
1951
	/**
1952
	 * @param value
1953
	 * @param taxonNameBase 
1954
	 */
1955
	private void handleSubGenus(String value, TaxonNameBase taxonNameBase) {
1956
		String name = value.replace("Subgenus", "").trim();
1957
		((NonViralName)taxonNameBase).setInfraGenericEpithet(name);
1958
	}
1959
	
1960
	/**
1961
	 * @param value
1962
	 * @param taxonNameBase 
1963
	 */
1964
	private void handleSection(String value, TaxonNameBase taxonNameBase) {
1965
		String name = value.replace("Section", "").trim();
1966
		((NonViralName)taxonNameBase).setInfraGenericEpithet(name);
1967
	}
1968
	
1969
	/**
1970
	 * @param value
1971
	 * @param taxonNameBase 
1972
	 */
1973
	private void handleSpecies(String value, TaxonNameBase taxonNameBase) {
1974
		//do nothing
1975
	}
1976
	
1977
	/**
1978
	 * @param value
1979
	 * @param taxonNameBase 
1980
	 */
1981
	private void handleVariety(String value, TaxonNameBase taxonNameBase) {
1982
		//do nothing
1983
	}
1984
	
1985
	/**
1986
	 * @param value
1987
	 * @param taxonNameBase 
1988
	 */
1989
	private void handleSubSpecies(String value, TaxonNameBase taxonNameBase) {
1990
		//do nothing
1991
	}
1992

    
1993
	
1994
	/**
	 * Matches a part of a title that starts with "[" or "(" and ends with "]" or ")".
	 * The match is greedy, it reaches from the first opening to the last closing bracket.
	 * Used by handleGenus() to find the author part of the title.
	 */
	private Pattern rexGenusAuthor = Pattern.compile("(\\[|\\().*(\\]|\\))");
1995
	
1996
	/**
1997
	 * @param value
1998
	 * @param taxonNameBase 
1999
	 */
2000
	protected void handleGenus(String value, TaxonNameBase taxonName) {
2001
		Matcher matcher = rexGenusAuthor.matcher(value);
2002
		if (matcher.find()){
2003
			String author = matcher.group();
2004
//			String genus = value.replace(author, "");
2005
			author = author.substring(1, author.length() - 1);
2006
			Team team = Team.NewInstance();
2007
			team.setTitleCache(author, true);
2008
			Credit credit = Credit.NewInstance(team, null);
2009
			taxonName.addCredit(credit);
2010
//			NonViralName nvn = (NonViralName)taxonName;
2011
//			nvn.setCombinationAuthorTeam(team);
2012
//			nvn.setGenusOrUninomial(genus);
2013
		}else{
2014
			logger.info("No Author match for " + value);
2015
		}
2016
	}
2017
	
2018

    
2019
	/**
2020
	 * @param taxon
2021
	 * @param lastTaxon
2022
	 */
2023
	private void handleTaxonRelation(EfloraImportState state, Taxon taxon, Taxon lastTaxon) {
2024
		
2025
		Classification tree = getTree(state);
2026
		if (lastTaxon == null){
2027
			tree.addChildTaxon(taxon, null, null, null);
2028
			return;
2029
		}
2030
		Rank thisRank = taxon.getName().getRank();
2031
		Rank lastRank = lastTaxon.getName().getRank();
2032
		if (lastTaxon.getTaxonNodes().size() > 0){
2033
			TaxonNode lastNode = lastTaxon.getTaxonNodes().iterator().next();
2034
			if (thisRank.isLower(lastRank )  ){
2035
				lastNode.addChildTaxon(taxon, null, null, null);
2036
				fillMissingEpithetsForTaxa(lastTaxon, taxon);
2037
			}else if (thisRank.equals(lastRank)){
2038
				TaxonNode parent = lastNode.getParent();
2039
				if (parent != null){
2040
					parent.addChildTaxon(taxon, null, null, null);
2041
					fillMissingEpithetsForTaxa(parent.getTaxon(), taxon);
2042
				}else{
2043
					tree.addChildTaxon(taxon, null, null, null);
2044
				}
2045
			}else if (thisRank.isHigher(lastRank)){
2046
				handleTaxonRelation(state, taxon, lastNode.getParent().getTaxon());
2047
//				TaxonNode parentNode = handleTaxonRelation(state, taxon, lastNode.getParent().getTaxon());
2048
//				parentNode.addChildTaxon(taxon, null, null, null);
2049
			}
2050
		}else{
2051
			logger.warn("Last taxon has no node");
2052
		}
2053
	}
2054

    
2055

    
2056

    
2057
	/**
2058
	 * @param state
2059
	 * @return 
2060
	 */
2061
	private Classification getTree(EfloraImportState state) {
2062
		Classification result = state.getTree(null);
2063
		if (result == null){
2064
			UUID uuid = state.getConfig().getClassificationUuid();
2065
			if (uuid == null){
2066
				logger.warn("No classification uuid is defined");
2067
				result = getNewClassification(state);
2068
			}else{
2069
				result = getClassificationService().find(uuid);
2070
				if (result == null){
2071
					result = getNewClassification(state);
2072
					result.setUuid(uuid);
2073
				}
2074
			}
2075
			state.putTree(null, result);
2076
		}
2077
		return result;
2078
	}
2079

    
2080

    
2081
	private Classification getNewClassification(EfloraImportState state) {
2082
		Classification result;
2083
		result = Classification.NewInstance(state.getConfig().getClassificationTitle());
2084
		state.putTree(null, result);
2085
		return result;
2086
	}
2087

    
2088

    
2089
	/**
2090
	 * @param state 
2091
	 * @param taxon
2092
	 * @param value
2093
	 * @param feature
2094
	 * @return 
2095
	 */
2096
	private TextData addDescriptionElement(EfloraImportState state, Taxon taxon, String value, Feature feature, String references) {
2097
		TextData textData = TextData.NewInstance(feature);
2098
		Language textLanguage = getDefaultLanguage(state);
2099
		textData.putText(textLanguage, value);
2100
		TaxonDescription description = getDescription(taxon);
2101
		description.addElement(textData);
2102
		if (references != null){
2103
			makeOriginalSourceReferences(textData, ";", references);
2104
		}
2105
		return textData;
2106
	}
2107

    
2108
	private Language getDefaultLanguage(EfloraImportState state) {
2109
		UUID defaultLanguageUuid = state.getConfig().getDefaultLanguageUuid();
2110
		if (defaultLanguageUuid != null){
2111
			Language result = state.getDefaultLanguage();
2112
			if (result == null || ! result.getUuid().equals(defaultLanguageUuid)){
2113
				result = (Language)getTermService().find(defaultLanguageUuid);
2114
				state.setDefaultLanguage(result);
2115
				if (result == null){
2116
					logger.warn("Default language for " + defaultLanguageUuid +  " does not exist.");
2117
				}
2118
			}
2119
			return result;
2120
		}else{
2121
			return Language.DEFAULT();
2122
		}
2123
	}
2124

    
2125

    
2126
	/**
2127
	 * @param elNomenclature
2128
	 */
2129
	private void verifyNoAttribute(Element element) {
2130
		List<Attribute> attributes = element.getAttributes();
2131
		if (! attributes.isEmpty()){
2132
			logger.warn(element.getName() + " has unhandled attributes: " + attributes.get(0).getValue() + "..." );
2133
		}
2134
	}
2135
	
2136
	/**
2137
	 * @param elNomenclature
2138
	 */
2139
	protected void verifyNoChildren(Element element) {
2140
		verifyNoChildren(element, false);
2141
	}
2142
	
2143
	/**
2144
	 * @param elNomenclature
2145
	 */
2146
	private void verifyNoChildren(Element element, boolean ignoreLineBreak) {
2147
		List<Element> children = element.getChildren();
2148
		if (! children.isEmpty()){
2149
			if (ignoreLineBreak == true){
2150
				for (Element child : children){
2151
					if (! child.getName().equalsIgnoreCase("BR")){
2152
						logger.warn(element.getName() + " has unhandled child: " + child.getName());
2153
					}
2154
				}
2155
			}else{
2156
				logger.warn(element.getName() + " has unhandled children");
2157
			}
2158
		}
2159
	}
2160
	
2161
	
2162

    
2163
	/**
2164
	 * Parses the nomenclatural status from the references titleCache. If a nomenclatural status
2165
	 * exists it is added to the name and the nom. status part of the references title cache is 
2166
	 * removed. Requires protected title cache.
2167
	 * @param ref
2168
	 * @param nonViralName
2169
	 */
2170
	protected void parseNomStatus(Reference ref, NonViralName nonViralName) {
2171
		String titleToParse = ref.getTitleCache();
2172
		
2173
		String noStatusTitle = parser.parseNomStatus(titleToParse, nonViralName);
2174
		if (! noStatusTitle.equals(titleToParse)){
2175
			ref.setTitleCache(noStatusTitle, true);
2176
		}
2177
	}
2178

    
2179
	
2180
	/**
2181
	 * Extracts the date published part and returns micro reference
2182
	 * @param ref
2183
	 * @return
2184
	 */
2185
	private String parseReferenceYearAndDetail(Reference ref){
2186
		String detailResult = null;
2187
		String titleToParse = ref.getTitleCache();
2188
		titleToParse = removeStartingSymbols(titleToParse, ref);
2189
		String reReference = "^\\.{1,}";
2190
//		String reYear = "\\([1-2]{1}[0-9]{3}\\)";
2191
		String oneMonth = "(Feb.|Dec.|March|June|July)";
2192
		String reYear = oneMonth + "?\\s?[1-2]\\s?[0-9]\\s?[0-9]\\s?[0-9]\\s?";
2193
		String secondYear = "(\\s?[1-2]\\s?[0-9])?\\s?[0-9]\\s?[0-9]\\s?";
2194
		
2195
		String reYearPeriod = "\\(" + reYear + "(\\-" + secondYear + ")?\\)";
2196
		String reDetail = "\\.{1,10}$";
2197
		
2198
		//pattern for the whole string
2199
		Pattern patReference = Pattern.compile(/*reReference +*/ reYearPeriod /*+ reDetail */);
2200
		Matcher matcher = patReference.matcher(titleToParse);
2201
		if (matcher.find()){
2202
			int start = matcher.start();
2203
			int end = matcher.end();
2204
			
2205
			//title and other information precedes the year part
2206
			String title = titleToParse.substring(0, start).trim();
2207
			//detail follows the year part
2208
			String detail = titleToParse.substring(end).trim();
2209
			
2210
			//time period
2211
			String strPeriod = matcher.group().trim();
2212
			strPeriod = strPeriod.substring(1, strPeriod.length()-1);   //remove brackets
2213
			Pattern patStartMonth = Pattern.compile("^" + oneMonth);
2214
			matcher = patStartMonth.matcher(strPeriod);
2215
			strPeriod = strPeriod.replace(" ", "");
2216
			Integer startMonth = null;
2217
			if (matcher.find()){
2218
				end = matcher.end();
2219
				strPeriod = strPeriod.substring(0, end) + " " + strPeriod.substring(end);
2220
				startMonth = getMonth(strPeriod.substring(0, end));
2221
			}
2222
			
2223
			TimePeriod datePublished = TimePeriod.parseString(strPeriod);
2224
			if (startMonth != null){
2225
				datePublished.setStartMonth(startMonth);
2226
			}
2227
			ref.setDatePublished(datePublished);
2228
			ref.setTitle(title);
2229
			detailResult = CdmUtils.removeTrailingDot(detail);
2230
			if (detailResult.endsWith(".") || detailResult.endsWith(";") || detailResult.endsWith(",")  ){
2231
				detailResult = detailResult.substring(0, detailResult.length() -1);
2232
			}
2233
			ref.setProtectedTitleCache(false);
2234
		}else{
2235
			logger.warn("Could not parse reference: " +  titleToParse);
2236
		}
2237
		return detailResult;
2238
		
2239
	}
2240

    
2241
	
2242
	
2243
	private Integer getMonth(String month) {
2244
		if (month.startsWith("Jan")){
2245
			return 1;
2246
		}else if (month.startsWith("Feb")){
2247
			return 2;
2248
		}else if (month.startsWith("Mar")){
2249
			return 3;
2250
		}else if (month.startsWith("Apr")){
2251
			return 4;
2252
		}else if (month.startsWith("May")){
2253
			return 5;
2254
		}else if (month.startsWith("Jun")){
2255
			return 6;
2256
		}else if (month.startsWith("Jul")){
2257
			return 7;
2258
		}else if (month.startsWith("Aug")){
2259
			return 8;
2260
		}else if (month.startsWith("Sep")){
2261
			return 9;
2262
		}else if (month.startsWith("Oct")){
2263
			return 10;
2264
		}else if (month.startsWith("Nov")){
2265
			return 11;
2266
		}else if (month.startsWith("Dec")){
2267
			return 12;
2268
		}else{
2269
			logger.warn("Month not yet supported: " + month);
2270
			return null;
2271
		}
2272
	}
2273

    
2274

    
2275
	/* (non-Javadoc)
2276
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
2277
	 */
2278
	protected boolean isIgnore(EfloraImportState state){
2279
		return ! state.getConfig().isDoTaxa();
2280
	}
2281

    
2282
}
(4-4/6)