Project

General

Profile

Download (73.8 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2009 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/ 
9

    
10
package eu.etaxonomy.cdm.io.eflora;
11

    
12
import java.util.ArrayList;
13
import java.util.HashMap;
14
import java.util.HashSet;
15
import java.util.List;
16
import java.util.Map;
17
import java.util.Set;
18
import java.util.UUID;
19
import java.util.regex.Matcher;
20
import java.util.regex.Pattern;
21

    
22
import org.apache.commons.lang.CharUtils;
23
import org.apache.commons.lang.StringUtils;
24
import org.apache.log4j.Logger;
25
import org.jdom.Attribute;
26
import org.jdom.Element;
27
import org.springframework.stereotype.Component;
28
import org.springframework.transaction.TransactionStatus;
29

    
30
import eu.etaxonomy.cdm.common.CdmUtils;
31
import eu.etaxonomy.cdm.common.ResultWrapper;
32
import eu.etaxonomy.cdm.common.XmlHelp;
33
import eu.etaxonomy.cdm.io.common.ICdmIO;
34
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
35
import eu.etaxonomy.cdm.io.eflora.UnmatchedLeads.UnmatchedLeadsKey;
36
import eu.etaxonomy.cdm.model.agent.Person;
37
import eu.etaxonomy.cdm.model.agent.Team;
38
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
39
import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
40
import eu.etaxonomy.cdm.model.common.Annotation;
41
import eu.etaxonomy.cdm.model.common.AnnotationType;
42
import eu.etaxonomy.cdm.model.common.CdmBase;
43
import eu.etaxonomy.cdm.model.common.Credit;
44
import eu.etaxonomy.cdm.model.common.ExtensionType;
45
import eu.etaxonomy.cdm.model.common.ISourceable;
46
import eu.etaxonomy.cdm.model.common.Language;
47
import eu.etaxonomy.cdm.model.common.Marker;
48
import eu.etaxonomy.cdm.model.common.MarkerType;
49
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
50
import eu.etaxonomy.cdm.model.common.TimePeriod;
51
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
52
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
53
import eu.etaxonomy.cdm.model.description.Feature;
54
import eu.etaxonomy.cdm.model.description.KeyStatement;
55
import eu.etaxonomy.cdm.model.description.PolytomousKey;
56
import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
57
import eu.etaxonomy.cdm.model.description.TaxonDescription;
58
import eu.etaxonomy.cdm.model.description.TextData;
59
import eu.etaxonomy.cdm.model.name.BotanicalName;
60
import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
61
import eu.etaxonomy.cdm.model.name.NameRelationshipType;
62
import eu.etaxonomy.cdm.model.name.NameTypeDesignation;
63
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
64
import eu.etaxonomy.cdm.model.name.NonViralName;
65
import eu.etaxonomy.cdm.model.name.Rank;
66
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
67
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
68
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
69
import eu.etaxonomy.cdm.model.name.TypeDesignationBase;
70
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
71
import eu.etaxonomy.cdm.model.reference.IBook;
72
import eu.etaxonomy.cdm.model.reference.IJournal;
73
import eu.etaxonomy.cdm.model.reference.Reference;
74
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
75
import eu.etaxonomy.cdm.model.reference.ReferenceType;
76
import eu.etaxonomy.cdm.model.taxon.Classification;
77
import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
78
import eu.etaxonomy.cdm.model.taxon.Taxon;
79
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
80
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
81
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
82
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
83
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
84

    
85

    
86
/**
87
 * @author a.mueller
88
 *
89
 */
90
@Component
91
public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<EfloraImportState> {
92
	private static final Logger logger = Logger.getLogger(EfloraTaxonImport.class);
93

    
94
	private static int modCount = 30000;
95
	private NonViralNameParserImpl parser = new NonViralNameParserImpl();
96

    
97
	/**
	 * Creates the import component. Nothing is initialized here beyond what the
	 * superclass constructor does.
	 */
	public EfloraTaxonImport() {
		super();
	}
100
	
101
	
102
	@Override
103
	public boolean doCheck(EfloraImportState state){
104
		boolean result = true;
105
		return result;
106
	}
107
	
108
	//TODO make part of state, but state is renewed when invoking the import a second time 
109
	private UnmatchedLeads unmatchedLeads;
110
	
111
	@Override
112
	public void doInvoke(EfloraImportState state){
113
		logger.info("start make Taxa ...");
114
		
115
		//FIXME reset state
116
		state.putTree(null, null);
117
//		UnmatchedLeads unmatchedLeads = state.getOpenKeys();
118
		if (unmatchedLeads == null){
119
			unmatchedLeads = UnmatchedLeads.NewInstance();
120
		}
121
		state.setUnmatchedLeads(unmatchedLeads);
122
		
123
		TransactionStatus tx = startTransaction();
124
		unmatchedLeads.saveToSession(getPolytomousKeyNodeService());
125
		
126
		
127
		//TODO generally do not store the reference object in the config
128
		Reference sourceReference = state.getConfig().getSourceReference();
129
		getReferenceService().saveOrUpdate(sourceReference);
130
		
131
		Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
132
		ResultWrapper<Boolean> success = ResultWrapper.NewInstance(true);
133

    
134
		Element elbody= getBodyElement(state.getConfig());
135
		List<Element> elTaxonList = elbody.getChildren();
136
		
137
		int i = 0;
138
		
139
		Set<String> unhandledTitleClassess = new HashSet<String>();
140
		Set<String> unhandledNomeclatureChildren = new HashSet<String>();
141
		Set<String> unhandledDescriptionChildren = new HashSet<String>();
142
		
143
		Taxon lastTaxon = getLastTaxon(state);
144
		
145
		//for each taxon
146
		for (Element elTaxon : elTaxonList){
147
			try {
148
				if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
149
				if (! elTaxon.getName().equalsIgnoreCase("taxon")){
150
					logger.warn("body has element other than 'taxon'");
151
				}
152
				
153
				BotanicalName botanicalName = BotanicalName.NewInstance(Rank.SPECIES());
154
				Taxon taxon = Taxon.NewInstance(botanicalName, state.getConfig().getSourceReference());
155
				
156
				handleTaxonAttributes(elTaxon, taxon, state);
157

    
158
				
159
				List<Element> children = elTaxon.getChildren();
160
				handleTaxonElement(state, unhandledTitleClassess, unhandledNomeclatureChildren,	unhandledDescriptionChildren, taxon, children);
161
				handleTaxonRelation(state, taxon, lastTaxon);
162
				lastTaxon = taxon;
163
				taxaToSave.add(taxon);
164
				state.getConfig().setLastTaxonUuid(lastTaxon.getUuid());
165
				
166
			} catch (Exception e) {
167
				logger.warn("Exception occurred in Sapindacea taxon import: " + e);
168
				e.printStackTrace();
169
			}
170
			
171
		}
172
		
173
		System.out.println(state.getUnmatchedLeads().toString());
174
		logger.warn("There are taxa with attributes 'excluded' and 'dubious'");
175
		
176
		logger.info("Children for nomenclature are: " + unhandledNomeclatureChildren);
177
		logger.info("Children for description are: " + unhandledDescriptionChildren);
178
		logger.info("Children for homotypes are: " + unhandledHomotypeChildren);
179
		logger.info("Children for nom are: " + unhandledNomChildren);
180
		
181
		
182
		//invokeRelations(source, cdmApp, deleteAll, taxonMap, referenceMap);
183
		logger.info(i + " taxa handled. Saving ...");
184
		getTaxonService().saveOrUpdate(taxaToSave);
185
		getFeatureTreeService().saveOrUpdateFeatureNodesAll(state.getFeatureNodesToSave());
186
		state.getFeatureNodesToSave().clear();
187
		commitTransaction(tx);
188
		
189
		logger.info("end makeTaxa ...");
190
		logger.info("start makeKey ...");
191
//		invokeDoKey(state);
192
		logger.info("end makeKey ...");
193
		
194
		if (! success.getValue()){
195
			state.setUnsuccessfull();
196
		}
197
		return ;
198
	}
199

    
200

    
201
	private void handleTaxonAttributes(Element elTaxon, Taxon taxon, EfloraImportState state) {
202
		List<Attribute> attrList = elTaxon.getAttributes();
203
		for (Attribute attr : attrList){
204
			String attrName = attr.getName();
205
			String attrValue = attr.getValue();
206
			if ("class".equals(attrName)){
207
				if (attrValue.equalsIgnoreCase("dubious") || attrValue.equalsIgnoreCase("DUBIOUS GENUS") || attrValue.equalsIgnoreCase("DOUBTFUL SPECIES")  ){
208
					taxon.setDoubtful(true);
209
				}else{
210
					MarkerType markerType = getMarkerType(state, attrValue);
211
					if (markerType == null){
212
						logger.warn("Class attribute value for taxon not yet supported: " + attrValue);
213
					}else{
214
						taxon.addMarker(Marker.NewInstance(markerType, true));
215
					}
216
				}
217
			}else if ("num".equals(attrName)){
218
				logger.warn("num not yet supported");
219
			}else{
220
				logger.warn("Attribute " + attrName + " not yet supported for element taxon");
221
			}
222
		}
223

    
224
	}
225

    
226

    
227
	private Taxon getLastTaxon(EfloraImportState state) {
228
		if (state.getConfig().getLastTaxonUuid() == null){
229
			return null;
230
		}else{
231
			return (Taxon)getTaxonService().find(state.getConfig().getLastTaxonUuid());
232
		}
233
	}
234

    
235

    
236
//	private void invokeDoKey(SapindaceaeImportState state) {
237
//		TransactionStatus tx = startTransaction();
238
//		
239
//		Set<FeatureNode> nodesToSave = new HashSet<FeatureNode>();
240
//		ITaxonService taxonService = getTaxonService();
241
//		ResultWrapper<Boolean> success = ResultWrapper.NewInstance(true);
242
//
243
//		Element elbody= getBodyElement(state.getConfig());
244
//		List<Element> elTaxonList = elbody.getChildren();
245
//		
246
//		int i = 0;
247
//		
248
//		//for each taxon
249
//		for (Element elTaxon : elTaxonList){
250
//			if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
251
//			if (! elTaxon.getName().equalsIgnoreCase("taxon")){
252
//				continue;
253
//			}
254
//			
255
//			List<Element> children = elTaxon.getChildren("key");
256
//			for (Element element : children){
257
//				handleKeys(state, element, null);
258
//			}
259
//			nodesToSave.add(taxon);
260
//
261
//		}
262
//		
263
//	}
264

    
265

    
266
	// body/taxon/*
267
	private void handleTaxonElement(EfloraImportState state, Set<String> unhandledTitleClassess, Set<String> unhandledNomeclatureChildren, Set<String> unhandledDescriptionChildren, Taxon taxon, List<Element> children) {
268
		AnnotatableEntity lastEntity = null;
269
		for (Element element : children){
270
			String elName = element.getName();
271
			
272
			if (elName.equalsIgnoreCase("title")){
273
				handleTitle(state, element, taxon, unhandledTitleClassess);
274
				lastEntity = null;
275
			}else if(elName.equalsIgnoreCase("nomenclature")){
276
				handleNomenclature(state, element, taxon, unhandledNomeclatureChildren);
277
				lastEntity = null;
278
			}else if(elName.equalsIgnoreCase("description")){
279
				handleDescription(state, element, taxon, unhandledDescriptionChildren);
280
				lastEntity = null;
281
			}else if(elName.equalsIgnoreCase("habitatecology")){
282
				lastEntity = handleEcology(state, element, taxon);
283
			}else if(elName.equalsIgnoreCase("distribution")){
284
				lastEntity = handleDistribution(state, element, taxon);
285
			}else if(elName.equalsIgnoreCase("uses")){
286
				lastEntity = handleUses(state, element, taxon);
287
			}else if(elName.equalsIgnoreCase("notes")){
288
				lastEntity = handleTaxonNotes(state, element, taxon);
289
			}else if(elName.equalsIgnoreCase("chromosomes")){
290
				lastEntity = handleChromosomes(state, element, taxon);
291
			}else if(elName.equalsIgnoreCase("vernacularnames")){
292
				handleVernaculars(state, element, taxon);
293
			}else if(elName.equalsIgnoreCase("key")){
294
				lastEntity = handleKeys(state, element, taxon);
295
			}else if(elName.equalsIgnoreCase("references")){
296
				handleReferences(state, element, taxon, lastEntity);
297
				lastEntity = null;
298
			}else if(elName.equalsIgnoreCase("taxon")){
299
				logger.warn("A taxon should not be part of a taxon");
300
			}else if(elName.equalsIgnoreCase("homotypes")){
301
				logger.warn("Homotypes should be included in the nomenclature flag but is child of taxon [XPath: body/taxon/homotypes]");
302
			}else{
303
				logger.warn("Unexpected child for taxon: " + elName);
304
			}
305
		}
306
	}
307
	
308
	
309
	private void handleVernaculars(EfloraImportState state, Element elVernacular, Taxon taxon) {
310
		verifyNoAttribute(elVernacular);
311
		verifyNoChildren(elVernacular, false);
312
		String value = elVernacular.getTextNormalize();
313
		Feature feature = Feature.COMMON_NAME();
314
		value = replaceStart(value, "Noms vernaculaires");
315
		String[] dialects = value.split(";");
316
		for (String singleDialect : dialects){
317
			handleSingleDialect(taxon, singleDialect, feature, state);
318
		}
319
		return;
320
	}
321

    
322

    
323
	private void handleSingleDialect(Taxon taxon, String singleDialect, Feature feature, EfloraImportState state) {
324
		singleDialect = singleDialect.trim();
325
		TaxonDescription description = getDescription(taxon);
326
		String reDialect = "\\(dial\\.\\s.*\\)";
327
//		String reDialect = "\\(.*\\)";
328
		Pattern patDialect = Pattern.compile(reDialect);
329
		Matcher matcher = patDialect.matcher(singleDialect);
330
		if (matcher.find()){
331
			String dialect = singleDialect.substring(matcher.start(), matcher.end());
332
			dialect = dialect.replace("(dial. ", "").replace(")", "");
333
			
334
			Language language = null;
335
			try {
336
				language = this.getLanguage(state, state.getTransformer().getLanguageUuid(dialect), dialect, dialect, dialect);
337
			} catch (UndefinedTransformerMethodException e) {
338
				logger.error(e.getMessage());
339
			}
340
			
341
			String commonNames = singleDialect.substring(0, matcher.start());
342
			String[] splitNames = commonNames.split(",");
343
			for (String commonNameString : splitNames){
344
				commonNameString = commonNameString.trim();
345
				CommonTaxonName commonName = CommonTaxonName.NewInstance(commonNameString, language);
346
				description.addElement(commonName);
347
			}
348
		}else{
349
			logger.warn("No dialect match: " +  singleDialect);
350
		}
351
	}
352

    
353

    
354
	private void handleReferences(EfloraImportState state, Element elReferences, Taxon taxon, AnnotatableEntity lastEntity) {
355
		verifyNoAttribute(elReferences);
356
		verifyNoChildren(elReferences, true);
357
		String refString = elReferences.getTextNormalize(); 
358
		if (lastEntity == null){
359
			logger.warn("No last entity defined: " + refString);
360
			return;
361
		}
362
		
363
		Annotation annotation = Annotation.NewInstance(refString, AnnotationType.EDITORIAL(), Language.DEFAULT());
364
		lastEntity.addAnnotation(annotation);
365
	}
366

    
367

    
368
	private PolytomousKey handleKeys(EfloraImportState state, Element elKey, Taxon taxon) {
369
		UnmatchedLeads openKeys = state.getUnmatchedLeads();
370
		
371
		//title
372
		String title = makeKeyTitle(elKey);
373
		
374
		//key
375
		PolytomousKey key = PolytomousKey.NewTitledInstance(title);
376
		
377
		//TODO add covered taxa etc.
378
		verifyNoAttribute(elKey);
379
		
380
		//notes
381
		makeKeyNotes(elKey, key);
382
		
383
		//keycouplets
384
		List<Element> keychoices = new ArrayList<Element>();
385
		keychoices.addAll(elKey.getChildren("keycouplet"));
386
		keychoices.addAll(elKey.getChildren("keychoice"));
387
		
388
		
389
		for (Element elKeychoice : keychoices){
390
			handleKeyChoices(state, openKeys, key, elKeychoice, taxon);
391
			elKey.removeContent(elKeychoice);
392
		}
393
		
394
		//
395
		verifyNoChildren(elKey);
396
		logger.info("Unmatched leads after key handling:" + openKeys.toString());
397
		
398

    
399
		if (state.getConfig().isDoPrintKeys()){
400
			key.print(System.err);
401
		}
402
		getPolytomousKeyService().save(key);
403
		return key;
404
	}
405

    
406

    
407
	/**
408
	 * @param state
409
	 * @param elKey
410
	 * @param openKeys
411
	 * @param key
412
	 * @param elKeychoice
413
	 * @param taxon 
414
	 */
415
	private void handleKeyChoices(EfloraImportState state, UnmatchedLeads openKeys, PolytomousKey key, Element elKeychoice, Taxon taxon) {
416
		
417
		//char Attribute
418
		//TODO it's still unclear if char is a feature and needs to be a new attribute 
419
		//or if it is handled as question. Therefore both cases are handled but feature
420
		//is finally not yet set
421
		KeyStatement question = handleKeychoiceChar(state, elKeychoice);
422
		Feature feature = handleKeychoiceCharAsFeature(state, elKeychoice);
423
		
424
		//lead
425
		List<PolytomousKeyNode> childNodes = handleKeychoiceLeads(state, key, elKeychoice, taxon, question, feature);
426
		
427
		//num -> match with unmatched leads
428
		handleKeychoiceNum(openKeys, key, elKeychoice, childNodes);
429

    
430
		//others
431
		verifyNoAttribute(elKeychoice);
432
	}
433

    
434

    
435
	/**
436
	 * @param openKeys
437
	 * @param key
438
	 * @param elKeychoice
439
	 * @param childNodes
440
	 */
441
	private void handleKeychoiceNum(UnmatchedLeads openKeys, PolytomousKey key, Element elKeychoice, List<PolytomousKeyNode> childNodes) {
442
		Attribute numAttr = elKeychoice.getAttribute("num");
443
		String num = CdmUtils.removeTrailingDot(numAttr == null? "":numAttr.getValue());
444
		UnmatchedLeadsKey okk = UnmatchedLeadsKey.NewInstance(key, num);
445
		Set<PolytomousKeyNode> matchingNodes = openKeys.getNodes(okk);
446
		for (PolytomousKeyNode matchingNode : matchingNodes){
447
			for (PolytomousKeyNode childNode : childNodes){
448
				matchingNode.addChild(childNode);
449
			}
450
			openKeys.removeNode(okk, matchingNode);
451
		}
452
		if (matchingNodes.isEmpty()){
453
			for (PolytomousKeyNode childNode : childNodes){
454
				key.getRoot().addChild(childNode);
455
			}
456
		}
457
		
458
		elKeychoice.removeAttribute("num");
459
	}
460

    
461

    
462
	/**
463
	 * @param state
464
	 * @param key
465
	 * @param elKeychoice
466
	 * @param taxon
467
	 * @param feature
468
	 * @return
469
	 */
470
	private List<PolytomousKeyNode> handleKeychoiceLeads(	EfloraImportState state, PolytomousKey key,	Element elKeychoice, Taxon taxon, KeyStatement question, Feature feature) {
471
		List<PolytomousKeyNode> childNodes = new ArrayList<PolytomousKeyNode>();
472
		List<Element> leads = elKeychoice.getChildren("lead");
473
		for(Element elLead : leads){
474
			PolytomousKeyNode childNode = handleLead(state, key, elLead, taxon, question, feature);
475
			childNodes.add(childNode);
476
		}
477
		return childNodes;
478
	}
479

    
480

    
481
	/**
482
	 * @param state
483
	 * @param elKeychoice
484
	 * @return
485
	 */
486
	private KeyStatement handleKeychoiceChar(EfloraImportState state, Element elKeychoice) {
487
		KeyStatement statement = null;
488
		Attribute charAttr = elKeychoice.getAttribute("char");
489
		if (charAttr != null){
490
			String charStr = charAttr.getValue();
491
			if (StringUtils.isNotBlank(charStr)){
492
				statement = KeyStatement.NewInstance(charStr);
493
			}
494
			elKeychoice.removeAttribute("char");
495
		}
496
		return statement;
497
	}
498
	
499
	/**
500
	 * @param state
501
	 * @param elKeychoice
502
	 * @return
503
	 */
504
	private Feature handleKeychoiceCharAsFeature(EfloraImportState state, Element elKeychoice) {
505
		Feature feature = null;
506
		Attribute charAttr = elKeychoice.getAttribute("char");
507
		if (charAttr != null){
508
			String charStr = charAttr.getValue();
509
			feature = getFeature(charStr, state);
510
			elKeychoice.removeAttribute("char");
511
		}
512
		return feature;
513
	}
514

    
515

    
516
	private PolytomousKeyNode handleLead(EfloraImportState state, PolytomousKey key, Element elLead, Taxon taxon, KeyStatement question, Feature feature) {
517
		PolytomousKeyNode node = PolytomousKeyNode.NewInstance();
518
		//TODO the char attribute in the keychoice is more a feature than a question
519
		//needs to be discussed on model side
520
		node.setQuestion(question);
521
//		node.setFeature(feature);
522
		
523
		//text
524
		String text = handleLeadText(elLead, node);
525
		
526
		//num
527
		handleLeadNum(elLead, text);
528
		
529
		//goto
530
		handleLeadGoto(state, key, elLead, taxon, node);
531
		
532
		//others
533
		verifyNoAttribute(elLead);
534
		
535
		return node;
536
	}
537

    
538

    
539
	/**
540
	 * @param elLead
541
	 * @param node
542
	 * @return
543
	 */
544
	private String handleLeadText(Element elLead, PolytomousKeyNode node) {
545
		String text = elLead.getAttributeValue("text").trim();
546
		if (StringUtils.isBlank(text)){
547
			logger.warn("Empty text in lead");
548
		}
549
		elLead.removeAttribute("text");
550
		KeyStatement statement = KeyStatement.NewInstance(text);
551
		node.setStatement(statement);
552
		return text;
553
	}
554

    
555

    
556
	/**
557
	 * @param state
558
	 * @param key
559
	 * @param elLead
560
	 * @param taxon
561
	 * @param node
562
	 */
563
	private void handleLeadGoto(EfloraImportState state, PolytomousKey key, Element elLead, Taxon taxon, PolytomousKeyNode node) {
564
		Attribute gotoAttr = elLead.getAttribute("goto");
565
		if (gotoAttr != null){
566
			String strGoto = gotoAttr.getValue().trim();
567
			//create key
568
			UnmatchedLeadsKey gotoKey = null;
569
			if (isInternalNode(strGoto)){
570
				gotoKey = UnmatchedLeadsKey.NewInstance(key, strGoto);
571
			}else{
572
				String taxonKey = makeTaxonKey(strGoto, taxon);
573
				gotoKey = UnmatchedLeadsKey.NewInstance(taxonKey);
574
			}
575
			//
576
			UnmatchedLeads openKeys = state.getUnmatchedLeads();
577
			if (gotoKey.isInnerLead()){
578
				Set<PolytomousKeyNode> existingNodes = openKeys.getNodes(gotoKey);
579
				for (PolytomousKeyNode existingNode : existingNodes){
580
					node.addChild(existingNode);
581
				}
582
			}
583
			openKeys.addKey(gotoKey, node);
584
			//remove attribute (need for consistency check)
585
			elLead.removeAttribute("goto");
586
		}else{
587
			logger.warn("lead has no goto attribute");
588
		}
589
	}
590

    
591

    
592
	/**
593
	 * @param elLead
594
	 * @param text
595
	 */
596
	private void handleLeadNum(Element elLead, String text) {
597
		Attribute numAttr = elLead.getAttribute("num");
598
		if (numAttr != null){
599
			//TODO num
600
			String num = numAttr.getValue();
601
			elLead.removeAttribute("num");
602
		}else{
603
			logger.info("Keychoice has no num attribute: " + text);
604
		}
605
	}
606

    
607

    
608
	private String makeTaxonKey(String strGoto, Taxon taxon) {
609
		String result = "";
610
		if (strGoto == null){
611
			return "";
612
		}
613
		String strGenusName = CdmBase.deproxy(taxon.getName(), NonViralName.class).getGenusOrUninomial();
614
		strGoto = strGoto.replaceAll("\\([^\\(\\)]*\\)", "");  //replace all brackets
615
		strGoto = strGoto.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace
616
		
617
		strGoto = strGoto.trim();  
618
		String[] split = strGoto.split("\\s");
619
		for (int i = 0; i<split.length; i++){
620
			String single = split[i];
621
			if (isGenusAbbrev(single, strGenusName)){
622
				split[i] = strGenusName;
623
			}
624
//			if (isInfraSpecificMarker(single)){
625
//				String strSpeciesName = CdmBase.deproxy(taxon.getName(), NonViralName.class).getSpecificEpithet();
626
//				split[i] = strGenusName + " " + strSpeciesName + " ";
627
//			}
628
			result = (result + " " + split[i]).trim();
629
		}
630
		return result;
631
	}
632

    
633

    
634
	private boolean isInfraSpecificMarker(String single) {
635
		try {
636
			if (Rank.getRankByIdInVoc(single).isInfraSpecific()){
637
				return true;
638
			}
639
		} catch (UnknownCdmTypeException e) {
640
			return false;
641
		}
642
		return false;
643
	}
644

    
645

    
646
	private boolean isGenusAbbrev(String single, String strGenusName) {
647
		if (! single.matches("[A-Z]\\.?")) {
648
			return false;
649
		}else if (single.length() == 0 || strGenusName == null || strGenusName.length() == 0){
650
			return false; 
651
		}else{
652
			return single.charAt(0) == strGenusName.charAt(0);
653
		}
654
	}
655

    
656

    
657
	private boolean isInternalNode(String strGoto) {
658
		return CdmUtils.isNumeric(strGoto);
659
	}
660

    
661

    
662
	private void makeKeyNotes(Element keyElement, PolytomousKey key) {
663
		Element elNotes = keyElement.getChild("notes");
664
		if (elNotes != null){
665
			keyElement.removeContent(elNotes);
666
			String notes = elNotes.getTextNormalize();
667
			if (StringUtils.isNotBlank(notes)){
668
				key.addAnnotation(Annotation.NewInstance(notes, AnnotationType.EDITORIAL(), Language.DEFAULT()));
669
			}
670
		}
671
	}
672

    
673

    
674
	private String makeKeyTitle(Element keyElement) {
675
		String title = "- no title - ";
676
		Attribute titleAttr = keyElement.getAttribute("title");
677
		keyElement.removeAttribute(titleAttr);
678
		if (titleAttr == null){
679
			Element elTitle = keyElement.getChild("keytitle");
680
			keyElement.removeContent(elTitle);
681
			if (elTitle != null){
682
				title = elTitle.getTextNormalize();
683
			}
684
		}else{
685
			title = titleAttr.getValue();
686
		}
687
		return title;
688
	}
689

    
690

    
691
	/**
692
	 * @param state
693
	 * @param element
694
	 * @param taxon
695
	 */
696
	private TextData handleChromosomes(EfloraImportState state, Element element, Taxon taxon) {
697
		Feature chromosomeFeature = getFeature("chromosomes", state);
698
		verifyNoAttribute(element);
699
		verifyNoChildren(element);
700
		String value = element.getTextNormalize();
701
		value = replaceStart(value, "Chromosomes");
702
		String chromosomesPart = getChromosomesPart(value);
703
		String references = value.replace(chromosomesPart, "").trim();
704
		chromosomesPart = chromosomesPart.replace(":", "").trim();
705
		return addDescriptionElement(state, taxon, chromosomesPart, chromosomeFeature, references);	
706
	}
707

    
708

    
709
	/**
710
	 * @param ref 
711
	 * @param string 
712
	 * @return
713
	 */
714
	private void makeOriginalSourceReferences(ISourceable sourcable, String splitter, String refAll) {
715
		String[] splits = refAll.split(splitter);
716
		for (String strRef: splits){
717
			Reference<?> ref = ReferenceFactory.newGeneric();
718
			ref.setTitleCache(strRef, true);
719
			String refDetail = parseReferenceYearAndDetail(ref);
720
			sourcable.addSource(OriginalSourceType.PrimaryTaxonomicSource, null, null, ref, refDetail);
721
		}
722
		
723
		
724
//TODO use regex instead
725
/*		String detailResult = null;
726
		String titleToParse = ref.getTitleCache();
727
		String reReference = "^\\.{1,}";
728
//		String reYear = "\\([1-2]{1}[0-9]{3}\\)";
729
		String reYear = "\\([1-2]{1}[0-9]{3}\\)";
730
		String reYearPeriod = reYear + "(-" + reYear + ")+";
731
		String reDetail = "\\.{1,10}$";
732
*/		
733
	}
734

    
735

    
736
	/**
737
	 * @param value
738
	 * @return
739
	 */
740
	private String getChromosomesPart(String str) {
741
		Pattern pattern = Pattern.compile("2n\\s*=\\s*\\d{1,2}:");
742
		Matcher matcher = pattern.matcher(str);
743
		if (matcher.find()){
744
			return matcher.group(0);
745
		}else{
746
			logger.warn("Chromosomes could not be parsed: " + str);
747
		}
748
		return str;
749
	}
750

    
751

    
752
	/**
753
	 * @param state
754
	 * @param element
755
	 * @param taxon
756
	 */
757
	private TextData handleTaxonNotes(EfloraImportState state, Element element, Taxon taxon) {
758
		TextData result = null;
759
		verifyNoChildren(element, true);
760
		//verifyNoAttribute(element);
761
		List<Attribute> attributes = element.getAttributes();
762
		for (Attribute attribute : attributes){
763
			if (! attribute.getName().equalsIgnoreCase("class")){
764
				logger.warn("Char has unhandled attribute " +  attribute.getName());
765
			}else{
766
				String classValue = attribute.getValue();
767
				result = handleDescriptiveElement(state, element, taxon, classValue);
768
			}
769
		}
770
		//if no class attribute exists, handle as note
771
		if (attributes.isEmpty()){
772
			result = handleDescriptiveElement(state, element, taxon, "Note");
773
		}
774

    
775
		//Annotation annotation = Annotation.NewInstance(value, AnnotationType.EDITORIAL(), Language.ENGLISH());
776
		//taxon.addAnnotation(annotation);
777
		return result; //annotation;
778
	}
779

    
780

    
781
	/**
782
	 * @param state
783
	 * @param element
784
	 * @param taxon
785
	 * @param result
786
	 * @param attribute
787
	 * @return
788
	 */
789
	private TextData handleDescriptiveElement(EfloraImportState state, Element element, Taxon taxon, String classValue) {
790
		TextData result = null;
791
		Feature feature = getFeature(classValue, state);
792
		if (feature == null){
793
			logger.warn("Unhandled feature: " + classValue);
794
		}else{
795
			String value = element.getValue();
796
			value = replaceStart(value, "Notes");
797
			value = replaceStart(value, "Note");
798
			result = addDescriptionElement(state, taxon, value, feature, null);
799
		}
800
		return result;
801
	}
802

    
803

    
804
	private void removeBr(Element element) {
805
		element.removeChildren("Br");
806
		element.removeChildren("br");
807
		element.removeChildren("BR");
808
	}
809

    
810

    
811
	/**
812
	 * @param state
813
	 * @param element
814
	 * @param taxon
815
	 */
816
	private TextData handleUses(EfloraImportState state, Element element, Taxon taxon) {
817
		verifyNoAttribute(element);
818
		verifyNoChildren(element, true);
819
		String value = element.getTextNormalize();
820
		value = replaceStart(value, "Uses");
821
		Feature feature = Feature.USES();
822
		return addDescriptionElement(state, taxon, value, feature, null);
823
		
824
	}
825

    
826

    
827
	/**
828
	 * @param state
829
	 * @param element
830
	 * @param taxon
831
	 * @param unhandledDescriptionChildren
832
	 */
833
	private DescriptionElementBase handleDistribution(EfloraImportState state, Element element, Taxon taxon) {
834
		verifyNoAttribute(element);
835
		verifyNoChildren(element, true);
836
		String value = element.getTextNormalize();
837
		value = replaceStart(value, "Distribution");
838
		Feature feature = Feature.DISTRIBUTION();
839
		//distribution parsing almost impossible as there is lots of freetext in the distribution tag
840
		return addDescriptionElement(state, taxon, value, feature, null);
841
	}
842

    
843

    
844
	/**
845
	 * @param state
846
	 * @param element
847
	 * @param taxon
848
	 * @param unhandledDescriptionChildren
849
	 */
850
	private TextData handleEcology(EfloraImportState state, Element elEcology, Taxon taxon) {
851
		verifyNoAttribute(elEcology);
852
		verifyNoChildren(elEcology, true);
853
		String value = elEcology.getTextNormalize();
854
		Feature feature = Feature.ECOLOGY();
855
		if (value.startsWith("Habitat & Ecology")){
856
			feature = getFeature("Habitat & Ecology", state);
857
			value = replaceStart(value, "Habitat & Ecology");
858
		}else if (value.startsWith("Habitat")){
859
			value = replaceStart(value, "Habitat");
860
			feature = getFeature("Habitat", state);
861
		}
862
		return addDescriptionElement(state, taxon, value, feature, null);
863
	}
864

    
865

    
866

    
867
	/**
868
	 * @param value
869
	 * @param replacementString
870
	 */
871
	private String replaceStart(String value, String replacementString) {
872
		if (value.startsWith(replacementString) ){
873
			value = value.substring(replacementString.length()).trim();
874
		}
875
		while (value.startsWith("-") || value.startsWith("–") ){
876
			value = value.substring("-".length()).trim();
877
		}
878
		return value;
879
	}
880

    
881

    
882
	/**
883
	 * @param value
884
	 * @param replacementString
885
	 */
886
	protected String removeTrailing(String value, String replacementString) {
887
		if (value == null){
888
			return null;
889
		}
890
		if (value.endsWith(replacementString) ){
891
			value = value.substring(0, value.length() - replacementString.length()).trim();
892
		}
893
		return value;
894
	}
895

    
896
	/**
897
	 * @param state
898
	 * @param element
899
	 * @param taxon
900
	 * @param unhandledNomeclatureChildren 
901
	 */
902
	private void handleNomenclature(EfloraImportState state, Element elNomenclature, Taxon taxon, Set<String> unhandledChildren) {
903
		verifyNoAttribute(elNomenclature);
904
		
905
		List<Element> elements = elNomenclature.getChildren();
906
		for (Element element : elements){
907
			if (element.getName().equals("homotypes")){
908
				handleHomotypes(state, element, taxon);
909
			}else if (element.getName().equals("notes")){
910
				handleNomenclatureNotes(state, element, taxon);
911
			}else{
912
				unhandledChildren.add(element.getName());
913
			}
914
		}
915
		
916
	}
917

    
918

    
919

    
920
	private void handleNomenclatureNotes(EfloraImportState state, Element elNotes, Taxon taxon) {
921
		verifyNoAttribute(elNotes);
922
		verifyNoChildren(elNotes);
923
		String notesText = elNotes.getTextNormalize();
924
		Annotation annotation = Annotation.NewInstance(notesText, AnnotationType.EDITORIAL(), Language.DEFAULT());
925
		taxon.addAnnotation(annotation);
926
	}
927

    
928

    
929

    
930
	private static Set<String> unhandledHomotypeChildren = new HashSet<String>();
931
	/**
932
	 * @param state
933
	 * @param element
934
	 * @param taxon
935
	 */
936
	private void handleHomotypes(EfloraImportState state, Element elHomotypes, Taxon taxon) {
937
		verifyNoAttribute(elHomotypes);
938
		
939
		List<Element> elements = elHomotypes.getChildren();
940
		HomotypicalGroup homotypicalGroup = null;
941
		for (Element element : elements){
942
			if (element.getName().equals("nom")){
943
				homotypicalGroup = handleNom(state, element, taxon, homotypicalGroup);
944
			}else{
945
				unhandledHomotypeChildren.add(element.getName());
946
			}
947
		}
948
		
949
	}
950

    
951
	private static Set<String> unhandledNomChildren = new HashSet<String>();
952

    
953
	/**
954
	 * @param state
955
	 * @param element
956
	 * @param taxon
957
	 */
958
	private HomotypicalGroup handleNom(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
959
		List<Attribute> attributes = elNom.getAttributes();
960
		
961
		boolean taxonBaseClassType = false;
962
		for (Attribute attribute : attributes){
963
			if (! attribute.getName().equalsIgnoreCase("class")){
964
				logger.warn("Nom has unhandled attribute " +  attribute.getName());
965
			}else{
966
				String classValue = attribute.getValue();
967
				if (classValue.equalsIgnoreCase("acceptedname")){
968
					homotypicalGroup = handleNomTaxon(state, elNom, taxon,homotypicalGroup, false);
969
					taxonBaseClassType = true;
970
				}else if (classValue.equalsIgnoreCase("synonym")){
971
					homotypicalGroup = handleNomTaxon(state, elNom, taxon, homotypicalGroup, true);
972
					taxonBaseClassType = true;
973
				}else if (classValue.equalsIgnoreCase("typeref")){
974
					handleTypeRef(state, elNom, taxon, homotypicalGroup);
975
				}else{
976
					logger.warn("Unhandled class value for nom: " + classValue);
977
				}
978
				
979
			}
980
		}
981
		
982
		List<Element> elements = elNom.getChildren();
983
		for (Element element : elements){
984
			if (element.getName().equals("name") || element.getName().equals("homonym") ){
985
				if (taxonBaseClassType == false){
986
					logger.warn("Name or homonym tag not allowed in non taxon nom tag");
987
				}
988
			}else{
989
				unhandledNomChildren.add(element.getName());
990
			}
991
		}
992
		
993
		return homotypicalGroup;
994
		
995
	}
996

    
997
	/**
998
	 * @param state
999
	 * @param elNom
1000
	 * @param taxon
1001
	 * @param homotypicalGroup 
1002
	 */
1003
	protected void handleTypeRef(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
1004
		verifyNoChildren(elNom);
1005
		String typeRef = elNom.getTextNormalize();
1006
		typeRef = removeStartingTypeRefMinus(typeRef);
1007
		
1008
		String[] split = typeRef.split(":");
1009
		if (split.length < 2){
1010
			logger.warn("typeRef has no ':' : " + typeRef);
1011
		}else if (split.length > 2){
1012
			logger.warn("typeRef has more than 1 ':' : " + typeRef);
1013
		}else{
1014
			StringBuffer typeType = new StringBuffer(split[0]);
1015
			String typeText = split[1].trim();
1016
			TypeDesignationBase typeDesignation = getTypeDesignationAndReference(typeType);
1017
			
1018
			//Name Type Desitnations
1019
			if (typeDesignation instanceof NameTypeDesignation){
1020
				makeNameTypeDesignations(typeType, typeText, typeDesignation);
1021
			}
1022
			//SpecimenTypeDesignations
1023
			else if (typeDesignation instanceof SpecimenTypeDesignation){
1024
				makeSpecimenTypeDesignation(typeType, typeText, typeDesignation);
1025
			}else{
1026
				logger.error("Unhandled type designation class" + typeDesignation.getClass().getName());
1027
			}
1028
			for (TaxonNameBase name : homotypicalGroup.getTypifiedNames()){
1029
				name.addTypeDesignation(typeDesignation, true);
1030
			}
1031
		}
1032
	}
1033

    
1034

    
1035
	/**
1036
	 * @param typeRef
1037
	 * @return
1038
	 */
1039
	protected String removeStartingTypeRefMinus(String typeRef) {
1040
		typeRef = replaceStart(typeRef, "-");
1041
		typeRef = replaceStart(typeRef, "—");
1042
		typeRef = replaceStart(typeRef, "\u002d");
1043
		typeRef = replaceStart(typeRef, "\u2013");
1044
		typeRef = replaceStart(typeRef, "--");
1045
		return typeRef;
1046
	}
1047

    
1048
	/**
1049
	 * @param typeType
1050
	 * @param typeText
1051
	 * @param typeDesignation
1052
	 */
1053
	private void makeNameTypeDesignations(StringBuffer typeType, String typeText, TypeDesignationBase typeDesignation) {
1054
		if (typeType.toString().trim().equalsIgnoreCase("Type")){
1055
			//do nothing
1056
		}else if (typeType.toString().trim().equalsIgnoreCase("Lectotype")){
1057
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.LECTOTYPE());
1058
		}else if (typeType.toString().trim().equalsIgnoreCase("Syntype")){
1059
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.SYNTYPE());
1060
		}else{
1061
			logger.warn("Unhandled type string: " + typeType + "(" + CharUtils.unicodeEscaped(typeType.charAt(0)) + ")");
1062
		}
1063
		//clean
1064
		typeText = cleanNameType(typeText);
1065
		//create name
1066
		BotanicalName nameType = (BotanicalName)parser.parseFullName(typeText, NomenclaturalCode.ICNAFP, Rank.SPECIES());
1067
		((NameTypeDesignation) typeDesignation).setTypeName(nameType);
1068
		//TODO wie können NameTypes den Namen zugeordnet werden? -  wird aber vom Portal via NameCache matching gemacht
1069
	}
1070

    
1071

    
1072
	private String cleanNameType(String typeText) {
1073
		String result;
1074
		String[] split = typeText.split("\\[.*\\].?");
1075
		result = split[0];
1076
		return result;
1077
	}
1078

    
1079

    
1080
	/**
1081
	 * @param typeType
1082
	 * @param typeText
1083
	 * @param typeDesignation
1084
	 */
1085
	protected void makeSpecimenTypeDesignation(StringBuffer typeType, String typeText, TypeDesignationBase typeDesignation) {
1086
		if (typeType.toString().trim().equalsIgnoreCase("Type")){
1087
			//do nothing
1088
		}else if (typeType.toString().trim().equalsIgnoreCase("Neotype") || typeType.toString().trim().equalsIgnoreCase("Neotypes")){
1089
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.NEOTYPE());
1090
		}else if (typeType.toString().trim().equalsIgnoreCase("Syntype") || typeType.toString().trim().equalsIgnoreCase("Syntypes")){
1091
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.SYNTYPE());
1092
		}else if (typeType.toString().trim().equalsIgnoreCase("Lectotype")){
1093
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.LECTOTYPE());
1094
		}else if (typeType.toString().trim().equalsIgnoreCase("Paratype")){
1095
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.PARATYPE());
1096
		}else{
1097
			logger.warn("Unhandled type string: " + typeType);
1098
		}
1099
		DerivedUnit specimen = DerivedUnit.NewPreservedSpecimenInstance();
1100
		if (typeText.length() > 255){
1101
			specimen.setTitleCache(typeText.substring(0, 252) + "...", true);
1102
		}else{
1103
			specimen.setTitleCache(typeText, true);
1104
		}
1105
		specimen.putDefinition(Language.ENGLISH(), typeText);
1106
		((SpecimenTypeDesignation) typeDesignation).setTypeSpecimen(specimen);
1107
	}
1108

    
1109
	private TypeDesignationBase getTypeDesignationAndReference(StringBuffer typeType) {
1110
		TypeDesignationBase result;
1111
		Reference ref = parseTypeDesignationReference(typeType);
1112
		if (typeType.indexOf(" species")>-1 || typeType.indexOf("genus")>-1){
1113
			if (typeType.indexOf(" species")>-1 ){
1114
				result = NameTypeDesignation.NewInstance();
1115
				int start = typeType.indexOf(" species");
1116
				typeType.replace(start, start + " species".length(), "");
1117
			}else {
1118
				result = NameTypeDesignation.NewInstance();
1119
				int start = typeType.indexOf(" genus");
1120
				typeType.replace(start, start + " genus".length(), "");
1121
			}
1122
		}else{
1123
			result = SpecimenTypeDesignation.NewInstance();
1124
		}
1125
		result.setCitation(ref);
1126
		return result;
1127
	}
1128

    
1129

    
1130
	private Reference parseTypeDesignationReference(StringBuffer typeType) {
1131
		Reference result = null;
1132
		String reBracketReference = "\\(.*\\)";
1133
		Pattern patBracketReference = Pattern.compile(reBracketReference);
1134
		Matcher matcher = patBracketReference.matcher(typeType);
1135
		if (matcher.find()){
1136
			String refString = matcher.group();
1137
			int start = typeType.indexOf(refString);
1138
			typeType.replace(start, start + refString.length(), "");
1139
			refString = refString.replace("(", "").replace(")", "").trim();
1140
			Reference ref = ReferenceFactory.newGeneric();
1141
			ref.setTitleCache(refString, true);
1142
			result = ref;
1143
		}
1144
		return result;
1145
	}
1146

    
1147

    
1148
	/**
1149
	 * @param state
1150
	 * @param elNom
1151
	 * @param taxon
1152
	 */
1153
	//body/taxon/
1154
	private HomotypicalGroup handleNomTaxon(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup, boolean isSynonym) {
1155
		NonViralName name = makeName(taxon, homotypicalGroup, isSynonym);
1156
		String num = null;
1157
		
1158
		boolean hasGenusInfo = false;
1159
		TeamOrPersonBase lastTeam = null;
1160
		
1161
		//genus
1162
		List<Element> elGenus = XmlHelp.getAttributedChildListWithValue(elNom, "name", "class", "genus");
1163
		if (elGenus.size() > 0){
1164
			hasGenusInfo = true;
1165
		}else{
1166
			logger.debug ("No Synonym Genus");
1167
		}
1168
		//infra rank -> needed to handle authors correctly
1169
		List<Element> elInfraRank = XmlHelp.getAttributedChildListWithValue(elNom, "name", "class", "infrank");
1170
		Rank infraRank = null;
1171
		infraRank = handleInfRank(name, elInfraRank, infraRank);
1172
		
1173
		//get left over elements
1174
		List<Element> elements = elNom.getChildren();
1175
		elements.removeAll(elInfraRank);
1176
		
1177
		for (Element element : elements){
1178
			if (element.getName().equals("name")){
1179
				String classValue = element.getAttributeValue("class");
1180
				String value = element.getValue().trim();
1181
				if (classValue.equalsIgnoreCase("genus") || classValue.equalsIgnoreCase("family") ){
1182
					name.setGenusOrUninomial(value);
1183
				}else if (classValue.equalsIgnoreCase("family") ){
1184
					name.setGenusOrUninomial(value);
1185
					name.setRank(Rank.FAMILY());
1186
				}else if (classValue.equalsIgnoreCase("subgenus")){
1187
					//name.setInfraGenericEpithet(value);
1188
					name.setNameCache(value.replace(":", "").trim());
1189
					name.setRank(Rank.SUBGENUS());
1190
				}else if (classValue.equalsIgnoreCase("epithet") ){
1191
					if (hasGenusInfo == true){
1192
						name.setSpecificEpithet(value);
1193
					}else{
1194
						handleInfraspecificEpithet(element, classValue, name);
1195
					}
1196
				}else if (classValue.equalsIgnoreCase("author")){
1197
					handleNameAuthors(element, name);
1198
				}else if (classValue.equalsIgnoreCase("paraut")){
1199
					handleBasionymAuthor(state, element, name, false);
1200
				}else if (classValue.equalsIgnoreCase("infrauthor") || classValue.equalsIgnoreCase("infraut")){
1201
					handleInfrAuthor(state, element, name, true);
1202
				}else if (classValue.equalsIgnoreCase("infrapar") || classValue.equalsIgnoreCase("infrpar") || classValue.equalsIgnoreCase("parauthor") ){
1203
					handleBasionymAuthor(state, element, name, true);
1204
				}else if (classValue.equalsIgnoreCase("infrepi")){
1205
					handleInfrEpi(name, infraRank, value);
1206
				}else if (classValue.equalsIgnoreCase("pub")){
1207
					lastTeam = handleNomenclaturalReference(name, value);
1208
				}else if (classValue.equalsIgnoreCase("usage")){
1209
					lastTeam = handleNameUsage(taxon, name, value, lastTeam);
1210
				}else if (classValue.equalsIgnoreCase("note")){
1211
					handleNameNote(name, value);
1212
				}else if (classValue.equalsIgnoreCase("num")){
1213
					if (num != null){
1214
						logger.warn("Duplicate num: " + value);
1215
					}else{
1216
						num = value;
1217
					}
1218
					if (isSynonym == true){
1219
						logger.warn("Synonym should not have a num");
1220
					}
1221
				}else if (classValue.equalsIgnoreCase("typification")){
1222
					logger.warn("Typification should not be a nom class");
1223
				}else{
1224
					logger.warn("Unhandled name class: " +  classValue);
1225
				}
1226
			}else if(element.getName().equals("homonym")){
1227
				handleHomonym(state, element, name);
1228
			}else{
1229
				// child element is not "name"
1230
				unhandledNomChildren.add(element.getName());
1231
			}
1232
		}
1233
		
1234
		//handle key
1235
		if (! isSynonym){
1236
			String taxonString = name.getNameCache();
1237
			//try to find matching lead nodes 
1238
			UnmatchedLeadsKey leadsKey = UnmatchedLeadsKey.NewInstance(num, taxonString);
1239
			Set<PolytomousKeyNode> matchingNodes = handleMatchingNodes(state, taxon, leadsKey);
1240
			//same without using the num
1241
			if (num != null){
1242
				UnmatchedLeadsKey noNumLeadsKey = UnmatchedLeadsKey.NewInstance("", taxonString);
1243
				handleMatchingNodes(state, taxon, noNumLeadsKey);
1244
			}
1245
			if (matchingNodes.isEmpty() && num != null){
1246
				logger.warn("Taxon has num but no matching nodes exist: " + num+ ", Key: " + leadsKey.toString());
1247
			}
1248
		}
1249
		
1250
		//test nom element has no text
1251
		if (StringUtils.isNotBlank(elNom.getTextNormalize().replace("—", "").replace("\u002d","").replace("\u2013", ""))){
1252
			String strElNom = elNom.getTextNormalize();
1253
			if ("?".equals(strElNom)){
1254
				handleQuestionMark(name, taxon);
1255
			}
1256
//			Character c = strElNom.charAt(0);
1257
			//System.out.println(CharUtils.unicodeEscaped(c));
1258
			logger.warn("Nom tag has text: " + strElNom);
1259
		}
1260
		
1261
		return name.getHomotypicalGroup();
1262
	}
1263

    
1264

    
1265
	private void handleQuestionMark(NonViralName name, Taxon taxon) {
1266
		int count = name.getTaxonBases().size();
1267
		if (count != 1){
1268
			logger.warn("Name has " + count + " taxa. This is not handled for question mark");
1269
		}else{
1270
			TaxonBase taxonBase = (TaxonBase)name.getTaxonBases().iterator().next();
1271
			taxonBase.setDoubtful(true);
1272
		}
1273
	}
1274

    
1275

    
1276
	//merge with handleNomTaxon	
1277
	private void handleHomonym(EfloraImportState state, Element elHomonym, NonViralName upperName) {
1278
		verifyNoAttribute(elHomonym);
1279
		
1280
		//hommonym name
1281
		BotanicalName homonymName = BotanicalName.NewInstance(upperName.getRank());
1282
		homonymName.setGenusOrUninomial(upperName.getGenusOrUninomial());
1283
		homonymName.setInfraGenericEpithet(upperName.getInfraGenericEpithet());
1284
		homonymName.setSpecificEpithet(upperName.getSpecificEpithet());
1285
		homonymName.setInfraSpecificEpithet(upperName.getInfraSpecificEpithet());
1286

    
1287
		for (Element elName : (List<Element>)elHomonym.getChildren("name")){
1288
			String classValue = elName.getAttributeValue("class");
1289
			String value = elName.getValue().trim();
1290
			if (classValue.equalsIgnoreCase("genus") ){
1291
				homonymName.setGenusOrUninomial(value);
1292
			}else if (classValue.equalsIgnoreCase("epithet") ){
1293
				homonymName.setSpecificEpithet(value);
1294
			}else if (classValue.equalsIgnoreCase("author")){
1295
				handleNameAuthors(elName, homonymName);
1296
			}else if (classValue.equalsIgnoreCase("paraut")){
1297
				handleBasionymAuthor(state, elName, homonymName, true);
1298
			}else if (classValue.equalsIgnoreCase("pub")){
1299
				handleNomenclaturalReference(homonymName, value);
1300
			}else if (classValue.equalsIgnoreCase("note")){
1301
				handleNameNote(homonymName, value);
1302
			}else{
1303
				logger.warn("Unhandled class value: " + classValue);
1304
			}
1305
		}
1306
		//TODO verify other information
1307
		
1308

    
1309
		//rel
1310
		boolean homonymIsLater = false;
1311
		NameRelationshipType relType = NameRelationshipType.LATER_HOMONYM();
1312
		if (upperName.getNomenclaturalReference() != null && homonymName.getNomenclaturalReference() != null){
1313
			TimePeriod homonymYear = homonymName.getNomenclaturalReference().getDatePublished();
1314
			TimePeriod nameYear = upperName.getNomenclaturalReference().getDatePublished();
1315
			homonymIsLater = homonymYear.getStart().compareTo(nameYear.getStart())  > 0;
1316
		}else{
1317
			if (upperName.getNomenclaturalReference() == null){
1318
				logger.warn("Homonym parent does not have a nomenclatural reference or year: " + upperName.getTitleCache());
1319
			}
1320
			if (homonymName.getNomenclaturalReference() == null){
1321
				logger.warn("Homonym does not have a nomenclatural reference or year: " + homonymName.getTitleCache());
1322
			}
1323
		}
1324
		if (homonymIsLater){
1325
			homonymName.addRelationshipToName(upperName, relType, null);
1326
		}else{
1327
			upperName.addRelationshipToName(homonymName, relType, null);
1328
		}
1329
		
1330
	}
1331

    
1332

    
1333
	/**
1334
	 * @param state
1335
	 * @param taxon
1336
	 * @param leadsKey
1337
	 * @return
1338
	 */
1339
	private Set<PolytomousKeyNode> handleMatchingNodes(EfloraImportState state, Taxon taxon, UnmatchedLeadsKey leadsKey) {
1340
		Set<PolytomousKeyNode> matchingNodes = state.getUnmatchedLeads().getNodes(leadsKey);
1341
		for (PolytomousKeyNode matchingNode : matchingNodes){
1342
			state.getUnmatchedLeads().removeNode(leadsKey, matchingNode);
1343
			matchingNode.setTaxon(taxon);
1344
			state.getPolytomousKeyNodesToSave().add(matchingNode);
1345
		}
1346
		return matchingNodes;
1347
	}
1348

    
1349

    
1350
	private void handleNameNote(NonViralName name, String value) {
1351
		logger.warn("Name note: " + value + ". Available in portal?");
1352
		Annotation annotation = Annotation.NewInstance(value, AnnotationType.EDITORIAL(), Language.DEFAULT());
1353
		name.addAnnotation(annotation);
1354
	}
1355

    
1356

    
1357
	/**
1358
	 * @param taxon
1359
	 * @param name
1360
	 * @param value
1361
	 */
1362
	protected TeamOrPersonBase handleNameUsage(Taxon taxon, NonViralName<?> name, String referenceTitle, TeamOrPersonBase lastTeam) {
1363
		Reference<?> ref = ReferenceFactory.newGeneric();
1364
		referenceTitle = removeStartingSymbols(referenceTitle, ref);
1365
		
1366
		ref.setTitleCache(referenceTitle, true);
1367
		String microReference = parseReferenceYearAndDetail(ref);
1368
		TeamOrPersonBase<?> team = getReferenceAuthor(ref);
1369
		parseReferenceType(ref);
1370
		if (team == null){
1371
			team = lastTeam;
1372
		}
1373
		ref.setAuthorship(team);
1374
		
1375
		TaxonDescription description = getDescription(taxon);
1376
		TextData textData = TextData.NewInstance(Feature.CITATION());
1377
		textData.addSource(OriginalSourceType.PrimaryTaxonomicSource, null, null, ref, microReference, name, null);
1378
		description.addElement(textData);
1379
		return team;
1380
	}
1381

    
1382

    
1383
	/**
1384
	 * @param referenceTitle
1385
	 * @param ref
1386
	 * @return
1387
	 */
1388
	private String removeStartingSymbols(String referenceTitle,	Reference ref) {
1389
		if (referenceTitle.startsWith(";") || referenceTitle.startsWith(",") || referenceTitle.startsWith(":")){
1390
			referenceTitle = referenceTitle.substring(1).trim();
1391
			ref.setTitleCache(referenceTitle);
1392
		}
1393
		return referenceTitle;
1394
	}
1395

    
1396

    
1397
	private void parseReferenceType(Reference ref) {
1398
		String title = ref.getTitle();
1399
		if (title == null){
1400
			return;
1401
		}
1402
		title = title.trim();
1403
		//no in reference
1404
		if (! title.startsWith("in ")){
1405
			ref.setType(ReferenceType.Book);
1406
			return;
1407
		}
1408
		
1409
		title = title.substring(3);
1410
		//in reference
1411
		//no ,
1412
		if (title.indexOf(",") == -1){
1413
			ref.setType(ReferenceType.Article);
1414
			IJournal journal = ReferenceFactory.newJournal();
1415
			journal.setTitle(title);
1416
			ref.setTitle(null);
1417
			ref.setInJournal(journal);
1418
			//return;
1419
		}else{
1420
			//,-references
1421
			ref.setType(ReferenceType.BookSection);
1422
			String[] split = (title).split(",\\s*[A-Z]");
1423
			if (split.length <= 1){
1424
				logger.warn("Can not fully decide what reference type. Guess it is a book section: " + title );
1425
			}
1426
			IBook book = ReferenceFactory.newBook();
1427
			Team bookTeam = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1428
			try {
1429
				title = title.substring(split[0].length() + 1).trim();
1430
			} catch (Exception e) {
1431
				logger.error("ERROR occurred when trying to split title: " +  title + "; split[0]: + " + split[0]);
1432
			}
1433
			book.setTitle(title);
1434
			book.setAuthorship(bookTeam);
1435
			book.setDatePublished(ref.getDatePublished());
1436
			ref.setTitle(null);
1437
			ref.setInBook(book);
1438
		}		
1439
	}
1440

    
1441

    
1442
	protected Team getReferenceAuthor (Reference ref) {
1443
		boolean isCache = false;
1444
		String referenceTitle = ref.getTitle();
1445
		if (referenceTitle == null){
1446
			isCache = true;
1447
			referenceTitle = ref.getTitleCache();
1448
		}
1449
		//in references
1450
		String[] split = (" " + referenceTitle).split(" in ");
1451
		if (split.length > 1){
1452
			if (StringUtils.isNotBlank(split[0])){
1453
				//' in ' is within the reference string, take the preceding string as the team
1454
				Team team = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1455
				if (! isCache){
1456
					ref.setTitle("in " + split[1]);
1457
				}
1458
				return team;
1459
			}else{
1460
				//string starts with in therefore no author is given
1461
				return null;
1462
			}
1463
		}
1464
		//no ,-reference
1465
		split = referenceTitle.split(",");
1466
		if (split.length < 2){
1467
			//no author is given
1468
			return null;
1469
		}
1470
		
1471
		//,-references
1472
		split = (referenceTitle).split(",\\s*[A-Z]");
1473
		if (split.length > 1){
1474
			Team team = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1475
			if (! isCache){
1476
				ref.setTitle(referenceTitle.substring(split[0].length()+1).trim());
1477
			}
1478
			return team;
1479
		}else{
1480
			logger.warn("Can't decide if a usage has an author: " + referenceTitle );
1481
			return null;
1482
		}
1483
	}
1484

    
1485

    
1486
	/**
1487
	 * Replaced by <homonym> tag but still in use for exceptions
1488
	 * @param detail
1489
	 * @param name
1490
	 * @return
1491
	 */
1492
	protected String parseHomonym(String detail, NonViralName name) {
1493
		String result;
1494
		if (detail == null){
1495
			return detail;
1496
		}
1497

    
1498
		
1499
		//non RE
1500
		String reNon = "(\\s|,)non\\s";
1501
		Pattern patReference = Pattern.compile(reNon);
1502
		Matcher matcher = patReference.matcher(detail);
1503
		if (matcher.find()){
1504
			int start = matcher.start();
1505
			int end = matcher.end();
1506
			
1507
			if (detail != null){
1508
				logger.warn("Unhandled non part: " + detail.substring(start));
1509
				return detail;
1510
			}
1511
			
1512
			result = detail.substring(0, start);
1513

    
1514
			//homonym string
1515
			String homonymString = detail.substring(end);
1516
			
1517
			//hommonym name
1518
			BotanicalName homonymName = BotanicalName.NewInstance(name.getRank());
1519
			homonymName.setGenusOrUninomial(name.getGenusOrUninomial());
1520
			homonymName.setInfraGenericEpithet(name.getInfraGenericEpithet());
1521
			homonymName.setSpecificEpithet(name.getSpecificEpithet());
1522
			homonymName.setInfraSpecificEpithet(name.getInfraSpecificEpithet());
1523
			Reference homonymNomRef = ReferenceFactory.newGeneric();
1524
			homonymNomRef.setTitleCache(homonymString);
1525
			String homonymNomRefDetail = parseReferenceYearAndDetail(homonymNomRef);
1526
			homonymName.setNomenclaturalMicroReference(homonymNomRefDetail);
1527
			String authorTitle = homonymNomRef.getTitleCache();
1528
			Team team = Team.NewTitledInstance(authorTitle, authorTitle);
1529
			homonymNomRef.setAuthorship(team);
1530
			homonymNomRef.setTitle("");
1531
			homonymNomRef.setProtectedTitleCache(false);
1532
			
1533
			//rel
1534
			boolean homonymIsLater = false;
1535
			NameRelationshipType relType = NameRelationshipType.LATER_HOMONYM();
1536
			TimePeriod homonymYear = homonymNomRef.getDatePublished();
1537
			if (name.getNomenclaturalReference() != null){
1538
				TimePeriod nameYear = name.getNomenclaturalReference().getDatePublished();
1539
				homonymIsLater = homonymYear.getStart().compareTo(nameYear.getStart())  > 0;
1540
			}else{
1541
				logger.warn("Classification name has no nomenclatural reference");
1542
			}
1543
			if (homonymIsLater){
1544
				homonymName.addRelationshipToName(name, relType, null);
1545
			}else{
1546
				name.addRelationshipToName(homonymName, relType, null);
1547
			}
1548
			
1549
		}else{
1550
			return detail;
1551
		}
1552
		return result;
1553
	}
1554

    
1555

    
1556
	/**
1557
	 * @Xpath body/taxon/nomenclature/homotypes/nom/name[@class="pub"]
1558
	 * @param name
1559
	 * @param value
1560
	 */
1561
	protected TeamOrPersonBase handleNomenclaturalReference(NonViralName name, String value) {
1562
		Reference nomRef = ReferenceFactory.newGeneric();
1563
		nomRef.setTitleCache(value, true);
1564
		parseNomStatus(nomRef, name);
1565
		String microReference = parseReferenceYearAndDetail(nomRef);
1566
		name.setNomenclaturalReference(nomRef);
1567
		microReference = parseHomonym(microReference, name);
1568
		name.setNomenclaturalMicroReference(microReference);
1569
		TeamOrPersonBase  team = (TeamOrPersonBase)name.getCombinationAuthorTeam();
1570
		if (team == null){
1571
			logger.warn("Name has nom. ref. but no author team. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
1572
		}else{
1573
			nomRef.setAuthorship(team);
1574
		}
1575
		return team;
1576
	}
1577

    
1578
	private void handleInfrAuthor(EfloraImportState state, Element elAuthor, NonViralName name, boolean overwrite) {
1579
		String strAuthor = elAuthor.getValue().trim();
1580
		if (strAuthor.endsWith(",")){
1581
			strAuthor = strAuthor.substring(0, strAuthor.length() -1);
1582
		}
1583
		TeamOrPersonBase[] team = getTeam(strAuthor);
1584
		if (name.getCombinationAuthorTeam() != null && overwrite == false){
1585
			logger.warn("Try to write combination author for a name that already has a combination author. Neglected.");
1586
		}else{
1587
			name.setCombinationAuthorTeam(team[0]);
1588
			name.setExCombinationAuthorTeam(team[1]);
1589
		}
1590
		
1591
		
1592
	}
1593

    
1594

    
1595
	/**
1596
	 * Sets the names rank according to the infrank value
1597
	 * @param name
1598
	 * @param elements
1599
	 * @param elInfraRank
1600
	 * @param infraRank
1601
	 * @return
1602
	 */
1603
	private Rank handleInfRank(NonViralName name, List<Element> elInfraRank, Rank infraRank) {
1604
		if (elInfraRank.size() == 1){
1605
			String strRank = elInfraRank.get(0).getTextNormalize();
1606
			try {
1607
				infraRank = Rank.getRankByNameOrIdInVoc(strRank);
1608
			} catch (UnknownCdmTypeException e) {
1609
				try{
1610
					infraRank = Rank.getRankByNameOrIdInVoc(strRank + ".");
1611
				} catch (UnknownCdmTypeException e2) {
1612
					logger.warn("Unknown infrank " + strRank + ". Set infraRank to (null).");
1613
				}
1614
			}
1615
		}else if (elInfraRank.size() > 1){
1616
			logger.warn ("There is more than 1 infrank");
1617
		}
1618
		if (infraRank != null){
1619
			name.setRank(infraRank);
1620
		}
1621
		return infraRank;
1622
	}
1623

    
1624

    
1625
	private void handleInfrEpi(NonViralName<?> name, Rank infraRank, String value) {
1626
		if (infraRank != null && infraRank.isInfraSpecific()){
1627
			name.setInfraSpecificEpithet(value);
1628
			if (CdmUtils.isCapital(value)){
1629
				logger.warn("Infraspecific epithet starts with a capital letter: " + value);
1630
			}
1631
		}else if (infraRank != null && infraRank.isInfraGeneric()){
1632
			name.setInfraGenericEpithet(value);
1633
			if (! CdmUtils.isCapital(value)){
1634
				logger.warn("Infrageneric epithet does not start with a capital letter: " + value);
1635
			}
1636
		}else{
1637
			logger.warn("Infrepi could not be handled: " + value);
1638
		}
1639
	}
1640

    
1641

    
1642

    
1643
	/**
1644
	 * Returns the (empty) with the correct homotypical group depending on the taxon status
1645
	 * @param taxon
1646
	 * @param homotypicalGroup
1647
	 * @param isSynonym
1648
	 * @return
1649
	 */
1650
	private NonViralName makeName(Taxon taxon,HomotypicalGroup homotypicalGroup, boolean isSynonym) {
1651
		NonViralName name;
1652
		if (isSynonym){
1653
			name = BotanicalName.NewInstance(Rank.SPECIES(), homotypicalGroup);
1654
			SynonymRelationshipType synonymType = SynonymRelationshipType.HETEROTYPIC_SYNONYM_OF();
1655
			if (taxon.getHomotypicGroup().equals(homotypicalGroup)){
1656
				synonymType = SynonymRelationshipType.HOMOTYPIC_SYNONYM_OF();
1657
			}
1658
			taxon.addSynonymName(name, synonymType);
1659
		}else{
1660
			name = (NonViralName)taxon.getName();
1661
		}
1662
		return name;
1663
	}
1664

    
1665

    
1666
	/**
1667
	 * @param element
1668
	 * @param taxon
1669
	 */
1670
	private void handleInfraspecificEpithet(Element element, String attrValue, NonViralName<?> name) {
1671
		String value = element.getTextNormalize();
1672
		if (value.indexOf("subsp.") != -1){
1673
			//TODO genus and species epi
1674
			String infrEpi = value.substring(value.indexOf("subsp.") + 6).trim();
1675
			name.setInfraSpecificEpithet(infrEpi);
1676
			name.setRank(Rank.SUBSPECIES());
1677
		}else if (value.indexOf("var.") != -1){
1678
			//TODO genus and species epi
1679
			String infrEpi = value.substring(value.indexOf("var.") + 4).trim();
1680
			name.setInfraSpecificEpithet(infrEpi);
1681
			name.setRank(Rank.VARIETY());
1682
		}else{
1683
			logger.warn("Unhandled infraspecific type: " + value);
1684
		}
1685
	}
1686

    
1687

    
1688
	/**
1689
	 * @param state
1690
	 * @param element
1691
	 * @param name
1692
	 */
1693
	private void handleBasionymAuthor(EfloraImportState state, Element elBasionymAuthor, NonViralName name, boolean overwrite) {
1694
		String strAuthor = elBasionymAuthor.getValue().trim();
1695
		Pattern reBasionymAuthor = Pattern.compile("^\\(.*\\)$");
1696
		if (reBasionymAuthor.matcher(strAuthor).matches()){
1697
			strAuthor = strAuthor.substring(1, strAuthor.length()-1);
1698
		}else{
1699
			logger.warn("Brackets are missing for original combination author " + strAuthor);
1700
		}
1701
		TeamOrPersonBase[] basionymTeam = getTeam(strAuthor);
1702
		if (name.getBasionymAuthorTeam() != null && overwrite == false){
1703
			logger.warn("Try to write basionym author for a name that already has a basionym author. Neglected.");
1704
		}else{
1705
			name.setBasionymAuthorTeam(basionymTeam[0]);
1706
			name.setExBasionymAuthorTeam(basionymTeam[1]);
1707

    
1708
		}
1709
	}
1710

    
1711
	//maps an author string to the uuid of the team or person created for it (filled in getTeam, read in getUuidTeam)
	private Map<String, UUID> teamMap = new HashMap<String, UUID>();
1712
	/**
1713
	 * @param elAuthors
1714
	 * @param name
1715
	 * @param elNom 
1716
	 */
1717
	private void handleNameAuthors(Element elAuthor, NonViralName name) {
1718
		if (name.getCombinationAuthorTeam() != null){
1719
			logger.warn("Name already has a combination author. Name: " +  name.getTitleCache() + ", Author: " + elAuthor.getTextNormalize());
1720
		}
1721
		String strAuthor = elAuthor.getValue().trim();
1722
		if (strAuthor.endsWith(",")){
1723
			strAuthor = strAuthor.substring(0, strAuthor.length() -1);
1724
		}
1725
		if (strAuthor.indexOf("(") > -1 || strAuthor.indexOf(")") > -1){
1726
			logger.warn("Author has brackets. Basionym authors should be handled in separate tags: " + strAuthor);
1727
		}
1728
		TeamOrPersonBase[] team = getTeam(strAuthor);
1729
		name.setCombinationAuthorTeam(team[0]);
1730
		name.setExCombinationAuthorTeam(team[1]);
1731
	}
1732

    
1733

    
1734
	/**
1735
	 * @param strAuthor
1736
	 * @return
1737
	 */
1738
	private TeamOrPersonBase[] getTeam(String strAuthor) {
1739
		TeamOrPersonBase[] result = new TeamOrPersonBase[2];
1740
		String[] split = strAuthor.split(" ex ");
1741
		String strBaseAuthor = null;
1742
		String strExAuthor = null;
1743
		
1744
		if (split.length == 2){
1745
			strBaseAuthor = split[1]; 
1746
			strExAuthor = split[0];	
1747
		}else if (split.length == 1){
1748
			strBaseAuthor = split[0];
1749
		}else{
1750
			logger.warn("Could not parse (ex) author: " + strAuthor);
1751
		}
1752
		result[0] = getUuidTeam(strBaseAuthor);
1753
		if (result[0] == null){
1754
			result[0] = parseSingleTeam(strBaseAuthor);
1755
			teamMap.put(strBaseAuthor, result[0].getUuid());
1756
		}
1757
		if (strExAuthor != null){
1758
			result[1] = getUuidTeam(strExAuthor);
1759
			if (result[1] == null){
1760
				result[1] = Team.NewInstance();
1761
				result[1].setTitleCache(strExAuthor, true);
1762
				teamMap.put(strExAuthor, result[1].getUuid());
1763
			}
1764
		
1765
		}	
1766
		return result;
1767
	}
1768

    
1769

    
1770
	protected TeamOrPersonBase parseSingleTeam(String strBaseAuthor) {
1771
		TeamOrPersonBase result;
1772
		String[] split = strBaseAuthor.split("&");
1773
		if (split.length > 1){
1774
			result = Team.NewInstance();
1775
			for (String personString : split){
1776
				Person person = makePerson(personString);
1777
				((Team)result).addTeamMember(person);
1778
			}
1779
		}else{
1780
			result = makePerson(strBaseAuthor.trim());
1781
		}
1782
		return result;
1783
	}
1784

    
1785

    
1786
	/**
1787
	 * @param personString
1788
	 * @return
1789
	 */
1790
	private Person makePerson(String personString) {
1791
		personString = personString.trim();
1792
		Person person = Person.NewTitledInstance(personString);
1793
		person.setNomenclaturalTitle(personString);
1794
		return person;
1795
	}
1796

    
1797

    
1798
	/**
1799
	 * @param result
1800
	 * @param strBaseAuthor
1801
	 */
1802
	private TeamOrPersonBase getUuidTeam(String strBaseAuthor) {
1803
		UUID uuidTeam = teamMap.get(strBaseAuthor);
1804
		return CdmBase.deproxy(getAgentService().find(uuidTeam), TeamOrPersonBase.class);
1805
	}
1806

    
1807

    
1808
	private void handleDescription(EfloraImportState state, Element elDescription, Taxon taxon, Set<String> unhandledChildren) {
1809
		verifyNoAttribute(elDescription);
1810
		
1811
		List<Element> elements = elDescription.getChildren();
1812
		for (Element element : elements){
1813
			if (element.getName().equalsIgnoreCase("char")){
1814
				handleChar(state, element, taxon);
1815
			}else{
1816
				logger.warn("Unhandled description child: " + element.getName());
1817
			}
1818
		}
1819
		
1820
	}
1821
	
1822
	
1823
	/**
1824
	 * @param state
1825
	 * @param element
1826
	 * @param taxon
1827
	 */
1828
	private void handleChar(EfloraImportState state, Element element, Taxon taxon) {
1829
		List<Attribute> attributes = element.getAttributes();
1830
		for (Attribute attribute : attributes){
1831
			if (! attribute.getName().equalsIgnoreCase("class")){
1832
				logger.warn("Char has unhandled attribute " +  attribute.getName());
1833
			}else{
1834
				String classValue = attribute.getValue();
1835
				Feature feature = getFeature(classValue, state);
1836
				if (feature == null){
1837
					logger.warn("Unhandled feature: " + classValue);
1838
				}else{
1839
					String value = element.getValue();
1840
					addDescriptionElement(state, taxon, value, feature, null);
1841
				}
1842
				
1843
			}
1844
		}
1845
		
1846
		List<Element> elements = element.getChildren();
1847
		if (! elements.isEmpty()){
1848
			logger.warn("Char has unhandled children");
1849
		}
1850
	}
1851

    
1852

    
1853
	/**
1854
	 * @param taxon
1855
	 * @return
1856
	 */
1857
	protected TaxonDescription getDescription(Taxon taxon) {
1858
		for (TaxonDescription description : taxon.getDescriptions()){
1859
			if (! description.isImageGallery()){
1860
				return description;
1861
			}
1862
		}
1863
		TaxonDescription newDescription = TaxonDescription.NewInstance(taxon);
1864
		return newDescription;
1865
	}
1866

    
1867

    
1868
	/**
1869
	 * @param classValue
1870
	 * @param state 
1871
	 * @return
1872
	 * @throws UndefinedTransformerMethodException 
1873
	 */
1874
	private Feature getFeature(String classValue, EfloraImportState state) {
1875
		UUID uuid;
1876
		try {
1877
			uuid = state.getTransformer().getFeatureUuid(classValue);
1878
			if (uuid == null){
1879
				logger.info("Uuid is null for " + classValue);
1880
			}
1881
			String featureText = StringUtils.capitalize(classValue);
1882
			//TODO eFlora feature vocabulary
1883
			Feature feature = getFeature(state, uuid, featureText, featureText, classValue, null);
1884
			if (feature == null){
1885
				throw new NullPointerException(classValue + " not recognized as a feature");
1886
			}
1887
			return feature;
1888
		} catch (Exception e) {
1889
			logger.warn("Could not create feature for " + classValue + ": " + e.getMessage()) ;
1890
			return Feature.UNKNOWN();
1891
		}
1892
	}
1893

    
1894

    
1895
	/**
1896
	 * @param state
1897
	 * @param element
1898
	 * @param taxon
1899
	 * @param unhandledTitleClassess 
1900
	 */
1901
	private void handleTitle(EfloraImportState state, Element element, Taxon taxon, Set<String> unhandledTitleClassess) {
1902
		// attributes
1903
		List<Attribute> attributes = element.getAttributes();
1904
		for (Attribute attribute : attributes){
1905
			if (! attribute.getName().equalsIgnoreCase("class") ){
1906
				if (! attribute.getName().equalsIgnoreCase("num")){
1907
					logger.warn("Title has unhandled attribute " +  attribute.getName());
1908
				}else{
1909
					//TODO num attribute in taxon
1910
				}
1911
			}else{
1912
				String classValue = attribute.getValue();
1913
				try {
1914
					Rank rank;
1915
					try {
1916
						rank = Rank.getRankByNameOrIdInVoc(classValue);
1917
					} catch (Exception e) {
1918
						//TODO nc
1919
						rank = Rank.getRankByEnglishName(classValue, NomenclaturalCode.ICNAFP, false);
1920
					}
1921
					taxon.getName().setRank(rank);
1922
					if (rank.equals(Rank.FAMILY()) || rank.equals(Rank.GENUS())){
1923
						handleGenus(element.getValue(), taxon.getName());
1924
					}else if (rank.equals(Rank.SUBGENUS())){
1925
						handleSubGenus(element.getValue(), taxon.getName());
1926
					}else if (rank.equals(Rank.SECTION_BOTANY())){
1927
						handleSection(element.getValue(), taxon.getName());
1928
					}else if (rank.equals(Rank.SPECIES())){
1929
						handleSpecies(element.getValue(), taxon.getName());
1930
					}else if (rank.equals(Rank.SUBSPECIES())){
1931
						handleSubSpecies(element.getValue(), taxon.getName());
1932
					}else if (rank.equals(Rank.VARIETY())){
1933
						handleVariety(element.getValue(), taxon.getName());
1934
					}else{
1935
						logger.warn("Unhandled rank: " + rank.getLabel());
1936
					}
1937
				} catch (UnknownCdmTypeException e) {
1938
					logger.warn("Unknown rank " + classValue);
1939
					unhandledTitleClassess.add(classValue);
1940
				}
1941
			}
1942
		}
1943
		List<Element> elements = element.getChildren();
1944
		if (! elements.isEmpty()){
1945
			logger.warn("Title has unexpected children");
1946
		}
1947
		UUID uuidTitle = EfloraTransformer.uuidTitle;
1948
		ExtensionType titleExtension = this.getExtensionType(state, uuidTitle, "title", "title", "title");
1949
		taxon.addExtension(element.getTextNormalize(), titleExtension);
1950
		
1951
	}
1952

    
1953

    
1954
	/**
1955
	 * @param value
1956
	 * @param taxonNameBase 
1957
	 */
1958
	private void handleSubGenus(String value, TaxonNameBase taxonNameBase) {
1959
		String name = value.replace("Subgenus", "").trim();
1960
		((NonViralName)taxonNameBase).setInfraGenericEpithet(name);
1961
	}
1962
	
1963
	/**
1964
	 * @param value
1965
	 * @param taxonNameBase 
1966
	 */
1967
	private void handleSection(String value, TaxonNameBase taxonNameBase) {
1968
		String name = value.replace("Section", "").trim();
1969
		((NonViralName)taxonNameBase).setInfraGenericEpithet(name);
1970
	}
1971
	
1972
	/**
1973
	 * @param value
1974
	 * @param taxonNameBase 
1975
	 */
1976
	private void handleSpecies(String value, TaxonNameBase taxonNameBase) {
1977
		//do nothing
1978
	}
1979
	
1980
	/**
1981
	 * @param value
1982
	 * @param taxonNameBase 
1983
	 */
1984
	private void handleVariety(String value, TaxonNameBase taxonNameBase) {
1985
		//do nothing
1986
	}
1987
	
1988
	/**
1989
	 * @param value
1990
	 * @param taxonNameBase 
1991
	 */
1992
	private void handleSubSpecies(String value, TaxonNameBase taxonNameBase) {
1993
		//do nothing
1994
	}
1995

    
1996
	
1997
	//matches text starting with "[" or "(" and ending with "]" or ")" (greedy); used by handleGenus to find the author part of a title
	private Pattern rexGenusAuthor = Pattern.compile("(\\[|\\().*(\\]|\\))");
1998
	
1999
	/**
2000
	 * @param value
2001
	 * @param taxonNameBase 
2002
	 */
2003
	protected void handleGenus(String value, TaxonNameBase taxonName) {
2004
		Matcher matcher = rexGenusAuthor.matcher(value);
2005
		if (matcher.find()){
2006
			String author = matcher.group();
2007
//			String genus = value.replace(author, "");
2008
			author = author.substring(1, author.length() - 1);
2009
			Team team = Team.NewInstance();
2010
			team.setTitleCache(author, true);
2011
			Credit credit = Credit.NewInstance(team, null);
2012
			taxonName.addCredit(credit);
2013
//			NonViralName nvn = (NonViralName)taxonName;
2014
//			nvn.setCombinationAuthorTeam(team);
2015
//			nvn.setGenusOrUninomial(genus);
2016
		}else{
2017
			logger.info("No Author match for " + value);
2018
		}
2019
	}
2020
	
2021

    
2022
	/**
2023
	 * @param taxon
2024
	 * @param lastTaxon
2025
	 */
2026
	private void handleTaxonRelation(EfloraImportState state, Taxon taxon, Taxon lastTaxon) {
2027
		
2028
		Classification tree = getTree(state);
2029
		if (lastTaxon == null){
2030
			tree.addChildTaxon(taxon, null, null);
2031
			return;
2032
		}
2033
		Rank thisRank = taxon.getName().getRank();
2034
		Rank lastRank = lastTaxon.getName().getRank();
2035
		if (lastTaxon.getTaxonNodes().size() > 0){
2036
			TaxonNode lastNode = lastTaxon.getTaxonNodes().iterator().next();
2037
			if (thisRank.isLower(lastRank )  ){
2038
				lastNode.addChildTaxon(taxon, null, null);
2039
				fillMissingEpithetsForTaxa(lastTaxon, taxon);
2040
			}else if (thisRank.equals(lastRank)){
2041
				TaxonNode parent = lastNode.getParent();
2042
				if (parent != null){
2043
					parent.addChildTaxon(taxon, null, null);
2044
					fillMissingEpithetsForTaxa(parent.getTaxon(), taxon);
2045
				}else{
2046
					tree.addChildTaxon(taxon, null, null);
2047
				}
2048
			}else if (thisRank.isHigher(lastRank)){
2049
				handleTaxonRelation(state, taxon, lastNode.getParent().getTaxon());
2050
//				TaxonNode parentNode = handleTaxonRelation(state, taxon, lastNode.getParent().getTaxon());
2051
//				parentNode.addChildTaxon(taxon, null, null, null);
2052
			}
2053
		}else{
2054
			logger.warn("Last taxon has no node");
2055
		}
2056
	}
2057

    
2058

    
2059

    
2060
	/**
2061
	 * @param state
2062
	 * @return 
2063
	 */
2064
	private Classification getTree(EfloraImportState state) {
2065
		Classification result = state.getTree(null);
2066
		if (result == null){
2067
			UUID uuid = state.getConfig().getClassificationUuid();
2068
			if (uuid == null){
2069
				logger.warn("No classification uuid is defined");
2070
				result = getNewClassification(state);
2071
			}else{
2072
				result = getClassificationService().find(uuid);
2073
				if (result == null){
2074
					result = getNewClassification(state);
2075
					result.setUuid(uuid);
2076
				}
2077
			}
2078
			state.putTree(null, result);
2079
		}
2080
		return result;
2081
	}
2082

    
2083

    
2084
	private Classification getNewClassification(EfloraImportState state) {
2085
		Classification result;
2086
		result = Classification.NewInstance(state.getConfig().getClassificationTitle());
2087
		state.putTree(null, result);
2088
		return result;
2089
	}
2090

    
2091

    
2092
	/**
2093
	 * @param state 
2094
	 * @param taxon
2095
	 * @param value
2096
	 * @param feature
2097
	 * @return 
2098
	 */
2099
	private TextData addDescriptionElement(EfloraImportState state, Taxon taxon, String value, Feature feature, String references) {
2100
		TextData textData = TextData.NewInstance(feature);
2101
		Language textLanguage = getDefaultLanguage(state);
2102
		textData.putText(textLanguage, value);
2103
		TaxonDescription description = getDescription(taxon);
2104
		description.addElement(textData);
2105
		if (references != null){
2106
			makeOriginalSourceReferences(textData, ";", references);
2107
		}
2108
		return textData;
2109
	}
2110

    
2111
	private Language getDefaultLanguage(EfloraImportState state) {
2112
		UUID defaultLanguageUuid = state.getConfig().getDefaultLanguageUuid();
2113
		if (defaultLanguageUuid != null){
2114
			Language result = state.getDefaultLanguage();
2115
			if (result == null || ! result.getUuid().equals(defaultLanguageUuid)){
2116
				result = (Language)getTermService().find(defaultLanguageUuid);
2117
				state.setDefaultLanguage(result);
2118
				if (result == null){
2119
					logger.warn("Default language for " + defaultLanguageUuid +  " does not exist.");
2120
				}
2121
			}
2122
			return result;
2123
		}else{
2124
			return Language.DEFAULT();
2125
		}
2126
	}
2127

    
2128

    
2129
	/**
2130
	 * @param elNomenclature
2131
	 */
2132
	private void verifyNoAttribute(Element element) {
2133
		List<Attribute> attributes = element.getAttributes();
2134
		if (! attributes.isEmpty()){
2135
			logger.warn(element.getName() + " has unhandled attributes: " + attributes.get(0).getValue() + "..." );
2136
		}
2137
	}
2138
	
2139
	/**
2140
	 * @param elNomenclature
2141
	 */
2142
	protected void verifyNoChildren(Element element) {
2143
		verifyNoChildren(element, false);
2144
	}
2145
	
2146
	/**
2147
	 * @param elNomenclature
2148
	 */
2149
	private void verifyNoChildren(Element element, boolean ignoreLineBreak) {
2150
		List<Element> children = element.getChildren();
2151
		if (! children.isEmpty()){
2152
			if (ignoreLineBreak == true){
2153
				for (Element child : children){
2154
					if (! child.getName().equalsIgnoreCase("BR")){
2155
						logger.warn(element.getName() + " has unhandled child: " + child.getName());
2156
					}
2157
				}
2158
			}else{
2159
				logger.warn(element.getName() + " has unhandled children");
2160
			}
2161
		}
2162
	}
2163
	
2164
	
2165

    
2166
	/**
2167
	 * Parses the nomenclatural status from the references titleCache. If a nomenclatural status
2168
	 * exists it is added to the name and the nom. status part of the references title cache is 
2169
	 * removed. Requires protected title cache.
2170
	 * @param ref
2171
	 * @param nonViralName
2172
	 */
2173
	protected void parseNomStatus(Reference ref, NonViralName nonViralName) {
2174
		String titleToParse = ref.getTitleCache();
2175
		
2176
		String noStatusTitle = parser.parseNomStatus(titleToParse, nonViralName, true);
2177
		if (! noStatusTitle.equals(titleToParse)){
2178
			ref.setTitleCache(noStatusTitle, true);
2179
		}
2180
	}
2181

    
2182
	
2183
	/**
2184
	 * Extracts the date published part and returns micro reference
2185
	 * @param ref
2186
	 * @return
2187
	 */
2188
	private String parseReferenceYearAndDetail(Reference ref){
2189
		String detailResult = null;
2190
		String titleToParse = ref.getTitleCache();
2191
		titleToParse = removeStartingSymbols(titleToParse, ref);
2192
		String reReference = "^\\.{1,}";
2193
//		String reYear = "\\([1-2]{1}[0-9]{3}\\)";
2194
		String oneMonth = "(Feb.|Dec.|March|June|July)";
2195
		String reYear = oneMonth + "?\\s?[1-2]\\s?[0-9]\\s?[0-9]\\s?[0-9]\\s?";
2196
		String secondYear = "(\\s?[1-2]\\s?[0-9])?\\s?[0-9]\\s?[0-9]\\s?";
2197
		
2198
		String reYearPeriod = "\\(" + reYear + "(\\-" + secondYear + ")?\\)";
2199
		String reDetail = "\\.{1,10}$";
2200
		
2201
		//pattern for the whole string
2202
		Pattern patReference = Pattern.compile(/*reReference +*/ reYearPeriod /*+ reDetail */);
2203
		Matcher matcher = patReference.matcher(titleToParse);
2204
		if (matcher.find()){
2205
			int start = matcher.start();
2206
			int end = matcher.end();
2207
			
2208
			//title and other information precedes the year part
2209
			String title = titleToParse.substring(0, start).trim();
2210
			//detail follows the year part
2211
			String detail = titleToParse.substring(end).trim();
2212
			
2213
			//time period
2214
			String strPeriod = matcher.group().trim();
2215
			strPeriod = strPeriod.substring(1, strPeriod.length()-1);   //remove brackets
2216
			Pattern patStartMonth = Pattern.compile("^" + oneMonth);
2217
			matcher = patStartMonth.matcher(strPeriod);
2218
			strPeriod = strPeriod.replace(" ", "");
2219
			Integer startMonth = null;
2220
			if (matcher.find()){
2221
				end = matcher.end();
2222
				strPeriod = strPeriod.substring(0, end) + " " + strPeriod.substring(end);
2223
				startMonth = getMonth(strPeriod.substring(0, end));
2224
			}
2225
			
2226
			TimePeriod datePublished = TimePeriodParser.parseString(strPeriod);
2227
			if (startMonth != null){
2228
				datePublished.setStartMonth(startMonth);
2229
			}
2230
			ref.setDatePublished(datePublished);
2231
			ref.setTitle(title);
2232
			detailResult = CdmUtils.removeTrailingDot(detail);
2233
			if (detailResult.endsWith(".") || detailResult.endsWith(";") || detailResult.endsWith(",")  ){
2234
				detailResult = detailResult.substring(0, detailResult.length() -1);
2235
			}
2236
			ref.setProtectedTitleCache(false);
2237
		}else{
2238
			logger.warn("Could not parse reference: " +  titleToParse);
2239
		}
2240
		return detailResult;
2241
		
2242
	}
2243

    
2244
	
2245
	
2246
	private Integer getMonth(String month) {
2247
		if (month.startsWith("Jan")){
2248
			return 1;
2249
		}else if (month.startsWith("Feb")){
2250
			return 2;
2251
		}else if (month.startsWith("Mar")){
2252
			return 3;
2253
		}else if (month.startsWith("Apr")){
2254
			return 4;
2255
		}else if (month.startsWith("May")){
2256
			return 5;
2257
		}else if (month.startsWith("Jun")){
2258
			return 6;
2259
		}else if (month.startsWith("Jul")){
2260
			return 7;
2261
		}else if (month.startsWith("Aug")){
2262
			return 8;
2263
		}else if (month.startsWith("Sep")){
2264
			return 9;
2265
		}else if (month.startsWith("Oct")){
2266
			return 10;
2267
		}else if (month.startsWith("Nov")){
2268
			return 11;
2269
		}else if (month.startsWith("Dec")){
2270
			return 12;
2271
		}else{
2272
			logger.warn("Month not yet supported: " + month);
2273
			return null;
2274
		}
2275
	}
2276

    
2277

    
2278
	/* (non-Javadoc)
2279
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
2280
	 */
2281
	protected boolean isIgnore(EfloraImportState state){
2282
		return ! state.getConfig().isDoTaxa();
2283
	}
2284

    
2285
}
(4-4/6)