Project

General

Profile

Download (73.5 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2009 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.io.eflora;
11

    
12
import java.util.ArrayList;
13
import java.util.HashMap;
14
import java.util.HashSet;
15
import java.util.List;
16
import java.util.Map;
17
import java.util.Set;
18
import java.util.UUID;
19
import java.util.regex.Matcher;
20
import java.util.regex.Pattern;
21

    
22
import org.apache.commons.lang.CharUtils;
23
import org.apache.commons.lang.StringUtils;
24
import org.apache.log4j.Logger;
25
import org.jdom.Attribute;
26
import org.jdom.Element;
27
import org.springframework.stereotype.Component;
28
import org.springframework.transaction.TransactionStatus;
29

    
30
import eu.etaxonomy.cdm.common.CdmUtils;
31
import eu.etaxonomy.cdm.common.ResultWrapper;
32
import eu.etaxonomy.cdm.common.XmlHelp;
33
import eu.etaxonomy.cdm.io.common.ICdmIO;
34
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
35
import eu.etaxonomy.cdm.io.eflora.UnmatchedLeads.UnmatchedLeadsKey;
36
import eu.etaxonomy.cdm.model.agent.Person;
37
import eu.etaxonomy.cdm.model.agent.Team;
38
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
39
import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
40
import eu.etaxonomy.cdm.model.common.Annotation;
41
import eu.etaxonomy.cdm.model.common.AnnotationType;
42
import eu.etaxonomy.cdm.model.common.CdmBase;
43
import eu.etaxonomy.cdm.model.common.Credit;
44
import eu.etaxonomy.cdm.model.common.ExtensionType;
45
import eu.etaxonomy.cdm.model.common.ISourceable;
46
import eu.etaxonomy.cdm.model.common.Language;
47
import eu.etaxonomy.cdm.model.common.Marker;
48
import eu.etaxonomy.cdm.model.common.MarkerType;
49
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
50
import eu.etaxonomy.cdm.model.common.TimePeriod;
51
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
52
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
53
import eu.etaxonomy.cdm.model.description.Feature;
54
import eu.etaxonomy.cdm.model.description.KeyStatement;
55
import eu.etaxonomy.cdm.model.description.PolytomousKey;
56
import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
57
import eu.etaxonomy.cdm.model.description.TaxonDescription;
58
import eu.etaxonomy.cdm.model.description.TextData;
59
import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
60
import eu.etaxonomy.cdm.model.name.IBotanicalName;
61
import eu.etaxonomy.cdm.model.name.INonViralName;
62
import eu.etaxonomy.cdm.model.name.NameRelationshipType;
63
import eu.etaxonomy.cdm.model.name.NameTypeDesignation;
64
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
65
import eu.etaxonomy.cdm.model.name.Rank;
66
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
67
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
68
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
69
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
70
import eu.etaxonomy.cdm.model.name.TypeDesignationBase;
71
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
72
import eu.etaxonomy.cdm.model.reference.IBook;
73
import eu.etaxonomy.cdm.model.reference.IJournal;
74
import eu.etaxonomy.cdm.model.reference.Reference;
75
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
76
import eu.etaxonomy.cdm.model.reference.ReferenceType;
77
import eu.etaxonomy.cdm.model.taxon.Classification;
78
import eu.etaxonomy.cdm.model.taxon.SynonymType;
79
import eu.etaxonomy.cdm.model.taxon.Taxon;
80
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
81
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
82
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
83
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
84
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
85

    
86

    
87
/**
88
 * @author a.mueller
89
 *
90
 */
91
@Component
92
public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<EfloraImportState> {
93
	/** Logger of this import class. */
	private static final Logger logger = Logger.getLogger(EfloraTaxonImport.class);
94

    
95
	/** Progress logging interval: doInvoke logs the number of handled taxon elements each time this many were processed. */
	private static int modCount = 30000;
96
	/** Parser for non viral names. NOTE(review): not referenced in the part of the class reviewed here - confirm usage further down. */
	private final NonViralNameParserImpl parser = new NonViralNameParserImpl();
97

    
98
	public EfloraTaxonImport(){
99
		super();
100
	}
101

    
102

    
103
	@Override
104
	public boolean doCheck(EfloraImportState state){
105
		boolean result = true;
106
		return result;
107
	}
108

    
109
	//TODO make part of state, but state is renewed when invoking the import a second time
	//Held on the import instance: doInvoke creates it on first use and passes the same
	//instance to the state via state.setUnmatchedLeads(..) on every invocation.
110
	private UnmatchedLeads unmatchedLeads;
111

    
112
	@Override
113
	public void doInvoke(EfloraImportState state){
114
		logger.info("start make Taxa ...");
115

    
116
		//FIXME reset state
117
		state.putTree(null, null);
118
//		UnmatchedLeads unmatchedLeads = state.getOpenKeys();
119
		if (unmatchedLeads == null){
120
			unmatchedLeads = UnmatchedLeads.NewInstance();
121
		}
122
		state.setUnmatchedLeads(unmatchedLeads);
123

    
124
		TransactionStatus tx = startTransaction();
125
		unmatchedLeads.saveToSession(getPolytomousKeyNodeService());
126

    
127

    
128
		//TODO generally do not store the reference object in the config
129
		Reference sourceReference = state.getConfig().getSourceReference();
130
		getReferenceService().saveOrUpdate(sourceReference);
131

    
132
		Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
133
		ResultWrapper<Boolean> success = ResultWrapper.NewInstance(true);
134

    
135
		Element elbody= getBodyElement(state.getConfig());
136
		List<Element> elTaxonList = elbody.getChildren();
137

    
138
		int i = 0;
139

    
140
		Set<String> unhandledTitleClassess = new HashSet<String>();
141
		Set<String> unhandledNomeclatureChildren = new HashSet<String>();
142
		Set<String> unhandledDescriptionChildren = new HashSet<String>();
143

    
144
		Taxon lastTaxon = getLastTaxon(state);
145

    
146
		//for each taxon
147
		for (Element elTaxon : elTaxonList){
148
			try {
149
				if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
150
				if (! elTaxon.getName().equalsIgnoreCase("taxon")){
151
					logger.warn("body has element other than 'taxon'");
152
				}
153

    
154
				IBotanicalName botanicalName = TaxonNameFactory.NewBotanicalInstance(Rank.SPECIES());
155
				Taxon taxon = Taxon.NewInstance(botanicalName, state.getConfig().getSourceReference());
156

    
157
				handleTaxonAttributes(elTaxon, taxon, state);
158

    
159

    
160
				List<Element> children = elTaxon.getChildren();
161
				handleTaxonElement(state, unhandledTitleClassess, unhandledNomeclatureChildren,	unhandledDescriptionChildren, taxon, children);
162
				handleTaxonRelation(state, taxon, lastTaxon);
163
				lastTaxon = taxon;
164
				taxaToSave.add(taxon);
165
				state.getConfig().setLastTaxonUuid(lastTaxon.getUuid());
166

    
167
			} catch (Exception e) {
168
				logger.warn("Exception occurred in Sapindacea taxon import: " + e);
169
				e.printStackTrace();
170
			}
171

    
172
		}
173

    
174
		System.out.println(state.getUnmatchedLeads().toString());
175
		logger.warn("There are taxa with attributes 'excluded' and 'dubious'");
176

    
177
		logger.info("Children for nomenclature are: " + unhandledNomeclatureChildren);
178
		logger.info("Children for description are: " + unhandledDescriptionChildren);
179
		logger.info("Children for homotypes are: " + unhandledHomotypeChildren);
180
		logger.info("Children for nom are: " + unhandledNomChildren);
181

    
182

    
183
		//invokeRelations(source, cdmApp, deleteAll, taxonMap, referenceMap);
184
		logger.info(i + " taxa handled. Saving ...");
185
		getTaxonService().saveOrUpdate(taxaToSave);
186
		getFeatureTreeService().saveOrUpdateFeatureNodesAll(state.getFeatureNodesToSave());
187
		state.getFeatureNodesToSave().clear();
188
		commitTransaction(tx);
189

    
190
		logger.info("end makeTaxa ...");
191
		logger.info("start makeKey ...");
192
//		invokeDoKey(state);
193
		logger.info("end makeKey ...");
194

    
195
		if (! success.getValue()){
196
			state.setUnsuccessfull();
197
		}
198
		return ;
199
	}
200

    
201

    
202
	private void handleTaxonAttributes(Element elTaxon, Taxon taxon, EfloraImportState state) {
203
		List<Attribute> attrList = elTaxon.getAttributes();
204
		for (Attribute attr : attrList){
205
			String attrName = attr.getName();
206
			String attrValue = attr.getValue();
207
			if ("class".equals(attrName)){
208
				if (attrValue.equalsIgnoreCase("dubious") || attrValue.equalsIgnoreCase("DUBIOUS GENUS") || attrValue.equalsIgnoreCase("DOUBTFUL SPECIES")  ){
209
					taxon.setDoubtful(true);
210
				}else{
211
					MarkerType markerType = getMarkerType(state, attrValue);
212
					if (markerType == null){
213
						logger.warn("Class attribute value for taxon not yet supported: " + attrValue);
214
					}else{
215
						taxon.addMarker(Marker.NewInstance(markerType, true));
216
					}
217
				}
218
			}else if ("num".equals(attrName)){
219
				logger.warn("num not yet supported");
220
			}else{
221
				logger.warn("Attribute " + attrName + " not yet supported for element taxon");
222
			}
223
		}
224

    
225
	}
226

    
227

    
228
	private Taxon getLastTaxon(EfloraImportState state) {
229
		if (state.getConfig().getLastTaxonUuid() == null){
230
			return null;
231
		}else{
232
			return (Taxon)getTaxonService().find(state.getConfig().getLastTaxonUuid());
233
		}
234
	}
235

    
236

    
237
//	private void invokeDoKey(SapindaceaeImportState state) {
238
//		TransactionStatus tx = startTransaction();
239
//
240
//		Set<FeatureNode> nodesToSave = new HashSet<FeatureNode>();
241
//		ITaxonService taxonService = getTaxonService();
242
//		ResultWrapper<Boolean> success = ResultWrapper.NewInstance(true);
243
//
244
//		Element elbody= getBodyElement(state.getConfig());
245
//		List<Element> elTaxonList = elbody.getChildren();
246
//
247
//		int i = 0;
248
//
249
//		//for each taxon
250
//		for (Element elTaxon : elTaxonList){
251
//			if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
252
//			if (! elTaxon.getName().equalsIgnoreCase("taxon")){
253
//				continue;
254
//			}
255
//
256
//			List<Element> children = elTaxon.getChildren("key");
257
//			for (Element element : children){
258
//				handleKeys(state, element, null);
259
//			}
260
//			nodesToSave.add(taxon);
261
//
262
//		}
263
//
264
//	}
265

    
266

    
267
	// body/taxon/*
268
	private void handleTaxonElement(EfloraImportState state, Set<String> unhandledTitleClassess, Set<String> unhandledNomeclatureChildren, Set<String> unhandledDescriptionChildren, Taxon taxon, List<Element> children) {
269
		AnnotatableEntity lastEntity = null;
270
		for (Element element : children){
271
			String elName = element.getName();
272

    
273
			if (elName.equalsIgnoreCase("title")){
274
				handleTitle(state, element, taxon, unhandledTitleClassess);
275
				lastEntity = null;
276
			}else if(elName.equalsIgnoreCase("nomenclature")){
277
				handleNomenclature(state, element, taxon, unhandledNomeclatureChildren);
278
				lastEntity = null;
279
			}else if(elName.equalsIgnoreCase("description")){
280
				handleDescription(state, element, taxon, unhandledDescriptionChildren);
281
				lastEntity = null;
282
			}else if(elName.equalsIgnoreCase("habitatecology")){
283
				lastEntity = handleEcology(state, element, taxon);
284
			}else if(elName.equalsIgnoreCase("distribution")){
285
				lastEntity = handleDistribution(state, element, taxon);
286
			}else if(elName.equalsIgnoreCase("uses")){
287
				lastEntity = handleUses(state, element, taxon);
288
			}else if(elName.equalsIgnoreCase("notes")){
289
				lastEntity = handleTaxonNotes(state, element, taxon);
290
			}else if(elName.equalsIgnoreCase("chromosomes")){
291
				lastEntity = handleChromosomes(state, element, taxon);
292
			}else if(elName.equalsIgnoreCase("vernacularnames")){
293
				handleVernaculars(state, element, taxon);
294
			}else if(elName.equalsIgnoreCase("key")){
295
				lastEntity = handleKeys(state, element, taxon);
296
			}else if(elName.equalsIgnoreCase("references")){
297
				handleReferences(state, element, taxon, lastEntity);
298
				lastEntity = null;
299
			}else if(elName.equalsIgnoreCase("taxon")){
300
				logger.warn("A taxon should not be part of a taxon");
301
			}else if(elName.equalsIgnoreCase("homotypes")){
302
				logger.warn("Homotypes should be included in the nomenclature flag but is child of taxon [XPath: body/taxon/homotypes]");
303
			}else{
304
				logger.warn("Unexpected child for taxon: " + elName);
305
			}
306
		}
307
	}
308

    
309

    
310
	private void handleVernaculars(EfloraImportState state, Element elVernacular, Taxon taxon) {
311
		verifyNoAttribute(elVernacular);
312
		verifyNoChildren(elVernacular, false);
313
		String value = elVernacular.getTextNormalize();
314
		Feature feature = Feature.COMMON_NAME();
315
		value = replaceStart(value, "Noms vernaculaires");
316
		String[] dialects = value.split(";");
317
		for (String singleDialect : dialects){
318
			handleSingleDialect(taxon, singleDialect, feature, state);
319
		}
320
		return;
321
	}
322

    
323

    
324
	private void handleSingleDialect(Taxon taxon, String singleDialect, Feature feature, EfloraImportState state) {
325
		singleDialect = singleDialect.trim();
326
		TaxonDescription description = getDescription(taxon);
327
		String reDialect = "\\(dial\\.\\s.*\\)";
328
//		String reDialect = "\\(.*\\)";
329
		Pattern patDialect = Pattern.compile(reDialect);
330
		Matcher matcher = patDialect.matcher(singleDialect);
331
		if (matcher.find()){
332
			String dialect = singleDialect.substring(matcher.start(), matcher.end());
333
			dialect = dialect.replace("(dial. ", "").replace(")", "");
334

    
335
			Language language = null;
336
			try {
337
				language = this.getLanguage(state, state.getTransformer().getLanguageUuid(dialect), dialect, dialect, dialect);
338
			} catch (UndefinedTransformerMethodException e) {
339
				logger.error(e.getMessage());
340
			}
341

    
342
			String commonNames = singleDialect.substring(0, matcher.start());
343
			String[] splitNames = commonNames.split(",");
344
			for (String commonNameString : splitNames){
345
				commonNameString = commonNameString.trim();
346
				CommonTaxonName commonName = CommonTaxonName.NewInstance(commonNameString, language);
347
				description.addElement(commonName);
348
			}
349
		}else{
350
			logger.warn("No dialect match: " +  singleDialect);
351
		}
352
	}
353

    
354

    
355
	private void handleReferences(EfloraImportState state, Element elReferences, Taxon taxon, AnnotatableEntity lastEntity) {
356
		verifyNoAttribute(elReferences);
357
		verifyNoChildren(elReferences, true);
358
		String refString = elReferences.getTextNormalize();
359
		if (lastEntity == null){
360
			logger.warn("No last entity defined: " + refString);
361
			return;
362
		}
363

    
364
		Annotation annotation = Annotation.NewInstance(refString, AnnotationType.EDITORIAL(), Language.DEFAULT());
365
		lastEntity.addAnnotation(annotation);
366
	}
367

    
368

    
369
	private PolytomousKey handleKeys(EfloraImportState state, Element elKey, Taxon taxon) {
370
		UnmatchedLeads openKeys = state.getUnmatchedLeads();
371

    
372
		//title
373
		String title = makeKeyTitle(elKey);
374

    
375
		//key
376
		PolytomousKey key = PolytomousKey.NewTitledInstance(title);
377

    
378
		//TODO add covered taxa etc.
379
		verifyNoAttribute(elKey);
380

    
381
		//notes
382
		makeKeyNotes(elKey, key);
383

    
384
		//keycouplets
385
		List<Element> keychoices = new ArrayList<Element>();
386
		keychoices.addAll(elKey.getChildren("keycouplet"));
387
		keychoices.addAll(elKey.getChildren("keychoice"));
388

    
389

    
390
		for (Element elKeychoice : keychoices){
391
			handleKeyChoices(state, openKeys, key, elKeychoice, taxon);
392
			elKey.removeContent(elKeychoice);
393
		}
394

    
395
		//
396
		verifyNoChildren(elKey);
397
		logger.info("Unmatched leads after key handling:" + openKeys.toString());
398

    
399

    
400
		if (state.getConfig().isDoPrintKeys()){
401
			key.print(System.err);
402
		}
403
		getPolytomousKeyService().save(key);
404
		return key;
405
	}
406

    
407

    
408
	/**
409
	 * @param state
410
	 * @param elKey
411
	 * @param openKeys
412
	 * @param key
413
	 * @param elKeychoice
414
	 * @param taxon
415
	 */
416
	private void handleKeyChoices(EfloraImportState state, UnmatchedLeads openKeys, PolytomousKey key, Element elKeychoice, Taxon taxon) {
417

    
418
		//char Attribute
419
		//TODO it's still unclear if char is a feature and needs to be a new attribute
420
		//or if it is handled as question. Therefore both cases are handled but feature
421
		//is finally not yet set
422
		KeyStatement question = handleKeychoiceChar(state, elKeychoice);
423
		Feature feature = handleKeychoiceCharAsFeature(state, elKeychoice);
424

    
425
		//lead
426
		List<PolytomousKeyNode> childNodes = handleKeychoiceLeads(state, key, elKeychoice, taxon, question, feature);
427

    
428
		//num -> match with unmatched leads
429
		handleKeychoiceNum(openKeys, key, elKeychoice, childNodes);
430

    
431
		//others
432
		verifyNoAttribute(elKeychoice);
433
	}
434

    
435

    
436
	/**
437
	 * @param openKeys
438
	 * @param key
439
	 * @param elKeychoice
440
	 * @param childNodes
441
	 */
442
	private void handleKeychoiceNum(UnmatchedLeads openKeys, PolytomousKey key, Element elKeychoice, List<PolytomousKeyNode> childNodes) {
443
		Attribute numAttr = elKeychoice.getAttribute("num");
444
		String num = CdmUtils.removeTrailingDot(numAttr == null? "":numAttr.getValue());
445
		UnmatchedLeadsKey okk = UnmatchedLeadsKey.NewInstance(key, num);
446
		Set<PolytomousKeyNode> matchingNodes = openKeys.getNodes(okk);
447
		for (PolytomousKeyNode matchingNode : matchingNodes){
448
			for (PolytomousKeyNode childNode : childNodes){
449
				matchingNode.addChild(childNode);
450
			}
451
			openKeys.removeNode(okk, matchingNode);
452
		}
453
		if (matchingNodes.isEmpty()){
454
			for (PolytomousKeyNode childNode : childNodes){
455
				key.getRoot().addChild(childNode);
456
			}
457
		}
458

    
459
		elKeychoice.removeAttribute("num");
460
	}
461

    
462

    
463
	/**
464
	 * @param state
465
	 * @param key
466
	 * @param elKeychoice
467
	 * @param taxon
468
	 * @param feature
469
	 * @return
470
	 */
471
	private List<PolytomousKeyNode> handleKeychoiceLeads(	EfloraImportState state, PolytomousKey key,	Element elKeychoice, Taxon taxon, KeyStatement question, Feature feature) {
472
		List<PolytomousKeyNode> childNodes = new ArrayList<PolytomousKeyNode>();
473
		List<Element> leads = elKeychoice.getChildren("lead");
474
		for(Element elLead : leads){
475
			PolytomousKeyNode childNode = handleLead(state, key, elLead, taxon, question, feature);
476
			childNodes.add(childNode);
477
		}
478
		return childNodes;
479
	}
480

    
481

    
482
	/**
483
	 * @param state
484
	 * @param elKeychoice
485
	 * @return
486
	 */
487
	private KeyStatement handleKeychoiceChar(EfloraImportState state, Element elKeychoice) {
488
		KeyStatement statement = null;
489
		Attribute charAttr = elKeychoice.getAttribute("char");
490
		if (charAttr != null){
491
			String charStr = charAttr.getValue();
492
			if (StringUtils.isNotBlank(charStr)){
493
				statement = KeyStatement.NewInstance(charStr);
494
			}
495
			elKeychoice.removeAttribute("char");
496
		}
497
		return statement;
498
	}
499

    
500
	/**
501
	 * @param state
502
	 * @param elKeychoice
503
	 * @return
504
	 */
505
	private Feature handleKeychoiceCharAsFeature(EfloraImportState state, Element elKeychoice) {
506
		Feature feature = null;
507
		Attribute charAttr = elKeychoice.getAttribute("char");
508
		if (charAttr != null){
509
			String charStr = charAttr.getValue();
510
			feature = getFeature(charStr, state);
511
			elKeychoice.removeAttribute("char");
512
		}
513
		return feature;
514
	}
515

    
516

    
517
	private PolytomousKeyNode handleLead(EfloraImportState state, PolytomousKey key, Element elLead, Taxon taxon, KeyStatement question, Feature feature) {
518
		PolytomousKeyNode node = PolytomousKeyNode.NewInstance();
519
		//TODO the char attribute in the keychoice is more a feature than a question
520
		//needs to be discussed on model side
521
		node.setQuestion(question);
522
//		node.setFeature(feature);
523

    
524
		//text
525
		String text = handleLeadText(elLead, node);
526

    
527
		//num
528
		handleLeadNum(elLead, text);
529

    
530
		//goto
531
		handleLeadGoto(state, key, elLead, taxon, node);
532

    
533
		//others
534
		verifyNoAttribute(elLead);
535

    
536
		return node;
537
	}
538

    
539

    
540
	/**
541
	 * @param elLead
542
	 * @param node
543
	 * @return
544
	 */
545
	private String handleLeadText(Element elLead, PolytomousKeyNode node) {
546
		String text = elLead.getAttributeValue("text").trim();
547
		if (StringUtils.isBlank(text)){
548
			logger.warn("Empty text in lead");
549
		}
550
		elLead.removeAttribute("text");
551
		KeyStatement statement = KeyStatement.NewInstance(text);
552
		node.setStatement(statement);
553
		return text;
554
	}
555

    
556

    
557
	/**
558
	 * @param state
559
	 * @param key
560
	 * @param elLead
561
	 * @param taxon
562
	 * @param node
563
	 */
564
	private void handleLeadGoto(EfloraImportState state, PolytomousKey key, Element elLead, Taxon taxon, PolytomousKeyNode node) {
565
		Attribute gotoAttr = elLead.getAttribute("goto");
566
		if (gotoAttr != null){
567
			String strGoto = gotoAttr.getValue().trim();
568
			//create key
569
			UnmatchedLeadsKey gotoKey = null;
570
			if (isInternalNode(strGoto)){
571
				gotoKey = UnmatchedLeadsKey.NewInstance(key, strGoto);
572
			}else{
573
				String taxonKey = makeTaxonKey(strGoto, taxon);
574
				gotoKey = UnmatchedLeadsKey.NewInstance(taxonKey);
575
			}
576
			//
577
			UnmatchedLeads openKeys = state.getUnmatchedLeads();
578
			if (gotoKey.isInnerLead()){
579
				Set<PolytomousKeyNode> existingNodes = openKeys.getNodes(gotoKey);
580
				for (PolytomousKeyNode existingNode : existingNodes){
581
					node.addChild(existingNode);
582
				}
583
			}
584
			openKeys.addKey(gotoKey, node);
585
			//remove attribute (need for consistency check)
586
			elLead.removeAttribute("goto");
587
		}else{
588
			logger.warn("lead has no goto attribute");
589
		}
590
	}
591

    
592

    
593
	/**
594
	 * @param elLead
595
	 * @param text
596
	 */
597
	private void handleLeadNum(Element elLead, String text) {
598
		Attribute numAttr = elLead.getAttribute("num");
599
		if (numAttr != null){
600
			//TODO num
601
			String num = numAttr.getValue();
602
			elLead.removeAttribute("num");
603
		}else{
604
			logger.info("Keychoice has no num attribute: " + text);
605
		}
606
	}
607

    
608

    
609
	private String makeTaxonKey(String strGoto, Taxon taxon) {
610
		String result = "";
611
		if (strGoto == null){
612
			return "";
613
		}
614
		String strGenusName = taxon.getName().getGenusOrUninomial();
615
		strGoto = strGoto.replaceAll("\\([^\\(\\)]*\\)", "");  //replace all brackets
616
		strGoto = strGoto.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace
617

    
618
		strGoto = strGoto.trim();
619
		String[] split = strGoto.split("\\s");
620
		for (int i = 0; i<split.length; i++){
621
			String single = split[i];
622
			if (isGenusAbbrev(single, strGenusName)){
623
				split[i] = strGenusName;
624
			}
625
//			if (isInfraSpecificMarker(single)){
626
//				String strSpeciesName = taxon.getName().getSpecificEpithet();
627
//				split[i] = strGenusName + " " + strSpeciesName + " ";
628
//			}
629
			result = (result + " " + split[i]).trim();
630
		}
631
		return result;
632
	}
633

    
634

    
635
	private boolean isInfraSpecificMarker(String single) {
636
		try {
637
			if (Rank.getRankByIdInVoc(single).isInfraSpecific()){
638
				return true;
639
			}
640
		} catch (UnknownCdmTypeException e) {
641
			return false;
642
		}
643
		return false;
644
	}
645

    
646

    
647
	private boolean isGenusAbbrev(String single, String strGenusName) {
648
		if (! single.matches("[A-Z]\\.?")) {
649
			return false;
650
		}else if (single.length() == 0 || strGenusName == null || strGenusName.length() == 0){
651
			return false;
652
		}else{
653
			return single.charAt(0) == strGenusName.charAt(0);
654
		}
655
	}
656

    
657

    
658
	private boolean isInternalNode(String strGoto) {
659
		return CdmUtils.isNumeric(strGoto);
660
	}
661

    
662

    
663
	private void makeKeyNotes(Element keyElement, PolytomousKey key) {
664
		Element elNotes = keyElement.getChild("notes");
665
		if (elNotes != null){
666
			keyElement.removeContent(elNotes);
667
			String notes = elNotes.getTextNormalize();
668
			if (StringUtils.isNotBlank(notes)){
669
				key.addAnnotation(Annotation.NewInstance(notes, AnnotationType.EDITORIAL(), Language.DEFAULT()));
670
			}
671
		}
672
	}
673

    
674

    
675
	private String makeKeyTitle(Element keyElement) {
676
		String title = "- no title - ";
677
		Attribute titleAttr = keyElement.getAttribute("title");
678
		keyElement.removeAttribute(titleAttr);
679
		if (titleAttr == null){
680
			Element elTitle = keyElement.getChild("keytitle");
681
			keyElement.removeContent(elTitle);
682
			if (elTitle != null){
683
				title = elTitle.getTextNormalize();
684
			}
685
		}else{
686
			title = titleAttr.getValue();
687
		}
688
		return title;
689
	}
690

    
691

    
692
	/**
693
	 * @param state
694
	 * @param element
695
	 * @param taxon
696
	 */
697
	private TextData handleChromosomes(EfloraImportState state, Element element, Taxon taxon) {
698
		Feature chromosomeFeature = getFeature("chromosomes", state);
699
		verifyNoAttribute(element);
700
		verifyNoChildren(element);
701
		String value = element.getTextNormalize();
702
		value = replaceStart(value, "Chromosomes");
703
		String chromosomesPart = getChromosomesPart(value);
704
		String references = value.replace(chromosomesPart, "").trim();
705
		chromosomesPart = chromosomesPart.replace(":", "").trim();
706
		return addDescriptionElement(state, taxon, chromosomesPart, chromosomeFeature, references);
707
	}
708

    
709

    
710
	/**
711
	 * @param ref
712
	 * @param string
713
	 * @return
714
	 */
715
	private void makeOriginalSourceReferences(ISourceable sourcable, String splitter, String refAll) {
716
		String[] splits = refAll.split(splitter);
717
		for (String strRef: splits){
718
			Reference ref = ReferenceFactory.newGeneric();
719
			ref.setTitleCache(strRef, true);
720
			String refDetail = parseReferenceYearAndDetail(ref);
721
			sourcable.addSource(OriginalSourceType.PrimaryTaxonomicSource, null, null, ref, refDetail);
722
		}
723

    
724

    
725
//TODO use regex instead
726
/*		String detailResult = null;
727
		String titleToParse = ref.getTitleCache();
728
		String reReference = "^\\.{1,}";
729
//		String reYear = "\\([1-2]{1}[0-9]{3}\\)";
730
		String reYear = "\\([1-2]{1}[0-9]{3}\\)";
731
		String reYearPeriod = reYear + "(-" + reYear + ")+";
732
		String reDetail = "\\.{1,10}$";
733
*/
734
	}
735

    
736

    
737
	/**
738
	 * @param value
739
	 * @return
740
	 */
741
	private String getChromosomesPart(String str) {
742
		Pattern pattern = Pattern.compile("2n\\s*=\\s*\\d{1,2}:");
743
		Matcher matcher = pattern.matcher(str);
744
		if (matcher.find()){
745
			return matcher.group(0);
746
		}else{
747
			logger.warn("Chromosomes could not be parsed: " + str);
748
		}
749
		return str;
750
	}
751

    
752

    
753
	/**
754
	 * @param state
755
	 * @param element
756
	 * @param taxon
757
	 */
758
	private TextData handleTaxonNotes(EfloraImportState state, Element element, Taxon taxon) {
759
		TextData result = null;
760
		verifyNoChildren(element, true);
761
		//verifyNoAttribute(element);
762
		List<Attribute> attributes = element.getAttributes();
763
		for (Attribute attribute : attributes){
764
			if (! attribute.getName().equalsIgnoreCase("class")){
765
				logger.warn("Char has unhandled attribute " +  attribute.getName());
766
			}else{
767
				String classValue = attribute.getValue();
768
				result = handleDescriptiveElement(state, element, taxon, classValue);
769
			}
770
		}
771
		//if no class attribute exists, handle as note
772
		if (attributes.isEmpty()){
773
			result = handleDescriptiveElement(state, element, taxon, "Note");
774
		}
775

    
776
		//Annotation annotation = Annotation.NewInstance(value, AnnotationType.EDITORIAL(), Language.ENGLISH());
777
		//taxon.addAnnotation(annotation);
778
		return result; //annotation;
779
	}
780

    
781

    
782
	/**
783
	 * @param state
784
	 * @param element
785
	 * @param taxon
786
	 * @param result
787
	 * @param attribute
788
	 * @return
789
	 */
790
	private TextData handleDescriptiveElement(EfloraImportState state, Element element, Taxon taxon, String classValue) {
791
		TextData result = null;
792
		Feature feature = getFeature(classValue, state);
793
		if (feature == null){
794
			logger.warn("Unhandled feature: " + classValue);
795
		}else{
796
			String value = element.getValue();
797
			value = replaceStart(value, "Notes");
798
			value = replaceStart(value, "Note");
799
			result = addDescriptionElement(state, taxon, value, feature, null);
800
		}
801
		return result;
802
	}
803

    
804

    
805
	private void removeBr(Element element) {
806
		element.removeChildren("Br");
807
		element.removeChildren("br");
808
		element.removeChildren("BR");
809
	}
810

    
811

    
812
	/**
813
	 * @param state
814
	 * @param element
815
	 * @param taxon
816
	 */
817
	private TextData handleUses(EfloraImportState state, Element element, Taxon taxon) {
818
		verifyNoAttribute(element);
819
		verifyNoChildren(element, true);
820
		String value = element.getTextNormalize();
821
		value = replaceStart(value, "Uses");
822
		Feature feature = Feature.USES();
823
		return addDescriptionElement(state, taxon, value, feature, null);
824

    
825
	}
826

    
827

    
828
	/**
829
	 * @param state
830
	 * @param element
831
	 * @param taxon
832
	 * @param unhandledDescriptionChildren
833
	 */
834
	private DescriptionElementBase handleDistribution(EfloraImportState state, Element element, Taxon taxon) {
835
		verifyNoAttribute(element);
836
		verifyNoChildren(element, true);
837
		String value = element.getTextNormalize();
838
		value = replaceStart(value, "Distribution");
839
		Feature feature = Feature.DISTRIBUTION();
840
		//distribution parsing almost impossible as there is lots of freetext in the distribution tag
841
		return addDescriptionElement(state, taxon, value, feature, null);
842
	}
843

    
844

    
845
	/**
846
	 * @param state
847
	 * @param element
848
	 * @param taxon
849
	 * @param unhandledDescriptionChildren
850
	 */
851
	private TextData handleEcology(EfloraImportState state, Element elEcology, Taxon taxon) {
852
		verifyNoAttribute(elEcology);
853
		verifyNoChildren(elEcology, true);
854
		String value = elEcology.getTextNormalize();
855
		Feature feature = Feature.ECOLOGY();
856
		if (value.startsWith("Habitat & Ecology")){
857
			feature = getFeature("Habitat & Ecology", state);
858
			value = replaceStart(value, "Habitat & Ecology");
859
		}else if (value.startsWith("Habitat")){
860
			value = replaceStart(value, "Habitat");
861
			feature = getFeature("Habitat", state);
862
		}
863
		return addDescriptionElement(state, taxon, value, feature, null);
864
	}
865

    
866

    
867

    
868
	/**
869
	 * @param value
870
	 * @param replacementString
871
	 */
872
	private String replaceStart(String value, String replacementString) {
873
		if (value.startsWith(replacementString) ){
874
			value = value.substring(replacementString.length()).trim();
875
		}
876
		while (value.startsWith("-") || value.startsWith("–") ){
877
			value = value.substring("-".length()).trim();
878
		}
879
		return value;
880
	}
881

    
882

    
883
	/**
884
	 * @param value
885
	 * @param replacementString
886
	 */
887
	protected String removeTrailing(String value, String replacementString) {
888
		if (value == null){
889
			return null;
890
		}
891
		if (value.endsWith(replacementString) ){
892
			value = value.substring(0, value.length() - replacementString.length()).trim();
893
		}
894
		return value;
895
	}
896

    
897
	/**
898
	 * @param state
899
	 * @param element
900
	 * @param taxon
901
	 * @param unhandledNomeclatureChildren
902
	 */
903
	private void handleNomenclature(EfloraImportState state, Element elNomenclature, Taxon taxon, Set<String> unhandledChildren) {
904
		verifyNoAttribute(elNomenclature);
905

    
906
		List<Element> elements = elNomenclature.getChildren();
907
		for (Element element : elements){
908
			if (element.getName().equals("homotypes")){
909
				handleHomotypes(state, element, taxon);
910
			}else if (element.getName().equals("notes")){
911
				handleNomenclatureNotes(state, element, taxon);
912
			}else{
913
				unhandledChildren.add(element.getName());
914
			}
915
		}
916

    
917
	}
918

    
919

    
920

    
921
	private void handleNomenclatureNotes(EfloraImportState state, Element elNotes, Taxon taxon) {
922
		verifyNoAttribute(elNotes);
923
		verifyNoChildren(elNotes);
924
		String notesText = elNotes.getTextNormalize();
925
		Annotation annotation = Annotation.NewInstance(notesText, AnnotationType.EDITORIAL(), Language.DEFAULT());
926
		taxon.addAnnotation(annotation);
927
	}
928

    
929

    
930

    
931
	private static Set<String> unhandledHomotypeChildren = new HashSet<String>();
932
	/**
933
	 * @param state
934
	 * @param element
935
	 * @param taxon
936
	 */
937
	private void handleHomotypes(EfloraImportState state, Element elHomotypes, Taxon taxon) {
938
		verifyNoAttribute(elHomotypes);
939

    
940
		List<Element> elements = elHomotypes.getChildren();
941
		HomotypicalGroup homotypicalGroup = null;
942
		for (Element element : elements){
943
			if (element.getName().equals("nom")){
944
				homotypicalGroup = handleNom(state, element, taxon, homotypicalGroup);
945
			}else{
946
				unhandledHomotypeChildren.add(element.getName());
947
			}
948
		}
949

    
950
	}
951

    
952
	private static Set<String> unhandledNomChildren = new HashSet<String>();
953

    
954
	/**
955
	 * @param state
956
	 * @param element
957
	 * @param taxon
958
	 */
959
	private HomotypicalGroup handleNom(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
960
		List<Attribute> attributes = elNom.getAttributes();
961

    
962
		boolean taxonBaseClassType = false;
963
		for (Attribute attribute : attributes){
964
			if (! attribute.getName().equalsIgnoreCase("class")){
965
				logger.warn("Nom has unhandled attribute " +  attribute.getName());
966
			}else{
967
				String classValue = attribute.getValue();
968
				if (classValue.equalsIgnoreCase("acceptedname")){
969
					homotypicalGroup = handleNomTaxon(state, elNom, taxon,homotypicalGroup, false);
970
					taxonBaseClassType = true;
971
				}else if (classValue.equalsIgnoreCase("synonym")){
972
					homotypicalGroup = handleNomTaxon(state, elNom, taxon, homotypicalGroup, true);
973
					taxonBaseClassType = true;
974
				}else if (classValue.equalsIgnoreCase("typeref")){
975
					handleTypeRef(state, elNom, taxon, homotypicalGroup);
976
				}else{
977
					logger.warn("Unhandled class value for nom: " + classValue);
978
				}
979

    
980
			}
981
		}
982

    
983
		List<Element> elements = elNom.getChildren();
984
		for (Element element : elements){
985
			if (element.getName().equals("name") || element.getName().equals("homonym") ){
986
				if (taxonBaseClassType == false){
987
					logger.warn("Name or homonym tag not allowed in non taxon nom tag");
988
				}
989
			}else{
990
				unhandledNomChildren.add(element.getName());
991
			}
992
		}
993

    
994
		return homotypicalGroup;
995

    
996
	}
997

    
998
	/**
999
	 * @param state
1000
	 * @param elNom
1001
	 * @param taxon
1002
	 * @param homotypicalGroup
1003
	 */
1004
	protected void handleTypeRef(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
1005
		verifyNoChildren(elNom);
1006
		String typeRef = elNom.getTextNormalize();
1007
		typeRef = removeStartingTypeRefMinus(typeRef);
1008

    
1009
		String[] split = typeRef.split(":");
1010
		if (split.length < 2){
1011
			logger.warn("typeRef has no ':' : " + typeRef);
1012
		}else if (split.length > 2){
1013
			logger.warn("typeRef has more than 1 ':' : " + typeRef);
1014
		}else{
1015
			StringBuffer typeType = new StringBuffer(split[0]);
1016
			String typeText = split[1].trim();
1017
			TypeDesignationBase typeDesignation = getTypeDesignationAndReference(typeType);
1018

    
1019
			//Name Type Desitnations
1020
			if (typeDesignation instanceof NameTypeDesignation){
1021
				makeNameTypeDesignations(typeType, typeText, typeDesignation);
1022
			}
1023
			//SpecimenTypeDesignations
1024
			else if (typeDesignation instanceof SpecimenTypeDesignation){
1025
				makeSpecimenTypeDesignation(typeType, typeText, typeDesignation);
1026
			}else{
1027
				logger.error("Unhandled type designation class" + typeDesignation.getClass().getName());
1028
			}
1029
			for (TaxonNameBase name : homotypicalGroup.getTypifiedNames()){
1030
				name.addTypeDesignation(typeDesignation, true);
1031
			}
1032
		}
1033
	}
1034

    
1035

    
1036
	/**
1037
	 * @param typeRef
1038
	 * @return
1039
	 */
1040
	protected String removeStartingTypeRefMinus(String typeRef) {
1041
		typeRef = replaceStart(typeRef, "-");
1042
		typeRef = replaceStart(typeRef, "—");
1043
		typeRef = replaceStart(typeRef, "\u002d");
1044
		typeRef = replaceStart(typeRef, "\u2013");
1045
		typeRef = replaceStart(typeRef, "--");
1046
		return typeRef;
1047
	}
1048

    
1049
	/**
1050
	 * @param typeType
1051
	 * @param typeText
1052
	 * @param typeDesignation
1053
	 */
1054
	private void makeNameTypeDesignations(StringBuffer typeType, String typeText, TypeDesignationBase typeDesignation) {
1055
		if (typeType.toString().trim().equalsIgnoreCase("Type")){
1056
			//do nothing
1057
		}else if (typeType.toString().trim().equalsIgnoreCase("Lectotype")){
1058
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.LECTOTYPE());
1059
		}else if (typeType.toString().trim().equalsIgnoreCase("Syntype")){
1060
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.SYNTYPE());
1061
		}else{
1062
			logger.warn("Unhandled type string: " + typeType + "(" + CharUtils.unicodeEscaped(typeType.charAt(0)) + ")");
1063
		}
1064
		//clean
1065
		typeText = cleanNameType(typeText);
1066
		//create name
1067
		TaxonNameBase<?,?> nameType = (TaxonNameBase<?,?>)parser.parseFullName(typeText, NomenclaturalCode.ICNAFP, Rank.SPECIES());
1068
		((NameTypeDesignation) typeDesignation).setTypeName(nameType);
1069
		//TODO wie können NameTypes den Namen zugeordnet werden? -  wird aber vom Portal via NameCache matching gemacht
1070
	}
1071

    
1072

    
1073
	private String cleanNameType(String typeText) {
1074
		String result;
1075
		String[] split = typeText.split("\\[.*\\].?");
1076
		result = split[0];
1077
		return result;
1078
	}
1079

    
1080

    
1081
	/**
1082
	 * @param typeType
1083
	 * @param typeText
1084
	 * @param typeDesignation
1085
	 */
1086
	protected void makeSpecimenTypeDesignation(StringBuffer typeType, String typeText, TypeDesignationBase typeDesignation) {
1087
		if (typeType.toString().trim().equalsIgnoreCase("Type")){
1088
			//do nothing
1089
		}else if (typeType.toString().trim().equalsIgnoreCase("Neotype") || typeType.toString().trim().equalsIgnoreCase("Neotypes")){
1090
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.NEOTYPE());
1091
		}else if (typeType.toString().trim().equalsIgnoreCase("Syntype") || typeType.toString().trim().equalsIgnoreCase("Syntypes")){
1092
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.SYNTYPE());
1093
		}else if (typeType.toString().trim().equalsIgnoreCase("Lectotype")){
1094
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.LECTOTYPE());
1095
		}else if (typeType.toString().trim().equalsIgnoreCase("Paratype")){
1096
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.PARATYPE());
1097
		}else{
1098
			logger.warn("Unhandled type string: " + typeType);
1099
		}
1100
		DerivedUnit specimen = DerivedUnit.NewPreservedSpecimenInstance();
1101
		if (typeText.length() > 255){
1102
			specimen.setTitleCache(typeText.substring(0, 252) + "...", true);
1103
		}else{
1104
			specimen.setTitleCache(typeText, true);
1105
		}
1106
		specimen.putDefinition(Language.ENGLISH(), typeText);
1107
		((SpecimenTypeDesignation) typeDesignation).setTypeSpecimen(specimen);
1108
	}
1109

    
1110
	private TypeDesignationBase getTypeDesignationAndReference(StringBuffer typeType) {
1111
		TypeDesignationBase result;
1112
		Reference ref = parseTypeDesignationReference(typeType);
1113
		if (typeType.indexOf(" species")>-1 || typeType.indexOf("genus")>-1){
1114
			if (typeType.indexOf(" species")>-1 ){
1115
				result = NameTypeDesignation.NewInstance();
1116
				int start = typeType.indexOf(" species");
1117
				typeType.replace(start, start + " species".length(), "");
1118
			}else {
1119
				result = NameTypeDesignation.NewInstance();
1120
				int start = typeType.indexOf(" genus");
1121
				typeType.replace(start, start + " genus".length(), "");
1122
			}
1123
		}else{
1124
			result = SpecimenTypeDesignation.NewInstance();
1125
		}
1126
		result.setCitation(ref);
1127
		return result;
1128
	}
1129

    
1130

    
1131
	private Reference parseTypeDesignationReference(StringBuffer typeType) {
1132
		Reference result = null;
1133
		String reBracketReference = "\\(.*\\)";
1134
		Pattern patBracketReference = Pattern.compile(reBracketReference);
1135
		Matcher matcher = patBracketReference.matcher(typeType);
1136
		if (matcher.find()){
1137
			String refString = matcher.group();
1138
			int start = typeType.indexOf(refString);
1139
			typeType.replace(start, start + refString.length(), "");
1140
			refString = refString.replace("(", "").replace(")", "").trim();
1141
			Reference ref = ReferenceFactory.newGeneric();
1142
			ref.setTitleCache(refString, true);
1143
			result = ref;
1144
		}
1145
		return result;
1146
	}
1147

    
1148

    
1149
	/**
1150
	 * @param state
1151
	 * @param elNom
1152
	 * @param taxon
1153
	 */
1154
	//body/taxon/
1155
	private HomotypicalGroup handleNomTaxon(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup, boolean isSynonym) {
1156
	    INonViralName nvn = makeName(taxon, homotypicalGroup, isSynonym);
1157
	    TaxonNameBase<?,?> name = TaxonNameBase.castAndDeproxy(nvn);
1158
	    String num = null;
1159

    
1160
		boolean hasGenusInfo = false;
1161
		TeamOrPersonBase<?> lastTeam = null;
1162

    
1163
		//genus
1164
		List<Element> elGenus = XmlHelp.getAttributedChildListWithValue(elNom, "name", "class", "genus");
1165
		if (elGenus.size() > 0){
1166
			hasGenusInfo = true;
1167
		}else{
1168
			logger.debug ("No Synonym Genus");
1169
		}
1170
		//infra rank -> needed to handle authors correctly
1171
		List<Element> elInfraRank = XmlHelp.getAttributedChildListWithValue(elNom, "name", "class", "infrank");
1172
		Rank infraRank = null;
1173
		infraRank = handleInfRank(name, elInfraRank, infraRank);
1174

    
1175
		//get left over elements
1176
		List<Element> elements = elNom.getChildren();
1177
		elements.removeAll(elInfraRank);
1178

    
1179
		for (Element element : elements){
1180
			if (element.getName().equals("name")){
1181
				String classValue = element.getAttributeValue("class");
1182
				String value = element.getValue().trim();
1183
				if (classValue.equalsIgnoreCase("genus") || classValue.equalsIgnoreCase("family") ){
1184
					name.setGenusOrUninomial(value);
1185
				}else if (classValue.equalsIgnoreCase("family") ){
1186
					name.setGenusOrUninomial(value);
1187
					name.setRank(Rank.FAMILY());
1188
				}else if (classValue.equalsIgnoreCase("subgenus")){
1189
					//name.setInfraGenericEpithet(value);
1190
					name.setNameCache(value.replace(":", "").trim());
1191
					name.setRank(Rank.SUBGENUS());
1192
				}else if (classValue.equalsIgnoreCase("epithet") ){
1193
					if (hasGenusInfo == true){
1194
						name.setSpecificEpithet(value);
1195
					}else{
1196
						handleInfraspecificEpithet(element, classValue, name);
1197
					}
1198
				}else if (classValue.equalsIgnoreCase("author")){
1199
					handleNameAuthors(element, name);
1200
				}else if (classValue.equalsIgnoreCase("paraut")){
1201
					handleBasionymAuthor(state, element, name, false);
1202
				}else if (classValue.equalsIgnoreCase("infrauthor") || classValue.equalsIgnoreCase("infraut")){
1203
					handleInfrAuthor(state, element, name, true);
1204
				}else if (classValue.equalsIgnoreCase("infrapar") || classValue.equalsIgnoreCase("infrpar") || classValue.equalsIgnoreCase("parauthor") ){
1205
					handleBasionymAuthor(state, element, name, true);
1206
				}else if (classValue.equalsIgnoreCase("infrepi")){
1207
					handleInfrEpi(name, infraRank, value);
1208
				}else if (classValue.equalsIgnoreCase("pub")){
1209
					lastTeam = handleNomenclaturalReference(name, value);
1210
				}else if (classValue.equalsIgnoreCase("usage")){
1211
					lastTeam = handleNameUsage(taxon, name, value, lastTeam);
1212
				}else if (classValue.equalsIgnoreCase("note")){
1213
					handleNameNote(name, value);
1214
				}else if (classValue.equalsIgnoreCase("num")){
1215
					if (num != null){
1216
						logger.warn("Duplicate num: " + value);
1217
					}else{
1218
						num = value;
1219
					}
1220
					if (isSynonym == true){
1221
						logger.warn("Synonym should not have a num");
1222
					}
1223
				}else if (classValue.equalsIgnoreCase("typification")){
1224
					logger.warn("Typification should not be a nom class");
1225
				}else{
1226
					logger.warn("Unhandled name class: " +  classValue);
1227
				}
1228
			}else if(element.getName().equals("homonym")){
1229
				handleHomonym(state, element, name);
1230
			}else{
1231
				// child element is not "name"
1232
				unhandledNomChildren.add(element.getName());
1233
			}
1234
		}
1235

    
1236
		//handle key
1237
		if (! isSynonym){
1238
			String taxonString = name.getNameCache();
1239
			//try to find matching lead nodes
1240
			UnmatchedLeadsKey leadsKey = UnmatchedLeadsKey.NewInstance(num, taxonString);
1241
			Set<PolytomousKeyNode> matchingNodes = handleMatchingNodes(state, taxon, leadsKey);
1242
			//same without using the num
1243
			if (num != null){
1244
				UnmatchedLeadsKey noNumLeadsKey = UnmatchedLeadsKey.NewInstance("", taxonString);
1245
				handleMatchingNodes(state, taxon, noNumLeadsKey);
1246
			}
1247
			if (matchingNodes.isEmpty() && num != null){
1248
				logger.warn("Taxon has num but no matching nodes exist: " + num+ ", Key: " + leadsKey.toString());
1249
			}
1250
		}
1251

    
1252
		//test nom element has no text
1253
		if (StringUtils.isNotBlank(elNom.getTextNormalize().replace("—", "").replace("\u002d","").replace("\u2013", ""))){
1254
			String strElNom = elNom.getTextNormalize();
1255
			if ("?".equals(strElNom)){
1256
				handleQuestionMark(name, taxon);
1257
			}
1258
//			Character c = strElNom.charAt(0);
1259
			//System.out.println(CharUtils.unicodeEscaped(c));
1260
			logger.warn("Nom tag has text: " + strElNom);
1261
		}
1262

    
1263
		return name.getHomotypicalGroup();
1264
	}
1265

    
1266

    
1267
	private void handleQuestionMark(INonViralName name, Taxon taxon) {
1268
		int count = name.getTaxonBases().size();
1269
		if (count != 1){
1270
			logger.warn("Name has " + count + " taxa. This is not handled for question mark");
1271
		}else{
1272
			TaxonBase taxonBase = name.getTaxonBases().iterator().next();
1273
			taxonBase.setDoubtful(true);
1274
		}
1275
	}
1276

    
1277

    
1278
	//merge with handleNomTaxon
1279
	private void handleHomonym(EfloraImportState state, Element elHomonym, TaxonNameBase upperName) {
1280
		verifyNoAttribute(elHomonym);
1281

    
1282
		//hommonym name
1283
		TaxonNameBase<?,?> homonymName = TaxonNameFactory.NewBotanicalInstance(upperName.getRank());
1284
		homonymName.setGenusOrUninomial(upperName.getGenusOrUninomial());
1285
		homonymName.setInfraGenericEpithet(upperName.getInfraGenericEpithet());
1286
		homonymName.setSpecificEpithet(upperName.getSpecificEpithet());
1287
		homonymName.setInfraSpecificEpithet(upperName.getInfraSpecificEpithet());
1288

    
1289
		for (Element elName : (List<Element>)elHomonym.getChildren("name")){
1290
			String classValue = elName.getAttributeValue("class");
1291
			String value = elName.getValue().trim();
1292
			if (classValue.equalsIgnoreCase("genus") ){
1293
				homonymName.setGenusOrUninomial(value);
1294
			}else if (classValue.equalsIgnoreCase("epithet") ){
1295
				homonymName.setSpecificEpithet(value);
1296
			}else if (classValue.equalsIgnoreCase("author")){
1297
				handleNameAuthors(elName, homonymName);
1298
			}else if (classValue.equalsIgnoreCase("paraut")){
1299
				handleBasionymAuthor(state, elName, homonymName, true);
1300
			}else if (classValue.equalsIgnoreCase("pub")){
1301
				handleNomenclaturalReference(homonymName, value);
1302
			}else if (classValue.equalsIgnoreCase("note")){
1303
				handleNameNote(homonymName, value);
1304
			}else{
1305
				logger.warn("Unhandled class value: " + classValue);
1306
			}
1307
		}
1308
		//TODO verify other information
1309

    
1310

    
1311
		//rel
1312
		boolean homonymIsLater = false;
1313
		NameRelationshipType relType = NameRelationshipType.LATER_HOMONYM();
1314
		if (upperName.getNomenclaturalReference() != null && homonymName.getNomenclaturalReference() != null){
1315
			TimePeriod homonymYear = homonymName.getNomenclaturalReference().getDatePublished();
1316
			TimePeriod nameYear = upperName.getNomenclaturalReference().getDatePublished();
1317
			homonymIsLater = homonymYear.getStart().compareTo(nameYear.getStart())  > 0;
1318
		}else{
1319
			if (upperName.getNomenclaturalReference() == null){
1320
				logger.warn("Homonym parent does not have a nomenclatural reference or year: " + upperName.getTitleCache());
1321
			}
1322
			if (homonymName.getNomenclaturalReference() == null){
1323
				logger.warn("Homonym does not have a nomenclatural reference or year: " + homonymName.getTitleCache());
1324
			}
1325
		}
1326
		if (homonymIsLater){
1327
			homonymName.addRelationshipToName(upperName, relType, null);
1328
		}else{
1329
			upperName.addRelationshipToName(homonymName, relType, null);
1330
		}
1331
	}
1332

    
1333

    
1334
	/**
1335
	 * @param state
1336
	 * @param taxon
1337
	 * @param leadsKey
1338
	 * @return
1339
	 */
1340
	private Set<PolytomousKeyNode> handleMatchingNodes(EfloraImportState state, Taxon taxon, UnmatchedLeadsKey leadsKey) {
1341
		Set<PolytomousKeyNode> matchingNodes = state.getUnmatchedLeads().getNodes(leadsKey);
1342
		for (PolytomousKeyNode matchingNode : matchingNodes){
1343
			state.getUnmatchedLeads().removeNode(leadsKey, matchingNode);
1344
			matchingNode.setTaxon(taxon);
1345
			state.getPolytomousKeyNodesToSave().add(matchingNode);
1346
		}
1347
		return matchingNodes;
1348
	}
1349

    
1350

    
1351
	private void handleNameNote(INonViralName name, String value) {
1352
		logger.warn("Name note: " + value + ". Available in portal?");
1353
		Annotation annotation = Annotation.NewInstance(value, AnnotationType.EDITORIAL(), Language.DEFAULT());
1354
		name.addAnnotation(annotation);
1355
	}
1356

    
1357

    
1358
	/**
1359
	 * @param taxon
1360
	 * @param name
1361
	 * @param value
1362
	 */
1363
	protected TeamOrPersonBase handleNameUsage(Taxon taxon, INonViralName name, String referenceTitle, TeamOrPersonBase lastTeam) {
1364
		Reference ref = ReferenceFactory.newGeneric();
1365
		referenceTitle = removeStartingSymbols(referenceTitle, ref);
1366

    
1367
		ref.setTitleCache(referenceTitle, true);
1368
		String microReference = parseReferenceYearAndDetail(ref);
1369
		TeamOrPersonBase<?> team = getReferenceAuthor(ref);
1370
		parseReferenceType(ref);
1371
		if (team == null){
1372
			team = lastTeam;
1373
		}
1374
		ref.setAuthorship(team);
1375

    
1376
		TaxonDescription description = getDescription(taxon);
1377
		TextData textData = TextData.NewInstance(Feature.CITATION());
1378
		textData.addSource(OriginalSourceType.PrimaryTaxonomicSource, null, null, ref, microReference, (TaxonNameBase)name, null);
1379
		description.addElement(textData);
1380
		return team;
1381
	}
1382

    
1383

    
1384
	/**
1385
	 * @param referenceTitle
1386
	 * @param ref
1387
	 * @return
1388
	 */
1389
	private String removeStartingSymbols(String referenceTitle,	Reference ref) {
1390
		if (referenceTitle.startsWith(";") || referenceTitle.startsWith(",") || referenceTitle.startsWith(":")){
1391
			referenceTitle = referenceTitle.substring(1).trim();
1392
			ref.setTitleCache(referenceTitle);
1393
		}
1394
		return referenceTitle;
1395
	}
1396

    
1397

    
1398
	private void parseReferenceType(Reference ref) {
1399
		String title = ref.getTitle();
1400
		if (title == null){
1401
			return;
1402
		}
1403
		title = title.trim();
1404
		//no in reference
1405
		if (! title.startsWith("in ")){
1406
			ref.setType(ReferenceType.Book);
1407
			return;
1408
		}
1409

    
1410
		title = title.substring(3);
1411
		//in reference
1412
		//no ,
1413
		if (title.indexOf(",") == -1){
1414
			ref.setType(ReferenceType.Article);
1415
			IJournal journal = ReferenceFactory.newJournal();
1416
			journal.setTitle(title);
1417
			ref.setTitle(null);
1418
			ref.setInJournal(journal);
1419
			//return;
1420
		}else{
1421
			//,-references
1422
			ref.setType(ReferenceType.BookSection);
1423
			String[] split = (title).split(",\\s*[A-Z]");
1424
			if (split.length <= 1){
1425
				logger.warn("Can not fully decide what reference type. Guess it is a book section: " + title );
1426
			}
1427
			IBook book = ReferenceFactory.newBook();
1428
			Team bookTeam = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1429
			try {
1430
				title = title.substring(split[0].length() + 1).trim();
1431
			} catch (Exception e) {
1432
				logger.error("ERROR occurred when trying to split title: " +  title + "; split[0]: + " + split[0]);
1433
			}
1434
			book.setTitle(title);
1435
			book.setAuthorship(bookTeam);
1436
			book.setDatePublished(ref.getDatePublished());
1437
			ref.setTitle(null);
1438
			ref.setInBook(book);
1439
		}
1440
	}
1441

    
1442

    
1443
	protected Team getReferenceAuthor (Reference ref) {
1444
		boolean isCache = false;
1445
		String referenceTitle = ref.getTitle();
1446
		if (referenceTitle == null){
1447
			isCache = true;
1448
			referenceTitle = ref.getTitleCache();
1449
		}
1450
		//in references
1451
		String[] split = (" " + referenceTitle).split(" in ");
1452
		if (split.length > 1){
1453
			if (StringUtils.isNotBlank(split[0])){
1454
				//' in ' is within the reference string, take the preceding string as the team
1455
				Team team = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1456
				if (! isCache){
1457
					ref.setTitle("in " + split[1]);
1458
				}
1459
				return team;
1460
			}else{
1461
				//string starts with in therefore no author is given
1462
				return null;
1463
			}
1464
		}
1465
		//no ,-reference
1466
		split = referenceTitle.split(",");
1467
		if (split.length < 2){
1468
			//no author is given
1469
			return null;
1470
		}
1471

    
1472
		//,-references
1473
		split = (referenceTitle).split(",\\s*[A-Z]");
1474
		if (split.length > 1){
1475
			Team team = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1476
			if (! isCache){
1477
				ref.setTitle(referenceTitle.substring(split[0].length()+1).trim());
1478
			}
1479
			return team;
1480
		}else{
1481
			logger.warn("Can't decide if a usage has an author: " + referenceTitle );
1482
			return null;
1483
		}
1484
	}
1485

    
1486

    
1487
	/**
1488
	 * Replaced by <homonym> tag but still in use for exceptions
1489
	 * @param detail
1490
	 * @param name
1491
	 * @return
1492
	 */
1493
	protected String parseHomonym(String detail, TaxonNameBase name) {
1494
		String result;
1495
		if (detail == null){
1496
			return detail;
1497
		}
1498

    
1499

    
1500
		//non RE
1501
		String reNon = "(\\s|,)non\\s";
1502
		Pattern patReference = Pattern.compile(reNon);
1503
		Matcher matcher = patReference.matcher(detail);
1504
		if (matcher.find()){
1505
			int start = matcher.start();
1506
			int end = matcher.end();
1507

    
1508
			if (detail != null){
1509
				logger.warn("Unhandled non part: " + detail.substring(start));
1510
				return detail;
1511
			}
1512

    
1513
			result = detail.substring(0, start);
1514

    
1515
			//homonym string
1516
			String homonymString = detail.substring(end);
1517

    
1518
			//hommonym name
1519
			TaxonNameBase<?,?> homonymName = TaxonNameFactory.NewBotanicalInstance(name.getRank());
1520
			homonymName.setGenusOrUninomial(name.getGenusOrUninomial());
1521
			homonymName.setInfraGenericEpithet(name.getInfraGenericEpithet());
1522
			homonymName.setSpecificEpithet(name.getSpecificEpithet());
1523
			homonymName.setInfraSpecificEpithet(name.getInfraSpecificEpithet());
1524
			Reference homonymNomRef = ReferenceFactory.newGeneric();
1525
			homonymNomRef.setTitleCache(homonymString, true);
1526
			String homonymNomRefDetail = parseReferenceYearAndDetail(homonymNomRef);
1527
			homonymName.setNomenclaturalMicroReference(homonymNomRefDetail);
1528
			String authorTitle = homonymNomRef.getTitleCache();
1529
			Team team = Team.NewTitledInstance(authorTitle, authorTitle);
1530
			homonymNomRef.setAuthorship(team);
1531
			homonymNomRef.setTitle("");
1532
			homonymNomRef.setProtectedTitleCache(false);
1533

    
1534
			//rel
1535
			boolean homonymIsLater = false;
1536
			NameRelationshipType relType = NameRelationshipType.LATER_HOMONYM();
1537
			TimePeriod homonymYear = homonymNomRef.getDatePublished();
1538
			if (name.getNomenclaturalReference() != null){
1539
				TimePeriod nameYear = name.getNomenclaturalReference().getDatePublished();
1540
				homonymIsLater = homonymYear.getStart().compareTo(nameYear.getStart())  > 0;
1541
			}else{
1542
				logger.warn("Classification name has no nomenclatural reference");
1543
			}
1544
			if (homonymIsLater){
1545
				homonymName.addRelationshipToName(name, relType, null);
1546
			}else{
1547
				name.addRelationshipToName(homonymName, relType, null);
1548
			}
1549

    
1550
		}else{
1551
			return detail;
1552
		}
1553
		return result;
1554
	}
1555

    
1556

    
1557
	/**
1558
	 * @Xpath body/taxon/nomenclature/homotypes/nom/name[@class="pub"]
1559
	 * @param name
1560
	 * @param value
1561
	 */
1562
	protected TeamOrPersonBase handleNomenclaturalReference(TaxonNameBase<?,?> name, String value) {
1563
		Reference nomRef = ReferenceFactory.newGeneric();
1564
		nomRef.setTitleCache(value, true);
1565
		parseNomStatus(nomRef, name);
1566
		String microReference = parseReferenceYearAndDetail(nomRef);
1567
		name.setNomenclaturalReference(nomRef);
1568
		microReference = parseHomonym(microReference, name);
1569
		name.setNomenclaturalMicroReference(microReference);
1570
		TeamOrPersonBase<?> team = name.getCombinationAuthorship();
1571
		if (team == null){
1572
			logger.warn("Name has nom. ref. but no author team. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
1573
		}else{
1574
			nomRef.setAuthorship(team);
1575
		}
1576
		return team;
1577
	}
1578

    
1579
	private void handleInfrAuthor(EfloraImportState state, Element elAuthor, INonViralName name, boolean overwrite) {
1580
		String strAuthor = elAuthor.getValue().trim();
1581
		if (strAuthor.endsWith(",")){
1582
			strAuthor = strAuthor.substring(0, strAuthor.length() -1);
1583
		}
1584
		TeamOrPersonBase[] team = getTeam(strAuthor);
1585
		if (name.getCombinationAuthorship() != null && overwrite == false){
1586
			logger.warn("Try to write combination author for a name that already has a combination author. Neglected.");
1587
		}else{
1588
			name.setCombinationAuthorship(team[0]);
1589
			name.setExCombinationAuthorship(team[1]);
1590
		}
1591

    
1592

    
1593
	}
1594

    
1595

    
1596
	/**
1597
	 * Sets the names rank according to the infrank value
1598
	 * @param name
1599
	 * @param elements
1600
	 * @param elInfraRank
1601
	 * @param infraRank
1602
	 * @return
1603
	 */
1604
	private Rank handleInfRank(INonViralName name, List<Element> elInfraRank, Rank infraRank) {
1605
		if (elInfraRank.size() == 1){
1606
			String strRank = elInfraRank.get(0).getTextNormalize();
1607
			try {
1608
				infraRank = Rank.getRankByNameOrIdInVoc(strRank);
1609
			} catch (UnknownCdmTypeException e) {
1610
				try{
1611
					infraRank = Rank.getRankByNameOrIdInVoc(strRank + ".");
1612
				} catch (UnknownCdmTypeException e2) {
1613
					logger.warn("Unknown infrank " + strRank + ". Set infraRank to (null).");
1614
				}
1615
			}
1616
		}else if (elInfraRank.size() > 1){
1617
			logger.warn ("There is more than 1 infrank");
1618
		}
1619
		if (infraRank != null){
1620
			name.setRank(infraRank);
1621
		}
1622
		return infraRank;
1623
	}
1624

    
1625

    
1626
	private void handleInfrEpi(INonViralName name, Rank infraRank, String value) {
1627
		if (infraRank != null && infraRank.isInfraSpecific()){
1628
			name.setInfraSpecificEpithet(value);
1629
			if (CdmUtils.isCapital(value)){
1630
				logger.warn("Infraspecific epithet starts with a capital letter: " + value);
1631
			}
1632
		}else if (infraRank != null && infraRank.isInfraGeneric()){
1633
			name.setInfraGenericEpithet(value);
1634
			if (! CdmUtils.isCapital(value)){
1635
				logger.warn("Infrageneric epithet does not start with a capital letter: " + value);
1636
			}
1637
		}else{
1638
			logger.warn("Infrepi could not be handled: " + value);
1639
		}
1640
	}
1641

    
1642

    
1643

    
1644
	/**
1645
	 * Returns the (empty) with the correct homotypical group depending on the taxon status
1646
	 * @param taxon
1647
	 * @param homotypicalGroup
1648
	 * @param isSynonym
1649
	 * @return
1650
	 */
1651
	private TaxonNameBase makeName(Taxon taxon,HomotypicalGroup homotypicalGroup, boolean isSynonym) {
1652
		TaxonNameBase<?,?> name;
1653
		if (isSynonym){
1654
			name = TaxonNameFactory.NewBotanicalInstance(Rank.SPECIES(), homotypicalGroup);
1655
			SynonymType synonymType = SynonymType.HETEROTYPIC_SYNONYM_OF();
1656
			if (taxon.getHomotypicGroup().equals(homotypicalGroup)){
1657
				synonymType = SynonymType.HOMOTYPIC_SYNONYM_OF();
1658
			}
1659
			taxon.addSynonymName(name, synonymType);
1660
		}else{
1661
			name = taxon.getName();
1662
		}
1663
		return name;
1664
	}
1665

    
1666

    
1667
	/**
1668
	 * @param element
1669
	 * @param taxon
1670
	 */
1671
	private void handleInfraspecificEpithet(Element element, String attrValue, INonViralName name) {
1672
		String value = element.getTextNormalize();
1673
		if (value.indexOf("subsp.") != -1){
1674
			//TODO genus and species epi
1675
			String infrEpi = value.substring(value.indexOf("subsp.") + 6).trim();
1676
			name.setInfraSpecificEpithet(infrEpi);
1677
			name.setRank(Rank.SUBSPECIES());
1678
		}else if (value.indexOf("var.") != -1){
1679
			//TODO genus and species epi
1680
			String infrEpi = value.substring(value.indexOf("var.") + 4).trim();
1681
			name.setInfraSpecificEpithet(infrEpi);
1682
			name.setRank(Rank.VARIETY());
1683
		}else{
1684
			logger.warn("Unhandled infraspecific type: " + value);
1685
		}
1686
	}
1687

    
1688

    
1689
	/**
1690
	 * @param state
1691
	 * @param element
1692
	 * @param name
1693
	 */
1694
	private void handleBasionymAuthor(EfloraImportState state, Element elBasionymAuthor, INonViralName name, boolean overwrite) {
1695
		String strAuthor = elBasionymAuthor.getValue().trim();
1696
		Pattern reBasionymAuthor = Pattern.compile("^\\(.*\\)$");
1697
		if (reBasionymAuthor.matcher(strAuthor).matches()){
1698
			strAuthor = strAuthor.substring(1, strAuthor.length()-1);
1699
		}else{
1700
			logger.warn("Brackets are missing for original combination author " + strAuthor);
1701
		}
1702
		TeamOrPersonBase[] basionymTeam = getTeam(strAuthor);
1703
		if (name.getBasionymAuthorship() != null && overwrite == false){
1704
			logger.warn("Try to write basionym author for a name that already has a basionym author. Neglected.");
1705
		}else{
1706
			name.setBasionymAuthorship(basionymTeam[0]);
1707
			name.setExBasionymAuthorship(basionymTeam[1]);
1708

    
1709
		}
1710
	}
1711

    
1712
	//Maps an author string to the uuid of the team or person created for it.
	//Filled by getTeam(String), read by getUuidTeam(String).
	private final Map<String, UUID> teamMap = new HashMap<String, UUID>();
1713
	/**
1714
	 * @param elAuthors
1715
	 * @param name
1716
	 * @param elNom
1717
	 */
1718
	private void handleNameAuthors(Element elAuthor, INonViralName name) {
1719
		if (name.getCombinationAuthorship() != null){
1720
			logger.warn("Name already has a combination author. Name: " +  name.getTitleCache() + ", Author: " + elAuthor.getTextNormalize());
1721
		}
1722
		String strAuthor = elAuthor.getValue().trim();
1723
		if (strAuthor.endsWith(",")){
1724
			strAuthor = strAuthor.substring(0, strAuthor.length() -1);
1725
		}
1726
		if (strAuthor.indexOf("(") > -1 || strAuthor.indexOf(")") > -1){
1727
			logger.warn("Author has brackets. Basionym authors should be handled in separate tags: " + strAuthor);
1728
		}
1729
		TeamOrPersonBase[] team = getTeam(strAuthor);
1730
		name.setCombinationAuthorship(team[0]);
1731
		name.setExCombinationAuthorship(team[1]);
1732
	}
1733

    
1734

    
1735
	/**
1736
	 * @param strAuthor
1737
	 * @return
1738
	 */
1739
	private TeamOrPersonBase[] getTeam(String strAuthor) {
1740
		TeamOrPersonBase[] result = new TeamOrPersonBase[2];
1741
		String[] split = strAuthor.split(" ex ");
1742
		String strBaseAuthor = null;
1743
		String strExAuthor = null;
1744

    
1745
		if (split.length == 2){
1746
			strBaseAuthor = split[1];
1747
			strExAuthor = split[0];
1748
		}else if (split.length == 1){
1749
			strBaseAuthor = split[0];
1750
		}else{
1751
			logger.warn("Could not parse (ex) author: " + strAuthor);
1752
		}
1753
		result[0] = getUuidTeam(strBaseAuthor);
1754
		if (result[0] == null){
1755
			result[0] = parseSingleTeam(strBaseAuthor);
1756
			teamMap.put(strBaseAuthor, result[0].getUuid());
1757
		}
1758
		if (strExAuthor != null){
1759
			result[1] = getUuidTeam(strExAuthor);
1760
			if (result[1] == null){
1761
				result[1] = Team.NewInstance();
1762
				result[1].setTitleCache(strExAuthor, true);
1763
				teamMap.put(strExAuthor, result[1].getUuid());
1764
			}
1765

    
1766
		}
1767
		return result;
1768
	}
1769

    
1770

    
1771
	protected TeamOrPersonBase parseSingleTeam(String strBaseAuthor) {
1772
		TeamOrPersonBase result;
1773
		String[] split = strBaseAuthor.split("&");
1774
		if (split.length > 1){
1775
			result = Team.NewInstance();
1776
			for (String personString : split){
1777
				Person person = makePerson(personString);
1778
				((Team)result).addTeamMember(person);
1779
			}
1780
		}else{
1781
			result = makePerson(strBaseAuthor.trim());
1782
		}
1783
		return result;
1784
	}
1785

    
1786

    
1787
	/**
1788
	 * @param personString
1789
	 * @return
1790
	 */
1791
	private Person makePerson(String personString) {
1792
		personString = personString.trim();
1793
		Person person = Person.NewTitledInstance(personString);
1794
		person.setNomenclaturalTitle(personString);
1795
		return person;
1796
	}
1797

    
1798

    
1799
	/**
1800
	 * @param result
1801
	 * @param strBaseAuthor
1802
	 */
1803
	private TeamOrPersonBase getUuidTeam(String strBaseAuthor) {
1804
		UUID uuidTeam = teamMap.get(strBaseAuthor);
1805
		return CdmBase.deproxy(getAgentService().find(uuidTeam), TeamOrPersonBase.class);
1806
	}
1807

    
1808

    
1809
	private void handleDescription(EfloraImportState state, Element elDescription, Taxon taxon, Set<String> unhandledChildren) {
1810
		verifyNoAttribute(elDescription);
1811

    
1812
		List<Element> elements = elDescription.getChildren();
1813
		for (Element element : elements){
1814
			if (element.getName().equalsIgnoreCase("char")){
1815
				handleChar(state, element, taxon);
1816
			}else{
1817
				logger.warn("Unhandled description child: " + element.getName());
1818
			}
1819
		}
1820

    
1821
	}
1822

    
1823

    
1824
	/**
1825
	 * @param state
1826
	 * @param element
1827
	 * @param taxon
1828
	 */
1829
	private void handleChar(EfloraImportState state, Element element, Taxon taxon) {
1830
		List<Attribute> attributes = element.getAttributes();
1831
		for (Attribute attribute : attributes){
1832
			if (! attribute.getName().equalsIgnoreCase("class")){
1833
				logger.warn("Char has unhandled attribute " +  attribute.getName());
1834
			}else{
1835
				String classValue = attribute.getValue();
1836
				Feature feature = getFeature(classValue, state);
1837
				if (feature == null){
1838
					logger.warn("Unhandled feature: " + classValue);
1839
				}else{
1840
					String value = element.getValue();
1841
					addDescriptionElement(state, taxon, value, feature, null);
1842
				}
1843

    
1844
			}
1845
		}
1846

    
1847
		List<Element> elements = element.getChildren();
1848
		if (! elements.isEmpty()){
1849
			logger.warn("Char has unhandled children");
1850
		}
1851
	}
1852

    
1853

    
1854
	/**
1855
	 * @param taxon
1856
	 * @return
1857
	 */
1858
	protected TaxonDescription getDescription(Taxon taxon) {
1859
		for (TaxonDescription description : taxon.getDescriptions()){
1860
			if (! description.isImageGallery()){
1861
				return description;
1862
			}
1863
		}
1864
		TaxonDescription newDescription = TaxonDescription.NewInstance(taxon);
1865
		return newDescription;
1866
	}
1867

    
1868

    
1869
	/**
1870
	 * @param classValue
1871
	 * @param state
1872
	 * @return
1873
	 * @throws UndefinedTransformerMethodException
1874
	 */
1875
	private Feature getFeature(String classValue, EfloraImportState state) {
1876
		UUID uuid;
1877
		try {
1878
			uuid = state.getTransformer().getFeatureUuid(classValue);
1879
			if (uuid == null){
1880
				logger.info("Uuid is null for " + classValue);
1881
			}
1882
			String featureText = StringUtils.capitalize(classValue);
1883
			//TODO eFlora feature vocabulary
1884
			Feature feature = getFeature(state, uuid, featureText, featureText, classValue, null);
1885
			if (feature == null){
1886
				throw new NullPointerException(classValue + " not recognized as a feature");
1887
			}
1888
			return feature;
1889
		} catch (Exception e) {
1890
			logger.warn("Could not create feature for " + classValue + ": " + e.getMessage()) ;
1891
			return Feature.UNKNOWN();
1892
		}
1893
	}
1894

    
1895

    
1896
	/**
1897
	 * @param state
1898
	 * @param element
1899
	 * @param taxon
1900
	 * @param unhandledTitleClassess
1901
	 */
1902
	private void handleTitle(EfloraImportState state, Element element, Taxon taxon, Set<String> unhandledTitleClassess) {
1903
		// attributes
1904
		List<Attribute> attributes = element.getAttributes();
1905
		for (Attribute attribute : attributes){
1906
			if (! attribute.getName().equalsIgnoreCase("class") ){
1907
				if (! attribute.getName().equalsIgnoreCase("num")){
1908
					logger.warn("Title has unhandled attribute " +  attribute.getName());
1909
				}else{
1910
					//TODO num attribute in taxon
1911
				}
1912
			}else{
1913
				String classValue = attribute.getValue();
1914
				try {
1915
					Rank rank;
1916
					try {
1917
						rank = Rank.getRankByNameOrIdInVoc(classValue);
1918
					} catch (Exception e) {
1919
						//TODO nc
1920
						rank = Rank.getRankByEnglishName(classValue, NomenclaturalCode.ICNAFP, false);
1921
					}
1922
					taxon.getName().setRank(rank);
1923
					if (rank.equals(Rank.FAMILY()) || rank.equals(Rank.GENUS())){
1924
						handleGenus(element.getValue(), taxon.getName());
1925
					}else if (rank.equals(Rank.SUBGENUS())){
1926
						handleSubGenus(element.getValue(), taxon.getName());
1927
					}else if (rank.equals(Rank.SECTION_BOTANY())){
1928
						handleSection(element.getValue(), taxon.getName());
1929
					}else if (rank.equals(Rank.SPECIES())){
1930
						handleSpecies(element.getValue(), taxon.getName());
1931
					}else if (rank.equals(Rank.SUBSPECIES())){
1932
						handleSubSpecies(element.getValue(), taxon.getName());
1933
					}else if (rank.equals(Rank.VARIETY())){
1934
						handleVariety(element.getValue(), taxon.getName());
1935
					}else{
1936
						logger.warn("Unhandled rank: " + rank.getLabel());
1937
					}
1938
				} catch (UnknownCdmTypeException e) {
1939
					logger.warn("Unknown rank " + classValue);
1940
					unhandledTitleClassess.add(classValue);
1941
				}
1942
			}
1943
		}
1944
		List<Element> elements = element.getChildren();
1945
		if (! elements.isEmpty()){
1946
			logger.warn("Title has unexpected children");
1947
		}
1948
		UUID uuidTitle = EfloraTransformer.uuidTitle;
1949
		ExtensionType titleExtension = this.getExtensionType(state, uuidTitle, "title", "title", "title");
1950
		taxon.addExtension(element.getTextNormalize(), titleExtension);
1951

    
1952
	}
1953

    
1954

    
1955
	/**
1956
	 * @param value
1957
	 * @param taxonNameBase
1958
	 */
1959
	private void handleSubGenus(String value, INonViralName taxonNameBase) {
1960
		String name = value.replace("Subgenus", "").trim();
1961
		taxonNameBase.setInfraGenericEpithet(name);
1962
	}
1963

    
1964
	/**
1965
	 * @param value
1966
	 * @param taxonNameBase
1967
	 */
1968
	private void handleSection(String value, INonViralName taxonNameBase) {
1969
		String name = value.replace("Section", "").trim();
1970
		taxonNameBase.setInfraGenericEpithet(name);
1971
	}
1972

    
1973
	/**
1974
	 * @param value
1975
	 * @param taxonNameBase
1976
	 */
1977
	private void handleSpecies(String value, TaxonNameBase taxonNameBase) {
1978
		//do nothing
1979
	}
1980

    
1981
	/**
1982
	 * @param value
1983
	 * @param taxonNameBase
1984
	 */
1985
	private void handleVariety(String value, TaxonNameBase taxonNameBase) {
1986
		//do nothing
1987
	}
1988

    
1989
	/**
1990
	 * @param value
1991
	 * @param taxonNameBase
1992
	 */
1993
	private void handleSubSpecies(String value, TaxonNameBase taxonNameBase) {
1994
		//do nothing
1995
	}
1996

    
1997

    
1998
	//Matches a part in round or square brackets, from the first opening to the last closing
	//bracket on a line (greedy). Used by handleGenus to find the author in a title.
	private final Pattern rexGenusAuthor = Pattern.compile("(\\[|\\().*(\\]|\\))");
1999

    
2000
	/**
2001
	 * @param value
2002
	 * @param taxonNameBase
2003
	 */
2004
	protected void handleGenus(String value, INonViralName taxonName) {
2005
		Matcher matcher = rexGenusAuthor.matcher(value);
2006
		if (matcher.find()){
2007
			String author = matcher.group();
2008
//			String genus = value.replace(author, "");
2009
			author = author.substring(1, author.length() - 1);
2010
			Team team = Team.NewInstance();
2011
			team.setTitleCache(author, true);
2012
			Credit credit = Credit.NewInstance(team, null);
2013
			taxonName.addCredit(credit);
2014
//			taxonName.setCombinationAuthorship(team);
2015
//			taxonName.setGenusOrUninomial(genus);
2016
		}else{
2017
			logger.info("No Author match for " + value);
2018
		}
2019
	}
2020

    
2021

    
2022
	/**
2023
	 * @param taxon
2024
	 * @param lastTaxon
2025
	 */
2026
	private void handleTaxonRelation(EfloraImportState state, Taxon taxon, Taxon lastTaxon) {
2027

    
2028
		Classification tree = getTree(state);
2029
		if (lastTaxon == null){
2030
			tree.addChildTaxon(taxon, null, null);
2031
			return;
2032
		}
2033
		Rank thisRank = taxon.getName().getRank();
2034
		Rank lastRank = lastTaxon.getName().getRank();
2035
		if (lastTaxon.getTaxonNodes().size() > 0){
2036
			TaxonNode lastNode = lastTaxon.getTaxonNodes().iterator().next();
2037
			if (thisRank.isLower(lastRank )  ){
2038
				lastNode.addChildTaxon(taxon, null, null);
2039
				fillMissingEpithetsForTaxa(lastTaxon, taxon);
2040
			}else if (thisRank.equals(lastRank)){
2041
				TaxonNode parent = lastNode.getParent();
2042
				if (parent != null){
2043
					parent.addChildTaxon(taxon, null, null);
2044
					fillMissingEpithetsForTaxa(parent.getTaxon(), taxon);
2045
				}else{
2046
					tree.addChildTaxon(taxon, null, null);
2047
				}
2048
			}else if (thisRank.isHigher(lastRank)){
2049
				handleTaxonRelation(state, taxon, lastNode.getParent().getTaxon());
2050
//				TaxonNode parentNode = handleTaxonRelation(state, taxon, lastNode.getParent().getTaxon());
2051
//				parentNode.addChildTaxon(taxon, null, null, null);
2052
			}
2053
		}else{
2054
			logger.warn("Last taxon has no node");
2055
		}
2056
	}
2057

    
2058

    
2059

    
2060
	/**
2061
	 * @param state
2062
	 * @return
2063
	 */
2064
	private Classification getTree(EfloraImportState state) {
2065
		Classification result = state.getTree(null);
2066
		if (result == null){
2067
			UUID uuid = state.getConfig().getClassificationUuid();
2068
			if (uuid == null){
2069
				logger.warn("No classification uuid is defined");
2070
				result = getNewClassification(state);
2071
			}else{
2072
				result = getClassificationService().find(uuid);
2073
				if (result == null){
2074
					result = getNewClassification(state);
2075
					result.setUuid(uuid);
2076
				}
2077
			}
2078
			state.putTree(null, result);
2079
		}
2080
		return result;
2081
	}
2082

    
2083

    
2084
	private Classification getNewClassification(EfloraImportState state) {
2085
		Classification result;
2086
		result = Classification.NewInstance(state.getConfig().getClassificationTitle());
2087
		state.putTree(null, result);
2088
		return result;
2089
	}
2090

    
2091

    
2092
	/**
2093
	 * @param state
2094
	 * @param taxon
2095
	 * @param value
2096
	 * @param feature
2097
	 * @return
2098
	 */
2099
	private TextData addDescriptionElement(EfloraImportState state, Taxon taxon, String value, Feature feature, String references) {
2100
		TextData textData = TextData.NewInstance(feature);
2101
		Language textLanguage = getDefaultLanguage(state);
2102
		textData.putText(textLanguage, value);
2103
		TaxonDescription description = getDescription(taxon);
2104
		description.addElement(textData);
2105
		if (references != null){
2106
			makeOriginalSourceReferences(textData, ";", references);
2107
		}
2108
		return textData;
2109
	}
2110

    
2111
	private Language getDefaultLanguage(EfloraImportState state) {
2112
		UUID defaultLanguageUuid = state.getConfig().getDefaultLanguageUuid();
2113
		if (defaultLanguageUuid != null){
2114
			Language result = state.getDefaultLanguage();
2115
			if (result == null || ! result.getUuid().equals(defaultLanguageUuid)){
2116
				result = (Language)getTermService().find(defaultLanguageUuid);
2117
				state.setDefaultLanguage(result);
2118
				if (result == null){
2119
					logger.warn("Default language for " + defaultLanguageUuid +  " does not exist.");
2120
				}
2121
			}
2122
			return result;
2123
		}else{
2124
			return Language.DEFAULT();
2125
		}
2126
	}
2127

    
2128

    
2129
	/**
2130
	 * @param elNomenclature
2131
	 */
2132
	private void verifyNoAttribute(Element element) {
2133
		List<Attribute> attributes = element.getAttributes();
2134
		if (! attributes.isEmpty()){
2135
			logger.warn(element.getName() + " has unhandled attributes: " + attributes.get(0).getValue() + "..." );
2136
		}
2137
	}
2138

    
2139
	/**
2140
	 * @param elNomenclature
2141
	 */
2142
	protected void verifyNoChildren(Element element) {
2143
		verifyNoChildren(element, false);
2144
	}
2145

    
2146
	/**
2147
	 * @param elNomenclature
2148
	 */
2149
	private void verifyNoChildren(Element element, boolean ignoreLineBreak) {
2150
		List<Element> children = element.getChildren();
2151
		if (! children.isEmpty()){
2152
			if (ignoreLineBreak == true){
2153
				for (Element child : children){
2154
					if (! child.getName().equalsIgnoreCase("BR")){
2155
						logger.warn(element.getName() + " has unhandled child: " + child.getName());
2156
					}
2157
				}
2158
			}else{
2159
				logger.warn(element.getName() + " has unhandled children");
2160
			}
2161
		}
2162
	}
2163

    
2164

    
2165

    
2166
	/**
2167
	 * Parses the nomenclatural status from the references titleCache. If a nomenclatural status
2168
	 * exists it is added to the name and the nom. status part of the references title cache is
2169
	 * removed. Requires protected title cache.
2170
	 * @param ref
2171
	 * @param nonViralName
2172
	 */
2173
	protected void parseNomStatus(Reference ref, INonViralName nonViralName) {
2174
		String titleToParse = ref.getTitleCache();
2175

    
2176
		String noStatusTitle = parser.parseNomStatus(titleToParse, nonViralName, true);
2177
		if (! noStatusTitle.equals(titleToParse)){
2178
			ref.setTitleCache(noStatusTitle, true);
2179
		}
2180
	}
2181

    
2182

    
2183
	/**
2184
	 * Extracts the date published part and returns micro reference
2185
	 * @param ref
2186
	 * @return
2187
	 */
2188
	private String parseReferenceYearAndDetail(Reference ref){
2189
		String detailResult = null;
2190
		String titleToParse = ref.getTitleCache();
2191
		titleToParse = removeStartingSymbols(titleToParse, ref);
2192
		String reReference = "^\\.{1,}";
2193
//		String reYear = "\\([1-2]{1}[0-9]{3}\\)";
2194
		String oneMonth = "(Feb.|Dec.|March|June|July)";
2195
		String reYear = oneMonth + "?\\s?[1-2]\\s?[0-9]\\s?[0-9]\\s?[0-9]\\s?";
2196
		String secondYear = "(\\s?[1-2]\\s?[0-9])?\\s?[0-9]\\s?[0-9]\\s?";
2197

    
2198
		String reYearPeriod = "\\(" + reYear + "(\\-" + secondYear + ")?\\)";
2199
		String reDetail = "\\.{1,10}$";
2200

    
2201
		//pattern for the whole string
2202
		Pattern patReference = Pattern.compile(/*reReference +*/ reYearPeriod /*+ reDetail */);
2203
		Matcher matcher = patReference.matcher(titleToParse);
2204
		if (matcher.find()){
2205
			int start = matcher.start();
2206
			int end = matcher.end();
2207

    
2208
			//title and other information precedes the year part
2209
			String title = titleToParse.substring(0, start).trim();
2210
			//detail follows the year part
2211
			String detail = titleToParse.substring(end).trim();
2212

    
2213
			//time period
2214
			String strPeriod = matcher.group().trim();
2215
			strPeriod = strPeriod.substring(1, strPeriod.length()-1);   //remove brackets
2216
			Pattern patStartMonth = Pattern.compile("^" + oneMonth);
2217
			matcher = patStartMonth.matcher(strPeriod);
2218
			strPeriod = strPeriod.replace(" ", "");
2219
			Integer startMonth = null;
2220
			if (matcher.find()){
2221
				end = matcher.end();
2222
				strPeriod = strPeriod.substring(0, end) + " " + strPeriod.substring(end);
2223
				startMonth = getMonth(strPeriod.substring(0, end));
2224
			}
2225

    
2226
			TimePeriod datePublished = TimePeriodParser.parseString(strPeriod);
2227
			if (startMonth != null){
2228
				datePublished.setStartMonth(startMonth);
2229
			}
2230
			ref.setDatePublished(datePublished);
2231
			ref.setTitle(title);
2232
			detailResult = CdmUtils.removeTrailingDot(detail);
2233
			if (detailResult.endsWith(".") || detailResult.endsWith(";") || detailResult.endsWith(",")  ){
2234
				detailResult = detailResult.substring(0, detailResult.length() -1);
2235
			}
2236
			ref.setProtectedTitleCache(false);
2237
		}else{
2238
			logger.warn("Could not parse reference: " +  titleToParse);
2239
		}
2240
		return detailResult;
2241

    
2242
	}
2243

    
2244

    
2245

    
2246
	private Integer getMonth(String month) {
2247
		if (month.startsWith("Jan")){
2248
			return 1;
2249
		}else if (month.startsWith("Feb")){
2250
			return 2;
2251
		}else if (month.startsWith("Mar")){
2252
			return 3;
2253
		}else if (month.startsWith("Apr")){
2254
			return 4;
2255
		}else if (month.startsWith("May")){
2256
			return 5;
2257
		}else if (month.startsWith("Jun")){
2258
			return 6;
2259
		}else if (month.startsWith("Jul")){
2260
			return 7;
2261
		}else if (month.startsWith("Aug")){
2262
			return 8;
2263
		}else if (month.startsWith("Sep")){
2264
			return 9;
2265
		}else if (month.startsWith("Oct")){
2266
			return 10;
2267
		}else if (month.startsWith("Nov")){
2268
			return 11;
2269
		}else if (month.startsWith("Dec")){
2270
			return 12;
2271
		}else{
2272
			logger.warn("Month not yet supported: " + month);
2273
			return null;
2274
		}
2275
	}
2276

    
2277

    
2278
	/* (non-Javadoc)
2279
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
2280
	 */
2281
	@Override
2282
    protected boolean isIgnore(EfloraImportState state){
2283
		return ! state.getConfig().isDoTaxa();
2284
	}
2285

    
2286
}
(4-4/6)