Project

General

Profile

Download (82.9 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2009 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/ 
9

    
10
package eu.etaxonomy.cdm.io.markup;
11

    
12
import java.util.ArrayList;
13
import java.util.HashMap;
14
import java.util.HashSet;
15
import java.util.Iterator;
16
import java.util.LinkedList;
17
import java.util.List;
18
import java.util.Map;
19
import java.util.Queue;
20
import java.util.Set;
21
import java.util.UUID;
22
import java.util.regex.Matcher;
23
import java.util.regex.Pattern;
24

    
25
import javax.xml.stream.FactoryConfigurationError;
26
import javax.xml.stream.XMLEventReader;
27
import javax.xml.stream.XMLStreamException;
28
import javax.xml.stream.events.StartElement;
29
import javax.xml.stream.events.XMLEvent;
30

    
31
import org.apache.commons.lang.CharUtils;
32
import org.apache.commons.lang.StringUtils;
33
import org.apache.log4j.Logger;
34
import org.jdom.Attribute;
35
import org.jdom.Element;
36
import org.springframework.beans.factory.annotation.Autowired;
37
import org.springframework.security.access.PermissionEvaluator;
38
import org.springframework.security.authentication.AuthenticationManager;
39
import org.springframework.security.authentication.UsernamePasswordAuthenticationToken;
40
import org.springframework.security.core.Authentication;
41
import org.springframework.security.core.context.SecurityContext;
42
import org.springframework.security.core.context.SecurityContextHolder;
43
import org.springframework.stereotype.Component;
44

    
45
import eu.etaxonomy.cdm.common.CdmUtils;
46
import eu.etaxonomy.cdm.common.ResultWrapper;
47
import eu.etaxonomy.cdm.common.XmlHelp;
48
import eu.etaxonomy.cdm.io.common.ICdmIO;
49
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
50
import eu.etaxonomy.cdm.io.markup.UnmatchedLeads.UnmatchedLeadsKey;
51
import eu.etaxonomy.cdm.model.agent.Person;
52
import eu.etaxonomy.cdm.model.agent.Team;
53
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
54
import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
55
import eu.etaxonomy.cdm.model.common.Annotation;
56
import eu.etaxonomy.cdm.model.common.AnnotationType;
57
import eu.etaxonomy.cdm.model.common.CdmBase;
58
import eu.etaxonomy.cdm.model.common.Credit;
59
import eu.etaxonomy.cdm.model.common.ExtensionType;
60
import eu.etaxonomy.cdm.model.common.ISourceable;
61
import eu.etaxonomy.cdm.model.common.Language;
62
import eu.etaxonomy.cdm.model.common.Marker;
63
import eu.etaxonomy.cdm.model.common.MarkerType;
64
import eu.etaxonomy.cdm.model.common.TimePeriod;
65
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
66
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
67
import eu.etaxonomy.cdm.model.description.Feature;
68
import eu.etaxonomy.cdm.model.description.KeyStatement;
69
import eu.etaxonomy.cdm.model.description.PolytomousKey;
70
import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
71
import eu.etaxonomy.cdm.model.description.TaxonDescription;
72
import eu.etaxonomy.cdm.model.description.TextData;
73
import eu.etaxonomy.cdm.model.name.BotanicalName;
74
import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
75
import eu.etaxonomy.cdm.model.name.NameRelationshipType;
76
import eu.etaxonomy.cdm.model.name.NameTypeDesignation;
77
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
78
import eu.etaxonomy.cdm.model.name.NonViralName;
79
import eu.etaxonomy.cdm.model.name.Rank;
80
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
81
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
82
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
83
import eu.etaxonomy.cdm.model.name.TypeDesignationBase;
84
import eu.etaxonomy.cdm.model.occurrence.Specimen;
85
import eu.etaxonomy.cdm.model.reference.IBook;
86
import eu.etaxonomy.cdm.model.reference.IJournal;
87
import eu.etaxonomy.cdm.model.reference.Reference;
88
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
89
import eu.etaxonomy.cdm.model.reference.ReferenceType;
90
import eu.etaxonomy.cdm.model.taxon.Classification;
91
import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
92
import eu.etaxonomy.cdm.model.taxon.Taxon;
93
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
94
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
95
import eu.etaxonomy.cdm.permission.CdmPermissionEvaluator;
96
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
97
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
98

    
99

    
100
/**
101
 * @author a.mueller
102
 *
103
 */
104
@Component
105
public class MarkupDocumentImport  extends MarkupImportBase implements ICdmIO<MarkupImportState> {
106
	private static final Logger logger = Logger.getLogger(MarkupDocumentImport.class);
107

    
108
	private static int modCount = 30000;
109
	private NonViralNameParserImpl parser = new NonViralNameParserImpl();
110
	
111
	//TODO make part of state, but state is renewed when invoking the import a second time 
112
	private UnmatchedLeads unmatchedLeads;
113

    
114
	
115
	//TODO remove preliminary
116
	@Autowired
117
	private AuthenticationManager authenticationManager;
118
	private Authentication authentication;
119
	private PermissionEvaluator permissionEvaluator;
120
	
121
	/**
	 * Creates the import component. The preliminary authentication set-up
	 * is currently disabled (see the commented code) and still has to be removed.
	 */
	public MarkupDocumentImport(){
		super();
		//report through the class logger instead of stdout so the reminder ends up in the regular log
		logger.warn("TODO remove preliminary authentication");
//		UsernamePasswordAuthenticationToken token = new UsernamePasswordAuthenticationToken("admin", "0000");
//		authentication = authenticationManager.authenticate(token);
//		SecurityContext context = SecurityContextHolder.getContext();
//		context.setAuthentication(authentication);
//		permissionEvaluator = new CdmPermissionEvaluator();
	}
130
	
131
	
132
	@Override
133
	public boolean doCheck(MarkupImportState state){
134
		state.setCheck(true);
135
		doInvoke(state);
136
		state.setCheck(false);
137
		return state.isSuccess();
138
	}
139
	
140
	@Override
141
	public void doInvoke(MarkupImportState state){
142
		fireProgressEvent("Start import markup document", "Before start of document");
143
		fireWarningEvent("Test a warning", "At start", 17);
144
		
145
		Queue<CdmBase> outputStream = new LinkedList<CdmBase>();
146
		
147
		//FIXME reset state
148
		doAllTheOldOtherStuff(state);
149

    
150
		//START
151
		try {
152
			//StAX
153
			XMLEventReader reader = getStaxReader(state); 
154
			state.setReader(reader);
155
			//start document
156
			if (! validateStartOfDocument(reader)){
157
				state.setUnsuccessfull();
158
				return;
159
			}
160
			
161
			//publication
162
			String elName = "publication";
163
			while (reader.hasNext()) {
164
				XMLEvent nextEvent = reader.nextEvent();
165
				if (isStartingElement(nextEvent, elName)){
166
					handlePublication(state, reader, nextEvent, elName);
167
				}else{
168
					fireSchemaConflictEventExpectedStartTag(elName, reader);
169
				}
170
			}
171
			
172
//			//SAX
173
//			ImportHandlerBase handler= new PublicationHandler(this);
174
//			parseSAX(state, handler);
175
			
176
		} catch (FactoryConfigurationError e1) {
177
			fireWarningEvent("Some error occurred while setting up xml factory. Data can't be imported", "Start", 16);
178
			state.setUnsuccessfull();
179
		} catch (XMLStreamException e1) {
180
			fireWarningEvent("An XMLStreamException occurred while parsing. Data can't be imported", "Start", 16);
181
			state.setUnsuccessfull();
182
//		} catch (ParserConfigurationException e) {
183
//			fireWarningEvent("A ParserConfigurationException occurred while parsing. Data can't be imported", "Start", 16);
184
//		} catch (SAXException e) {
185
//			fireWarningEvent("A SAXException occurred while parsing. Data can't be imported", "Start", 16);
186
//		} catch (IOException e) {
187
//			fireWarningEvent("An IO exception occurred while parsing. Data can't be imported", "Start", 16);
188

    
189
		}
190
		 
191
	
192
		return;
193
		
194
	}
195

    
196
	
197

    
198

    
199
	private void handlePublication(MarkupImportState state, XMLEventReader reader, XMLEvent currentEvent, String elName) throws XMLStreamException {
200
			
201
		//attributes
202
		StartElement element = currentEvent.asStartElement().asStartElement();
203
		Map<String, javax.xml.stream.events.Attribute> attributes = getAttributes(element);
204
//		if (attributes.hasNext()){
205
//			handleUnexpectedAttributes(element.getLocation(), attributes);
206
//		}
207
		
208
		while (reader.hasNext()){
209
			XMLEvent event = readNoWhitespace(reader);
210
			//TODO cardinality of alternative
211
			if (event.isEndElement()){
212
				if (isEndingElement(event, elName)){
213
					return;
214
				}else{
215
					if(isEndingElement(event, "metaData")){
216
						//NOT YET IMPLEMENTED
217
					}else if(isStartingElement(event, "treatment")){
218
						//NOT YET IMPLEMENTED
219
					}else if(isStartingElement(event, "biographies")){
220
						//NOT YET IMPLEMENTED
221
					}else if(isStartingElement(event, "references")){
222
						//NOT YET IMPLEMENTED
223
					}else if(isStartingElement(event, "textSection")){
224
						//NOT YET IMPLEMENTED
225
					}else if(isStartingElement(event, "addenda")){
226
						//NOT YET IMPLEMENTED
227
					}else{
228
						handleUnexpectedEndElement(event);
229
					}
230
				}
231
			}else if (event.isStartElement()){
232
				if(isStartingElement(event, "metaData")){
233
					handleNotYetImplementedElement(event);
234
				}else if(isStartingElement(event, "treatment")){
235
					handleTreatment(state, reader, event);
236
				}else if(isStartingElement(event, "biographies")){
237
					handleNotYetImplementedElement(event);
238
				}else if(isStartingElement(event, "references")){
239
					handleNotYetImplementedElement(event);
240
				}else if(isStartingElement(event, "textSection")){
241
					handleNotYetImplementedElement(event);
242
				}else if(isStartingElement(event, "addenda")){
243
					handleNotYetImplementedElement(event);
244
				}else{
245
					handleUnexpectedStartElement(event);
246
				}
247
			}else{
248
				handleUnexpectedElement(event);
249
			}
250
		}
251
		return;
252
	}
253

    
254

    
255

    
256

    
257
	/**
258
	 * Read next event. Ignore whitespace events.
259
	 * @param reader
260
	 * @return
261
	 * @throws XMLStreamException
262
	 */
263
	private XMLEvent readNoWhitespace(XMLEventReader reader) throws XMLStreamException {
264
		XMLEvent event = reader.nextEvent();
265
		while (event.isCharacters() && event.asCharacters().isWhiteSpace()){
266
			event = reader.nextEvent();
267
		}
268
		return event;
269
	}
270
	
271
	private boolean handleTreatment(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
272
		boolean success = true;
273
		while (reader.hasNext()){
274
			XMLEvent next = readNoWhitespace(reader);
275
			if (isStartingElement(next, "taxon")){
276
				handleTaxon(state, reader, next.asStartElement());
277
			}else if(isMyEndingElement(next, parentEvent)){
278
				return success;
279
			}else{
280
				fireSchemaConflictEventExpectedStartTag("taxon", reader);
281
				success = false;
282
			}
283
		}
284
		return success;
285
	}
286

    
287

    
288
	private void handleTaxon(MarkupImportState state, XMLEventReader reader, StartElement parentEvent) throws XMLStreamException {
289
		boolean hasTitle = false;
290
		boolean hasNomenclature = false;
291
		while (reader.hasNext()){
292
			XMLEvent next = readNoWhitespace(reader);
293
			if (next.isEndElement()){
294
				if (isMyEndingElement(next, parentEvent)){
295
					handleMandatoryElement(hasTitle, parentEvent, "taxontitle");
296
					handleMandatoryElement(hasNomenclature, parentEvent, "nomenclature");
297
					//TODO check title and nomenclature exists
298
					return;
299
				}else{
300
					if(isEndingElement(next, "heading")){
301
						//NOT YET IMPLEMENTED
302
					}else if(isStartingElement(next, "taxontitle")){
303
						//NOT YET IMPLEMENTED
304
					}else if(isStartingElement(next, "biographies")){
305
						//NOT YET IMPLEMENTED
306
					}else if(isStartingElement(next, "references")){
307
						//NOT YET IMPLEMENTED
308
					}else if(isStartingElement(next, "textSection")){
309
						//NOT YET IMPLEMENTED
310
					}else if(isStartingElement(next, "addenda")){
311
						//NOT YET IMPLEMENTED
312
					}else{
313
						handleUnexpectedEndElement(next);
314
					}
315
				}
316
			}else if (next.isStartElement()){
317
				if(isStartingElement(next, "heading")){
318
					handleNotYetImplementedElement(next);
319
				}else if(isStartingElement(next, "taxontitle")){
320
					handleTaxonTitle(state, reader, next);
321
				}else if(isStartingElement(next, "writer")){
322
					handleNotYetImplementedElement(next);
323
				}else if(isStartingElement(next, "textsection")){
324
					handleNotYetImplementedElement(next);
325
				}else if(isStartingElement(next, "key")){
326
					handleNotYetImplementedElement(next);
327
				}else if(isStartingElement(next, "feature")){
328
					handleNotYetImplementedElement(next);
329
				}else if(isStartingElement(next, "notes")){
330
					handleNotYetImplementedElement(next);
331
				}else if(isStartingElement(next, "references")){
332
					handleNotYetImplementedElement(next);
333
				}else if(isStartingElement(next, "figure")){
334
					handleNotYetImplementedElement(next);
335
				}else if(isStartingElement(next, "footnote")){
336
					handleNotYetImplementedElement(next);
337
				}else{
338
					handleUnexpectedStartElement(next);
339
				}
340
			}else{
341
				handleUnexpectedElement(next);
342
			}
343
		}
344
		return;
345
	}
346

    
347
	private void handleTaxonTitle(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
348
		boolean hasTitle = false;
349
		
350
		//TODO handle attributes
351
		while (reader.hasNext()){
352
			XMLEvent next = readNoWhitespace(reader);
353
			if (next.isEndElement()){
354
				if (isMyEndingElement(next, parentEvent)){
355
					//TODO check title exists
356
					return;
357
				}else{
358
					if(isEndingElement(next, "footnoteString")){
359
						//NOT YET IMPLEMENTED
360
					}else{
361
						handleUnexpectedEndElement(next);
362
						state.setSuccessToFalse();
363
					}
364
				}
365
			}else if (next.isStartElement()){
366
				if(isStartingElement(next, "footnoteString")){
367
					handleNotYetImplementedElement(next);
368
				}else{
369
					handleUnexpectedStartElement(next);
370
					state.setSuccessToFalse();
371
				}
372
			}else{
373
				handleUnexpectedElement(next);
374
				state.setSuccessToFalse();
375
			}
376
		}
377
		return;
378
		
379
		
380
	}
381

    
382

    
383
	private boolean isMyEndingElement(XMLEvent next, XMLEvent event) throws XMLStreamException {
384
		return isEndingElement(next, event.asStartElement().getName().getLocalPart());
385
	}
386

    
387

    
388
	/**
389
	 * This comes from the old version, needs to be checked on need
390
	 * @param state
391
	 */
392
	private void doAllTheOldOtherStuff(MarkupImportState state) {
393
		state.putTree(null, null);
394
		if (unmatchedLeads == null){
395
			unmatchedLeads = UnmatchedLeads.NewInstance();
396
		}
397
		state.setUnmatchedLeads(unmatchedLeads);
398
		
399
//		TransactionStatus tx = startTransaction();
400
		unmatchedLeads.saveToSession(getPolytomousKeyNodeService());
401
		
402
		
403
		//TODO generally do not store the reference object in the config
404
		Reference sourceReference = state.getConfig().getSourceReference();
405
		getReferenceService().saveOrUpdate(sourceReference);
406
	}
407

    
408

    
409
	private boolean doInvoke_old(MarkupImportState state){
410
		Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
411
		ResultWrapper<Boolean> success = ResultWrapper.NewInstance(true);
412

    
413
	//	Element elbody= getBodyElement(state.getConfig());
414
		Element elbody = null;
415
		List<Element> elTaxonList = elbody.getChildren();
416
		
417
		int i = 0;
418
		
419
		Set<String> unhandledTitleClassess = new HashSet<String>();
420
		Set<String> unhandledNomeclatureChildren = new HashSet<String>();
421
		Set<String> unhandledDescriptionChildren = new HashSet<String>();
422
		
423
		Taxon lastTaxon = getLastTaxon(state);
424
		
425
		//for each taxon
426
		for (Element elTaxon : elTaxonList){
427
			try {
428
				if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
429
				if (! elTaxon.getName().equalsIgnoreCase("taxon")){
430
					logger.warn("body has element other than 'taxon'");
431
				}
432
				
433
				BotanicalName botanicalName = BotanicalName.NewInstance(Rank.SPECIES());
434
				Taxon taxon = Taxon.NewInstance(botanicalName, state.getConfig().getSourceReference());
435
				
436
				handleTaxonAttributes(elTaxon, taxon, state);
437
	
438
				
439
				List<Element> children = elTaxon.getChildren();
440
				handleTaxonElement(state, unhandledTitleClassess, unhandledNomeclatureChildren,	unhandledDescriptionChildren, taxon, children);
441
				handleTaxonRelation(state, taxon, lastTaxon);
442
				lastTaxon = taxon;
443
				taxaToSave.add(taxon);
444
				state.getConfig().setLastTaxonUuid(lastTaxon.getUuid());
445
				
446
			} catch (Exception e) {
447
				logger.warn("Exception occurred in Sapindacea taxon import: " + e);
448
				e.printStackTrace();
449
			}
450
			
451
		}
452
		
453
		System.out.println(state.getUnmatchedLeads().toString());
454
		logger.warn("There are taxa with attributes 'excluded' and 'dubious'");
455
		
456
		logger.info("Children for nomenclature are: " + unhandledNomeclatureChildren);
457
		logger.info("Children for description are: " + unhandledDescriptionChildren);
458
		logger.info("Children for homotypes are: " + unhandledHomotypeChildren);
459
		logger.info("Children for nom are: " + unhandledNomChildren);
460
		
461
		
462
		//invokeRelations(source, cdmApp, deleteAll, taxonMap, referenceMap);
463
		logger.info(i + " taxa handled. Saving ...");
464
		getTaxonService().saveOrUpdate(taxaToSave);
465
		getFeatureTreeService().saveOrUpdateFeatureNodesAll(state.getFeatureNodesToSave());
466
		state.getFeatureNodesToSave().clear();
467
//		commitTransaction(tx);
468
		
469
		logger.info("end makeTaxa ...");
470
		logger.info("start makeKey ...");
471
	//	invokeDoKey(state);
472
		logger.info("end makeKey ...");
473
		
474
		return success.getValue();
475
	}
476

    
477
	
478

    
479

    
480
	private void handleTaxonAttributes(Element elTaxon, Taxon taxon, MarkupImportState state) {
481
		List<Attribute> attrList = elTaxon.getAttributes();
482
		for (Attribute attr : attrList){
483
			String attrName = attr.getName();
484
			String attrValue = attr.getValue();
485
			if ("class".equals(attrName)){
486
				if (attrValue.equalsIgnoreCase("dubious") || attrValue.equalsIgnoreCase("DUBIOUS GENUS") || attrValue.equalsIgnoreCase("DOUBTFUL SPECIES")  ){
487
					taxon.setDoubtful(true);
488
				}else{
489
					MarkerType markerType = getMarkerType(state, attrValue);
490
					if (markerType == null){
491
						logger.warn("Class attribute value for taxon not yet supported: " + attrValue);
492
					}else{
493
						taxon.addMarker(Marker.NewInstance(markerType, true));
494
					}
495
				}
496
			}else if ("num".equals(attrName)){
497
				logger.warn("num not yet supported");
498
			}else{
499
				logger.warn("Attribute " + attrName + " not yet supported for element taxon");
500
			}
501
		}
502

    
503
	}
504

    
505

    
506
	private Taxon getLastTaxon(MarkupImportState state) {
507
		if (state.getConfig().getLastTaxonUuid() == null){
508
			return null;
509
		}else{
510
			return (Taxon)getTaxonService().find(state.getConfig().getLastTaxonUuid());
511
		}
512
	}
513

    
514

    
515
//	private void invokeDoKey(SapindaceaeImportState state) {
516
//		TransactionStatus tx = startTransaction();
517
//		
518
//		Set<FeatureNode> nodesToSave = new HashSet<FeatureNode>();
519
//		ITaxonService taxonService = getTaxonService();
520
//		ResultWrapper<Boolean> success = ResultWrapper.NewInstance(true);
521
//
522
//		Element elbody= getBodyElement(state.getConfig());
523
//		List<Element> elTaxonList = elbody.getChildren();
524
//		
525
//		int i = 0;
526
//		
527
//		//for each taxon
528
//		for (Element elTaxon : elTaxonList){
529
//			if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
530
//			if (! elTaxon.getName().equalsIgnoreCase("taxon")){
531
//				continue;
532
//			}
533
//			
534
//			List<Element> children = elTaxon.getChildren("key");
535
//			for (Element element : children){
536
//				handleKeys(state, element, null);
537
//			}
538
//			nodesToSave.add(taxon);
539
//
540
//		}
541
//		
542
//	}
543

    
544

    
545
	// body/taxon/*
546
	private void handleTaxonElement(MarkupImportState state, Set<String> unhandledTitleClassess, Set<String> unhandledNomeclatureChildren, Set<String> unhandledDescriptionChildren, Taxon taxon, List<Element> children) {
547
		AnnotatableEntity lastEntity = null;
548
		for (Element element : children){
549
			String elName = element.getName();
550
			
551
			if (elName.equalsIgnoreCase("title")){
552
				handleTitle(state, element, taxon, unhandledTitleClassess);
553
				lastEntity = null;
554
			}else if(elName.equalsIgnoreCase("nomenclature")){
555
				handleNomenclature(state, element, taxon, unhandledNomeclatureChildren);
556
				lastEntity = null;
557
			}else if(elName.equalsIgnoreCase("description")){
558
				handleDescription(state, element, taxon, unhandledDescriptionChildren);
559
				lastEntity = null;
560
			}else if(elName.equalsIgnoreCase("habitatecology")){
561
				lastEntity = handleEcology(state, element, taxon);
562
			}else if(elName.equalsIgnoreCase("distribution")){
563
				lastEntity = handleDistribution(state, element, taxon);
564
			}else if(elName.equalsIgnoreCase("uses")){
565
				lastEntity = handleUses(state, element, taxon);
566
			}else if(elName.equalsIgnoreCase("notes")){
567
				lastEntity = handleTaxonNotes(state, element, taxon);
568
			}else if(elName.equalsIgnoreCase("chromosomes")){
569
				lastEntity = handleChromosomes(state, element, taxon);
570
			}else if(elName.equalsIgnoreCase("vernacularnames")){
571
				handleVernaculars(state, element, taxon);
572
			}else if(elName.equalsIgnoreCase("key")){
573
				lastEntity = handleKeys(state, element, taxon);
574
			}else if(elName.equalsIgnoreCase("references")){
575
				handleReferences(state, element, taxon, lastEntity);
576
				lastEntity = null;
577
			}else if(elName.equalsIgnoreCase("taxon")){
578
				logger.warn("A taxon should not be part of a taxon");
579
			}else if(elName.equalsIgnoreCase("homotypes")){
580
				logger.warn("Homotypes should be included in the nomenclature flag but is child of taxon [XPath: body/taxon/homotypes]");
581
			}else{
582
				logger.warn("Unexpected child for taxon: " + elName);
583
			}
584
		}
585
	}
586
	
587
	
588
	private void handleVernaculars(MarkupImportState state, Element elVernacular, Taxon taxon) {
589
		verifyNoAttribute(elVernacular);
590
		verifyNoChildren(elVernacular, false);
591
		String value = elVernacular.getTextNormalize();
592
		Feature feature = Feature.COMMON_NAME();
593
		value = replaceStart(value, "Noms vernaculaires");
594
		String[] dialects = value.split(";");
595
		for (String singleDialect : dialects){
596
			handleSingleDialect(taxon, singleDialect, feature, state);
597
		}
598
		return;
599
	}
600

    
601

    
602
	private void handleSingleDialect(Taxon taxon, String singleDialect, Feature feature, MarkupImportState state) {
603
		singleDialect = singleDialect.trim();
604
		TaxonDescription description = getDescription(taxon);
605
		String reDialect = "\\(dial\\.\\s.*\\)";
606
//		String reDialect = "\\(.*\\)";
607
		Pattern patDialect = Pattern.compile(reDialect);
608
		Matcher matcher = patDialect.matcher(singleDialect);
609
		if (matcher.find()){
610
			String dialect = singleDialect.substring(matcher.start(), matcher.end());
611
			dialect = dialect.replace("(dial. ", "").replace(")", "");
612
			
613
			Language language = null;
614
			try {
615
				language = this.getLanguage(state, state.getTransformer().getLanguageUuid(dialect), dialect, dialect, dialect);
616
			} catch (UndefinedTransformerMethodException e) {
617
				logger.error(e.getMessage());
618
			}
619
			
620
			String commonNames = singleDialect.substring(0, matcher.start());
621
			String[] splitNames = commonNames.split(",");
622
			for (String commonNameString : splitNames){
623
				commonNameString = commonNameString.trim();
624
				CommonTaxonName commonName = CommonTaxonName.NewInstance(commonNameString, language);
625
				description.addElement(commonName);
626
			}
627
		}else{
628
			logger.warn("No dialect match: " +  singleDialect);
629
		}
630
	}
631

    
632

    
633
	private void handleReferences(MarkupImportState state, Element elReferences, Taxon taxon, AnnotatableEntity lastEntity) {
634
		verifyNoAttribute(elReferences);
635
		verifyNoChildren(elReferences, true);
636
		String refString = elReferences.getTextNormalize(); 
637
		if (lastEntity == null){
638
			logger.warn("No last entity defined: " + refString);
639
			return;
640
		}
641
		
642
		Annotation annotation = Annotation.NewInstance(refString, AnnotationType.EDITORIAL(), Language.DEFAULT());
643
		lastEntity.addAnnotation(annotation);
644
	}
645

    
646

    
647
	private PolytomousKey handleKeys(MarkupImportState state, Element elKey, Taxon taxon) {
648
		UnmatchedLeads openKeys = state.getUnmatchedLeads();
649
		
650
		//title
651
		String title = makeKeyTitle(elKey);
652
		
653
		//key
654
		PolytomousKey key = PolytomousKey.NewTitledInstance(title);
655
		
656
		//TODO add covered taxa etc.
657
		verifyNoAttribute(elKey);
658
		
659
		//notes
660
		makeKeyNotes(elKey, key);
661
		
662
		//keycouplets
663
		List<Element> keychoices = new ArrayList<Element>();
664
		keychoices.addAll(elKey.getChildren("keycouplet"));
665
		keychoices.addAll(elKey.getChildren("keychoice"));
666
		
667
		
668
		for (Element elKeychoice : keychoices){
669
			handleKeyChoices(state, openKeys, key, elKeychoice, taxon);
670
			elKey.removeContent(elKeychoice);
671
		}
672
		
673
		//
674
		verifyNoChildren(elKey);
675
		logger.info("Unmatched leads after key handling:" + openKeys.toString());
676
		
677

    
678
		if (state.getConfig().isDoPrintKeys()){
679
			key.print(System.err);
680
		}
681
		getPolytomousKeyService().save(key);
682
		return key;
683
	}
684

    
685

    
686
	/**
687
	 * @param state
688
	 * @param elKey
689
	 * @param openKeys
690
	 * @param key
691
	 * @param elKeychoice
692
	 * @param taxon 
693
	 */
694
	private void handleKeyChoices(MarkupImportState state, UnmatchedLeads openKeys, PolytomousKey key, Element elKeychoice, Taxon taxon) {
695
		
696
		//char Attribute
697
		//TODO it's still unclear if char is a feature and needs to be a new attribute 
698
		//or if it is handled as question. Therefore both cases are handled but feature
699
		//is finally not yet set
700
		KeyStatement question = handleKeychoiceChar(state, elKeychoice);
701
		Feature feature = handleKeychoiceCharAsFeature(state, elKeychoice);
702
		
703
		//lead
704
		List<PolytomousKeyNode> childNodes = handleKeychoiceLeads(state, key, elKeychoice, taxon, question, feature);
705
		
706
		//num -> match with unmatched leads
707
		handleKeychoiceNum(openKeys, key, elKeychoice, childNodes);
708

    
709
		//others
710
		verifyNoAttribute(elKeychoice);
711
	}
712

    
713

    
714
	/**
715
	 * @param openKeys
716
	 * @param key
717
	 * @param elKeychoice
718
	 * @param childNodes
719
	 */
720
	private void handleKeychoiceNum(UnmatchedLeads openKeys, PolytomousKey key, Element elKeychoice, List<PolytomousKeyNode> childNodes) {
721
		Attribute numAttr = elKeychoice.getAttribute("num");
722
		String num = CdmUtils.removeTrailingDot(numAttr == null? "":numAttr.getValue());
723
		UnmatchedLeadsKey okk = UnmatchedLeadsKey.NewInstance(key, num);
724
		Set<PolytomousKeyNode> matchingNodes = openKeys.getNodes(okk);
725
		for (PolytomousKeyNode matchingNode : matchingNodes){
726
			for (PolytomousKeyNode childNode : childNodes){
727
				matchingNode.addChild(childNode);
728
			}
729
			openKeys.removeNode(okk, matchingNode);
730
		}
731
		if (matchingNodes.isEmpty()){
732
			for (PolytomousKeyNode childNode : childNodes){
733
				key.getRoot().addChild(childNode);
734
			}
735
		}
736
		
737
		elKeychoice.removeAttribute("num");
738
	}
739

    
740

    
741
	/**
742
	 * @param state
743
	 * @param key
744
	 * @param elKeychoice
745
	 * @param taxon
746
	 * @param feature
747
	 * @return
748
	 */
749
	private List<PolytomousKeyNode> handleKeychoiceLeads(	MarkupImportState state, PolytomousKey key,	Element elKeychoice, Taxon taxon, KeyStatement question, Feature feature) {
750
		List<PolytomousKeyNode> childNodes = new ArrayList<PolytomousKeyNode>();
751
		List<Element> leads = elKeychoice.getChildren("lead");
752
		for(Element elLead : leads){
753
			PolytomousKeyNode childNode = handleLead(state, key, elLead, taxon, question, feature);
754
			childNodes.add(childNode);
755
		}
756
		return childNodes;
757
	}
758

    
759

    
760
	/**
761
	 * @param state
762
	 * @param elKeychoice
763
	 * @return
764
	 */
765
	private KeyStatement handleKeychoiceChar(MarkupImportState state, Element elKeychoice) {
766
		KeyStatement statement = null;
767
		Attribute charAttr = elKeychoice.getAttribute("char");
768
		if (charAttr != null){
769
			String charStr = charAttr.getValue();
770
			if (StringUtils.isNotBlank(charStr)){
771
				statement = KeyStatement.NewInstance(charStr);
772
			}
773
			elKeychoice.removeAttribute("char");
774
		}
775
		return statement;
776
	}
777
	
778
	/**
779
	 * @param state
780
	 * @param elKeychoice
781
	 * @return
782
	 */
783
	private Feature handleKeychoiceCharAsFeature(MarkupImportState state, Element elKeychoice) {
784
		Feature feature = null;
785
		Attribute charAttr = elKeychoice.getAttribute("char");
786
		if (charAttr != null){
787
			String charStr = charAttr.getValue();
788
			feature = getFeature(charStr, state);
789
			elKeychoice.removeAttribute("char");
790
		}
791
		return feature;
792
	}
793

    
794

    
795
	private PolytomousKeyNode handleLead(MarkupImportState state, PolytomousKey key, Element elLead, Taxon taxon, KeyStatement question, Feature feature) {
796
		PolytomousKeyNode node = PolytomousKeyNode.NewInstance();
797
		//TODO the char attribute in the keychoice is more a feature than a question
798
		//needs to be discussed on model side
799
		node.setQuestion(question);
800
//		node.setFeature(feature);
801
		
802
		//text
803
		String text = handleLeadText(elLead, node);
804
		
805
		//num
806
		handleLeadNum(elLead, text);
807
		
808
		//goto
809
		handleLeadGoto(state, key, elLead, taxon, node);
810
		
811
		//others
812
		verifyNoAttribute(elLead);
813
		
814
		return node;
815
	}
816

    
817

    
818
	/**
819
	 * @param elLead
820
	 * @param node
821
	 * @return
822
	 */
823
	private String handleLeadText(Element elLead, PolytomousKeyNode node) {
824
		String text = elLead.getAttributeValue("text").trim();
825
		if (StringUtils.isBlank(text)){
826
			logger.warn("Empty text in lead");
827
		}
828
		elLead.removeAttribute("text");
829
		KeyStatement statement = KeyStatement.NewInstance(text);
830
		node.setStatement(statement);
831
		return text;
832
	}
833

    
834

    
835
	/**
836
	 * @param state
837
	 * @param key
838
	 * @param elLead
839
	 * @param taxon
840
	 * @param node
841
	 */
842
	private void handleLeadGoto(MarkupImportState state, PolytomousKey key, Element elLead, Taxon taxon, PolytomousKeyNode node) {
843
		Attribute gotoAttr = elLead.getAttribute("goto");
844
		if (gotoAttr != null){
845
			String strGoto = gotoAttr.getValue().trim();
846
			//create key
847
			UnmatchedLeadsKey gotoKey = null;
848
			if (isInternalNode(strGoto)){
849
				gotoKey = UnmatchedLeadsKey.NewInstance(key, strGoto);
850
			}else{
851
				String taxonKey = makeTaxonKey(strGoto, taxon);
852
				gotoKey = UnmatchedLeadsKey.NewInstance(taxonKey);
853
			}
854
			//
855
			UnmatchedLeads openKeys = state.getUnmatchedLeads();
856
			if (gotoKey.isInnerLead()){
857
				Set<PolytomousKeyNode> existingNodes = openKeys.getNodes(gotoKey);
858
				for (PolytomousKeyNode existingNode : existingNodes){
859
					node.addChild(existingNode);
860
				}
861
			}
862
			openKeys.addKey(gotoKey, node);
863
			//remove attribute (need for consistency check)
864
			elLead.removeAttribute("goto");
865
		}else{
866
			logger.warn("lead has no goto attribute");
867
		}
868
	}
869

    
870

    
871
	/**
872
	 * @param elLead
873
	 * @param text
874
	 */
875
	private void handleLeadNum(Element elLead, String text) {
876
		Attribute numAttr = elLead.getAttribute("num");
877
		if (numAttr != null){
878
			//TODO num
879
			String num = numAttr.getValue();
880
			elLead.removeAttribute("num");
881
		}else{
882
			logger.info("Keychoice has no num attribute: " + text);
883
		}
884
	}
885

    
886

    
887
	private String makeTaxonKey(String strGoto, Taxon taxon) {
888
		String result = "";
889
		if (strGoto == null){
890
			return "";
891
		}
892
		String strGenusName = CdmBase.deproxy(taxon.getName(), NonViralName.class).getGenusOrUninomial();
893
		strGoto = strGoto.replaceAll("\\([^\\(\\)]*\\)", "");  //replace all brackets
894
		strGoto = strGoto.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace
895
		
896
		strGoto = strGoto.trim();  
897
		String[] split = strGoto.split("\\s");
898
		for (int i = 0; i<split.length; i++){
899
			String single = split[i];
900
			if (isGenusAbbrev(single, strGenusName)){
901
				split[i] = strGenusName;
902
			}
903
//			if (isInfraSpecificMarker(single)){
904
//				String strSpeciesName = CdmBase.deproxy(taxon.getName(), NonViralName.class).getSpecificEpithet();
905
//				split[i] = strGenusName + " " + strSpeciesName + " ";
906
//			}
907
			result = (result + " " + split[i]).trim();
908
		}
909
		return result;
910
	}
911

    
912

    
913
	private boolean isInfraSpecificMarker(String single) {
914
		try {
915
			if (Rank.getRankByAbbreviation(single).isInfraSpecific()){
916
				return true;
917
			}
918
		} catch (UnknownCdmTypeException e) {
919
			return false;
920
		}
921
		return false;
922
	}
923

    
924

    
925
	private boolean isGenusAbbrev(String single, String strGenusName) {
926
		if (! single.matches("[A-Z]\\.?")) {
927
			return false;
928
		}else if (single.length() == 0 || strGenusName == null || strGenusName.length() == 0){
929
			return false; 
930
		}else{
931
			return single.charAt(0) == strGenusName.charAt(0);
932
		}
933
	}
934

    
935

    
936
	private boolean isInternalNode(String strGoto) {
937
		return CdmUtils.isNumeric(strGoto);
938
	}
939

    
940

    
941
	private void makeKeyNotes(Element keyElement, PolytomousKey key) {
942
		Element elNotes = keyElement.getChild("notes");
943
		if (elNotes != null){
944
			keyElement.removeContent(elNotes);
945
			String notes = elNotes.getTextNormalize();
946
			if (StringUtils.isNotBlank(notes)){
947
				key.addAnnotation(Annotation.NewInstance(notes, AnnotationType.EDITORIAL(), Language.DEFAULT()));
948
			}
949
		}
950
	}
951

    
952

    
953
	private String makeKeyTitle(Element keyElement) {
954
		String title = "- no title - ";
955
		Attribute titleAttr = keyElement.getAttribute("title");
956
		keyElement.removeAttribute(titleAttr);
957
		if (titleAttr == null){
958
			Element elTitle = keyElement.getChild("keytitle");
959
			keyElement.removeContent(elTitle);
960
			if (elTitle != null){
961
				title = elTitle.getTextNormalize();
962
			}
963
		}else{
964
			title = titleAttr.getValue();
965
		}
966
		return title;
967
	}
968

    
969

    
970
	/**
971
	 * @param state
972
	 * @param element
973
	 * @param taxon
974
	 */
975
	private TextData handleChromosomes(MarkupImportState state, Element element, Taxon taxon) {
976
		Feature chromosomeFeature = getFeature("chromosomes", state);
977
		verifyNoAttribute(element);
978
		verifyNoChildren(element);
979
		String value = element.getTextNormalize();
980
		value = replaceStart(value, "Chromosomes");
981
		String chromosomesPart = getChromosomesPart(value);
982
		String references = value.replace(chromosomesPart, "").trim();
983
		chromosomesPart = chromosomesPart.replace(":", "").trim();
984
		return addDescriptionElement(state, taxon, chromosomesPart, chromosomeFeature, references);	
985
	}
986

    
987

    
988
	/**
989
	 * @param ref 
990
	 * @param string 
991
	 * @return
992
	 */
993
	private void makeOriginalSourceReferences(ISourceable sourcable, String splitter, String refAll) {
994
		String[] splits = refAll.split(splitter);
995
		for (String strRef: splits){
996
			Reference ref = ReferenceFactory.newGeneric();
997
			ref.setTitleCache(strRef, true);
998
			String refDetail = parseReferenceYearAndDetail(ref);
999
			sourcable.addSource(null, null, ref, refDetail);
1000
		}
1001
		
1002
		
1003
//TODO use regex instead
1004
/*		String detailResult = null;
1005
		String titleToParse = ref.getTitleCache();
1006
		String reReference = "^\\.{1,}";
1007
//		String reYear = "\\([1-2]{1}[0-9]{3}\\)";
1008
		String reYear = "\\([1-2]{1}[0-9]{3}\\)";
1009
		String reYearPeriod = reYear + "(-" + reYear + ")+";
1010
		String reDetail = "\\.{1,10}$";
1011
*/		
1012
	}
1013

    
1014

    
1015
	/**
1016
	 * @param value
1017
	 * @return
1018
	 */
1019
	private String getChromosomesPart(String str) {
1020
		Pattern pattern = Pattern.compile("2n\\s*=\\s*\\d{1,2}:");
1021
		Matcher matcher = pattern.matcher(str);
1022
		if (matcher.find()){
1023
			return matcher.group(0);
1024
		}else{
1025
			logger.warn("Chromosomes could not be parsed: " + str);
1026
		}
1027
		return str;
1028
	}
1029

    
1030

    
1031
	/**
1032
	 * @param state
1033
	 * @param element
1034
	 * @param taxon
1035
	 */
1036
	private TextData handleTaxonNotes(MarkupImportState state, Element element, Taxon taxon) {
1037
		TextData result = null;
1038
		verifyNoChildren(element, true);
1039
		//verifyNoAttribute(element);
1040
		List<Attribute> attributes = element.getAttributes();
1041
		for (Attribute attribute : attributes){
1042
			if (! attribute.getName().equalsIgnoreCase("class")){
1043
				logger.warn("Char has unhandled attribute " +  attribute.getName());
1044
			}else{
1045
				String classValue = attribute.getValue();
1046
				result = handleDescriptiveElement(state, element, taxon, classValue);
1047
			}
1048
		}
1049
		//if no class attribute exists, handle as note
1050
		if (attributes.isEmpty()){
1051
			result = handleDescriptiveElement(state, element, taxon, "Note");
1052
		}
1053

    
1054
		//Annotation annotation = Annotation.NewInstance(value, AnnotationType.EDITORIAL(), Language.ENGLISH());
1055
		//taxon.addAnnotation(annotation);
1056
		return result; //annotation;
1057
	}
1058

    
1059

    
1060
	/**
1061
	 * @param state
1062
	 * @param element
1063
	 * @param taxon
1064
	 * @param result
1065
	 * @param attribute
1066
	 * @return
1067
	 */
1068
	private TextData handleDescriptiveElement(MarkupImportState state, Element element, Taxon taxon, String classValue) {
1069
		TextData result = null;
1070
		Feature feature = getFeature(classValue, state);
1071
		if (feature == null){
1072
			logger.warn("Unhandled feature: " + classValue);
1073
		}else{
1074
			String value = element.getValue();
1075
			value = replaceStart(value, "Notes");
1076
			value = replaceStart(value, "Note");
1077
			result = addDescriptionElement(state, taxon, value, feature, null);
1078
		}
1079
		return result;
1080
	}
1081

    
1082

    
1083
	private void removeBr(Element element) {
1084
		element.removeChildren("Br");
1085
		element.removeChildren("br");
1086
		element.removeChildren("BR");
1087
	}
1088

    
1089

    
1090
	/**
1091
	 * @param state
1092
	 * @param element
1093
	 * @param taxon
1094
	 */
1095
	private TextData handleUses(MarkupImportState state, Element element, Taxon taxon) {
1096
		verifyNoAttribute(element);
1097
		verifyNoChildren(element, true);
1098
		String value = element.getTextNormalize();
1099
		value = replaceStart(value, "Uses");
1100
		Feature feature = Feature.USES();
1101
		return addDescriptionElement(state, taxon, value, feature, null);
1102
		
1103
	}
1104

    
1105

    
1106
	/**
1107
	 * @param state
1108
	 * @param element
1109
	 * @param taxon
1110
	 * @param unhandledDescriptionChildren
1111
	 */
1112
	private DescriptionElementBase handleDistribution(MarkupImportState state, Element element, Taxon taxon) {
1113
		verifyNoAttribute(element);
1114
		verifyNoChildren(element, true);
1115
		String value = element.getTextNormalize();
1116
		value = replaceStart(value, "Distribution");
1117
		Feature feature = Feature.DISTRIBUTION();
1118
		//distribution parsing almost impossible as there is lots of freetext in the distribution tag
1119
		return addDescriptionElement(state, taxon, value, feature, null);
1120
	}
1121

    
1122

    
1123
	/**
1124
	 * @param state
1125
	 * @param element
1126
	 * @param taxon
1127
	 * @param unhandledDescriptionChildren
1128
	 */
1129
	private TextData handleEcology(MarkupImportState state, Element elEcology, Taxon taxon) {
1130
		verifyNoAttribute(elEcology);
1131
		verifyNoChildren(elEcology, true);
1132
		String value = elEcology.getTextNormalize();
1133
		Feature feature = Feature.ECOLOGY();
1134
		if (value.startsWith("Habitat & Ecology")){
1135
			feature = getFeature("Habitat & Ecology", state);
1136
			value = replaceStart(value, "Habitat & Ecology");
1137
		}else if (value.startsWith("Habitat")){
1138
			value = replaceStart(value, "Habitat");
1139
			feature = getFeature("Habitat", state);
1140
		}
1141
		return addDescriptionElement(state, taxon, value, feature, null);
1142
	}
1143

    
1144

    
1145

    
1146
	/**
1147
	 * @param value
1148
	 * @param replacementString
1149
	 */
1150
	private String replaceStart(String value, String replacementString) {
1151
		if (value.startsWith(replacementString) ){
1152
			value = value.substring(replacementString.length()).trim();
1153
		}
1154
		while (value.startsWith("-") || value.startsWith("–") ){
1155
			value = value.substring("-".length()).trim();
1156
		}
1157
		return value;
1158
	}
1159

    
1160

    
1161
	/**
1162
	 * @param value
1163
	 * @param replacementString
1164
	 */
1165
	protected String removeTrailing(String value, String replacementString) {
1166
		if (value == null){
1167
			return null;
1168
		}
1169
		if (value.endsWith(replacementString) ){
1170
			value = value.substring(0, value.length() - replacementString.length()).trim();
1171
		}
1172
		return value;
1173
	}
1174

    
1175
	/**
1176
	 * @param state
1177
	 * @param element
1178
	 * @param taxon
1179
	 * @param unhandledNomeclatureChildren 
1180
	 */
1181
	private void handleNomenclature(MarkupImportState state, Element elNomenclature, Taxon taxon, Set<String> unhandledChildren) {
1182
		verifyNoAttribute(elNomenclature);
1183
		
1184
		List<Element> elements = elNomenclature.getChildren();
1185
		for (Element element : elements){
1186
			if (element.getName().equals("homotypes")){
1187
				handleHomotypes(state, element, taxon);
1188
			}else if (element.getName().equals("notes")){
1189
				handleNomenclatureNotes(state, element, taxon);
1190
			}else{
1191
				unhandledChildren.add(element.getName());
1192
			}
1193
		}
1194
		
1195
	}
1196

    
1197

    
1198

    
1199
	private void handleNomenclatureNotes(MarkupImportState state, Element elNotes, Taxon taxon) {
1200
		verifyNoAttribute(elNotes);
1201
		verifyNoChildren(elNotes);
1202
		String notesText = elNotes.getTextNormalize();
1203
		Annotation annotation = Annotation.NewInstance(notesText, AnnotationType.EDITORIAL(), Language.DEFAULT());
1204
		taxon.addAnnotation(annotation);
1205
	}
1206

    
1207

    
1208

    
1209
	private static Set<String> unhandledHomotypeChildren = new HashSet<String>();
1210
	/**
1211
	 * @param state
1212
	 * @param element
1213
	 * @param taxon
1214
	 */
1215
	private void handleHomotypes(MarkupImportState state, Element elHomotypes, Taxon taxon) {
1216
		verifyNoAttribute(elHomotypes);
1217
		
1218
		List<Element> elements = elHomotypes.getChildren();
1219
		HomotypicalGroup homotypicalGroup = null;
1220
		for (Element element : elements){
1221
			if (element.getName().equals("nom")){
1222
				homotypicalGroup = handleNom(state, element, taxon, homotypicalGroup);
1223
			}else{
1224
				unhandledHomotypeChildren.add(element.getName());
1225
			}
1226
		}
1227
		
1228
	}
1229

    
1230
	private static Set<String> unhandledNomChildren = new HashSet<String>();
1231

    
1232
	/**
1233
	 * @param state
1234
	 * @param element
1235
	 * @param taxon
1236
	 */
1237
	private HomotypicalGroup handleNom(MarkupImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
1238
		List<Attribute> attributes = elNom.getAttributes();
1239
		
1240
		boolean taxonBaseClassType = false;
1241
		for (Attribute attribute : attributes){
1242
			if (! attribute.getName().equalsIgnoreCase("class")){
1243
				logger.warn("Nom has unhandled attribute " +  attribute.getName());
1244
			}else{
1245
				String classValue = attribute.getValue();
1246
				if (classValue.equalsIgnoreCase("acceptedname")){
1247
					homotypicalGroup = handleNomTaxon(state, elNom, taxon,homotypicalGroup, false);
1248
					taxonBaseClassType = true;
1249
				}else if (classValue.equalsIgnoreCase("synonym")){
1250
					homotypicalGroup = handleNomTaxon(state, elNom, taxon, homotypicalGroup, true);
1251
					taxonBaseClassType = true;
1252
				}else if (classValue.equalsIgnoreCase("typeref")){
1253
					handleTypeRef(state, elNom, taxon, homotypicalGroup);
1254
				}else{
1255
					logger.warn("Unhandled class value for nom: " + classValue);
1256
				}
1257
				
1258
			}
1259
		}
1260
		
1261
		List<Element> elements = elNom.getChildren();
1262
		for (Element element : elements){
1263
			if (element.getName().equals("name") || element.getName().equals("homonym") ){
1264
				if (taxonBaseClassType == false){
1265
					logger.warn("Name or homonym tag not allowed in non taxon nom tag");
1266
				}
1267
			}else{
1268
				unhandledNomChildren.add(element.getName());
1269
			}
1270
		}
1271
		
1272
		return homotypicalGroup;
1273
		
1274
	}
1275

    
1276
	/**
1277
	 * @param state
1278
	 * @param elNom
1279
	 * @param taxon
1280
	 * @param homotypicalGroup 
1281
	 */
1282
	protected void handleTypeRef(MarkupImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
1283
		verifyNoChildren(elNom);
1284
		String typeRef = elNom.getTextNormalize();
1285
		typeRef = removeStartingTypeRefMinus(typeRef);
1286
		
1287
		String[] split = typeRef.split(":");
1288
		if (split.length < 2){
1289
			logger.warn("typeRef has no ':' : " + typeRef);
1290
		}else if (split.length > 2){
1291
			logger.warn("typeRef has more than 1 ':' : " + typeRef);
1292
		}else{
1293
			StringBuffer typeType = new StringBuffer(split[0]);
1294
			String typeText = split[1].trim();
1295
			TypeDesignationBase typeDesignation = getTypeDesignationAndReference(typeType);
1296
			
1297
			//Name Type Desitnations
1298
			if (typeDesignation instanceof NameTypeDesignation){
1299
				makeNameTypeDesignations(typeType, typeText, typeDesignation);
1300
			}
1301
			//SpecimenTypeDesignations
1302
			else if (typeDesignation instanceof SpecimenTypeDesignation){
1303
				makeSpecimenTypeDesignation(typeType, typeText, typeDesignation);
1304
			}else{
1305
				logger.error("Unhandled type designation class" + typeDesignation.getClass().getName());
1306
			}
1307
			for (TaxonNameBase name : homotypicalGroup.getTypifiedNames()){
1308
				name.addTypeDesignation(typeDesignation, true);
1309
			}
1310
		}
1311
	}
1312

    
1313

    
1314
	/**
1315
	 * @param typeRef
1316
	 * @return
1317
	 */
1318
	protected String removeStartingTypeRefMinus(String typeRef) {
1319
		typeRef = replaceStart(typeRef, "-");
1320
		typeRef = replaceStart(typeRef, "—");
1321
		typeRef = replaceStart(typeRef, "\u002d");
1322
		typeRef = replaceStart(typeRef, "\u2013");
1323
		typeRef = replaceStart(typeRef, "--");
1324
		return typeRef;
1325
	}
1326

    
1327
	/**
1328
	 * @param typeType
1329
	 * @param typeText
1330
	 * @param typeDesignation
1331
	 */
1332
	private void makeNameTypeDesignations(StringBuffer typeType, String typeText, TypeDesignationBase typeDesignation) {
1333
		if (typeType.toString().trim().equalsIgnoreCase("Type")){
1334
			//do nothing
1335
		}else if (typeType.toString().trim().equalsIgnoreCase("Lectotype")){
1336
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.LECTOTYPE());
1337
		}else if (typeType.toString().trim().equalsIgnoreCase("Syntype")){
1338
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.SYNTYPE());
1339
		}else{
1340
			logger.warn("Unhandled type string: " + typeType + "(" + CharUtils.unicodeEscaped(typeType.charAt(0)) + ")");
1341
		}
1342
		//clean
1343
		typeText = cleanNameType(typeText);
1344
		//create name
1345
		BotanicalName nameType = (BotanicalName)parser.parseFullName(typeText, NomenclaturalCode.ICBN, Rank.SPECIES());
1346
		((NameTypeDesignation) typeDesignation).setTypeName(nameType);
1347
		//TODO wie können NameTypes den Namen zugeordnet werden? -  wird aber vom Portal via NameCache matching gemacht
1348
	}
1349

    
1350

    
1351
	private String cleanNameType(String typeText) {
1352
		String result;
1353
		String[] split = typeText.split("\\[.*\\].?");
1354
		result = split[0];
1355
		return result;
1356
	}
1357

    
1358

    
1359
	/**
1360
	 * @param typeType
1361
	 * @param typeText
1362
	 * @param typeDesignation
1363
	 */
1364
	protected void makeSpecimenTypeDesignation(StringBuffer typeType, String typeText, TypeDesignationBase typeDesignation) {
1365
		if (typeType.toString().trim().equalsIgnoreCase("Type")){
1366
			//do nothing
1367
		}else if (typeType.toString().trim().equalsIgnoreCase("Neotype") || typeType.toString().trim().equalsIgnoreCase("Neotypes")){
1368
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.NEOTYPE());
1369
		}else if (typeType.toString().trim().equalsIgnoreCase("Syntype") || typeType.toString().trim().equalsIgnoreCase("Syntypes")){
1370
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.SYNTYPE());
1371
		}else if (typeType.toString().trim().equalsIgnoreCase("Lectotype")){
1372
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.LECTOTYPE());
1373
		}else if (typeType.toString().trim().equalsIgnoreCase("Paratype")){
1374
			typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.PARATYPE());
1375
		}else{
1376
			logger.warn("Unhandled type string: " + typeType);
1377
		}
1378
		Specimen specimen = Specimen.NewInstance();
1379
		if (typeText.length() > 255){
1380
			specimen.setTitleCache(typeText.substring(0, 252) + "...", true);
1381
		}else{
1382
			specimen.setTitleCache(typeText, true);
1383
		}
1384
		specimen.addDefinition(typeText, Language.ENGLISH());
1385
		((SpecimenTypeDesignation) typeDesignation).setTypeSpecimen(specimen);
1386
	}
1387

    
1388
	private TypeDesignationBase getTypeDesignationAndReference(StringBuffer typeType) {
1389
		TypeDesignationBase result;
1390
		Reference ref = parseTypeDesignationReference(typeType);
1391
		if (typeType.indexOf(" species")>-1 || typeType.indexOf("genus")>-1){
1392
			if (typeType.indexOf(" species")>-1 ){
1393
				result = NameTypeDesignation.NewInstance();
1394
				int start = typeType.indexOf(" species");
1395
				typeType.replace(start, start + " species".length(), "");
1396
			}else {
1397
				result = NameTypeDesignation.NewInstance();
1398
				int start = typeType.indexOf(" genus");
1399
				typeType.replace(start, start + " genus".length(), "");
1400
			}
1401
		}else{
1402
			result = SpecimenTypeDesignation.NewInstance();
1403
		}
1404
		result.setCitation(ref);
1405
		return result;
1406
	}
1407

    
1408

    
1409
	private Reference parseTypeDesignationReference(StringBuffer typeType) {
1410
		Reference result = null;
1411
		String reBracketReference = "\\(.*\\)";
1412
		Pattern patBracketReference = Pattern.compile(reBracketReference);
1413
		Matcher matcher = patBracketReference.matcher(typeType);
1414
		if (matcher.find()){
1415
			String refString = matcher.group();
1416
			int start = typeType.indexOf(refString);
1417
			typeType.replace(start, start + refString.length(), "");
1418
			refString = refString.replace("(", "").replace(")", "").trim();
1419
			Reference ref = ReferenceFactory.newGeneric();
1420
			ref.setTitleCache(refString, true);
1421
			result = ref;
1422
		}
1423
		return result;
1424
	}
1425

    
1426

    
1427
	/**
1428
	 * @param state
1429
	 * @param elNom
1430
	 * @param taxon
1431
	 */
1432
	//body/taxon/
1433
	private HomotypicalGroup handleNomTaxon(MarkupImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup, boolean isSynonym) {
1434
		NonViralName name = makeName(taxon, homotypicalGroup, isSynonym);
1435
		String num = null;
1436
		
1437
		boolean hasGenusInfo = false;
1438
		TeamOrPersonBase lastTeam = null;
1439
		
1440
		//genus
1441
		List<Element> elGenus = XmlHelp.getAttributedChildListWithValue(elNom, "name", "class", "genus");
1442
		if (elGenus.size() > 0){
1443
			hasGenusInfo = true;
1444
		}else{
1445
			logger.debug ("No Synonym Genus");
1446
		}
1447
		//infra rank -> needed to handle authors correctly
1448
		List<Element> elInfraRank = XmlHelp.getAttributedChildListWithValue(elNom, "name", "class", "infrank");
1449
		Rank infraRank = null;
1450
		infraRank = handleInfRank(name, elInfraRank, infraRank);
1451
		
1452
		//get left over elements
1453
		List<Element> elements = elNom.getChildren();
1454
		elements.removeAll(elInfraRank);
1455
		
1456
		for (Element element : elements){
1457
			if (element.getName().equals("name")){
1458
				String classValue = element.getAttributeValue("class");
1459
				String value = element.getValue().trim();
1460
				if (classValue.equalsIgnoreCase("genus") || classValue.equalsIgnoreCase("family") ){
1461
					name.setGenusOrUninomial(value);
1462
				}else if (classValue.equalsIgnoreCase("family") ){
1463
					name.setGenusOrUninomial(value);
1464
					name.setRank(Rank.FAMILY());
1465
				}else if (classValue.equalsIgnoreCase("subgenus")){
1466
					//name.setInfraGenericEpithet(value);
1467
					name.setNameCache(value.replace(":", "").trim());
1468
					name.setRank(Rank.SUBGENUS());
1469
				}else if (classValue.equalsIgnoreCase("epithet") ){
1470
					if (hasGenusInfo == true){
1471
						name.setSpecificEpithet(value);
1472
					}else{
1473
						handleInfraspecificEpithet(element, classValue, name);
1474
					}
1475
				}else if (classValue.equalsIgnoreCase("author")){
1476
					handleNameAuthors(element, name);
1477
				}else if (classValue.equalsIgnoreCase("paraut")){
1478
					handleBasionymAuthor(state, element, name, false);
1479
				}else if (classValue.equalsIgnoreCase("infrauthor") || classValue.equalsIgnoreCase("infraut")){
1480
					handleInfrAuthor(state, element, name, true);
1481
				}else if (classValue.equalsIgnoreCase("infrapar") || classValue.equalsIgnoreCase("infrpar") || classValue.equalsIgnoreCase("parauthor") ){
1482
					handleBasionymAuthor(state, element, name, true);
1483
				}else if (classValue.equalsIgnoreCase("infrepi")){
1484
					handleInfrEpi(name, infraRank, value);
1485
				}else if (classValue.equalsIgnoreCase("pub")){
1486
					lastTeam = handleNomenclaturalReference(name, value);
1487
				}else if (classValue.equalsIgnoreCase("usage")){
1488
					lastTeam = handleNameUsage(taxon, name, value, lastTeam);
1489
				}else if (classValue.equalsIgnoreCase("note")){
1490
					handleNameNote(name, value);
1491
				}else if (classValue.equalsIgnoreCase("num")){
1492
					if (num != null){
1493
						logger.warn("Duplicate num: " + value);
1494
					}else{
1495
						num = value;
1496
					}
1497
					if (isSynonym == true){
1498
						logger.warn("Synonym should not have a num");
1499
					}
1500
				}else if (classValue.equalsIgnoreCase("typification")){
1501
					logger.warn("Typification should not be a nom class");
1502
				}else{
1503
					logger.warn("Unhandled name class: " +  classValue);
1504
				}
1505
			}else if(element.getName().equals("homonym")){
1506
				handleHomonym(state, element, name);
1507
			}else{
1508
				// child element is not "name"
1509
				unhandledNomChildren.add(element.getName());
1510
			}
1511
		}
1512
		
1513
		//handle key
1514
		if (! isSynonym){
1515
			String taxonString = name.getNameCache();
1516
			//try to find matching lead nodes 
1517
			UnmatchedLeadsKey leadsKey = UnmatchedLeadsKey.NewInstance(num, taxonString);
1518
			Set<PolytomousKeyNode> matchingNodes = handleMatchingNodes(state, taxon, leadsKey);
1519
			//same without using the num
1520
			if (num != null){
1521
				UnmatchedLeadsKey noNumLeadsKey = UnmatchedLeadsKey.NewInstance("", taxonString);
1522
				handleMatchingNodes(state, taxon, noNumLeadsKey);
1523
			}
1524
			if (matchingNodes.isEmpty() && num != null){
1525
				logger.warn("Taxon has num but no matching nodes exist: " + num+ ", Key: " + leadsKey.toString());
1526
			}
1527
		}
1528
		
1529
		//test nom element has no text
1530
		if (StringUtils.isNotBlank(elNom.getTextNormalize().replace("—", "").replace("\u002d","").replace("\u2013", ""))){
1531
			String strElNom = elNom.getTextNormalize();
1532
			if ("?".equals(strElNom)){
1533
				handleQuestionMark(name, taxon);
1534
			}
1535
//			Character c = strElNom.charAt(0);
1536
			//System.out.println(CharUtils.unicodeEscaped(c));
1537
			logger.warn("Nom tag has text: " + strElNom);
1538
		}
1539
		
1540
		return name.getHomotypicalGroup();
1541
	}
1542

    
1543

    
1544
	private void handleQuestionMark(NonViralName name, Taxon taxon) {
1545
		int count = name.getTaxonBases().size();
1546
		if (count != 1){
1547
			logger.warn("Name has " + count + " taxa. This is not handled for question mark");
1548
		}else{
1549
			TaxonBase taxonBase = (TaxonBase)name.getTaxonBases().iterator().next();
1550
			taxonBase.setDoubtful(true);
1551
		}
1552
	}
1553

    
1554

    
1555
	//merge with handleNomTaxon	
1556
	private void handleHomonym(MarkupImportState state, Element elHomonym, NonViralName upperName) {
1557
		verifyNoAttribute(elHomonym);
1558
		
1559
		//hommonym name
1560
		BotanicalName homonymName = BotanicalName.NewInstance(upperName.getRank());
1561
		homonymName.setGenusOrUninomial(upperName.getGenusOrUninomial());
1562
		homonymName.setInfraGenericEpithet(upperName.getInfraGenericEpithet());
1563
		homonymName.setSpecificEpithet(upperName.getSpecificEpithet());
1564
		homonymName.setInfraSpecificEpithet(upperName.getInfraSpecificEpithet());
1565

    
1566
		for (Element elName : (List<Element>)elHomonym.getChildren("name")){
1567
			String classValue = elName.getAttributeValue("class");
1568
			String value = elName.getValue().trim();
1569
			if (classValue.equalsIgnoreCase("genus") ){
1570
				homonymName.setGenusOrUninomial(value);
1571
			}else if (classValue.equalsIgnoreCase("epithet") ){
1572
				homonymName.setSpecificEpithet(value);
1573
			}else if (classValue.equalsIgnoreCase("author")){
1574
				handleNameAuthors(elName, homonymName);
1575
			}else if (classValue.equalsIgnoreCase("paraut")){
1576
				handleBasionymAuthor(state, elName, homonymName, true);
1577
			}else if (classValue.equalsIgnoreCase("pub")){
1578
				handleNomenclaturalReference(homonymName, value);
1579
			}else if (classValue.equalsIgnoreCase("note")){
1580
				handleNameNote(homonymName, value);
1581
			}else{
1582
				logger.warn("Unhandled class value: " + classValue);
1583
			}
1584
		}
1585
		//TODO verify other information
1586
		
1587

    
1588
		//rel
1589
		boolean homonymIsLater = false;
1590
		NameRelationshipType relType = NameRelationshipType.LATER_HOMONYM();
1591
		if (upperName.getNomenclaturalReference() != null && homonymName.getNomenclaturalReference() != null){
1592
			TimePeriod homonymYear = homonymName.getNomenclaturalReference().getDatePublished();
1593
			TimePeriod nameYear = upperName.getNomenclaturalReference().getDatePublished();
1594
			homonymIsLater = homonymYear.getStart().compareTo(nameYear.getStart())  > 0;
1595
		}else{
1596
			if (upperName.getNomenclaturalReference() == null){
1597
				logger.warn("Homonym parent does not have a nomenclatural reference or year: " + upperName.getTitleCache());
1598
			}
1599
			if (homonymName.getNomenclaturalReference() == null){
1600
				logger.warn("Homonym does not have a nomenclatural reference or year: " + homonymName.getTitleCache());
1601
			}
1602
		}
1603
		if (homonymIsLater){
1604
			homonymName.addRelationshipToName(upperName, relType, null);
1605
		}else{
1606
			upperName.addRelationshipToName(homonymName, relType, null);
1607
		}
1608
		
1609
	}
1610

    
1611

    
1612
	/**
1613
	 * @param state
1614
	 * @param taxon
1615
	 * @param leadsKey
1616
	 * @return
1617
	 */
1618
	private Set<PolytomousKeyNode> handleMatchingNodes(MarkupImportState state, Taxon taxon, UnmatchedLeadsKey leadsKey) {
1619
		Set<PolytomousKeyNode> matchingNodes = state.getUnmatchedLeads().getNodes(leadsKey);
1620
		for (PolytomousKeyNode matchingNode : matchingNodes){
1621
			state.getUnmatchedLeads().removeNode(leadsKey, matchingNode);
1622
			matchingNode.setTaxon(taxon);
1623
			state.getPolytomousKeyNodesToSave().add(matchingNode);
1624
		}
1625
		return matchingNodes;
1626
	}
1627

    
1628

    
1629
	private void handleNameNote(NonViralName name, String value) {
1630
		logger.warn("Name note: " + value + ". Available in portal?");
1631
		Annotation annotation = Annotation.NewInstance(value, AnnotationType.EDITORIAL(), Language.DEFAULT());
1632
		name.addAnnotation(annotation);
1633
	}
1634

    
1635

    
1636
	/**
1637
	 * @param taxon
1638
	 * @param name
1639
	 * @param value
1640
	 */
1641
	protected TeamOrPersonBase handleNameUsage(Taxon taxon, NonViralName name, String referenceTitle, TeamOrPersonBase lastTeam) {
1642
		Reference ref = ReferenceFactory.newGeneric();
1643
		referenceTitle = removeStartingSymbols(referenceTitle, ref);
1644
		
1645
		ref.setTitleCache(referenceTitle, true);
1646
		String microReference = parseReferenceYearAndDetail(ref);
1647
		TeamOrPersonBase team = getReferenceAuthor(ref);
1648
		parseReferenceType(ref);
1649
		if (team == null){
1650
			team = lastTeam;
1651
		}
1652
		ref.setAuthorTeam(team);
1653
		
1654
		TaxonDescription description = getDescription(taxon);
1655
		TextData textData = TextData.NewInstance(Feature.CITATION());
1656
		textData.addSource(null, null, ref, microReference, name, null);
1657
		description.addElement(textData);
1658
		return team;
1659
	}
1660

    
1661

    
1662
	/**
1663
	 * @param referenceTitle
1664
	 * @param ref
1665
	 * @return
1666
	 */
1667
	private String removeStartingSymbols(String referenceTitle,	Reference ref) {
1668
		if (referenceTitle.startsWith(";") || referenceTitle.startsWith(",") || referenceTitle.startsWith(":")){
1669
			referenceTitle = referenceTitle.substring(1).trim();
1670
			ref.setTitleCache(referenceTitle);
1671
		}
1672
		return referenceTitle;
1673
	}
1674

    
1675

    
1676
	private void parseReferenceType(Reference ref) {
1677
		String title = ref.getTitle();
1678
		if (title == null){
1679
			return;
1680
		}
1681
		title = title.trim();
1682
		//no in reference
1683
		if (! title.startsWith("in ")){
1684
			ref.setType(ReferenceType.Book);
1685
			return;
1686
		}
1687
		
1688
		title = title.substring(3);
1689
		//in reference
1690
		//no ,
1691
		if (title.indexOf(",") == -1){
1692
			ref.setType(ReferenceType.Article);
1693
			IJournal journal = ReferenceFactory.newJournal();
1694
			journal.setTitle(title);
1695
			ref.setTitle(null);
1696
			ref.setInJournal(journal);
1697
			//return;
1698
		}else{
1699
			//,-references
1700
			ref.setType(ReferenceType.BookSection);
1701
			String[] split = (title).split(",\\s*[A-Z]");
1702
			if (split.length <= 1){
1703
				logger.warn("Can not fully decide what reference type. Guess it is a book section: " + title );
1704
			}
1705
			IBook book = ReferenceFactory.newBook();
1706
			Team bookTeam = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1707
			try {
1708
				title = title.substring(split[0].length() + 1).trim();
1709
			} catch (Exception e) {
1710
				logger.error("ERROR occurred when trying to split title: " +  title + "; split[0]: + " + split[0]);
1711
			}
1712
			book.setTitle(title);
1713
			book.setAuthorTeam(bookTeam);
1714
			book.setDatePublished(ref.getDatePublished());
1715
			ref.setTitle(null);
1716
			ref.setInBook(book);
1717
		}		
1718
	}
1719

    
1720

    
1721
	protected Team getReferenceAuthor (Reference ref) {
1722
		boolean isCache = false;
1723
		String referenceTitle = ref.getTitle();
1724
		if (referenceTitle == null){
1725
			isCache = true;
1726
			referenceTitle = ref.getTitleCache();
1727
		}
1728
		//in references
1729
		String[] split = (" " + referenceTitle).split(" in ");
1730
		if (split.length > 1){
1731
			if (StringUtils.isNotBlank(split[0])){
1732
				//' in ' is within the reference string, take the preceding string as the team
1733
				Team team = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1734
				if (! isCache){
1735
					ref.setTitle("in " + split[1]);
1736
				}
1737
				return team;
1738
			}else{
1739
				//string starts with in therefore no author is given
1740
				return null;
1741
			}
1742
		}
1743
		//no ,-reference
1744
		split = referenceTitle.split(",");
1745
		if (split.length < 2){
1746
			//no author is given
1747
			return null;
1748
		}
1749
		
1750
		//,-references
1751
		split = (referenceTitle).split(",\\s*[A-Z]");
1752
		if (split.length > 1){
1753
			Team team = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1754
			if (! isCache){
1755
				ref.setTitle(referenceTitle.substring(split[0].length()+1).trim());
1756
			}
1757
			return team;
1758
		}else{
1759
			logger.warn("Can't decide if a usage has an author: " + referenceTitle );
1760
			return null;
1761
		}
1762
	}
1763

    
1764

    
1765
	/**
1766
	 * Replaced by <homonym> tag but still in use for exceptions
1767
	 * @param detail
1768
	 * @param name
1769
	 * @return
1770
	 */
1771
	protected String parseHomonym(String detail, NonViralName name) {
1772
		String result;
1773
		if (detail == null){
1774
			return detail;
1775
		}
1776

    
1777
		
1778
		//non RE
1779
		String reNon = "(\\s|,)non\\s";
1780
		Pattern patReference = Pattern.compile(reNon);
1781
		Matcher matcher = patReference.matcher(detail);
1782
		if (matcher.find()){
1783
			int start = matcher.start();
1784
			int end = matcher.end();
1785
			
1786
			if (detail != null){
1787
				logger.warn("Unhandled non part: " + detail.substring(start));
1788
				return detail;
1789
			}
1790
			
1791
			result = detail.substring(0, start);
1792

    
1793
			//homonym string
1794
			String homonymString = detail.substring(end);
1795
			
1796
			//hommonym name
1797
			BotanicalName homonymName = BotanicalName.NewInstance(name.getRank());
1798
			homonymName.setGenusOrUninomial(name.getGenusOrUninomial());
1799
			homonymName.setInfraGenericEpithet(name.getInfraGenericEpithet());
1800
			homonymName.setSpecificEpithet(name.getSpecificEpithet());
1801
			homonymName.setInfraSpecificEpithet(name.getInfraSpecificEpithet());
1802
			Reference homonymNomRef = ReferenceFactory.newGeneric();
1803
			homonymNomRef.setTitleCache(homonymString);
1804
			String homonymNomRefDetail = parseReferenceYearAndDetail(homonymNomRef);
1805
			homonymName.setNomenclaturalMicroReference(homonymNomRefDetail);
1806
			String authorTitle = homonymNomRef.getTitleCache();
1807
			Team team = Team.NewTitledInstance(authorTitle, authorTitle);
1808
			homonymNomRef.setAuthorTeam(team);
1809
			homonymNomRef.setTitle("");
1810
			homonymNomRef.setProtectedTitleCache(false);
1811
			
1812
			//rel
1813
			boolean homonymIsLater = false;
1814
			NameRelationshipType relType = NameRelationshipType.LATER_HOMONYM();
1815
			TimePeriod homonymYear = homonymNomRef.getDatePublished();
1816
			if (name.getNomenclaturalReference() != null){
1817
				TimePeriod nameYear = name.getNomenclaturalReference().getDatePublished();
1818
				homonymIsLater = homonymYear.getStart().compareTo(nameYear.getStart())  > 0;
1819
			}else{
1820
				logger.warn("Classification name has no nomenclatural reference");
1821
			}
1822
			if (homonymIsLater){
1823
				homonymName.addRelationshipToName(name, relType, null);
1824
			}else{
1825
				name.addRelationshipToName(homonymName, relType, null);
1826
			}
1827
			
1828
		}else{
1829
			return detail;
1830
		}
1831
		return result;
1832
	}
1833

    
1834

    
1835
	/**
1836
	 * @Xpath body/taxon/nomenclature/homotypes/nom/name[@class="pub"]
1837
	 * @param name
1838
	 * @param value
1839
	 */
1840
	protected TeamOrPersonBase handleNomenclaturalReference(NonViralName name, String value) {
1841
		Reference nomRef = ReferenceFactory.newGeneric();
1842
		nomRef.setTitleCache(value, true);
1843
		parseNomStatus(nomRef, name);
1844
		String microReference = parseReferenceYearAndDetail(nomRef);
1845
		name.setNomenclaturalReference(nomRef);
1846
		microReference = parseHomonym(microReference, name);
1847
		name.setNomenclaturalMicroReference(microReference);
1848
		TeamOrPersonBase  team = (TeamOrPersonBase)name.getCombinationAuthorTeam();
1849
		if (team == null){
1850
			logger.warn("Name has nom. ref. but no author team. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
1851
		}else{
1852
			nomRef.setAuthorTeam(team);
1853
		}
1854
		return team;
1855
	}
1856

    
1857
	private void handleInfrAuthor(MarkupImportState state, Element elAuthor, NonViralName name, boolean overwrite) {
1858
		String strAuthor = elAuthor.getValue().trim();
1859
		if (strAuthor.endsWith(",")){
1860
			strAuthor = strAuthor.substring(0, strAuthor.length() -1);
1861
		}
1862
		TeamOrPersonBase[] team = getTeam(strAuthor);
1863
		if (name.getCombinationAuthorTeam() != null && overwrite == false){
1864
			logger.warn("Try to write combination author for a name that already has a combination author. Neglected.");
1865
		}else{
1866
			name.setCombinationAuthorTeam(team[0]);
1867
			name.setExCombinationAuthorTeam(team[1]);
1868
		}
1869
		
1870
		
1871
	}
1872

    
1873

    
1874
	/**
1875
	 * Sets the names rank according to the infrank value
1876
	 * @param name
1877
	 * @param elements
1878
	 * @param elInfraRank
1879
	 * @param infraRank
1880
	 * @return
1881
	 */
1882
	private Rank handleInfRank(NonViralName name, List<Element> elInfraRank, Rank infraRank) {
1883
		if (elInfraRank.size() == 1){
1884
			String strRank = elInfraRank.get(0).getTextNormalize();
1885
			try {
1886
				infraRank = Rank.getRankByNameOrAbbreviation(strRank);
1887
			} catch (UnknownCdmTypeException e) {
1888
				try{
1889
					infraRank = Rank.getRankByNameOrAbbreviation(strRank + ".");
1890
				} catch (UnknownCdmTypeException e2) {
1891
					logger.warn("Unknown infrank " + strRank + ". Set infraRank to (null).");
1892
				}
1893
			}
1894
		}else if (elInfraRank.size() > 1){
1895
			logger.warn ("There is more than 1 infrank");
1896
		}
1897
		if (infraRank != null){
1898
			name.setRank(infraRank);
1899
		}
1900
		return infraRank;
1901
	}
1902

    
1903

    
1904
	private void handleInfrEpi(NonViralName name, Rank infraRank, String value) {
1905
		if (infraRank != null && infraRank.isInfraSpecific()){
1906
			name.setInfraSpecificEpithet(value);
1907
			if (CdmUtils.isCapital(value)){
1908
				logger.warn("Infraspecific epithet starts with a capital letter: " + value);
1909
			}
1910
		}else if (infraRank != null && infraRank.isInfraGeneric()){
1911
			name.setInfraGenericEpithet(value);
1912
			if (! CdmUtils.isCapital(value)){
1913
				logger.warn("Infrageneric epithet does not start with a capital letter: " + value);
1914
			}
1915
		}else{
1916
			logger.warn("Infrepi could not be handled: " + value);
1917
		}
1918
	}
1919

    
1920

    
1921

    
1922
	/**
1923
	 * Returns the (empty) with the correct homotypical group depending on the taxon status
1924
	 * @param taxon
1925
	 * @param homotypicalGroup
1926
	 * @param isSynonym
1927
	 * @return
1928
	 */
1929
	private NonViralName makeName(Taxon taxon,HomotypicalGroup homotypicalGroup, boolean isSynonym) {
1930
		NonViralName name;
1931
		if (isSynonym){
1932
			name = BotanicalName.NewInstance(Rank.SPECIES(), homotypicalGroup);
1933
			SynonymRelationshipType synonymType = SynonymRelationshipType.HETEROTYPIC_SYNONYM_OF();
1934
			if (taxon.getHomotypicGroup().equals(homotypicalGroup)){
1935
				synonymType = SynonymRelationshipType.HOMOTYPIC_SYNONYM_OF();
1936
			}
1937
			taxon.addSynonymName(name, synonymType);
1938
		}else{
1939
			name = (NonViralName)taxon.getName();
1940
		}
1941
		return name;
1942
	}
1943

    
1944

    
1945
	/**
1946
	 * @param element
1947
	 * @param taxon
1948
	 */
1949
	private void handleInfraspecificEpithet(Element element, String attrValue, NonViralName name) {
1950
		String value = element.getTextNormalize();
1951
		if (value.indexOf("subsp.") != -1){
1952
			//TODO genus and species epi
1953
			String infrEpi = value.substring(value.indexOf("subsp.") + 6).trim();
1954
			name.setInfraSpecificEpithet(infrEpi);
1955
			name.setRank(Rank.SUBSPECIES());
1956
		}else if (value.indexOf("var.") != -1){
1957
			//TODO genus and species epi
1958
			String infrEpi = value.substring(value.indexOf("var.") + 4).trim();
1959
			name.setInfraSpecificEpithet(infrEpi);
1960
			name.setRank(Rank.VARIETY());
1961
		}else{
1962
			logger.warn("Unhandled infraspecific type: " + value);
1963
		}
1964
	}
1965

    
1966

    
1967
	/**
1968
	 * @param state
1969
	 * @param element
1970
	 * @param name
1971
	 */
1972
	private void handleBasionymAuthor(MarkupImportState state, Element elBasionymAuthor, NonViralName name, boolean overwrite) {
1973
		String strAuthor = elBasionymAuthor.getValue().trim();
1974
		Pattern reBasionymAuthor = Pattern.compile("^\\(.*\\)$");
1975
		if (reBasionymAuthor.matcher(strAuthor).matches()){
1976
			strAuthor = strAuthor.substring(1, strAuthor.length()-1);
1977
		}else{
1978
			logger.warn("Brackets are missing for original combination author " + strAuthor);
1979
		}
1980
		TeamOrPersonBase[] basionymTeam = getTeam(strAuthor);
1981
		if (name.getBasionymAuthorTeam() != null && overwrite == false){
1982
			logger.warn("Try to write basionym author for a name that already has a basionym author. Neglected.");
1983
		}else{
1984
			name.setBasionymAuthorTeam(basionymTeam[0]);
1985
			name.setExBasionymAuthorTeam(basionymTeam[1]);
1986

    
1987
		}
1988
	}
1989

    
1990
	//author string -> uuid of the team or person created for this string, filled by getTeam(String)
	private Map<String, UUID> teamMap = new HashMap<String, UUID>();
1991
	/**
1992
	 * @param elAuthors
1993
	 * @param name
1994
	 * @param elNom 
1995
	 */
1996
	private void handleNameAuthors(Element elAuthor, NonViralName name) {
1997
		if (name.getCombinationAuthorTeam() != null){
1998
			logger.warn("Name already has a combination author. Name: " +  name.getTitleCache() + ", Author: " + elAuthor.getTextNormalize());
1999
		}
2000
		String strAuthor = elAuthor.getValue().trim();
2001
		if (strAuthor.endsWith(",")){
2002
			strAuthor = strAuthor.substring(0, strAuthor.length() -1);
2003
		}
2004
		if (strAuthor.indexOf("(") > -1 || strAuthor.indexOf(")") > -1){
2005
			logger.warn("Author has brackets. Basionym authors should be handled in separate tags: " + strAuthor);
2006
		}
2007
		TeamOrPersonBase[] team = getTeam(strAuthor);
2008
		name.setCombinationAuthorTeam(team[0]);
2009
		name.setExCombinationAuthorTeam(team[1]);
2010
	}
2011

    
2012

    
2013
	/**
2014
	 * @param strAuthor
2015
	 * @return
2016
	 */
2017
	private TeamOrPersonBase[] getTeam(String strAuthor) {
2018
		TeamOrPersonBase[] result = new TeamOrPersonBase[2];
2019
		String[] split = strAuthor.split(" ex ");
2020
		String strBaseAuthor = null;
2021
		String strExAuthor = null;
2022
		
2023
		if (split.length == 2){
2024
			strBaseAuthor = split[1]; 
2025
			strExAuthor = split[0];	
2026
		}else if (split.length == 1){
2027
			strBaseAuthor = split[0];
2028
		}else{
2029
			logger.warn("Could not parse (ex) author: " + strAuthor);
2030
		}
2031
		result[0] = getUuidTeam(strBaseAuthor);
2032
		if (result[0] == null){
2033
			result[0] = parseSingleTeam(strBaseAuthor);
2034
			teamMap.put(strBaseAuthor, result[0].getUuid());
2035
		}
2036
		if (strExAuthor != null){
2037
			result[1] = getUuidTeam(strExAuthor);
2038
			if (result[1] == null){
2039
				result[1] = Team.NewInstance();
2040
				result[1].setTitleCache(strExAuthor, true);
2041
				teamMap.put(strExAuthor, result[1].getUuid());
2042
			}
2043
		
2044
		}	
2045
		return result;
2046
	}
2047

    
2048

    
2049
	protected TeamOrPersonBase parseSingleTeam(String strBaseAuthor) {
2050
		TeamOrPersonBase result;
2051
		String[] split = strBaseAuthor.split("&");
2052
		if (split.length > 1){
2053
			result = Team.NewInstance();
2054
			for (String personString : split){
2055
				Person person = makePerson(personString);
2056
				((Team)result).addTeamMember(person);
2057
			}
2058
		}else{
2059
			result = makePerson(strBaseAuthor.trim());
2060
		}
2061
		return result;
2062
	}
2063

    
2064

    
2065
	/**
2066
	 * @param personString
2067
	 * @return
2068
	 */
2069
	private Person makePerson(String personString) {
2070
		personString = personString.trim();
2071
		Person person = Person.NewTitledInstance(personString);
2072
		person.setNomenclaturalTitle(personString);
2073
		return person;
2074
	}
2075

    
2076

    
2077
	/**
2078
	 * @param result
2079
	 * @param strBaseAuthor
2080
	 */
2081
	private TeamOrPersonBase getUuidTeam(String strBaseAuthor) {
2082
		UUID uuidTeam = teamMap.get(strBaseAuthor);
2083
		return CdmBase.deproxy(getAgentService().find(uuidTeam), TeamOrPersonBase.class);
2084
	}
2085

    
2086

    
2087
	private void handleDescription(MarkupImportState state, Element elDescription, Taxon taxon, Set<String> unhandledChildren) {
2088
		verifyNoAttribute(elDescription);
2089
		
2090
		List<Element> elements = elDescription.getChildren();
2091
		for (Element element : elements){
2092
			if (element.getName().equalsIgnoreCase("char")){
2093
				handleChar(state, element, taxon);
2094
			}else{
2095
				logger.warn("Unhandled description child: " + element.getName());
2096
			}
2097
		}
2098
		
2099
	}
2100
	
2101
	
2102
	/**
2103
	 * @param state
2104
	 * @param element
2105
	 * @param taxon
2106
	 */
2107
	private void handleChar(MarkupImportState state, Element element, Taxon taxon) {
2108
		List<Attribute> attributes = element.getAttributes();
2109
		for (Attribute attribute : attributes){
2110
			if (! attribute.getName().equalsIgnoreCase("class")){
2111
				logger.warn("Char has unhandled attribute " +  attribute.getName());
2112
			}else{
2113
				String classValue = attribute.getValue();
2114
				Feature feature = getFeature(classValue, state);
2115
				if (feature == null){
2116
					logger.warn("Unhandled feature: " + classValue);
2117
				}else{
2118
					String value = element.getValue();
2119
					addDescriptionElement(state, taxon, value, feature, null);
2120
				}
2121
				
2122
			}
2123
		}
2124
		
2125
		List<Element> elements = element.getChildren();
2126
		if (! elements.isEmpty()){
2127
			logger.warn("Char has unhandled children");
2128
		}
2129
	}
2130

    
2131

    
2132
	/**
2133
	 * @param taxon
2134
	 * @return
2135
	 */
2136
	protected TaxonDescription getDescription(Taxon taxon) {
2137
		for (TaxonDescription description : taxon.getDescriptions()){
2138
			if (! description.isImageGallery()){
2139
				return description;
2140
			}
2141
		}
2142
		TaxonDescription newDescription = TaxonDescription.NewInstance(taxon);
2143
		return newDescription;
2144
	}
2145

    
2146

    
2147
	/**
2148
	 * @param classValue
2149
	 * @param state 
2150
	 * @return
2151
	 * @throws UndefinedTransformerMethodException 
2152
	 */
2153
	private Feature getFeature(String classValue, MarkupImportState state) {
2154
		UUID uuid;
2155
		try {
2156
			uuid = state.getTransformer().getFeatureUuid(classValue);
2157
			if (uuid == null){
2158
				logger.info("Uuid is null for " + classValue);
2159
			}
2160
			String featureText = StringUtils.capitalize(classValue);
2161
			Feature feature = getFeature(state, uuid, featureText, featureText, classValue);
2162
			if (feature == null){
2163
				throw new NullPointerException(classValue + " not recognized as a feature");
2164
			}
2165
			return feature;
2166
		} catch (Exception e) {
2167
			logger.warn("Could not create feature for " + classValue + ": " + e.getMessage()) ;
2168
			return Feature.UNKNOWN();
2169
		}
2170
	}
2171

    
2172

    
2173
	/**
2174
	 * @param state
2175
	 * @param element
2176
	 * @param taxon
2177
	 * @param unhandledTitleClassess 
2178
	 */
2179
	private void handleTitle(MarkupImportState state, Element element, Taxon taxon, Set<String> unhandledTitleClassess) {
2180
		// attributes
2181
		List<Attribute> attributes = element.getAttributes();
2182
		for (Attribute attribute : attributes){
2183
			if (! attribute.getName().equalsIgnoreCase("class") ){
2184
				if (! attribute.getName().equalsIgnoreCase("num")){
2185
					logger.warn("Title has unhandled attribute " +  attribute.getName());
2186
				}else{
2187
					//TODO num attribute in taxon
2188
				}
2189
			}else{
2190
				String classValue = attribute.getValue();
2191
				try {
2192
					Rank rank;
2193
					try {
2194
						rank = Rank.getRankByNameOrAbbreviation(classValue);
2195
					} catch (Exception e) {
2196
						//TODO nc
2197
						rank = Rank.getRankByEnglishName(classValue, NomenclaturalCode.ICBN, false);
2198
					}
2199
					taxon.getName().setRank(rank);
2200
					if (rank.equals(Rank.FAMILY()) || rank.equals(Rank.GENUS())){
2201
						handleGenus(element.getValue(), taxon.getName());
2202
					}else if (rank.equals(Rank.SUBGENUS())){
2203
						handleSubGenus(element.getValue(), taxon.getName());
2204
					}else if (rank.equals(Rank.SECTION_BOTANY())){
2205
						handleSection(element.getValue(), taxon.getName());
2206
					}else if (rank.equals(Rank.SPECIES())){
2207
						handleSpecies(element.getValue(), taxon.getName());
2208
					}else if (rank.equals(Rank.SUBSPECIES())){
2209
						handleSubSpecies(element.getValue(), taxon.getName());
2210
					}else if (rank.equals(Rank.VARIETY())){
2211
						handleVariety(element.getValue(), taxon.getName());
2212
					}else{
2213
						logger.warn("Unhandled rank: " + rank.getLabel());
2214
					}
2215
				} catch (UnknownCdmTypeException e) {
2216
					logger.warn("Unknown rank " + classValue);
2217
					unhandledTitleClassess.add(classValue);
2218
				}
2219
			}
2220
		}
2221
		List<Element> elements = element.getChildren();
2222
		if (! elements.isEmpty()){
2223
			logger.warn("Title has unexpected children");
2224
		}
2225
		UUID uuidTitle = MarkupTransformer.uuidTitle;
2226
		ExtensionType titleExtension = this.getExtensionType(state, uuidTitle, "title", "title", "title");
2227
		taxon.addExtension(element.getTextNormalize(), titleExtension);
2228
		
2229
	}
2230

    
2231

    
2232
	/**
2233
	 * @param value
2234
	 * @param taxonNameBase 
2235
	 */
2236
	private void handleSubGenus(String value, TaxonNameBase taxonNameBase) {
2237
		String name = value.replace("Subgenus", "").trim();
2238
		((NonViralName)taxonNameBase).setInfraGenericEpithet(name);
2239
	}
2240
	
2241
	/**
2242
	 * @param value
2243
	 * @param taxonNameBase 
2244
	 */
2245
	private void handleSection(String value, TaxonNameBase taxonNameBase) {
2246
		String name = value.replace("Section", "").trim();
2247
		((NonViralName)taxonNameBase).setInfraGenericEpithet(name);
2248
	}
2249
	
2250
	/**
2251
	 * @param value
2252
	 * @param taxonNameBase 
2253
	 */
2254
	private void handleSpecies(String value, TaxonNameBase taxonNameBase) {
2255
		//do nothing
2256
	}
2257
	
2258
	/**
2259
	 * @param value
2260
	 * @param taxonNameBase 
2261
	 */
2262
	private void handleVariety(String value, TaxonNameBase taxonNameBase) {
2263
		//do nothing
2264
	}
2265
	
2266
	/**
2267
	 * @param value
2268
	 * @param taxonNameBase 
2269
	 */
2270
	private void handleSubSpecies(String value, TaxonNameBase taxonNameBase) {
2271
		//do nothing
2272
	}
2273

    
2274
	
2275
	//matches the part of a string from the first opening round or square bracket to the last closing one, used by handleGenus(..)
	private Pattern rexGenusAuthor = Pattern.compile("(\\[|\\().*(\\]|\\))");
2276
	
2277
	/**
2278
	 * @param value
2279
	 * @param taxonNameBase 
2280
	 */
2281
	protected void handleGenus(String value, TaxonNameBase taxonName) {
2282
		Matcher matcher = rexGenusAuthor.matcher(value);
2283
		if (matcher.find()){
2284
			String author = matcher.group();
2285
//			String genus = value.replace(author, "");
2286
			author = author.substring(1, author.length() - 1);
2287
			Team team = Team.NewInstance();
2288
			team.setTitleCache(author, true);
2289
			Credit credit = Credit.NewInstance(team, null);
2290
			taxonName.addCredit(credit);
2291
//			NonViralName nvn = (NonViralName)taxonName;
2292
//			nvn.setCombinationAuthorTeam(team);
2293
//			nvn.setGenusOrUninomial(genus);
2294
		}else{
2295
			logger.info("No Author match for " + value);
2296
		}
2297
	}
2298
	
2299

    
2300
	/**
2301
	 * @param taxon
2302
	 * @param lastTaxon
2303
	 */
2304
	private void handleTaxonRelation(MarkupImportState state, Taxon taxon, Taxon lastTaxon) {
2305
		
2306
		Classification tree = getTree(state);
2307
		if (lastTaxon == null){
2308
			tree.addChildTaxon(taxon, null, null, null);
2309
			return;
2310
		}
2311
		Rank thisRank = taxon.getName().getRank();
2312
		Rank lastRank = lastTaxon.getName().getRank();
2313
		if (lastTaxon.getTaxonNodes().size() > 0){
2314
			TaxonNode lastNode = lastTaxon.getTaxonNodes().iterator().next();
2315
			if (thisRank.isLower(lastRank )  ){
2316
				lastNode.addChildTaxon(taxon, null, null, null);
2317
				fillMissingEpithetsForTaxa(lastTaxon, taxon);
2318
			}else if (thisRank.equals(lastRank)){
2319
				TaxonNode parent = lastNode.getParent();
2320
				if (parent != null){
2321
					parent.addChildTaxon(taxon, null, null, null);
2322
					fillMissingEpithetsForTaxa(parent.getTaxon(), taxon);
2323
				}else{
2324
					tree.addChildTaxon(taxon, null, null, null);
2325
				}
2326
			}else if (thisRank.isHigher(lastRank)){
2327
				handleTaxonRelation(state, taxon, lastNode.getParent().getTaxon());
2328
//				TaxonNode parentNode = handleTaxonRelation(state, taxon, lastNode.getParent().getTaxon());
2329
//				parentNode.addChildTaxon(taxon, null, null, null);
2330
			}
2331
		}else{
2332
			logger.warn("Last taxon has no node");
2333
		}
2334
	}
2335

    
2336

    
2337

    
2338
	/**
2339
	 * @param state
2340
	 * @return 
2341
	 */
2342
	private Classification getTree(MarkupImportState state) {
2343
		Classification result = state.getTree(null);
2344
		if (result == null){
2345
			UUID uuid = state.getConfig().getClassificationUuid();
2346
			if (uuid == null){
2347
				logger.warn("No classification uuid is defined");
2348
				result = getNewClassification(state);
2349
			}else{
2350
				result = getClassificationService().find(uuid);
2351
				if (result == null){
2352
					result = getNewClassification(state);
2353
					result.setUuid(uuid);
2354
				}
2355
			}
2356
			state.putTree(null, result);
2357
		}
2358
		return result;
2359
	}
2360

    
2361

    
2362
	private Classification getNewClassification(MarkupImportState state) {
2363
		Classification result;
2364
		result = Classification.NewInstance(state.getConfig().getClassificationTitle());
2365
		state.putTree(null, result);
2366
		return result;
2367
	}
2368

    
2369

    
2370
	/**
2371
	 * @param state 
2372
	 * @param taxon
2373
	 * @param value
2374
	 * @param feature
2375
	 * @return 
2376
	 */
2377
	private TextData addDescriptionElement(MarkupImportState state, Taxon taxon, String value, Feature feature, String references) {
2378
		TextData textData = TextData.NewInstance(feature);
2379
		Language textLanguage = getDefaultLanguage(state);
2380
		textData.putText(textLanguage, value);
2381
		TaxonDescription description = getDescription(taxon);
2382
		description.addElement(textData);
2383
		if (references != null){
2384
			makeOriginalSourceReferences(textData, ";", references);
2385
		}
2386
		return textData;
2387
	}
2388

    
2389
	private Language getDefaultLanguage(MarkupImportState state) {
2390
		UUID defaultLanguageUuid = state.getConfig().getDefaultLanguageUuid();
2391
		if (defaultLanguageUuid != null){
2392
			Language result = state.getDefaultLanguage();
2393
			if (result == null || ! result.getUuid().equals(defaultLanguageUuid)){
2394
				result = (Language)getTermService().find(defaultLanguageUuid);
2395
				state.setDefaultLanguage(result);
2396
				if (result == null){
2397
					logger.warn("Default language for " + defaultLanguageUuid +  " does not exist.");
2398
				}
2399
			}
2400
			return result;
2401
		}else{
2402
			return Language.DEFAULT();
2403
		}
2404
	}
2405

    
2406

    
2407
	/**
2408
	 * @param elNomenclature
2409
	 */
2410
	private void verifyNoAttribute(Element element) {
2411
		List<Attribute> attributes = element.getAttributes();
2412
		if (! attributes.isEmpty()){
2413
			logger.warn(element.getName() + " has unhandled attributes: " + attributes.get(0).getValue() + "..." );
2414
		}
2415
	}
2416
	
2417
	/**
2418
	 * @param elNomenclature
2419
	 */
2420
	protected void verifyNoChildren(Element element) {
2421
		verifyNoChildren(element, false);
2422
	}
2423
	
2424
	/**
2425
	 * @param elNomenclature
2426
	 */
2427
	private void verifyNoChildren(Element element, boolean ignoreLineBreak) {
2428
		List<Element> children = element.getChildren();
2429
		if (! children.isEmpty()){
2430
			if (ignoreLineBreak == true){
2431
				for (Element child : children){
2432
					if (! child.getName().equalsIgnoreCase("BR")){
2433
						logger.warn(element.getName() + " has unhandled child: " + child.getName());
2434
					}
2435
				}
2436
			}else{
2437
				logger.warn(element.getName() + " has unhandled children");
2438
			}
2439
		}
2440
	}
2441
	
2442
	
2443

    
2444
	/**
2445
	 * Parses the nomenclatural status from the references titleCache. If a nomenclatural status
2446
	 * exists it is added to the name and the nom. status part of the references title cache is 
2447
	 * removed. Requires protected title cache.
2448
	 * @param ref
2449
	 * @param nonViralName
2450
	 */
2451
	protected void parseNomStatus(Reference ref, NonViralName nonViralName) {
2452
		String titleToParse = ref.getTitleCache();
2453
		
2454
		String noStatusTitle = parser.parseNomStatus(titleToParse, nonViralName);
2455
		if (! noStatusTitle.equals(titleToParse)){
2456
			ref.setTitleCache(noStatusTitle, true);
2457
		}
2458
	}
2459

    
2460
	
2461
	/**
2462
	 * Extracts the date published part and returns micro reference
2463
	 * @param ref
2464
	 * @return
2465
	 */
2466
	private String parseReferenceYearAndDetail(Reference ref){
2467
		String detailResult = null;
2468
		String titleToParse = ref.getTitleCache();
2469
		titleToParse = removeStartingSymbols(titleToParse, ref);
2470
		String reReference = "^\\.{1,}";
2471
//		String reYear = "\\([1-2]{1}[0-9]{3}\\)";
2472
		String oneMonth = "(Feb.|Dec.|March|June|July)";
2473
		String reYear = oneMonth + "?\\s?[1-2]\\s?[0-9]\\s?[0-9]\\s?[0-9]\\s?";
2474
		String secondYear = "(\\s?[1-2]\\s?[0-9])?\\s?[0-9]\\s?[0-9]\\s?";
2475
		
2476
		String reYearPeriod = "\\(" + reYear + "(\\-" + secondYear + ")?\\)";
2477
		String reDetail = "\\.{1,10}$";
2478
		
2479
		//pattern for the whole string
2480
		Pattern patReference = Pattern.compile(/*reReference +*/ reYearPeriod /*+ reDetail */);
2481
		Matcher matcher = patReference.matcher(titleToParse);
2482
		if (matcher.find()){
2483
			int start = matcher.start();
2484
			int end = matcher.end();
2485
			
2486
			//title and other information precedes the year part
2487
			String title = titleToParse.substring(0, start).trim();
2488
			//detail follows the year part
2489
			String detail = titleToParse.substring(end).trim();
2490
			
2491
			//time period
2492
			String strPeriod = matcher.group().trim();
2493
			strPeriod = strPeriod.substring(1, strPeriod.length()-1);   //remove brackets
2494
			Pattern patStartMonth = Pattern.compile("^" + oneMonth);
2495
			matcher = patStartMonth.matcher(strPeriod);
2496
			strPeriod = strPeriod.replace(" ", "");
2497
			Integer startMonth = null;
2498
			if (matcher.find()){
2499
				end = matcher.end();
2500
				strPeriod = strPeriod.substring(0, end) + " " + strPeriod.substring(end);
2501
				startMonth = getMonth(strPeriod.substring(0, end));
2502
			}
2503
			
2504
			TimePeriod datePublished = TimePeriod.parseString(strPeriod);
2505
			if (startMonth != null){
2506
				datePublished.setStartMonth(startMonth);
2507
			}
2508
			ref.setDatePublished(datePublished);
2509
			ref.setTitle(title);
2510
			detailResult = CdmUtils.removeTrailingDot(detail);
2511
			if (detailResult.endsWith(".") || detailResult.endsWith(";") || detailResult.endsWith(",")  ){
2512
				detailResult = detailResult.substring(0, detailResult.length() -1);
2513
			}
2514
			ref.setProtectedTitleCache(false);
2515
		}else{
2516
			logger.warn("Could not parse reference: " +  titleToParse);
2517
		}
2518
		return detailResult;
2519
		
2520
	}
2521

    
2522
	
2523
	
2524
	private Integer getMonth(String month) {
2525
		if (month.startsWith("Jan")){
2526
			return 1;
2527
		}else if (month.startsWith("Feb")){
2528
			return 2;
2529
		}else if (month.startsWith("Mar")){
2530
			return 3;
2531
		}else if (month.startsWith("Apr")){
2532
			return 4;
2533
		}else if (month.startsWith("May")){
2534
			return 5;
2535
		}else if (month.startsWith("Jun")){
2536
			return 6;
2537
		}else if (month.startsWith("Jul")){
2538
			return 7;
2539
		}else if (month.startsWith("Aug")){
2540
			return 8;
2541
		}else if (month.startsWith("Sep")){
2542
			return 9;
2543
		}else if (month.startsWith("Oct")){
2544
			return 10;
2545
		}else if (month.startsWith("Nov")){
2546
			return 11;
2547
		}else if (month.startsWith("Dec")){
2548
			return 12;
2549
		}else{
2550
			logger.warn("Month not yet supported: " + month);
2551
			return null;
2552
		}
2553
	}
2554

    
2555

    
2556
	/* (non-Javadoc)
2557
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
2558
	 */
2559
	protected boolean isIgnore(MarkupImportState state){
2560
		return ! state.getConfig().isDoTaxa();
2561
	}
2562

    
2563

    
2564

    
2565

    
2566
}
(2-2/7)