6d0e3a9d9c225f6933c9abc7878482e15ef9d0c7
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / markup / MarkupNomenclatureImport.java
1 /**
2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.io.markup;
10
11 import java.util.HashMap;
12 import java.util.Map;
13
14 import javax.xml.stream.XMLEventReader;
15 import javax.xml.stream.XMLStreamException;
16 import javax.xml.stream.events.Attribute;
17 import javax.xml.stream.events.StartElement;
18 import javax.xml.stream.events.XMLEvent;
19
20 import org.apache.commons.lang.StringUtils;
21 import org.apache.logging.log4j.LogManager;
22 import org.apache.logging.log4j.Logger;
23
24 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
25 import eu.etaxonomy.cdm.model.common.VerbatimTimePeriod;
26 import eu.etaxonomy.cdm.model.description.Feature;
27 import eu.etaxonomy.cdm.model.description.TaxonDescription;
28 import eu.etaxonomy.cdm.model.description.TextData;
29 import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
30 import eu.etaxonomy.cdm.model.name.INonViralName;
31 import eu.etaxonomy.cdm.model.name.NameTypeDesignationStatus;
32 import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
33 import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
34 import eu.etaxonomy.cdm.model.name.Rank;
35 import eu.etaxonomy.cdm.model.name.TaxonName;
36 import eu.etaxonomy.cdm.model.reference.IArticle;
37 import eu.etaxonomy.cdm.model.reference.IBook;
38 import eu.etaxonomy.cdm.model.reference.IJournal;
39 import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
40 import eu.etaxonomy.cdm.model.reference.Reference;
41 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
42 import eu.etaxonomy.cdm.model.reference.ReferenceType;
43 import eu.etaxonomy.cdm.model.taxon.SynonymType;
44 import eu.etaxonomy.cdm.model.taxon.Taxon;
45 import eu.etaxonomy.cdm.model.taxon.TaxonRelationship;
46 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
47 import eu.etaxonomy.cdm.strategy.parser.NameTypeParser;
48 import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
49
50 /**
51 * @author a.mueller
52 * @since 30.05.2012
53 */
54 public class MarkupNomenclatureImport extends MarkupImportBase {
55
56 @SuppressWarnings("unused")
57 private static final Logger logger = LogManager.getLogger(MarkupNomenclatureImport.class);
58
59 private final MarkupSpecimenImport specimenImport;
60
/**
 * Creates the nomenclature part of the markup import.
 *
 * @param docImport the document import, passed on to the base class
 * @param specimenImport the specimen import that specimen type elements
 *            found within homotypes are delegated to
 */
public MarkupNomenclatureImport(MarkupDocumentImport docImport, MarkupSpecimenImport specimenImport) {
    super(docImport);
    this.specimenImport = specimenImport;
}
65
66 public void handleNomenclature(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent)
67 throws XMLStreamException {
68 checkNoAttributes(parentEvent);
69
70 while (reader.hasNext()) {
71 XMLEvent next = readNoWhitespace(reader);
72 if (isMyEndingElement(next, parentEvent)) {
73 return;
74 } else if (isStartingElement(next, HOMOTYPES)) {
75 handleHomotypes(state, reader, next.asStartElement());
76 } else if (isStartingElement(next, NOMENCLATURAL_NOTES)) {
77 handleAmbigousManually(state, reader, next.asStartElement());
78 } else {
79 fireSchemaConflictEventExpectedStartTag(HOMOTYPES, reader);
80 state.setUnsuccessfull();
81 }
82 }
83 return;
84 }
85
86
87 private void handleHomotypes(MarkupImportState state,
88 XMLEventReader reader, StartElement parentEvent)
89 throws XMLStreamException {
90 checkNoAttributes(parentEvent);
91
92 HomotypicalGroup homotypicalGroup = null;
93
94 boolean hasNom = false;
95 while (reader.hasNext()) {
96 XMLEvent next = readNoWhitespace(reader);
97 if (isMyEndingElement(next, parentEvent)) {
98 checkMandatoryElement(hasNom, parentEvent, NOM);
99 state.setLatestAuthorInHomotype(null);
100 return;
101 } else if (isEndingElement(next, NAME_TYPE)) {
102 state.setNameType(false);
103 } else if (isStartingElement(next, NOM)) {
104 INonViralName name = handleNom(state, reader, next, homotypicalGroup);
105 homotypicalGroup = name.getHomotypicalGroup();
106 hasNom = true;
107 } else if (isStartingElement(next, NAME_TYPE)) {
108 state.setNameType(true);
109 handleNameType(state, reader, next, homotypicalGroup);
110 } else if (isStartingElement(next, SPECIMEN_TYPE)) {
111 specimenImport.handleSpecimenType(state, reader, next, homotypicalGroup);
112 } else if (isStartingElement(next, NOTES)) {
113 handleNotYetImplementedElement(next);
114 } else {
115 handleUnexpectedElement(next);
116 }
117 }
118 state.setLatestAuthorInHomotype(null);
119 // TODO handle missing end element
120 throw new IllegalStateException("Homotypes has no closing tag");
121 }
122
123 private void handleNameType(MarkupImportState state, XMLEventReader reader,
124 XMLEvent parentEvent, HomotypicalGroup homotypicalGroup)
125 throws XMLStreamException {
126 Map<String, Attribute> attributes = getAttributes(parentEvent);
127 String typeStatus = getAndRemoveAttributeValue(attributes, TYPE_STATUS);
128 checkNoAttributes(attributes, parentEvent);
129
130 NameTypeDesignationStatus status;
131 try {
132 status = NameTypeParser.parseNameTypeStatus(typeStatus);
133 } catch (UnknownCdmTypeException e) {
134 String message = "Type status could not be recognized: %s";
135 message = String.format(message, typeStatus);
136 fireWarningEvent(message, parentEvent, 4);
137 status = null;
138 }
139
140 boolean hasNom = false;
141 while (reader.hasNext()) {
142 XMLEvent next = readNoWhitespace(reader);
143 if (next.isEndElement()) {
144 if (isMyEndingElement(next, parentEvent)) {
145 checkMandatoryElement(hasNom, parentEvent.asStartElement(),
146 NOM);
147 state.setNameType(false);
148 return;
149 } else {
150 if (isEndingElement(next, ACCEPTED_NAME)) {
151 // NOT YET IMPLEMENTED
152 popUnimplemented(next.asEndElement());
153 } else {
154 handleUnexpectedEndElement(next.asEndElement());
155 }
156 }
157 } else if (next.isStartElement()) {
158 if (isStartingElement(next, NOM)) {
159 // TODO should we check if the type is always a species, is
160 // this a rule?
161 TaxonName speciesName = TaxonName.castAndDeproxy(
162 handleNom(state, reader, next, null));
163 for (TaxonName name : homotypicalGroup
164 .getTypifiedNames()) {
165 name.addNameTypeDesignation(speciesName, null, null,
166 null, status, false, false, false, false);
167 }
168 hasNom = true;
169 } else if (isStartingElement(next, ACCEPTED_NAME)) {
170 handleNotYetImplementedElement(next);
171 } else {
172 handleUnexpectedStartElement(next);
173 }
174 } else {
175 handleUnexpectedElement(next);
176 }
177 }
178 // TODO handle missing end element
179 throw new IllegalStateException("Homotypes has no closing tag");
180 }
181
182 /**
183 * Creates the name defined by a nom tag. Adds it to the given homotypical
184 * group (if not null).
185 *
186 * @param state
187 * @param reader
188 * @param parentEvent
189 * @param homotypicalGroup
190 * @return
191 * @throws XMLStreamException
192 */
193 private INonViralName handleNom(MarkupImportState state, XMLEventReader reader,
194 XMLEvent parentEvent, HomotypicalGroup homotypicalGroup) throws XMLStreamException {
195 boolean isSynonym = false;
196 boolean isNameType = state.isNameType();
197 // attributes
198 Map<String, Attribute> attributes = getAttributes(parentEvent);
199 boolean isMisidentification = getAndRemoveBooleanAttributeValue(parentEvent, attributes, "misidentification", false);
200 String classValue = getAndRemoveRequiredAttributeValue(parentEvent, attributes, "class");
201 checkNoAttributes(attributes, parentEvent);
202
203 INonViralName name;
204 TaxonRelationship misappliedRelation = null;
205 if (isMisidentification) {
206 if (isNameType || ACCEPTED.equalsIgnoreCase(classValue) ){
207 fireWarningEvent("Misidentification only defined for synonyms", parentEvent, 4);
208 }
209 name = createNameByCode(state, null);
210 Taxon acc = state.getCurrentTaxon();
211 Taxon misapplied = Taxon.NewInstance(name, null);
212 this.save(acc, state); //we got a transient object exception later in the taxon otherwise
213 this.save(misapplied, state);
214 misappliedRelation = acc.addMisappliedName(misapplied, null, null);
215 }else if (!isNameType && ACCEPTED.equalsIgnoreCase(classValue)) {
216 isSynonym = false;
217 name = createName(state, homotypicalGroup, isSynonym);
218 } else if (!isNameType && SYNONYM.equalsIgnoreCase(classValue)) {
219 isSynonym = true;
220 name = createName(state, homotypicalGroup, isSynonym);
221 } else if (isNameType && NAME_TYPE.equalsIgnoreCase(classValue)) {
222 // TODO do we need to define the rank here?
223 name = createNameByCode(state, null);
224 } else {
225 fireUnexpectedAttributeValue(parentEvent, CLASS, classValue);
226 name = createNameByCode(state, null);
227 }
228
229 Map<String, String> nameMap = new HashMap<>();
230 String text = "";
231
232 boolean nameFilled = false;
233 state.setNameStatus(null);
234 while (reader.hasNext()) {
235 XMLEvent next = readNoWhitespace(reader);
236 if (isMyEndingElement(next, parentEvent)) {
237 // fill the name with all data gathered, if not yet done before
238 if (nameFilled == false){
239 fillName(state, nameMap, name, misappliedRelation, next);
240 }
241 handleNomText(state, parentEvent, text, isNameType);
242 state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(name);
243 handleNameStatus(state, name, next);
244 state.setNameStatus(null);
245 return name;
246 } else if (isEndingElement(next, ANNOTATION)) {
247 // NOT YET IMPLEMENTED //TODO test
248 // handleSimpleAnnotation
249 popUnimplemented(next.asEndElement());
250 }else if (isStartingElement(next, FULL_NAME)) {
251 handleFullName(state, reader, name, next);
252 } else if (isStartingElement(next, NUM)) {
253 handleNomNum(state, reader, next);
254 } else if (isStartingElement(next, NAME)) {
255 handleName(state, reader, next, nameMap);
256 } else if (isStartingElement(next, CITATION)) {
257 //we need to fill the name here to have nomenclatural author available for the following citations
258 fillName(state, nameMap, name, misappliedRelation, next);
259 nameFilled = true;
260 handleCitation(state, reader, next, name, misappliedRelation);
261 } else if (next.isCharacters()) {
262 text += next.asCharacters().getData();
263 } else if (isStartingElement(next, HOMONYM)) {
264 handleNotYetImplementedElement(next);
265 } else if (isStartingElement(next, NOTES)) {
266 handleNotYetImplementedElement(next);
267 } else if (isStartingElement(next, NOMENCLATURAL_NOTES)) {
268 handleNotYetImplementedElement(next);
269 } else if (isStartingElement(next, ANNOTATION)) {
270 handleNotYetImplementedElement(next);
271 } else {
272 handleUnexpectedElement(next);
273 }
274 }
275 throw new IllegalStateException("Nom has no closing tag");
276 }
277
278 /**
279 * @param state
280 * @param name
281 * @param next
282 */
283 private void handleNameStatus(MarkupImportState state, INonViralName name, XMLEvent next) {
284 if (isNotBlank(state.getNameStatus())){
285 String nameStatus = state.getNameStatus().trim();
286 try {
287 NomenclaturalStatusType nomStatusType = NomenclaturalStatusType
288 .getNomenclaturalStatusTypeByAbbreviation(nameStatus, name);
289 name.addStatus(NomenclaturalStatus.NewInstance(nomStatusType));
290 } catch (UnknownCdmTypeException e) {
291 String message = "Status '%s' could not be recognized";
292 message = String.format(message, nameStatus);
293 fireWarningEvent(message, next, 4);
294 }
295 }
296 }
297
298 /**
299 * Handles appearance of text within <nom> tags.
300 * Usually this is not expected except for some information that is already handled
301 * elsewhere, e.g. the string Nametype is holding information that is available already
302 * via the surrounding nametype tag. Therefore this information can be neglected.
303 * This method is open for upcoming cases which need to be handled.
304 * @param state
305 * @param event
306 * @param text
307 * @param isNameType
308 */
309 private void handleNomText(MarkupImportState state, XMLEvent event, String text, boolean isNameType) {
310 if (isBlank(text)){
311 return;
312 }
313 text = text.trim();
314 //neglect known redundant strings
315 if (isNameType && (text.matches("(?i)^Esp[\u00E8\u00C8]ce[·\\-\\s]type\\:$")
316 || charIsSimpleType(text) )){
317 return;
318 }//neglect meaningless punctuation
319 else if (isPunctuation(text)){
320 return;
321 }//neglect mea
322 else if (isPunctuation(text)){
323 return;
324 }else{
325 String message = "Unhandled text in <nom> tag: \"%s\"";
326 fireWarningEvent(String.format(message, text), event, 4);
327 }
328 }
329
330 /**
331 * @param state
332 * @param reader
333 * @param next
334 * @throws XMLStreamException
335 */
336 private void handleNomNum(MarkupImportState state, XMLEventReader reader,
337 XMLEvent next) throws XMLStreamException {
338 String num = getCData(state, reader, next);
339 num = num.replace(".", "");
340 num = num.replace(")", "");
341 if (StringUtils.isNotBlank(num)) {
342 if (state.getCurrentTaxonNum() != null
343 && !state.getCurrentTaxonNum().equals(num)) {
344 String message = "Taxontitle num and homotypes/nom/num differ ( %s <-> %s ). I use the later one.";
345 message = String.format(message,
346 state.getCurrentTaxonNum(), num);
347 fireWarningEvent(message, next, 4);
348 }
349 state.setCurrentTaxonNum(num);
350 }
351 }
352
353
354
355 private void handleName(MarkupImportState state, XMLEventReader reader,
356 XMLEvent parentEvent, Map<String, String> nameMap)
357 throws XMLStreamException {
358 String classValue = getClassOnlyAttribute(parentEvent);
359
360 String text = "";
361 while (reader.hasNext()) {
362 XMLEvent next = readNoWhitespace(reader);
363 if (isMyEndingElement(next, parentEvent)) {
364 nameMap.put(classValue, text);
365 return;
366 } else if (isStartingElement(next, ANNOTATION)) {
367 handleNotYetImplementedElement(next); // TODO test handleSimpleAnnotation
368 } else if (isStartingElement(next, FOOTNOTE_REF)) {
369 handleNotYetImplementedElement(next);
370 } else if (next.isCharacters()) {
371 text += next.asCharacters().getData();
372 } else {
373 handleUnexpectedElement(next);
374 }
375 }
376 throw new IllegalStateException("name has no closing tag");
377 }
378
379 private void fillName(MarkupImportState state, Map<String, String> nameMap,
380 INonViralName name, TaxonRelationship misappliedRel, XMLEvent event) {
381
382 // Ranks: family, subfamily, tribus, genus, subgenus, section,
383 // subsection, species, subspecies, variety, subvariety, forma
384 // infrank, paraut, author, infrparaut, infraut, status, notes
385
386 String infrank = getAndRemoveMapKey(nameMap, INFRANK);
387 String authorStr = getAndRemoveMapKey(nameMap, AUTHOR);
388 String paraut = getAndRemoveMapKey(nameMap, PARAUT);
389
390 String infrParAut = getAndRemoveMapKey(nameMap, INFRPARAUT);
391 String infrAut = getAndRemoveMapKey(nameMap, INFRAUT);
392
393 String statusStr = getAndRemoveMapKey(nameMap, STATUS);
394 String notes = getAndRemoveMapKey(nameMap, NOTES);
395
396 if (misappliedRel != null && authorStr != null && authorStr.startsWith("auct.")){
397 misappliedRel.getFromTaxon().setAppendedPhrase(authorStr);
398 authorStr = null;
399 }
400
401 if (!name.isProtectedTitleCache()) { // otherwise fullName
402
403 makeRankDecision(state, nameMap, name, event, infrank);
404
405 // test consistency of rank and authors
406 testRankAuthorConsistency(name, event, authorStr, paraut,infrParAut, infrAut);
407
408 // authors
409 makeNomenclaturalAuthors(state, event, name, authorStr, paraut, infrParAut, infrAut);
410 }
411
412 // status
413 // TODO handle pro parte, pro syn. etc.
414 if (isNotBlank(statusStr)) {
415 String proPartePattern = "(pro parte|p.p.)";
416 if (statusStr.matches(proPartePattern)) {
417 state.setProParte(true);
418 }
419 try {
420 // TODO handle trim earlier
421 statusStr = statusStr.trim();
422 NomenclaturalStatusType nomStatusType = NomenclaturalStatusType
423 .getNomenclaturalStatusTypeByAbbreviation(statusStr, name);
424 name.addStatus(NomenclaturalStatus.NewInstance(nomStatusType));
425 } catch (UnknownCdmTypeException e) {
426 String message = "Status '%s' could not be recognized";
427 message = String.format(message, statusStr);
428 fireWarningEvent(message, event, 4);
429 }
430 }
431
432 // notes
433 if (StringUtils.isNotBlank(notes)) {
434 handleNotYetImplementedAttributeValue(event, CLASS, NOTES);
435 }
436
437 return;
438 }
439
440 /**
441 * @param statusStr
442 * @return
443 */
444 private String normalizeStatus(String statusStr) {
445 if (statusStr == null){
446 return null;
447 }else if (statusStr.equals("nomen")){
448 statusStr = "nom. nud.";
449 }
450 return statusStr.trim();
451 }
452
453 /**
454 * @param state
455 * @param nameMap
456 * @param name
457 * @param event
458 * @param infrankStr
459 */
460 private void makeRankDecision(MarkupImportState state,
461 Map<String, String> nameMap, INonViralName name, XMLEvent event,
462 String infrankStr) {
463 // TODO ranks
464 for (String key : nameMap.keySet()) {
465 Rank rank = makeRank(state, key, false);
466 if (rank == null) {
467 handleNotYetImplementedAttributeValue(event, CLASS, key);
468 } else {
469 if (name.getRank() == null || rank.isLower(name.getRank())) {
470 name.setRank(rank);
471 }
472 String value = nameMap.get(key);
473 if (rank.isSupraGeneric() || rank.isGenus()) {
474 if ((key.equalsIgnoreCase(GENUS_ABBREVIATION)
475 && isNotBlank(state.getLatestGenusEpithet()) || isGenusAbbrev(
476 value, state.getLatestGenusEpithet()))) {
477 value = state.getLatestGenusEpithet();
478 }
479 name.setGenusOrUninomial(toFirstCapital(value));
480 } else if (rank.isInfraGeneric()) {
481 name.setInfraGenericEpithet(toFirstCapital(value));
482 } else if (rank.isSpecies()) {
483 if (state.getConfig().isAllowCapitalSpeciesEpithet()
484 && isFirstCapitalWord(value)) { // capital letters
485 // are allowed for
486 // species epithet
487 // in case of person
488 // names (e.g.
489 // Manilkara
490 // Welwitschii Engl.
491 name.setSpecificEpithet(value);
492 } else {
493 name.setSpecificEpithet(value.toLowerCase());
494 }
495 } else if (rank.isInfraSpecific()) {
496 name.setInfraSpecificEpithet(value.toLowerCase());
497 } else {
498 String message = "Invalid rank '%s'. Can't decide which epithet to fill with '%s'";
499 message = String.format(message, rank.getTitleCache(),
500 value);
501 fireWarningEvent(message, event, 4);
502 }
503 }
504
505 }
506 // handle given infrank marker
507 if (StringUtils.isNotBlank(infrankStr)) {
508 Rank infRank = makeRank(state, infrankStr, true);
509
510 if (infRank == null) {
511 String message = "Infrank '%s' rank not recognized";
512 message = String.format(message, infrankStr);
513 fireWarningEvent(message, event, 4);
514 } else {
515 if (name.getRank() == null) {
516 name.setRank(infRank);
517 } else if (infRank.isLower(name.getRank())) {
518 String message = "InfRank '%s' is lower than existing rank ";
519 message = String.format(message, infrankStr);
520 fireWarningEvent(message, event, 2);
521 name.setRank(infRank);
522 } else if (infRank.equals(name.getRank())) {
523 // nothing
524 } else {
525 String message = "InfRank '%s' is higher than existing rank ";
526 message = String.format(message, infrankStr);
527 fireWarningEvent(message, event, 2);
528 }
529 }
530 }
531 }
532
533 /**
534 * @param state
535 * @param name
536 * @param event
537 * @param authorStr
538 * @param paraut
539 * @param infrParAut
540 * @param infrAut
541 */
542 private void makeNomenclaturalAuthors(MarkupImportState state, XMLEvent event, INonViralName name,
543 String authorStr, String paraut, String infrParAut, String infrAut) {
544 if (name.getRank() != null && name.getRank().isInfraSpecific()) {
545 if (StringUtils.isNotBlank(infrAut)) {
546 TeamOrPersonBase<?>[] authorAndEx = authorAndEx(state, infrAut, event);
547 name.setCombinationAuthorship(authorAndEx[0]);
548 name.setExCombinationAuthorship(authorAndEx[1]);
549 }
550 if (StringUtils.isNotBlank(infrParAut)) {
551 TeamOrPersonBase<?>[] authorAndEx = authorAndEx(state, infrParAut,event);
552 name.setBasionymAuthorship(authorAndEx[0]);
553 name.setExBasionymAuthorship(authorAndEx[1]);
554 }
555 } else {
556 if (name.getRank() == null) {
557 String message = "No rank defined. Check correct usage of authors!";
558 fireWarningEvent(message, event, 4);
559 if (isNotBlank(infrParAut) || isNotBlank(infrAut)) {
560 authorStr = infrAut;
561 paraut = infrParAut;
562 }
563 }
564 if (StringUtils.isNotBlank(authorStr)) {
565 TeamOrPersonBase<?>[] authorAndEx = authorAndEx(state, authorStr, event);
566 name.setCombinationAuthorship(authorAndEx[0]);
567 name.setExCombinationAuthorship(authorAndEx[1]);
568 }
569 if (StringUtils.isNotBlank(paraut)) {
570 TeamOrPersonBase<?>[] authorAndEx = authorAndEx(state, paraut, event);
571 name.setBasionymAuthorship(authorAndEx[0]);
572 name.setExBasionymAuthorship(authorAndEx[1]);
573 }
574 }
575
576 //remember author for following citations
577 state.setLatestAuthorInHomotype(name.getCombinationAuthorship());
578 }
579
580 private TeamOrPersonBase<?>[] authorAndEx(MarkupImportState state, String authorAndEx, XMLEvent xmlEvent) {
581 authorAndEx = authorAndEx.trim();
582 TeamOrPersonBase<?>[] result = new TeamOrPersonBase[2];
583
584 String[] split = authorAndEx.split("\\sex\\s");
585 if (split.length > 2) {
586 String message = "There is more then 1 ' ex ' in author string. Can't separate author and ex-author";
587 fireWarningEvent(message, xmlEvent, 4);
588 result[0] = createAuthor(state, authorAndEx);
589 } else if (split.length == 2) {
590 result[0] = createAuthor(state, split[1]);
591 result[1] = createAuthor(state, split[0]);
592 } else {
593 result[0] = createAuthor(state, split[0]);
594 }
595 return result;
596 }
597
598 /**
599 * Returns the (empty) name with the correct homotypical group depending on
600 * the taxon status and in case of synonym adds it to the taxon.
601 * Throws NPE if no currentTaxon is set in state.
602 *
603 * @param state
604 * @param homotypicalGroup
605 * @param isSynonym
606 * @return
607 */
608 private INonViralName createName(MarkupImportState state,
609 HomotypicalGroup homotypicalGroup, boolean isSynonym) {
610 INonViralName name;
611 Taxon taxon = state.getCurrentTaxon();
612 if (isSynonym) {
613 Rank defaultRank = Rank.SPECIES(); // can be any
614 name = createNameByCode(state, defaultRank);
615 if (homotypicalGroup != null) {
616 name.setHomotypicalGroup(homotypicalGroup);
617 }
618 SynonymType synonymType = SynonymType.HETEROTYPIC_SYNONYM_OF;
619 if (taxon.getHomotypicGroup().equals(homotypicalGroup)) {
620 synonymType = SynonymType.HOMOTYPIC_SYNONYM_OF;
621 }
622 taxon.addSynonymName(TaxonName.castAndDeproxy(name), synonymType);
623 } else {
624 name = taxon.getName();
625 }
626 return name;
627 }
628
629 private void handleCitation(MarkupImportState state, XMLEventReader reader,
630 XMLEvent parentEvent, INonViralName nvn, TaxonRelationship misappliedRel) throws XMLStreamException {
631 String classValue = getClassOnlyAttribute(parentEvent);
632
633 TaxonName name = TaxonName.castAndDeproxy(nvn);
634 state.setCitation(true);
635 boolean hasRefPart = false;
636 Map<String, String> refMap = new HashMap<>();
637 while (reader.hasNext()) {
638 XMLEvent next = readNoWhitespace(reader);
639 if (isMyEndingElement(next, parentEvent)) {
640 checkMandatoryElement(hasRefPart, parentEvent.asStartElement(), REF_PART);
641 Reference reference = createReference(state, refMap, next);
642 String microReference = refMap.get(DETAILS);
643 doCitation(state, name, classValue, misappliedRel,
644 reference, microReference, parentEvent);
645 state.setCitation(false);
646 return;
647 } else if (isStartingElement(next, REF_PART)) {
648 handleRefPart(state, reader, next, refMap);
649 hasRefPart = true;
650 } else {
651 handleUnexpectedElement(next);
652 }
653 }
654 throw new IllegalStateException("Citation has no closing tag");
655
656 }
657
658
659 private void doCitation(MarkupImportState state, TaxonName name,
660 String classValue, TaxonRelationship misappliedRel,
661 Reference reference, String microCitation,
662 XMLEvent parentEvent) {
663 reference = state.getDeduplicationHelper().getExistingReference(reference, true);
664 if (misappliedRel != null){
665 if (!PUBLICATION.equalsIgnoreCase(classValue)){
666 fireWarningEvent("'Usage' not handled correctly for misidentifications", parentEvent, 4);
667 }else{
668 Taxon misappliedTaxon = misappliedRel.getFromTaxon();
669 misappliedTaxon.setSec(reference);
670 misappliedTaxon.setSecMicroReference(microCitation);
671 misappliedRel.setCitation(state.getConfig().getSourceReference());
672 }
673 }else if (PUBLICATION.equalsIgnoreCase(classValue)) {
674 name.setNomenclaturalReference(reference);
675 name.setNomenclaturalMicroReference(microCitation);
676 } else if (USAGE.equalsIgnoreCase(classValue)) {
677 Taxon taxon = state.getCurrentTaxon();
678 TaxonDescription td = getDefaultTaxonDescription(taxon, false, true, state.getConfig().getSourceReference());
679 TextData citation = TextData.NewInstance(Feature.CITATION());
680 // TODO name used in source
681 citation.addSource(OriginalSourceType.PrimaryTaxonomicSource, null, null, reference, microCitation, name, null);
682 td.addElement(citation);
683 } else if (TYPE.equalsIgnoreCase(classValue)) {
684 handleNotYetImplementedAttributeValue(parentEvent, CLASS, classValue);
685 } else {
686 // TODO Not yet implemented
687 handleNotYetImplementedAttributeValue(parentEvent, CLASS, classValue);
688 }
689 }
690
691 /**
692 * Tests if the names rank is consistent with the given author strings.
693 * NOTE: Tags for authors are differ depending on the rank.
694 *
695 * @param name
696 * @param event
697 * @param authorStr
698 * @param paraut
699 * @param infrParAut
700 * @param infrAut
701 */
702 private void testRankAuthorConsistency(INonViralName name, XMLEvent event,
703 String authorStr, String paraut, String infrParAut, String infrAut) {
704 if (name.getRank() == null) {
705 return;
706 }
707 if (name.getRank().isInfraSpecific()) {
708 if (StringUtils.isBlank(infrParAut)
709 && StringUtils.isBlank(infrAut) // was isNotBlank before
710 // 29.5.2012
711 && (StringUtils.isNotBlank(paraut) || StringUtils
712 .isNotBlank(authorStr)) && !name.isAutonym()) {
713 String message = "Rank is infraspecicific but has only specific or higher author(s)";
714 fireWarningEvent(message, event, 4);
715 }
716 } else {
717 // is not infraspecific
718 if (StringUtils.isNotBlank(infrParAut)
719 || StringUtils.isNotBlank(infrAut)) {
720 String message = "Rank is not infraspecicific but name has infra author(s)";
721 fireWarningEvent(message, event, 4);
722 }
723 }
724 }
725
726 private Reference createReference(MarkupImportState state,
727 Map<String, String> refMap, XMLEvent parentEvent) {
728 // TODO
729 Reference reference;
730
731 String type = getAndRemoveMapKey(refMap, PUBTYPE);
732 String authorStr = getAndRemoveMapKey(refMap, AUTHOR);
733 String titleStr = getAndRemoveMapKey(refMap, PUBTITLE);
734 String titleCache = getAndRemoveMapKey(refMap, PUBFULLNAME);
735 String volume = getAndRemoveMapKey(refMap, VOLUME);
736 String edition = getAndRemoveMapKey(refMap, EDITION);
737 String editors = getAndRemoveMapKey(refMap, EDITORS);
738 String year = getAndRemoveMapKey(refMap, YEAR);
739 String pubName = getAndRemoveMapKey(refMap, PUBNAME);
740 String pages = getAndRemoveMapKey(refMap, PAGES);
741 String publocation = getAndRemoveMapKey(refMap, PUBLOCATION);
742 String publisher = getAndRemoveMapKey(refMap, PUBLISHER);
743 String appendix = getAndRemoveMapKey(refMap, APPENDIX);
744 String issue = getAndRemoveMapKey(refMap, ISSUE);
745 String nameStatus = getAndRemoveMapKey(refMap, NAME_STATUS);
746
747 if (state.isCitation()) {
748 reference = handleCitationSpecific(state, type, authorStr,
749 titleStr, titleCache, volume, issue, edition, editors, pubName, pages, appendix, refMap, parentEvent);
750
751 } else { // no citation
752 reference = handleNonCitationSpecific(state, type, authorStr, titleStr,
753 titleCache, volume, issue, edition, editors, pubName, appendix, pages, parentEvent);
754 }
755
756 //year
757 VerbatimTimePeriod timeperiod = TimePeriodParser.parseStringVerbatim(year);
758 if (reference.getType().equals(ReferenceType.BookSection)){
759 reference.getInBook().setDatePublished(timeperiod);
760 }
761 reference.setDatePublished(timeperiod);
762
763 //Quickfix for these 2 attributes used in feature.references
764 Reference inRef = reference.getInReference() == null ? reference : reference.getInReference();
765 //publocation
766 if (isNotBlank(publisher)){
767 inRef.setPublisher(publisher);
768 }
769
770 //publisher
771 if (isNotBlank(publocation)){
772 inRef.setPlacePublished(publocation);
773 }
774
775 if (isNotBlank(nameStatus)){
776 state.setNameStatus(nameStatus);
777 }
778
779 // TODO
780 String[] unhandledList = new String[] { ALTERNATEPUBTITLE, NOTES, STATUS };
781 for (String unhandled : unhandledList) {
782 String value = getAndRemoveMapKey(refMap, unhandled);
783 if (isNotBlank(value)) {
784 this.handleNotYetImplementedAttributeValue(parentEvent, CLASS, unhandled);
785 }
786 }
787
788 for (String key : refMap.keySet()) {
789 if (!DETAILS.equalsIgnoreCase(key)) {
790 this.fireUnexpectedAttributeValue(parentEvent, CLASS, key);
791 }
792 }
793
794 return reference;
795 }
796
797
798 /**
799 * Handles references used in the citation tag
800 * @param appendix
801 * @see #handleNonCitationSpecific(String, String, String, String, String, String, String, String)
802 */
803 private Reference handleCitationSpecific(MarkupImportState state,
804 String type, String authorStr, String titleStr, String titleCache,
805 String volume, String issue, String edition, String editors, String pubName,
806 String pages, String appendix, Map<String, String> refMap, XMLEvent parentEvent) {
807
808 if (titleStr != null){
809 String message = "Currently it is not expected that a titleStr exists in a citation";
810 fireWarningEvent(message, parentEvent, 4);
811 }
812 if (isBlank(volume) && isNotBlank(issue)){
813 String message = "Issue ('"+issue+"') exists but no volume";
814 fireWarningEvent(message, parentEvent, 4);
815 volume = issue;
816 }else if (isNotBlank(issue)){
817 volume = volume + "("+ issue + ")";
818 }
819
820
821 RefType refType = defineRefTypeForCitation(type, volume, editors, authorStr, pubName, parentEvent);
822 Reference reference;
823
824 if (isNotBlank(appendix)){
825 pubName = pubName == null ? appendix : (pubName + " " + appendix).replaceAll(" ", " ");
826 }
827
828 if (refType == RefType.Article) {
829 IArticle article = ReferenceFactory.newArticle();
830 if (pubName != null) {
831 IJournal journal = ReferenceFactory.newJournal();
832 journal.setTitle(pubName);
833 article.setInJournal(journal);
834 article.setVolume(volume);
835 if (isNotBlank(edition)){
836 String message = "Article must not have an edition.";
837 fireWarningEvent(message, parentEvent, 4);
838 }
839 }
840 reference = (Reference) article;
841 } else if (refType == RefType.BookSection) {
842 //Book Section
843 reference = ReferenceFactory.newBookSection();
844 IBook book = ReferenceFactory.newBook();
845 reference.setInBook(book);
846 book.setTitle(pubName);
847 book.setVolume(volume);
848 book.setEdition(edition);
849
850 if (state.getConfig().isUseEditorAsInAuthorWhereNeeded()){
851 TeamOrPersonBase<?> inAuthor = createAuthor(state, editors);
852 book.setAuthorship(inAuthor);
853 editors = null;
854 }
855 } else if (refType == RefType.Book){
856 //Book
857 reference = ReferenceFactory.newBook();
858 reference.setTitle(pubName);
859 reference.setVolume(volume);
860 reference.setEdition(edition);
861 }else if (refType == RefType.Generic){
862 //Generic - undefinable
863 // String message = "Can't define the type of the reference. Use generic instead";
864 // fireWarningEvent(message, parentEvent, 4);
865 reference = ReferenceFactory.newGeneric();
866 reference.setTitle(pubName);
867 reference.setEdition(edition);
868
869 //volume indicates an in-reference
870 if (isNotBlank(volume)){
871 Reference partOf = ReferenceFactory.newGeneric();
872 partOf.setVolume(volume);
873 partOf.setInReference(reference);
874 reference = partOf;
875 }
876 }else if (refType == RefType.LatestUsed){
877 Reference latestReference = state.getLatestReferenceInHomotype();
878 if (latestReference == null){
879 String message = "No former reference available for incomplete citation";
880 fireWarningEvent(message, parentEvent, 6);
881 reference = ReferenceFactory.newGeneric();
882 }else{
883 if (latestReference.getInReference() != null){
884 reference = latestReference.clone();
885 }else{
886 String message = "Latest reference is not an in-reference. This is not yet handled.";
887 fireWarningEvent(message, parentEvent, 6);
888 reference = ReferenceFactory.newGeneric();
889 }
890 }
891 reference.setVolume(volume);
892 if (isNotBlank(edition)){
893 String message = "Edition not yet handled for incomplete citations";
894 fireWarningEvent(message, parentEvent, 4);
895 }
896
897 }else{
898 String message = "Unhandled reference type: %s" ;
899 fireWarningEvent(String.format(message, refType.toString()), parentEvent, 8);
900 reference = ReferenceFactory.newGeneric();
901 }
902
903 //author
904 TeamOrPersonBase<?> author;
905 if (isBlank(authorStr)){
906 if (refType != RefType.LatestUsed){
907 author = state.getLatestAuthorInHomotype();
908 reference.setAuthorship(author);
909 }
910 }else{
911 author = createAuthor(state, authorStr);
912 state.setLatestAuthorInHomotype(author);
913 reference.setAuthorship(author);
914 }
915
916
917 //title, titleCache
918 handleTitlesInCitation(titleStr, titleCache, parentEvent, reference);
919
920 //editors
921 handleEditorsInCitation(edition, editors, reference, parentEvent);
922
923 //pages
924 handlePages(state, refMap, parentEvent, reference, pages);
925
926 // state.getDeduplicationHelper(docImport).getExistingReference(state, reference);
927
928 //remember reference for following citation
929 state.setLatestReferenceInHomotype(reference);
930
931 return reference;
932 }
933
934 private void handleEditorsInCitation(String edition, String editors, Reference reference, XMLEvent parentEvent) {
935 //editor
936 reference.setEditor(editors);
937 if ( editors != null){
938 String message = "Citation reference has an editor. This is unusual for a citation reference (appears regularly in <reference> references";
939 fireWarningEvent(message, parentEvent, 4);
940 }
941 }
942
943 private void handleTitlesInCitation(String titleStr, String titleCache,
944 XMLEvent parentEvent, Reference reference) {
945 if (isNotBlank(titleStr)){
946 reference.setTitle(titleStr);
947 }
948 //titleCache
949 if (StringUtils.isNotBlank(titleCache)) {
950 reference.setTitleCache(titleCache, true);
951 }
952 if (titleStr != null || titleCache != null){
953 String message = "Citation reference has a title or a full title. Both is unusual for a citation reference (appears regularly in <reference> references";
954 fireWarningEvent(message, parentEvent, 4);
955 }
956 }
957
958 private enum RefType{
959 Article,
960 BookSection,
961 Book,
962 Generic,
963 LatestUsed
964 }
965
966 private RefType defineRefTypeForCitation(String type, String volume, String editors,
967 String authorStr, String pubName, XMLEvent parentEvent) {
968 if ("journal".equalsIgnoreCase(type)){
969 return RefType.Article;
970 }else {
971 if (editors == null){
972 //no editors
973 if (pubName == null){
974 //looks like we need to use reference info from former citations here
975 return RefType.LatestUsed;
976 }else if (volume == null){
977 return RefType.Book; //Book must not have in-authors
978 }else if (IJournal.guessIsJournalName(pubName)){
979 return RefType.Article;
980 }else{
981 return RefType.Generic;
982 }
983
984 }else{
985 //editors
986 if (pubName != null){
987 return RefType.BookSection;
988 }else{
989 String message = "Unexpected state: Citation has editors but no pubName";
990 fireWarningEvent(message, parentEvent, 4);
991 return RefType.Generic;
992 }
993 }
994 }
995 }
996
997
998 private void handlePages(MarkupImportState state,
999 Map<String, String> refMap, XMLEvent parentEvent,
1000 Reference reference, String pages) {
1001 // TODO check if this is handled correctly in FM markup
1002 boolean switchPages = state.getConfig().isHandlePagesAsDetailWhereNeeded();
1003 if (switchPages){
1004 if (pages != null ){
1005 String detail = refMap.get(DETAILS);
1006 if (isBlank(detail)){
1007 if (pages.contains("-")){
1008 String message = "There is a pages tag with '-'. Unclear if this really means pages";
1009 fireWarningEvent(message, parentEvent, 8);
1010 reference.setPages(pages);
1011 }else{
1012 //handle pages as detail, this is at least true for Flora Malesiana
1013 refMap.put(DETAILS, pages);
1014 }
1015 }else{
1016 if (! pages.contains("-")){
1017 String message = "There are pages and detail available where pages may also hold details information.";
1018 fireWarningEvent(message, parentEvent, 8);
1019 }
1020 reference.setPages(pages);
1021 }
1022 }
1023 }
1024 }
1025
1026 public Reference handleReference(MarkupImportState state,
1027 XMLEventReader reader, XMLEvent parentEvent)
1028 throws XMLStreamException {
1029 checkNoAttributes(parentEvent);
1030
1031 boolean hasRefPart = false;
1032 Map<String, String> refMap = new HashMap<String, String>();
1033 while (reader.hasNext()) {
1034 XMLEvent next = readNoWhitespace(reader);
1035 if (isMyEndingElement(next, parentEvent)) {
1036 checkMandatoryElement(hasRefPart, parentEvent.asStartElement(), REF_PART);
1037 Reference reference = createReference(state, refMap, next);
1038 return reference;
1039 } else if (isStartingElement(next, REF_PART)) {
1040 handleRefPart(state, reader, next, refMap);
1041 hasRefPart = true;
1042 } else {
1043 handleUnexpectedElement(next);
1044 }
1045 }
1046 // TODO handle missing end element
1047 throw new IllegalStateException("<Reference> has no closing tag");
1048 }
1049
1050 }