24 |
24 |
import eu.etaxonomy.cdm.model.reference.ReferenceType;
|
25 |
25 |
import eu.etaxonomy.cdm.model.taxon.*;
|
26 |
26 |
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
|
|
27 |
import eu.etaxonomy.cdm.strategy.parser.ParserProblem;
|
27 |
28 |
import org.apache.commons.lang.ArrayUtils;
|
28 |
29 |
import org.apache.commons.lang.StringEscapeUtils;
|
29 |
30 |
import org.apache.commons.lang.StringUtils;
|
... | ... | |
92 |
93 |
Pattern.compile("^(?<month>[IVX]{1,2})([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like VI-1969
|
93 |
94 |
Pattern.compile("^(?<day>[0-9]{1,2})(?:[\\./]|th|rd|st)?\\s(?<monthName>\\p{L}+\\.?),?\\s?(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12. April 1969 or april 1999 or 22 Dec.1999
|
94 |
95 |
};
|
95 |
|
// Splits a complete type statement (anchored with ^...$): optional leading double quotes, then "Type: " or "type: "
// followed by the lazily matched 'fieldUnit' text, an optional "Holotype:" section ('holotype') and an optional
// "Isotype...:" section ('isotype'); a single trailing period is tolerated.
private static final Pattern typeSplitPattern = Pattern.compile("^(?:\"*[Tt]ype: (?<fieldUnit>.*?))(?:[Hh]olotype:(?<holotype>.*?)\\.?)?(?:[Ii]sotype[^:]*:(?<isotype>.*)\\.?)?\\.?$");
|
|
96 |
// Splits a complete specimen type statement (anchored with ^...$): optional leading double quotes, then "Type: " or
// "type: " followed by the lazily matched 'fieldUnit' text, an optional "Holotype:" section ('holotype') and an
// optional "Isotype...:" section ('isotype', any non-colon characters may precede the colon); a single trailing
// period is tolerated.
private static final Pattern typeSpecimenSplitPattern = Pattern.compile("^(?:\"*[Tt]ype: (?<fieldUnit>.*?))(?:[Hh]olotype:(?<holotype>.*?)\\.?)?(?:[Ii]sotype[^:]*:(?<isotype>.*)\\.?)?\\.?$");
|
|
97 |
|
|
98 |
// Finds a "(Basionym: ..." or "(basionym: ..." fragment. The group 'basionymName' holds the text up to (not including)
// the next ')'; the overall match then runs on to the end of the input because of the trailing '.*$'.
private static final Pattern typeNameBasionymPattern = Pattern.compile("\\([Bb]asionym\\s?\\:\\s?(?<basionymName>[^\\)]*).*$");
|
|
99 |
// Finds a bracketed note '[...]'. Group 1 is the inner text of the brackets, i.e. it stops at the first ']' or '['.
// The overall match (group 0) still extends from the opening '[' up to the next '[' or the end of the input, so
// callers that cut group 0 out of a string remove the same span as before; only the captured note no longer
// contains the closing bracket and any text following it.
private static final Pattern typeNameNotePattern = Pattern.compile("\\[([^\\]\\[]*)\\]?[^\\[]*");
|
|
100 |
// Splits a special-case name type statement of the form '<note>; <more note>: <agent>; <name>' into the named
// groups 'note' (everything before the colon, must contain a ';'), 'agent' (between the colon and the last ';')
// and 'name' (after the last ';'). Whitespace directly after the colon and after the last ';' is not captured.
// The former version declared the last group as '(<name>.*)' - a plain group requiring the literal text "<name>" -
// and an empty 'agent' group, so it could never match real input and had no group called 'name'.
private static final Pattern typeNameSpecialSplitPattern = Pattern.compile("(?<note>.*\\;.*?)\\:\\s*(?<agent>.*)\\;\\s*(?<name>.*)");
|
96 |
101 |
|
97 |
102 |
// Locates collector information introduced by "leg." in one of two forms:
//  1. parenthesized "(leg. ...)": 'fullStr1' is the whole parenthesized fragment, 'data1' the text after "leg." up to ')'
//  2. unparenthesized " leg. ...": 'fullStr2' is the fragment starting at the whitespace before "leg.", 'data2' the
//     text after "leg." without an optional final period
// Note that the '$' anchor only belongs to the second alternative.
private static final Pattern collectorPattern = Pattern.compile(".*?(?<fullStr1>\\(leg\\.\\s+(?<data1>[^\\)]*)\\))|.*?(?<fullStr2>\\sleg\\.\\s+(?<data2>.*?)\\.?)$");
|
98 |
103 |
// Splits a complete string at its first comma: 'collector' is everything before that comma, 'detail' is the
// remainder without one optional whitespace after the comma and without an optional final period.
private static final Pattern collectionDataPattern = Pattern.compile("^(?<collector>[^,]*),\\s?(?<detail>.*?)\\.?$");
|
... | ... | |
292 |
297 |
|
293 |
298 |
// Types
|
294 |
299 |
if(!StringUtils.isEmpty(typeStr)){
|
295 |
|
makeTypeData(typeStr, taxonName, regNumber, state);
|
|
300 |
|
|
301 |
if(taxonName.getRank().isSpecies() || taxonName.getRank().isLower(Rank.SPECIES())) {
|
|
302 |
makeSpecimenTypeData(typeStr, taxonName, regNumber, state);
|
|
303 |
} else {
|
|
304 |
makeNameTypeData(typeStr, taxonName, regNumber, state);
|
|
305 |
}
|
296 |
306 |
}
|
297 |
307 |
|
298 |
308 |
getTaxonService().save(taxon);
|
... | ... | |
304 |
314 |
return taxon;
|
305 |
315 |
}
|
306 |
316 |
|
307 |
|
private void makeTypeData(String typeStr, BotanicalName taxonName, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) {
|
|
317 |
private void makeSpecimenTypeData(String typeStr, BotanicalName taxonName, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) {
|
308 |
318 |
|
309 |
|
Matcher m = typeSplitPattern.matcher(typeStr);
|
|
319 |
Matcher m = typeSpecimenSplitPattern.matcher(typeStr);
|
310 |
320 |
|
311 |
321 |
if(m.matches()){
|
312 |
322 |
String fieldUnitStr = m.group(TypesName.fieldUnit.name());
|
... | ... | |
335 |
345 |
getNameService().save(taxonName);
|
336 |
346 |
}
|
337 |
347 |
|
|
348 |
private void makeNameTypeData(String typeStr, BotanicalName taxonName, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) {
|
|
349 |
|
|
350 |
String nameStr = typeStr.replaceAll("^Type\\s?\\:\\s?", "");
|
|
351 |
if(nameStr.isEmpty()) {
|
|
352 |
return;
|
|
353 |
}
|
|
354 |
|
|
355 |
String basionymNameStr = null;
|
|
356 |
String noteStr = null;
|
|
357 |
String agentStr = null;
|
|
358 |
|
|
359 |
Matcher m;
|
|
360 |
|
|
361 |
if(typeStr.startsWith("not to be indicated")){
|
|
362 |
// Special case:
|
|
363 |
// Type: not to be indicated (Art. H.9.1. Tokyo Code); stated parent genera: Hechtia Klotzsch; Deuterocohnia Mez
|
|
364 |
// FIXME
|
|
365 |
m = typeNameSpecialSplitPattern.matcher(nameStr);
|
|
366 |
if(m.matches()){
|
|
367 |
nameStr = m.group("name");
|
|
368 |
noteStr = m.group("note");
|
|
369 |
agentStr = m.group("agent");
|
|
370 |
// TODO better import of agent?
|
|
371 |
if(agentStr != null){
|
|
372 |
noteStr = noteStr + ": " + agentStr;
|
|
373 |
}
|
|
374 |
}
|
|
375 |
} else {
|
|
376 |
// Generic case
|
|
377 |
m = typeNameBasionymPattern.matcher(nameStr);
|
|
378 |
if (m.find()) {
|
|
379 |
basionymNameStr = m.group("basionymName");
|
|
380 |
if (basionymNameStr != null) {
|
|
381 |
nameStr = nameStr.replace(m.group(0), "");
|
|
382 |
}
|
|
383 |
}
|
|
384 |
|
|
385 |
m = typeNameNotePattern.matcher(nameStr);
|
|
386 |
if (m.find()) {
|
|
387 |
noteStr = m.group(1);
|
|
388 |
if (noteStr != null) {
|
|
389 |
nameStr = nameStr.replace(m.group(0), "");
|
|
390 |
}
|
|
391 |
}
|
|
392 |
}
|
|
393 |
|
|
394 |
BotanicalName typeName = (BotanicalName) nameParser.parseFullName(nameStr, NomenclaturalCode.ICNAFP, null);
|
|
395 |
|
|
396 |
if(typeName.isProtectedTitleCache() || typeName.getNomenclaturalReference() != null && typeName.getNomenclaturalReference().isProtectedTitleCache()) {
|
|
397 |
logger.warn(csvReportLine(regNumber, "NameType not parsable", typeStr, nameStr));
|
|
398 |
}
|
|
399 |
|
|
400 |
if(basionymNameStr != null){
|
|
401 |
BotanicalName basionymName = (BotanicalName) nameParser.parseFullName(nameStr, NomenclaturalCode.ICNAFP, null);
|
|
402 |
getNameService().save(basionymName);
|
|
403 |
typeName.addBasionym(basionymName);
|
|
404 |
}
|
|
405 |
|
|
406 |
|
|
407 |
NameTypeDesignation nameTypeDesignation = NameTypeDesignation.NewInstance();
|
|
408 |
nameTypeDesignation.setTypeName(typeName);
|
|
409 |
getNameService().save(typeName);
|
|
410 |
|
|
411 |
if(noteStr != null){
|
|
412 |
nameTypeDesignation.addAnnotation(Annotation.NewInstance(noteStr, AnnotationType.EDITORIAL(), Language.UNKNOWN_LANGUAGE()));
|
|
413 |
}
|
|
414 |
taxonName.addNameTypeDesignation(typeName, null, null, null, null, false);
|
|
415 |
|
|
416 |
}
|
|
417 |
|
338 |
418 |
/**
|
339 |
419 |
* Currently only parses the collector, fieldNumber and the collection date.
|
340 |
420 |
*
|
ref #6026 NameTypes (with minor problems)