Revision 1496a7e6
Added by Andreas Kohlbecker over 7 years ago
app-import/src/main/java/eu/etaxonomy/cdm/io/iapt/IAPTExcelImport.java | ||
---|---|---|
24 | 24 |
import eu.etaxonomy.cdm.model.reference.ReferenceType; |
25 | 25 |
import eu.etaxonomy.cdm.model.taxon.*; |
26 | 26 |
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl; |
27 |
import eu.etaxonomy.cdm.strategy.parser.ParserProblem; |
|
27 | 28 |
import org.apache.commons.lang.ArrayUtils; |
28 | 29 |
import org.apache.commons.lang.StringEscapeUtils; |
29 | 30 |
import org.apache.commons.lang.StringUtils; |
... | ... | |
92 | 93 |
Pattern.compile("^(?<month>[IVX]{1,2})([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like VI-1969 |
93 | 94 |
Pattern.compile("^(?<day>[0-9]{1,2})(?:[\\./]|th|rd|st)?\\s(?<monthName>\\p{L}+\\.?),?\\s?(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12. April 1969 or april 1999 or 22 Dec.1999 |
94 | 95 |
}; |
95 |
private static final Pattern typeSplitPattern = Pattern.compile("^(?:\"*[Tt]ype: (?<fieldUnit>.*?))(?:[Hh]olotype:(?<holotype>.*?)\\.?)?(?:[Ii]sotype[^:]*:(?<isotype>.*)\\.?)?\\.?$"); |
|
96 |
private static final Pattern typeSpecimenSplitPattern = Pattern.compile("^(?:\"*[Tt]ype: (?<fieldUnit>.*?))(?:[Hh]olotype:(?<holotype>.*?)\\.?)?(?:[Ii]sotype[^:]*:(?<isotype>.*)\\.?)?\\.?$"); |
|
97 |
|
|
98 |
private static final Pattern typeNameBasionymPattern = Pattern.compile("\\([Bb]asionym\\s?\\:\\s?(?<basionymName>[^\\)]*).*$"); |
|
99 |
private static final Pattern typeNameNotePattern = Pattern.compile("\\[([^\\[]*)"); // matches the inner of '[...]' |
|
100 |
private static final Pattern typeNameSpecialSplitPattern = Pattern.compile("(?<note>.*\\;.*?)\\:(?<agent>)\\;(<name>.*)"); |
|
96 | 101 |
|
97 | 102 |
private static final Pattern collectorPattern = Pattern.compile(".*?(?<fullStr1>\\(leg\\.\\s+(?<data1>[^\\)]*)\\))|.*?(?<fullStr2>\\sleg\\.\\s+(?<data2>.*?)\\.?)$"); |
98 | 103 |
private static final Pattern collectionDataPattern = Pattern.compile("^(?<collector>[^,]*),\\s?(?<detail>.*?)\\.?$"); |
... | ... | |
292 | 297 |
|
293 | 298 |
// Types |
294 | 299 |
if(!StringUtils.isEmpty(typeStr)){ |
295 |
makeTypeData(typeStr, taxonName, regNumber, state); |
|
300 |
|
|
301 |
if(taxonName.getRank().isSpecies() || taxonName.getRank().isLower(Rank.SPECIES())) { |
|
302 |
makeSpecimenTypeData(typeStr, taxonName, regNumber, state); |
|
303 |
} else { |
|
304 |
makeNameTypeData(typeStr, taxonName, regNumber, state); |
|
305 |
} |
|
296 | 306 |
} |
297 | 307 |
|
298 | 308 |
getTaxonService().save(taxon); |
... | ... | |
304 | 314 |
return taxon; |
305 | 315 |
} |
306 | 316 |
|
307 |
private void makeTypeData(String typeStr, BotanicalName taxonName, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) { |
|
317 |
private void makeSpecimenTypeData(String typeStr, BotanicalName taxonName, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) {
|
|
308 | 318 |
|
309 |
Matcher m = typeSplitPattern.matcher(typeStr); |
|
319 |
Matcher m = typeSpecimenSplitPattern.matcher(typeStr);
|
|
310 | 320 |
|
311 | 321 |
if(m.matches()){ |
312 | 322 |
String fieldUnitStr = m.group(TypesName.fieldUnit.name()); |
... | ... | |
335 | 345 |
getNameService().save(taxonName); |
336 | 346 |
} |
337 | 347 |
|
348 |
private void makeNameTypeData(String typeStr, BotanicalName taxonName, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) { |
|
349 |
|
|
350 |
String nameStr = typeStr.replaceAll("^Type\\s?\\:\\s?", ""); |
|
351 |
if(nameStr.isEmpty()) { |
|
352 |
return; |
|
353 |
} |
|
354 |
|
|
355 |
String basionymNameStr = null; |
|
356 |
String noteStr = null; |
|
357 |
String agentStr = null; |
|
358 |
|
|
359 |
Matcher m; |
|
360 |
|
|
361 |
if(typeStr.startsWith("not to be indicated")){ |
|
362 |
// Special case: |
|
363 |
// Type: not to be indicated (Art. H.9.1. Tokyo Code); stated parent genera: Hechtia Klotzsch; Deuterocohnia Mez |
|
364 |
// FIXME |
|
365 |
m = typeNameSpecialSplitPattern.matcher(nameStr); |
|
366 |
if(m.matches()){ |
|
367 |
nameStr = m.group("name"); |
|
368 |
noteStr = m.group("note"); |
|
369 |
agentStr = m.group("agent"); |
|
370 |
// TODO better import of agent? |
|
371 |
if(agentStr != null){ |
|
372 |
noteStr = noteStr + ": " + agentStr; |
|
373 |
} |
|
374 |
} |
|
375 |
} else { |
|
376 |
// Generic case |
|
377 |
m = typeNameBasionymPattern.matcher(nameStr); |
|
378 |
if (m.find()) { |
|
379 |
basionymNameStr = m.group("basionymName"); |
|
380 |
if (basionymNameStr != null) { |
|
381 |
nameStr = nameStr.replace(m.group(0), ""); |
|
382 |
} |
|
383 |
} |
|
384 |
|
|
385 |
m = typeNameNotePattern.matcher(nameStr); |
|
386 |
if (m.find()) { |
|
387 |
noteStr = m.group(1); |
|
388 |
if (noteStr != null) { |
|
389 |
nameStr = nameStr.replace(m.group(0), ""); |
|
390 |
} |
|
391 |
} |
|
392 |
} |
|
393 |
|
|
394 |
BotanicalName typeName = (BotanicalName) nameParser.parseFullName(nameStr, NomenclaturalCode.ICNAFP, null); |
|
395 |
|
|
396 |
if(typeName.isProtectedTitleCache() || typeName.getNomenclaturalReference() != null && typeName.getNomenclaturalReference().isProtectedTitleCache()) { |
|
397 |
logger.warn(csvReportLine(regNumber, "NameType not parsable", typeStr, nameStr)); |
|
398 |
} |
|
399 |
|
|
400 |
if(basionymNameStr != null){ |
|
401 |
BotanicalName basionymName = (BotanicalName) nameParser.parseFullName(nameStr, NomenclaturalCode.ICNAFP, null); |
|
402 |
getNameService().save(basionymName); |
|
403 |
typeName.addBasionym(basionymName); |
|
404 |
} |
|
405 |
|
|
406 |
|
|
407 |
NameTypeDesignation nameTypeDesignation = NameTypeDesignation.NewInstance(); |
|
408 |
nameTypeDesignation.setTypeName(typeName); |
|
409 |
getNameService().save(typeName); |
|
410 |
|
|
411 |
if(noteStr != null){ |
|
412 |
nameTypeDesignation.addAnnotation(Annotation.NewInstance(noteStr, AnnotationType.EDITORIAL(), Language.UNKNOWN_LANGUAGE())); |
|
413 |
} |
|
414 |
taxonName.addNameTypeDesignation(typeName, null, null, null, null, false); |
|
415 |
|
|
416 |
} |
|
417 |
|
|
338 | 418 |
/** |
339 | 419 |
* Currently only parses the collector, fieldNumber and the collection date. |
340 | 420 |
* |
Also available in: Unified diff
ref #6026 NameTypes (with minor problems)