fix #6068
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / iapt / IAPTExcelImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.iapt;
11
12 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
13 import eu.etaxonomy.cdm.common.CdmUtils;
14 import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
15 import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
16 import eu.etaxonomy.cdm.model.agent.Institution;
17 import eu.etaxonomy.cdm.model.agent.Person;
18 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
19 import eu.etaxonomy.cdm.model.common.*;
20 import eu.etaxonomy.cdm.model.name.*;
21 import eu.etaxonomy.cdm.model.occurrence.*;
22 import eu.etaxonomy.cdm.model.occurrence.Collection;
23 import eu.etaxonomy.cdm.model.reference.Reference;
24 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
25 import eu.etaxonomy.cdm.model.reference.ReferenceType;
26 import eu.etaxonomy.cdm.model.taxon.*;
27 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
28 import eu.etaxonomy.cdm.strategy.parser.ParserProblem;
29 import org.apache.commons.lang.ArrayUtils;
30 import org.apache.commons.lang.StringEscapeUtils;
31 import org.apache.commons.lang.StringUtils;
32 import org.apache.log4j.Level;
33 import org.apache.log4j.Logger;
34 import org.joda.time.DateTimeFieldType;
35 import org.joda.time.Partial;
36 import org.joda.time.format.DateTimeFormat;
37 import org.joda.time.format.DateTimeFormatter;
38 import org.springframework.stereotype.Component;
39
40 import java.util.*;
41 import java.util.regex.Matcher;
42 import java.util.regex.Pattern;
43
44 /**
45 * @author a.mueller
46 * @created 05.01.2016
47 */
48
49 @Component("iAPTExcelImport")
50 public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends SimpleExcelTaxonImport<CONFIG> {
51 private static final long serialVersionUID = -747486709409732371L;
52 private static final Logger logger = Logger.getLogger(IAPTExcelImport.class);
53 public static final String ANNOTATION_MARKER_STRING = "[*]";
54
55
56 private static UUID ROOT_UUID = UUID.fromString("4137fd2a-20f6-4e70-80b9-f296daf51d82");
57
58 private static NonViralNameParserImpl nameParser = NonViralNameParserImpl.NewInstance();
59
60 private final static String REGISTRATIONNO_PK= "RegistrationNo_Pk";
61 private final static String HIGHERTAXON= "HigherTaxon";
62 private final static String FULLNAME= "FullName";
63 private final static String AUTHORSSPELLING= "AuthorsSpelling";
64 private final static String LITSTRING= "LitString";
65 private final static String REGISTRATION= "Registration";
66 private final static String TYPE= "Type";
67 private final static String CAVEATS= "Caveats";
68 private final static String FULLBASIONYM= "FullBasionym";
69 private final static String FULLSYNSUBST= "FullSynSubst";
70 private final static String NOTESTXT= "NotesTxt";
71 private final static String REGDATE= "RegDate";
72 private final static String NAMESTRING= "NameString";
73 private final static String BASIONYMSTRING= "BasionymString";
74 private final static String SYNSUBSTSTR= "SynSubstStr";
75 private final static String AUTHORSTRING= "AuthorString";
76
77 private static List<String> expectedKeys= Arrays.asList(new String[]{
78 REGISTRATIONNO_PK, HIGHERTAXON, FULLNAME, AUTHORSSPELLING, LITSTRING, REGISTRATION, TYPE, CAVEATS, FULLBASIONYM, FULLSYNSUBST, NOTESTXT, REGDATE, NAMESTRING, BASIONYMSTRING, SYNSUBSTSTR, AUTHORSTRING});
79
80 private static final Pattern nomRefTokenizeP = Pattern.compile("^(?<title>.*):\\s(?<detail>[^\\.:]+)\\.(?<date>.*?)(?:\\s\\((?<issue>[^\\)]*)\\)\\s*)?\\.?$");
81 private static final Pattern[] datePatterns = new Pattern[]{
82 // NOTE:
83 // The order of the patterns is extremely important!!!
84 //
85 // all patterns cover the years 1700 - 1999
86 Pattern.compile("^(?<year>1[7,8,9][0-9]{2})$"), // only year, like '1969'
87 Pattern.compile("^(?<monthName>\\p{L}+\\.?)\\s(?<day>[0-9]{1,2})(?:st|rd|th)?\\.?,?\\s(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like April 12, 1969 or april 12th 1999
88 Pattern.compile("^(?<monthName>\\p{L}+\\.?),?\\s?(?<year>(?:1[7,8,9])?[0-9]{2})$"), // April 99 or April, 1999 or Apr. 12
89 Pattern.compile("^(?<day>[0-9]{1,2})([\\.\\-/])(\\s?)(?<month>[0-1]?[0-9])\\2\\3(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12.04.1969 or 12. 04. 1969 or 12/04/1969 or 12-04-1969
90 Pattern.compile("^(?<day>[0-9]{1,2})([\\.\\-/])(?<month>[IVX]{1,2})\\2(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12-VI-1969
91 Pattern.compile("^(?:(?<day>[0-9]{1,2})(?:\\sde)\\s)(?<monthName>\\p{L}+)\\sde\\s(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full and partial date like 12 de Enero de 1999 or Enero de 1999
92 Pattern.compile("^(?<month>[0-1]?[0-9])([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like 04.1969 or 04/1969 or 04-1969
93 Pattern.compile("^(?<year>(?:1[7,8,9])?[0-9]{2})([\\.\\-/])(?<month>[0-1]?[0-9])$"),// partial date like 1999-04
94 Pattern.compile("^(?<month>[IVX]{1,2})([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like VI-1969
95 Pattern.compile("^(?<day>[0-9]{1,2})(?:[\\./]|th|rd|st)?\\s(?<monthName>\\p{L}+\\.?),?\\s?(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12. April 1969 or april 1999 or 22 Dec.1999
96 };
97 private static final Pattern typeSpecimenSplitPattern = Pattern.compile("^(?:\"*[Tt]ype: (?<fieldUnit>.*?))(?:[Hh]olotype:(?<holotype>.*?)\\.?)?(?:[Ii]sotype[^:]*:(?<isotype>.*)\\.?)?\\.?$");
98
99 private static final Pattern typeNameBasionymPattern = Pattern.compile("\\([Bb]asionym\\s?\\:\\s?(?<basionymName>[^\\)]*).*$");
100 private static final Pattern typeNameNotePattern = Pattern.compile("\\[([^\\[]*)"); // matches the inner of '[...]'
101 private static final Pattern typeNameSpecialSplitPattern = Pattern.compile("(?<note>.*\\;.*?)\\:(?<agent>)\\;(<name>.*)");
102
103 private static final Pattern collectorPattern = Pattern.compile(".*?(?<fullStr1>\\(leg\\.\\s+(?<data1>[^\\)]*)\\))|.*?(?<fullStr2>\\sleg\\.\\s+(?<data2>.*?)\\.?)$");
104 private static final Pattern collectionDataPattern = Pattern.compile("^(?<collector>[^,]*),\\s?(?<detail>.*?)\\.?$");
105 private static final Pattern collectorsNumber = Pattern.compile("^([nN]o\\.\\s.*)$");
106
107 // AccessionNumbers: , #.*, n°:?, 96/3293, No..*, -?\w{1,3}-[0-9\-/]*
108 private static final Pattern accessionNumberOnlyPattern = Pattern.compile("^(?<accNumber>(?:n°\\:?\\s?|#|No\\.?\\s?)?[\\d\\w\\-/]*)$");
109
110 private static final Pattern[] specimenTypePatterns = new Pattern[]{
111 Pattern.compile("^(?<colCode>[A-Z]+|CPC Micropaleontology Lab\\.?)\\s+(?:\\((?<institute>.*[^\\)])\\))(?<accNumber>.*)?$"), // like: GAUF (Gansu Agricultural University) No. 1207-1222
112 Pattern.compile("^(?<colCode>[A-Z]+|CPC Micropaleontology Lab\\.?)\\s+(?:Coll\\.\\s(?<subCollection>[^\\.,;]*)(.))(?<accNumber>.*)?$"), // like KASSEL Coll. Krasske, Praep. DII 78
113 Pattern.compile("^(?:Coll\\.\\s(?<subCollection>[^\\.,;]*)(.))(?<institute>.*?)(?<accNumber>Praep\\..*)?$"), // like Coll. Lange-Bertalot, Bot. Inst., Univ. Frankfurt/Main, Germany Praep. Neukaledonien OTL 62
114 Pattern.compile("^(?<colCode>[A-Z]+)(?:\\s+(?<accNumber>.*))?$"), // identifies the Collection code and takes the rest as accessionNumber if any
115 };
116
117 private static Map<String, Integer> monthFromNameMap = new HashMap<>();
118
119 static {
120 String[] ck = new String[]{"leden", "únor", "březen", "duben", "květen", "červen", "červenec ", "srpen", "září", "říjen", "listopad", "prosinec"};
121 String[] fr = new String[]{"janvier", "février", "mars", "avril", "mai", "juin", "juillet", "août", "septembre", "octobre", "novembre", "décembre"};
122 String[] de = new String[]{"januar", "februar", "märz", "april", "mai", "juni", "juli", "august", "september", "oktober", "november", "dezember"};
123 String[] en = new String[]{"january", "february", "march", "april", "may", "june", "july", "august", "september", "october", "november", "december"};
124 String[] it = new String[]{"gennaio", "febbraio", "marzo", "aprile", "maggio", "giugno", "luglio", "agosto", "settembre", "ottobre", "novembre", "dicembre"};
125 String[] sp = new String[]{"enero", "febrero", "marzo", "abril", "mayo", "junio", "julio", "agosto", "septiembre", "octubre", "noviembre", "diciembre"};
126 String[] de_abbrev = new String[]{"jan.", "feb.", "märz", "apr.", "mai", "jun.", "jul.", "aug.", "sept.", "okt.", "nov.", "dez."};
127 String[] en_abbrev = new String[]{"jan.", "feb.", "mar.", "apr.", "may", "jun.", "jul.", "aug.", "sep.", "oct.", "nov.", "dec."};
128 String[] port = new String[]{"Janeiro", "Fevereiro", "Março", "Abril", "Maio", "Junho", "Julho", "Agosto", "Setembro", "Outubro", "Novembro", "Dezembro"};
129 String[] rom_num = new String[]{"i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi", "xii"};
130
131 String[][] perLang = new String[][]{ck, de, fr, en, it, sp, port, de_abbrev, en_abbrev, rom_num};
132
133 for (String[] months: perLang) {
134 for(int m = 1; m < 13; m++){
135 monthFromNameMap.put(months[m - 1].toLowerCase(), m);
136 }
137 }
138
139 // special cases
140 monthFromNameMap.put("mar", 3);
141 monthFromNameMap.put("dec", 12);
142 monthFromNameMap.put("Februari", 2);
143 }
144
145
146 DateTimeFormatter formatterYear = DateTimeFormat.forPattern("yyyy");
147
148 private Map<String, Collection> collectionMap = new HashMap<>();
149
150
151 enum TypesName {
152 fieldUnit, holotype, isotype;
153
154 public SpecimenTypeDesignationStatus status(){
155 switch (this) {
156 case holotype:
157 return SpecimenTypeDesignationStatus.HOLOTYPE();
158 case isotype:
159 return SpecimenTypeDesignationStatus.ISOTYPE();
160 default:
161 return null;
162 }
163 }
164 }
165
166 private MarkerType markerTypeFossil = null;
167 private Rank rankUnrankedSupraGeneric = null;
168 private Rank familyIncertisSedis = null;
169 private AnnotationType annotationTypeCaveats = null;
170
171 private Reference bookVariedadesTradicionales = null;
172
173 private Taxon makeTaxon(HashMap<String, String> record, SimpleExcelTaxonImportState<CONFIG> state,
174 TaxonNode higherTaxonNode, boolean isFossil) {
175
176 String regNumber = getValue(record, REGISTRATIONNO_PK, false);
177 String regStr = getValue(record, REGISTRATION, true);
178 String titleCacheStr = getValue(record, FULLNAME, true);
179 String nameStr = getValue(record, NAMESTRING, true);
180 String authorStr = getValue(record, AUTHORSTRING, true);
181 String nomRefStr = getValue(record, LITSTRING, true);
182 String authorsSpelling = getValue(record, AUTHORSSPELLING, true);
183 String notesTxt = getValue(record, NOTESTXT, true);
184 String caveats = getValue(record, CAVEATS, true);
185 String fullSynSubstStr = getValue(record, FULLSYNSUBST, true);
186 String fullBasionymStr = getValue(record, FULLBASIONYM, true);
187 String basionymNameStr = getValue(record, FULLBASIONYM, true);
188 String synSubstStr = getValue(record, SYNSUBSTSTR, true);
189 String typeStr = getValue(record, TYPE, true);
190
191
192 String nomRefTitle = null;
193 String nomRefDetail;
194 String nomRefPupDate = null;
195 String nomRefIssue = null;
196 Partial pupDate = null;
197
198 boolean restoreOriginalReference = false;
199 boolean nameIsValid = true;
200
201 // preprocess nomRef: separate citation, reference detail, publishing date
202 if(!StringUtils.isEmpty(nomRefStr)){
203 nomRefStr = nomRefStr.trim();
204
205 // handle the special case which is hard to parse:
206 //
207 // Las variedades tradicionales de frutales de la Cuenca del Río Segura. Catálogo Etnobotánico (1): Frutos secos, oleaginosos, frutales de hueso, almendros y frutales de pepita: 154. 1997.
208 if(nomRefStr.startsWith("Las variedades tradicionales de frutales ")){
209
210 if(bookVariedadesTradicionales == null){
211 bookVariedadesTradicionales = ReferenceFactory.newBook();
212 bookVariedadesTradicionales.setTitle("Las variedades tradicionales de frutales de la Cuenca del Río Segura. Catálogo Etnobotánico (1): Frutos secos, oleaginosos, frutales de hueso, almendros y frutales de pepita");
213 bookVariedadesTradicionales.setDatePublished(TimePeriod.NewInstance(1997));
214 getReferenceService().save(bookVariedadesTradicionales);
215 }
216 nomRefStr = nomRefStr.replaceAll("^.*?\\:.*?\\:", "Las variedades tradicionales:");
217 restoreOriginalReference = true;
218 }
219
220 Matcher m = nomRefTokenizeP.matcher(nomRefStr);
221 if(m.matches()){
222 nomRefTitle = m.group("title");
223 nomRefDetail = m.group("detail");
224 nomRefPupDate = m.group("date").trim();
225 nomRefIssue = m.group("issue");
226
227 pupDate = parseDate(regNumber, nomRefPupDate);
228 if (pupDate != null) {
229 nomRefTitle = nomRefTitle + ": " + nomRefDetail + ". " + pupDate.toString(formatterYear) + ".";
230 } else {
231 logger.warn(csvReportLine(regNumber, "Pub date", nomRefPupDate, "in", nomRefStr, "not parsable"));
232 }
233 } else {
234 nomRefTitle = nomRefStr;
235 }
236 }
237
238 BotanicalName taxonName = makeBotanicalName(state, regNumber, titleCacheStr, nameStr, authorStr, nomRefTitle);
239
240 // always add the original strings of parsed data as annotation
241 taxonName.addAnnotation(Annotation.NewInstance("imported and parsed data strings:" +
242 "\n - '" + LITSTRING + "': "+ nomRefStr +
243 "\n - '" + TYPE + "': " + typeStr +
244 "\n - '" + REGISTRATION + "': " + regStr
245 , AnnotationType.TECHNICAL(), Language.DEFAULT()));
246
247 if(restoreOriginalReference){
248 taxonName.setNomenclaturalReference(bookVariedadesTradicionales);
249 }
250 if(pupDate != null) {
251 taxonName.getNomenclaturalReference().setDatePublished(TimePeriod.NewInstance(pupDate));
252 }
253 if(nomRefIssue != null) {
254 ((Reference)taxonName.getNomenclaturalReference()).setVolume(nomRefIssue);
255 }
256
257
258 if(!StringUtils.isEmpty(notesTxt)){
259 notesTxt = notesTxt.replace("Notes: ", "").trim();
260 taxonName.addAnnotation(Annotation.NewInstance(notesTxt, AnnotationType.EDITORIAL(), Language.DEFAULT()));
261 nameIsValid = false;
262
263 }
264 if(!StringUtils.isEmpty(caveats)){
265 caveats = caveats.replace("Caveats: ", "").trim();
266 taxonName.addAnnotation(Annotation.NewInstance(caveats, annotationTypeCaveats(), Language.DEFAULT()));
267 nameIsValid = false;
268 }
269
270 if(nameIsValid){
271 // Status is always considered valid if no notes and cavets are set
272 taxonName.addStatus(NomenclaturalStatus.NewInstance(NomenclaturalStatusType.VALID()));
273 }
274
275 getNameService().save(taxonName);
276
277 // Namerelations
278 if(!StringUtils.isEmpty(authorsSpelling)){
279 authorsSpelling = authorsSpelling.replaceFirst("Author's spelling:", "").replaceAll("\"", "").trim();
280
281 String[] authorSpellingTokens = StringUtils.split(authorsSpelling, " ");
282 String[] nameStrTokens = StringUtils.split(nameStr, " ");
283
284 ArrayUtils.reverse(authorSpellingTokens);
285 ArrayUtils.reverse(nameStrTokens);
286
287 for (int i = 0; i < nameStrTokens.length; i++){
288 if(i < authorSpellingTokens.length){
289 nameStrTokens[i] = authorSpellingTokens[i];
290 }
291 }
292 ArrayUtils.reverse(nameStrTokens);
293
294 String misspelledNameStr = StringUtils.join (nameStrTokens, ' ');
295 // build the fullnameString of the misspelled name
296 misspelledNameStr = taxonName.getTitleCache().replace(nameStr, misspelledNameStr);
297
298 TaxonNameBase misspelledName = (BotanicalName) nameParser.parseReferencedName(misspelledNameStr, NomenclaturalCode.ICNAFP, null);
299 misspelledName.addRelationshipToName(taxonName, NameRelationshipType.MISSPELLING(), null);
300 getNameService().save(misspelledName);
301 }
302
303 // Replaced Synonyms
304 if(!StringUtils.isEmpty(fullSynSubstStr)){
305 fullSynSubstStr = fullSynSubstStr.replace("Syn. subst.: ", "");
306 BotanicalName replacedSynonymName = makeBotanicalName(state, regNumber, fullSynSubstStr, synSubstStr, null, null);
307 replacedSynonymName.addReplacedSynonym(taxonName, null, null, null);
308 getNameService().save(replacedSynonymName);
309 }
310
311 Reference sec = state.getConfig().getSecReference();
312 Taxon taxon = Taxon.NewInstance(taxonName, sec);
313
314 // Basionym
315 if(fullBasionymStr != null){
316 fullBasionymStr = fullBasionymStr.replaceAll("^\\w*:\\s", ""); // Strip off the leading 'Basionym: "
317 basionymNameStr = basionymNameStr.replaceAll("^\\w*:\\s", ""); // Strip off the leading 'Basionym: "
318 BotanicalName basionym = makeBotanicalName(state, regNumber, fullBasionymStr, basionymNameStr, null, null);
319 getNameService().save(basionym);
320 taxonName.addBasionym(basionym);
321
322 Synonym syn = Synonym.NewInstance(basionym, sec);
323 taxon.addSynonym(syn, SynonymRelationshipType.HOMOTYPIC_SYNONYM_OF());
324 getTaxonService().save(syn);
325 }
326
327 // Markers
328 if(isFossil){
329 taxon.addMarker(Marker.NewInstance(markerTypeFossil(), true));
330 }
331
332 // Types
333 if(!StringUtils.isEmpty(typeStr)){
334
335 if(taxonName.getRank().isSpecies() || taxonName.getRank().isLower(Rank.SPECIES())) {
336 makeSpecimenTypeData(typeStr, taxonName, regNumber, state);
337 } else {
338 makeNameTypeData(typeStr, taxonName, regNumber, state);
339 }
340 }
341
342 getTaxonService().save(taxon);
343 if(higherTaxonNode != null){
344 higherTaxonNode.addChildTaxon(taxon, null, null);
345 getTaxonNodeService().save(higherTaxonNode);
346 }
347
348 return taxon;
349 }
350
351 private void makeSpecimenTypeData(String typeStr, BotanicalName taxonName, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) {
352
353 Matcher m = typeSpecimenSplitPattern.matcher(typeStr);
354
355 if(m.matches()){
356 String fieldUnitStr = m.group(TypesName.fieldUnit.name());
357 // boolean isFieldUnit = typeStr.matches(".*([°']|\\d+\\s?m\\s|\\d+\\s?km\\s).*"); // check for location or unit m, km // makes no sense!!!!
358 FieldUnit fieldUnit = parseFieldUnit(fieldUnitStr, regNumber, state);
359 if(fieldUnit == null) {
360 // create a field unit with only a titleCache using the fieldUnitStr substring
361 logger.warn(csvReportLine(regNumber, "Type: fieldUnitStr can not be parsed", fieldUnitStr));
362 fieldUnit = FieldUnit.NewInstance();
363 fieldUnit.setTitleCache(fieldUnitStr, true);
364 getOccurrenceService().save(fieldUnit);
365 }
366 getOccurrenceService().save(fieldUnit);
367
368 // all others ..
369 addSpecimenTypes(taxonName, fieldUnit, m.group(TypesName.holotype.name()), TypesName.holotype, false, regNumber);
370 addSpecimenTypes(taxonName, fieldUnit, m.group(TypesName.isotype.name()), TypesName.isotype, true, regNumber);
371
372 } else {
373 // create a field unit with only a titleCache using the full typeStr
374 FieldUnit fieldUnit = FieldUnit.NewInstance();
375 fieldUnit.setTitleCache(typeStr, true);
376 getOccurrenceService().save(fieldUnit);
377 logger.warn(csvReportLine(regNumber, "Type: field 'Type' can not be parsed", typeStr));
378 }
379 getNameService().save(taxonName);
380 }
381
382 private void makeNameTypeData(String typeStr, BotanicalName taxonName, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) {
383
384 String nameStr = typeStr.replaceAll("^Type\\s?\\:\\s?", "");
385 if(nameStr.isEmpty()) {
386 return;
387 }
388
389 String basionymNameStr = null;
390 String noteStr = null;
391 String agentStr = null;
392
393 Matcher m;
394
395 if(typeStr.startsWith("not to be indicated")){
396 // Special case:
397 // Type: not to be indicated (Art. H.9.1. Tokyo Code); stated parent genera: Hechtia Klotzsch; Deuterocohnia Mez
398 // FIXME
399 m = typeNameSpecialSplitPattern.matcher(nameStr);
400 if(m.matches()){
401 nameStr = m.group("name");
402 noteStr = m.group("note");
403 agentStr = m.group("agent");
404 // TODO better import of agent?
405 if(agentStr != null){
406 noteStr = noteStr + ": " + agentStr;
407 }
408 }
409 } else {
410 // Generic case
411 m = typeNameBasionymPattern.matcher(nameStr);
412 if (m.find()) {
413 basionymNameStr = m.group("basionymName");
414 if (basionymNameStr != null) {
415 nameStr = nameStr.replace(m.group(0), "");
416 }
417 }
418
419 m = typeNameNotePattern.matcher(nameStr);
420 if (m.find()) {
421 noteStr = m.group(1);
422 if (noteStr != null) {
423 nameStr = nameStr.replace(m.group(0), "");
424 }
425 }
426 }
427
428 BotanicalName typeName = (BotanicalName) nameParser.parseFullName(nameStr, NomenclaturalCode.ICNAFP, null);
429
430 if(typeName.isProtectedTitleCache() || typeName.getNomenclaturalReference() != null && typeName.getNomenclaturalReference().isProtectedTitleCache()) {
431 logger.warn(csvReportLine(regNumber, "NameType not parsable", typeStr, nameStr));
432 }
433
434 if(basionymNameStr != null){
435 BotanicalName basionymName = (BotanicalName) nameParser.parseFullName(nameStr, NomenclaturalCode.ICNAFP, null);
436 getNameService().save(basionymName);
437 typeName.addBasionym(basionymName);
438 }
439
440
441 NameTypeDesignation nameTypeDesignation = NameTypeDesignation.NewInstance();
442 nameTypeDesignation.setTypeName(typeName);
443 getNameService().save(typeName);
444
445 if(noteStr != null){
446 nameTypeDesignation.addAnnotation(Annotation.NewInstance(noteStr, AnnotationType.EDITORIAL(), Language.UNKNOWN_LANGUAGE()));
447 }
448 taxonName.addNameTypeDesignation(typeName, null, null, null, null, false);
449
450 }
451
452 /**
453 * Currently only parses the collector, fieldNumber and the collection date.
454 *
455 * @param fieldUnitStr
456 * @param regNumber
457 * @param state
458 * @return null if the fieldUnitStr could not be parsed
459 */
460 private FieldUnit parseFieldUnit(String fieldUnitStr, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) {
461
462 FieldUnit fieldUnit = null;
463
464 Matcher m1 = collectorPattern.matcher(fieldUnitStr);
465 if(m1.matches()){
466
467 String collectorData = m1.group(2); // like (leg. Metzeltin, 30. 9. 1996)
468 String removal = m1.group(1);
469 if(collectorData == null){
470 collectorData = m1.group(4); // like leg. Metzeltin, 30. 9. 1996
471 removal = m1.group(3);
472 }
473 if(collectorData == null){
474 return null;
475 }
476
477 // the fieldUnitStr is parsable
478 // remove all collectorData from the fieldUnitStr and use the rest as locality
479 String locality = fieldUnitStr.replace(removal, "");
480
481 String collectorStr = null;
482 String detailStr = null;
483 Partial date = null;
484 String fieldNumber = null;
485
486 Matcher m2 = collectionDataPattern.matcher(collectorData);
487 if(m2.matches()){
488 collectorStr = m2.group("collector");
489 detailStr = m2.group("detail");
490
491 // Try to make sense of the detailStr
492 if(detailStr != null){
493 detailStr = detailStr.trim();
494 // 1. try to parse as date
495 date = parseDate(regNumber, detailStr);
496 if(date == null){
497 // 2. try to parse as number
498 if(collectorsNumber.matcher(detailStr).matches()){
499 fieldNumber = detailStr;
500 }
501 }
502 }
503 if(date == null && fieldNumber == null){
504 // detailed parsing not possible, so need fo fallback
505 collectorStr = collectorData;
506 }
507 }
508
509 if(collectorStr == null) {
510 collectorStr = collectorData;
511 }
512
513 fieldUnit = FieldUnit.NewInstance();
514 GatheringEvent ge = GatheringEvent.NewInstance();
515 ge.setLocality(LanguageString.NewInstance(locality, Language.UNKNOWN_LANGUAGE()));
516
517 TeamOrPersonBase agent = state.getAgentBase(collectorStr);
518 if(agent == null) {
519 agent = Person.NewTitledInstance(collectorStr);
520 getAgentService().save(agent);
521 state.putAgentBase(collectorStr, agent);
522 }
523 ge.setCollector(agent);
524
525 if(date != null){
526 ge.setGatheringDate(date);
527 }
528
529 getEventBaseService().save(ge);
530 fieldUnit.setGatheringEvent(ge);
531
532 if(fieldNumber != null) {
533 fieldUnit.setFieldNumber(fieldNumber);
534 }
535 getOccurrenceService().save(fieldUnit);
536
537 }
538
539 return fieldUnit;
540 }
541
542 private Partial parseDate(String regNumber, String dateStr) {
543
544 Partial pupDate = null;
545 boolean parseError = false;
546
547 String day = null;
548 String month = null;
549 String monthName = null;
550 String year = null;
551
552 for(Pattern p : datePatterns){
553 Matcher m2 = p.matcher(dateStr);
554 if(m2.matches()){
555 try {
556 year = m2.group("year");
557 } catch (IllegalArgumentException e){
558 // named capture group not found
559 }
560 try {
561 month = m2.group("month");
562 } catch (IllegalArgumentException e){
563 // named capture group not found
564 }
565
566 try {
567 monthName = m2.group("monthName");
568 month = monthFromName(monthName, regNumber);
569 if(month == null){
570 parseError = true;
571 }
572 } catch (IllegalArgumentException e){
573 // named capture group not found
574 }
575 try {
576 day = m2.group("day");
577 } catch (IllegalArgumentException e){
578 // named capture group not found
579 }
580
581 if(year != null){
582 if (year.length() == 2) {
583 // it is an abbreviated year from the 19** years
584 year = "19" + year;
585 }
586 break;
587 } else {
588 parseError = true;
589 }
590 }
591 }
592 if(year == null){
593 parseError = true;
594 }
595 List<DateTimeFieldType> types = new ArrayList<>();
596 List<Integer> values = new ArrayList<>();
597 if(!parseError) {
598 types.add(DateTimeFieldType.year());
599 values.add(Integer.parseInt(year));
600 if (month != null) {
601 types.add(DateTimeFieldType.monthOfYear());
602 values.add(Integer.parseInt(month));
603 }
604 if (day != null) {
605 types.add(DateTimeFieldType.dayOfMonth());
606 values.add(Integer.parseInt(day));
607 }
608 pupDate = new Partial(types.toArray(new DateTimeFieldType[types.size()]), ArrayUtils.toPrimitive(values.toArray(new Integer[values.size()])));
609 }
610 return pupDate;
611 }
612
613 private String monthFromName(String monthName, String regNumber) {
614
615 Integer month = monthFromNameMap.get(monthName.toLowerCase());
616 if(month == null){
617 logger.warn(csvReportLine(regNumber, "Unknown month name", monthName));
618 return null;
619 } else {
620 return month.toString();
621 }
622 }
623
624
625 private void addSpecimenTypes(BotanicalName taxonName, FieldUnit fieldUnit, String typeStr, TypesName typeName, boolean multiple, String regNumber){
626
627 if(StringUtils.isEmpty(typeStr)){
628 return;
629 }
630 typeStr = typeStr.trim().replaceAll("\\.$", "");
631
632 Collection collection = null;
633 DerivedUnit specimen = null;
634
635 List<DerivedUnit> specimens = new ArrayList<>();
636 if(multiple){
637 String[] tokens = typeStr.split("\\s?,\\s?");
638 for (String t : tokens) {
639 // command to list all complex parsabel types:
640 // csvcut -t -c RegistrationNo_Pk,Type iapt.csv | csvgrep -c Type -m "Holotype" | egrep -o 'Holotype:\s([A-Z]*\s)[^.]*?'
641 // csvcut -t -c RegistrationNo_Pk,Type iapt.csv | csvgrep -c Type -m "Holotype" | egrep -o 'Isotype[^:]*:\s([A-Z]*\s)[^.]*?'
642
643 if(!t.isEmpty()){
644 // trying to parse the string
645 specimen = parseSpecimenType(fieldUnit, typeName, collection, t, regNumber);
646 if(specimen != null){
647 specimens.add(specimen);
648 } else {
649 // parsing was not successful make simple specimen
650 specimens.add(makeSpecimenType(fieldUnit, t));
651 }
652 }
653 }
654 } else {
655 specimen = parseSpecimenType(fieldUnit, typeName, collection, typeStr, regNumber);
656 if(specimen != null) {
657 specimens.add(specimen);
658 // remember current collection
659 collection = specimen.getCollection();
660 } else {
661 // parsing was not successful make simple specimen
662 specimens.add(makeSpecimenType(fieldUnit, typeStr));
663 }
664 }
665
666 for(DerivedUnit s : specimens){
667 taxonName.addSpecimenTypeDesignation(s, typeName.status(), null, null, null, false, true);
668 }
669 }
670
671 private DerivedUnit makeSpecimenType(FieldUnit fieldUnit, String titleCache) {
672 DerivedUnit specimen;DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.PreservedSpecimen, fieldUnit);
673 facade.setTitleCache(titleCache.trim(), true);
674 specimen = facade.innerDerivedUnit();
675 return specimen;
676 }
677
678 /**
679 *
680 * @param fieldUnit
681 * @param typeName
682 * @param collection
683 * @param text
684 * @param regNumber
685 * @return
686 */
687 private DerivedUnit parseSpecimenType(FieldUnit fieldUnit, TypesName typeName, Collection collection, String text, String regNumber) {
688
689 DerivedUnit specimen = null;
690
691 String collectionCode = null;
692 String subCollectionStr = null;
693 String instituteStr = null;
694 String accessionNumber = null;
695
696 boolean unusualAccessionNumber = false;
697
698 text = text.trim();
699
700 // 1. For Isotypes often the accession number is noted alone if the
701 // preceeding entry has a collection code.
702 if(typeName .equals(TypesName.isotype) && collection != null){
703 Matcher m = accessionNumberOnlyPattern.matcher(text);
704 if(m.matches()){
705 try {
706 accessionNumber = m.group("accNumber");
707 specimen = makeSpecimenType(fieldUnit, collection, accessionNumber);
708 } catch (IllegalArgumentException e){
709 // match group acc_number not found
710 }
711 }
712 }
713
714 //2. try it the 'normal' way
715 if(specimen == null) {
716 for (Pattern p : specimenTypePatterns) {
717 Matcher m = p.matcher(text);
718 if (m.matches()) {
719 // collection code is mandatory
720 try {
721 collectionCode = m.group("colCode");
722 } catch (IllegalArgumentException e){
723 // match group colCode not found
724 }
725 try {
726 subCollectionStr = m.group("subCollection");
727 } catch (IllegalArgumentException e){
728 // match group subCollection not found
729 }
730 try {
731 instituteStr = m.group("institute");
732 } catch (IllegalArgumentException e){
733 // match group col_name not found
734 }
735 try {
736 accessionNumber = m.group("accNumber");
737
738 // try to improve the accessionNumber
739 if(accessionNumber!= null) {
740 accessionNumber = accessionNumber.trim();
741 Matcher m2 = accessionNumberOnlyPattern.matcher(accessionNumber);
742 String betterAccessionNumber = null;
743 if (m2.matches()) {
744 try {
745 betterAccessionNumber = m.group("accNumber");
746 } catch (IllegalArgumentException e) {
747 // match group acc_number not found
748 }
749 }
750 if (betterAccessionNumber != null) {
751 accessionNumber = betterAccessionNumber;
752 } else {
753 unusualAccessionNumber = true;
754 }
755 }
756
757 } catch (IllegalArgumentException e){
758 // match group acc_number not found
759 }
760
761 if(collectionCode == null && instituteStr == null){
762 logger.warn(csvReportLine(regNumber, "Type: neither 'collectionCode' nor 'institute' found in ", text));
763 continue;
764 }
765 collection = getCollection(collectionCode, instituteStr, subCollectionStr);
766 specimen = makeSpecimenType(fieldUnit, collection, accessionNumber);
767 break;
768 }
769 }
770 }
771 if(specimen == null) {
772 logger.warn(csvReportLine(regNumber, "Type: Could not parse specimen", typeName.name().toString(), text));
773 }
774 if(unusualAccessionNumber){
775 logger.warn(csvReportLine(regNumber, "Type: Unusual accession number", typeName.name().toString(), text, accessionNumber));
776 }
777 return specimen;
778 }
779
780 private DerivedUnit makeSpecimenType(FieldUnit fieldUnit, Collection collection, String accessionNumber) {
781
782 DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.PreservedSpecimen, fieldUnit);
783 facade.setCollection(collection);
784 if(accessionNumber != null){
785 facade.setAccessionNumber(accessionNumber);
786 }
787 return facade.innerDerivedUnit();
788 }
789
790 private BotanicalName makeBotanicalName(SimpleExcelTaxonImportState<CONFIG> state, String regNumber, String titleCacheStr, String nameStr,
791 String authorStr, String nomRefTitle) {
792
793 BotanicalName taxonName;// cache field for the taxonName.titleCache
794 String taxonNameTitleCache = null;
795 Map<String, AnnotationType> nameAnnotations = new HashMap<>();
796
797 // TitleCache preprocessing
798 if(titleCacheStr.endsWith(ANNOTATION_MARKER_STRING) || (authorStr != null && authorStr.endsWith(ANNOTATION_MARKER_STRING))){
799 nameAnnotations.put("Author abbreviation not checked.", AnnotationType.EDITORIAL());
800 titleCacheStr = titleCacheStr.replace(ANNOTATION_MARKER_STRING, "").trim();
801 if(authorStr != null) {
802 authorStr = authorStr.replace(ANNOTATION_MARKER_STRING, "").trim();
803 }
804 }
805
806 // parse the full taxon name
807 if(!StringUtils.isEmpty(nomRefTitle)){
808 String referenceSeparator = nomRefTitle.startsWith("in ") ? " " : ", ";
809 String taxonFullNameStr = titleCacheStr + referenceSeparator + nomRefTitle;
810 logger.debug(":::::" + taxonFullNameStr);
811 taxonName = (BotanicalName) nameParser.parseReferencedName(taxonFullNameStr, NomenclaturalCode.ICNAFP, null);
812 } else {
813 taxonName = (BotanicalName) nameParser.parseFullName(titleCacheStr, NomenclaturalCode.ICNAFP, null);
814 }
815
816 taxonNameTitleCache = taxonName.getTitleCache().trim();
817 if (taxonName.isProtectedTitleCache()) {
818 logger.warn(csvReportLine(regNumber, "Name could not be parsed", titleCacheStr));
819 } else {
820
821 boolean doRestoreTitleCacheStr = false;
822
823 // Check if titleCache and nameCache are plausible
824 String titleCacheCompareStr = titleCacheStr;
825 String nameCache = taxonName.getNameCache();
826 String nameCompareStr = nameStr;
827 if(taxonName.isBinomHybrid()){
828 titleCacheCompareStr = titleCacheCompareStr.replace(" x ", " ×");
829 nameCompareStr = nameCompareStr.replace(" x ", " ×");
830 }
831 if(taxonName.isMonomHybrid()){
832 titleCacheCompareStr = titleCacheCompareStr.replaceAll("^X ", "× ");
833 nameCompareStr = nameCompareStr.replace("^X ", "× ");
834 }
835 if(authorStr != null && authorStr.contains(" et ")){
836 titleCacheCompareStr = titleCacheCompareStr.replaceAll(" et ", " & ");
837 }
838 if (!taxonNameTitleCache.equals(titleCacheCompareStr)) {
839 logger.warn(csvReportLine(regNumber, "The generated titleCache differs from the imported string", taxonNameTitleCache, " != ", titleCacheStr, " ==> original titleCacheStr has been restored"));
840 doRestoreTitleCacheStr = true;
841 }
842 if (!nameCache.trim().equals(nameCompareStr)) {
843 logger.warn(csvReportLine(regNumber, "The parsed nameCache differs from field '" + NAMESTRING + "'", nameCache, " != ", nameCompareStr));
844 }
845
846 // Author
847 //nameParser.handleAuthors(taxonName, titleCacheStr, authorStr);
848 //if (!titleCacheStr.equals(taxonName.getTitleCache())) {
849 // logger.warn(regNumber + ": titleCache has changed after setting authors, will restore original titleCacheStr");
850 // doRestoreTitleCacheStr = true;
851 //}
852
853 if(doRestoreTitleCacheStr){
854 taxonName.setTitleCache(titleCacheStr, true);
855 }
856
857 // deduplicate
858 replaceAuthorNamesAndNomRef(state, taxonName);
859 }
860
861 // Annotations
862 if(!nameAnnotations.isEmpty()){
863 for(String text : nameAnnotations.keySet()){
864 taxonName.addAnnotation(Annotation.NewInstance(text, nameAnnotations.get(text), Language.DEFAULT()));
865 }
866 getNameService().save(taxonName);
867 }
868 return taxonName;
869 }
870
871 /**
872 * @param state
873 * @return
874 */
875 private TaxonNode getClassificationRootNode(IAPTImportState state) {
876
877 // Classification classification = state.getClassification();
878 // if (classification == null){
879 // IAPTImportConfigurator config = state.getConfig();
880 // classification = Classification.NewInstance(state.getConfig().getClassificationName());
881 // classification.setUuid(config.getClassificationUuid());
882 // classification.setReference(config.getSecReference());
883 // classification = getClassificationService().find(state.getConfig().getClassificationUuid());
884 // }
885 TaxonNode rootNode = state.getRootNode();
886 if (rootNode == null){
887 rootNode = getTaxonNodeService().find(ROOT_UUID);
888 }
889 if (rootNode == null){
890 Classification classification = state.getClassification();
891 if (classification == null){
892 Reference sec = state.getSecReference();
893 String classificationName = state.getConfig().getClassificationName();
894 Language language = Language.DEFAULT();
895 classification = Classification.NewInstance(classificationName, sec, language);
896 state.setClassification(classification);
897 classification.setUuid(state.getConfig().getClassificationUuid());
898 classification.getRootNode().setUuid(ROOT_UUID);
899 getClassificationService().save(classification);
900 }
901 rootNode = classification.getRootNode();
902 state.setRootNode(rootNode);
903 }
904 return rootNode;
905 }
906
907 private Collection getCollection(String collectionCode, String instituteStr, String subCollectionStr){
908
909 Collection superCollection = null;
910 if(subCollectionStr != null){
911 superCollection = getCollection(collectionCode, instituteStr, null);
912 collectionCode = subCollectionStr;
913 instituteStr = null;
914 }
915
916 final String key = collectionCode + "-#i:" + StringUtils.defaultString(instituteStr);
917
918 Collection collection = collectionMap.get(key);
919
920 if(collection == null) {
921 collection = Collection.NewInstance();
922 collection.setCode(collectionCode);
923 if(instituteStr != null){
924 collection.setInstitute(Institution.NewNamedInstance(instituteStr));
925 }
926 if(superCollection != null){
927 collection.setSuperCollection(superCollection);
928 }
929 collectionMap.put(key, collection);
930 getCollectionService().save(collection);
931 }
932
933 return collection;
934 }
935
936
937 /**
938 * @param record
939 * @param originalKey
940 * @param doUnescapeHtmlEntities
941 * @return
942 */
943 private String getValue(HashMap<String, String> record, String originalKey, boolean doUnescapeHtmlEntities) {
944 String value = record.get(originalKey);
945
946 value = fixCharacters(value);
947
948 if (! StringUtils.isBlank(value)) {
949 if (logger.isDebugEnabled()) {
950 logger.debug(originalKey + ": " + value);
951 }
952 value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
953 if(doUnescapeHtmlEntities){
954 value = StringEscapeUtils.unescapeHtml(value);
955 }
956 return value.trim();
957 }else{
958 return null;
959 }
960 }
961
962 /**
963 * Fixes broken characters.
964 * For details see
965 * http://dev.e-taxonomy.eu/redmine/issues/6035
966 *
967 * @param value
968 * @return
969 */
970 private String fixCharacters(String value) {
971
972 value = StringUtils.replace(value, "s$K", "š");
973 value = StringUtils.replace(value, "n$K", "ň");
974 value = StringUtils.replace(value, "e$K", "ě");
975 value = StringUtils.replace(value, "r$K", "ř");
976 value = StringUtils.replace(value, "c$K", "č");
977 value = StringUtils.replace(value, "z$K", "ž");
978 value = StringUtils.replace(value, "S>U$K", "Š");
979 value = StringUtils.replace(value, "C>U$K", "Č");
980 value = StringUtils.replace(value, "R>U$K", "Ř");
981 value = StringUtils.replace(value, "Z>U$K", "Ž");
982 value = StringUtils.replace(value, "g$K", "ǧ");
983 value = StringUtils.replace(value, "s$A", "ś");
984 value = StringUtils.replace(value, "n$A", "ń");
985 value = StringUtils.replace(value, "c$A", "ć");
986 value = StringUtils.replace(value, "e$E", "ę");
987 value = StringUtils.replace(value, "o$H", "õ");
988 value = StringUtils.replace(value, "s$C", "ş");
989 value = StringUtils.replace(value, "t$C", "ț");
990 value = StringUtils.replace(value, "S>U$C", "Ş");
991 value = StringUtils.replace(value, "a$O", "å");
992 value = StringUtils.replace(value, "A>U$O", "Å");
993 value = StringUtils.replace(value, "u$O", "ů");
994 value = StringUtils.replace(value, "g$B", "ğ");
995 value = StringUtils.replace(value, "g$B", "ĕ");
996 value = StringUtils.replace(value, "a$B", "ă");
997 value = StringUtils.replace(value, "l$/", "ł");
998 value = StringUtils.replace(value, ">i", "ı");
999 value = StringUtils.replace(value, "i$U", "ï");
1000 // Special-cases
1001 value = StringUtils.replace(value, "&yacute", "ý");
1002 value = StringUtils.replace(value, ">L", "Ł"); // corrected rule
1003 value = StringUtils.replace(value, "E>U$D", "З");
1004 value = StringUtils.replace(value, "S>U$E", "Ş");
1005 value = StringUtils.replace(value, "s$E", "ş");
1006
1007 value = StringUtils.replace(value, "c$k", "č");
1008 value = StringUtils.replace(value, " U$K", " Š");
1009
1010 value = StringUtils.replace(value, "B.O>U>!", "Ø");
1011 value = StringUtils.replace(value, "S$K", "Ŝ");
1012 value = StringUtils.replace(value, "§B>i", "ğ");
1013
1014
1015 return value;
1016 }
1017
1018
1019 /**
1020 * Stores taxa records in DB
1021 */
1022 @Override
1023 protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
1024
1025 String lineNumber = "L#" + state.getCurrentLine() + ": ";
1026 logger.setLevel(Level.DEBUG);
1027 HashMap<String, String> record = state.getOriginalRecord();
1028 logger.debug(lineNumber + record.toString());
1029
1030 Set<String> keys = record.keySet();
1031 for (String key: keys) {
1032 if (! expectedKeys.contains(key)){
1033 logger.warn(lineNumber + "Unexpected Key: " + key);
1034 }
1035 }
1036
1037 String reg_id = record.get(REGISTRATIONNO_PK);
1038
1039 //higherTaxon
1040 String higherTaxaString = record.get(HIGHERTAXON);
1041 boolean isFossil = false;
1042 if(higherTaxaString.startsWith("FOSSIL ")){
1043 higherTaxaString = higherTaxaString.replace("FOSSIL ", "");
1044 isFossil = true;
1045 }
1046 TaxonNode higherTaxon = getHigherTaxon(higherTaxaString, (IAPTImportState)state);
1047
1048 //Taxon
1049 Taxon taxon = makeTaxon(record, state, higherTaxon, isFossil);
1050 if (taxon == null){
1051 logger.warn(lineNumber + "taxon could not be created and is null");
1052 return;
1053 }
1054 ((IAPTImportState)state).setCurrentTaxon(taxon);
1055
1056
1057 return;
1058 }
1059
1060 private TaxonNode getHigherTaxon(String higherTaxaString, IAPTImportState state) {
1061 String[] higherTaxaNames = higherTaxaString.toLowerCase().replaceAll("[\\[\\]]", "").split(":");
1062 TaxonNode higherTaxonNode = null;
1063
1064 ITaxonTreeNode rootNode = getClassificationRootNode(state);
1065 for (String htn : higherTaxaNames) {
1066 htn = StringUtils.capitalize(htn.trim());
1067 Taxon higherTaxon = state.getHigherTaxon(htn);
1068 if (higherTaxon != null){
1069 higherTaxonNode = higherTaxon.getTaxonNodes().iterator().next();
1070 }else{
1071 BotanicalName name = makeHigherTaxonName(state, htn);
1072 Reference sec = state.getSecReference();
1073 higherTaxon = Taxon.NewInstance(name, sec);
1074 getTaxonService().save(higherTaxon);
1075 higherTaxonNode = rootNode.addChildTaxon(higherTaxon, sec, null);
1076 state.putHigherTaxon(htn, higherTaxon);
1077 getClassificationService().saveTreeNode(higherTaxonNode);
1078 }
1079 rootNode = higherTaxonNode;
1080 }
1081 return higherTaxonNode;
1082 }
1083
1084 private BotanicalName makeHigherTaxonName(IAPTImportState state, String name) {
1085
1086 Rank rank = guessRank(name);
1087
1088 BotanicalName taxonName = BotanicalName.NewInstance(rank);
1089 taxonName.addSource(makeOriginalSource(state));
1090 taxonName.setGenusOrUninomial(StringUtils.capitalize(name));
1091 return taxonName;
1092 }
1093
1094 private Rank guessRank(String name) {
1095
1096 // normalize
1097 name = name.replaceAll("\\(.*\\)", "").trim();
1098
1099 if(name.matches("^Plantae$|^Fungi$")){
1100 return Rank.KINGDOM();
1101 } else if(name.matches("^Incertae sedis$|^No group assigned$")){
1102 return rankFamilyIncertisSedis();
1103 } else if(name.matches(".*phyta$|.*mycota$")){
1104 return Rank.PHYLUM();
1105 } else if(name.matches(".*phytina$|.*mycotina$")){
1106 return Rank.SUBPHYLUM();
1107 } else if(name.matches("Gymnospermae$|.*ones$")){ // Monocotyledones, Dicotyledones
1108 return rankUnrankedSupraGeneric();
1109 } else if(name.matches(".*opsida$|.*phyceae$|.*mycetes$|.*ones$|^Musci$|^Hepaticae$")){
1110 return Rank.CLASS();
1111 } else if(name.matches(".*idae$|.*phycidae$|.*mycetidae$")){
1112 return Rank.SUBCLASS();
1113 } else if(name.matches(".*ales$")){
1114 return Rank.ORDER();
1115 } else if(name.matches(".*ineae$")){
1116 return Rank.SUBORDER();
1117 } else if(name.matches(".*aceae$")){
1118 return Rank.FAMILY();
1119 } else if(name.matches(".*oideae$")){
1120 return Rank.SUBFAMILY();
1121 } else
1122 // if(name.matches(".*eae$")){
1123 // return Rank.TRIBE();
1124 // } else
1125 if(name.matches(".*inae$")){
1126 return Rank.SUBTRIBE();
1127 } else if(name.matches(".*ae$")){
1128 return Rank.FAMILY();
1129 }
1130 return Rank.UNKNOWN_RANK();
1131 }
1132
1133 private Rank rankUnrankedSupraGeneric() {
1134
1135 if(rankUnrankedSupraGeneric == null){
1136 rankUnrankedSupraGeneric = Rank.NewInstance(RankClass.Suprageneric, "Unranked supra generic", " ", " ");
1137 getTermService().save(rankUnrankedSupraGeneric);
1138 }
1139 return rankUnrankedSupraGeneric;
1140 }
1141
1142 private Rank rankFamilyIncertisSedis() {
1143
1144 if(familyIncertisSedis == null){
1145 familyIncertisSedis = Rank.NewInstance(RankClass.Suprageneric, "Family incertis sedis", " ", " ");
1146 getTermService().save(familyIncertisSedis);
1147 }
1148 return familyIncertisSedis;
1149 }
1150
1151 private AnnotationType annotationTypeCaveats(){
1152 if(annotationTypeCaveats == null){
1153 annotationTypeCaveats = AnnotationType.NewInstance("Caveats", "Caveats", "");
1154 getTermService().save(annotationTypeCaveats);
1155 }
1156 return annotationTypeCaveats;
1157 }
1158
1159
1160 /**
1161 * @param state
1162 * @return
1163 */
1164 private IdentifiableSource makeOriginalSource(IAPTImportState state) {
1165 return IdentifiableSource.NewDataImportInstance("line: " + state.getCurrentLine(), null, state.getConfig().getSourceReference());
1166 }
1167
1168
1169 private Reference makeReference(IAPTImportState state, UUID uuidRef) {
1170 Reference ref = state.getReference(uuidRef);
1171 if (ref == null){
1172 ref = getReferenceService().find(uuidRef);
1173 state.putReference(uuidRef, ref);
1174 }
1175 return ref;
1176 }
1177
1178 private MarkerType markerTypeFossil(){
1179 if(this.markerTypeFossil == null){
1180 markerTypeFossil = MarkerType.NewInstance("isFossilTaxon", "isFossil", null);
1181 getTermService().save(this.markerTypeFossil);
1182 }
1183 return markerTypeFossil;
1184 }
1185
1186 private String csvReportLine(String regId, String message, String ... fields){
1187 StringBuilder out = new StringBuilder("regID#");
1188 out.append(regId).append(",\"").append(message).append('"');
1189
1190 for(String f : fields){
1191 out.append(",\"").append(f).append('"');
1192 }
1193 return out.toString();
1194 }
1195
1196
1197 }