ref #6026 fixing rank
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / iapt / IAPTExcelImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.iapt;
11
12 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
13 import eu.etaxonomy.cdm.common.CdmUtils;
14 import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
15 import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
16 import eu.etaxonomy.cdm.model.agent.Institution;
17 import eu.etaxonomy.cdm.model.agent.Person;
18 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
19 import eu.etaxonomy.cdm.model.common.*;
20 import eu.etaxonomy.cdm.model.name.*;
21 import eu.etaxonomy.cdm.model.occurrence.*;
22 import eu.etaxonomy.cdm.model.occurrence.Collection;
23 import eu.etaxonomy.cdm.model.reference.Reference;
24 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
25 import eu.etaxonomy.cdm.model.reference.ReferenceType;
26 import eu.etaxonomy.cdm.model.taxon.*;
27 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
28 import eu.etaxonomy.cdm.strategy.parser.ParserProblem;
29 import org.apache.commons.lang.ArrayUtils;
30 import org.apache.commons.lang.StringEscapeUtils;
31 import org.apache.commons.lang.StringUtils;
32 import org.apache.log4j.Level;
33 import org.apache.log4j.Logger;
34 import org.joda.time.DateTimeFieldType;
35 import org.joda.time.Partial;
36 import org.joda.time.format.DateTimeFormat;
37 import org.joda.time.format.DateTimeFormatter;
38 import org.springframework.stereotype.Component;
39
40 import java.util.*;
41 import java.util.regex.Matcher;
42 import java.util.regex.Pattern;
43
44 /**
45 * @author a.mueller
46 * @created 05.01.2016
47 */
48
49 @Component("iAPTExcelImport")
50 public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends SimpleExcelTaxonImport<CONFIG> {
private static final long serialVersionUID = -747486709409732371L;
private static final Logger logger = Logger.getLogger(IAPTExcelImport.class);
// Marker that may terminate a full name or an author string; makeBotanicalName() strips it
// and registers the note "Author abbreviation not checked." for the name instead.
public static final String ANNOTATION_MARKER_STRING = "[*]";


// NOTE(review): not referenced in the visible part of this class - presumably the uuid of the
// root node of the imported classification; confirm against the rest of the file.
private static UUID ROOT_UUID = UUID.fromString("4137fd2a-20f6-4e70-80b9-f296daf51d82");

// Parser shared by all name parsing done in this import.
private static NonViralNameParserImpl nameParser = NonViralNameParserImpl.NewInstance();

// Keys under which the values of one import record are looked up
// (see the getValue(record, KEY, ...) calls in makeTaxon()).
private final static String REGISTRATIONNO_PK= "RegistrationNo_Pk";
private final static String HIGHERTAXON= "HigherTaxon";
private final static String FULLNAME= "FullName";
private final static String AUTHORSSPELLING= "AuthorsSpelling";
private final static String LITSTRING= "LitString";
private final static String REGISTRATION= "Registration";
private final static String TYPE= "Type";
private final static String CAVEATS= "Caveats";
private final static String FULLBASIONYM= "FullBasionym";
private final static String FULLSYNSUBST= "FullSynSubst";
private final static String NOTESTXT= "NotesTxt";
private final static String REGDATE= "RegDate";
private final static String NAMESTRING= "NameString";
private final static String BASIONYMSTRING= "BasionymString";
private final static String SYNSUBSTSTR= "SynSubstStr";
private final static String AUTHORSTRING= "AuthorString";

// All record keys declared above, in one list.
private static List<String> expectedKeys= Arrays.asList(new String[]{
        REGISTRATIONNO_PK, HIGHERTAXON, FULLNAME, AUTHORSSPELLING, LITSTRING, REGISTRATION, TYPE, CAVEATS, FULLBASIONYM, FULLSYNSUBST, NOTESTXT, REGDATE, NAMESTRING, BASIONYMSTRING, SYNSUBSTSTR, AUTHORSTRING});
79
80 private static final Pattern nomRefTokenizeP = Pattern.compile("^(?<title>.*):\\s(?<detail>[^\\.:]+)\\.(?<date>.*?)(?:\\s\\((?<issue>[^\\)]*)\\)\\s*)?\\.?$");
81 private static final Pattern[] datePatterns = new Pattern[]{
82 // NOTE:
83 // The order of the patterns is extremely important!!!
84 //
85 // all patterns cover the years 1700 - 1999
86 Pattern.compile("^(?<year>1[7,8,9][0-9]{2})$"), // only year, like '1969'
87 Pattern.compile("^(?<monthName>\\p{L}+\\.?)\\s(?<day>[0-9]{1,2})(?:st|rd|th)?\\.?,?\\s(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like April 12, 1969 or april 12th 1999
88 Pattern.compile("^(?<monthName>\\p{L}+\\.?),?\\s?(?<year>(?:1[7,8,9])?[0-9]{2})$"), // April 99 or April, 1999 or Apr. 12
89 Pattern.compile("^(?<day>[0-9]{1,2})([\\.\\-/])(\\s?)(?<month>[0-1]?[0-9])\\2\\3(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12.04.1969 or 12. 04. 1969 or 12/04/1969 or 12-04-1969
90 Pattern.compile("^(?<day>[0-9]{1,2})([\\.\\-/])(?<month>[IVX]{1,2})\\2(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12-VI-1969
91 Pattern.compile("^(?:(?<day>[0-9]{1,2})(?:\\sde)\\s)(?<monthName>\\p{L}+)\\sde\\s(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full and partial date like 12 de Enero de 1999 or Enero de 1999
92 Pattern.compile("^(?<month>[0-1]?[0-9])([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like 04.1969 or 04/1969 or 04-1969
93 Pattern.compile("^(?<year>(?:1[7,8,9])?[0-9]{2})([\\.\\-/])(?<month>[0-1]?[0-9])$"),// partial date like 1999-04
94 Pattern.compile("^(?<month>[IVX]{1,2})([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like VI-1969
95 Pattern.compile("^(?<day>[0-9]{1,2})(?:[\\./]|th|rd|st)?\\s(?<monthName>\\p{L}+\\.?),?\\s?(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12. April 1969 or april 1999 or 22 Dec.1999
96 };
// Splits the content of the 'Type' field of a species (or lower) name into the field unit part
// and the optional holotype and isotype parts. The group names equal the TypesName constants.
private static final Pattern typeSpecimenSplitPattern = Pattern.compile("^(?:\"*[Tt]ype: (?<fieldUnit>.*?))(?:[Hh]olotype:(?<holotype>.*?)\\.?)?(?:[Ii]sotype[^:]*:(?<isotype>.*)\\.?)?\\.?$");

// Patterns applied to the 'Type' field of names above species rank, see makeNameTypeData().
private static final Pattern typeNameBasionymPattern = Pattern.compile("\\([Bb]asionym\\s?\\:\\s?(?<basionymName>[^\\)]*).*$");
private static final Pattern typeNameNotePattern = Pattern.compile("\\[([^\\[]*)"); // matches the inner of '[...]'
// NOTE(review): the group 'agent' is empty and '(<name>.*)' lacks the '?' of a named group, so
// this pattern defines no group called 'name' although makeNameTypeData() asks for it, and it
// can only match text containing the literal ":;<name>". The intended expression is unclear - confirm.
private static final Pattern typeNameSpecialSplitPattern = Pattern.compile("(?<note>.*\\;.*?)\\:(?<agent>)\\;(<name>.*)");

// Patterns used by parseFieldUnit(): the collector clause ("(leg. ...)" or a trailing " leg. ..."),
// its split into collector and detail, and a detail which is a collectors number ("No. ...").
private static final Pattern collectorPattern = Pattern.compile(".*?(?<fullStr1>\\(leg\\.\\s+(?<data1>[^\\)]*)\\))|.*?(?<fullStr2>\\sleg\\.\\s+(?<data2>.*?)\\.?)$");
private static final Pattern collectionDataPattern = Pattern.compile("^(?<collector>[^,]*),\\s?(?<detail>.*?)\\.?$");
private static final Pattern collectorsNumber = Pattern.compile("^([nN]o\\.\\s.*)$");

// AccessionNumbers: , #.*, n°:?, 96/3293, No..*, -?\w{1,3}-[0-9\-/]*
private static final Pattern accessionNumberOnlyPattern = Pattern.compile("^(?<accNumber>(?:n°\\:?\\s?|#|No\\.?\\s?)?[\\d\\w\\-/]*)$");

// Formats of a single specimen entry, tried in this order by parseSpecimenType().
private static final Pattern[] specimenTypePatterns = new Pattern[]{
        Pattern.compile("^(?<colCode>[A-Z]+|CPC Micropaleontology Lab\\.?)\\s+(?:\\((?<institute>.*[^\\)])\\))(?<accNumber>.*)?$"), // like: GAUF (Gansu Agricultural University) No. 1207-1222
        Pattern.compile("^(?<colCode>[A-Z]+|CPC Micropaleontology Lab\\.?)\\s+(?:Coll\\.\\s(?<subCollection>[^\\.,;]*)(.))(?<accNumber>.*)?$"), // like KASSEL Coll. Krasske, Praep. DII 78
        Pattern.compile("^(?:Coll\\.\\s(?<subCollection>[^\\.,;]*)(.))(?<institute>.*?)(?<accNumber>Praep\\..*)?$"), // like Coll. Lange-Bertalot, Bot. Inst., Univ. Frankfurt/Main, Germany Praep. Neukaledonien OTL 62
        Pattern.compile("^(?<colCode>[A-Z]+)(?:\\s+(?<accNumber>.*))?$"), // identifies the Collection code and takes the rest as accessionNumber if any
};
117 private static Map<String, Integer> monthFromNameMap = new HashMap<>();
118
119 static {
120 String[] ck = new String[]{"leden", "únor", "březen", "duben", "květen", "červen", "červenec ", "srpen", "září", "říjen", "listopad", "prosinec"};
121 String[] fr = new String[]{"janvier", "février", "mars", "avril", "mai", "juin", "juillet", "août", "septembre", "octobre", "novembre", "décembre"};
122 String[] de = new String[]{"januar", "februar", "märz", "april", "mai", "juni", "juli", "august", "september", "oktober", "november", "dezember"};
123 String[] en = new String[]{"january", "february", "march", "april", "may", "june", "july", "august", "september", "october", "november", "december"};
124 String[] it = new String[]{"gennaio", "febbraio", "marzo", "aprile", "maggio", "giugno", "luglio", "agosto", "settembre", "ottobre", "novembre", "dicembre"};
125 String[] sp = new String[]{"enero", "febrero", "marzo", "abril", "mayo", "junio", "julio", "agosto", "septiembre", "octubre", "noviembre", "diciembre"};
126 String[] de_abbrev = new String[]{"jan.", "feb.", "märz", "apr.", "mai", "jun.", "jul.", "aug.", "sept.", "okt.", "nov.", "dez."};
127 String[] en_abbrev = new String[]{"jan.", "feb.", "mar.", "apr.", "may", "jun.", "jul.", "aug.", "sep.", "oct.", "nov.", "dec."};
128 String[] port = new String[]{"Janeiro", "Fevereiro", "Março", "Abril", "Maio", "Junho", "Julho", "Agosto", "Setembro", "Outubro", "Novembro", "Dezembro"};
129 String[] rom_num = new String[]{"i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi", "xii"};
130
131 String[][] perLang = new String[][]{ck, de, fr, en, it, sp, port, de_abbrev, en_abbrev, rom_num};
132
133 for (String[] months: perLang) {
134 for(int m = 1; m < 13; m++){
135 monthFromNameMap.put(months[m - 1].toLowerCase(), m);
136 }
137 }
138
139 // special cases
140 monthFromNameMap.put("mar", 3);
141 monthFromNameMap.put("dec", 12);
142 monthFromNameMap.put("Februari", 2);
143 }
144
145
// Formats a date as its four digit year; makeTaxon() uses it to write the parsed
// publication year back into the reference string.
DateTimeFormatter formatterYear = DateTimeFormat.forPattern("yyyy");

// NOTE(review): not accessed in the visible part of this class - presumably the cache behind
// getCollection(); confirm against the rest of the file.
private Map<String, Collection> collectionMap = new HashMap<>();
149
150
151 enum TypesName {
152 fieldUnit, holotype, isotype;
153
154 public SpecimenTypeDesignationStatus status(){
155 switch (this) {
156 case holotype:
157 return SpecimenTypeDesignationStatus.HOLOTYPE();
158 case isotype:
159 return SpecimenTypeDesignationStatus.ISOTYPE();
160 default:
161 return null;
162 }
163 }
164 }
165
// NOTE(review): the following four fields are not assigned in the visible part of this class -
// presumably they are filled on first use by accessors like markerTypeFossil() and
// annotationTypeCaveats(); confirm against the rest of the file.
private MarkerType markerTypeFossil = null;
private Rank rankUnrankedSupraGeneric = null;
private Rank familyIncertisSedis = null;
private AnnotationType annotationTypeCaveats = null;

// The book "Las variedades tradicionales de frutales ...", created and saved by makeTaxon()
// the first time a reference string starting with that title is encountered.
private Reference bookVariedadesTradicionales = null;
172
173 private Taxon makeTaxon(HashMap<String, String> record, SimpleExcelTaxonImportState<CONFIG> state,
174 TaxonNode higherTaxonNode, boolean isFossil) {
175
176 String regNumber = getValue(record, REGISTRATIONNO_PK, false);
177 String regStr = getValue(record, REGISTRATION, true);
178 String titleCacheStr = getValue(record, FULLNAME, true);
179 String nameStr = getValue(record, NAMESTRING, true);
180 String authorStr = getValue(record, AUTHORSTRING, true);
181 String nomRefStr = getValue(record, LITSTRING, true);
182 String authorsSpelling = getValue(record, AUTHORSSPELLING, true);
183 String notesTxt = getValue(record, NOTESTXT, true);
184 String caveats = getValue(record, CAVEATS, true);
185 String fullSynSubstStr = getValue(record, FULLSYNSUBST, true);
186 String fullBasionymStr = getValue(record, FULLBASIONYM, true);
187 String basionymNameStr = getValue(record, FULLBASIONYM, true);
188 String synSubstStr = getValue(record, SYNSUBSTSTR, true);
189 String typeStr = getValue(record, TYPE, true);
190
191
192 String nomRefTitle = null;
193 String nomRefDetail;
194 String nomRefPupDate = null;
195 String nomRefIssue = null;
196 Partial pupDate = null;
197
198 boolean restoreOriginalReference = false;
199
200 // preprocess nomRef: separate citation, reference detail, publishing date
201 if(!StringUtils.isEmpty(nomRefStr)){
202 nomRefStr = nomRefStr.trim();
203
204 // handle the special case which is hard to parse:
205 //
206 // Las variedades tradicionales de frutales de la Cuenca del Río Segura. Catálogo Etnobotánico (1): Frutos secos, oleaginosos, frutales de hueso, almendros y frutales de pepita: 154. 1997.
207 if(nomRefStr.startsWith("Las variedades tradicionales de frutales ")){
208
209 if(bookVariedadesTradicionales == null){
210 bookVariedadesTradicionales = ReferenceFactory.newBook();
211 bookVariedadesTradicionales.setTitle("Las variedades tradicionales de frutales de la Cuenca del Río Segura. Catálogo Etnobotánico (1): Frutos secos, oleaginosos, frutales de hueso, almendros y frutales de pepita");
212 bookVariedadesTradicionales.setDatePublished(TimePeriod.NewInstance(1997));
213 getReferenceService().save(bookVariedadesTradicionales);
214 }
215 nomRefStr = nomRefStr.replaceAll("^.*?\\:.*?\\:", "Las variedades tradicionales:");
216 restoreOriginalReference = true;
217 }
218
219 Matcher m = nomRefTokenizeP.matcher(nomRefStr);
220 if(m.matches()){
221 nomRefTitle = m.group("title");
222 nomRefDetail = m.group("detail");
223 nomRefPupDate = m.group("date").trim();
224 nomRefIssue = m.group("issue");
225
226 pupDate = parseDate(regNumber, nomRefPupDate);
227 if (pupDate != null) {
228 nomRefTitle = nomRefTitle + ": " + nomRefDetail + ". " + pupDate.toString(formatterYear) + ".";
229 } else {
230 logger.warn(csvReportLine(regNumber, "Pub date", nomRefPupDate, "in", nomRefStr, "not parsable"));
231 }
232 } else {
233 nomRefTitle = nomRefStr;
234 }
235 }
236
237 BotanicalName taxonName = makeBotanicalName(state, regNumber, titleCacheStr, nameStr, authorStr, nomRefTitle);
238
239 // always add the original strings of parsed data as annotation
240 taxonName.addAnnotation(Annotation.NewInstance("imported and parsed data strings:" +
241 "\n - '" + LITSTRING + "': "+ nomRefStr +
242 "\n - '" + TYPE + "': " + typeStr +
243 "\n - '" + REGISTRATION + "': " + regStr
244 , AnnotationType.TECHNICAL(), Language.DEFAULT()));
245
246 if(restoreOriginalReference){
247 taxonName.setNomenclaturalReference(bookVariedadesTradicionales);
248 }
249 if(pupDate != null) {
250 taxonName.getNomenclaturalReference().setDatePublished(TimePeriod.NewInstance(pupDate));
251 }
252 if(nomRefIssue != null) {
253 ((Reference)taxonName.getNomenclaturalReference()).setVolume(nomRefIssue);
254 }
255
256
257 if(!StringUtils.isEmpty(notesTxt)){
258 notesTxt = notesTxt.replace("Notes: ", "").trim();
259 taxonName.addAnnotation(Annotation.NewInstance(notesTxt, AnnotationType.EDITORIAL(), Language.DEFAULT()));
260 }
261 if(!StringUtils.isEmpty(caveats)){
262 caveats = caveats.replace("Caveats: ", "").trim();
263 taxonName.addAnnotation(Annotation.NewInstance(caveats, annotationTypeCaveats(), Language.DEFAULT()));
264 }
265
266 getNameService().save(taxonName);
267
268 // Namerelations
269 if(!StringUtils.isEmpty(authorsSpelling)){
270 authorsSpelling = authorsSpelling.replaceFirst("Author's spelling:", "").replaceAll("\"", "").trim();
271
272 String[] authorSpellingTokens = StringUtils.split(authorsSpelling, " ");
273 String[] nameStrTokens = StringUtils.split(nameStr, " ");
274
275 ArrayUtils.reverse(authorSpellingTokens);
276 ArrayUtils.reverse(nameStrTokens);
277
278 for (int i = 0; i < nameStrTokens.length; i++){
279 if(i < authorSpellingTokens.length){
280 nameStrTokens[i] = authorSpellingTokens[i];
281 }
282 }
283 ArrayUtils.reverse(nameStrTokens);
284
285 String misspelledNameStr = StringUtils.join (nameStrTokens, ' ');
286 // build the fullnameString of the misspelled name
287 misspelledNameStr = taxonName.getTitleCache().replace(nameStr, misspelledNameStr);
288
289 TaxonNameBase misspelledName = (BotanicalName) nameParser.parseReferencedName(misspelledNameStr, NomenclaturalCode.ICNAFP, null);
290 misspelledName.addRelationshipToName(taxonName, NameRelationshipType.MISSPELLING(), null);
291 getNameService().save(misspelledName);
292 }
293
294 // Replaced Synonyms
295 if(!StringUtils.isEmpty(fullSynSubstStr)){
296 fullSynSubstStr = fullSynSubstStr.replace("Syn. subst.: ", "");
297 BotanicalName replacedSynonymName = makeBotanicalName(state, regNumber, fullSynSubstStr, synSubstStr, null, null);
298 replacedSynonymName.addReplacedSynonym(taxonName, null, null, null);
299 getNameService().save(replacedSynonymName);
300 }
301
302 Reference sec = state.getConfig().getSecReference();
303 Taxon taxon = Taxon.NewInstance(taxonName, sec);
304
305 // Basionym
306 if(fullBasionymStr != null){
307 fullBasionymStr = fullBasionymStr.replaceAll("^\\w*:\\s", ""); // Strip off the leading 'Basionym: "
308 basionymNameStr = basionymNameStr.replaceAll("^\\w*:\\s", ""); // Strip off the leading 'Basionym: "
309 BotanicalName basionym = makeBotanicalName(state, regNumber, fullBasionymStr, basionymNameStr, null, null);
310 getNameService().save(basionym);
311 taxonName.addBasionym(basionym);
312
313 Synonym syn = Synonym.NewInstance(basionym, sec);
314 taxon.addSynonym(syn, SynonymRelationshipType.HOMOTYPIC_SYNONYM_OF());
315 getTaxonService().save(syn);
316 }
317
318 // Markers
319 if(isFossil){
320 taxon.addMarker(Marker.NewInstance(markerTypeFossil(), true));
321 }
322
323 // Types
324 if(!StringUtils.isEmpty(typeStr)){
325
326 if(taxonName.getRank().isSpecies() || taxonName.getRank().isLower(Rank.SPECIES())) {
327 makeSpecimenTypeData(typeStr, taxonName, regNumber, state);
328 } else {
329 makeNameTypeData(typeStr, taxonName, regNumber, state);
330 }
331 }
332
333 getTaxonService().save(taxon);
334 if(higherTaxonNode != null){
335 higherTaxonNode.addChildTaxon(taxon, null, null);
336 getTaxonNodeService().save(higherTaxonNode);
337 }
338
339 return taxon;
340 }
341
342 private void makeSpecimenTypeData(String typeStr, BotanicalName taxonName, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) {
343
344 Matcher m = typeSpecimenSplitPattern.matcher(typeStr);
345
346 if(m.matches()){
347 String fieldUnitStr = m.group(TypesName.fieldUnit.name());
348 // boolean isFieldUnit = typeStr.matches(".*([°']|\\d+\\s?m\\s|\\d+\\s?km\\s).*"); // check for location or unit m, km // makes no sense!!!!
349 FieldUnit fieldUnit = parseFieldUnit(fieldUnitStr, regNumber, state);
350 if(fieldUnit == null) {
351 // create a field unit with only a titleCache using the fieldUnitStr substring
352 logger.warn(csvReportLine(regNumber, "Type: fieldUnitStr can not be parsed", fieldUnitStr));
353 fieldUnit = FieldUnit.NewInstance();
354 fieldUnit.setTitleCache(fieldUnitStr, true);
355 getOccurrenceService().save(fieldUnit);
356 }
357 getOccurrenceService().save(fieldUnit);
358
359 // all others ..
360 addSpecimenTypes(taxonName, fieldUnit, m.group(TypesName.holotype.name()), TypesName.holotype, false, regNumber);
361 addSpecimenTypes(taxonName, fieldUnit, m.group(TypesName.isotype.name()), TypesName.isotype, true, regNumber);
362
363 } else {
364 // create a field unit with only a titleCache using the full typeStr
365 FieldUnit fieldUnit = FieldUnit.NewInstance();
366 fieldUnit.setTitleCache(typeStr, true);
367 getOccurrenceService().save(fieldUnit);
368 logger.warn(csvReportLine(regNumber, "Type: field 'Type' can not be parsed", typeStr));
369 }
370 getNameService().save(taxonName);
371 }
372
373 private void makeNameTypeData(String typeStr, BotanicalName taxonName, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) {
374
375 String nameStr = typeStr.replaceAll("^Type\\s?\\:\\s?", "");
376 if(nameStr.isEmpty()) {
377 return;
378 }
379
380 String basionymNameStr = null;
381 String noteStr = null;
382 String agentStr = null;
383
384 Matcher m;
385
386 if(typeStr.startsWith("not to be indicated")){
387 // Special case:
388 // Type: not to be indicated (Art. H.9.1. Tokyo Code); stated parent genera: Hechtia Klotzsch; Deuterocohnia Mez
389 // FIXME
390 m = typeNameSpecialSplitPattern.matcher(nameStr);
391 if(m.matches()){
392 nameStr = m.group("name");
393 noteStr = m.group("note");
394 agentStr = m.group("agent");
395 // TODO better import of agent?
396 if(agentStr != null){
397 noteStr = noteStr + ": " + agentStr;
398 }
399 }
400 } else {
401 // Generic case
402 m = typeNameBasionymPattern.matcher(nameStr);
403 if (m.find()) {
404 basionymNameStr = m.group("basionymName");
405 if (basionymNameStr != null) {
406 nameStr = nameStr.replace(m.group(0), "");
407 }
408 }
409
410 m = typeNameNotePattern.matcher(nameStr);
411 if (m.find()) {
412 noteStr = m.group(1);
413 if (noteStr != null) {
414 nameStr = nameStr.replace(m.group(0), "");
415 }
416 }
417 }
418
419 BotanicalName typeName = (BotanicalName) nameParser.parseFullName(nameStr, NomenclaturalCode.ICNAFP, null);
420
421 if(typeName.isProtectedTitleCache() || typeName.getNomenclaturalReference() != null && typeName.getNomenclaturalReference().isProtectedTitleCache()) {
422 logger.warn(csvReportLine(regNumber, "NameType not parsable", typeStr, nameStr));
423 }
424
425 if(basionymNameStr != null){
426 BotanicalName basionymName = (BotanicalName) nameParser.parseFullName(nameStr, NomenclaturalCode.ICNAFP, null);
427 getNameService().save(basionymName);
428 typeName.addBasionym(basionymName);
429 }
430
431
432 NameTypeDesignation nameTypeDesignation = NameTypeDesignation.NewInstance();
433 nameTypeDesignation.setTypeName(typeName);
434 getNameService().save(typeName);
435
436 if(noteStr != null){
437 nameTypeDesignation.addAnnotation(Annotation.NewInstance(noteStr, AnnotationType.EDITORIAL(), Language.UNKNOWN_LANGUAGE()));
438 }
439 taxonName.addNameTypeDesignation(typeName, null, null, null, null, false);
440
441 }
442
443 /**
444 * Currently only parses the collector, fieldNumber and the collection date.
445 *
446 * @param fieldUnitStr
447 * @param regNumber
448 * @param state
449 * @return null if the fieldUnitStr could not be parsed
450 */
451 private FieldUnit parseFieldUnit(String fieldUnitStr, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) {
452
453 FieldUnit fieldUnit = null;
454
455 Matcher m1 = collectorPattern.matcher(fieldUnitStr);
456 if(m1.matches()){
457
458 String collectorData = m1.group(2); // like (leg. Metzeltin, 30. 9. 1996)
459 String removal = m1.group(1);
460 if(collectorData == null){
461 collectorData = m1.group(4); // like leg. Metzeltin, 30. 9. 1996
462 removal = m1.group(3);
463 }
464 if(collectorData == null){
465 return null;
466 }
467
468 // the fieldUnitStr is parsable
469 // remove all collectorData from the fieldUnitStr and use the rest as locality
470 String locality = fieldUnitStr.replace(removal, "");
471
472 String collectorStr = null;
473 String detailStr = null;
474 Partial date = null;
475 String fieldNumber = null;
476
477 Matcher m2 = collectionDataPattern.matcher(collectorData);
478 if(m2.matches()){
479 collectorStr = m2.group("collector");
480 detailStr = m2.group("detail");
481
482 // Try to make sense of the detailStr
483 if(detailStr != null){
484 detailStr = detailStr.trim();
485 // 1. try to parse as date
486 date = parseDate(regNumber, detailStr);
487 if(date == null){
488 // 2. try to parse as number
489 if(collectorsNumber.matcher(detailStr).matches()){
490 fieldNumber = detailStr;
491 }
492 }
493 }
494 if(date == null && fieldNumber == null){
495 // detailed parsing not possible, so need fo fallback
496 collectorStr = collectorData;
497 }
498 }
499
500 if(collectorStr == null) {
501 collectorStr = collectorData;
502 }
503
504 fieldUnit = FieldUnit.NewInstance();
505 GatheringEvent ge = GatheringEvent.NewInstance();
506 ge.setLocality(LanguageString.NewInstance(locality, Language.UNKNOWN_LANGUAGE()));
507
508 TeamOrPersonBase agent = state.getAgentBase(collectorStr);
509 if(agent == null) {
510 agent = Person.NewTitledInstance(collectorStr);
511 getAgentService().save(agent);
512 state.putAgentBase(collectorStr, agent);
513 }
514 ge.setCollector(agent);
515
516 if(date != null){
517 ge.setGatheringDate(date);
518 }
519
520 getEventBaseService().save(ge);
521 fieldUnit.setGatheringEvent(ge);
522
523 if(fieldNumber != null) {
524 fieldUnit.setFieldNumber(fieldNumber);
525 }
526 getOccurrenceService().save(fieldUnit);
527
528 }
529
530 return fieldUnit;
531 }
532
533 private Partial parseDate(String regNumber, String dateStr) {
534
535 Partial pupDate = null;
536 boolean parseError = false;
537
538 String day = null;
539 String month = null;
540 String monthName = null;
541 String year = null;
542
543 for(Pattern p : datePatterns){
544 Matcher m2 = p.matcher(dateStr);
545 if(m2.matches()){
546 try {
547 year = m2.group("year");
548 } catch (IllegalArgumentException e){
549 // named capture group not found
550 }
551 try {
552 month = m2.group("month");
553 } catch (IllegalArgumentException e){
554 // named capture group not found
555 }
556
557 try {
558 monthName = m2.group("monthName");
559 month = monthFromName(monthName, regNumber);
560 if(month == null){
561 parseError = true;
562 }
563 } catch (IllegalArgumentException e){
564 // named capture group not found
565 }
566 try {
567 day = m2.group("day");
568 } catch (IllegalArgumentException e){
569 // named capture group not found
570 }
571
572 if(year != null){
573 if (year.length() == 2) {
574 // it is an abbreviated year from the 19** years
575 year = "19" + year;
576 }
577 break;
578 } else {
579 parseError = true;
580 }
581 }
582 }
583 if(year == null){
584 parseError = true;
585 }
586 List<DateTimeFieldType> types = new ArrayList<>();
587 List<Integer> values = new ArrayList<>();
588 if(!parseError) {
589 types.add(DateTimeFieldType.year());
590 values.add(Integer.parseInt(year));
591 if (month != null) {
592 types.add(DateTimeFieldType.monthOfYear());
593 values.add(Integer.parseInt(month));
594 }
595 if (day != null) {
596 types.add(DateTimeFieldType.dayOfMonth());
597 values.add(Integer.parseInt(day));
598 }
599 pupDate = new Partial(types.toArray(new DateTimeFieldType[types.size()]), ArrayUtils.toPrimitive(values.toArray(new Integer[values.size()])));
600 }
601 return pupDate;
602 }
603
604 private String monthFromName(String monthName, String regNumber) {
605
606 Integer month = monthFromNameMap.get(monthName.toLowerCase());
607 if(month == null){
608 logger.warn(csvReportLine(regNumber, "Unknown month name", monthName));
609 return null;
610 } else {
611 return month.toString();
612 }
613 }
614
615
616 private void addSpecimenTypes(BotanicalName taxonName, FieldUnit fieldUnit, String typeStr, TypesName typeName, boolean multiple, String regNumber){
617
618 if(StringUtils.isEmpty(typeStr)){
619 return;
620 }
621 typeStr = typeStr.trim().replaceAll("\\.$", "");
622
623 Collection collection = null;
624 DerivedUnit specimen = null;
625
626 List<DerivedUnit> specimens = new ArrayList<>();
627 if(multiple){
628 String[] tokens = typeStr.split("\\s?,\\s?");
629 for (String t : tokens) {
630 // command to list all complex parsabel types:
631 // csvcut -t -c RegistrationNo_Pk,Type iapt.csv | csvgrep -c Type -m "Holotype" | egrep -o 'Holotype:\s([A-Z]*\s)[^.]*?'
632 // csvcut -t -c RegistrationNo_Pk,Type iapt.csv | csvgrep -c Type -m "Holotype" | egrep -o 'Isotype[^:]*:\s([A-Z]*\s)[^.]*?'
633
634 if(!t.isEmpty()){
635 // trying to parse the string
636 specimen = parseSpecimenType(fieldUnit, typeName, collection, t, regNumber);
637 if(specimen != null){
638 specimens.add(specimen);
639 } else {
640 // parsing was not successful make simple specimen
641 specimens.add(makeSpecimenType(fieldUnit, t));
642 }
643 }
644 }
645 } else {
646 specimen = parseSpecimenType(fieldUnit, typeName, collection, typeStr, regNumber);
647 if(specimen != null) {
648 specimens.add(specimen);
649 // remember current collection
650 collection = specimen.getCollection();
651 } else {
652 // parsing was not successful make simple specimen
653 specimens.add(makeSpecimenType(fieldUnit, typeStr));
654 }
655 }
656
657 for(DerivedUnit s : specimens){
658 taxonName.addSpecimenTypeDesignation(s, typeName.status(), null, null, null, false, true);
659 }
660 }
661
662 private DerivedUnit makeSpecimenType(FieldUnit fieldUnit, String titleCache) {
663 DerivedUnit specimen;DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.PreservedSpecimen, fieldUnit);
664 facade.setTitleCache(titleCache.trim(), true);
665 specimen = facade.innerDerivedUnit();
666 return specimen;
667 }
668
669 /**
670 *
671 * @param fieldUnit
672 * @param typeName
673 * @param collection
674 * @param text
675 * @param regNumber
676 * @return
677 */
678 private DerivedUnit parseSpecimenType(FieldUnit fieldUnit, TypesName typeName, Collection collection, String text, String regNumber) {
679
680 DerivedUnit specimen = null;
681
682 String collectionCode = null;
683 String subCollectionStr = null;
684 String instituteStr = null;
685 String accessionNumber = null;
686
687 boolean unusualAccessionNumber = false;
688
689 text = text.trim();
690
691 // 1. For Isotypes often the accession number is noted alone if the
692 // preceeding entry has a collection code.
693 if(typeName .equals(TypesName.isotype) && collection != null){
694 Matcher m = accessionNumberOnlyPattern.matcher(text);
695 if(m.matches()){
696 try {
697 accessionNumber = m.group("accNumber");
698 specimen = makeSpecimenType(fieldUnit, collection, accessionNumber);
699 } catch (IllegalArgumentException e){
700 // match group acc_number not found
701 }
702 }
703 }
704
705 //2. try it the 'normal' way
706 if(specimen == null) {
707 for (Pattern p : specimenTypePatterns) {
708 Matcher m = p.matcher(text);
709 if (m.matches()) {
710 // collection code is mandatory
711 try {
712 collectionCode = m.group("colCode");
713 } catch (IllegalArgumentException e){
714 // match group colCode not found
715 }
716 try {
717 subCollectionStr = m.group("subCollection");
718 } catch (IllegalArgumentException e){
719 // match group subCollection not found
720 }
721 try {
722 instituteStr = m.group("institute");
723 } catch (IllegalArgumentException e){
724 // match group col_name not found
725 }
726 try {
727 accessionNumber = m.group("accNumber");
728
729 // try to improve the accessionNumber
730 if(accessionNumber!= null) {
731 accessionNumber = accessionNumber.trim();
732 Matcher m2 = accessionNumberOnlyPattern.matcher(accessionNumber);
733 String betterAccessionNumber = null;
734 if (m2.matches()) {
735 try {
736 betterAccessionNumber = m.group("accNumber");
737 } catch (IllegalArgumentException e) {
738 // match group acc_number not found
739 }
740 }
741 if (betterAccessionNumber != null) {
742 accessionNumber = betterAccessionNumber;
743 } else {
744 unusualAccessionNumber = true;
745 }
746 }
747
748 } catch (IllegalArgumentException e){
749 // match group acc_number not found
750 }
751
752 if(collectionCode == null && instituteStr == null){
753 logger.warn(csvReportLine(regNumber, "Type: neither 'collectionCode' nor 'institute' found in ", text));
754 continue;
755 }
756 collection = getCollection(collectionCode, instituteStr, subCollectionStr);
757 specimen = makeSpecimenType(fieldUnit, collection, accessionNumber);
758 break;
759 }
760 }
761 }
762 if(specimen == null) {
763 logger.warn(csvReportLine(regNumber, "Type: Could not parse specimen", typeName.name().toString(), text));
764 }
765 if(unusualAccessionNumber){
766 logger.warn(csvReportLine(regNumber, "Type: Unusual accession number", typeName.name().toString(), text, accessionNumber));
767 }
768 return specimen;
769 }
770
771 private DerivedUnit makeSpecimenType(FieldUnit fieldUnit, Collection collection, String accessionNumber) {
772
773 DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.PreservedSpecimen, fieldUnit);
774 facade.setCollection(collection);
775 if(accessionNumber != null){
776 facade.setAccessionNumber(accessionNumber);
777 }
778 return facade.innerDerivedUnit();
779 }
780
781 private BotanicalName makeBotanicalName(SimpleExcelTaxonImportState<CONFIG> state, String regNumber, String titleCacheStr, String nameStr,
782 String authorStr, String nomRefTitle) {
783
784 BotanicalName taxonName;// cache field for the taxonName.titleCache
785 String taxonNameTitleCache = null;
786 Map<String, AnnotationType> nameAnnotations = new HashMap<>();
787
788 // TitleCache preprocessing
789 if(titleCacheStr.endsWith(ANNOTATION_MARKER_STRING) || (authorStr != null && authorStr.endsWith(ANNOTATION_MARKER_STRING))){
790 nameAnnotations.put("Author abbreviation not checked.", AnnotationType.EDITORIAL());
791 titleCacheStr = titleCacheStr.replace(ANNOTATION_MARKER_STRING, "").trim();
792 if(authorStr != null) {
793 authorStr = authorStr.replace(ANNOTATION_MARKER_STRING, "").trim();
794 }
795 }
796
797 // parse the full taxon name
798 if(!StringUtils.isEmpty(nomRefTitle)){
799 String referenceSeparator = nomRefTitle.startsWith("in ") ? " " : ", ";
800 String taxonFullNameStr = titleCacheStr + referenceSeparator + nomRefTitle;
801 logger.debug(":::::" + taxonFullNameStr);
802 taxonName = (BotanicalName) nameParser.parseReferencedName(taxonFullNameStr, NomenclaturalCode.ICNAFP, null);
803 } else {
804 taxonName = (BotanicalName) nameParser.parseFullName(titleCacheStr, NomenclaturalCode.ICNAFP, null);
805 }
806
807 taxonNameTitleCache = taxonName.getTitleCache().trim();
808 if (taxonName.isProtectedTitleCache()) {
809 logger.warn(csvReportLine(regNumber, "Name could not be parsed", titleCacheStr));
810 } else {
811
812 boolean doRestoreTitleCacheStr = false;
813
814 // Check if titleCache and nameCache are plausible
815 String titleCacheCompareStr = titleCacheStr;
816 String nameCache = taxonName.getNameCache();
817 String nameCompareStr = nameStr;
818 if(taxonName.isBinomHybrid()){
819 titleCacheCompareStr = titleCacheCompareStr.replace(" x ", " ×");
820 nameCompareStr = nameCompareStr.replace(" x ", " ×");
821 }
822 if(taxonName.isMonomHybrid()){
823 titleCacheCompareStr = titleCacheCompareStr.replaceAll("^X ", "× ");
824 nameCompareStr = nameCompareStr.replace("^X ", "× ");
825 }
826 if(authorStr != null && authorStr.contains(" et ")){
827 titleCacheCompareStr = titleCacheCompareStr.replaceAll(" et ", " & ");
828 }
829 if (!taxonNameTitleCache.equals(titleCacheCompareStr)) {
830 logger.warn(csvReportLine(regNumber, "The generated titleCache differs from the imported string", taxonNameTitleCache, " != ", titleCacheStr, " ==> original titleCacheStr has been restored"));
831 doRestoreTitleCacheStr = true;
832 }
833 if (!nameCache.trim().equals(nameCompareStr)) {
834 logger.warn(csvReportLine(regNumber, "The parsed nameCache differs from field '" + NAMESTRING + "'", nameCache, " != ", nameCompareStr));
835 }
836
837 // Author
838 //nameParser.handleAuthors(taxonName, titleCacheStr, authorStr);
839 //if (!titleCacheStr.equals(taxonName.getTitleCache())) {
840 // logger.warn(regNumber + ": titleCache has changed after setting authors, will restore original titleCacheStr");
841 // doRestoreTitleCacheStr = true;
842 //}
843
844 if(doRestoreTitleCacheStr){
845 taxonName.setTitleCache(titleCacheStr, true);
846 }
847
848 // deduplicate
849 replaceAuthorNamesAndNomRef(state, taxonName);
850 }
851
852 // Annotations
853 if(!nameAnnotations.isEmpty()){
854 for(String text : nameAnnotations.keySet()){
855 taxonName.addAnnotation(Annotation.NewInstance(text, nameAnnotations.get(text), Language.DEFAULT()));
856 }
857 getNameService().save(taxonName);
858 }
859 return taxonName;
860 }
861
862 /**
863 * @param state
864 * @return
865 */
866 private TaxonNode getClassificationRootNode(IAPTImportState state) {
867
868 // Classification classification = state.getClassification();
869 // if (classification == null){
870 // IAPTImportConfigurator config = state.getConfig();
871 // classification = Classification.NewInstance(state.getConfig().getClassificationName());
872 // classification.setUuid(config.getClassificationUuid());
873 // classification.setReference(config.getSecReference());
874 // classification = getClassificationService().find(state.getConfig().getClassificationUuid());
875 // }
876 TaxonNode rootNode = state.getRootNode();
877 if (rootNode == null){
878 rootNode = getTaxonNodeService().find(ROOT_UUID);
879 }
880 if (rootNode == null){
881 Classification classification = state.getClassification();
882 if (classification == null){
883 Reference sec = state.getSecReference();
884 String classificationName = state.getConfig().getClassificationName();
885 Language language = Language.DEFAULT();
886 classification = Classification.NewInstance(classificationName, sec, language);
887 state.setClassification(classification);
888 classification.setUuid(state.getConfig().getClassificationUuid());
889 classification.getRootNode().setUuid(ROOT_UUID);
890 getClassificationService().save(classification);
891 }
892 rootNode = classification.getRootNode();
893 state.setRootNode(rootNode);
894 }
895 return rootNode;
896 }
897
898 private Collection getCollection(String collectionCode, String instituteStr, String subCollectionStr){
899
900 Collection superCollection = null;
901 if(subCollectionStr != null){
902 superCollection = getCollection(collectionCode, instituteStr, null);
903 collectionCode = subCollectionStr;
904 instituteStr = null;
905 }
906
907 final String key = collectionCode + "-#i:" + StringUtils.defaultString(instituteStr);
908
909 Collection collection = collectionMap.get(key);
910
911 if(collection == null) {
912 collection = Collection.NewInstance();
913 collection.setCode(collectionCode);
914 if(instituteStr != null){
915 collection.setInstitute(Institution.NewNamedInstance(instituteStr));
916 }
917 if(superCollection != null){
918 collection.setSuperCollection(superCollection);
919 }
920 collectionMap.put(key, collection);
921 getCollectionService().save(collection);
922 }
923
924 return collection;
925 }
926
927
928 /**
929 * @param record
930 * @param originalKey
931 * @param doUnescapeHtmlEntities
932 * @return
933 */
934 private String getValue(HashMap<String, String> record, String originalKey, boolean doUnescapeHtmlEntities) {
935 String value = record.get(originalKey);
936
937 value = fixCharacters(value);
938
939 if (! StringUtils.isBlank(value)) {
940 if (logger.isDebugEnabled()) {
941 logger.debug(originalKey + ": " + value);
942 }
943 value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
944 if(doUnescapeHtmlEntities){
945 value = StringEscapeUtils.unescapeHtml(value);
946 }
947 return value.trim();
948 }else{
949 return null;
950 }
951 }
952
953 /**
954 * Fixes broken characters.
955 * For details see
956 * http://dev.e-taxonomy.eu/redmine/issues/6035
957 *
958 * @param value
959 * @return
960 */
961 private String fixCharacters(String value) {
962
963 value = StringUtils.replace(value, "s$K", "š");
964 value = StringUtils.replace(value, "n$K", "ň");
965 value = StringUtils.replace(value, "e$K", "ě");
966 value = StringUtils.replace(value, "r$K", "ř");
967 value = StringUtils.replace(value, "c$K", "č");
968 value = StringUtils.replace(value, "z$K", "ž");
969 value = StringUtils.replace(value, "S>U$K", "Š");
970 value = StringUtils.replace(value, "C>U$K", "Č");
971 value = StringUtils.replace(value, "R>U$K", "Ř");
972 value = StringUtils.replace(value, "Z>U$K", "Ž");
973 value = StringUtils.replace(value, "g$K", "ǧ");
974 value = StringUtils.replace(value, "s$A", "ś");
975 value = StringUtils.replace(value, "n$A", "ń");
976 value = StringUtils.replace(value, "c$A", "ć");
977 value = StringUtils.replace(value, "e$E", "ę");
978 value = StringUtils.replace(value, "o$H", "õ");
979 value = StringUtils.replace(value, "s$C", "ş");
980 value = StringUtils.replace(value, "t$C", "ț");
981 value = StringUtils.replace(value, "S>U$C", "Ş");
982 value = StringUtils.replace(value, "a$O", "å");
983 value = StringUtils.replace(value, "A>U$O", "Å");
984 value = StringUtils.replace(value, "u$O", "ů");
985 value = StringUtils.replace(value, "g$B", "ğ");
986 value = StringUtils.replace(value, "g$B", "ĕ");
987 value = StringUtils.replace(value, "a$B", "ă");
988 value = StringUtils.replace(value, "l$/", "ł");
989 value = StringUtils.replace(value, ">i", "ı");
990 value = StringUtils.replace(value, "i$U", "ï");
991 // Special-cases
992 value = StringUtils.replace(value, "&yacute", "ý");
993 value = StringUtils.replace(value, ">L", "Ł"); // corrected rule
994 value = StringUtils.replace(value, "E>U$D", "З");
995 value = StringUtils.replace(value, "S>U$E", "Ş");
996 value = StringUtils.replace(value, "s$E", "ş");
997
998 value = StringUtils.replace(value, "c$k", "č");
999 value = StringUtils.replace(value, " U$K", " Š");
1000
1001 return value;
1002 }
1003
1004
1005 /**
1006 * Stores taxa records in DB
1007 */
1008 @Override
1009 protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
1010
1011 String lineNumber = "L#" + state.getCurrentLine() + ": ";
1012 logger.setLevel(Level.DEBUG);
1013 HashMap<String, String> record = state.getOriginalRecord();
1014 logger.debug(lineNumber + record.toString());
1015
1016 Set<String> keys = record.keySet();
1017 for (String key: keys) {
1018 if (! expectedKeys.contains(key)){
1019 logger.warn(lineNumber + "Unexpected Key: " + key);
1020 }
1021 }
1022
1023 String reg_id = record.get(REGISTRATIONNO_PK);
1024
1025 //higherTaxon
1026 String higherTaxaString = record.get(HIGHERTAXON);
1027 boolean isFossil = false;
1028 if(higherTaxaString.startsWith("FOSSIL ")){
1029 higherTaxaString = higherTaxaString.replace("FOSSIL ", "");
1030 isFossil = true;
1031 }
1032 TaxonNode higherTaxon = getHigherTaxon(higherTaxaString, (IAPTImportState)state);
1033
1034 //Taxon
1035 Taxon taxon = makeTaxon(record, state, higherTaxon, isFossil);
1036 if (taxon == null){
1037 logger.warn(lineNumber + "taxon could not be created and is null");
1038 return;
1039 }
1040 ((IAPTImportState)state).setCurrentTaxon(taxon);
1041
1042
1043 return;
1044 }
1045
1046 private TaxonNode getHigherTaxon(String higherTaxaString, IAPTImportState state) {
1047 String[] higherTaxaNames = higherTaxaString.toLowerCase().replaceAll("[\\[\\]]", "").split(":");
1048 TaxonNode higherTaxonNode = null;
1049
1050 ITaxonTreeNode rootNode = getClassificationRootNode(state);
1051 for (String htn : higherTaxaNames) {
1052 htn = StringUtils.capitalize(htn.trim());
1053 Taxon higherTaxon = state.getHigherTaxon(htn);
1054 if (higherTaxon != null){
1055 higherTaxonNode = higherTaxon.getTaxonNodes().iterator().next();
1056 }else{
1057 BotanicalName name = makeHigherTaxonName(state, htn);
1058 Reference sec = state.getSecReference();
1059 higherTaxon = Taxon.NewInstance(name, sec);
1060 getTaxonService().save(higherTaxon);
1061 higherTaxonNode = rootNode.addChildTaxon(higherTaxon, sec, null);
1062 state.putHigherTaxon(htn, higherTaxon);
1063 getClassificationService().saveTreeNode(higherTaxonNode);
1064 }
1065 rootNode = higherTaxonNode;
1066 }
1067 return higherTaxonNode;
1068 }
1069
1070 private BotanicalName makeHigherTaxonName(IAPTImportState state, String name) {
1071
1072 Rank rank = guessRank(name);
1073
1074 BotanicalName taxonName = BotanicalName.NewInstance(rank);
1075 taxonName.addSource(makeOriginalSource(state));
1076 taxonName.setGenusOrUninomial(StringUtils.capitalize(name));
1077 return taxonName;
1078 }
1079
1080 private Rank guessRank(String name) {
1081
1082 // normalize
1083 name = name.replaceAll("\\(.*\\)", "").trim();
1084
1085 if(name.matches("^Plantae$|^Fungi$")){
1086 return Rank.KINGDOM();
1087 } else if(name.matches("^Incertae sedis$|^No group assigned$")){
1088 return rankFamilyIncertisSedis();
1089 } else if(name.matches(".*phyta$|.*mycota$")){
1090 return Rank.PHYLUM();
1091 } else if(name.matches(".*phytina$|.*mycotina$")){
1092 return Rank.SUBPHYLUM();
1093 } else if(name.matches("Gymnospermae$|.*ones$")){ // Monocotyledones, Dicotyledones
1094 return rankUnrankedSupraGeneric();
1095 } else if(name.matches(".*opsida$|.*phyceae$|.*mycetes$|.*ones$|^Musci$|^Hepaticae$")){
1096 return Rank.CLASS();
1097 } else if(name.matches(".*idae$|.*phycidae$|.*mycetidae$")){
1098 return Rank.SUBCLASS();
1099 } else if(name.matches(".*ales$")){
1100 return Rank.ORDER();
1101 } else if(name.matches(".*ineae$")){
1102 return Rank.SUBORDER();
1103 } else if(name.matches(".*aceae$")){
1104 return Rank.FAMILY();
1105 } else if(name.matches(".*oideae$")){
1106 return Rank.SUBFAMILY();
1107 } else
1108 // if(name.matches(".*eae$")){
1109 // return Rank.TRIBE();
1110 // } else
1111 if(name.matches(".*inae$")){
1112 return Rank.SUBTRIBE();
1113 } else if(name.matches(".*ae$")){
1114 return Rank.FAMILY();
1115 }
1116 return Rank.UNKNOWN_RANK();
1117 }
1118
1119 private Rank rankUnrankedSupraGeneric() {
1120
1121 if(rankUnrankedSupraGeneric == null){
1122 rankUnrankedSupraGeneric = Rank.NewInstance(RankClass.Suprageneric, "Unranked supra generic", " ", " ");
1123 getTermService().save(rankUnrankedSupraGeneric);
1124 }
1125 return rankUnrankedSupraGeneric;
1126 }
1127
1128 private Rank rankFamilyIncertisSedis() {
1129
1130 if(familyIncertisSedis == null){
1131 familyIncertisSedis = Rank.NewInstance(RankClass.Suprageneric, "Family incertis sedis", " ", " ");
1132 getTermService().save(familyIncertisSedis);
1133 }
1134 return familyIncertisSedis;
1135 }
1136
1137 private AnnotationType annotationTypeCaveats(){
1138 if(annotationTypeCaveats == null){
1139 annotationTypeCaveats = AnnotationType.NewInstance("Caveats", "Caveats", "");
1140 getTermService().save(annotationTypeCaveats);
1141 }
1142 return annotationTypeCaveats;
1143 }
1144
1145
1146 /**
1147 * @param state
1148 * @return
1149 */
1150 private IdentifiableSource makeOriginalSource(IAPTImportState state) {
1151 return IdentifiableSource.NewDataImportInstance("line: " + state.getCurrentLine(), null, state.getConfig().getSourceReference());
1152 }
1153
1154
1155 private Reference makeReference(IAPTImportState state, UUID uuidRef) {
1156 Reference ref = state.getReference(uuidRef);
1157 if (ref == null){
1158 ref = getReferenceService().find(uuidRef);
1159 state.putReference(uuidRef, ref);
1160 }
1161 return ref;
1162 }
1163
1164 private MarkerType markerTypeFossil(){
1165 if(this.markerTypeFossil == null){
1166 markerTypeFossil = MarkerType.NewInstance("isFossilTaxon", "isFossil", null);
1167 getTermService().save(this.markerTypeFossil);
1168 }
1169 return markerTypeFossil;
1170 }
1171
1172 private String csvReportLine(String regId, String message, String ... fields){
1173 StringBuilder out = new StringBuilder("regID#");
1174 out.append(regId).append(",\"").append(message).append('"');
1175
1176 for(String f : fields){
1177 out.append(",\"").append(f).append('"');
1178 }
1179 return out.toString();
1180 }
1181
1182
1183 }