2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.iapt
;
12 import com
.fasterxml
.jackson
.core
.JsonProcessingException
;
13 import com
.fasterxml
.jackson
.databind
.ObjectMapper
;
14 import eu
.etaxonomy
.cdm
.api
.facade
.DerivedUnitFacade
;
15 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
16 import eu
.etaxonomy
.cdm
.io
.mexico
.SimpleExcelTaxonImport
;
17 import eu
.etaxonomy
.cdm
.io
.mexico
.SimpleExcelTaxonImportState
;
18 import eu
.etaxonomy
.cdm
.model
.agent
.Institution
;
19 import eu
.etaxonomy
.cdm
.model
.agent
.Person
;
20 import eu
.etaxonomy
.cdm
.model
.agent
.TeamOrPersonBase
;
21 import eu
.etaxonomy
.cdm
.model
.common
.*;
22 import eu
.etaxonomy
.cdm
.model
.name
.*;
23 import eu
.etaxonomy
.cdm
.model
.occurrence
.*;
24 import eu
.etaxonomy
.cdm
.model
.occurrence
.Collection
;
25 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
26 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
27 import eu
.etaxonomy
.cdm
.model
.taxon
.*;
28 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImpl
;
29 import org
.apache
.commons
.lang
.ArrayUtils
;
30 import org
.apache
.commons
.lang
.StringEscapeUtils
;
31 import org
.apache
.commons
.lang
.StringUtils
;
32 import org
.apache
.log4j
.Level
;
33 import org
.apache
.log4j
.Logger
;
34 import org
.joda
.time
.DateTimeFieldType
;
35 import org
.joda
.time
.Partial
;
36 import org
.joda
.time
.format
.DateTimeFormat
;
37 import org
.joda
.time
.format
.DateTimeFormatter
;
38 import org
.springframework
.stereotype
.Component
;
41 import java
.util
.regex
.Matcher
;
42 import java
.util
.regex
.Pattern
;
49 @Component("iAPTExcelImport")
50 public class IAPTExcelImport
<CONFIG
extends IAPTImportConfigurator
> extends SimpleExcelTaxonImport
<CONFIG
> {
51 private static final long serialVersionUID
= -747486709409732371L;
52 private static final Logger logger
= Logger
.getLogger(IAPTExcelImport
.class);
53 public static final String ANNOTATION_MARKER_STRING
= "[*]";
56 private static UUID ROOT_UUID
= UUID
.fromString("4137fd2a-20f6-4e70-80b9-f296daf51d82");
58 private static NonViralNameParserImpl nameParser
= NonViralNameParserImpl
.NewInstance();
60 private final static String REGISTRATIONNO_PK
= "RegistrationNo_Pk";
61 private final static String HIGHERTAXON
= "HigherTaxon";
62 private final static String FULLNAME
= "FullName";
63 private final static String AUTHORSSPELLING
= "AuthorsSpelling";
64 private final static String LITSTRING
= "LitString";
65 private final static String REGISTRATION
= "Registration";
66 private final static String TYPE
= "Type";
67 private final static String CAVEATS
= "Caveats";
68 private final static String FULLBASIONYM
= "FullBasionym";
69 private final static String FULLSYNSUBST
= "FullSynSubst";
70 private final static String NOTESTXT
= "NotesTxt";
71 private final static String REGDATE
= "RegDate";
72 private final static String NAMESTRING
= "NameString";
73 private final static String BASIONYMSTRING
= "BasionymString";
74 private final static String SYNSUBSTSTR
= "SynSubstStr";
75 private final static String AUTHORSTRING
= "AuthorString";
77 private static List
<String
> expectedKeys
= Arrays
.asList(new String
[]{
78 REGISTRATIONNO_PK
, HIGHERTAXON
, FULLNAME
, AUTHORSSPELLING
, LITSTRING
, REGISTRATION
, TYPE
, CAVEATS
, FULLBASIONYM
, FULLSYNSUBST
, NOTESTXT
, REGDATE
, NAMESTRING
, BASIONYMSTRING
, SYNSUBSTSTR
, AUTHORSTRING
});
80 private static final Pattern nomRefTokenizeP
= Pattern
.compile("^(?<title>.*):\\s(?<detail>[^\\.:]+)\\.(?<date>.*?)(?:\\s\\((?<issue>[^\\)]*)\\)\\s*)?\\.?$");
81 private static final Pattern
[] datePatterns
= new Pattern
[]{
83 // The order of the patterns is extremely important!!!
85 // all patterns cover the years 1700 - 1999
86 Pattern
.compile("^(?<year>1[7,8,9][0-9]{2})$"), // only year, like '1969'
87 Pattern
.compile("^(?<monthName>\\p{L}+\\.?)\\s(?<day>[0-9]{1,2})(?:st|rd|th)?\\.?,?\\s(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like April 12, 1969 or april 12th 1999
88 Pattern
.compile("^(?<monthName>\\p{L}+\\.?),?\\s?(?<year>(?:1[7,8,9])?[0-9]{2})$"), // April 99 or April, 1999 or Apr. 12
89 Pattern
.compile("^(?<day>[0-9]{1,2})([\\.\\-/])(\\s?)(?<month>[0-1]?[0-9])\\2\\3(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12.04.1969 or 12. 04. 1969 or 12/04/1969 or 12-04-1969
90 Pattern
.compile("^(?<day>[0-9]{1,2})([\\.\\-/])(?<monthName>[IVX]{1,2})\\2(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12-VI-1969
91 Pattern
.compile("^(?:(?<day>[0-9]{1,2})(?:\\sde)?\\s)?(?<monthName>\\p{L}+)(?:\\sde)?\\s(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full and partial date like 12 de Enero de 1999 or Enero de 1999
92 Pattern
.compile("^(?<month>[0-1]?[0-9])([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like 04.1969 or 04/1969 or 04-1969
93 Pattern
.compile("^(?<year>(?:1[7,8,9])?[0-9]{2})([\\.\\-/])(?<month>[0-1]?[0-9])$"),// partial date like 1999-04
94 Pattern
.compile("^(?<monthName>[IVX]{1,2})([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like VI-1969
95 Pattern
.compile("^(?<day>[0-9]{1,2})(?:[\\./]|th|rd|st)?\\s(?<monthName>\\p{L}+\\.?),?\\s?(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12. April 1969 or april 1999 or 22 Dec.1999
97 protected static final Pattern typeSpecimenSplitPattern
= Pattern
.compile("^(?:\"*[Tt]ype: (?<fieldUnit>.*?))(?:[Hh]olotype:(?<holotype>.*?)\\.?)?(?:[Ii]sotype[^:]*:(?<isotype>.*)\\.?)?\\.?$");
99 private static final Pattern typeNameBasionymPattern
= Pattern
.compile("\\([Bb]asionym\\s?\\:\\s?(?<basionymName>[^\\)]*).*$");
100 private static final Pattern typeNameNotePattern
= Pattern
.compile("\\[([^\\[]*)"); // matches the inner of '[...]'
101 private static final Pattern typeNameSpecialSplitPattern
= Pattern
.compile("(?<note>.*\\;.*?)\\:(?<agent>)\\;(<name>.*)");
103 protected static final Pattern collectorPattern
= Pattern
.compile(".*?(?<fullStr1>\\([Ll]eg\\.\\s+(?<data1>[^\\)]*)\\)).*$|.*?(?<fullStr2>\\s[Ll]eg\\.\\:?\\s+(?<data2>.*?)\\.?)$|^(?<fullStr3>[Ll]eg\\.\\:?\\s+(?<data3>.*?)\\.?)");
104 private static final Pattern collectionDataPattern
= Pattern
.compile("^(?<collector>[^,]*),\\s?(?<detail>.*?)\\.?$");
105 private static final Pattern collectorsNumber
= Pattern
.compile("^([nN]o\\.\\s.*)$");
107 // AccessionNumbers: , #.*, n°:?, 96/3293, No..*, -?\w{1,3}-[0-9\-/]*
108 private static final Pattern accessionNumberOnlyPattern
= Pattern
.compile("^(?<accNumber>(?:n°\\:?\\s?|#|No\\.?\\s?)?[\\d\\w\\-/]*)$");
110 private static final Pattern
[] specimenTypePatterns
= new Pattern
[]{
111 Pattern
.compile("^(?<colCode>[A-Z]+|CPC Micropaleontology Lab\\.?)\\s+(?:\\((?<institute>.*[^\\)])\\))(?<accNumber>.*)?$"), // like: GAUF (Gansu Agricultural University) No. 1207-1222
112 Pattern
.compile("^(?<colCode>[A-Z]+|CPC Micropaleontology Lab\\.?)\\s+(?:Coll\\.\\s(?<subCollection>[^\\.,;]*)(.))(?<accNumber>.*)?$"), // like KASSEL Coll. Krasske, Praep. DII 78
113 Pattern
.compile("^(?:in\\s)?(?<institute>[Cc]oll\\.\\s.*?)(?:\\s+(?<accNumber>(Praep\\.|slide|No\\.|Inv\\. Nr\\.|Nr\\.).*))?$"), // like Coll. Lange-Bertalot, Bot. Inst., Univ. Frankfurt/Main, Germany Praep. Neukaledonien OTL 62
114 Pattern
.compile("^(?<institute>Inst\\.\\s.*?)\\s+(?<accNumber>N\\s.*)?$"), // like Inst. Geological Sciences, Acad. Sci. Belarus, Minsk N 212 A
115 Pattern
.compile("^(?<colCode>[A-Z]+)(?:\\s+(?<accNumber>.*))?$"), // identifies the Collection code and takes the rest as accessionNumber if any
119 private static final Pattern registrationPattern
= Pattern
.compile("^Registration date\\:\\s(?<regdate>\\d\\d\\.\\d\\d\\.\\d\\d); no\\.\\:\\s(?<regid>\\d+);\\soffice\\:\\s(?<office>.*?)\\.(?:\\s\\[Form no\\.\\:\\s(?<formNo>d+)\\])?$"); // Registration date: 29.06.98; no.: 2922; office: Berlin.
121 private static Map
<String
, Integer
> monthFromNameMap
= new HashMap
<>();
124 String
[] ck
= new String
[]{"leden", "únor", "březen", "duben", "květen", "červen", "červenec ", "srpen", "září", "říjen", "listopad", "prosinec"};
125 String
[] fr
= new String
[]{"janvier", "février", "mars", "avril", "mai", "juin", "juillet", "août", "septembre", "octobre", "novembre", "décembre"};
126 String
[] de
= new String
[]{"januar", "februar", "märz", "april", "mai", "juni", "juli", "august", "september", "oktober", "november", "dezember"};
127 String
[] en
= new String
[]{"january", "february", "march", "april", "may", "june", "july", "august", "september", "october", "november", "december"};
128 String
[] it
= new String
[]{"gennaio", "febbraio", "marzo", "aprile", "maggio", "giugno", "luglio", "agosto", "settembre", "ottobre", "novembre", "dicembre"};
129 String
[] sp
= new String
[]{"enero", "febrero", "marzo", "abril", "mayo", "junio", "julio", "agosto", "septiembre", "octubre", "noviembre", "diciembre"};
130 String
[] de_abbrev
= new String
[]{"jan.", "feb.", "märz", "apr.", "mai", "jun.", "jul.", "aug.", "sept.", "okt.", "nov.", "dez."};
131 String
[] en_abbrev
= new String
[]{"jan.", "feb.", "mar.", "apr.", "may", "jun.", "jul.", "aug.", "sep.", "oct.", "nov.", "dec."};
132 String
[] port
= new String
[]{"Janeiro", "Fevereiro", "Março", "Abril", "Maio", "Junho", "Julho", "Agosto", "Setembro", "Outubro", "Novembro", "Dezembro"};
133 String
[] rom_num
= new String
[]{"i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi", "xii"};
135 String
[][] perLang
= new String
[][]{ck
, de
, fr
, en
, it
, sp
, port
, de_abbrev
, en_abbrev
, rom_num
};
137 for (String
[] months
: perLang
) {
138 for(int m
= 1; m
< 13; m
++){
139 monthFromNameMap
.put(months
[m
- 1].toLowerCase(), m
);
144 monthFromNameMap
.put("mar", 3);
145 monthFromNameMap
.put("dec", 12);
146 monthFromNameMap
.put("februari", 2);
147 monthFromNameMap
.put("març", 3);
151 DateTimeFormatter formatterYear
= DateTimeFormat
.forPattern("yyyy");
153 private Map
<String
, Collection
> collectionMap
= new HashMap
<>();
154 private ExtensionType extensionTypeIAPTRegData
= null;
158 fieldUnit
, holotype
, isotype
;
160 public SpecimenTypeDesignationStatus
status(){
163 return SpecimenTypeDesignationStatus
.HOLOTYPE();
165 return SpecimenTypeDesignationStatus
.ISOTYPE();
172 private MarkerType markerTypeFossil
= null;
173 private Rank rankUnrankedSupraGeneric
= null;
174 private Rank familyIncertisSedis
= null;
175 private AnnotationType annotationTypeCaveats
= null;
177 private Reference bookVariedadesTradicionales
= null;
180 * HACK for unit simple testing
182 boolean _testMode
= System
.getProperty("TEST_MODE") != null;
184 private Taxon
makeTaxon(HashMap
<String
, String
> record
, SimpleExcelTaxonImportState
<CONFIG
> state
,
185 TaxonNode higherTaxonNode
, boolean isFossil
) {
187 String regNumber
= getValue(record
, REGISTRATIONNO_PK
, false);
188 String regStr
= getValue(record
, REGISTRATION
, true);
189 String titleCacheStr
= getValue(record
, FULLNAME
, true);
190 String nameStr
= getValue(record
, NAMESTRING
, true);
191 String authorStr
= getValue(record
, AUTHORSTRING
, true);
192 String nomRefStr
= getValue(record
, LITSTRING
, true);
193 String authorsSpelling
= getValue(record
, AUTHORSSPELLING
, true);
194 String notesTxt
= getValue(record
, NOTESTXT
, true);
195 String caveats
= getValue(record
, CAVEATS
, true);
196 String fullSynSubstStr
= getValue(record
, FULLSYNSUBST
, true);
197 String fullBasionymStr
= getValue(record
, FULLBASIONYM
, true);
198 String basionymNameStr
= getValue(record
, FULLBASIONYM
, true);
199 String synSubstStr
= getValue(record
, SYNSUBSTSTR
, true);
200 String typeStr
= getValue(record
, TYPE
, true);
203 String nomRefTitle
= null;
205 String nomRefPupDate
= null;
206 String nomRefIssue
= null;
207 Partial pupDate
= null;
209 boolean restoreOriginalReference
= false;
210 boolean nameIsValid
= true;
212 // preprocess nomRef: separate citation, reference detail, publishing date
213 if(!StringUtils
.isEmpty(nomRefStr
)){
214 nomRefStr
= nomRefStr
.trim();
216 // handle the special case which is hard to parse:
218 // Las variedades tradicionales de frutales de la Cuenca del Río Segura. Catálogo Etnobotánico (1): Frutos secos, oleaginosos, frutales de hueso, almendros y frutales de pepita: 154. 1997.
219 if(nomRefStr
.startsWith("Las variedades tradicionales de frutales ")){
221 if(bookVariedadesTradicionales
== null){
222 bookVariedadesTradicionales
= ReferenceFactory
.newBook();
223 bookVariedadesTradicionales
.setTitle("Las variedades tradicionales de frutales de la Cuenca del Río Segura. Catálogo Etnobotánico (1): Frutos secos, oleaginosos, frutales de hueso, almendros y frutales de pepita");
224 bookVariedadesTradicionales
.setDatePublished(TimePeriod
.NewInstance(1997));
225 getReferenceService().save(bookVariedadesTradicionales
);
227 nomRefStr
= nomRefStr
.replaceAll("^.*?\\:.*?\\:", "Las variedades tradicionales:");
228 restoreOriginalReference
= true;
231 Matcher m
= nomRefTokenizeP
.matcher(nomRefStr
);
233 nomRefTitle
= m
.group("title");
234 nomRefDetail
= m
.group("detail");
235 nomRefPupDate
= m
.group("date").trim();
236 nomRefIssue
= m
.group("issue");
238 pupDate
= parseDate(regNumber
, nomRefPupDate
);
239 if (pupDate
!= null) {
240 nomRefTitle
= nomRefTitle
+ ": " + nomRefDetail
+ ". " + pupDate
.toString(formatterYear
) + ".";
242 logger
.warn(csvReportLine(regNumber
, "Pub date", nomRefPupDate
, "in", nomRefStr
, "not parsable"));
245 nomRefTitle
= nomRefStr
;
249 BotanicalName taxonName
= makeBotanicalName(state
, regNumber
, titleCacheStr
, nameStr
, authorStr
, nomRefTitle
);
251 // always add the original strings of parsed data as annotation
252 taxonName
.addAnnotation(Annotation
.NewInstance("imported and parsed data strings:" +
253 "\n - '" + LITSTRING
+ "': "+ nomRefStr
+
254 "\n - '" + TYPE
+ "': " + typeStr
+
255 "\n - '" + REGISTRATION
+ "': " + regStr
256 , AnnotationType
.TECHNICAL(), Language
.DEFAULT()));
258 if(restoreOriginalReference
){
259 taxonName
.setNomenclaturalReference(bookVariedadesTradicionales
);
261 if(pupDate
!= null) {
262 taxonName
.getNomenclaturalReference().setDatePublished(TimePeriod
.NewInstance(pupDate
));
264 if(nomRefIssue
!= null) {
265 ((Reference
)taxonName
.getNomenclaturalReference()).setVolume(nomRefIssue
);
269 if(!StringUtils
.isEmpty(notesTxt
)){
270 notesTxt
= notesTxt
.replace("Notes: ", "").trim();
271 taxonName
.addAnnotation(Annotation
.NewInstance(notesTxt
, AnnotationType
.EDITORIAL(), Language
.DEFAULT()));
275 if(!StringUtils
.isEmpty(caveats
)){
276 caveats
= caveats
.replace("Caveats: ", "").trim();
277 taxonName
.addAnnotation(Annotation
.NewInstance(caveats
, annotationTypeCaveats(), Language
.DEFAULT()));
282 // Status is always considered valid if no notes and cavets are set
283 taxonName
.addStatus(NomenclaturalStatus
.NewInstance(NomenclaturalStatusType
.VALID()));
286 getNameService().save(taxonName
);
289 if(!StringUtils
.isEmpty(authorsSpelling
)){
290 authorsSpelling
= authorsSpelling
.replaceFirst("Author's spelling:", "").replaceAll("\"", "").trim();
292 String
[] authorSpellingTokens
= StringUtils
.split(authorsSpelling
, " ");
293 String
[] nameStrTokens
= StringUtils
.split(nameStr
, " ");
295 ArrayUtils
.reverse(authorSpellingTokens
);
296 ArrayUtils
.reverse(nameStrTokens
);
298 for (int i
= 0; i
< nameStrTokens
.length
; i
++){
299 if(i
< authorSpellingTokens
.length
){
300 nameStrTokens
[i
] = authorSpellingTokens
[i
];
303 ArrayUtils
.reverse(nameStrTokens
);
305 String misspelledNameStr
= StringUtils
.join (nameStrTokens
, ' ');
306 // build the fullnameString of the misspelled name
307 misspelledNameStr
= taxonName
.getTitleCache().replace(nameStr
, misspelledNameStr
);
309 TaxonNameBase misspelledName
= (BotanicalName
) nameParser
.parseReferencedName(misspelledNameStr
, NomenclaturalCode
.ICNAFP
, null);
310 misspelledName
.addRelationshipToName(taxonName
, NameRelationshipType
.MISSPELLING(), null);
311 getNameService().save(misspelledName
);
315 if(!StringUtils
.isEmpty(fullSynSubstStr
)){
316 fullSynSubstStr
= fullSynSubstStr
.replace("Syn. subst.: ", "");
317 BotanicalName replacedSynonymName
= makeBotanicalName(state
, regNumber
, fullSynSubstStr
, synSubstStr
, null, null);
318 replacedSynonymName
.addReplacedSynonym(taxonName
, null, null, null);
319 getNameService().save(replacedSynonymName
);
322 Reference sec
= state
.getConfig().getSecReference();
323 Taxon taxon
= Taxon
.NewInstance(taxonName
, sec
);
326 if(fullBasionymStr
!= null){
327 fullBasionymStr
= fullBasionymStr
.replaceAll("^\\w*:\\s", ""); // Strip off the leading 'Basionym: "
328 basionymNameStr
= basionymNameStr
.replaceAll("^\\w*:\\s", ""); // Strip off the leading 'Basionym: "
329 BotanicalName basionym
= makeBotanicalName(state
, regNumber
, fullBasionymStr
, basionymNameStr
, null, null);
330 getNameService().save(basionym
);
331 taxonName
.addBasionym(basionym
);
333 Synonym syn
= Synonym
.NewInstance(basionym
, sec
);
334 taxon
.addSynonym(syn
, SynonymRelationshipType
.HOMOTYPIC_SYNONYM_OF());
335 getTaxonService().save(syn
);
340 taxon
.addMarker(Marker
.NewInstance(markerTypeFossil(), true));
344 if(!StringUtils
.isEmpty(typeStr
)){
346 if(taxonName
.getRank().isSpecies() || taxonName
.getRank().isLower(Rank
.SPECIES())) {
347 makeSpecimenTypeData(typeStr
, taxonName
, regNumber
, state
, false);
349 makeNameTypeData(typeStr
, taxonName
, regNumber
, state
);
353 getTaxonService().save(taxon
);
355 if(taxonName
.getRank().equals(Rank
.SPECIES()) || taxonName
.getRank().isLower(Rank
.SPECIES())){
356 // try to find the genus, it should have been imported already, Genera are coming first in the import file
357 Taxon genus
= ((IAPTImportState
)state
).getGenusTaxonMap().get(taxonName
.getGenusOrUninomial());
359 higherTaxonNode
= genus
.getTaxonNodes().iterator().next();
361 logger
.info(csvReportLine(regNumber
, "Parent genus not found for", nameStr
));
365 if(higherTaxonNode
!= null){
366 higherTaxonNode
.addChildTaxon(taxon
, null, null);
367 getTaxonNodeService().save(higherTaxonNode
);
370 if(taxonName
.getRank().isGenus()){
371 ((IAPTImportState
)state
).getGenusTaxonMap().put(taxonName
.getGenusOrUninomial(), taxon
);
377 private void makeSpecimenTypeData(String typeStr
, BotanicalName taxonName
, String regNumber
, SimpleExcelTaxonImportState
<CONFIG
> state
, boolean isFossil
) {
379 Matcher m
= typeSpecimenSplitPattern
.matcher(typeStr
);
382 String fieldUnitStr
= m
.group(TypesName
.fieldUnit
.name());
383 // boolean isFieldUnit = typeStr.matches(".*([°']|\\d+\\s?m\\s|\\d+\\s?km\\s).*"); // check for location or unit m, km // makes no sense!!!!
384 FieldUnit fieldUnit
= parseFieldUnit(fieldUnitStr
, regNumber
, state
);
385 if(fieldUnit
== null) {
386 // create a field unit with only a titleCache using the fieldUnitStr substring
387 logger
.warn(csvReportLine(regNumber
, "Type: fieldUnitStr can not be parsed", fieldUnitStr
));
388 fieldUnit
= FieldUnit
.NewInstance();
389 fieldUnit
.setTitleCache(fieldUnitStr
, true);
390 getOccurrenceService().save(fieldUnit
);
392 getOccurrenceService().save(fieldUnit
);
394 SpecimenOrObservationType specimenType
;
396 specimenType
= SpecimenOrObservationType
.Fossil
;
398 specimenType
= SpecimenOrObservationType
.PreservedSpecimen
;
402 addSpecimenTypes(taxonName
, fieldUnit
, m
.group(TypesName
.holotype
.name()), TypesName
.holotype
, false, regNumber
, specimenType
);
403 addSpecimenTypes(taxonName
, fieldUnit
, m
.group(TypesName
.isotype
.name()), TypesName
.isotype
, true, regNumber
, specimenType
);
406 // create a field unit with only a titleCache using the full typeStr
407 FieldUnit fieldUnit
= FieldUnit
.NewInstance();
408 fieldUnit
.setTitleCache(typeStr
, true);
409 getOccurrenceService().save(fieldUnit
);
410 logger
.warn(csvReportLine(regNumber
, "Type: field 'Type' can not be parsed", typeStr
));
412 getNameService().save(taxonName
);
415 private void makeNameTypeData(String typeStr
, BotanicalName taxonName
, String regNumber
, SimpleExcelTaxonImportState
<CONFIG
> state
) {
417 String nameStr
= typeStr
.replaceAll("^Type\\s?\\:\\s?", "");
418 if(nameStr
.isEmpty()) {
422 String basionymNameStr
= null;
423 String noteStr
= null;
424 String agentStr
= null;
428 if(typeStr
.startsWith("not to be indicated")){
430 // Type: not to be indicated (Art. H.9.1. Tokyo Code); stated parent genera: Hechtia Klotzsch; Deuterocohnia Mez
432 m
= typeNameSpecialSplitPattern
.matcher(nameStr
);
434 nameStr
= m
.group("name");
435 noteStr
= m
.group("note");
436 agentStr
= m
.group("agent");
437 // TODO better import of agent?
438 if(agentStr
!= null){
439 noteStr
= noteStr
+ ": " + agentStr
;
444 m
= typeNameBasionymPattern
.matcher(nameStr
);
446 basionymNameStr
= m
.group("basionymName");
447 if (basionymNameStr
!= null) {
448 nameStr
= nameStr
.replace(m
.group(0), "");
452 m
= typeNameNotePattern
.matcher(nameStr
);
454 noteStr
= m
.group(1);
455 if (noteStr
!= null) {
456 nameStr
= nameStr
.replace(m
.group(0), "");
461 BotanicalName typeName
= (BotanicalName
) nameParser
.parseFullName(nameStr
, NomenclaturalCode
.ICNAFP
, null);
463 if(typeName
.isProtectedTitleCache() || typeName
.getNomenclaturalReference() != null && typeName
.getNomenclaturalReference().isProtectedTitleCache()) {
464 logger
.warn(csvReportLine(regNumber
, "NameType not parsable", typeStr
, nameStr
));
467 if(basionymNameStr
!= null){
468 BotanicalName basionymName
= (BotanicalName
) nameParser
.parseFullName(nameStr
, NomenclaturalCode
.ICNAFP
, null);
469 getNameService().save(basionymName
);
470 typeName
.addBasionym(basionymName
);
474 NameTypeDesignation nameTypeDesignation
= NameTypeDesignation
.NewInstance();
475 nameTypeDesignation
.setTypeName(typeName
);
476 getNameService().save(typeName
);
479 nameTypeDesignation
.addAnnotation(Annotation
.NewInstance(noteStr
, AnnotationType
.EDITORIAL(), Language
.UNKNOWN_LANGUAGE()));
481 taxonName
.addNameTypeDesignation(typeName
, null, null, null, null, false);
486 * Currently only parses the collector, fieldNumber and the collection date.
488 * @param fieldUnitStr
491 * @return null if the fieldUnitStr could not be parsed
493 protected FieldUnit
parseFieldUnit(String fieldUnitStr
, String regNumber
, SimpleExcelTaxonImportState
<CONFIG
> state
) {
495 FieldUnit fieldUnit
= null;
497 Matcher m1
= collectorPattern
.matcher(fieldUnitStr
);
500 String collectorData
= m1
.group(2); // like ... (leg. Metzeltin, 30. 9. 1996)
501 String removal
= m1
.group(1);
502 if(collectorData
== null){
503 collectorData
= m1
.group(4); // like ... leg. Metzeltin, 30. 9. 1996
504 removal
= m1
.group(3);
506 if(collectorData
== null){
507 collectorData
= m1
.group(6); // like ^leg. J. J. Halda 18.3.1997$
510 if(collectorData
== null){
514 // the fieldUnitStr is parsable
515 // remove all collectorData from the fieldUnitStr and use the rest as locality
516 String locality
= null;
518 locality
= fieldUnitStr
.replace(removal
, "");
521 String collectorStr
= null;
522 String detailStr
= null;
524 String fieldNumber
= null;
526 Matcher m2
= collectionDataPattern
.matcher(collectorData
);
528 collectorStr
= m2
.group("collector");
529 detailStr
= m2
.group("detail");
531 // Try to make sense of the detailStr
532 if(detailStr
!= null){
533 detailStr
= detailStr
.trim();
534 // 1. try to parse as date
535 date
= parseDate(regNumber
, detailStr
);
537 // 2. try to parse as number
538 if(collectorsNumber
.matcher(detailStr
).matches()){
539 fieldNumber
= detailStr
;
543 if(date
== null && fieldNumber
== null){
544 // detailed parsing not possible, so need fo fallback
545 collectorStr
= collectorData
;
549 if(collectorStr
== null) {
550 collectorStr
= collectorData
;
553 fieldUnit
= FieldUnit
.NewInstance();
554 GatheringEvent ge
= GatheringEvent
.NewInstance();
555 if(locality
!= null){
556 ge
.setLocality(LanguageString
.NewInstance(locality
, Language
.UNKNOWN_LANGUAGE()));
559 TeamOrPersonBase agent
= state
.getAgentBase(collectorStr
);
561 agent
= Person
.NewTitledInstance(collectorStr
);
562 getAgentService().save(agent
);
563 state
.putAgentBase(collectorStr
, agent
);
565 ge
.setCollector(agent
);
568 ge
.setGatheringDate(date
);
571 getEventBaseService().save(ge
);
572 fieldUnit
.setGatheringEvent(ge
);
574 if(fieldNumber
!= null) {
575 fieldUnit
.setFieldNumber(fieldNumber
);
577 getOccurrenceService().save(fieldUnit
);
584 protected Partial
parseDate(String regNumber
, String dateStr
) {
586 Partial pupDate
= null;
587 boolean parseError
= false;
591 String monthName
= null;
594 for(Pattern p
: datePatterns
){
595 Matcher m2
= p
.matcher(dateStr
);
598 year
= m2
.group("year");
599 } catch (IllegalArgumentException e
){
600 // named capture group not found
603 month
= m2
.group("month");
604 } catch (IllegalArgumentException e
){
605 // named capture group not found
609 monthName
= m2
.group("monthName");
610 month
= monthFromName(monthName
, regNumber
);
614 } catch (IllegalArgumentException e
){
615 // named capture group not found
618 day
= m2
.group("day");
619 } catch (IllegalArgumentException e
){
620 // named capture group not found
624 if (year
.length() == 2) {
625 // it is an abbreviated year from the 19** years
637 List
<DateTimeFieldType
> types
= new ArrayList
<>();
638 List
<Integer
> values
= new ArrayList
<>();
640 types
.add(DateTimeFieldType
.year());
641 values
.add(Integer
.parseInt(year
));
643 types
.add(DateTimeFieldType
.monthOfYear());
644 values
.add(Integer
.parseInt(month
));
647 types
.add(DateTimeFieldType
.dayOfMonth());
648 values
.add(Integer
.parseInt(day
));
650 pupDate
= new Partial(types
.toArray(new DateTimeFieldType
[types
.size()]), ArrayUtils
.toPrimitive(values
.toArray(new Integer
[values
.size()])));
655 private String
monthFromName(String monthName
, String regNumber
) {
657 Integer month
= monthFromNameMap
.get(monthName
.toLowerCase());
659 logger
.warn(csvReportLine(regNumber
, "Unknown month name", monthName
));
662 return month
.toString();
667 private void addSpecimenTypes(BotanicalName taxonName
, FieldUnit fieldUnit
, String typeStr
, TypesName typeName
, boolean multiple
, String regNumber
, SpecimenOrObservationType specimenType
){
669 if(StringUtils
.isEmpty(typeStr
)){
672 typeStr
= typeStr
.trim().replaceAll("\\.$", "");
674 Collection collection
= null;
675 DerivedUnit specimen
= null;
677 List
<DerivedUnit
> specimens
= new ArrayList
<>();
679 String
[] tokens
= typeStr
.split("\\s?,\\s?");
680 for (String t
: tokens
) {
681 // command to list all complex parsabel types:
682 // csvcut -t -c RegistrationNo_Pk,Type iapt.csv | csvgrep -c Type -m "Holotype" | egrep -o 'Holotype:\s([A-Z]*\s)[^.]*?'
683 // csvcut -t -c RegistrationNo_Pk,Type iapt.csv | csvgrep -c Type -m "Holotype" | egrep -o 'Isotype[^:]*:\s([A-Z]*\s)[^.]*?'
686 // trying to parse the string
687 specimen
= parseSpecimenType(fieldUnit
, typeName
, collection
, t
, regNumber
);
688 if(specimen
!= null){
689 specimens
.add(specimen
);
691 // parsing was not successful make simple specimen
692 specimens
.add(makeSpecimenType(fieldUnit
, t
, specimenType
));
697 specimen
= parseSpecimenType(fieldUnit
, typeName
, collection
, typeStr
, regNumber
);
698 if(specimen
!= null) {
699 specimens
.add(specimen
);
700 // remember current collection
701 collection
= specimen
.getCollection();
703 // parsing was not successful make simple specimen
704 specimens
.add(makeSpecimenType(fieldUnit
, typeStr
, SpecimenOrObservationType
.PreservedSpecimen
));
708 for(DerivedUnit s
: specimens
){
709 taxonName
.addSpecimenTypeDesignation(s
, typeName
.status(), null, null, null, false, true);
713 private DerivedUnit
makeSpecimenType(FieldUnit fieldUnit
, String titleCache
, SpecimenOrObservationType specimenType
) {
714 DerivedUnit specimen
;DerivedUnitFacade facade
= DerivedUnitFacade
.NewInstance(specimenType
, fieldUnit
);
715 facade
.setTitleCache(titleCache
.trim(), true);
716 specimen
= facade
.innerDerivedUnit();
729 protected DerivedUnit
parseSpecimenType(FieldUnit fieldUnit
, TypesName typeName
, Collection collection
, String text
, String regNumber
) {
731 DerivedUnit specimen
= null;
733 String collectionCode
= null;
734 String collectionTitle
= null;
735 String subCollectionStr
= null;
736 String instituteStr
= null;
737 String accessionNumber
= null;
739 boolean unusualAccessionNumber
= false;
743 // 1. For Isotypes often the accession number is noted alone if the
744 // preceeding entry has a collection code.
745 if(typeName
.equals(TypesName
.isotype
) && collection
!= null){
746 Matcher m
= accessionNumberOnlyPattern
.matcher(text
);
749 accessionNumber
= m
.group("accNumber");
750 specimen
= makeSpecimenType(fieldUnit
, collection
, accessionNumber
);
751 } catch (IllegalArgumentException e
){
752 // match group acc_number not found
757 //2. try it the 'normal' way
758 if(specimen
== null) {
759 for (Pattern p
: specimenTypePatterns
) {
760 Matcher m
= p
.matcher(text
);
762 // collection code or collectionTitle is mandatory
764 collectionCode
= m
.group("colCode");
765 } catch (IllegalArgumentException e
){
766 // match group colCode not found
770 instituteStr
= m
.group("institute");
771 } catch (IllegalArgumentException e
){
772 // match group col_name not found
776 subCollectionStr
= m
.group("subCollection");
777 } catch (IllegalArgumentException e
){
778 // match group subCollection not found
781 accessionNumber
= m
.group("accNumber");
783 // try to improve the accessionNumber
784 if(accessionNumber
!= null) {
785 accessionNumber
= accessionNumber
.trim();
786 Matcher m2
= accessionNumberOnlyPattern
.matcher(accessionNumber
);
787 String betterAccessionNumber
= null;
790 betterAccessionNumber
= m
.group("accNumber");
791 } catch (IllegalArgumentException e
) {
792 // match group acc_number not found
795 if (betterAccessionNumber
!= null) {
796 accessionNumber
= betterAccessionNumber
;
798 unusualAccessionNumber
= true;
802 } catch (IllegalArgumentException e
){
803 // match group acc_number not found
806 if(collectionCode
== null && instituteStr
== null){
807 logger
.warn(csvReportLine(regNumber
, "Type: neither 'collectionCode' nor 'institute' found in ", text
));
810 collection
= getCollection(collectionCode
, instituteStr
, subCollectionStr
);
811 specimen
= makeSpecimenType(fieldUnit
, collection
, accessionNumber
);
816 if(specimen
== null) {
817 logger
.warn(csvReportLine(regNumber
, "Type: Could not parse specimen", typeName
.name().toString(), text
));
819 if(unusualAccessionNumber
){
820 logger
.warn(csvReportLine(regNumber
, "Type: Unusual accession number", typeName
.name().toString(), text
, accessionNumber
));
825 private DerivedUnit
makeSpecimenType(FieldUnit fieldUnit
, Collection collection
, String accessionNumber
) {
827 DerivedUnitFacade facade
= DerivedUnitFacade
.NewInstance(SpecimenOrObservationType
.PreservedSpecimen
, fieldUnit
);
828 facade
.setCollection(collection
);
829 if(accessionNumber
!= null){
830 facade
.setAccessionNumber(accessionNumber
);
832 return facade
.innerDerivedUnit();
835 private BotanicalName
makeBotanicalName(SimpleExcelTaxonImportState
<CONFIG
> state
, String regNumber
, String titleCacheStr
, String nameStr
,
836 String authorStr
, String nomRefTitle
) {
838 BotanicalName taxonName
;// cache field for the taxonName.titleCache
839 String taxonNameTitleCache
= null;
840 Map
<String
, AnnotationType
> nameAnnotations
= new HashMap
<>();
842 // TitleCache preprocessing
843 if(titleCacheStr
.endsWith(ANNOTATION_MARKER_STRING
) || (authorStr
!= null && authorStr
.endsWith(ANNOTATION_MARKER_STRING
))){
844 nameAnnotations
.put("Author abbreviation not checked.", AnnotationType
.EDITORIAL());
845 titleCacheStr
= titleCacheStr
.replace(ANNOTATION_MARKER_STRING
, "").trim();
846 if(authorStr
!= null) {
847 authorStr
= authorStr
.replace(ANNOTATION_MARKER_STRING
, "").trim();
851 // parse the full taxon name
852 if(!StringUtils
.isEmpty(nomRefTitle
)){
853 String referenceSeparator
= nomRefTitle
.startsWith("in ") ?
" " : ", ";
854 String taxonFullNameStr
= titleCacheStr
+ referenceSeparator
+ nomRefTitle
;
855 logger
.debug(":::::" + taxonFullNameStr
);
856 taxonName
= (BotanicalName
) nameParser
.parseReferencedName(taxonFullNameStr
, NomenclaturalCode
.ICNAFP
, null);
858 taxonName
= (BotanicalName
) nameParser
.parseFullName(titleCacheStr
, NomenclaturalCode
.ICNAFP
, null);
861 taxonNameTitleCache
= taxonName
.getTitleCache().trim();
862 if (taxonName
.isProtectedTitleCache()) {
863 logger
.warn(csvReportLine(regNumber
, "Name could not be parsed", titleCacheStr
));
866 boolean doRestoreTitleCacheStr
= false;
868 // Check if titleCache and nameCache are plausible
869 String titleCacheCompareStr
= titleCacheStr
;
870 String nameCache
= taxonName
.getNameCache();
871 String nameCompareStr
= nameStr
;
872 if(taxonName
.isBinomHybrid()){
873 titleCacheCompareStr
= titleCacheCompareStr
.replace(" x ", " ×");
874 nameCompareStr
= nameCompareStr
.replace(" x ", " ×");
876 if(taxonName
.isMonomHybrid()){
877 titleCacheCompareStr
= titleCacheCompareStr
.replaceAll("^X ", "× ");
878 nameCompareStr
= nameCompareStr
.replace("^X ", "× ");
880 if(authorStr
!= null && authorStr
.contains(" et ")){
881 titleCacheCompareStr
= titleCacheCompareStr
.replaceAll(" et ", " & ");
883 if (!taxonNameTitleCache
.equals(titleCacheCompareStr
)) {
884 logger
.warn(csvReportLine(regNumber
, "The generated titleCache differs from the imported string", taxonNameTitleCache
, " != ", titleCacheStr
, " ==> original titleCacheStr has been restored"));
885 doRestoreTitleCacheStr
= true;
887 if (!nameCache
.trim().equals(nameCompareStr
)) {
888 logger
.warn(csvReportLine(regNumber
, "The parsed nameCache differs from field '" + NAMESTRING
+ "'", nameCache
, " != ", nameCompareStr
));
892 //nameParser.handleAuthors(taxonName, titleCacheStr, authorStr);
893 //if (!titleCacheStr.equals(taxonName.getTitleCache())) {
894 // logger.warn(regNumber + ": titleCache has changed after setting authors, will restore original titleCacheStr");
895 // doRestoreTitleCacheStr = true;
898 if(doRestoreTitleCacheStr
){
899 taxonName
.setTitleCache(titleCacheStr
, true);
903 replaceAuthorNamesAndNomRef(state
, taxonName
);
907 if(!nameAnnotations
.isEmpty()){
908 for(String text
: nameAnnotations
.keySet()){
909 taxonName
.addAnnotation(Annotation
.NewInstance(text
, nameAnnotations
.get(text
), Language
.DEFAULT()));
913 taxonName
.addSource(OriginalSourceType
.Import
, regNumber
, null, state
.getConfig().getSourceReference(), null);
915 getNameService().save(taxonName
);
924 private TaxonNode
getClassificationRootNode(IAPTImportState state
) {
926 // Classification classification = state.getClassification();
927 // if (classification == null){
928 // IAPTImportConfigurator config = state.getConfig();
929 // classification = Classification.NewInstance(state.getConfig().getClassificationName());
930 // classification.setUuid(config.getClassificationUuid());
931 // classification.setReference(config.getSecReference());
932 // classification = getClassificationService().find(state.getConfig().getClassificationUuid());
934 TaxonNode rootNode
= state
.getRootNode();
935 if (rootNode
== null){
936 rootNode
= getTaxonNodeService().find(ROOT_UUID
);
938 if (rootNode
== null){
939 Classification classification
= state
.getClassification();
940 if (classification
== null){
941 Reference sec
= state
.getSecReference();
942 String classificationName
= state
.getConfig().getClassificationName();
943 Language language
= Language
.DEFAULT();
944 classification
= Classification
.NewInstance(classificationName
, sec
, language
);
945 state
.setClassification(classification
);
946 classification
.setUuid(state
.getConfig().getClassificationUuid());
947 classification
.getRootNode().setUuid(ROOT_UUID
);
948 getClassificationService().save(classification
);
950 rootNode
= classification
.getRootNode();
951 state
.setRootNode(rootNode
);
956 private Collection
getCollection(String collectionCode
, String instituteStr
, String subCollectionStr
){
958 Collection superCollection
= null;
959 if(subCollectionStr
!= null){
960 superCollection
= getCollection(collectionCode
, instituteStr
, null);
961 collectionCode
= subCollectionStr
;
965 final String key
= collectionCode
+ "-#i:" + StringUtils
.defaultString(instituteStr
);
967 Collection collection
= collectionMap
.get(key
);
969 if(collection
== null) {
970 collection
= Collection
.NewInstance();
971 collection
.setCode(collectionCode
);
972 if(instituteStr
!= null){
973 collection
.setInstitute(Institution
.NewNamedInstance(instituteStr
));
975 if(superCollection
!= null){
976 collection
.setSuperCollection(superCollection
);
978 collectionMap
.put(key
, collection
);
980 getCollectionService().save(collection
);
991 * @param doUnescapeHtmlEntities
994 private String
getValue(HashMap
<String
, String
> record
, String originalKey
, boolean doUnescapeHtmlEntities
) {
995 String value
= record
.get(originalKey
);
997 value
= fixCharacters(value
);
999 if (! StringUtils
.isBlank(value
)) {
1000 if (logger
.isDebugEnabled()) {
1001 logger
.debug(originalKey
+ ": " + value
);
1003 value
= CdmUtils
.removeDuplicateWhitespace(value
.trim()).toString();
1004 if(doUnescapeHtmlEntities
){
1005 value
= StringEscapeUtils
.unescapeHtml(value
);
1007 return value
.trim();
1014 * Fixes broken characters.
1016 * http://dev.e-taxonomy.eu/redmine/issues/6035
1021 private String
fixCharacters(String value
) {
1023 value
= StringUtils
.replace(value
, "s$K", "š");
1024 value
= StringUtils
.replace(value
, "n$K", "ň");
1025 value
= StringUtils
.replace(value
, "e$K", "ě");
1026 value
= StringUtils
.replace(value
, "r$K", "ř");
1027 value
= StringUtils
.replace(value
, "c$K", "č");
1028 value
= StringUtils
.replace(value
, "z$K", "ž");
1029 value
= StringUtils
.replace(value
, "S>U$K", "Š");
1030 value
= StringUtils
.replace(value
, "C>U$K", "Č");
1031 value
= StringUtils
.replace(value
, "R>U$K", "Ř");
1032 value
= StringUtils
.replace(value
, "Z>U$K", "Ž");
1033 value
= StringUtils
.replace(value
, "g$K", "ǧ");
1034 value
= StringUtils
.replace(value
, "s$A", "ś");
1035 value
= StringUtils
.replace(value
, "n$A", "ń");
1036 value
= StringUtils
.replace(value
, "c$A", "ć");
1037 value
= StringUtils
.replace(value
, "e$E", "ę");
1038 value
= StringUtils
.replace(value
, "o$H", "õ");
1039 value
= StringUtils
.replace(value
, "s$C", "ş");
1040 value
= StringUtils
.replace(value
, "t$C", "ț");
1041 value
= StringUtils
.replace(value
, "S>U$C", "Ş");
1042 value
= StringUtils
.replace(value
, "a$O", "å");
1043 value
= StringUtils
.replace(value
, "A>U$O", "Å");
1044 value
= StringUtils
.replace(value
, "u$O", "ů");
1045 value
= StringUtils
.replace(value
, "g$B", "ğ");
1046 value
= StringUtils
.replace(value
, "g$B", "ĕ");
1047 value
= StringUtils
.replace(value
, "a$B", "ă");
1048 value
= StringUtils
.replace(value
, "l$/", "ł");
1049 value
= StringUtils
.replace(value
, ">i", "ı");
1050 value
= StringUtils
.replace(value
, "i$U", "ï");
1052 value
= StringUtils
.replace(value
, "ý", "ý");
1053 value
= StringUtils
.replace(value
, ">L", "Ł"); // corrected rule
1054 value
= StringUtils
.replace(value
, "E>U$D", "З");
1055 value
= StringUtils
.replace(value
, "S>U$E", "Ş");
1056 value
= StringUtils
.replace(value
, "s$E", "ş");
1058 value
= StringUtils
.replace(value
, "c$k", "č");
1059 value
= StringUtils
.replace(value
, " U$K", " Š");
1061 value
= StringUtils
.replace(value
, "O>U>!", "Ø");
1062 value
= StringUtils
.replace(value
, "o>!", "ø");
1063 value
= StringUtils
.replace(value
, "S$K", "Ŝ");
1064 value
= StringUtils
.replace(value
, ">l", "ğ");
1066 value
= StringUtils
.replace(value
, "§B>i", "ł");
1075 * Stores taxa records in DB
1078 protected void firstPass(SimpleExcelTaxonImportState
<CONFIG
> state
) {
1080 String lineNumber
= "L#" + state
.getCurrentLine() + ": ";
1081 logger
.setLevel(Level
.DEBUG
);
1082 HashMap
<String
, String
> record
= state
.getOriginalRecord();
1083 logger
.debug(lineNumber
+ record
.toString());
1085 Set
<String
> keys
= record
.keySet();
1086 for (String key
: keys
) {
1087 if (! expectedKeys
.contains(key
)){
1088 logger
.warn(lineNumber
+ "Unexpected Key: " + key
);
1092 String reg_id
= record
.get(REGISTRATIONNO_PK
);
1095 String higherTaxaString
= record
.get(HIGHERTAXON
);
1096 boolean isFossil
= false;
1097 if(higherTaxaString
.startsWith("FOSSIL ")){
1098 higherTaxaString
= higherTaxaString
.replace("FOSSIL ", "");
1101 TaxonNode higherTaxon
= getHigherTaxon(higherTaxaString
, (IAPTImportState
)state
);
1104 Taxon taxon
= makeTaxon(record
, state
, higherTaxon
, isFossil
);
1106 logger
.warn(lineNumber
+ "taxon could not be created and is null");
1109 ((IAPTImportState
)state
).setCurrentTaxon(taxon
);
1112 IAPTRegData regData
= makeIAPTRegData(state
);
1113 ObjectMapper mapper
= new ObjectMapper();
1115 String regdataJson
= mapper
.writeValueAsString(regData
);
1116 Extension
.NewInstance(taxon
.getName(), regdataJson
, getExtensionTypeIAPTRegData());
1117 getNameService().save(taxon
.getName());
1118 } catch (JsonProcessingException e
) {
1119 logger
.error("Error on converting IAPTRegData", e
);
1122 logger
.info("#of imported Genera: " + ((IAPTImportState
) state
).getGenusTaxonMap().size());
1126 private ExtensionType
getExtensionTypeIAPTRegData() {
1127 if(extensionTypeIAPTRegData
== null){
1128 extensionTypeIAPTRegData
= ExtensionType
.NewInstance("IAPTRegData.json", "IAPTRegData.json", "");
1129 getTermService().save(extensionTypeIAPTRegData
);
1131 return extensionTypeIAPTRegData
;
1134 private IAPTRegData
makeIAPTRegData(SimpleExcelTaxonImportState
<CONFIG
> state
) {
1136 HashMap
<String
, String
> record
= state
.getOriginalRecord();
1137 String registrationStr
= getValue(record
, REGISTRATION
);
1138 String regDateStr
= getValue(record
, REGDATE
);
1139 String regStr
= getValue(record
, REGISTRATION
, true);
1141 String dateStr
= null;
1142 String office
= null;
1143 Integer regID
= null;
1144 Integer formNo
= null;
1146 Matcher m
= registrationPattern
.matcher(registrationStr
);
1148 dateStr
= m
.group("regdate");
1149 if(parseDate( regStr
, dateStr
) == null){
1150 // check for valid dates
1151 logger
.warn(csvReportLine(regStr
, REGISTRATION
+ ": could not parse date", dateStr
, " in ", registrationStr
));
1153 office
= m
.group("office");
1154 regID
= Integer
.valueOf(m
.group("regid"));
1156 formNo
= Integer
.valueOf(m
.group("formNo"));
1157 } catch(IllegalArgumentException e
){
1161 logger
.warn(csvReportLine(regStr
, REGISTRATION
+ ": could not be parsed", registrationStr
));
1163 IAPTRegData regData
= new IAPTRegData(dateStr
, office
, regID
, formNo
);
1167 private TaxonNode
getHigherTaxon(String higherTaxaString
, IAPTImportState state
) {
1168 String
[] higherTaxaNames
= higherTaxaString
.toLowerCase().replaceAll("[\\[\\]]", "").split(":");
1169 TaxonNode higherTaxonNode
= null;
1171 ITaxonTreeNode rootNode
= getClassificationRootNode(state
);
1172 for (String htn
: higherTaxaNames
) {
1173 htn
= StringUtils
.capitalize(htn
.trim());
1174 Taxon higherTaxon
= state
.getHigherTaxon(htn
);
1175 if (higherTaxon
!= null){
1176 higherTaxonNode
= higherTaxon
.getTaxonNodes().iterator().next();
1178 BotanicalName name
= makeHigherTaxonName(state
, htn
);
1179 Reference sec
= state
.getSecReference();
1180 higherTaxon
= Taxon
.NewInstance(name
, sec
);
1181 getTaxonService().save(higherTaxon
);
1182 higherTaxonNode
= rootNode
.addChildTaxon(higherTaxon
, sec
, null);
1183 state
.putHigherTaxon(htn
, higherTaxon
);
1184 getClassificationService().saveTreeNode(higherTaxonNode
);
1186 rootNode
= higherTaxonNode
;
1188 return higherTaxonNode
;
1191 private BotanicalName
makeHigherTaxonName(IAPTImportState state
, String name
) {
1193 Rank rank
= guessRank(name
);
1195 BotanicalName taxonName
= BotanicalName
.NewInstance(rank
);
1196 taxonName
.addSource(makeOriginalSource(state
));
1197 taxonName
.setGenusOrUninomial(StringUtils
.capitalize(name
));
1201 private Rank
guessRank(String name
) {
1204 name
= name
.replaceAll("\\(.*\\)", "").trim();
1206 if(name
.matches("^Plantae$|^Fungi$")){
1207 return Rank
.KINGDOM();
1208 } else if(name
.matches("^Incertae sedis$|^No group assigned$")){
1209 return rankFamilyIncertisSedis();
1210 } else if(name
.matches(".*phyta$|.*mycota$")){
1211 return Rank
.PHYLUM();
1212 } else if(name
.matches(".*phytina$|.*mycotina$")){
1213 return Rank
.SUBPHYLUM();
1214 } else if(name
.matches("Gymnospermae$|.*ones$")){ // Monocotyledones, Dicotyledones
1215 return rankUnrankedSupraGeneric();
1216 } else if(name
.matches(".*opsida$|.*phyceae$|.*mycetes$|.*ones$|^Musci$|^Hepaticae$")){
1217 return Rank
.CLASS();
1218 } else if(name
.matches(".*idae$|.*phycidae$|.*mycetidae$")){
1219 return Rank
.SUBCLASS();
1220 } else if(name
.matches(".*ales$")){
1221 return Rank
.ORDER();
1222 } else if(name
.matches(".*ineae$")){
1223 return Rank
.SUBORDER();
1224 } else if(name
.matches(".*aceae$")){
1225 return Rank
.FAMILY();
1226 } else if(name
.matches(".*oideae$")){
1227 return Rank
.SUBFAMILY();
1229 // if(name.matches(".*eae$")){
1230 // return Rank.TRIBE();
1232 if(name
.matches(".*inae$")){
1233 return Rank
.SUBTRIBE();
1234 } else if(name
.matches(".*ae$")){
1235 return Rank
.FAMILY();
1237 return Rank
.UNKNOWN_RANK();
1240 private Rank
rankUnrankedSupraGeneric() {
1242 if(rankUnrankedSupraGeneric
== null){
1243 rankUnrankedSupraGeneric
= Rank
.NewInstance(RankClass
.Suprageneric
, "Unranked supra generic", " ", " ");
1244 getTermService().save(rankUnrankedSupraGeneric
);
1246 return rankUnrankedSupraGeneric
;
1249 private Rank
rankFamilyIncertisSedis() {
1251 if(familyIncertisSedis
== null){
1252 familyIncertisSedis
= Rank
.NewInstance(RankClass
.Suprageneric
, "Family incertis sedis", " ", " ");
1253 getTermService().save(familyIncertisSedis
);
1255 return familyIncertisSedis
;
1258 private AnnotationType
annotationTypeCaveats(){
1259 if(annotationTypeCaveats
== null){
1260 annotationTypeCaveats
= AnnotationType
.NewInstance("Caveats", "Caveats", "");
1261 getTermService().save(annotationTypeCaveats
);
1263 return annotationTypeCaveats
;
1271 private IdentifiableSource
makeOriginalSource(IAPTImportState state
) {
1272 return IdentifiableSource
.NewDataImportInstance("line: " + state
.getCurrentLine(), null, state
.getConfig().getSourceReference());
1276 private Reference
makeReference(IAPTImportState state
, UUID uuidRef
) {
1277 Reference ref
= state
.getReference(uuidRef
);
1279 ref
= getReferenceService().find(uuidRef
);
1280 state
.putReference(uuidRef
, ref
);
1285 private MarkerType
markerTypeFossil(){
1286 if(this.markerTypeFossil
== null){
1287 markerTypeFossil
= MarkerType
.NewInstance("isFossilTaxon", "isFossil", null);
1288 getTermService().save(this.markerTypeFossil
);
1290 return markerTypeFossil
;
1293 private String
csvReportLine(String regId
, String message
, String
... fields
){
1294 StringBuilder out
= new StringBuilder("regID#");
1295 out
.append(regId
).append(",\"").append(message
).append('"');
1297 for(String f
: fields
){
1298 out
.append(",\"").append(f
).append('"');
1300 return out
.toString();