1 |
|
/**
|
2 |
|
* Copyright (C) 2007 EDIT
|
3 |
|
* European Distributed Institute of Taxonomy
|
4 |
|
* http://www.e-taxonomy.eu
|
5 |
|
*
|
6 |
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7 |
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8 |
|
*/
|
9 |
|
|
10 |
|
package eu.etaxonomy.cdm.io.iapt;
|
11 |
|
|
12 |
|
import java.util.ArrayList;
|
13 |
|
import java.util.Arrays;
|
14 |
|
import java.util.HashMap;
|
15 |
|
import java.util.HashSet;
|
16 |
|
import java.util.List;
|
17 |
|
import java.util.Map;
|
18 |
|
import java.util.Set;
|
19 |
|
import java.util.UUID;
|
20 |
|
import java.util.regex.Matcher;
|
21 |
|
import java.util.regex.Pattern;
|
22 |
|
|
23 |
|
import org.apache.commons.lang.ArrayUtils;
|
24 |
|
import org.apache.commons.lang.StringEscapeUtils;
|
25 |
|
import org.apache.commons.lang.StringUtils;
|
26 |
|
import org.apache.log4j.Level;
|
27 |
|
import org.apache.log4j.Logger;
|
28 |
|
import org.joda.time.DateTimeFieldType;
|
29 |
|
import org.joda.time.Partial;
|
30 |
|
import org.joda.time.format.DateTimeFormat;
|
31 |
|
import org.joda.time.format.DateTimeFormatter;
|
32 |
|
import org.springframework.stereotype.Component;
|
33 |
|
|
34 |
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
35 |
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
36 |
|
|
37 |
|
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
|
38 |
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
39 |
|
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
|
40 |
|
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
|
41 |
|
import eu.etaxonomy.cdm.model.agent.Institution;
|
42 |
|
import eu.etaxonomy.cdm.model.agent.Person;
|
43 |
|
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
|
44 |
|
import eu.etaxonomy.cdm.model.common.Annotation;
|
45 |
|
import eu.etaxonomy.cdm.model.common.AnnotationType;
|
46 |
|
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
|
47 |
|
import eu.etaxonomy.cdm.model.common.Extension;
|
48 |
|
import eu.etaxonomy.cdm.model.common.ExtensionType;
|
49 |
|
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
|
50 |
|
import eu.etaxonomy.cdm.model.common.Language;
|
51 |
|
import eu.etaxonomy.cdm.model.common.LanguageString;
|
52 |
|
import eu.etaxonomy.cdm.model.common.Marker;
|
53 |
|
import eu.etaxonomy.cdm.model.common.MarkerType;
|
54 |
|
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
|
55 |
|
import eu.etaxonomy.cdm.model.common.VerbatimTimePeriod;
|
56 |
|
import eu.etaxonomy.cdm.model.name.IBotanicalName;
|
57 |
|
import eu.etaxonomy.cdm.model.name.NameRelationshipType;
|
58 |
|
import eu.etaxonomy.cdm.model.name.NameTypeDesignation;
|
59 |
|
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
|
60 |
|
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
|
61 |
|
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
|
62 |
|
import eu.etaxonomy.cdm.model.name.Rank;
|
63 |
|
import eu.etaxonomy.cdm.model.name.RankClass;
|
64 |
|
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
|
65 |
|
import eu.etaxonomy.cdm.model.name.TaxonName;
|
66 |
|
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
|
67 |
|
import eu.etaxonomy.cdm.model.occurrence.Collection;
|
68 |
|
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
|
69 |
|
import eu.etaxonomy.cdm.model.occurrence.FieldUnit;
|
70 |
|
import eu.etaxonomy.cdm.model.occurrence.GatheringEvent;
|
71 |
|
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
|
72 |
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
73 |
|
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
|
74 |
|
import eu.etaxonomy.cdm.model.taxon.Classification;
|
75 |
|
import eu.etaxonomy.cdm.model.taxon.ITaxonTreeNode;
|
76 |
|
import eu.etaxonomy.cdm.model.taxon.Synonym;
|
77 |
|
import eu.etaxonomy.cdm.model.taxon.SynonymType;
|
78 |
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
79 |
|
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
|
80 |
|
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
|
81 |
|
|
82 |
|
/**
|
83 |
|
* @author a.mueller
|
84 |
|
* @since 05.01.2016
|
85 |
|
*/
|
86 |
|
|
87 |
|
@Component("iAPTExcelImport")
|
88 |
|
public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends SimpleExcelTaxonImport<CONFIG> {
|
89 |
|
    private static final long serialVersionUID = -747486709409732371L;

    private static final Logger logger = Logger.getLogger(IAPTExcelImport.class);

    // Marker string used in the source data to flag annotated values.
    public static final String ANNOTATION_MARKER_STRING = "[*]";

    // UUID of the classification root under which imported taxa are placed.
    private static UUID ROOT_UUID = UUID.fromString("4137fd2a-20f6-4e70-80b9-f296daf51d82");

    // Shared parser instance for all botanical name strings handled by this import.
    private static NonViralNameParserImpl nameParser = NonViralNameParserImpl.NewInstance();

    // Column names of the source Excel worksheet:
    private final static String REGISTRATIONNO_PK= "RegistrationNo_Pk";
    private final static String HIGHERTAXON= "HigherTaxon";
    private final static String FULLNAME= "FullName";
    private final static String AUTHORSSPELLING= "AuthorsSpelling";
    private final static String LITSTRING= "LitString";
    private final static String REGISTRATION= "Registration";
    private final static String TYPE= "Type";
    private final static String CAVEATS= "Caveats";
    private final static String FULLBASIONYM= "FullBasionym";
    private final static String FULLSYNSUBST= "FullSynSubst";
    private final static String NOTESTXT= "NotesTxt";
    private final static String REGDATE= "RegDate";
    private final static String NAMESTRING= "NameString";
    private final static String BASIONYMSTRING= "BasionymString";
    private final static String SYNSUBSTSTR= "SynSubstStr";
    private final static String AUTHORSTRING= "AuthorString";

    // All columns this import expects to be present in each worksheet record.
    private static List<String> expectedKeys= Arrays.asList(new String[]{
            REGISTRATIONNO_PK, HIGHERTAXON, FULLNAME, AUTHORSSPELLING, LITSTRING, REGISTRATION, TYPE, CAVEATS, FULLBASIONYM, FULLSYNSUBST, NOTESTXT, REGDATE, NAMESTRING, BASIONYMSTRING, SYNSUBSTSTR, AUTHORSTRING});

    // Splits a nomenclatural reference string into title, detail, date and an optional issue,
    // roughly: "<title>: <detail>. <date> (<issue>)."
    private static final Pattern nomRefTokenizeP = Pattern.compile("^(?<title>.*):\\s(?<detail>[^\\.:]+)\\.(?<date>.*?)(?:\\s\\((?<issue>[^\\)]*)\\)\\s*)?\\.?$");

    private static final Pattern[] datePatterns = new Pattern[]{
            // NOTE:
            // The order of the patterns is extremely important!!!
            //
            // all patterns cover the years 1700 - 1999
            Pattern.compile("^(?<year>1[7,8,9][0-9]{2})$"), // only year, like '1969'
            Pattern.compile("^(?<monthName>\\p{L}+\\.?)\\s(?<day>[0-9]{1,2})(?:st|rd|th)?\\.?,?\\s(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like April 12, 1969 or april 12th 1999
            Pattern.compile("^(?<monthName>\\p{L}+\\.?),?\\s?(?<year>(?:1[7,8,9])?[0-9]{2})$"), // April 99 or April, 1999 or Apr. 12
            Pattern.compile("^(?<day>[0-9]{1,2})([\\.\\-/])(\\s?)(?<month>[0-1]?[0-9])\\2\\3(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12.04.1969 or 12. 04. 1969 or 12/04/1969 or 12-04-1969
            Pattern.compile("^(?<day>[0-9]{1,2})([\\.\\-/])(?<monthName>[IVX]{1,2})\\2(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12-VI-1969
            Pattern.compile("^(?:(?<day>[0-9]{1,2})(?:\\sde)?\\s)?(?<monthName>\\p{L}+)(?:\\sde)?\\s(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full and partial date like 12 de Enero de 1999 or Enero de 1999
            Pattern.compile("^(?<month>[0-1]?[0-9])([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like 04.1969 or 04/1969 or 04-1969
            Pattern.compile("^(?<year>(?:1[7,8,9])?[0-9]{2})([\\.\\-/])(?<month>[0-1]?[0-9])$"),// partial date like 1999-04
            Pattern.compile("^(?<monthName>[IVX]{1,2})([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like VI-1969
            Pattern.compile("^(?<day>[0-9]{1,2})(?:[\\./]|th|rd|st)?\\s(?<monthName>\\p{L}+\\.?),?\\s?(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12. April 1969 or april 1999 or 22 Dec.1999
    };

    // Splits the 'Type' field into fieldUnit, holotype and isotype parts
    // (group names correspond to the TypesName enum constants).
    protected static final Pattern typeSpecimenSplitPattern = Pattern.compile("^(?:\"*[Tt]ype: (?<fieldUnit>.*?))(?:[Hh]olotype:(?<holotype>.*?)\\.?)?(?:[Ii]sotype[^:]*:(?<isotype>.*)\\.?)?\\.?$");

    // Matches "(Basionym: ...)" in a type name string and captures the basionym name.
    private static final Pattern typeNameBasionymPattern = Pattern.compile("\\([Bb]asionym\\s?\\:\\s?(?<basionymName>[^\\)]*).*$");
    private static final Pattern typeNameNotePattern = Pattern.compile("\\[([^\\[]*)"); // matches the inner of '[...]'
|
139 |
|
private static final Pattern typeNameSpecialSplitPattern = Pattern.compile("(?<note>.*\\;.*?)\\:(?<agent>)\\;(<name>.*)");
|
140 |
|
|
141 |
|
    // Matches collector statements in three variants: "(leg. ...)", trailing " leg. ..." and
    // leading "leg. ..."; groups fullStr1-3 hold the whole match (for later removal from the
    // field unit string), data1-3 the collector data itself.
    protected static final Pattern collectorPattern = Pattern.compile(".*?(?<fullStr1>\\([Ll]eg\\.\\s+(?<data1>[^\\)]*)\\)).*$|.*?(?<fullStr2>\\s[Ll]eg\\.\\:?\\s+(?<data2>.*?)\\.?)$|^(?<fullStr3>[Ll]eg\\.\\:?\\s+(?<data3>.*?)\\.?)");

    // Splits collector data into "<collector>, <detail>" (detail is a date or a collector number).
    private static final Pattern collectionDataPattern = Pattern.compile("^(?<collector>[^,]*),\\s?(?<detail>.*?)\\.?$");

    // Collector number like "No. 1234".
    private static final Pattern collectorsNumber = Pattern.compile("^([nN]o\\.\\s.*)$");

    // AccessionNumbers: , #.*, n°:?, 96/3293, No..*, -?\w{1,3}-[0-9\-/]*
    private static final Pattern accessionNumberOnlyPattern = Pattern.compile("^(?<accNumber>(?:n°\\:?\\s?|#|No\\.?\\s?)?[\\d\\w\\-/]*)$");

    // Recognize combinations of collection code, institute and accession number.
    // NOTE(review): presumably tried in this order with first match winning -- confirm in the
    // (not visible here) specimen parsing code.
    private static final Pattern[] specimenTypePatterns = new Pattern[]{
            Pattern.compile("^(?<colCode>[A-Z]+|CPC Micropaleontology Lab\\.?)\\s+(?:\\((?<institute>.*[^\\)])\\))(?<accNumber>.*)?$"), // like: GAUF (Gansu Agricultural University) No. 1207-1222
            Pattern.compile("^(?<colCode>[A-Z]+|CPC Micropaleontology Lab\\.?)\\s+(?:Coll\\.\\s(?<subCollection>[^\\.,;]*)(.))(?<accNumber>.*)?$"), // like KASSEL Coll. Krasske, Praep. DII 78
            Pattern.compile("^(?:in\\s)?(?<institute>[Cc]oll\\.\\s.*?)(?:\\s+(?<accNumber>(Praep\\.|slide|No\\.|Inv\\. Nr\\.|Nr\\.).*))?$"), // like Coll. Lange-Bertalot, Bot. Inst., Univ. Frankfurt/Main, Germany Praep. Neukaledonien OTL 62
            Pattern.compile("^(?<institute>Inst\\.\\s.*?)\\s+(?<accNumber>N\\s.*)?$"), // like Inst. Geological Sciences, Acad. Sci. Belarus, Minsk N 212 A
            Pattern.compile("^(?<colCode>[A-Z]+)(?:\\s+(?<accNumber>.*))?$"), // identifies the Collection code and takes the rest as accessionNumber if any
    };

    // Parses the 'Registration' field, e.g.: Registration date: 29.06.98; no.: 2922; office: Berlin.
    private static final Pattern registrationPattern = Pattern.compile("^Registration date\\:\\s(?<regdate>\\d\\d\\.\\d\\d\\.\\d\\d); no\\.\\:\\s(?<regid>\\d+);\\soffice\\:\\s(?<office>.*?)\\.(?:\\s\\[Form no\\.\\:\\s(?<formNo>d+)\\])?$"); // Registration date: 29.06.98; no.: 2922; office: Berlin.

    // Lower-cased month name (several languages + roman numerals) -> month number 1..12;
    // populated by the static initializer below.
    private static Map<String, Integer> monthFromNameMap = new HashMap<>();
|
160 |
|
|
161 |
|
static {
|
162 |
|
String[] ck = new String[]{"leden", "únor", "březen", "duben", "květen", "červen", "červenec ", "srpen", "září", "říjen", "listopad", "prosinec"};
|
163 |
|
String[] fr = new String[]{"janvier", "février", "mars", "avril", "mai", "juin", "juillet", "août", "septembre", "octobre", "novembre", "décembre"};
|
164 |
|
String[] de = new String[]{"januar", "februar", "märz", "april", "mai", "juni", "juli", "august", "september", "oktober", "november", "dezember"};
|
165 |
|
String[] en = new String[]{"january", "february", "march", "april", "may", "june", "july", "august", "september", "october", "november", "december"};
|
166 |
|
String[] it = new String[]{"gennaio", "febbraio", "marzo", "aprile", "maggio", "giugno", "luglio", "agosto", "settembre", "ottobre", "novembre", "dicembre"};
|
167 |
|
String[] sp = new String[]{"enero", "febrero", "marzo", "abril", "mayo", "junio", "julio", "agosto", "septiembre", "octubre", "noviembre", "diciembre"};
|
168 |
|
String[] de_abbrev = new String[]{"jan.", "feb.", "märz", "apr.", "mai", "jun.", "jul.", "aug.", "sept.", "okt.", "nov.", "dez."};
|
169 |
|
String[] en_abbrev = new String[]{"jan.", "feb.", "mar.", "apr.", "may", "jun.", "jul.", "aug.", "sep.", "oct.", "nov.", "dec."};
|
170 |
|
String[] port = new String[]{"Janeiro", "Fevereiro", "Março", "Abril", "Maio", "Junho", "Julho", "Agosto", "Setembro", "Outubro", "Novembro", "Dezembro"};
|
171 |
|
String[] rom_num = new String[]{"i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi", "xii"};
|
172 |
|
|
173 |
|
String[][] perLang = new String[][]{ck, de, fr, en, it, sp, port, de_abbrev, en_abbrev, rom_num};
|
174 |
|
|
175 |
|
for (String[] months: perLang) {
|
176 |
|
for(int m = 1; m < 13; m++){
|
177 |
|
monthFromNameMap.put(months[m - 1].toLowerCase(), m);
|
178 |
|
}
|
179 |
|
}
|
180 |
|
|
181 |
|
// special cases
|
182 |
|
monthFromNameMap.put("mar", 3);
|
183 |
|
monthFromNameMap.put("dec", 12);
|
184 |
|
monthFromNameMap.put("februari", 2);
|
185 |
|
monthFromNameMap.put("març", 3);
|
186 |
|
}
|
187 |
|
|
188 |
|
|
189 |
|
    // Renders the year part of a parsed publication date (see makeTaxon()).
    DateTimeFormatter formatterYear = DateTimeFormat.forPattern("yyyy");

    // NOTE(review): presumably a cache of Collection entities keyed by collection code to
    // avoid duplicates -- usage is outside this chunk, confirm in the specimen handling code.
    private Map<String, Collection> collectionMap = new HashMap<>();

    private ExtensionType extensionTypeIAPTRegData = null;

    // Title caches of all names imported so far; makeTaxon() uses it to detect and mark
    // duplicate registrations.
    private Set<String> nameSet = new HashSet<>();
    private DefinedTermBase duplicateRegistration = null;
|
197 |
|
|
198 |
|
enum TypesName {
|
199 |
|
fieldUnit, holotype, isotype;
|
200 |
|
|
201 |
|
public SpecimenTypeDesignationStatus status(){
|
202 |
|
switch (this) {
|
203 |
|
case holotype:
|
204 |
|
return SpecimenTypeDesignationStatus.HOLOTYPE();
|
205 |
|
case isotype:
|
206 |
|
return SpecimenTypeDesignationStatus.ISOTYPE();
|
207 |
|
default:
|
208 |
|
return null;
|
209 |
|
}
|
210 |
|
}
|
211 |
|
}
|
212 |
|
|
213 |
|
    // NOTE(review): these term fields start out null and are presumably created lazily by
    // accessor methods of the same name (markerTypeFossil(), annotationTypeCaveats(), ...)
    // which are called in makeTaxon() but defined outside this chunk -- confirm there.
    private MarkerType markerTypeFossil = null;
    private Rank rankUnrankedSupraGeneric = null;
    private Rank familyIncertisSedis = null;
    private AnnotationType annotationTypeCaveats = null;

    // Lazily created special-case reference for the hard-to-parse "Las variedades
    // tradicionales..." book, see makeTaxon().
    private Reference bookVariedadesTradicionales = null;

    /**
     * HACK for unit simple testing
     */
    boolean _testMode = System.getProperty("TEST_MODE") != null;
|
224 |
|
|
225 |
|
    /**
     * Creates a {@link Taxon} with its {@link TaxonName}, annotations, name relations,
     * synonyms, markers and type designations from one worksheet record and attaches it
     * to the classification.
     *
     * @param record          the row as a map of column name to cell value
     * @param state           the current import state
     * @param higherTaxonNode the parent node; for species rank and below it is replaced by
     *                        the node of the (previously imported) genus if that is found
     * @param isFossil        whether the record is flagged as fossil (adds a fossil marker)
     * @return the newly created taxon
     */
    private Taxon makeTaxon(HashMap<String, String> record, SimpleExcelTaxonImportState<CONFIG> state,
            TaxonNode higherTaxonNode, boolean isFossil) {

        String regNumber = getValue(record, REGISTRATIONNO_PK, false);
        String regStr = getValue(record, REGISTRATION, true);
        String titleCacheStr = getValue(record, FULLNAME, true);
        String nameStr = getValue(record, NAMESTRING, true);
        String authorStr = getValue(record, AUTHORSTRING, true);
        String nomRefStr = getValue(record, LITSTRING, true);
        String authorsSpelling = getValue(record, AUTHORSSPELLING, true);
        String notesTxt = getValue(record, NOTESTXT, true);
        String caveats = getValue(record, CAVEATS, true);
        String fullSynSubstStr = getValue(record, FULLSYNSUBST, true);
        String fullBasionymStr = getValue(record, FULLBASIONYM, true);
        // NOTE(review): reads FULLBASIONYM a second time although a BASIONYMSTRING column
        // constant exists -- confirm this is intended and not a copy/paste slip.
        String basionymNameStr = getValue(record, FULLBASIONYM, true);
        String synSubstStr = getValue(record, SYNSUBSTSTR, true);
        String typeStr = getValue(record, TYPE, true);

        String nomRefTitle = null;
        String nomRefDetail;
        String nomRefPupDate = null;
        String nomRefIssue = null;
        Partial pupDate = null;

        boolean restoreOriginalReference = false;
        boolean nameIsValid = true;

        // preprocess nomRef: separate citation, reference detail, publishing date
        if(!StringUtils.isEmpty(nomRefStr)){
            nomRefStr = nomRefStr.trim();

            // handle the special case which is hard to parse:
            //
            // Las variedades tradicionales de frutales de la Cuenca del Río Segura. Catálogo Etnobotánico (1): Frutos secos, oleaginosos, frutales de hueso, almendros y frutales de pepita: 154. 1997.
            if(nomRefStr.startsWith("Las variedades tradicionales de frutales ")){

                // lazily create the shared book reference once and reuse it for all records
                if(bookVariedadesTradicionales == null){
                    bookVariedadesTradicionales = ReferenceFactory.newBook();
                    bookVariedadesTradicionales.setTitle("Las variedades tradicionales de frutales de la Cuenca del Río Segura. Catálogo Etnobotánico (1): Frutos secos, oleaginosos, frutales de hueso, almendros y frutales de pepita");
                    bookVariedadesTradicionales.setDatePublished(VerbatimTimePeriod.NewVerbatimInstance(1997));
                    getReferenceService().save(bookVariedadesTradicionales);
                }
                // shorten the string so nomRefTokenizeP can handle it; the parsed reference
                // is swapped for the real book below (restoreOriginalReference)
                nomRefStr = nomRefStr.replaceAll("^.*?\\:.*?\\:", "Las variedades tradicionales:");
                restoreOriginalReference = true;
            }

            Matcher m = nomRefTokenizeP.matcher(nomRefStr);
            if(m.matches()){
                nomRefTitle = m.group("title");
                nomRefDetail = m.group("detail");
                nomRefPupDate = m.group("date").trim();
                nomRefIssue = m.group("issue");

                // rebuild the reference citation with the year only; the full (partial) date
                // is set on the reference after name parsing, see below
                pupDate = parseDate(regNumber, nomRefPupDate);
                if (pupDate != null) {
                    nomRefTitle = nomRefTitle + ": " + nomRefDetail + ". " + pupDate.toString(formatterYear) + ".";
                } else {
                    logger.warn(csvReportLine(regNumber, "Pub date", nomRefPupDate, "in", nomRefStr, "not parsable"));
                }
            } else {
                // not tokenizable; pass the whole string on to the name parser
                nomRefTitle = nomRefStr;
            }
        }

        TaxonName taxonName = makeBotanicalName(state, regNumber, titleCacheStr, nameStr, authorStr, nomRefTitle);

        // always add the original strings of parsed data as annotation
        taxonName.addAnnotation(Annotation.NewInstance("imported and parsed data strings:" +
                "\n - '" + LITSTRING + "': "+ nomRefStr +
                "\n - '" + TYPE + "': " + typeStr +
                "\n - '" + REGISTRATION + "': " + regStr
                , AnnotationType.TECHNICAL(), Language.DEFAULT()));

        if(restoreOriginalReference){
            taxonName.setNomenclaturalReference(bookVariedadesTradicionales);
        }

        // transfer the parsed date and issue onto the (parsed or restored) reference
        if(taxonName.getNomenclaturalReference() != null){
            if(pupDate != null) {
                taxonName.getNomenclaturalReference().setDatePublished(VerbatimTimePeriod.NewVerbatimInstance(pupDate));
            }
            if(nomRefIssue != null) {
                taxonName.getNomenclaturalReference().setVolume(nomRefIssue);
            }
        }

        // notes and caveats are stored as annotations; either one makes the name non-valid
        if(!StringUtils.isEmpty(notesTxt)){
            notesTxt = notesTxt.replace("Notes: ", "").trim();
            taxonName.addAnnotation(Annotation.NewInstance(notesTxt, AnnotationType.EDITORIAL(), Language.DEFAULT()));
            nameIsValid = false;

        }
        if(!StringUtils.isEmpty(caveats)){
            caveats = caveats.replace("Caveats: ", "").trim();
            taxonName.addAnnotation(Annotation.NewInstance(caveats, annotationTypeCaveats(), Language.DEFAULT()));
            nameIsValid = false;
        }

        if(nameIsValid){
            // Status is always considered valid if no notes and cavets are set
            taxonName.addStatus(NomenclaturalStatus.NewInstance(NomenclaturalStatusType.VALID()));
        }

        getNameService().save(taxonName);

        // Namerelations
        if(!StringUtils.isEmpty(authorsSpelling)){
            authorsSpelling = authorsSpelling.replaceFirst("Author's spelling:", "").replaceAll("\"", "").trim();

            String[] authorSpellingTokens = StringUtils.split(authorsSpelling, " ");
            String[] nameStrTokens = StringUtils.split(nameStr, " ");

            // overlay the trailing tokens of the name with the misspelled author tokens:
            // reverse both, copy from the front, reverse back
            ArrayUtils.reverse(authorSpellingTokens);
            ArrayUtils.reverse(nameStrTokens);

            for (int i = 0; i < nameStrTokens.length; i++){
                if(i < authorSpellingTokens.length){
                    nameStrTokens[i] = authorSpellingTokens[i];
                }
            }
            ArrayUtils.reverse(nameStrTokens);

            String misspelledNameStr = StringUtils.join (nameStrTokens, ' ');
            // build the fullnameString of the misspelled name
            misspelledNameStr = taxonName.getTitleCache().replace(nameStr, misspelledNameStr);

            TaxonName misspelledName = nameParser.parseReferencedName(misspelledNameStr, NomenclaturalCode.ICNAFP, null);
            misspelledName.addRelationshipToName(taxonName, NameRelationshipType.MISSPELLING(), null);
            getNameService().save(misspelledName);
        }

        // Replaced Synonyms
        if(!StringUtils.isEmpty(fullSynSubstStr)){
            fullSynSubstStr = fullSynSubstStr.replace("Syn. subst.: ", "");
            TaxonName replacedSynonymName = makeBotanicalName(state, regNumber, fullSynSubstStr, synSubstStr, null, null);
            replacedSynonymName.addReplacedSynonym(taxonName, null, null, null);
            getNameService().save(replacedSynonymName);
        }

        Reference sec = state.getConfig().getSecReference();
        Taxon taxon = Taxon.NewInstance(taxonName, sec);

        // Basionym
        if(fullBasionymStr != null){
            fullBasionymStr = fullBasionymStr.replaceAll("^\\w*:\\s", ""); // Strip off the leading 'Basionym: "
            basionymNameStr = basionymNameStr.replaceAll("^\\w*:\\s", ""); // Strip off the leading 'Basionym: "
            TaxonName basionym = makeBotanicalName(state, regNumber, fullBasionymStr, basionymNameStr, null, null);
            getNameService().save(basionym);
            taxonName.addBasionym(basionym);

            // the basionym is also attached to the taxon as a homotypic synonym
            Synonym syn = Synonym.NewInstance(basionym, sec);
            taxon.addSynonym(syn, SynonymType.HOMOTYPIC_SYNONYM_OF());
            getTaxonService().save(syn);
        }

        // Markers
        if(isFossil){
            taxon.addMarker(Marker.NewInstance(markerTypeFossil(), true));
        }
        // nameSet.add() returns false if the titleCache was seen before -> duplicate registration
        if(!nameSet.add(titleCacheStr)){
            taxonName.addMarker(Marker.NewInstance(markerDuplicateRegistration(), true));
            logger.warn(csvReportLine(regNumber, "Duplicate registration of", titleCacheStr));
        }


        // Types
        if(!StringUtils.isEmpty(typeStr)){

            if(taxonName.getRank().isSpecies() || taxonName.getRank().isLower(Rank.SPECIES())) {
                // NOTE(review): passes hard-coded false for isFossil although this method's
                // own isFossil flag is available -- verify whether fossils should propagate here.
                makeSpecimenTypeData(typeStr, taxonName, regNumber, state, false);
            } else {
                makeNameTypeData(typeStr, taxonName, regNumber, state);
            }
        }

        getTaxonService().save(taxon);

        if(taxonName.getRank().equals(Rank.SPECIES()) || taxonName.getRank().isLower(Rank.SPECIES())){
            // try to find the genus, it should have been imported already, Genera are coming first in the import file
            Taxon genus = ((IAPTImportState)state).getGenusTaxonMap().get(taxonName.getGenusOrUninomial());
            if(genus != null){
                higherTaxonNode = genus.getTaxonNodes().iterator().next();
            } else {
                logger.info(csvReportLine(regNumber, "Parent genus not found for", nameStr));
            }
        }

        if(higherTaxonNode != null){
            higherTaxonNode.addChildTaxon(taxon, null, null);
            getTaxonNodeService().save(higherTaxonNode);
        }

        // remember genera so later species records can be attached to them
        if(taxonName.getRank().isGenus()){
            ((IAPTImportState)state).getGenusTaxonMap().put(taxonName.getGenusOrUninomial(), taxon);
        }

        return taxon;
    }
|
425 |
|
|
426 |
|
private void makeSpecimenTypeData(String typeStr, TaxonName taxonName, String regNumber, SimpleExcelTaxonImportState<CONFIG> state, boolean isFossil) {
|
427 |
|
|
428 |
|
Matcher m = typeSpecimenSplitPattern.matcher(typeStr);
|
429 |
|
|
430 |
|
if(m.matches()){
|
431 |
|
String fieldUnitStr = m.group(TypesName.fieldUnit.name());
|
432 |
|
// boolean isFieldUnit = typeStr.matches(".*([°']|\\d+\\s?m\\s|\\d+\\s?km\\s).*"); // check for location or unit m, km // makes no sense!!!!
|
433 |
|
FieldUnit fieldUnit = parseFieldUnit(fieldUnitStr, regNumber, state);
|
434 |
|
if(fieldUnit == null) {
|
435 |
|
// create a field unit with only a titleCache using the fieldUnitStr substring
|
436 |
|
logger.warn(csvReportLine(regNumber, "Type: fieldUnitStr can not be parsed", fieldUnitStr));
|
437 |
|
fieldUnit = FieldUnit.NewInstance();
|
438 |
|
fieldUnit.setTitleCache(fieldUnitStr, true);
|
439 |
|
getOccurrenceService().save(fieldUnit);
|
440 |
|
}
|
441 |
|
getOccurrenceService().save(fieldUnit);
|
442 |
|
|
443 |
|
SpecimenOrObservationType specimenType;
|
444 |
|
if(isFossil){
|
445 |
|
specimenType = SpecimenOrObservationType.Fossil;
|
446 |
|
} else {
|
447 |
|
specimenType = SpecimenOrObservationType.PreservedSpecimen;
|
448 |
|
}
|
449 |
|
|
450 |
|
// all others ..
|
451 |
|
addSpecimenTypes(taxonName, fieldUnit, m.group(TypesName.holotype.name()), TypesName.holotype, false, regNumber, specimenType);
|
452 |
|
addSpecimenTypes(taxonName, fieldUnit, m.group(TypesName.isotype.name()), TypesName.isotype, true, regNumber, specimenType);
|
453 |
|
|
454 |
|
} else {
|
455 |
|
// create a field unit with only a titleCache using the full typeStr
|
456 |
|
FieldUnit fieldUnit = FieldUnit.NewInstance();
|
457 |
|
fieldUnit.setTitleCache(typeStr, true);
|
458 |
|
getOccurrenceService().save(fieldUnit);
|
459 |
|
logger.warn(csvReportLine(regNumber, "Type: field 'Type' can not be parsed", typeStr));
|
460 |
|
}
|
461 |
|
getNameService().save(taxonName);
|
462 |
|
}
|
463 |
|
|
464 |
|
private void makeNameTypeData(String typeStr, IBotanicalName taxonName, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) {
|
465 |
|
|
466 |
|
String nameStr = typeStr.replaceAll("^Type\\s?\\:\\s?", "");
|
467 |
|
if(nameStr.isEmpty()) {
|
468 |
|
return;
|
469 |
|
}
|
470 |
|
|
471 |
|
String basionymNameStr = null;
|
472 |
|
String noteStr = null;
|
473 |
|
String agentStr = null;
|
474 |
|
|
475 |
|
Matcher m;
|
476 |
|
|
477 |
|
if(typeStr.startsWith("not to be indicated")){
|
478 |
|
// Special case:
|
479 |
|
// Type: not to be indicated (Art. H.9.1. Tokyo Code); stated parent genera: Hechtia Klotzsch; Deuterocohnia Mez
|
480 |
|
// FIXME
|
481 |
|
m = typeNameSpecialSplitPattern.matcher(nameStr);
|
482 |
|
if(m.matches()){
|
483 |
|
nameStr = m.group("name");
|
484 |
|
noteStr = m.group("note");
|
485 |
|
agentStr = m.group("agent");
|
486 |
|
// TODO better import of agent?
|
487 |
|
if(agentStr != null){
|
488 |
|
noteStr = noteStr + ": " + agentStr;
|
489 |
|
}
|
490 |
|
}
|
491 |
|
} else {
|
492 |
|
// Generic case
|
493 |
|
m = typeNameBasionymPattern.matcher(nameStr);
|
494 |
|
if (m.find()) {
|
495 |
|
basionymNameStr = m.group("basionymName");
|
496 |
|
if (basionymNameStr != null) {
|
497 |
|
nameStr = nameStr.replace(m.group(0), "");
|
498 |
|
}
|
499 |
|
}
|
500 |
|
|
501 |
|
m = typeNameNotePattern.matcher(nameStr);
|
502 |
|
if (m.find()) {
|
503 |
|
noteStr = m.group(1);
|
504 |
|
if (noteStr != null) {
|
505 |
|
nameStr = nameStr.replace(m.group(0), "");
|
506 |
|
}
|
507 |
|
}
|
508 |
|
}
|
509 |
|
|
510 |
|
TaxonName typeName = (TaxonName) nameParser.parseFullName(nameStr, NomenclaturalCode.ICNAFP, null);
|
511 |
|
|
512 |
|
if(typeName.isProtectedTitleCache() || typeName.getNomenclaturalReference() != null && typeName.getNomenclaturalReference().isProtectedTitleCache()) {
|
513 |
|
logger.warn(csvReportLine(regNumber, "NameType not parsable", typeStr, nameStr));
|
514 |
|
}
|
515 |
|
|
516 |
|
if(basionymNameStr != null){
|
517 |
|
TaxonName basionymName = (TaxonName) nameParser.parseFullName(nameStr, NomenclaturalCode.ICNAFP, null);
|
518 |
|
getNameService().save(basionymName);
|
519 |
|
typeName.addBasionym(basionymName);
|
520 |
|
}
|
521 |
|
|
522 |
|
|
523 |
|
NameTypeDesignation nameTypeDesignation = NameTypeDesignation.NewInstance();
|
524 |
|
nameTypeDesignation.setTypeName(typeName);
|
525 |
|
getNameService().save(typeName);
|
526 |
|
|
527 |
|
if(noteStr != null){
|
528 |
|
nameTypeDesignation.addAnnotation(Annotation.NewInstance(noteStr, AnnotationType.EDITORIAL(), Language.UNKNOWN_LANGUAGE()));
|
529 |
|
}
|
530 |
|
taxonName.addNameTypeDesignation(typeName, null, null, null, null, false);
|
531 |
|
|
532 |
|
}
|
533 |
|
|
534 |
|
    /**
     * Creates a {@link FieldUnit} from the field-unit part of a 'Type' string.
     * Currently only parses the collector, fieldNumber and the collection date;
     * the remainder of the string (after removing the collector statement) is
     * stored as the gathering locality.
     *
     * @param fieldUnitStr the raw field unit substring of the 'Type' column
     * @param regNumber    the registration number, used for log reporting only
     * @param state        the import state; used as agent cache for collectors
     * @return null if the fieldUnitStr could not be parsed
     */
    protected FieldUnit parseFieldUnit(String fieldUnitStr, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) {

        FieldUnit fieldUnit = null;

        Matcher m1 = collectorPattern.matcher(fieldUnitStr);
        if(m1.matches()){

            // group indices correspond to the alternatives of collectorPattern:
            // 1/2 = fullStr1/data1, 3/4 = fullStr2/data2, 5/6 = fullStr3/data3
            String collectorData = m1.group(2); // like ... (leg. Metzeltin, 30. 9. 1996)
            String removal = m1.group(1);
            if(collectorData == null){
                collectorData = m1.group(4); // like ... leg. Metzeltin, 30. 9. 1996
                removal = m1.group(3);
            }
            if(collectorData == null){
                collectorData = m1.group(6); // like ^leg. J. J. Halda 18.3.1997$
                removal = null;
            }
            if(collectorData == null){
                return null;
            }

            // the fieldUnitStr is parsable
            // remove all collectorData from the fieldUnitStr and use the rest as locality
            String locality = null;
            if(removal != null){
                locality = fieldUnitStr.replace(removal, "");
            }

            String collectorStr = null;
            String detailStr = null;
            Partial date = null;
            String fieldNumber = null;

            Matcher m2 = collectionDataPattern.matcher(collectorData);
            if(m2.matches()){
                collectorStr = m2.group("collector");
                detailStr = m2.group("detail");

                // Try to make sense of the detailStr
                if(detailStr != null){
                    detailStr = detailStr.trim();
                    // 1. try to parse as date
                    date = parseDate(regNumber, detailStr);
                    if(date == null){
                        // 2. try to parse as number
                        if(collectorsNumber.matcher(detailStr).matches()){
                            fieldNumber = detailStr;
                        }
                    }
                }
                if(date == null && fieldNumber == null){
                    // detailed parsing not possible, so we need to fall back to
                    // treating the whole collector data as the collector string
                    collectorStr = collectorData;
                }
            }

            if(collectorStr == null) {
                collectorStr = collectorData;
            }

            fieldUnit = FieldUnit.NewInstance();
            GatheringEvent ge = GatheringEvent.NewInstance();
            if(locality != null){
                ge.setLocality(LanguageString.NewInstance(locality, Language.UNKNOWN_LANGUAGE()));
            }

            // reuse a previously created agent for the same collector string, if any
            TeamOrPersonBase agent = state.getAgentBase(collectorStr);
            if(agent == null) {
                agent = Person.NewTitledInstance(collectorStr);
                getAgentService().save(agent);
                state.putAgentBase(collectorStr, agent);
            }
            ge.setCollector(agent);

            if(date != null){
                ge.setGatheringDate(date);
            }

            getEventBaseService().save(ge);
            fieldUnit.setGatheringEvent(ge);

            if(fieldNumber != null) {
                fieldUnit.setFieldNumber(fieldNumber);
            }
            getOccurrenceService().save(fieldUnit);

        }

        return fieldUnit;
    }
|
632 |
|
|
633 |
|
protected Partial parseDate(String regNumber, String dateStr) {
|
634 |
|
|
635 |
|
Partial pupDate = null;
|
636 |
|
boolean parseError = false;
|
637 |
|
|
638 |
|
String day = null;
|
639 |
|
String month = null;
|
640 |
|
String monthName = null;
|
641 |
|
String year = null;
|
642 |
|
|
643 |
|
for(Pattern p : datePatterns){
|
644 |
|
Matcher m2 = p.matcher(dateStr);
|
645 |
|
if(m2.matches()){
|
646 |
|
try {
|
647 |
|
year = m2.group("year");
|
648 |
|
} catch (IllegalArgumentException e){
|
649 |
|
// named capture group not found
|
650 |
|
}
|
651 |
|
try {
|
652 |
|
month = m2.group("month");
|
653 |
|
} catch (IllegalArgumentException e){
|
654 |
|
// named capture group not found
|
655 |
|
}
|
656 |
|
|
657 |
|
try {
|
658 |
|
monthName = m2.group("monthName");
|
659 |
|
month = monthFromName(monthName, regNumber);
|
660 |
|
if(month == null){
|
661 |
|
parseError = true;
|
662 |
|
}
|
663 |
|
} catch (IllegalArgumentException e){
|
664 |
|
// named capture group not found
|
665 |
|
}
|
666 |
|
try {
|
667 |
|
day = m2.group("day");
|
668 |
|
} catch (IllegalArgumentException e){
|
669 |
|
// named capture group not found
|
670 |
|
}
|
671 |
|
|
672 |
|
if(year != null){
|
673 |
|
if (year.length() == 2) {
|
674 |
|
// it is an abbreviated year from the 19** years
|
675 |
|
year = "19" + year;
|
676 |
|
}
|
677 |
|
break;
|
678 |
|
} else {
|
679 |
|
parseError = true;
|
680 |
|
}
|
681 |
|
}
|
682 |
|
}
|
683 |
|
if(year == null){
|
684 |
|
parseError = true;
|
685 |
|
}
|
686 |
|
List<DateTimeFieldType> types = new ArrayList<>();
|
687 |
|
List<Integer> values = new ArrayList<>();
|
688 |
|
if(!parseError) {
|
689 |
|
types.add(DateTimeFieldType.year());
|
690 |
|
values.add(Integer.parseInt(year));
|
691 |
|
if (month != null) {
|
692 |
|
types.add(DateTimeFieldType.monthOfYear());
|
693 |
|
values.add(Integer.parseInt(month));
|
694 |
|
}
|
695 |
|
if (day != null) {
|
696 |
|
types.add(DateTimeFieldType.dayOfMonth());
|
697 |
|
values.add(Integer.parseInt(day));
|
698 |
|
}
|
699 |
|
pupDate = new Partial(types.toArray(new DateTimeFieldType[types.size()]), ArrayUtils.toPrimitive(values.toArray(new Integer[values.size()])));
|
700 |
|
}
|
701 |
|
return pupDate;
|
702 |
|
}
|
703 |
|
|
704 |
|
private String monthFromName(String monthName, String regNumber) {
|
705 |
|
|
706 |
|
Integer month = monthFromNameMap.get(monthName.toLowerCase());
|
707 |
|
if(month == null){
|
708 |
|
logger.warn(csvReportLine(regNumber, "Unknown month name", monthName));
|
709 |
|
return null;
|
710 |
|
} else {
|
711 |
|
return month.toString();
|
712 |
|
}
|
713 |
|
}
|
714 |
|
|
715 |
|
|
716 |
|
private void addSpecimenTypes(IBotanicalName taxonName, FieldUnit fieldUnit, String typeStr, TypesName typeName, boolean multiple, String regNumber, SpecimenOrObservationType specimenType){
|
717 |
|
|
718 |
|
if(StringUtils.isEmpty(typeStr)){
|
719 |
|
return;
|
720 |
|
}
|
721 |
|
typeStr = typeStr.trim().replaceAll("\\.$", "");
|
722 |
|
|
723 |
|
Collection collection = null;
|
724 |
|
DerivedUnit specimen = null;
|
725 |
|
|
726 |
|
List<DerivedUnit> specimens = new ArrayList<>();
|
727 |
|
if(multiple){
|
728 |
|
String[] tokens = typeStr.split("\\s?,\\s?");
|
729 |
|
for (String t : tokens) {
|
730 |
|
// command to list all complex parsabel types:
|
731 |
|
// csvcut -t -c RegistrationNo_Pk,Type iapt.csv | csvgrep -c Type -m "Holotype" | egrep -o 'Holotype:\s([A-Z]*\s)[^.]*?'
|
732 |
|
// csvcut -t -c RegistrationNo_Pk,Type iapt.csv | csvgrep -c Type -m "Holotype" | egrep -o 'Isotype[^:]*:\s([A-Z]*\s)[^.]*?'
|
733 |
|
|
734 |
|
if(!t.isEmpty()){
|
735 |
|
// trying to parse the string
|
736 |
|
specimen = parseSpecimenType(fieldUnit, typeName, collection, t, regNumber);
|
737 |
|
if(specimen != null){
|
738 |
|
specimens.add(specimen);
|
739 |
|
} else {
|
740 |
|
// parsing was not successful make simple specimen
|
741 |
|
specimens.add(makeSpecimenType(fieldUnit, t, specimenType));
|
742 |
|
}
|
743 |
|
}
|
744 |
|
}
|
745 |
|
} else {
|
746 |
|
specimen = parseSpecimenType(fieldUnit, typeName, collection, typeStr, regNumber);
|
747 |
|
if(specimen != null) {
|
748 |
|
specimens.add(specimen);
|
749 |
|
// remember current collection
|
750 |
|
collection = specimen.getCollection();
|
751 |
|
} else {
|
752 |
|
// parsing was not successful make simple specimen
|
753 |
|
specimens.add(makeSpecimenType(fieldUnit, typeStr, SpecimenOrObservationType.PreservedSpecimen));
|
754 |
|
}
|
755 |
|
}
|
756 |
|
|
757 |
|
for(DerivedUnit s : specimens){
|
758 |
|
taxonName.addSpecimenTypeDesignation(s, typeName.status(), null, null, null, false, true);
|
759 |
|
}
|
760 |
|
}
|
761 |
|
|
762 |
|
private DerivedUnit makeSpecimenType(FieldUnit fieldUnit, String titleCache, SpecimenOrObservationType specimenType) {
|
763 |
|
DerivedUnit specimen;DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(specimenType, fieldUnit);
|
764 |
|
facade.setTitleCache(titleCache.trim(), true);
|
765 |
|
specimen = facade.innerDerivedUnit();
|
766 |
|
return specimen;
|
767 |
|
}
|
768 |
|
|
769 |
|
/**
|
770 |
|
*
|
771 |
|
* @param fieldUnit
|
772 |
|
* @param typeName
|
773 |
|
* @param collection
|
774 |
|
* @param text
|
775 |
|
* @param regNumber
|
776 |
|
* @return
|
777 |
|
*/
|
778 |
|
protected DerivedUnit parseSpecimenType(FieldUnit fieldUnit, TypesName typeName, Collection collection, String text, String regNumber) {
|
779 |
|
|
780 |
|
DerivedUnit specimen = null;
|
781 |
|
|
782 |
|
String collectionCode = null;
|
783 |
|
String collectionTitle = null;
|
784 |
|
String subCollectionStr = null;
|
785 |
|
String instituteStr = null;
|
786 |
|
String accessionNumber = null;
|
787 |
|
|
788 |
|
boolean unusualAccessionNumber = false;
|
789 |
|
|
790 |
|
text = text.trim();
|
791 |
|
|
792 |
|
// 1. For Isotypes often the accession number is noted alone if the
|
793 |
|
// preceeding entry has a collection code.
|
794 |
|
if(typeName .equals(TypesName.isotype) && collection != null){
|
795 |
|
Matcher m = accessionNumberOnlyPattern.matcher(text);
|
796 |
|
if(m.matches()){
|
797 |
|
try {
|
798 |
|
accessionNumber = m.group("accNumber");
|
799 |
|
specimen = makeSpecimenType(fieldUnit, collection, accessionNumber);
|
800 |
|
} catch (IllegalArgumentException e){
|
801 |
|
// match group acc_number not found
|
802 |
|
}
|
803 |
|
}
|
804 |
|
}
|
805 |
|
|
806 |
|
//2. try it the 'normal' way
|
807 |
|
if(specimen == null) {
|
808 |
|
for (Pattern p : specimenTypePatterns) {
|
809 |
|
Matcher m = p.matcher(text);
|
810 |
|
if (m.matches()) {
|
811 |
|
// collection code or collectionTitle is mandatory
|
812 |
|
try {
|
813 |
|
collectionCode = m.group("colCode");
|
814 |
|
} catch (IllegalArgumentException e){
|
815 |
|
// match group colCode not found
|
816 |
|
}
|
817 |
|
|
818 |
|
try {
|
819 |
|
instituteStr = m.group("institute");
|
820 |
|
} catch (IllegalArgumentException e){
|
821 |
|
// match group col_name not found
|
822 |
|
}
|
823 |
|
|
824 |
|
try {
|
825 |
|
subCollectionStr = m.group("subCollection");
|
826 |
|
} catch (IllegalArgumentException e){
|
827 |
|
// match group subCollection not found
|
828 |
|
}
|
829 |
|
try {
|
830 |
|
accessionNumber = m.group("accNumber");
|
831 |
|
|
832 |
|
// try to improve the accessionNumber
|
833 |
|
if(accessionNumber!= null) {
|
834 |
|
accessionNumber = accessionNumber.trim();
|
835 |
|
Matcher m2 = accessionNumberOnlyPattern.matcher(accessionNumber);
|
836 |
|
String betterAccessionNumber = null;
|
837 |
|
if (m2.matches()) {
|
838 |
|
try {
|
839 |
|
betterAccessionNumber = m.group("accNumber");
|
840 |
|
} catch (IllegalArgumentException e) {
|
841 |
|
// match group acc_number not found
|
842 |
|
}
|
843 |
|
}
|
844 |
|
if (betterAccessionNumber != null) {
|
845 |
|
accessionNumber = betterAccessionNumber;
|
846 |
|
} else {
|
847 |
|
unusualAccessionNumber = true;
|
848 |
|
}
|
849 |
|
}
|
850 |
|
|
851 |
|
} catch (IllegalArgumentException e){
|
852 |
|
// match group acc_number not found
|
853 |
|
}
|
854 |
|
|
855 |
|
if(collectionCode == null && instituteStr == null){
|
856 |
|
logger.warn(csvReportLine(regNumber, "Type: neither 'collectionCode' nor 'institute' found in ", text));
|
857 |
|
continue;
|
858 |
|
}
|
859 |
|
collection = getCollection(collectionCode, instituteStr, subCollectionStr);
|
860 |
|
specimen = makeSpecimenType(fieldUnit, collection, accessionNumber);
|
861 |
|
break;
|
862 |
|
}
|
863 |
|
}
|
864 |
|
}
|
865 |
|
if(specimen == null) {
|
866 |
|
logger.warn(csvReportLine(regNumber, "Type: Could not parse specimen", typeName.name().toString(), text));
|
867 |
|
}
|
868 |
|
if(unusualAccessionNumber){
|
869 |
|
logger.warn(csvReportLine(regNumber, "Type: Unusual accession number", typeName.name().toString(), text, accessionNumber));
|
870 |
|
}
|
871 |
|
return specimen;
|
872 |
|
}
|
873 |
|
|
874 |
|
private DerivedUnit makeSpecimenType(FieldUnit fieldUnit, Collection collection, String accessionNumber) {
|
875 |
|
|
876 |
|
DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.PreservedSpecimen, fieldUnit);
|
877 |
|
facade.setCollection(collection);
|
878 |
|
if(accessionNumber != null){
|
879 |
|
facade.setAccessionNumber(accessionNumber);
|
880 |
|
}
|
881 |
|
return facade.innerDerivedUnit();
|
882 |
|
}
|
883 |
|
|
884 |
|
private TaxonName makeBotanicalName(SimpleExcelTaxonImportState<CONFIG> state, String regNumber, String titleCacheStr, String nameStr,
|
885 |
|
String authorStr, String nomRefTitle) {
|
886 |
|
|
887 |
|
TaxonName taxonName;// cache field for the taxonName.titleCache
|
888 |
|
String taxonNameTitleCache = null;
|
889 |
|
Map<String, AnnotationType> nameAnnotations = new HashMap<>();
|
890 |
|
|
891 |
|
// TitleCache preprocessing
|
892 |
|
if(titleCacheStr.endsWith(ANNOTATION_MARKER_STRING) || (authorStr != null && authorStr.endsWith(ANNOTATION_MARKER_STRING))){
|
893 |
|
nameAnnotations.put("Author abbreviation not checked.", AnnotationType.EDITORIAL());
|
894 |
|
titleCacheStr = titleCacheStr.replace(ANNOTATION_MARKER_STRING, "").trim();
|
895 |
|
if(authorStr != null) {
|
896 |
|
authorStr = authorStr.replace(ANNOTATION_MARKER_STRING, "").trim();
|
897 |
|
}
|
898 |
|
}
|
899 |
|
|
900 |
|
// parse the full taxon name
|
901 |
|
if(!StringUtils.isEmpty(nomRefTitle)){
|
902 |
|
String referenceSeparator = nomRefTitle.startsWith("in ") ? " " : ", ";
|
903 |
|
String taxonFullNameStr = titleCacheStr + referenceSeparator + nomRefTitle;
|
904 |
|
logger.debug(":::::" + taxonFullNameStr);
|
905 |
|
taxonName = nameParser.parseReferencedName(taxonFullNameStr, NomenclaturalCode.ICNAFP, null);
|
906 |
|
} else {
|
907 |
|
taxonName = (TaxonName) nameParser.parseFullName(titleCacheStr, NomenclaturalCode.ICNAFP, null);
|
908 |
|
}
|
909 |
|
|
910 |
|
taxonNameTitleCache = taxonName.getTitleCache().trim();
|
911 |
|
if (taxonName.isProtectedTitleCache()) {
|
912 |
|
logger.warn(csvReportLine(regNumber, "Name could not be parsed", titleCacheStr));
|
913 |
|
} else {
|
914 |
|
|
915 |
|
boolean doRestoreTitleCacheStr = false;
|
916 |
|
|
917 |
|
// Check if titleCache and nameCache are plausible
|
918 |
|
String titleCacheCompareStr = titleCacheStr;
|
919 |
|
String nameCache = taxonName.getNameCache();
|
920 |
|
String nameCompareStr = nameStr;
|
921 |
|
if(taxonName.isBinomHybrid()){
|
922 |
|
titleCacheCompareStr = titleCacheCompareStr.replace(" x ", " ×");
|
923 |
|
nameCompareStr = nameCompareStr.replace(" x ", " ×");
|
924 |
|
}
|
925 |
|
if(taxonName.isMonomHybrid()){
|
926 |
|
titleCacheCompareStr = titleCacheCompareStr.replaceAll("^X ", "× ");
|
927 |
|
nameCompareStr = nameCompareStr.replace("^X ", "× ");
|
928 |
|
}
|
929 |
|
if(authorStr != null && authorStr.contains(" et ")){
|
930 |
|
titleCacheCompareStr = titleCacheCompareStr.replaceAll(" et ", " & ");
|
931 |
|
}
|
932 |
|
if (!taxonNameTitleCache.equals(titleCacheCompareStr)) {
|
933 |
|
logger.warn(csvReportLine(regNumber, "The generated titleCache differs from the imported string", taxonNameTitleCache, " != ", titleCacheStr, " ==> original titleCacheStr has been restored"));
|
934 |
|
doRestoreTitleCacheStr = true;
|
935 |
|
}
|
936 |
|
if (!nameCache.trim().equals(nameCompareStr)) {
|
937 |
|
logger.warn(csvReportLine(regNumber, "The parsed nameCache differs from field '" + NAMESTRING + "'", nameCache, " != ", nameCompareStr));
|
938 |
|
}
|
939 |
|
|
940 |
|
// Author
|
941 |
|
//nameParser.handleAuthors(taxonName, titleCacheStr, authorStr);
|
942 |
|
//if (!titleCacheStr.equals(taxonName.getTitleCache())) {
|
943 |
|
// logger.warn(regNumber + ": titleCache has changed after setting authors, will restore original titleCacheStr");
|
944 |
|
// doRestoreTitleCacheStr = true;
|
945 |
|
//}
|
946 |
|
|
947 |
|
if(doRestoreTitleCacheStr){
|
948 |
|
taxonName.setTitleCache(titleCacheStr, true);
|
949 |
|
}
|
950 |
|
|
951 |
|
// deduplicate
|
952 |
|
replaceAuthorNamesAndNomRef(state, taxonName);
|
953 |
|
}
|
954 |
|
|
955 |
|
// Annotations
|
956 |
|
if(!nameAnnotations.isEmpty()){
|
957 |
|
for(String text : nameAnnotations.keySet()){
|
958 |
|
taxonName.addAnnotation(Annotation.NewInstance(text, nameAnnotations.get(text), Language.DEFAULT()));
|
959 |
|
}
|
960 |
|
}
|
961 |
|
|
962 |
|
taxonName.addSource(OriginalSourceType.Import, regNumber, null, state.getConfig().getSourceReference(), null);
|
963 |
|
|
964 |
|
getNameService().save(taxonName);
|
965 |
|
|
966 |
|
return taxonName;
|
967 |
|
}
|
968 |
|
|
969 |
|
/**
|
970 |
|
* @param state
|
971 |
|
* @return
|
972 |
|
*/
|
973 |
|
private TaxonNode getClassificationRootNode(IAPTImportState state) {
|
974 |
|
|
975 |
|
// Classification classification = state.getClassification();
|
976 |
|
// if (classification == null){
|
977 |
|
// IAPTImportConfigurator config = state.getConfig();
|
978 |
|
// classification = Classification.NewInstance(state.getConfig().getClassificationName());
|
979 |
|
// classification.setUuid(config.getClassificationUuid());
|
980 |
|
// classification.setReference(config.getSecReference());
|
981 |
|
// classification = getClassificationService().find(state.getConfig().getClassificationUuid());
|
982 |
|
// }
|
983 |
|
TaxonNode rootNode = state.getRootNode();
|
984 |
|
if (rootNode == null){
|
985 |
|
rootNode = getTaxonNodeService().find(ROOT_UUID);
|
986 |
|
}
|
987 |
|
if (rootNode == null){
|
988 |
|
Classification classification = state.getClassification();
|
989 |
|
if (classification == null){
|
990 |
|
Reference sec = state.getSecReference();
|
991 |
|
String classificationName = state.getConfig().getClassificationName();
|
992 |
|
Language language = Language.DEFAULT();
|
993 |
|
classification = Classification.NewInstance(classificationName, sec, language);
|
994 |
|
state.setClassification(classification);
|
995 |
|
classification.setUuid(state.getConfig().getClassificationUuid());
|
996 |
|
classification.getRootNode().setUuid(ROOT_UUID);
|
997 |
|
getClassificationService().save(classification);
|
998 |
|
}
|
999 |
|
rootNode = classification.getRootNode();
|
1000 |
|
state.setRootNode(rootNode);
|
1001 |
|
}
|
1002 |
|
return rootNode;
|
1003 |
|
}
|
ref #7420 first version of phycobank higher classification import