ec0d2f63117f7b1c0a8e69de5d61a641286c2f1b
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / redlist / germanSL / GermanSLTaxonImport.java
1 /**
2 * Copyright (C) 2016 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.io.redlist.germanSL;
10
11 import java.util.Arrays;
12 import java.util.HashMap;
13 import java.util.List;
14 import java.util.Map;
15 import java.util.Set;
16 import java.util.UUID;
17
18 import org.apache.log4j.Logger;
19 import org.springframework.stereotype.Component;
20
21 import eu.etaxonomy.cdm.common.CdmUtils;
22 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
23 import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
24 import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
25 import eu.etaxonomy.cdm.model.common.CdmBase;
26 import eu.etaxonomy.cdm.model.common.DefinedTerm;
27 import eu.etaxonomy.cdm.model.common.Language;
28 import eu.etaxonomy.cdm.model.common.OrderedTermVocabulary;
29 import eu.etaxonomy.cdm.model.description.CommonTaxonName;
30 import eu.etaxonomy.cdm.model.description.TaxonDescription;
31 import eu.etaxonomy.cdm.model.location.Country;
32 import eu.etaxonomy.cdm.model.name.IBotanicalName;
33 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
34 import eu.etaxonomy.cdm.model.name.Rank;
35 import eu.etaxonomy.cdm.model.name.RankClass;
36 import eu.etaxonomy.cdm.model.name.TaxonName;
37 import eu.etaxonomy.cdm.model.reference.Reference;
38 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
39 import eu.etaxonomy.cdm.model.taxon.Synonym;
40 import eu.etaxonomy.cdm.model.taxon.Taxon;
41 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
42
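/**
 * Taxon import for the GermanSL Excel worksheet: in the first pass each record
 * is turned into an accepted taxon or a synonym (name, sec reference,
 * lettercode identifier, UUID, German common name, original source).
 *
 * @author a.mueller
 * @date 25.11.2016
 */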
48 @Component
49 public class GermanSLTaxonImport<CONFIG extends GermanSLImportConfigurator>
50 extends SimpleExcelTaxonImport<CONFIG> {
51
52 private static final long serialVersionUID = 236093186271666895L;
53
54 private static final Logger logger = Logger.getLogger(GermanSLTaxonImport.class);
55
56 static final String SPECIES_NR = "SPECIES_NR";
57 private static final String AUTHOR = "AUTHOR";
58 private static final String ABBREVIAT = "ABBREVIAT";
59 private static final String SEC = "SECUNDUM";
60 private static final String RANG = "RANG";
61 private static final String EXTERNAL_ID = "external_ID";
62 private static final String GRUPPE = "GRUPPE";
63 static final String VALID_NR = "VALID_NR";
64 static final String SYNONYM = "SYNONYM";
65 private static final String NATIVENAME = "NATIVENAME";
66 private static final String LETTER_CODE = "LETTERCODE";
67 static final String AGG = "AGG";
68
69 private static final String AGG_NAME = "AGG_NAME";
70 private static final String VALID_NAME = "VALID_NAME";
71
72 private static final String NACHWEIS = "NACHWEIS";
73 private static final String HYBRID = "HYBRID";
74 private static final String BEGRUEND = "BEGRUEND";
75 private static final String EDITSTATUS = "EDITSTATUS";
76
77 private static final String UUID_ = "UUID";
78
79
80 public static final String TAXON_NAMESPACE = "1.3.4";
81
82 @Override
83 protected String getWorksheetName() {
84 return "1.3.4";
85 }
86
87 //dirty I know, but who cares, needed by distribution and commmon name import
88 protected static final Map<String, TaxonBase<?>> taxonIdMap = new HashMap<>();
89
90
91 private static List<String> expectedKeys= Arrays.asList(new String[]{
92 SPECIES_NR,EXTERNAL_ID,ABBREVIAT,
93 AUTHOR,SEC,SYNONYM,
94 LETTER_CODE, AGG,
95 NATIVENAME,VALID_NR,RANG,GRUPPE,
96 UUID_,
97 NACHWEIS, HYBRID, BEGRUEND, EDITSTATUS, AGG_NAME, VALID_NAME
98 });
99
100
101 @Override
102 protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
103 String line = state.getCurrentLine() + ": ";
104 HashMap<String, String> record = state.getOriginalRecord();
105
106 Set<String> keys = record.keySet();
107
108 checkAllKeysExist(line, keys, expectedKeys);
109
110 //Name
111 NameResult nameResult = makeName(line, record, state);
112 IBotanicalName taxonName = nameResult.name;
113
114 //sec
115 String secRefStr = getValue(record, SEC);
116 Reference sec = getSecRef(state, secRefStr, line);
117
118
119 //status
120 String statusStr = getValue(record, SYNONYM);
121 TaxonBase<?> taxonBase;
122 if (isAccepted(statusStr)){
123 taxonBase = Taxon.NewInstance(taxonName, sec);
124 if (nameResult.proParte){
125 logger.warn(line + "accepted taxon can not be pro parte");
126 }
127 }else{
128 Synonym syn = Synonym.NewInstance(taxonName, sec);
129 if (nameResult.proParte){
130 syn.setProParte(true);
131 }
132 taxonBase = syn;
133 }
134 if (!isBlank(nameResult.sensu)){
135 taxonBase.setAppendedPhrase(nameResult.sensu);
136 }
137 //TODO right order?
138 taxonBase.setAppendedPhrase(CdmUtils.concat(" ", nameResult.auct, taxonBase.getAppendedPhrase()));
139
140 //lettercode
141 String lettercode = getValue(record, LETTER_CODE);
142 if (isNotBlank(lettercode)){
143 UUID idTypeUUID;
144 try {
145 idTypeUUID = state.getTransformer().getIdentifierTypeUuid("LETTERCODE");
146 DefinedTerm idType = getIdentiferType(state, idTypeUUID, "GermanSL lettercode", "GermanSL lettercode", "LETTERCODE", null);
147 taxonBase.addIdentifier(lettercode, idType);
148 } catch (UndefinedTransformerMethodException e) {
149 e.printStackTrace();
150 }
151 }
152
153 // //annotation
154 // String annotation = getValue(record, "Anotacion al Taxon");
155 // if (annotation != null && (!annotation.equals("nom. illeg.") || !annotation.equals("nom. cons."))){
156 // taxonBase.addAnnotation(Annotation.NewInstance(annotation, AnnotationType.EDITORIAL(), Language.SPANISH_CASTILIAN()));
157 // }
158
159 //UUID
160 String uuid = getValue(record, UUID_);
161 //TOOD why sometimes null?
162 if (uuid != null){
163 taxonBase.setUuid(UUID.fromString(uuid));
164 }
165
166
167 //NATIVE NAME
168 String commonNameStr = getValue(record, NATIVENAME);
169 //Ann.: synonym common names should be removed!
170 if (isNotBlank(commonNameStr)){
171 makeCommonName(commonNameStr, taxonBase, line);
172 }
173
174
175 //id
176 String id = getValue(record, SPECIES_NR);
177 this.addOriginalSource(taxonBase, id, TAXON_NAMESPACE, state.getConfig().getSourceReference());
178
179 //save
180 // getTaxonService().save(taxonBase);
181 taxonIdMap.put(id, taxonBase);
182 }
183
184
185 private String removeProparte(String authorStr) {
186 String regEx = "\\s+p\\.\\s*p\\.$";
187 if (authorStr == null || !authorStr.matches(".*" + regEx)){
188 return authorStr;
189 }else{
190 return authorStr.replaceAll(regEx, "");
191 }
192 }
193
194 private String removeSensuLatoStricto(String authorStr) {
195 String regEx = "\\s+s\\.\\s*(l|str)\\.$";
196
197 if (authorStr == null || !authorStr.matches(".*" + regEx)){
198 return authorStr;
199 }else{
200 return authorStr.replaceAll(regEx, "");
201 }
202 }
203
204 private String removeAuct(String authorStr) {
205 String regEx = "auct\\.\\??$";
206
207 if (authorStr == null || !authorStr.matches(/*".*" + */regEx)){
208 return authorStr;
209 }else{
210 return ""; //authorStr.replaceAll(regEx, "");
211 }
212 }
213
214
215 /**
216 * @param state
217 * @param secRefStr
218 * @return
219 */
220 private Reference getSecRef(SimpleExcelTaxonImportState<CONFIG> state, String secRefStr, String line) {
221 Reference result = state.getReference(secRefStr);
222 if (result == null && secRefStr != null){
223 result = ReferenceFactory.newGeneric();
224 result.setTitleCache(secRefStr, true);
225
226 // TimePeriod tp = TimePeriodParser.parseString(secRefStr.substring(secRefStr.length()-4));
227 // String authorStrPart = secRefStr.substring(0, secRefStr.length()-6);
228 // if (! (authorStrPart + ", " + tp.getYear()).equals(secRefStr)){
229 // logger.warn(line + "Sec ref could not be parsed: " + secRefStr);
230 // }else{
231 // result.setDatePublished(tp);
232 // }
233 // TeamOrPersonBase<?> author = state.getAgentBase(authorStrPart);
234 // if (author == null){
235 // if (authorStrPart.contains("&")){
236 // Team team = Team.NewInstance();
237 // String[] authorSplit = authorStrPart.split("&");
238 // String[] firstAuthorSplit = authorSplit[0].trim().split(",");
239 // for (String authorStr : firstAuthorSplit){
240 // addTeamMember(team, authorStr);
241 // }
242 // addTeamMember(team, authorSplit[1]);
243 // result.setAuthorship(team);
244 // state.putAgentBase(team.getTitleCache(), team);
245 // }else if (authorStrPart.equalsIgnoreCase("Tropicos") || authorStrPart.equalsIgnoreCase("The Plant List")
246 // || authorStrPart.equalsIgnoreCase("APG IV")){
247 // result.setTitle(authorStrPart);
248 // }else{
249 // Person person = Person.NewInstance();
250 // person.setLastname(authorStrPart);
251 // result.setAuthorship(person);
252 // state.putAgentBase(person.getTitleCache(), person);
253 // }
254 // }else{
255 // result.setAuthorship(author);
256 // }
257 state.putReference(secRefStr, result);
258 }
259
260 return result;
261 }
262
263
264 private class NameResult{
265 IBotanicalName name;
266 boolean proParte = false;
267 String sensu = null;
268 String auct = null;
269 }
270
271 /**
272 * @param record
273 * @param state
274 * @return
275 */
276 private NameResult makeName(String line, HashMap<String, String> record, SimpleExcelTaxonImportState<CONFIG> state) {
277
278 String specieNrStr = getValue(record, SPECIES_NR);
279 String nameStr = getValue(record, ABBREVIAT);
280 String authorStr = getValue(record, AUTHOR);
281 String rankStr = getValue(record, RANG);
282
283 NameResult result = new NameResult();
284
285 //rank
286 Rank rank = makeRank(line, state, rankStr);
287
288 //name
289 nameStr = normalizeNameStr(nameStr);
290 String nameStrWithoutSensu = removeSensuLatoStricto(nameStr);
291 if (nameStrWithoutSensu.length() < nameStr.length()){
292 result.sensu = nameStr.substring(nameStrWithoutSensu.length()).trim();
293 nameStr = nameStrWithoutSensu;
294 }
295
296 //author
297 //pp
298 authorStr = normalizeAuthorStr(authorStr);
299 String authorStrWithoutProParte = removeProparte(authorStr);
300 result.proParte = authorStrWithoutProParte.length() < authorStr.length();
301 authorStr = authorStrWithoutProParte;
302
303 //auct.
304 String authorStrWithoutAuct = removeAuct(authorStr);
305 if (authorStrWithoutAuct.length() < authorStr.length()){
306 result.auct = authorStr.substring(authorStrWithoutAuct.length()).trim();
307 }
308 authorStr = authorStrWithoutAuct;
309
310
311 //name+author
312 String fullNameStr = CdmUtils.concat(" ", nameStr, authorStr);
313
314 IBotanicalName fullName = (IBotanicalName)nameParser.parseReferencedName(fullNameStr, NomenclaturalCode.ICNAFP, rank);
315 if (fullName.isProtectedTitleCache()){
316 logger.warn(line + "Name could not be parsed: " + fullNameStr );
317 }else{
318 replaceAuthorNamesAndNomRef(state, fullName);
319 }
320 // BotanicalName existingName = getExistingName(state, fullName);
321
322 //TODO handle existing name
323 IBotanicalName name = fullName;
324 this.addOriginalSource(name, specieNrStr, TAXON_NAMESPACE + "_Name", state.getConfig().getSourceReference());
325
326 result.name = name;
327 return result;
328 }
329
330
331
332 /**
333 * @param line
334 * @param state
335 * @param rankStr
336 * @return
337 */
338 private Rank makeRank(String line, SimpleExcelTaxonImportState<CONFIG> state, String rankStr) {
339 Rank rank = null;
340 try {
341 rank = state.getTransformer().getRankByKey(rankStr);
342 if (rank == null){
343 UUID rankUuid = state.getTransformer().getRankUuid(rankStr);
344 OrderedTermVocabulary<Rank> voc = (OrderedTermVocabulary<Rank>)Rank.SPECIES().getVocabulary();
345 //TODO
346 Rank lowerRank = Rank.FORM();
347 rank = getRank(state, rankUuid, rankStr, rankStr, rankStr, voc, lowerRank, RankClass.Infraspecific);
348 if (rank == null){
349 logger.warn(line + "Rank not recognized: " + rankStr);
350 }
351 }
352 } catch (Exception e1) {
353 logger.warn(line + "Rank not recognized: " + rankStr);
354 }
355 return rank;
356 }
357
358
359 /**
360 * @param authorStr
361 * @return
362 */
363 private String normalizeAuthorStr(String authorStr) {
364 if (isBlank(authorStr)){
365 return "";
366 }else{
367 if (authorStr.equals("-") || authorStr.equals("#")){
368 authorStr = "";
369 }
370 return authorStr;
371 }
372 }
373
374 private String normalizeNameStr(String nameStr) {
375 nameStr = nameStr
376 .replace(" agg.", " aggr.")
377 .replace(" fo. ", " f. ")
378 ;
379 return nameStr;
380 }
381
382
383 boolean nameMapIsInitialized = false;
384 /**
385 * @param state
386 * @param fullName
387 * @return
388 */
389 private IBotanicalName getExistingName(SimpleExcelTaxonImportState<CONFIG> state, IBotanicalName fullName) {
390 initExistinNames(state);
391 return (IBotanicalName)state.getName(fullName.getTitleCache());
392 }
393
394 /**
395 * @param state
396 */
397 @SuppressWarnings("rawtypes")
398 private void initExistinNames(SimpleExcelTaxonImportState<CONFIG> state) {
399 if (!nameMapIsInitialized){
400 List<String> propertyPaths = Arrays.asList("");
401 List<TaxonName> existingNames = this.getNameService().list(null, null, null, null, propertyPaths);
402 for (TaxonName tnb : existingNames){
403 state.putName(tnb.getTitleCache(), tnb);
404 }
405 nameMapIsInitialized = true;
406 }
407 }
408
409
410 /**
411 * @param commmonNameStr
412 * @param taxonBase
413 */
414 private void makeCommonName(String commmonNameStr, TaxonBase<?> taxonBase, String line) {
415 if (taxonBase.isInstanceOf(Synonym.class)){
416 //synonym common names should be neglected
417 return;
418 }
419 Taxon acceptedTaxon = getAccepted(taxonBase);
420 if (acceptedTaxon != null){
421 TaxonDescription desc = getTaxonDescription(acceptedTaxon, false, true);
422 desc.setDefault(true);
423 CommonTaxonName commonName = CommonTaxonName.NewInstance(commmonNameStr, Language.GERMAN(), Country.GERMANY());
424 desc.addElement(commonName);
425 }else{
426 logger.warn(line + "No accepted taxon available");
427 }
428
429 }
430
431
432
433 private boolean isAccepted(String statusStr){
434 if ("FALSE()".equals(statusStr) || "0".equals(statusStr) || "false".equalsIgnoreCase(statusStr)){
435 return true;
436 } else if ("TRUE()".equals(statusStr) || "1".equals(statusStr)|| "true".equalsIgnoreCase(statusStr)){
437 return false;
438 }else{
439 logger.warn("Unhandled taxon status: " + statusStr);
440 return false;
441 }
442 }
443
444
445 /**
446 * @param next
447 * @return
448 */
449 private Taxon getAccepted(TaxonBase<?> taxonBase) {
450 if (taxonBase.isInstanceOf(Taxon.class)){
451 return CdmBase.deproxy(taxonBase, Taxon.class);
452 }else{
453 Synonym syn = CdmBase.deproxy(taxonBase, Synonym.class);
454 return syn.getAcceptedTaxon();
455 }
456 }
457
458
459 @Override
460 protected boolean isIgnore(SimpleExcelTaxonImportState<CONFIG> state) {
461 return ! state.getConfig().isDoTaxa();
462 }
463 }