ref #9752 adapt mexico import for flora-of-mexico import
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / mexico / MexicoConabioTaxonImport.java
1 /**
2 * Copyright (C) 2016 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.io.mexico;
10
11 import java.util.Arrays;
12 import java.util.HashMap;
13 import java.util.Iterator;
14 import java.util.List;
15 import java.util.Map;
16 import java.util.Set;
17 import java.util.UUID;
18
19 import org.apache.commons.lang3.StringUtils;
20 import org.apache.log4j.Logger;
21 import org.springframework.stereotype.Component;
22
23 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
24 import eu.etaxonomy.cdm.model.agent.Person;
25 import eu.etaxonomy.cdm.model.agent.Team;
26 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
27 import eu.etaxonomy.cdm.model.common.Annotation;
28 import eu.etaxonomy.cdm.model.common.AnnotationType;
29 import eu.etaxonomy.cdm.model.common.CdmBase;
30 import eu.etaxonomy.cdm.model.common.Extension;
31 import eu.etaxonomy.cdm.model.common.ExtensionType;
32 import eu.etaxonomy.cdm.model.common.Language;
33 import eu.etaxonomy.cdm.model.common.VerbatimTimePeriod;
34 import eu.etaxonomy.cdm.model.name.IBotanicalName;
35 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
36 import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
37 import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
38 import eu.etaxonomy.cdm.model.name.Rank;
39 import eu.etaxonomy.cdm.model.name.TaxonName;
40 import eu.etaxonomy.cdm.model.reference.INomenclaturalReference;
41 import eu.etaxonomy.cdm.model.reference.Reference;
42 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
43 import eu.etaxonomy.cdm.model.reference.ReferenceType;
44 import eu.etaxonomy.cdm.model.taxon.Classification;
45 import eu.etaxonomy.cdm.model.taxon.Synonym;
46 import eu.etaxonomy.cdm.model.taxon.SynonymType;
47 import eu.etaxonomy.cdm.model.taxon.Taxon;
48 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
49 import eu.etaxonomy.cdm.model.taxon.TaxonRelationship;
50 import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType;
51 import eu.etaxonomy.cdm.model.term.DefinedTerm;
52 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
53 import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
54
55 /**
56 * @author a.mueller
57 * @since 16.06.2016
58 *
59 */
60 @Component
61 public class MexicoConabioTaxonImport<CONFIG extends MexicoConabioImportConfigurator>
62 extends SimpleExcelTaxonImport<CONFIG>{
63
/** Serialization version id of this import class. */
private static final long serialVersionUID = 3691221053127007258L;

/** Logger shared by all instances of this import. */
private static final Logger logger = Logger.getLogger(MexicoConabioTaxonImport.class);

/**
 * Namespace used when original sources are attached to imported taxa; the
 * name sources use the same namespace with the suffix "_Name".
 */
public static final String TAXON_NAMESPACE = "Taxonomia";
69
70 @Override
71 protected String getWorksheetName(CONFIG config) {
72 return "Taxonomia";
73 }
74
//Dirty, I know, but who cares: needed by the distribution and common name import.
//Maps the IdCAT of a record to the taxon or synonym created for it in the first pass.
protected static final Map<String, TaxonBase<?>> taxonIdMap = new HashMap<>();

//classification the accepted taxa are added to, resolved lazily by getClassification()
private Classification classification;
79
80
81 private static List<String> expectedKeys= Arrays.asList(new String[]{
82 "IdCAT","IdCATRel","IdCAT_AscendenteInmediato"
83 ,"IdCAT_AscendenteObligatorio","CategoriaTaxonomica","Nombre",
84 "EstatusNombre","AutorNombre","AutorSinAnio","Anio",
85 "ReferenciaNombre",
86 "Division","AutorDivision","ReferenciaClasificacionDivision",
87 "Clase","AutorClase","ReferenciaClasificacionClase",
88 "Subclase","AutorSubclase","ReferenciaClasificacionSubclase",
89 "Superorden","AutorSuperorden","ReferenciaClasificacionSuperorden",
90 "Orden","AutorOrden","ReferenciaClasificacionOrden",
91 "Familia", "EstatusFamilia","AutorFamilia","ReferenciaClasificacionFamilia",
92 "Tribu", "EstatusTribu","AutorTribu","ReferenciaNombreTribu",
93 "Genero","EstatusGenero","AutorGenero","","ReferenciaNombreGenero",
94 "Epiteto_especifico","EstatusEspecie","AutorEpiteto_especifico","ReferenciaNombreEspecie",
95 "CategoriaInfraespecifica","NombreInfraespecifico","EstatusInfraespecie","AutorInfraespecie","ReferenciaNombreInfraespecifico",
96 "CitaNomenclatural","Anotacion al Taxon","Fuente_BDs",
97 "FamAceptada","GenAceptado","CategoriaTaxAceptada","NombreAceptado","AutorNombreAceptado","AutorSinAnioAceptado","AnioAceptado",
98 "TipoRelacion","ReferenciaSinonimia","ComentariosRevisor",
99 "CompareID","IdCAT_OLD","Nombre_OLD","AutorSinAnio_OLD",
100 "CitaNomenclatural_OLD","ReferenceType","IsUpdated",
101
102 "Hibrido","ReferenciaNombreHibrido","AutorHibrido","EstatusHibrido",
103 "Subgenero","ReferenciaNombreSubgenero","EstatusSubgenero","AutorSubgenero",
104 "Subtribu","ReferenciaClasificacionSubtribu","AutorSubtribu","EstatusSubtribu",
105 "Subfamilia","ReferenciaClasificacionSubfamilia","AutorSubfamilia","EstatusSubfamilia",
106 "ReferenciaClasificacionTribu",
107 "Supertribu","ReferenciaClasificacionSupertribu","AutorSupertribu","EstatusSupertribu",
108
109 });
110
111
112 @Override
113 protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
114 String line = state.getCurrentLine() + ": ";
115 Map<String, String> record = state.getOriginalRecord();
116
117 Set<String> keys = record.keySet();
118
119 checkAllKeysExist(line, keys, expectedKeys);
120
121 if (getValue(record, "Nombre") == null ){
122 logger.warn("No FullnameNoAuthors given: " + line);
123 return;
124 }
125
126 //Name
127 IBotanicalName name = makeName(line, record, state);
128
129 //sec
130 String referenciaNombre = getValueNd(record, "ReferenciaNombre");
131
132 //status
133 String statusStr = getValue(record, "EstatusNombre");
134 String originalInfo = null;
135 TaxonBase<?> taxonBase;
136 if ("aceptado".equals(statusStr)){
137 Reference sec = getSecRef(state, referenciaNombre, line);
138 taxonBase = Taxon.NewInstance(name, sec);
139 }else if (statusStr.startsWith("sin")){
140 String secRefStr = getValue(record, "ReferenciaSinonimia");
141
142 Reference sec = getSynSec(state, secRefStr, referenciaNombre, line);
143 taxonBase = Synonym.NewInstance(name, sec);
144 if (isNotBlank(secRefStr)){
145 originalInfo = "referenciaNombre: " + referenciaNombre;
146 }
147 }else{
148 throw new RuntimeException(line + " Status not recognized: " + statusStr);
149 }
150
151 //annotation
152 String annotation = getValue(record, "Anotacion al Taxon");
153 if (annotation != null && (!annotation.equals("nom. illeg.") || !annotation.equals("nom. cons."))){
154 taxonBase.addAnnotation(Annotation.NewInstance(annotation, AnnotationType.EDITORIAL(), Language.SPANISH_CASTILIAN()));
155 }
156
157 //id
158 String idCat = getValue(record, "IdCAT");
159 this.addOriginalSource(taxonBase, idCat, TAXON_NAMESPACE, state.getConfig().getSourceReference(), originalInfo);
160 name.addIdentifier(idCat, getConabioIdIdentifierType(state));
161
162 // checkSame(record, "EstatusHibrido", statusStr, line);
163 // checkSame(record, "AutorHibrido", "AutorNombre", line);
164 // checkSame(record, "ReferenciaNombreHibrido", "ReferenciaNombre", line);
165 // checkSame(record, "Hibrido", "AutorNombre", line);
166
167 //save
168 getTaxonService().save(taxonBase);
169 taxonIdMap.put(idCat, taxonBase);
170 }
171
172 private DefinedTerm getConabioIdIdentifierType(SimpleExcelTaxonImportState<CONFIG> state) {
173 DefinedTerm conabioIdIdentifierType = getIdentiferType(state, MexicoConabioTransformer.uuidConabioIdIdentifierType, "Conabio name identifier", "Conabio name identifier", "CONABIO ID", null);
174 return conabioIdIdentifierType;
175 }
176
177 private void checkSame(Map<String, String> record, String key, String compareValue, String line) {
178 String value = getValue(record, key);
179 if (value != null && !value.equals(compareValue)){
180 logger.warn(line+ ": Value differs for "+ key +": " + value + "<->" + compareValue );
181 }
182 }
183
184 private Reference getSynSec(SimpleExcelTaxonImportState<CONFIG> state, String secRefStr,
185 String referenciaNombre, String line) {
186 if (isBlank(secRefStr)){
187 secRefStr = referenciaNombre;
188 }
189 if (isNotBlank(secRefStr)){
190 Reference result = state.getReference(secRefStr);
191 if (result == null){
192 result = ReferenceFactory.newBook();
193 result.setTitleCache(secRefStr, true);
194 state.putReference(secRefStr, result);
195 }
196 return result;
197 }else{
198 return null;
199 }
200 }
201
202 /**
203 * @param state
204 * @param secRefStr
205 * @return
206 */
207 private Reference getSecRef(SimpleExcelTaxonImportState<CONFIG> state, String secRefStr, String line) {
208 Reference result = state.getReference(secRefStr);
209 if (result == null && secRefStr != null){
210 result = ReferenceFactory.newBook();
211 VerbatimTimePeriod tp = TimePeriodParser.parseStringVerbatim(secRefStr.substring(secRefStr.length()-4));
212 String authorStrPart = secRefStr.substring(0, secRefStr.length()-6);
213 if (! (authorStrPart + ", " + tp.getYear()).equals(secRefStr)){
214 logger.warn(line + "Sec ref could not be parsed: " + secRefStr);
215 }else{
216 result.setDatePublished(tp);
217 }
218 TeamOrPersonBase<?> author = state.getAgentBase(authorStrPart);
219 if (author == null){
220 if (authorStrPart.contains("&")){
221 Team team = Team.NewInstance();
222 String[] authorSplit = authorStrPart.split("&");
223 String[] firstAuthorSplit = authorSplit[0].trim().split(",");
224 for (String authorStr : firstAuthorSplit){
225 addTeamMember(team, authorStr);
226 }
227 addTeamMember(team, authorSplit[1]);
228 result.setAuthorship(team);
229 state.putAgentBase(team.getTitleCache(), team);
230 }else if (authorStrPart.equalsIgnoreCase("Tropicos") || authorStrPart.equalsIgnoreCase("The Plant List")
231 || authorStrPart.equalsIgnoreCase("APG IV")){
232 result.setTitle(authorStrPart);
233 }else{
234 Person person = Person.NewInstance();
235 person.setFamilyName(authorStrPart);
236 result.setAuthorship(person);
237 state.putAgentBase(person.getTitleCache(), person);
238 }
239 }else{
240 result.setAuthorship(author);
241 }
242 state.putReference(secRefStr, result);
243 }else if(secRefStr == null){
244 return state.getConfig().getSecReference();
245 }
246
247 return result;
248 }
249
250
251
252 /**
253 * @param team
254 * @param author
255 */
256 private void addTeamMember(Team team, String author) {
257 if (StringUtils.isNotBlank(author)){
258 Person person = Person.NewInstance();
259 person.setFamilyName(author.trim());
260 team.addTeamMember(person);
261 }
262 }
263
264
265
266 /**
267 * @param record
268 * @param state
269 * @return
270 */
271 private IBotanicalName makeName(String line, Map<String, String> record, SimpleExcelTaxonImportState<CONFIG> state) {
272
273 String authorStr = getValueNd(record, "AutorSinAnio");
274 String nameStr = getValue(record, "Nombre");
275 String nomRefStr = getValue(record, "CitaNomenclatural");
276 String refType = getValue(record, "ReferenceType");
277 String idCat = getValue(record, "IdCAT");
278 String rankStr = getValue(record, "CategoriaTaxonomica");
279 String annotation = getValue(record, "Anotacion al Taxon");
280
281 //rank
282 Rank rank = null;
283 try {
284 rank = state.getTransformer().getRankByKey(rankStr);
285 if (Rank.SUBSPECIES().equals(rank) || Rank.VARIETY().equals(rank) || Rank.FORM().equals(rank) || Rank.RACE().equals(rank)){
286 int i = nameStr.lastIndexOf(" ");
287 nameStr = nameStr.substring(0, i) + " " + rank.getAbbreviation() + nameStr.substring(i);
288 }
289 } catch (UndefinedTransformerMethodException e) {
290 logger.warn(line + "Rank not recognized: " + rankStr);
291 }
292
293 //name + author
294 String fullNameStr = nameStr + (authorStr != null ? " " + authorStr : "");
295
296 IBotanicalName fullName = (IBotanicalName)nameParser.parseFullName(fullNameStr, NomenclaturalCode.ICNAFP, rank);
297 if (fullName.isProtectedTitleCache()){
298 logger.warn(line + "Name could not be parsed: " + fullNameStr );
299 }else{
300 replaceAuthorNamesAndNomRef(state, fullName);
301 }
302 IBotanicalName existingName = getExistingName(state, fullName);
303
304 //reference
305 String refNameStr = getRefNameStr(nomRefStr, refType, fullNameStr);
306
307 IBotanicalName referencedName = (IBotanicalName)nameParser.parseReferencedName(refNameStr, NomenclaturalCode.ICNAFP, rank);
308 if (referencedName.isProtectedFullTitleCache() || referencedName.isProtectedTitleCache()){
309 logger.warn(line + "Referenced name could not be parsed: " + refNameStr );
310 }else{
311 addSourcesToReferences(referencedName, state);
312 replaceAuthorNamesAndNomRef(state, referencedName);
313 }
314 adaptRefTypeForGeneric(referencedName, refType);
315
316 //compare nom. ref. with Borhidi
317 IBotanicalName result= referencedName;
318 Boolean equal = null;
319 if (existingName != null){
320 String existingRefTitle = existingName.getFullTitleCache();
321 String conabioRefTitle = referencedName.getFullTitleCache();
322 if (!existingRefTitle.equals(conabioRefTitle)){
323 existingName.setNomenclaturalMicroReference(referencedName.getNomenclaturalMicroReference());
324 existingName.setNomenclaturalReference(referencedName.getNomenclaturalReference());
325 equal = false;
326 }else{
327 equal = true;
328 }
329 result = existingName;
330 }
331 addNomRefExtension(state, result, equal);
332
333 //status
334 if (annotation != null && (annotation.equals("nom. illeg.") || annotation.equals("nom. cons."))){
335 try {
336 NomenclaturalStatusType nomStatusType = NomenclaturalStatusType.getNomenclaturalStatusTypeByAbbreviation(annotation, result);
337 result.addStatus(NomenclaturalStatus.NewInstance(nomStatusType));
338 } catch (UnknownCdmTypeException e) {
339 logger.warn(line + "nomStatusType not recognized: " + annotation);
340 }
341 }
342
343 this.addOriginalSource(result, idCat, TAXON_NAMESPACE + "_Name", state.getConfig().getSourceReference());
344
345 if(result.getNomenclaturalReference()!=null && result.getNomenclaturalReference().getTitleCache().equals("null")){
346 logger.warn("null");
347 }
348
349 return result;
350 }
351
352
353
354 /**
355 * @param name
356 * @param state
357 */
358 private void addSourcesToReferences(IBotanicalName name, SimpleExcelTaxonImportState<CONFIG> state) {
359 Reference nomRef = name.getNomenclaturalReference();
360 if (nomRef != null){
361 nomRef.addSource(makeOriginalSource(state));
362 if (nomRef.getInReference() != null){
363 nomRef.getInReference().addSource(makeOriginalSource(state));
364 }
365 }
366 }
367
368
369
370 /**
371 * @param referencedName
372 * @param refType
373 */
374 private void adaptRefTypeForGeneric(IBotanicalName referencedName, String refTypeStr) {
375 INomenclaturalReference ref = referencedName.getNomenclaturalReference();
376 if (ref == null){
377 return;
378 }
379 ReferenceType refType = refTypeByRefTypeStr(refTypeStr);
380 if (ref.getType() != refType && refType == ReferenceType.Book){
381 ref.setType(refType);
382 }
383 }
384
385
386 private ReferenceType refTypeByRefTypeStr(String refType){
387 if ("A".equals(refType)){ //Article
388 return ReferenceType.Article;
389 }else if ("B".equals(refType)){ //Book
390 return ReferenceType.Book;
391 }else if (refType == null){ //Book
392 return null;
393 }else{
394 throw new IllegalArgumentException("RefType not supported " + refType);
395 }
396 }
397
398 /**
399 * @param nomRefStr
400 * @param refType
401 * @param fullNameStr
402 * @return
403 */
404 private String getRefNameStr(String nomRefStr, String refTypeStr, String fullNameStr) {
405 String refNameStr = fullNameStr;
406 ReferenceType refType = refTypeByRefTypeStr(refTypeStr);
407 if (nomRefStr == null){
408 //do nothing
409 }else if (refType == ReferenceType.Article){
410 refNameStr = fullNameStr + " in " + nomRefStr;
411 }else if (refType == ReferenceType.Book){
412 refNameStr = fullNameStr + ", " + nomRefStr;
413 }else if (refType == null && nomRefStr != null){
414 logger.warn("RefType is null but nomRefStr exists");
415 }
416 return refNameStr;
417 }
418
419 /**
420 * @param state
421 * @param equal
422 * @param referencedName
423 */
424 private void addNomRefExtension(SimpleExcelTaxonImportState<CONFIG> state, IBotanicalName name, Boolean equal) {
425 String equalStr = equal == null ? "" : equal == true ? "EQUAL\n" : "NOT EQUAL\n";
426 name.setFullTitleCache(null, false);
427 String newExtensionStr = name.getFullTitleCache() + " - CONABIO";
428 UUID uuidNomRefExtension = MexicoConabioTransformer.uuidNomRefExtension;
429 for (Extension extension : name.getExtensions()){
430 if (extension.getType().getUuid().equals(uuidNomRefExtension)){
431 extension.setValue(equalStr + extension.getValue() + "\n" + newExtensionStr);
432 return;
433 }
434 }
435 String label = "Nomenclatural reference in Sources";
436 String abbrev = "Nom. ref. src.";
437 ExtensionType extensionType = getExtensionType(state, uuidNomRefExtension, label, label, abbrev);
438 Extension.NewInstance((TaxonName)name, newExtensionStr, extensionType);
439 }
440
441 boolean nameMapIsInitialized = false;
442 /**
443 * @param state
444 * @param fullName
445 * @return
446 */
447 private IBotanicalName getExistingName(SimpleExcelTaxonImportState<CONFIG> state, IBotanicalName fullName) {
448 initExistinNames(state);
449 return (IBotanicalName)state.getName(fullName.getTitleCache());
450 }
451
452 /**
453 * @param state
454 */
455 @SuppressWarnings("rawtypes")
456 private void initExistinNames(SimpleExcelTaxonImportState<CONFIG> state) {
457 if (!nameMapIsInitialized){
458 List<String> propertyPaths = Arrays.asList("");
459 List<TaxonName> existingNames = this.getNameService().list(null, null, null, null, propertyPaths);
460 for (TaxonName tnb : existingNames){
461 state.putName(tnb.getTitleCache(), tnb);
462 }
463 nameMapIsInitialized = true;
464 }
465 }
466
467
468
469 /**
470 * @param record
471 * @param string
472 * @return
473 */
474 private String getValueNd(Map<String, String> record, String string) {
475 String value = getValue(record, string);
476 if ("ND".equals(value)){
477 return null;
478 }else{
479 return value;
480 }
481 }
482
483
484 @Override
485 protected void secondPass(SimpleExcelTaxonImportState<CONFIG> state) {
486 // IdCAT_AscendenteInmediato, IdCATRel, TipoRelacion
487 Map<String, String> record = state.getOriginalRecord();
488 String line = state.getCurrentLine() + ": ";
489
490 String parentStr = getValue(record, "IdCAT_AscendenteInmediato");
491 String relStr = getValue(record, "IdCATRel");
492
493 String statusStr = getValue(record, "EstatusNombre");
494
495 Classification classification = getClassification(state);
496 String idCat = getValue(record, "IdCAT");
497 TaxonBase<?> taxonBase = taxonIdMap.get(idCat);
498 Taxon parent;
499 if(statusStr == null){
500 logger.warn("No statusStr in line " +line);
501 }else if ("aceptado".equals(statusStr)){
502 parent = (Taxon)taxonIdMap.get(parentStr);
503 if (parent == null){
504 logger.warn(line + "Parent is missing: "+ parentStr);
505 }else{
506 Taxon taxon = (Taxon)taxonBase;
507 Reference relRef = null; //TODO
508 classification.addParentChild(parent, taxon, relRef, null);
509 // makeConceptRelation(line, taxon.getName());
510 }
511 }else if (statusStr.startsWith("sin")){
512 parent = (Taxon)taxonIdMap.get(relStr);
513 if (parent == null){
514 logger.warn(line + "Accepted taxon is missing: "+ relStr);
515 }else{
516 Synonym synonym = (Synonym)taxonBase;
517 parent.addSynonym(synonym, SynonymType.SYNONYM_OF());
518 // makeConceptRelation(line, synonym.getName());
519 }
520 }else{
521 logger.warn("Unhandled statusStr in line " + line);
522 }
523 }
524
525 /**
526 * @param line
527 * @param name
528 */
529 private void makeConceptRelation(String line, TaxonName name) {
530 if (name.getTaxonBases().size()==2){
531 Iterator<TaxonBase> it = name.getTaxonBases().iterator();
532 Taxon taxon1 = getAccepted(it.next());
533 Taxon taxon2 = getAccepted(it.next());
534 Reference citation = null;
535 TaxonRelationship rel;
536 if (taxon1.getSec().getUuid().equals(MexicoConabioTransformer.uuidReferenceBorhidi)){
537 rel = taxon1.addTaxonRelation(taxon2, TaxonRelationshipType.CONGRUENT_TO(),
538 citation, null);
539 }else{
540 rel = taxon2.addTaxonRelation(taxon1, TaxonRelationshipType.CONGRUENT_TO(),
541 citation, null);
542 }
543 rel.setDoubtful(true);
544 }else if (name.getTaxonBases().size()>2){
545 logger.warn(line + "Names with more than 2 taxa not yet handled");
546 }
547
548 }
549
550 /**
551 * @param next
552 * @return
553 */
554 private Taxon getAccepted(TaxonBase<?> taxonBase) {
555 if (taxonBase.isInstanceOf(Taxon.class)){
556 return CdmBase.deproxy(taxonBase, Taxon.class);
557 }else{
558 Synonym syn = CdmBase.deproxy(taxonBase, Synonym.class);
559 return syn.getAcceptedTaxon();
560 }
561 }
562
563 private Classification getClassification(SimpleExcelTaxonImportState<CONFIG> state) {
564 if (classification == null){
565 MexicoConabioImportConfigurator config = state.getConfig();
566 classification = getClassificationService().find(config.getClassificationUuid());
567 if (classification == null){
568 classification = Classification.NewInstance(config.getClassificationName());
569 classification.setUuid(config.getClassificationUuid());
570 classification.setReference(config.getSecReference());
571 getClassificationService().save(classification);
572 }
573 }
574 return classification;
575 }
576
577
578 @Override
579 protected boolean isIgnore(SimpleExcelTaxonImportState<CONFIG> state) {
580 return ! state.getConfig().isDoTaxa();
581 }
582 }