2 * Copyright (C) 2016 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
9 package eu
.etaxonomy
.cdm
.io
.mexico
;
11 import java
.util
.Arrays
;
12 import java
.util
.HashMap
;
13 import java
.util
.Iterator
;
14 import java
.util
.List
;
17 import java
.util
.UUID
;
19 import org
.apache
.commons
.lang3
.StringUtils
;
20 import org
.apache
.log4j
.Logger
;
21 import org
.springframework
.stereotype
.Component
;
23 import eu
.etaxonomy
.cdm
.io
.common
.mapping
.UndefinedTransformerMethodException
;
24 import eu
.etaxonomy
.cdm
.model
.agent
.Person
;
25 import eu
.etaxonomy
.cdm
.model
.agent
.Team
;
26 import eu
.etaxonomy
.cdm
.model
.agent
.TeamOrPersonBase
;
27 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
28 import eu
.etaxonomy
.cdm
.model
.common
.AnnotationType
;
29 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
30 import eu
.etaxonomy
.cdm
.model
.common
.Extension
;
31 import eu
.etaxonomy
.cdm
.model
.common
.ExtensionType
;
32 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
33 import eu
.etaxonomy
.cdm
.model
.common
.VerbatimTimePeriod
;
34 import eu
.etaxonomy
.cdm
.model
.name
.IBotanicalName
;
35 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
36 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatus
;
37 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatusType
;
38 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
39 import eu
.etaxonomy
.cdm
.model
.name
.TaxonName
;
40 import eu
.etaxonomy
.cdm
.model
.reference
.INomenclaturalReference
;
41 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
42 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
43 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceType
;
44 import eu
.etaxonomy
.cdm
.model
.taxon
.Classification
;
45 import eu
.etaxonomy
.cdm
.model
.taxon
.Synonym
;
46 import eu
.etaxonomy
.cdm
.model
.taxon
.SynonymType
;
47 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
48 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
49 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonRelationship
;
50 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonRelationshipType
;
51 import eu
.etaxonomy
.cdm
.model
.term
.DefinedTerm
;
52 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
53 import eu
.etaxonomy
.cdm
.strategy
.parser
.TimePeriodParser
;
// Excel import for the CONABIO (Mexico) taxon worksheet: firstPass() creates the names together
// with their taxa or synonyms, secondPass() adds the parent-child and the synonym relations.
61 public class MexicoConabioTaxonImport
<CONFIG
extends MexicoConabioImportConfigurator
>
62 extends SimpleExcelTaxonImport
<CONFIG
>{
// Serialization version id of this class.
64 private static final long serialVersionUID
= 3691221053127007258L;
// log4j logger used for the warnings written during the import.
66 private static final Logger logger
= Logger
.getLogger(MexicoConabioTaxonImport
.class);
// Namespace handed to addOriginalSource() together with the IdCAT value of a taxon;
// makeName() uses this value + "_Name" for the sources of names.
68 public static final String TAXON_NAMESPACE
= "Taxonomia";
// Supplies the name of the worksheet to read for the given configuration.
// NOTE(review): the method body is not visible in this excerpt - presumably it returns the
// name of the taxon worksheet; confirm against the full file.
71 protected String
getWorksheetName(CONFIG config
) {
75 //dirty I know, but who cares, needed by distribution and common name import
// Static map from the IdCAT value of a record to the taxon or synonym created for it in
// firstPass(); secondPass() reads it to resolve parents and accepted taxa.
76 protected static final Map
<String
, TaxonBase
<?
>> taxonIdMap
= new HashMap
<>();
// Classification the accepted taxa are added to; resolved or created lazily by getClassification().
78 private Classification classification
;
// Column headers that firstPass() hands to checkAllKeysExist() together with the keys of each record.
// NOTE(review): the list contains an empty-string entry between "AutorGenero" and
// "ReferenciaNombreGenero" - confirm whether that is intentional.
// NOTE(review): the end of the array literal is not visible in this excerpt.
81 private static List
<String
> expectedKeys
= Arrays
.asList(new String
[]{
82 "IdCAT","IdCATRel","IdCAT_AscendenteInmediato"
83 ,"IdCAT_AscendenteObligatorio","CategoriaTaxonomica","Nombre",
84 "EstatusNombre","AutorNombre","AutorSinAnio","Anio",
86 "Division","AutorDivision","ReferenciaClasificacionDivision",
87 "Clase","AutorClase","ReferenciaClasificacionClase",
88 "Subclase","AutorSubclase","ReferenciaClasificacionSubclase",
89 "Superorden","AutorSuperorden","ReferenciaClasificacionSuperorden",
90 "Orden","AutorOrden","ReferenciaClasificacionOrden",
91 "Familia", "EstatusFamilia","AutorFamilia","ReferenciaClasificacionFamilia",
92 "Tribu", "EstatusTribu","AutorTribu","ReferenciaNombreTribu",
93 "Genero","EstatusGenero","AutorGenero","","ReferenciaNombreGenero",
94 "Epiteto_especifico","EstatusEspecie","AutorEpiteto_especifico","ReferenciaNombreEspecie",
95 "CategoriaInfraespecifica","NombreInfraespecifico","EstatusInfraespecie","AutorInfraespecie","ReferenciaNombreInfraespecifico",
96 "CitaNomenclatural","Anotacion al Taxon","Fuente_BDs",
97 "FamAceptada","GenAceptado","CategoriaTaxAceptada","NombreAceptado","AutorNombreAceptado","AutorSinAnioAceptado","AnioAceptado",
98 "TipoRelacion","ReferenciaSinonimia","ComentariosRevisor",
99 "CompareID","IdCAT_OLD","Nombre_OLD","AutorSinAnio_OLD",
100 "CitaNomenclatural_OLD","ReferenceType","IsUpdated",
102 "Hibrido","ReferenciaNombreHibrido","AutorHibrido","EstatusHibrido",
103 "Subgenero","ReferenciaNombreSubgenero","EstatusSubgenero","AutorSubgenero",
104 "Subtribu","ReferenciaClasificacionSubtribu","AutorSubtribu","EstatusSubtribu",
105 "Subfamilia","ReferenciaClasificacionSubfamilia","AutorSubfamilia","EstatusSubfamilia",
106 "ReferenciaClasificacionTribu",
107 "Supertribu","ReferenciaClasificacionSupertribu","AutorSupertribu","EstatusSupertribu",
113 protected void firstPass(SimpleExcelTaxonImportState
<CONFIG
> state
) {
114 String line
= state
.getCurrentLine() + ": ";
115 Map
<String
, String
> record
= state
.getOriginalRecord();
117 Set
<String
> keys
= record
.keySet();
119 checkAllKeysExist(line
, keys
, expectedKeys
);
121 if (getValue(record
, "Nombre") == null ){
122 logger
.warn("No FullnameNoAuthors given: " + line
);
127 IBotanicalName name
= makeName(line
, record
, state
);
130 String referenciaNombre
= getValueNd(record
, "ReferenciaNombre");
133 String statusStr
= getValue(record
, "EstatusNombre");
134 String originalInfo
= null;
135 TaxonBase
<?
> taxonBase
;
136 if ("aceptado".equals(statusStr
)){
137 Reference sec
= getSecRef(state
, referenciaNombre
, line
);
138 taxonBase
= Taxon
.NewInstance(name
, sec
);
139 }else if (statusStr
.startsWith("sin")){
140 String secRefStr
= getValue(record
, "ReferenciaSinonimia");
142 Reference sec
= getSynSec(state
, secRefStr
, referenciaNombre
, line
);
143 taxonBase
= Synonym
.NewInstance(name
, sec
);
144 if (isNotBlank(secRefStr
)){
145 originalInfo
= "referenciaNombre: " + referenciaNombre
;
148 throw new RuntimeException(line
+ " Status not recognized: " + statusStr
);
152 String annotation
= getValue(record
, "Anotacion al Taxon");
153 if (annotation
!= null && (!annotation
.equals("nom. illeg.") || !annotation
.equals("nom. cons."))){
154 taxonBase
.addAnnotation(Annotation
.NewInstance(annotation
, AnnotationType
.EDITORIAL(), Language
.SPANISH_CASTILIAN()));
158 String idCat
= getValue(record
, "IdCAT");
159 this.addOriginalSource(taxonBase
, idCat
, TAXON_NAMESPACE
, state
.getConfig().getSourceReference(), originalInfo
);
160 name
.addIdentifier(idCat
, getConabioIdIdentifierType(state
));
162 // checkSame(record, "EstatusHibrido", statusStr, line);
163 // checkSame(record, "AutorHibrido", "AutorNombre", line);
164 // checkSame(record, "ReferenciaNombreHibrido", "ReferenciaNombre", line);
165 // checkSame(record, "Hibrido", "AutorNombre", line);
168 getTaxonService().save(taxonBase
);
169 taxonIdMap
.put(idCat
, taxonBase
);
172 private DefinedTerm
getConabioIdIdentifierType(SimpleExcelTaxonImportState
<CONFIG
> state
) {
173 DefinedTerm conabioIdIdentifierType
= getIdentiferType(state
, MexicoConabioTransformer
.uuidConabioIdIdentifierType
, "Conabio name identifier", "Conabio name identifier", "CONABIO ID", null);
174 return conabioIdIdentifierType
;
177 private void checkSame(Map
<String
, String
> record
, String key
, String compareValue
, String line
) {
178 String value
= getValue(record
, key
);
179 if (value
!= null && !value
.equals(compareValue
)){
180 logger
.warn(line
+ ": Value differs for "+ key
+": " + value
+ "<->" + compareValue
);
// Resolves the sec reference of a synonym. A blank secRefStr is replaced by referenciaNombre.
// For a non-blank string the reference is looked up in the import state; the visible code also
// creates a book with secRefStr as protected title cache and registers it in the state.
// NOTE(review): lines are missing from this excerpt (presumably a null check before the new
// book is created, and the return statements) - confirm against the full file.
// The parameter "line" is not used in the visible part of the method.
184 private Reference
getSynSec(SimpleExcelTaxonImportState
<CONFIG
> state
, String secRefStr
,
185 String referenciaNombre
, String line
) {
186 if (isBlank(secRefStr
)){
187 secRefStr
= referenciaNombre
;
189 if (isNotBlank(secRefStr
)){
190 Reference result
= state
.getReference(secRefStr
);
192 result
= ReferenceFactory
.newBook();
193 result
.setTitleCache(secRefStr
, true);
194 state
.putReference(secRefStr
, result
);
// Resolves the sec reference of an accepted taxon from a string of the form "<author(s)>, <year>".
// References are cached in the import state under the complete string, agents under the
// author part resp. their title cache. A null secRefStr yields the sec reference of the configuration.
// NOTE(review): some lines of this method are not visible in this excerpt (e.g. the else
// branches and presumably a check whether a cached author exists) - confirm against the full file.
207 private Reference
getSecRef(SimpleExcelTaxonImportState
<CONFIG
> state
, String secRefStr
, String line
) {
208 Reference result
= state
.getReference(secRefStr
);
209 if (result
== null && secRefStr
!= null){
210 result
= ReferenceFactory
.newBook();
// the last 4 characters are parsed as the year, everything but the last 6 characters
// (year plus the separator ", ") is taken as the author part
// NOTE(review): both substring calls throw StringIndexOutOfBoundsException for strings
// shorter than 6 characters
211 VerbatimTimePeriod tp
= TimePeriodParser
.parseStringVerbatim(secRefStr
.substring(secRefStr
.length()-4));
212 String authorStrPart
= secRefStr
.substring(0, secRefStr
.length()-6);
// the string is accepted only if author part + ", " + parsed year gives the input again
213 if (! (authorStrPart
+ ", " + tp
.getYear()).equals(secRefStr
)){
214 logger
.warn(line
+ "Sec ref could not be parsed: " + secRefStr
);
216 result
.setDatePublished(tp
);
218 TeamOrPersonBase
<?
> author
= state
.getAgentBase(authorStrPart
);
// "A, B & C": the part before "&" is split at "," into team members, the part directly
// after the first "&" becomes the last member (any further "&" parts are not read)
220 if (authorStrPart
.contains("&")){
221 Team team
= Team
.NewInstance();
222 String
[] authorSplit
= authorStrPart
.split("&");
223 String
[] firstAuthorSplit
= authorSplit
[0].trim().split(",");
224 for (String authorStr
: firstAuthorSplit
){
225 addTeamMember(team
, authorStr
);
227 addTeamMember(team
, authorSplit
[1]);
228 result
.setAuthorship(team
);
229 state
.putAgentBase(team
.getTitleCache(), team
);
// these three source names are stored as title of the reference, not as author
230 }else if (authorStrPart
.equalsIgnoreCase("Tropicos") || authorStrPart
.equalsIgnoreCase("The Plant List")
231 || authorStrPart
.equalsIgnoreCase("APG IV")){
232 result
.setTitle(authorStrPart
);
// the whole author part is used as family name of a single person
234 Person person
= Person
.NewInstance();
235 person
.setFamilyName(authorStrPart
);
236 result
.setAuthorship(person
);
237 state
.putAgentBase(person
.getTitleCache(), person
);
// the author looked up in the state is used as authorship
240 result
.setAuthorship(author
);
242 state
.putReference(secRefStr
, result
);
// no sec string given: fall back to the sec reference of the configuration
243 }else if(secRefStr
== null){
244 return state
.getConfig().getSecReference();
256 private void addTeamMember(Team team
, String author
) {
257 if (StringUtils
.isNotBlank(author
)){
258 Person person
= Person
.NewInstance();
259 person
.setFamilyName(author
.trim());
260 team
.addTeamMember(person
);
// Creates the botanical name for the current record, or reuses a name already known to the
// import state that has the same title cache. The name is parsed with and without its
// nomenclatural citation, a nomenclatural status may be added, and an original source is attached.
// NOTE(review): several lines of this method are not visible in this excerpt (e.g. the
// declaration of "rank", the try statements, else branches and the final return) - confirm
// against the full file.
271 private IBotanicalName
makeName(String line
, Map
<String
, String
> record
, SimpleExcelTaxonImportState
<CONFIG
> state
) {
// column values used below; AutorSinAnio is read via getValueNd()
273 String authorStr
= getValueNd(record
, "AutorSinAnio");
274 String nameStr
= getValue(record
, "Nombre");
275 String nomRefStr
= getValue(record
, "CitaNomenclatural");
276 String refType
= getValue(record
, "ReferenceType");
277 String idCat
= getValue(record
, "IdCAT");
278 String rankStr
= getValue(record
, "CategoriaTaxonomica");
279 String annotation
= getValue(record
, "Anotacion al Taxon");
// rank: for subspecies, variety, form and race the rank abbreviation is inserted in front of
// the last blank-separated part of the name string
// NOTE(review): lastIndexOf(" ") is -1 for a name string without a blank, which makes
// substring(0, i) throw StringIndexOutOfBoundsException
284 rank
= state
.getTransformer().getRankByKey(rankStr
);
285 if (Rank
.SUBSPECIES().equals(rank
) || Rank
.VARIETY().equals(rank
) || Rank
.FORM().equals(rank
) || Rank
.RACE().equals(rank
)){
286 int i
= nameStr
.lastIndexOf(" ");
287 nameStr
= nameStr
.substring(0, i
) + " " + rank
.getAbbreviation() + nameStr
.substring(i
);
289 } catch (UndefinedTransformerMethodException e
) {
290 logger
.warn(line
+ "Rank not recognized: " + rankStr
);
// name + author (the author is appended only if present)
294 String fullNameStr
= nameStr
+ (authorStr
!= null ?
" " + authorStr
: "");
// parse without citation; a protected title cache is reported as a parsing failure
296 IBotanicalName fullName
= (IBotanicalName
)nameParser
.parseFullName(fullNameStr
, NomenclaturalCode
.ICNAFP
, rank
);
297 if (fullName
.isProtectedTitleCache()){
298 logger
.warn(line
+ "Name could not be parsed: " + fullNameStr
);
300 replaceAuthorNamesAndNomRef(state
, fullName
);
// look for a name with the same title cache among the names known to the state
302 IBotanicalName existingName
= getExistingName(state
, fullName
);
// name string including the nomenclatural citation, see getRefNameStr()
305 String refNameStr
= getRefNameStr(nomRefStr
, refType
, fullNameStr
);
307 IBotanicalName referencedName
= (IBotanicalName
)nameParser
.parseReferencedName(refNameStr
, NomenclaturalCode
.ICNAFP
, rank
);
308 if (referencedName
.isProtectedFullTitleCache() || referencedName
.isProtectedTitleCache()){
309 logger
.warn(line
+ "Referenced name could not be parsed: " + refNameStr
);
311 addSourcesToReferences(referencedName
, state
);
312 replaceAuthorNamesAndNomRef(state
, referencedName
);
314 adaptRefTypeForGeneric(referencedName
, refType
);
316 //compare nom. ref. with Borhidi
// an existing name wins; if its full title cache differs from the parsed one, its
// nomenclatural reference and micro reference are overwritten with the parsed values
// NOTE(review): the assignments to "equal" are not visible in this excerpt
317 IBotanicalName result
= referencedName
;
318 Boolean equal
= null;
319 if (existingName
!= null){
320 String existingRefTitle
= existingName
.getFullTitleCache();
321 String conabioRefTitle
= referencedName
.getFullTitleCache();
322 if (!existingRefTitle
.equals(conabioRefTitle
)){
323 existingName
.setNomenclaturalMicroReference(referencedName
.getNomenclaturalMicroReference());
324 existingName
.setNomenclaturalReference(referencedName
.getNomenclaturalReference());
329 result
= existingName
;
331 addNomRefExtension(state
, result
, equal
);
// status: the annotations "nom. illeg." and "nom. cons." are added as nomenclatural status
334 if (annotation
!= null && (annotation
.equals("nom. illeg.") || annotation
.equals("nom. cons."))){
336 NomenclaturalStatusType nomStatusType
= NomenclaturalStatusType
.getNomenclaturalStatusTypeByAbbreviation(annotation
, result
);
337 result
.addStatus(NomenclaturalStatus
.NewInstance(nomStatusType
));
338 } catch (UnknownCdmTypeException e
) {
339 logger
.warn(line
+ "nomStatusType not recognized: " + annotation
);
// original source of the name, using the namespace TAXON_NAMESPACE + "_Name"
343 this.addOriginalSource(result
, idCat
, TAXON_NAMESPACE
+ "_Name", state
.getConfig().getSourceReference());
// NOTE(review): the body of this check for a title cache equal to "null" is not visible here
345 if(result
.getNomenclaturalReference()!=null && result
.getNomenclaturalReference().getTitleCache().equals("null")){
// Adds an original source (created by makeOriginalSource()) to the nomenclatural reference of
// the name and, if that reference has an in-reference, also to the in-reference.
// NOTE(review): a line between the first and the second visible statement is missing from this
// excerpt (possibly a null check on nomRef) - confirm against the full file.
358 private void addSourcesToReferences(IBotanicalName name
, SimpleExcelTaxonImportState
<CONFIG
> state
) {
359 Reference nomRef
= name
.getNomenclaturalReference();
361 nomRef
.addSource(makeOriginalSource(state
));
362 if (nomRef
.getInReference() != null){
363 nomRef
.getInReference().addSource(makeOriginalSource(state
));
371 * @param referencedName
// Sets the type of the name's nomenclatural reference to Book if refTypeStr maps to
// ReferenceType.Book and the reference currently has another type. Other types are not changed.
// NOTE(review): lines between the first two visible statements are missing from this excerpt
// (possibly a null check on ref) - confirm against the full file.
374 private void adaptRefTypeForGeneric(IBotanicalName referencedName
, String refTypeStr
) {
375 INomenclaturalReference ref
= referencedName
.getNomenclaturalReference();
379 ReferenceType refType
= refTypeByRefTypeStr(refTypeStr
);
380 if (ref
.getType() != refType
&& refType
== ReferenceType
.Book
){
381 ref
.setType(refType
);
// Maps the content of the "ReferenceType" column to a ReferenceType:
// "A" -> Article, "B" -> Book. The visible code throws an IllegalArgumentException
// ("RefType not supported ...") after the null branch, presumably for all other values.
// NOTE(review): the result of the null branch is not visible in this excerpt; getRefNameStr()
// checks the result of this method for null, so null is presumably returned - confirm.
386 private ReferenceType
refTypeByRefTypeStr(String refType
){
387 if ("A".equals(refType
)){ //Article
388 return ReferenceType
.Article
;
389 }else if ("B".equals(refType
)){ //Book
390 return ReferenceType
.Book
;
391 }else if (refType
== null){ //Book
394 throw new IllegalArgumentException("RefType not supported " + refType
);
// Builds the string handed to the referenced-name parser. It starts as the full name string;
// for an article the citation is appended with " in ", for a book with ", ".
// A citation without reference type only produces a warning.
// NOTE(review): the branch body for a null citation and the return statement are not visible
// in this excerpt - confirm against the full file.
404 private String
getRefNameStr(String nomRefStr
, String refTypeStr
, String fullNameStr
) {
405 String refNameStr
= fullNameStr
;
406 ReferenceType refType
= refTypeByRefTypeStr(refTypeStr
);
407 if (nomRefStr
== null){
409 }else if (refType
== ReferenceType
.Article
){
410 refNameStr
= fullNameStr
+ " in " + nomRefStr
;
411 }else if (refType
== ReferenceType
.Book
){
412 refNameStr
= fullNameStr
+ ", " + nomRefStr
;
413 }else if (refType
== null && nomRefStr
!= null){
414 logger
.warn("RefType is null but nomRefStr exists");
422 * @param referencedName
// Records the full title cache of the name, followed by " - CONABIO", in an extension of the
// type MexicoConabioTransformer.uuidNomRefExtension. An existing extension of that type gets
// the value: equal marker + old value + line break + new text; otherwise a new extension
// labelled "Nomenclatural reference in Sources" is created.
// "equal" == null gives no marker, true gives "EQUAL\n", false gives "NOT EQUAL\n".
// NOTE(review): the lines after the update of an existing extension are not visible in this
// excerpt (presumably the method returns there) - confirm against the full file.
424 private void addNomRefExtension(SimpleExcelTaxonImportState
<CONFIG
> state
, IBotanicalName name
, Boolean equal
) {
425 String equalStr
= equal
== null ?
"" : equal
== true ?
"EQUAL\n" : "NOT EQUAL\n";
// reset the full title cache (second argument false = not protected) before reading it again
426 name
.setFullTitleCache(null, false);
427 String newExtensionStr
= name
.getFullTitleCache() + " - CONABIO";
428 UUID uuidNomRefExtension
= MexicoConabioTransformer
.uuidNomRefExtension
;
429 for (Extension extension
: name
.getExtensions()){
430 if (extension
.getType().getUuid().equals(uuidNomRefExtension
)){
431 extension
.setValue(equalStr
+ extension
.getValue() + "\n" + newExtensionStr
);
435 String label
= "Nomenclatural reference in Sources";
436 String abbrev
= "Nom. ref. src.";
437 ExtensionType extensionType
= getExtensionType(state
, uuidNomRefExtension
, label
, label
, abbrev
);
438 Extension
.NewInstance((TaxonName
)name
, newExtensionStr
, extensionType
);
// Set to true by initExistinNames() once the existing names have been put into the import state.
441 boolean nameMapIsInitialized
= false;
447 private IBotanicalName
getExistingName(SimpleExcelTaxonImportState
<CONFIG
> state
, IBotanicalName fullName
) {
448 initExistinNames(state
);
449 return (IBotanicalName
)state
.getName(fullName
.getTitleCache());
455 @SuppressWarnings("rawtypes")
456 private void initExistinNames(SimpleExcelTaxonImportState
<CONFIG
> state
) {
457 if (!nameMapIsInitialized
){
458 List
<String
> propertyPaths
= Arrays
.asList("");
459 List
<TaxonName
> existingNames
= this.getNameService().list(null, null, null, null, propertyPaths
);
460 for (TaxonName tnb
: existingNames
){
461 state
.putName(tnb
.getTitleCache(), tnb
);
463 nameMapIsInitialized
= true;
// Reads the value for the given key via getValue() and treats the literal "ND" specially.
// NOTE(review): the handling of "ND" and the return statement are not visible in this
// excerpt - presumably null is returned for "ND" and the value otherwise; confirm against
// the full file.
474 private String
getValueNd(Map
<String
, String
> record
, String string
) {
475 String value
= getValue(record
, string
);
476 if ("ND".equals(value
)){
// Second pass over the current record: connects the taxon or synonym registered in taxonIdMap
// during firstPass(). Accepted taxa ("aceptado") are added to the classification as child of
// the taxon found under IdCAT_AscendenteInmediato; synonyms (status starting with "sin") are
// added to the taxon found under IdCATRel. A missing or unhandled status is only logged.
// NOTE(review): some lines are not visible in this excerpt (the declaration of "parent" and
// presumably null checks around the two "... is missing" warnings) - confirm against the full file.
485 protected void secondPass(SimpleExcelTaxonImportState
<CONFIG
> state
) {
486 // IdCAT_AscendenteInmediato, IdCATRel, TipoRelacion
487 Map
<String
, String
> record
= state
.getOriginalRecord();
488 String line
= state
.getCurrentLine() + ": ";
490 String parentStr
= getValue(record
, "IdCAT_AscendenteInmediato");
491 String relStr
= getValue(record
, "IdCATRel");
493 String statusStr
= getValue(record
, "EstatusNombre");
// this local variable shadows the field of the same name; getClassification() caches in the field
495 Classification classification
= getClassification(state
);
496 String idCat
= getValue(record
, "IdCAT");
497 TaxonBase
<?
> taxonBase
= taxonIdMap
.get(idCat
);
499 if(statusStr
== null){
500 logger
.warn("No statusStr in line " +line
);
// accepted taxon: the map entries are cast to Taxon without a type check
501 }else if ("aceptado".equals(statusStr
)){
502 parent
= (Taxon
)taxonIdMap
.get(parentStr
);
504 logger
.warn(line
+ "Parent is missing: "+ parentStr
);
506 Taxon taxon
= (Taxon
)taxonBase
;
507 Reference relRef
= null; //TODO
508 classification
.addParentChild(parent
, taxon
, relRef
, null);
509 // makeConceptRelation(line, taxon.getName());
// synonym: "parent" holds the accepted taxon here
511 }else if (statusStr
.startsWith("sin")){
512 parent
= (Taxon
)taxonIdMap
.get(relStr
);
514 logger
.warn(line
+ "Accepted taxon is missing: "+ relStr
);
516 Synonym synonym
= (Synonym
)taxonBase
;
517 parent
.addSynonym(synonym
, SynonymType
.SYNONYM_OF());
518 // makeConceptRelation(line, synonym.getName());
521 logger
.warn("Unhandled statusStr in line " + line
);
// For a name used by exactly two taxon bases, relates the two accepted taxa with a
// CONGRUENT_TO relationship marked as doubtful; the direction depends on whether the sec
// reference of the first taxon is MexicoConabioTransformer.uuidReferenceBorhidi.
// Names with more than two taxon bases are only logged.
// Within this excerpt the method is referenced only from commented-out calls in secondPass().
// NOTE(review): the remaining arguments of both addTaxonRelation() calls and the else line
// between them are not visible in this excerpt - confirm against the full file.
529 private void makeConceptRelation(String line
, TaxonName name
) {
530 if (name
.getTaxonBases().size()==2){
531 Iterator
<TaxonBase
> it
= name
.getTaxonBases().iterator();
532 Taxon taxon1
= getAccepted(it
.next());
533 Taxon taxon2
= getAccepted(it
.next());
534 Reference citation
= null;
535 TaxonRelationship rel
;
536 if (taxon1
.getSec().getUuid().equals(MexicoConabioTransformer
.uuidReferenceBorhidi
)){
537 rel
= taxon1
.addTaxonRelation(taxon2
, TaxonRelationshipType
.CONGRUENT_TO(),
540 rel
= taxon2
.addTaxonRelation(taxon1
, TaxonRelationshipType
.CONGRUENT_TO(),
543 rel
.setDoubtful(true);
544 }else if (name
.getTaxonBases().size()>2){
545 logger
.warn(line
+ "Names with more than 2 taxa not yet handled");
554 private Taxon
getAccepted(TaxonBase
<?
> taxonBase
) {
555 if (taxonBase
.isInstanceOf(Taxon
.class)){
556 return CdmBase
.deproxy(taxonBase
, Taxon
.class);
558 Synonym syn
= CdmBase
.deproxy(taxonBase
, Synonym
.class);
559 return syn
.getAcceptedTaxon();
563 private Classification
getClassification(SimpleExcelTaxonImportState
<CONFIG
> state
) {
564 if (classification
== null){
565 MexicoConabioImportConfigurator config
= state
.getConfig();
566 classification
= getClassificationService().find(config
.getClassificationUuid());
567 if (classification
== null){
568 classification
= Classification
.NewInstance(config
.getClassificationName());
569 classification
.setUuid(config
.getClassificationUuid());
570 classification
.setReference(config
.getSecReference());
571 getClassificationService().save(classification
);
574 return classification
;
579 protected boolean isIgnore(SimpleExcelTaxonImportState
<CONFIG
> state
) {
580 return ! state
.getConfig().isDoTaxa();