2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.mexico
;
12 import java
.sql
.ResultSet
;
13 import java
.sql
.SQLException
;
14 import java
.util
.HashMap
;
15 import java
.util
.HashSet
;
18 import java
.util
.UUID
;
20 import org
.apache
.log4j
.Logger
;
21 import org
.springframework
.stereotype
.Component
;
23 import eu
.etaxonomy
.cdm
.io
.common
.ResultSetPartitioner
;
24 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
25 import eu
.etaxonomy
.cdm
.model
.common
.IdentifiableSource
;
26 import eu
.etaxonomy
.cdm
.model
.name
.IBotanicalName
;
27 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
28 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatus
;
29 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatusType
;
30 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
31 import eu
.etaxonomy
.cdm
.model
.name
.TaxonName
;
32 import eu
.etaxonomy
.cdm
.model
.reference
.INomenclaturalReference
;
33 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
34 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
35 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceType
;
36 import eu
.etaxonomy
.cdm
.model
.taxon
.Synonym
;
37 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
38 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
39 import eu
.etaxonomy
.cdm
.model
.term
.DefinedTerm
;
40 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
41 import eu
.etaxonomy
.cdm
.strategy
.parser
.INonViralNameParser
;
42 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImpl
;
43 import eu
.etaxonomy
.cdm
.strategy
.parser
.TimePeriodParser
;
50 public class MexicoEfloraTaxonImport
extends MexicoEfloraImportBase
{
52 private static final long serialVersionUID
= -1186364983750790695L;
53 private static final Logger logger
= Logger
.getLogger(MexicoEfloraTaxonImport
.class);
55 public static final String NAMESPACE
= "Taxon";
57 private static final String pluralString
= "Taxa";
58 protected static final String dbTableName
= "EFlora_Taxonomia4CDM2";
60 protected static INonViralNameParser
<TaxonName
> nameParser
= (INonViralNameParser
)NonViralNameParserImpl
.NewInstance();
/**
 * Creates the import and hands the source table name and the plural label
 * of the imported objects to the base class.
 */
public MexicoEfloraTaxonImport() {
    super(dbTableName, pluralString);
}
// Builds the SQL statement selecting the IdCAT values of the source table,
// leaving out the two IdCAT values '2PLANT' and '79217TRACH'.
// NOTE(review): the end of the SQL expression and the return statement are not
// visible in this extract - confirm against the complete file.
69 protected String
getIdQuery(MexicoEfloraImportState state
) {
70 String sql
= " SELECT IdCAT "
71 + " FROM " + dbTableName
72 + " WHERE IdCAT NOT IN ('2PLANT','79217TRACH') "
78 protected String
getRecordQuery(MexicoEfloraImportConfigurator config
) {
79 String sqlSelect
= " SELECT * ";
80 String sqlFrom
= " FROM " + dbTableName
;
81 String sqlWhere
= " WHERE ( IdCAT IN (" + ID_LIST_TOKEN
+ ") )";
83 String strRecordQuery
=sqlSelect
+ " " + sqlFrom
+ " " + sqlWhere
;
84 return strRecordQuery
;
// Switched to false by handleSingleRecord once the first missing sec reference
// has been reported, so that the warning is written only once.
87 boolean firstMissingSec
= true;
// Assigned at the beginning of doPartition from the configured source reference;
// read by makeOriginalSource.
89 Reference sourceReference
;
/**
 * Imports the records of one partition: every record is passed to
 * handleSingleRecord, which collects the created taxa in taxaToSave;
 * the collected taxa are saved via the taxon service afterwards.
 * NOTE(review): the try/loop headers, the declaration of the counter i and the
 * return statements are not visible in this extract.
 *
 * @param partitioner provides the result set and the related reference map of the partition
 * @param state the import state
 */
91 public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner
, MexicoEfloraImportState state
) {
// remember the source reference; makeOriginalSource reads this field
92 sourceReference
= this.getSourceReference(state
.getConfig().getSourceReference());
94 state
.getDeduplicationHelper().reset();
95 boolean success
= true ;
96 @SuppressWarnings("rawtypes")
97 Set
<TaxonBase
> taxaToSave
= new HashSet
<>();
// references of the partition, stored under the reference import namespace
99 @SuppressWarnings("unchecked")
100 Map
<String
, Reference
> refMap
= partitioner
.getObjectMap(MexicoEfloraReferenceImportBase
.NAMESPACE
);
103 ResultSet rs
= partitioner
.getResultSet();
105 // System.out.println();
// i is a running record counter, used by handleSingleRecord for progress logging
107 success
= handleSingleRecord(partitioner
, state
, success
, taxaToSave
, refMap
, rs
, i
++);
109 } catch (Exception e
) {
111 logger
.error("Exception:" + e
);
115 getTaxonService().save(taxaToSave
);
/**
 * Handles the record the result set is currently positioned on: reads its columns,
 * creates the name via makeName, looks up the sec reference in refMap and creates
 * a Taxon (status "aceptado") or a Synonym (status "sinónimo") with the uuid of
 * the record. The created object is added to taxaToSave. Any other status is
 * logged as error. Exceptions raised while creating the taxon are logged as warning.
 * NOTE(review): several lines (try header, local declarations, assignments to
 * taxonBase, return statements) are not visible in this extract.
 *
 * @param partitioner the partitioner; startDoSave() is called on it before adding the taxon
 * @param state the import state, passed on to makeName
 * @param success the success flag handed in by the caller
 * @param taxaToSave collects the created taxa and synonyms
 * @param refMap sec references, keyed by the string value of IdBibliografiaSec
 * @param rs the result set positioned on the record to handle
 * @param i running record counter, only used for progress logging
 * @throws SQLException if reading a column fails
 */
119 private boolean handleSingleRecord(ResultSetPartitioner partitioner
, MexicoEfloraImportState state
, boolean success
,
120 Set
<TaxonBase
> taxaToSave
, Map
<String
, Reference
> refMap
, ResultSet rs
, int i
) throws SQLException
{
// progress message every 1000 records
121 if ((i
% 1000) == 0 && i
!= 1 ){ logger
.info("Taxa handled: " + (i
-1));}
122 // System.out.println("i++");
123 //create Taxon element
124 String taxonId
= rs
.getString("IdCAT");
125 String status
= rs
.getString("EstatusNombre");
126 String rankStr
= rs
.getString("CategoriaTaxonomica");
127 String nameStr
= rs
.getString("Nombre");
128 String autorStr
= rs
.getString("AutorSinAnio");
// NOTE(review): fullNameStr is not referenced again in the visible part of this method
129 String fullNameStr
= nameStr
+ " " + autorStr
;
130 String citaNomenclaturalStr
= rs
.getString("CitaNomenclatural");
131 String annotationStr
= rs
.getString("AnotacionTaxon");
132 String type
= rs
.getString("NomPublicationType");
133 String year
= rs
.getString("Anio");
134 String uuidStr
= rs
.getString("uuid");
135 UUID uuid
= UUID
.fromString(uuidStr
);
136 Integer secFk
= nullSafeInt(rs
, "IdBibliografiaSec");
139 // Rank rank = getRank(rankStr);
140 // NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
141 // TaxonName taxonName = (TaxonName)parser.parseFullName(fullNameStr, NomenclaturalCode.ICNAFP, rank);
143 // DefinedTerm conabioIdentifier = getIdentiferType(state, MexicoConabioTransformer.uuidConabioTaxonIdIdentifierType,
144 // "CONABIO Taxon Identifier", "CONABIO Taxon Identifier", "CONABIO", null);
145 // taxonName.addIdentifier(taxonId, conabioIdentifier);
147 // Reference nomRef = ReferenceFactory.newGeneric();
148 // nomRef.setAbbrevTitleCache(citaNomenclaturalStr, true);
149 // nomRef.setDatePublished(TimePeriodParser.parseStringVerbatim(year));
150 // taxonName.setNomenclaturalReference(nomRef);
// name creation (parsing, nomenclatural reference, status, identifier) is done by makeName
152 TaxonName taxonName
= makeName(taxonId
, state
, autorStr
,
153 nameStr
, citaNomenclaturalStr
, type
, rankStr
, annotationStr
, year
);
// sec reference lookup; refMap is keyed by the string value of the foreign key
156 Reference sec
= null;
158 String refFkStr
= String
.valueOf(secFk
);
159 sec
= refMap
.get(refFkStr
);
// only the first missing sec reference is reported
160 if (sec
== null && firstMissingSec
) {
161 logger
.warn("There are missing sec refs but they are not logged anymore.");
162 logger
.debug("Sec not found for taxonId " + taxonId
+" and secId " + refFkStr
);
163 firstMissingSec
= false;
168 TaxonBase
<?
> taxonBase
;
// accepted names become taxa, synonym names become synonyms
172 if ("aceptado".equals(status
)){
173 taxon
= Taxon
.NewInstance(taxonName
, sec
);
175 }else if ("sinónimo".equals(status
)){
176 synonym
= Synonym
.NewInstance(taxonName
, sec
);
180 logger
.error("Status not yet implemented: " + status
);
// the uuid delivered by the source record is used as uuid of the taxon
183 taxonBase
.setUuid(uuid
);
185 partitioner
.startDoSave();
186 taxaToSave
.add(taxonBase
);
187 } catch (Exception e
) {
188 logger
.warn("An exception (" +e
.getMessage()+") occurred when creating taxon with id " + taxonId
+ ". Taxon could not be saved.");
// Set to false by makeName after the "deduplication switched off" warning has been logged.
194 boolean isFirstDedup
= true;
/**
 * Creates the taxon name for a record. Visible steps: the rank is resolved via getRank,
 * subgenus brackets are normalized via removeSubgenusBracket, the name with author is
 * parsed, the name with nomenclatural reference is parsed via parseReferencedName,
 * sources are added to the parsed references, the publication year is set on the
 * nomenclatural reference if that has no year yet, the nomenclatural status is added
 * for the annotations "nom. illeg." and "nom. cons.", and the taxon id is added as
 * CONABIO taxon identifier.
 * NOTE(review): several lines (else branches, try header, closing braces, the return
 * statement) are not visible in this extract.
 *
 * @param taxonId id of the record, used in log messages and as identifier of the name
 * @param state the import state
 * @param authorStr the author string, may be null
 * @param nameStr the name string without author
 * @param nomRefStr the nomenclatural reference string
 * @param refType the reference type code as accepted by refTypeByRefTypeStr
 * @param rankStr the rank label as accepted by getRank
 * @param annotation the annotation of the record, may be null
 * @param year the publication year string
 */
195 private TaxonName
makeName(String taxonId
, MexicoEfloraImportState state
,
196 String authorStr
, String nameStr
, String nomRefStr
, String refType
, String rankStr
,
197 String annotation
, String year
) {
200 Rank rank
= getRank(rankStr
);
201 //TODO hybrido and race
// getRank returns null for "híbrido", therefore the label itself is checked here
202 boolean isHybrid
= rank
== null && "híbrido".equals(rankStr
);
203 boolean isRace
= Rank
.RACE().equals(rank
);
204 // rank = state.getTransformer().getRankByKey(rankStr);
206 nameStr
= removeSubgenusBracket(nameStr
, rank
);
// the author is appended only if present
209 String fullNameStr
= nameStr
+ (authorStr
!= null ?
" " + authorStr
: "");
211 TaxonName fullName
= nameParser
.parseFullName(fullNameStr
, NomenclaturalCode
.ICNAFP
, rank
);
// a protected title cache is treated as "not parsable"
212 if (fullName
.isProtectedTitleCache()){
213 logger
.info(taxonId
+ ": Name could not be parsed: " + fullNameStr
);
216 logger
.warn("Deduplication is still switcht off!");
217 //see also further below
218 isFirstDedup
= false;
221 state
.getDeduplicationHelper().replaceAuthorNamesAndNomRef(fullName
);
// second parse: name including its nomenclatural reference
225 String refNameStr
= getRefNameStr(nomRefStr
, refType
, fullNameStr
, taxonId
);
227 TaxonName referencedName
= nameParser
.parseReferencedName(refNameStr
, NomenclaturalCode
.ICNAFP
, rank
);
228 if (referencedName
.isProtectedFullTitleCache() || referencedName
.isProtectedTitleCache()){
229 logger
.warn(taxonId
+ ": Referenced name could not be parsed: " + refNameStr
);
231 addSourcesToReferences(referencedName
, state
);
232 //FIXME deduplication
233 // state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(referencedName);
235 adaptRefTypeForGeneric(referencedName
, refType
);
236 Reference nomRef
= referencedName
.getNomenclaturalReference();
// year handling: create a generic reference if none exists, set the year if the
// reference has none, warn if the parsed year differs from the year column
237 if (isNotBlank(year
)) {
238 if (nomRef
== null) {
239 nomRef
= ReferenceFactory
.newGeneric();
241 String nomRefYear
= nomRef
.getYear();
242 if (isBlank(nomRefYear
)) {
243 nomRef
.setDatePublished(TimePeriodParser
.parseStringVerbatim(year
));
244 }else if (! nomRefYear
.equals(year
)){
245 logger
.warn(taxonId
+ ": year and parsed year are not equal: "+ year
+ "<->" + nomRefYear
);
249 TaxonName result
= referencedName
;
// only these two annotations are interpreted as nomenclatural status
252 if (annotation
!= null && (annotation
.equals("nom. illeg.") || annotation
.equals("nom. cons."))){
254 NomenclaturalStatusType nomStatusType
= NomenclaturalStatusType
.getNomenclaturalStatusTypeByAbbreviation(annotation
, result
);
255 result
.addStatus(NomenclaturalStatus
.NewInstance(nomStatusType
));
256 } catch (UnknownCdmTypeException e
) {
257 logger
.warn(taxonId
+ ": nomStatusType not recognized: " + annotation
);
// NOTE(review): the body of this check (reference with title cache "null") is not visible here
261 if(result
.getNomenclaturalReference()!=null && result
.getNomenclaturalReference().getTitleCache().equals("null")){
// the taxon id of the source is kept as CONABIO identifier on the name
265 DefinedTerm conabioIdentifier
= getIdentiferType(state
, MexicoConabioTransformer
.uuidConabioTaxonIdIdentifierType
,
266 "CONABIO Taxon Identifier", "CONABIO Taxon Identifier", "CONABIO", null);
267 result
.addIdentifier(taxonId
, conabioIdentifier
);
272 private String
removeSubgenusBracket(String nameStr
, Rank rank
) {
273 if (nameStr
.matches("[A-Z][a-z]+\\s+\\([A-Za-z]+\\)\\s+[a-z]+.*")) {
274 //species and below: remove bracket completely
275 nameStr
= nameStr
.substring(0, nameStr
.indexOf("(")) + nameStr
.substring(nameStr
.indexOf(")")+1);
276 }else if (nameStr
.matches("[A-Z][a-z]+\\s+\\([A-Za-z]+\\)")) {
277 //subgenus: replace (...) bei subg. ...
278 nameStr
= nameStr
.substring(0, nameStr
.indexOf("(")) + "subg. " + nameStr
.substring(nameStr
.indexOf("(")+1, nameStr
.length()-1);
283 private void adaptRefTypeForGeneric(IBotanicalName referencedName
, String refTypeStr
) {
284 INomenclaturalReference ref
= referencedName
.getNomenclaturalReference();
288 ReferenceType refType
= refTypeByRefTypeStr(refTypeStr
);
289 if (ref
.getType() != refType
&& refType
== ReferenceType
.Book
){
290 ref
.setType(refType
);
294 private String
getRefNameStr(String nomRefStr
, String refTypeStr
, String fullNameStr
, String taxonID
) {
295 String refNameStr
= fullNameStr
;
296 ReferenceType refType
= refTypeByRefTypeStr(refTypeStr
);
297 if (isBlank(nomRefStr
)){
299 }else if (refType
== ReferenceType
.Article
){
300 refNameStr
= fullNameStr
+ " in " + nomRefStr
;
301 }else if (refType
== ReferenceType
.Book
){
302 refNameStr
= fullNameStr
+ ", " + nomRefStr
;
303 }else if (refType
== null){
304 logger
.warn(taxonID
+ ": RefType is null but nomRefStr exists");
309 private ReferenceType
refTypeByRefTypeStr(String refType
){
310 if ("A".equals(refType
)){ //Article
311 return ReferenceType
.Article
;
312 }else if ("B".equals(refType
)){ //Book
313 return ReferenceType
.Book
;
314 }else if (refType
== null || isBlank(refType
)){ //Book
317 throw new IllegalArgumentException("RefType not supported " + refType
);
321 private void addSourcesToReferences(IBotanicalName name
, MexicoEfloraImportState state
) {
322 Reference nomRef
= name
.getNomenclaturalReference();
324 nomRef
.addSource(makeOriginalSource(state
));
325 if (nomRef
.getInReference() != null){
326 nomRef
.getInReference().addSource(makeOriginalSource(state
));
331 protected IdentifiableSource
makeOriginalSource(MexicoEfloraImportState state
) {
332 return IdentifiableSource
.NewDataImportInstance(null, null, sourceReference
);
335 private Rank
getRank(String rank
) {
337 if ("Reino".equals(rank
)){ return Rank
.KINGDOM();}
338 else if ("división".equals(rank
)){ return Rank
.DIVISION();}
339 else if ("clase".equals(rank
)){ return Rank
.CLASS();}
340 else if ("subclase".equals(rank
)){ return Rank
.SUBCLASS();}
341 else if ("superorden".equals(rank
)){ return Rank
.SUPERORDER();}
342 else if ("orden".equals(rank
)){ return Rank
.ORDER();}
343 else if ("suborden".equals(rank
)){ return Rank
.SUBORDER();}
344 else if ("familia".equals(rank
)){ return Rank
.FAMILY();}
345 else if ("subfamilia".equals(rank
)){ return Rank
.SUBFAMILY();}
346 else if ("tribu".equals(rank
)){ return Rank
.TRIBE();}
347 else if ("subtribu".equals(rank
)){ return Rank
.SUBTRIBE();}
348 else if ("género".equals(rank
)){ return Rank
.GENUS();}
349 else if ("subgénero".equals(rank
)){ return Rank
.SUBGENUS();}
350 else if ("sección".equals(rank
)){ return Rank
.SECTION_BOTANY();}
351 else if ("subsección".equals(rank
)){ return Rank
.SUBSECTION_BOTANY();}
352 else if ("serie".equals(rank
)){ return Rank
.SERIES();}
353 else if ("grupo".equals(rank
)){ return Rank
.SPECIESGROUP();}
354 else if ("híbrido".equals(rank
)){ return null;} //will be handled later
355 else if ("especie".equals(rank
)){ return Rank
.SPECIES();}
356 else if ("subespecie".equals(rank
)){ return Rank
.SUBSPECIES();}
357 else if ("raza".equals(rank
)){ return Rank
.RACE();}
358 else if ("variedad".equals(rank
)){ return Rank
.VARIETY();}
359 else if ("subvariedad".equals(rank
)){ return Rank
.SUBVARIETY();}
360 else if ("forma".equals(rank
)){ return Rank
.FORM();}
361 else if ("subforma".equals(rank
)){ return Rank
.SUBFORM();}
362 else if ("raza".equals(rank
)){ return Rank
.RACE();}
364 logger
.warn("Rank not recognized: "+ rank
);
371 public Map
<Object
, Map
<String
, ?
extends CdmBase
>> getRelatedObjectsForPartition(ResultSet rs
, MexicoEfloraImportState state
) {
375 Map
<Object
, Map
<String
, ?
extends CdmBase
>> result
= new HashMap
<>();
378 Set
<String
> referenceIdSet
= new HashSet
<>();
380 handleForeignKey(rs
, referenceIdSet
, "IdBibliografiaSec");
384 nameSpace
= MexicoEfloraReferenceImportBase
.NAMESPACE
;
385 idSet
= referenceIdSet
;
386 Map
<String
, Reference
> referenceMap
= getCommonService().getSourcedObjectsByIdInSourceC(Reference
.class, idSet
, nameSpace
);
387 result
.put(nameSpace
, referenceMap
);
389 } catch (SQLException e
) {
390 throw new RuntimeException(e
);
396 protected String
getTableName() {
401 public String
getPluralString() {
406 protected boolean doCheck(MexicoEfloraImportState state
){
411 protected boolean isIgnore(MexicoEfloraImportState state
){
412 return ! state
.getConfig().isDoTaxa();