773efdf83ff988fe867637b6ccc480743a33996e
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / mexico / MexicoEfloraTaxonImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.mexico;
11
12 import java.sql.ResultSet;
13 import java.sql.SQLException;
14 import java.util.HashMap;
15 import java.util.HashSet;
16 import java.util.Map;
17 import java.util.Set;
18 import java.util.UUID;
19
20 import org.apache.log4j.Logger;
21 import org.springframework.stereotype.Component;
22
23 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
24 import eu.etaxonomy.cdm.model.common.CdmBase;
25 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
26 import eu.etaxonomy.cdm.model.name.IBotanicalName;
27 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
28 import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
29 import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
30 import eu.etaxonomy.cdm.model.name.Rank;
31 import eu.etaxonomy.cdm.model.name.TaxonName;
32 import eu.etaxonomy.cdm.model.reference.INomenclaturalReference;
33 import eu.etaxonomy.cdm.model.reference.Reference;
34 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
35 import eu.etaxonomy.cdm.model.reference.ReferenceType;
36 import eu.etaxonomy.cdm.model.taxon.Synonym;
37 import eu.etaxonomy.cdm.model.taxon.Taxon;
38 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
39 import eu.etaxonomy.cdm.model.term.DefinedTerm;
40 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
41 import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
42 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
43 import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
44
45 /**
46 * @author a.mueller
47 * @since 08.02.2022
48 */
49 @Component
50 public class MexicoEfloraTaxonImport extends MexicoEfloraImportBase {
51
52 private static final long serialVersionUID = -1186364983750790695L;
53 private static final Logger logger = Logger.getLogger(MexicoEfloraTaxonImport.class);
54
55 public static final String NAMESPACE = "Taxon";
56
57 private static final String pluralString = "Taxa";
58 protected static final String dbTableName = "EFlora_Taxonomia4CDM2";
59
60 protected static INonViralNameParser<TaxonName> nameParser = (INonViralNameParser)NonViralNameParserImpl.NewInstance();
61
62
63
/**
 * Creates the taxon import and passes the name of the source table and the
 * plural string of this import class to the base class constructor.
 */
public MexicoEfloraTaxonImport() {
    super(dbTableName, pluralString);
}
67
68 @Override
69 protected String getIdQuery(MexicoEfloraImportState state) {
70 String sql = " SELECT IdCAT "
71 + " FROM " + dbTableName
72 + " WHERE IdCAT NOT IN ('2PLANT','79217TRACH') "
73 + " ORDER BY IdCAT ";
74 return sql;
75 }
76
77 @Override
78 protected String getRecordQuery(MexicoEfloraImportConfigurator config) {
79 String sqlSelect = " SELECT * ";
80 String sqlFrom = " FROM " + dbTableName;
81 String sqlWhere = " WHERE ( IdCAT IN (" + ID_LIST_TOKEN + ") )";
82
83 String strRecordQuery =sqlSelect + " " + sqlFrom + " " + sqlWhere ;
84 return strRecordQuery;
85 }
86
87 boolean firstMissingSec = true;
88
89 Reference sourceReference;
90 @Override
91 public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, MexicoEfloraImportState state) {
92 sourceReference = this.getSourceReference(state.getConfig().getSourceReference());
93
94 state.getDeduplicationHelper().reset();
95 boolean success = true ;
96 @SuppressWarnings("rawtypes")
97 Set<TaxonBase> taxaToSave = new HashSet<>();
98
99 @SuppressWarnings("unchecked")
100 Map<String, Reference> refMap = partitioner.getObjectMap(MexicoEfloraReferenceImportBase.NAMESPACE);
101
102 int i = 0;
103 ResultSet rs = partitioner.getResultSet();
104 try{
105 // System.out.println();
106 while (rs.next()){
107 success = handleSingleRecord(partitioner, state, success, taxaToSave, refMap, rs, i++);
108 }
109 } catch (Exception e) {
110 e.printStackTrace();
111 logger.error("Exception:" + e);
112 return false;
113 }
114
115 getTaxonService().save(taxaToSave);
116 return success;
117 }
118
119 private boolean handleSingleRecord(ResultSetPartitioner partitioner, MexicoEfloraImportState state, boolean success,
120 Set<TaxonBase> taxaToSave, Map<String, Reference> refMap, ResultSet rs, int i) throws SQLException {
121 if ((i % 1000) == 0 && i!= 1 ){ logger.info("Taxa handled: " + (i-1));}
122 // System.out.println("i++");
123 //create Taxon element
124 String taxonId = rs.getString("IdCAT");
125 String status = rs.getString("EstatusNombre");
126 String rankStr = rs.getString("CategoriaTaxonomica");
127 String nameStr = rs.getString("Nombre");
128 String autorStr = rs.getString("AutorSinAnio");
129 String fullNameStr = nameStr + " " + autorStr;
130 String citaNomenclaturalStr = rs.getString("CitaNomenclatural");
131 String annotationStr = rs.getString("AnotacionTaxon");
132 String type = rs.getString("NomPublicationType");
133 String year = rs.getString("Anio");
134 String uuidStr = rs.getString("uuid");
135 UUID uuid = UUID.fromString(uuidStr);
136 Integer secFk = nullSafeInt(rs, "IdBibliografiaSec");
137
138 //name OLD handling
139 // Rank rank = getRank(rankStr);
140 // NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
141 // TaxonName taxonName = (TaxonName)parser.parseFullName(fullNameStr, NomenclaturalCode.ICNAFP, rank);
142 // //.. identifier
143 // DefinedTerm conabioIdentifier = getIdentiferType(state, MexicoConabioTransformer.uuidConabioTaxonIdIdentifierType,
144 // "CONABIO Taxon Identifier", "CONABIO Taxon Identifier", "CONABIO", null);
145 // taxonName.addIdentifier(taxonId, conabioIdentifier);
146 // //.. nom Ref
147 // Reference nomRef = ReferenceFactory.newGeneric();
148 // nomRef.setAbbrevTitleCache(citaNomenclaturalStr, true);
149 // nomRef.setDatePublished(TimePeriodParser.parseStringVerbatim(year));
150 // taxonName.setNomenclaturalReference(nomRef);
151
152 TaxonName taxonName= makeName(taxonId, state, autorStr,
153 nameStr, citaNomenclaturalStr, type, rankStr, annotationStr, year);
154
155 //sec
156 Reference sec = null;
157 if (secFk != null) {
158 String refFkStr = String.valueOf(secFk);
159 sec = refMap.get(refFkStr);
160 if (sec == null && firstMissingSec) {
161 logger.warn("There are missing sec refs but they are not logged anymore.");
162 logger.debug("Sec not found for taxonId " + taxonId +" and secId " + refFkStr);
163 firstMissingSec = false;
164 }
165 }
166
167 //taxon
168 TaxonBase<?> taxonBase;
169 Synonym synonym;
170 Taxon taxon;
171 try {
172 if ("aceptado".equals(status)){
173 taxon = Taxon.NewInstance(taxonName, sec);
174 taxonBase = taxon;
175 }else if ("sinónimo".equals(status)){
176 synonym = Synonym.NewInstance(taxonName, sec);
177 taxonBase = synonym;
178 }else {
179 taxonBase = null;
180 logger.error("Status not yet implemented: " + status);
181 return false;
182 }
183 taxonBase.setUuid(uuid);
184
185 partitioner.startDoSave();
186 taxaToSave.add(taxonBase);
187 } catch (Exception e) {
188 logger.warn("An exception (" +e.getMessage()+") occurred when creating taxon with id " + taxonId + ". Taxon could not be saved.");
189 success = false;
190 }
191 return success;
192 }
193
194 boolean isFirstDedup = true;
195 private TaxonName makeName(String taxonId, MexicoEfloraImportState state,
196 String authorStr, String nameStr, String nomRefStr, String refType, String rankStr,
197 String annotation, String year) {
198
199 //rank
200 Rank rank = getRank(rankStr);
201 //TODO hybrido and race
202 boolean isHybrid = rank == null && "híbrido".equals(rankStr);
203 boolean isRace = Rank.RACE().equals(rank);
204 // rank = state.getTransformer().getRankByKey(rankStr);
205
206 nameStr = removeSubgenusBracket(nameStr, rank);
207
208 //name + author
209 String fullNameStr = nameStr + (authorStr != null ? " " + authorStr : "");
210
211 TaxonName fullName = nameParser.parseFullName(fullNameStr, NomenclaturalCode.ICNAFP, rank);
212 if (fullName.isProtectedTitleCache()){
213 logger.info(taxonId + ": Name could not be parsed: " + fullNameStr );
214 }else{
215 if (isFirstDedup) {
216 logger.warn("Deduplication is still switcht off!");
217 //siehe auch weiter unten
218 isFirstDedup = false;
219 }
220 //FIXME dedup
221 state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(fullName);
222 }
223
224 //reference
225 String refNameStr = getRefNameStr(nomRefStr, refType, fullNameStr, taxonId);
226
227 TaxonName referencedName = nameParser.parseReferencedName(refNameStr, NomenclaturalCode.ICNAFP, rank);
228 if (referencedName.isProtectedFullTitleCache() || referencedName.isProtectedTitleCache()){
229 logger.warn(taxonId + ": Referenced name could not be parsed: " + refNameStr );
230 }else{
231 addSourcesToReferences(referencedName, state);
232 //FIXME deduplication
233 // state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(referencedName);
234 }
235 adaptRefTypeForGeneric(referencedName, refType);
236 Reference nomRef = referencedName.getNomenclaturalReference();
237 if (isNotBlank(year)) {
238 if (nomRef == null) {
239 nomRef = ReferenceFactory.newGeneric();
240 }
241 String nomRefYear = nomRef.getYear();
242 if (isBlank(nomRefYear)) {
243 nomRef.setDatePublished(TimePeriodParser.parseStringVerbatim(year));
244 }else if (! nomRefYear.equals(year)){
245 logger.warn(taxonId + ": year and parsed year are not equal: "+ year + "<->" + nomRefYear);
246 }
247 }
248
249 TaxonName result= referencedName;
250
251 //status
252 if (annotation != null && (annotation.equals("nom. illeg.") || annotation.equals("nom. cons."))){
253 try {
254 NomenclaturalStatusType nomStatusType = NomenclaturalStatusType.getNomenclaturalStatusTypeByAbbreviation(annotation, result);
255 result.addStatus(NomenclaturalStatus.NewInstance(nomStatusType));
256 } catch (UnknownCdmTypeException e) {
257 logger.warn(taxonId + ": nomStatusType not recognized: " + annotation);
258 }
259 }
260
261 if(result.getNomenclaturalReference()!=null && result.getNomenclaturalReference().getTitleCache().equals("null")){
262 logger.warn("null");
263 }
264
265 DefinedTerm conabioIdentifier = getIdentiferType(state, MexicoConabioTransformer.uuidConabioTaxonIdIdentifierType,
266 "CONABIO Taxon Identifier", "CONABIO Taxon Identifier", "CONABIO", null);
267 result.addIdentifier(taxonId, conabioIdentifier);
268
269 return result;
270 }
271
272 private String removeSubgenusBracket(String nameStr, Rank rank) {
273 if (nameStr.matches("[A-Z][a-z]+\\s+\\([A-Za-z]+\\)\\s+[a-z]+.*")) {
274 //species and below: remove bracket completely
275 nameStr = nameStr.substring(0, nameStr.indexOf("(")) + nameStr.substring(nameStr.indexOf(")")+1);
276 }else if (nameStr.matches("[A-Z][a-z]+\\s+\\([A-Za-z]+\\)")) {
277 //subgenus: replace (...) bei subg. ...
278 nameStr = nameStr.substring(0, nameStr.indexOf("(")) + "subg. " + nameStr.substring(nameStr.indexOf("(")+1, nameStr.length()-1);
279 }
280 return nameStr;
281 }
282
283 private void adaptRefTypeForGeneric(IBotanicalName referencedName, String refTypeStr) {
284 INomenclaturalReference ref = referencedName.getNomenclaturalReference();
285 if (ref == null){
286 return;
287 }
288 ReferenceType refType = refTypeByRefTypeStr(refTypeStr);
289 if (ref.getType() != refType && refType == ReferenceType.Book){
290 ref.setType(refType);
291 }
292 }
293
294 private String getRefNameStr(String nomRefStr, String refTypeStr, String fullNameStr, String taxonID) {
295 String refNameStr = fullNameStr;
296 ReferenceType refType = refTypeByRefTypeStr(refTypeStr);
297 if (isBlank(nomRefStr)){
298 //do nothing
299 }else if (refType == ReferenceType.Article){
300 refNameStr = fullNameStr + " in " + nomRefStr;
301 }else if (refType == ReferenceType.Book){
302 refNameStr = fullNameStr + ", " + nomRefStr;
303 }else if (refType == null){
304 logger.warn(taxonID + ": RefType is null but nomRefStr exists");
305 }
306 return refNameStr;
307 }
308
309 private ReferenceType refTypeByRefTypeStr(String refType){
310 if ("A".equals(refType)){ //Article
311 return ReferenceType.Article;
312 }else if ("B".equals(refType)){ //Book
313 return ReferenceType.Book;
314 }else if (refType == null || isBlank(refType)){ //Book
315 return null;
316 }else{
317 throw new IllegalArgumentException("RefType not supported " + refType);
318 }
319 }
320
321 private void addSourcesToReferences(IBotanicalName name, MexicoEfloraImportState state) {
322 Reference nomRef = name.getNomenclaturalReference();
323 if (nomRef != null){
324 nomRef.addSource(makeOriginalSource(state));
325 if (nomRef.getInReference() != null){
326 nomRef.getInReference().addSource(makeOriginalSource(state));
327 }
328 }
329 }
330
331 protected IdentifiableSource makeOriginalSource(MexicoEfloraImportState state) {
332 return IdentifiableSource.NewDataImportInstance(null, null, sourceReference);
333 }
334
335 private Rank getRank(String rank) {
336 Rank result = null;
337 if ("Reino".equals(rank)){ return Rank.KINGDOM();}
338 else if ("división".equals(rank)){ return Rank.DIVISION();}
339 else if ("clase".equals(rank)){ return Rank.CLASS();}
340 else if ("subclase".equals(rank)){ return Rank.SUBCLASS();}
341 else if ("superorden".equals(rank)){ return Rank.SUPERORDER();}
342 else if ("orden".equals(rank)){ return Rank.ORDER();}
343 else if ("suborden".equals(rank)){ return Rank.SUBORDER();}
344 else if ("familia".equals(rank)){ return Rank.FAMILY();}
345 else if ("subfamilia".equals(rank)){ return Rank.SUBFAMILY();}
346 else if ("tribu".equals(rank)){ return Rank.TRIBE();}
347 else if ("subtribu".equals(rank)){ return Rank.SUBTRIBE();}
348 else if ("género".equals(rank)){ return Rank.GENUS();}
349 else if ("subgénero".equals(rank)){ return Rank.SUBGENUS();}
350 else if ("sección".equals(rank)){ return Rank.SECTION_BOTANY();}
351 else if ("subsección".equals(rank)){ return Rank.SUBSECTION_BOTANY();}
352 else if ("serie".equals(rank)){ return Rank.SERIES();}
353 else if ("grupo".equals(rank)){ return Rank.SPECIESGROUP();}
354 else if ("híbrido".equals(rank)){ return null;} //will be handled later
355 else if ("especie".equals(rank)){ return Rank.SPECIES();}
356 else if ("subespecie".equals(rank)){ return Rank.SUBSPECIES();}
357 else if ("raza".equals(rank)){ return Rank.RACE();}
358 else if ("variedad".equals(rank)){ return Rank.VARIETY();}
359 else if ("subvariedad".equals(rank)){ return Rank.SUBVARIETY();}
360 else if ("forma".equals(rank)){ return Rank.FORM();}
361 else if ("subforma".equals(rank)){ return Rank.SUBFORM();}
362 else if ("raza".equals(rank)){ return Rank.RACE();}
363 else {
364 logger.warn("Rank not recognized: "+ rank);
365 }
366
367 return result;
368 }
369
370 @Override
371 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, MexicoEfloraImportState state) {
372
373 String nameSpace;
374 Set<String> idSet;
375 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
376
377 try{
378 Set<String> referenceIdSet = new HashSet<>();
379 while (rs.next()){
380 handleForeignKey(rs, referenceIdSet, "IdBibliografiaSec");
381 }
382
383 //reference map
384 nameSpace = MexicoEfloraReferenceImportBase.NAMESPACE;
385 idSet = referenceIdSet;
386 Map<String, Reference> referenceMap = getCommonService().getSourcedObjectsByIdInSourceC(Reference.class, idSet, nameSpace);
387 result.put(nameSpace, referenceMap);
388
389 } catch (SQLException e) {
390 throw new RuntimeException(e);
391 }
392 return result;
393 }
394
395 @Override
396 protected String getTableName() {
397 return dbTableName;
398 }
399
400 @Override
401 public String getPluralString() {
402 return pluralString;
403 }
404
405 @Override
406 protected boolean doCheck(MexicoEfloraImportState state){
407 return true;
408 }
409
410 @Override
411 protected boolean isIgnore(MexicoEfloraImportState state){
412 return ! state.getConfig().isDoTaxa();
413 }
414 }