ref #4670 fix BerlinModelTaxonImport for non-EuroMed
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / berlinModel / in / BerlinModelTaxonImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.berlinModel.in;
11
12 import static eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer.T_STATUS_ACCEPTED;
13 import static eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer.T_STATUS_PARTIAL_SYN;
14 import static eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer.T_STATUS_PRO_PARTE_SYN;
15 import static eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer.T_STATUS_SYNONYM;
16 import static eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer.T_STATUS_UNRESOLVED;
17
18 import java.lang.reflect.Method;
19 import java.sql.ResultSet;
20 import java.sql.SQLException;
21 import java.util.Arrays;
22 import java.util.HashMap;
23 import java.util.HashSet;
24 import java.util.List;
25 import java.util.Map;
26 import java.util.Set;
27 import java.util.UUID;
28
29 import org.apache.commons.lang.StringUtils;
30 import org.apache.logging.log4j.LogManager;
31 import org.apache.logging.log4j.Logger;
32 import org.springframework.stereotype.Component;
33
34 import eu.etaxonomy.cdm.common.CdmUtils;
35 import eu.etaxonomy.cdm.database.update.DatabaseTypeNotSupportedException;
36 import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
37 import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelTaxonImportValidator;
38 import eu.etaxonomy.cdm.io.common.IOValidator;
39 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
40 import eu.etaxonomy.cdm.model.common.CdmBase;
41 import eu.etaxonomy.cdm.model.common.Extension;
42 import eu.etaxonomy.cdm.model.common.ExtensionType;
43 import eu.etaxonomy.cdm.model.common.Identifier;
44 import eu.etaxonomy.cdm.model.common.Language;
45 import eu.etaxonomy.cdm.model.common.Marker;
46 import eu.etaxonomy.cdm.model.common.MarkerType;
47 import eu.etaxonomy.cdm.model.description.Feature;
48 import eu.etaxonomy.cdm.model.description.TaxonDescription;
49 import eu.etaxonomy.cdm.model.description.TextData;
50 import eu.etaxonomy.cdm.model.name.TaxonName;
51 import eu.etaxonomy.cdm.model.reference.Reference;
52 import eu.etaxonomy.cdm.model.taxon.Synonym;
53 import eu.etaxonomy.cdm.model.taxon.Taxon;
54 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
55 import eu.etaxonomy.cdm.model.term.DefinedTerm;
56
57
58 /**
59 * @author a.mueller
60 * @since 20.03.2008
61 */
62 @Component
63 public class BerlinModelTaxonImport extends BerlinModelImportBase {
64
65 private static final long serialVersionUID = -1186364983750790695L;
66 private static final Logger logger = LogManager.getLogger();
67
68 public static final String NAMESPACE = "Taxon";
69
70 private static final String pluralString = "Taxa";
71 private static final String dbTableName = "PTaxon";
72
73 private static final String LAST_SCRUTINY_FK = "lastScrutinyFk";
74
/**
 * How should the publish flag in table PTaxon be interpreted:
 * <ul>
 * <li>NO_MARKER: {@link #doMark(boolean)} is never <code>true</code>, no marker is set</li>
 * <li>ONLY_FALSE: {@link #doMark(boolean)} is <code>true</code> only for the flag value <code>false</code></li>
 * <li>ONLY_TRUE: {@link #doMark(boolean)} is <code>true</code> only for the flag value <code>true</code></li>
 * <li>ALL: {@link #doMark(boolean)} is <code>true</code> for both flag values</li>
 * </ul>
 */
public enum PublishMarkerChooser{
    NO_MARKER,
    ONLY_FALSE,
    ONLY_TRUE,
    ALL;

    /**
     * Tells whether this option selects the given publish flag value.
     *
     * @param value the publish flag value
     * @return <code>true</code> if this option covers <code>value</code>
     */
    boolean doMark(boolean value){
        if (this == ALL){
            return true;
        }
        PublishMarkerChooser matchingOption = value ? ONLY_TRUE : ONLY_FALSE;
        return this == matchingOption;
    }
}
94
/**
 * Creates the taxon import and hands the source table name and the
 * plural label of the imported objects to the base class.
 */
public BerlinModelTaxonImport(){
    super(dbTableName, pluralString);
}
98
99 @Override
100 protected String getIdQuery(BerlinModelImportState state) {
101 String sqlSelect = " SELECT RIdentifier";
102 String taxonTable = state.getConfig().getTaxonTable();
103 String sqlFrom = String.format(" FROM %s ", taxonTable);
104 String sqlWhere = "";
105
106 String sql = sqlSelect + " " + sqlFrom + " " + sqlWhere ;
107 return sql;
108 }
109
110 @Override
111 protected String getRecordQuery(BerlinModelImportConfigurator config) {
112 String sqlSelect = " SELECT pt.* ";
113 String sqlFrom = " FROM PTaxon pt ";
114 if (config.isEuroMed()){
115 sqlFrom = " FROM PTaxon AS pt "
116 + " INNER JOIN v_cdm_exp_taxaAll AS em ON pt.RIdentifier = em.RIdentifier "
117 + " LEFT OUTER JOIN Name n ON pt.PTNameFk = n.nameId ";
118 if (!config.isUseLastScrutinyAsSec()){
119 sqlFrom += " LEFT OUTER JOIN Reference r ON pt.LastScrutinyFk = r.RefId ";
120 }
121 sqlSelect += ", n.notes nameNotes , em.MA ";
122 if (!config.isUseLastScrutinyAsSec()){
123 sqlSelect += ", r.RefCache as LastScrutiny ";
124 }
125 }
126
127 String sqlWhere = " WHERE ( pt.RIdentifier IN (" + ID_LIST_TOKEN + ") )";
128
129 String strRecordQuery =sqlSelect + " " + sqlFrom + " " + sqlWhere ;
130 // " SELECT * " +
131 // " FROM PTaxon " + state.getConfig().getTaxonTable();
132 // " WHERE ( RIdentifier IN (" + ID_LIST_TOKEN + ") )";
133 return strRecordQuery;
134 }
135
136 @Override
137 protected boolean doCheck(BerlinModelImportState state){
138 IOValidator<BerlinModelImportState> validator = new BerlinModelTaxonImportValidator();
139 return validator.validate(state);
140 }
141
142 @Override
143 public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, BerlinModelImportState state) {
144
145 boolean success = true ;
146 BerlinModelImportConfigurator config = state.getConfig();
147 @SuppressWarnings("rawtypes")
148 Set<TaxonBase> taxaToSave = new HashSet<>();
149 @SuppressWarnings("unchecked")
150 Map<String, TaxonName> taxonNameMap = partitioner.getObjectMap(BerlinModelTaxonNameImport.NAMESPACE);
151 @SuppressWarnings("unchecked")
152 Map<String, Reference> refMap = partitioner.getObjectMap(BerlinModelReferenceImport.REFERENCE_NAMESPACE);
153
154 ResultSet rs = partitioner.getResultSet();
155 try{
156 boolean publishFlagExists = state.getConfig().getSource().checkColumnExists("PTaxon", "PublishFlag");
157 boolean isEuroMed = config.isEuroMed();
158 while (rs.next()){
159
160 // if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("PTaxa handled: " + (i-1));}
161
162 //create TaxonName element
163 int taxonId = rs.getInt("RIdentifier");
164 int statusFk = rs.getInt("statusFk");
165
166 int nameFk = rs.getInt("PTNameFk");
167 int refFkInt = rs.getInt("PTRefFk");
168 String doubtful = rs.getString("DoubtfulFlag");
169 String uuid = null;
170 if (resultSetHasColumn(rs,"UUID")){
171 uuid = rs.getString("UUID");
172 }
173
174
175 TaxonName taxonName = null;
176 taxonName = taxonNameMap.get(String.valueOf(nameFk));
177
178 Reference reference = null;
179 String refFkStr = String.valueOf(refFkInt);
180 reference = refMap.get(refFkStr);
181
182 Reference lastScrutinyRef = null;
183 if (state.getConfig().isUseLastScrutinyAsSec() && resultSetHasColumn(rs,LAST_SCRUTINY_FK)){
184 Integer lastScrutinyFk = nullSafeInt(rs,LAST_SCRUTINY_FK);
185 if (lastScrutinyFk != null){
186 String lastScrutinyFkStr = String.valueOf(lastScrutinyFk);
187 if (lastScrutinyFkStr != null){
188 lastScrutinyRef = refMap.get(lastScrutinyFkStr);
189 if (lastScrutinyRef == null){
190 logger.warn("Last scrutiny reference "+lastScrutinyFkStr+" could not be found "
191 + "for taxon " + taxonId);
192 }
193 //MANs do have last scrutiny => the following is not correct
194 // if(!StringUtils.right(refFkStr, 5).equals("00000")){
195 // logger.warn("Unexpected secFk " + refFkStr + " for taxon with last scrutiny. Taxon id " + taxonId);
196 // }
197 }
198 }
199 }
200
201 if(! config.isIgnoreNull()){
202 if (taxonName == null ){
203 logger.warn("TaxonName belonging to taxon (RIdentifier = " + taxonId + ") could not be found in store. Taxon will not be imported");
204 success = false;
205 continue; //next taxon
206 }else if (reference == null ){
207 logger.warn("Sec Reference belonging to taxon could not be found in store. Taxon will not be imported");
208 success = false;
209 continue; //next taxon
210 }
211 }
212 TaxonBase<?> taxonBase;
213 Synonym synonym;
214 Taxon taxon;
215 Reference sec = (lastScrutinyRef != null && isRightAccessSec(refFkInt)) ? lastScrutinyRef: reference;
216 try {
217 logger.debug(statusFk);
218 if (statusFk == T_STATUS_ACCEPTED || statusFk == T_STATUS_UNRESOLVED
219 || statusFk == T_STATUS_PRO_PARTE_SYN || statusFk == T_STATUS_PARTIAL_SYN ){
220 taxon = Taxon.NewInstance(taxonName, sec);
221 taxonBase = taxon;
222 if (statusFk == T_STATUS_UNRESOLVED){
223 taxon.setTaxonStatusUnknown(true);
224 }
225 //TODO marker for pp and partial?
226 }else if (statusFk == T_STATUS_SYNONYM ){
227 synonym = Synonym.NewInstance(taxonName, sec);
228 taxonBase = synonym;
229 // if (statusFk == T_STATUS_PRO_PARTE_SYN){
230 // synonym.setProParte(true);
231 // }
232 // if (statusFk == T_STATUS_PARTIAL_SYN){
233 // synonym.setPartial(true);
234 // }
235 }else{
236 logger.warn("TaxonStatus " + statusFk + " not yet implemented. Taxon (RIdentifier = " + taxonId + ") left out.");
237 success = false;
238 continue;
239 }
240 if (uuid != null){
241 taxonBase.setUuid(UUID.fromString(uuid));
242 }
243
244 //doubtful
245 if (doubtful.equals("a")){
246 taxonBase.setDoubtful(false);
247 }else if(doubtful.equals("d")){
248 taxonBase.setDoubtful(true);
249 }else if(doubtful.equals("i")){
250 taxonBase.setDoubtful(false);
251 logger.warn("Doubtful = i (inactivated) does not exist in CDM. Doubtful set to false. RIdentifier: " + taxonId);
252 }
253
254 //detail
255 String detail = rs.getString("Detail");
256 if (isNotBlank(detail)){
257 // ExtensionType detailExtensionType = getExtensionType(state, BerlinModelTransformer.DETAIL_EXT_UUID, "micro reference","micro reference","micro ref.");
258 // Extension.NewInstance(taxonBase, detail, detailExtensionType);
259 taxonBase.setSecMicroReference(detail.trim());
260 }
261 //idInSource
262 String idInSource = rs.getString("IdInSource");
263 if (isNotBlank(idInSource)){
264 if(!state.getConfig().isEuroMed()){
265 ExtensionType detailExtensionType = getExtensionType(state, BerlinModelTransformer.ID_IN_SOURCE_EXT_UUID, "Berlin Model IdInSource","Berlin Model IdInSource","BM source id");
266 Extension.NewInstance(taxonBase, idInSource.trim(), detailExtensionType);
267 }else if(isMclIdentifier(state,rs, idInSource)){
268 DefinedTerm identifierType = getIdentiferType(state, BerlinModelTransformer.uuidEM_MCLIdentifierType, "MCL identifier", "Med-Checklist identifier", "MCL ID", null);
269 Identifier.NewInstance(taxonBase, idInSource.trim(), identifierType);
270 }
271 }
272 //namePhrase
273 String namePhrase = rs.getString("NamePhrase");
274 if (StringUtils.isNotBlank(namePhrase)){
275 taxonBase.setAppendedPhrase(namePhrase);
276 }
277 //useNameCache
278 Boolean useNameCacheFlag = rs.getBoolean("UseNameCacheFlag");
279 if (useNameCacheFlag){
280 taxonBase.setUseNameCache(true);
281 }
282 //publisheFlag
283 if (publishFlagExists){
284 Boolean publishFlag = rs.getBoolean("PublishFlag");
285 Boolean misapplied = false;
286 if (isEuroMed){
287 misapplied = rs.getBoolean("MA");
288 }
289
290 if ( ! misapplied){
291 taxonBase.setPublish(publishFlag);
292 }
293 }
294
295 // does not exist anymore as we use last scrutiny now as sec ref
296 if (!state.getConfig().isUseLastScrutinyAsSec() && resultSetHasColumn(rs, "LastScrutiny")){
297 String lastScrutiny = rs.getString("LastScrutiny");
298 //TODO strange, why not Extension last scrutiny? To match PESI? Is there a difference
299 //to LastScrutinyFK and SpeciesExpertFK?
300 if (isNotBlank(lastScrutiny)){
301 ExtensionType extensionTypeSpeciesExpert = getExtensionType(state, BerlinModelTransformer.uuidSpeciesExpertName, "Species Expert", "Species Expert", "Species Expert");
302 taxonBase.addExtension(lastScrutiny, extensionTypeSpeciesExpert);
303 ExtensionType extensionTypeExpert = getExtensionType(state, BerlinModelTransformer.uuidExpertName, "Expert", "Expert for a taxonomic group", "Expert");
304 taxonBase.addExtension(lastScrutiny, extensionTypeExpert);
305 }
306 }
307 //
308 if (resultSetHasColumn(rs, "IsExcludedMarker")){
309 boolean isExcluded = rs.getBoolean("IsExcludedMarker");
310 if (isExcluded){
311 String extension = rs.getString("IsExcludedExtension");
312 String valueless = "not accepted: taxonomically valueless local or singular biotype";
313 String provisional = "provisional: probably a taxonomically valueless local or singular biotype";
314
315 MarkerType markerType = null;
316 if (valueless.equals(extension)){
317 markerType = getMarkerType(state, BerlinModelTransformer.uuidTaxonomicallyValueless, "taxonomically valueless", valueless, "valueless", getEuroMedMarkerTypeVoc(state));
318 }else if (provisional.equals(extension)){
319 markerType = getMarkerType(state, BerlinModelTransformer.uuidProbablyTaxonomicallyValueless, "probably taxonomically valueless", provisional, "provisional", getEuroMedMarkerTypeVoc(state));
320 }
321 if (markerType != null){
322 taxonBase.addMarker(Marker.NewInstance(markerType, true));
323 }else{
324 logger.warn("IsExcludedExtension not regonized for taxon " + taxonId + "; " + extension);
325 }
326 }
327 }
328
329 //Notes
330 boolean excludeNotes = state.getConfig().isTaxonNoteAsFeature() && taxonBase.isInstanceOf(Taxon.class);
331 String notes = rs.getString("Notes");
332 if (state.getConfig().isEuroMed()){
333 if (isNotBlank(notes) && notes.startsWith("non ")){
334 taxonBase.setAppendedPhrase(CdmUtils.concat("; ", taxonBase.getAppendedPhrase(), notes));
335 notes = null;
336 }
337 String nameNotes = rs.getString("nameNotes");
338 nameNotes = BerlinModelTaxonNameImport.filterNotes(nameNotes, 900000000 + taxonId);
339 if (BerlinModelTaxonNameImport.isPostulatedParentalSpeciesNote(nameNotes)){
340 nameNotes = nameNotes.replace("{", "").replace("}", "");
341 String text = "For intermediate, so-called \"collective\" species in the genus Pilosella, a combination of the postulated parental basic species is given.";
342 UUID parSpecUuid = BerlinModelTransformer.PARENTAL_SPECIES_EXT_UUID;
343 ExtensionType parentalSpeciesExtType = getExtensionType(state, parSpecUuid, " Postulated parental species", text, "par. spec.");
344 Extension.NewInstance(taxonBase, nameNotes, parentalSpeciesExtType);
345 }
346 }
347
348 doIdCreatedUpdatedNotes(state, taxonBase, rs, taxonId, NAMESPACE, false, excludeNotes || notes == null);
349 if (excludeNotes && notes != null){
350 makeTaxonomicNote(state, CdmBase.deproxy(taxonBase, Taxon.class), rs.getString("Notes"));
351 }
352
353 //external url
354 if (config.getMakeUrlForTaxon() != null){
355 Method urlMethod = config.getMakeUrlForTaxon();
356 urlMethod.invoke(null, taxonBase, rs);
357 }
358
359 partitioner.startDoSave();
360 taxaToSave.add(taxonBase);
361 } catch (Exception e) {
362 logger.warn("An exception (" +e.getMessage()+") occurred when creating taxon with id " + taxonId + ". Taxon could not be saved.");
363 success = false;
364 }
365 }
366 } catch (DatabaseTypeNotSupportedException e) {
367 logger.error("MethodNotSupportedException:" + e);
368 return false;
369 } catch (Exception e) {
370 logger.error("SQLException:" + e);
371 return false;
372 }
373
374 getTaxonService().save(taxaToSave);
375 return success;
376 }
377
378 /**
379 * @param state
380 * @param rs
381 * @param idInSource
382 * @return
383 * @throws SQLException
384 */
385 private boolean isMclIdentifier(BerlinModelImportState state, ResultSet rs, String idInSource) throws SQLException {
386 if (idInSource.contains("-")){
387 return true;
388 }else if (idInSource.matches("(293|303)")){
389 String created = rs.getString("Created_Who");
390 if (created.endsWith(".xml")){
391 return true;
392 }
393 }
394 return false;
395 }
396
397 @Override
398 protected String getIdInSource(BerlinModelImportState state, ResultSet rs) throws SQLException {
399 String id = rs.getString("idInSource");
400 return id;
401 }
402
403
404 /**
405 * @param refFkInt
406 * @return
407 */
408 private boolean isRightAccessSec(Integer refFkInt) {
409 List<Integer> rightAccessSecs = Arrays.asList(new Integer[]{7000000, 7100000, 7200000, 7300000,
410 7400000, 7500000, 7600000, 7700000, 8000000, 8500000, 9000000});
411 return rightAccessSecs.contains(refFkInt);
412 }
413
414 /**
415 * @param state
416 * @param taxonBase
417 * @param notes
418 */
419 private void makeTaxonomicNote(BerlinModelImportState state, Taxon taxon, String notes) {
420 if (isNotBlank(notes)){
421 TaxonDescription desc = getTaxonDescription(taxon, false, true);
422 desc.setDefault(true); //hard coded for Salvador, not used elsewhere as far as I can see
423 TextData textData = TextData.NewInstance(Feature.NOTES() , notes, Language.SPANISH_CASTILIAN(), null);
424 desc.addElement(textData);
425 }
426 }
427
428 @Override
429 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, BerlinModelImportState state) {
430
431 String nameSpace;
432 Set<String> idSet;
433 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
434
435 try{
436 Set<String> nameIdSet = new HashSet<>();
437 Set<String> referenceIdSet = new HashSet<>();
438 while (rs.next()){
439 handleForeignKey(rs, nameIdSet, "PTNameFk");
440 handleForeignKey(rs, referenceIdSet, "PTRefFk");
441 if (state.getConfig().isUseLastScrutinyAsSec() && resultSetHasColumn(rs, LAST_SCRUTINY_FK)){
442 handleForeignKey(rs, referenceIdSet, LAST_SCRUTINY_FK);
443 }
444 }
445
446 //name map
447 nameSpace = BerlinModelTaxonNameImport.NAMESPACE;
448 idSet = nameIdSet;
449 Map<String, TaxonName> nameMap = getCommonService().getSourcedObjectsByIdInSourceC(TaxonName.class, idSet, nameSpace);
450 result.put(nameSpace, nameMap);
451
452 //reference map
453 nameSpace = BerlinModelReferenceImport.REFERENCE_NAMESPACE;
454 idSet = referenceIdSet;
455 Map<String, Reference> referenceMap = getCommonService().getSourcedObjectsByIdInSourceC(Reference.class, idSet, nameSpace);
456 result.put(nameSpace, referenceMap);
457
458 } catch (SQLException e) {
459 throw new RuntimeException(e);
460 }
461 return result;
462 }
463
464 @Override
465 protected String getTableName() {
466 return dbTableName;
467 }
468
469 @Override
470 public String getPluralString() {
471 return pluralString;
472 }
473
474 @Override
475 protected boolean isIgnore(BerlinModelImportState state){
476 return ! state.getConfig().isDoTaxa();
477 }
478
479 }