adapt some classes to new CommonService.getSourcedObjectSByIdInSource and cleanup
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / berlinModel / in / BerlinModelOccurrenceSourceImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.berlinModel.in;
11
12 import java.sql.ResultSet;
13 import java.sql.SQLException;
14 import java.util.HashMap;
15 import java.util.HashSet;
16 import java.util.List;
17 import java.util.Map;
18 import java.util.Set;
19
20 import org.apache.log4j.Logger;
21 import org.springframework.stereotype.Component;
22
23 import eu.etaxonomy.cdm.common.CdmUtils;
24 import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelOccurrenceSourceImportValidator;
25 import eu.etaxonomy.cdm.io.common.IOValidator;
26 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
27 import eu.etaxonomy.cdm.io.common.Source;
28 import eu.etaxonomy.cdm.model.common.CdmBase;
29 import eu.etaxonomy.cdm.model.common.RelationshipBase.Direction;
30 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
31 import eu.etaxonomy.cdm.model.description.DescriptionElementSource;
32 import eu.etaxonomy.cdm.model.description.Distribution;
33 import eu.etaxonomy.cdm.model.description.TaxonDescription;
34 import eu.etaxonomy.cdm.model.name.NameRelationshipType;
35 import eu.etaxonomy.cdm.model.name.TaxonName;
36 import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
37 import eu.etaxonomy.cdm.model.reference.Reference;
38 import eu.etaxonomy.cdm.model.taxon.Synonym;
39 import eu.etaxonomy.cdm.model.taxon.Taxon;
40 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
41 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
42
43
44 /**
45 * @author a.mueller
46 * @since 20.03.2008
47 */
48 @Component
49 public class BerlinModelOccurrenceSourceImport extends BerlinModelImportBase {
50
/** Prefix added to log messages when a replacement name has the same title cache as the name in source. */
private static final String EXACT = "(exact) ";

private static final long serialVersionUID = 1139543760239436841L;

private static final Logger logger = Logger.getLogger(BerlinModelOccurrenceSourceImport.class);

/** Progress is logged each time this many records have been handled in a partition. */
private static final int modCount = 5000;
private static final String pluralString = "occurrence sources";
private static final String dbTableName = "emOccurrenceSource";  //??

/** Maps each single source number found in Reference.IdInSource to its RefId; filled in doInvoke. */
private Map<String, Integer> sourceNumberRefIdMap;
/** Maps each trimmed Name.nameCache to all NameIds having that name cache; filled in doInvoke. */
private Map<String, Set<Integer>> nameCache2NameIdMap;
/** Source numbers for which no reference could be found during the current invocation. */
private Set<String> notFoundReferences = new HashSet<>();

/**
 * Creates the import, passing the table name and the plural label to the base class.
 */
public BerlinModelOccurrenceSourceImport(){
    super(dbTableName, pluralString);
}
70
71 @Override
72 protected String getIdQuery(BerlinModelImportState state) {
73 String result = "SELECT occurrenceSourceId FROM " + getTableName();
74 if (state.getConfig().getOccurrenceSourceFilter() != null){
75 result += " WHERE " + state.getConfig().getOccurrenceSourceFilter();
76 }
77 return result;
78 }
79
80 @Override
81 protected String getRecordQuery(BerlinModelImportConfigurator config) {
82 String strQuery = //DISTINCT because otherwise emOccurrenceSource creates multiple records for a single distribution
83 " SELECT occ.*, n.nameCache, n.fullNameCache " +
84 " FROM emOccurrenceSource occ LEFT OUTER JOIN Name n ON n.nameId = occ.oldNameFk " +
85 " WHERE (OccurrenceSourceId IN (" + ID_LIST_TOKEN + ") )" +
86 "";
87 return strQuery;
88 }
89
90
91
92 @Override
93 protected void doInvoke(BerlinModelImportState state) {
94 notFoundReferences = new HashSet<>();
95
96 try {
97 sourceNumberRefIdMap = makeSourceNumberReferenceIdMap(state);
98 nameCache2NameIdMap = makeNameCache2NameIdMap(state);
99 } catch (SQLException e) {
100 e.printStackTrace();
101 throw new RuntimeException(e);
102 }
103 super.doInvoke(state);
104 sourceNumberRefIdMap = null;
105 nameCache2NameIdMap = null;
106 if (notFoundReferences.size()>0){
107 String unfound = "'" + CdmUtils.concat("','", notFoundReferences.toArray(new String[]{})) + "'";
108 logger.warn("Not found references: " + unfound);
109 }
110 return;
111 }
112
113 @Override
114 public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, BerlinModelImportState state) {
115 boolean success = true;
116 ResultSet rs = partitioner.getResultSet();
117 @SuppressWarnings("unchecked")
118 Map<String, Reference> refMap = partitioner.getObjectMap(BerlinModelReferenceImport.REFERENCE_NAMESPACE);
119
120 Set<DescriptionElementBase> objectsToSave = new HashSet<>();
121 try {
122 int i = 0;
123 //for each reference
124 while (rs.next()){
125
126 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("occurrence sources handled: " + (i-1));}
127
128 Integer occurrenceSourceId = rs.getInt("OccurrenceSourceId");
129 Integer occurrenceFk =nullSafeInt(rs, "OccurrenceFk");
130 String sourceNumber = rs.getString("SourceNumber");
131 String oldName = rs.getString("OldName");
132 Integer oldNameFk = nullSafeInt(rs, "OldNameFk");
133 String oldNameFkCache = rs.getString("nameCache");
134 String oldNameFkFullCache = rs.getString("fullNameCache");
135
136 Distribution distribution = (Distribution)state.getRelatedObject(BerlinModelOccurrenceImport.NAMESPACE, String.valueOf(occurrenceFk));
137
138 if (distribution == null){
139 //distribution = duplicateMap.get(occurrenceFk);
140 }
141 if (distribution != null){
142 Integer refId = sourceNumberRefIdMap.get(sourceNumber);
143 Reference ref = refMap.get(String.valueOf(refId));
144
145 if (ref != null){
146 DescriptionElementSource originalSource = DescriptionElementSource.NewInstance(OriginalSourceType.PrimaryTaxonomicSource);
147 originalSource.setCitation(ref);
148 TaxonName taxonName = getName(state, oldName, oldNameFk, oldNameFkFullCache, oldNameFkCache, occurrenceSourceId, distribution);
149 if (taxonName != null){
150 if(isNotBlank(oldName) && !oldName.equals(taxonName.getNameCache())){
151 originalSource.setOriginalNameString(oldName);
152 }
153 originalSource.setNameUsedInSource(taxonName);
154 }else if(isNotBlank(oldName)){
155 originalSource.setOriginalNameString(oldName);
156 }
157 distribution.addSource(originalSource);
158 }else{
159 logger.warn("reference for sourceNumber "+sourceNumber+" could not be found. OccurrenceSourceId: " + occurrenceSourceId );
160 notFoundReferences.add(sourceNumber);
161 }
162 }else{
163 logger.warn("distribution ("+occurrenceFk+") for occurrence source (" + occurrenceSourceId + ") could not be found." );
164 }
165
166 }
167 logger.info("Distributions to save: " + objectsToSave.size());
168 getDescriptionService().saveDescriptionElement(objectsToSave);
169
170 return success;
171 } catch (SQLException e) {
172 logger.error("SQLException:" + e);
173 return false;
174 }
175 }
176
177
178 @Override
179 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, BerlinModelImportState state) {
180
181 String nameSpace;
182 Set<String> idSet;
183 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
184
185 try{
186 Set<String> occurrenceIdSet = new HashSet<>();
187 Set<String> nameIdSet = new HashSet<>();
188 Set<String> sourceNumberSet = new HashSet<>();
189 Set<String> oldNamesSet = new HashSet<>();
190 while (rs.next()){
191 handleForeignKey(rs, occurrenceIdSet, "occurrenceFk");
192 handleForeignKey(rs, nameIdSet, "oldNameFk");
193 sourceNumberSet.add(CdmUtils.NzTrim(rs.getString("SourceNumber")));
194 oldNamesSet.add(CdmUtils.NzTrim(rs.getString("oldName")));
195 oldNamesSet.add(CdmUtils.NzTrim(rs.getString("nameCache")));
196 oldNamesSet.add(CdmUtils.NzTrim(rs.getString("fullNameCache")));
197 }
198
199 sourceNumberSet.remove("");
200 Set<String> referenceIdSet = handleSourceNumber(sourceNumberSet);
201 oldNamesSet.remove("");
202 Set<String> oldNameIdSet = handleRelatedOldNames(oldNamesSet);
203 nameIdSet.addAll(oldNameIdSet);
204
205 //occurrence map
206 nameSpace = BerlinModelOccurrenceImport.NAMESPACE;
207 idSet = occurrenceIdSet;
208 Map<String, Distribution> occurrenceMap = getCommonService().getSourcedObjectsByIdInSourceC(Distribution.class, idSet, nameSpace);
209 result.put(nameSpace, occurrenceMap);
210
211 //name map
212 nameSpace = BerlinModelTaxonNameImport.NAMESPACE;
213 idSet =nameIdSet;
214 Map<String, TaxonName> nameMap = getCommonService().getSourcedObjectsByIdInSourceC(TaxonName.class, idSet, nameSpace);
215 result.put(nameSpace, nameMap);
216
217 //reference map
218 nameSpace = BerlinModelReferenceImport.REFERENCE_NAMESPACE;
219 Map<String, Reference> referenceMap = getCommonService().getSourcedObjectsByIdInSourceC(Reference.class, idSet, nameSpace);
220 result.put(nameSpace, referenceMap);
221
222 } catch (SQLException e) {
223 throw new RuntimeException(e);
224 }
225 return result;
226 }
227
228 private Set<String> handleSourceNumber(Set<String> sourceNumberSet) {
229 Map<String, Integer> sourceNumberReferenceIdMap = this.sourceNumberRefIdMap;
230 Set<String> referenceIdSet = new HashSet<>();
231
232 for(String sourceNumber : sourceNumberSet){
233 Integer refId = sourceNumberReferenceIdMap.get(sourceNumber);
234 referenceIdSet.add(String.valueOf(refId));
235 }
236 return referenceIdSet;
237 }
238
239 private Set<String> handleRelatedOldNames(Set<String> oldNamesSet) {
240 Set<String> oldNameIdSet = new HashSet<>();
241
242 try {
243 for(String oldName : oldNamesSet){
244 if (isNotBlank(oldName)){
245 Set<Integer> nameIds = nameCache2NameIdMap.get(oldName);
246 if (nameIds != null){
247 for (Integer nameId : nameIds){
248 oldNameIdSet.add(String.valueOf(nameId));
249 }
250 }
251 }
252 }
253 } catch (Exception e) {
254 e.printStackTrace();
255 logger.error("Exception in handleOldNames" + e.getMessage());
256 }
257 return oldNameIdSet;
258 }
259
260 private TaxonName getName(BerlinModelImportState state, String oldNameStr, Integer oldNameFk,
261 String oldNameFkFullCache, String oldNameFkCache,
262 Integer occSourceId, Distribution distribution) {
263 if (oldNameStr == null && oldNameFk == null){
264 return null;
265 }
266 boolean includeMisapplications = state.getConfig().isIncludeMANsForOldNameCheck();
267
268 TaxonName taxonName = (TaxonName)state.getRelatedObject(BerlinModelTaxonNameImport.NAMESPACE, String.valueOf(oldNameFk));
269 if (oldNameFk != null && taxonName == null){
270 //move down if occ source names are not loaded in name view
271 taxonName = handleOldFreetextNameOnly(state, oldNameFkFullCache, occSourceId, distribution);
272 if (taxonName == null){
273 taxonName = handleOldFreetextNameOnly(state, oldNameFkCache, occSourceId, distribution);
274 }
275 if (taxonName == null ){
276 logger.warn("WARN: OldNameFk "+oldNameFk+" exists but taxonName not found and also search by string not successful for occSource: " + occSourceId +"; Taxon: "+getTaxonStr(distribution));
277 oldNameStr = oldNameFkFullCache;
278 }
279 }else if (taxonName != null){
280 taxonName = checkSynonymy(state, oldNameFk, occSourceId, distribution, taxonName, includeMisapplications);
281 }
282 if (isNotBlank(oldNameStr) && oldNameStr != null){
283 if (taxonName == null){
284 return handleOldFreetextNameOnly(state, oldNameStr, occSourceId, distribution);
285 }else if (!oldNameStr.equals(taxonName.getNameCache())){
286 logger.info("INFO: Old name freetext and linked name nameCache are not equal: " + oldNameStr + "/" + taxonName.getNameCache() +"; Taxon: "+getTaxonStr(distribution) + "; occSourceId: " + occSourceId);
287 checkSynonymy(state, oldNameFk, occSourceId, distribution, taxonName, includeMisapplications);
288 return taxonName;
289 }else{
290 checkSynonymy(state, oldNameFk, occSourceId, distribution, taxonName, includeMisapplications);
291 return taxonName;
292 }
293 }else{ //taxonName != null
294 if (taxonName != null){
295 checkSynonymy(state, oldNameFk, occSourceId, distribution, taxonName, includeMisapplications);
296 }
297 return taxonName;
298 }
299 }
300
301 /**
302 * @param state
303 * @param oldName
304 * @param occSourceId
305 * @param distribution
306 * @return
307 */
308 protected TaxonName handleOldFreetextNameOnly(BerlinModelImportState state, String oldName, Integer occSourceId,
309 Distribution distribution) {
310 Set<TaxonName> names = getOldNames(state, oldName);
311 if (names.isEmpty()){
312 if (getNameIds(oldName).isEmpty()){
313 if (state.getConfig().isLogNotMatchingOldNames()){
314 logger.warn("No name found for freetext oldName '"+oldName+"'; occSourceId: " + occSourceId);
315 }
316 }else{
317 if (state.getConfig().isLogMatchingNotExportedOldNames()){
318 logger.warn("Matching name exists in BM but not in CDM. OldName: " + oldName + "; Taxon: "+getTaxonStr(distribution)+"; occSourceId: " + occSourceId);
319 }
320 }
321 return null;
322 }else {
323 TaxonName result = names.iterator().next();
324 boolean checkOldNameIsSynonym = state.getConfig().isCheckOldNameIsSynonym();
325 if (names.size()> 1){
326 TaxonName synName = getFirstSynonymName(state, names, distribution, null, occSourceId, true);
327 if (synName == null){
328 //TODO should we really use a name if not available in synonymy?
329 String message = "INFO: There is more than one matching oldName for '"+oldName+"' but none of them is a synonym of the accepted taxon '"+getTaxonStr(distribution)+"'.";
330 message += (checkOldNameIsSynonym ? "":"Take arbitrary one. ") + "OccSourceId: " + occSourceId;
331 logger.info(message);
332 return checkOldNameIsSynonym ? null : result;
333 }else{
334 return synName;
335 }
336 }else{
337 //names.size() = 1
338 if (checkOldNameIsSynonym){
339 TaxonName synName = getFirstSynonymName(state, names, distribution, null, occSourceId, true);
340 if (synName == null){
341 if (state.getConfig().isCheckOldNameIsSynonym()){
342 logger.warn("There is a matching oldName for '"+oldName+"' but it is not a synonym/misapplication of the accepted taxon '"+getTaxonStr(distribution)+"'. OccSourceId: " + occSourceId);
343 return null;
344 }else{
345 return result;
346 }
347 }else if (!synName.equals(result)){
348 //TODO strange, how can this happen if it is the only matching?
349 logger.warn("There is a matching oldName for '"+oldName+"'("+result.getUuid()+") but another matching name "+synName.getUuid()+"exists in the synonymy of the accepted taxon '"+getTaxonStr(distribution)+"'. OccSourceId: " + occSourceId);
350 return synName;
351 }else{
352 return result;
353 }
354 }else{
355 return result;
356 }
357 }
358 }
359 }
360
361 /**
362 * @param state
363 * @param oldNameFk
364 * @param occSourceId
365 * @param distribution
366 * @param taxonName
367 */
368 protected TaxonName checkSynonymy(BerlinModelImportState state, Integer oldNameFk, Integer occSourceId,
369 Distribution distribution, TaxonName taxonName, boolean includeMisapplications) {
370
371 if (!state.getConfig().isCheckOldNameIsSynonym()){
372 return taxonName;
373 }else{
374 Set<TaxonName> names = new HashSet<>();
375 names.add(taxonName);
376 TaxonName synName = getFirstSynonymName(state, names, distribution, null, occSourceId, includeMisapplications);
377 if (synName != null){
378 return synName; //same as taxonName?
379 }else{
380 boolean hasTaxon = !taxonName.getTaxonBases().isEmpty();
381 String orphaned = hasTaxon ? "" : "Orphaned name: ";
382 Set<TaxonName> existingNames = getOldNames(state, taxonName.getNameCache());
383 existingNames.remove(taxonName);
384 if (existingNames.isEmpty()){
385 logger.info("INFO:" + orphaned + "NameInSource (" + oldNameFk + " - " +taxonName.getTitleCache() + ") could not be found in synonymy. Similar name does not exist. Use the not in synonymy name. "+getTaxonStr(distribution)+". OccSourceId: " + occSourceId);
386 return taxonName;
387 }else{
388 TaxonName existingSynonym = getFirstSynonymName(state, existingNames, distribution, null, occSourceId, false);
389 if (existingSynonym != null){
390 boolean isExact = CdmUtils.nullSafeEqual(existingSynonym.getTitleCache(),taxonName.getTitleCache());
391 String exact = isExact ? EXACT : "";
392 logger.info("INFO: " + exact + orphaned + "A similar name ("+existingSynonym.getUuid()+") was found in synonymy but is not the nameInSource. Use synonymie name (" + oldNameFk + " - " +taxonName.getTitleCache() + "); Taxon: "+getTaxonStr(distribution)+". OccSourceId: " + occSourceId);
393 return existingSynonym;
394 }else{
395 TaxonName existingMisapplication = getFirstMisapplication(state, existingNames, distribution, occSourceId);
396 if (existingMisapplication != null){
397 boolean isExact = CdmUtils.nullSafeEqual(existingMisapplication.getTitleCache(),taxonName.getTitleCache());
398 String exact = isExact ? EXACT : "";
399 logger.info("INFO: " + exact + orphaned + "A similar misapplied name ("+existingMisapplication.getUuid()+") can be found in misapplications but is not the nameInSource. Use synonymie name (" + oldNameFk + " - " +taxonName.getTitleCache() + "); Taxon: "+getTaxonStr(distribution)+". OccSourceId: " + occSourceId);
400 return existingMisapplication;
401 }else{
402 logger.info("INFO: NameInSource not found in synonymy. Similar names exist but also not in synonymy. Use name in source (" + oldNameFk + " - " +taxonName.getTitleCache() + "); Taxon: "+getTaxonStr(distribution)+". OccSourceId: " + occSourceId);
403 return taxonName;
404 }
405 }
406 }
407 }
408 }
409 }
410
411 /**
412 * @param state
413 * @param names
414 * @param taxon
415 * @param taxon
416 * @return
417 */
418 private TaxonName getFirstSynonymName(BerlinModelImportState state, Set<TaxonName> names, Distribution distribution,
419 Taxon taxon, Integer occSourceId, boolean includeMisapplications) {
420 TaxonName result = null;
421 taxon = (taxon == null) ? getTaxon(distribution): taxon;
422 Set<Synonym> synonyms = taxon.getSynonyms();
423 Set<TaxonName> synonymNames = new HashSet<>();
424
425 //taxon, orthvars, synonyms and their orthvars
426 synonymNames.add(taxon.getName());
427 synonymNames.addAll(getOrthographicVariants(taxon));
428
429 for (Synonym synonym : synonyms){
430 synonymNames.add(synonym.getName());
431 synonymNames.addAll(getOrthographicVariants(synonym));
432 }
433 for (TaxonName name : names){
434 if (synonymNames.contains(name)){
435 if (result != null){
436 logger.warn("There is more than 1 matching synonym/taxon for " + name.getNameCache() + "; occSourceId: " + occSourceId);
437 }
438 result = name;
439 }
440 }
441
442 //parent
443 if (result == null){
444 if (taxon.getName().isInfraSpecific()){
445 if (!taxon.getTaxonNodes().isEmpty()){
446 TaxonNode parent = taxon.getTaxonNodes().iterator().next().getParent();
447 if (parent != null && parent.getTaxon() != null){
448 Set<TaxonName> parentNames = new HashSet<>();
449 TaxonName parentName = parent.getTaxon().getName();
450 parentNames.add(parentName);
451 parentNames.addAll(getOrthographicVariants(parent.getTaxon()));
452
453 for (TaxonName name : names){
454 if (parentNames.contains(name)){
455 if (result != null){
456 logger.warn("There is more than 1 matching parent for " + name.getNameCache() + "; occSourceId: " + occSourceId);
457 }
458 result = name;
459 }
460 }
461 if (result == null){
462 TaxonName parentSyn = getFirstSynonymName(state, names, distribution, parent.getTaxon(), occSourceId, includeMisapplications);
463 if (parentSyn != null){
464 result = parentSyn;
465 }
466 }
467 }
468 }
469 }
470 }
471
472 //child
473 if (result == null){
474 if (taxon.getName().isSpecies() || taxon.getName().isSupraSpecific()){
475 if (!taxon.getTaxonNodes().isEmpty()){
476 List<TaxonNode> children = taxon.getTaxonNodes().iterator().next().getChildNodes();
477 Set<TaxonName> childNames = new HashSet<>();
478 for (TaxonNode child : children){
479 childNames.add(child.getTaxon().getName());
480 childNames.addAll(getOrthographicVariants(child.getTaxon()));
481 }
482 for (TaxonName name : names){
483 if (childNames.contains(name)){
484 if (result != null){
485 logger.warn("There is more than 1 matching child for " + name.getNameCache() + "; occSourceId: " + occSourceId);
486 }
487 result = name;
488 }
489 }
490 }
491 }
492 }
493
494 if (result == null && includeMisapplications){
495 result = getFirstMisapplication(state, names, distribution, occSourceId);
496 }
497
498 return result;
499 }
500
501 /**
502 * @param state
503 * @param names
504 * @param taxon
505 * @return
506 */
507 private TaxonName getFirstMisapplication(BerlinModelImportState state, Set<TaxonName> names, Distribution distribution, Integer occSourceId) {
508 TaxonName result = null;
509 Taxon taxon = getTaxon(distribution);
510
511 //MAN
512 Set<Taxon> misappliedTaxa = taxon.getMisappliedNames(true);
513 misappliedTaxa.addAll(taxon.getInvalidDesignations());
514 Set<TaxonName> misappliedNames = new HashSet<>();
515 for (Taxon misTaxon : misappliedTaxa){
516 misappliedNames.add(misTaxon.getName());
517 misappliedNames.addAll(getOrthographicVariants(misTaxon));
518 }
519
520 for (TaxonName name : names){
521 if (misappliedNames.contains(name)){
522 if (result != null){
523 logger.info("INFO: There is more than 1 matching misapplied name or invalid designation for " + name.getNameCache() + ". Take arbitrary one.; occSourceId: " + occSourceId);
524 }
525 result = name;
526 }
527 }
528 return result;
529 }
530
531 /**
532 * @param taxon
533 * @return
534 */
535 protected Set<TaxonName> getOrthographicVariants(TaxonBase<?> taxonBase) {
536 Set<TaxonName> result = taxonBase.getName().getRelatedNames(Direction.relatedTo, NameRelationshipType.ORTHOGRAPHIC_VARIANT());
537 result.addAll(taxonBase.getName().getRelatedNames(Direction.relatedTo, NameRelationshipType.MISSPELLING()));
538 result.addAll(taxonBase.getName().getRelatedNames(Direction.relatedTo, NameRelationshipType.ORIGINAL_SPELLING()));
539 return result;
540 }
541
542 /**
543 * @param distribution
544 * @return
545 */
546 protected String getTaxonStr(Distribution distribution) {
547 Taxon taxon = CdmBase.deproxy(distribution.getInDescription(), TaxonDescription.class).getTaxon();
548 String areaStr = distribution.getArea().getIdInVocabulary();
549 return areaStr + ": " + taxon.getName().getTitleCache();
550 }
551
552 protected Taxon getTaxon(Distribution distribution) {
553 Taxon taxon = CdmBase.deproxy(distribution.getInDescription(), TaxonDescription.class).getTaxon();
554 return taxon;
555 }
556
557 /**
558 * returns all names in DB matching the given name string.
559 * The name needs to be loaded via related objects previously.
560 */
561 private Set<TaxonName> getOldNames(BerlinModelImportState state, String nameStr) {
562 Set<TaxonName> names = new HashSet<>();
563 Set<Integer> nameIds = getNameIds(nameStr);
564 for (Integer id : nameIds){
565 TaxonName name = (TaxonName)state.getRelatedObject(BerlinModelTaxonNameImport.NAMESPACE, String.valueOf(id));
566 if (name != null){
567 names.add(name);
568 }else{
569 // logger.warn("Name for existing id "+id+" not found in related objects: " + nameStr);
570 }
571 }
572 return names;
573 }
574
575 /**
576 * @param oldName
577 * @return
578 */
579 private Set<Integer> getNameIds(String oldName) {
580 Set<Integer> result = nameCache2NameIdMap.get(oldName);
581 return result == null ? new HashSet<>(): result;
582 }
583
584 /**
585 * Creates a map which maps source numbers on references
586 * @param state
587 * @return
588 * @throws SQLException
589 */
590 private Map<String, Integer> makeSourceNumberReferenceIdMap(BerlinModelImportState state) throws SQLException {
591 Map<String, Integer> result = new HashMap<>();
592
593 Source source = state.getConfig().getSource();
594 String strQuery = " SELECT RefId, IdInSource " +
595 " FROM Reference " +
596 " WHERE (IdInSource IS NOT NULL) AND (IdInSource NOT LIKE '') ";
597
598 ResultSet rs = source.getResultSet(strQuery) ;
599 while (rs.next()){
600 int refId = rs.getInt("RefId");
601 String idInSource = rs.getString("IdInSource");
602 if (idInSource != null){
603 String[] singleSources = idInSource.split("\\|");
604 for (String singleSource : singleSources){
605 singleSource = singleSource.trim();
606 result.put(singleSource, refId);
607 }
608 }
609 }
610 return result;
611 }
612
613 /**
614 * Creates a map which maps nameCaches to nameIDs numbers on references
615 * @param state
616 * @return
617 * @throws SQLException
618 */
619 private Map<String, Set<Integer>> makeNameCache2NameIdMap(BerlinModelImportState state) throws SQLException {
620 Map<String, Set<Integer>> result = new HashMap<>();
621 try {
622
623 Source source = state.getConfig().getSource();
624 String strQuery = " SELECT NameId, nameCache " +
625 " FROM Name " +
626 " WHERE (nameCache IS NOT NULL) AND (nameCache NOT LIKE '') ";
627
628 ResultSet rs = source.getResultSet(strQuery) ;
629 while (rs.next()){
630 int nameId = rs.getInt("NameId");
631 String nameCache = rs.getString("nameCache");
632 if (isNotBlank(nameCache)){
633 nameCache = nameCache.trim();
634 Set<Integer> set = result.get(nameCache);
635 if (set == null){
636 set = new HashSet<>();
637 result.put(nameCache, set);
638 }
639 set.add(nameId);
640 }
641 }
642 } catch (Exception e) {
643 e.printStackTrace();
644 logger.error("Exception in makeNameCache2NameIdMap" + e.getMessage());
645 }
646 return result;
647 }
648
649 @Override
650 protected boolean doCheck(BerlinModelImportState state){
651 IOValidator<BerlinModelImportState> validator = new BerlinModelOccurrenceSourceImportValidator();
652 return validator.validate(state);
653 }
654
655 @Override
656 protected boolean isIgnore(BerlinModelImportState state){
657 if (! state.getConfig().isDoOccurrenceSources()){
658 return true;
659 }else{
660 if (!this.checkSqlServerColumnExists(state.getConfig().getSource(), "emOccurrenceSource", "OccurrenceSourceId")){
661 logger.error("emOccurrenceSource table or emOccurrenceSourceId does not exist. Must ignore occurrence import");
662 return true;
663 }else{
664 return false;
665 }
666 }
667 }
668
669 }