Project

General

Profile

Download (27.8 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.berlinModel.in;
10

    
11
import java.sql.ResultSet;
12
import java.sql.SQLException;
13
import java.util.HashMap;
14
import java.util.HashSet;
15
import java.util.List;
16
import java.util.Map;
17
import java.util.Set;
18

    
19
import org.apache.log4j.Logger;
20
import org.springframework.stereotype.Component;
21

    
22
import eu.etaxonomy.cdm.common.CdmUtils;
23
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelOccurrenceSourceImportValidator;
24
import eu.etaxonomy.cdm.io.common.IOValidator;
25
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
26
import eu.etaxonomy.cdm.io.common.Source;
27
import eu.etaxonomy.cdm.model.common.CdmBase;
28
import eu.etaxonomy.cdm.model.common.RelationshipBase.Direction;
29
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
30
import eu.etaxonomy.cdm.model.description.DescriptionElementSource;
31
import eu.etaxonomy.cdm.model.description.Distribution;
32
import eu.etaxonomy.cdm.model.description.TaxonDescription;
33
import eu.etaxonomy.cdm.model.name.NameRelationshipType;
34
import eu.etaxonomy.cdm.model.name.TaxonName;
35
import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
36
import eu.etaxonomy.cdm.model.reference.Reference;
37
import eu.etaxonomy.cdm.model.taxon.Synonym;
38
import eu.etaxonomy.cdm.model.taxon.Taxon;
39
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
40
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
41

    
42
/**
43
 * @author a.mueller
44
 * @since 20.03.2008
45
 */
46
@Component
47
public class BerlinModelOccurrenceSourceImport  extends BerlinModelImportBase {
48

    
49
    private static final long serialVersionUID = 1139543760239436841L;
50
    private static final Logger logger = Logger.getLogger(BerlinModelOccurrenceSourceImport.class);
51

    
52
	private static int modCount = 5000;
53
	private static final String pluralString = "occurrence sources";
54
	private static final String dbTableName = "emOccurrenceSource";  //??
55
	private static final String EXACT = "(exact) ";
56

    
57
	private Map<String, Integer> sourceNumberRefIdMap;
58
	private Map<String, Set<Integer>> nameCache2NameIdMap;
59
	private Set<String> notFoundReferences = new HashSet<>();
60

    
61

    
62
	/**
	 * Creates the import, passing the source table name and the plural label
	 * of the imported records to the base class.
	 */
	public BerlinModelOccurrenceSourceImport() {
		super(dbTableName, pluralString);
	}
65

    
66
	@Override
67
	protected String getIdQuery(BerlinModelImportState state) {
68
		String result = "SELECT occurrenceSourceId FROM " + getTableName();
69
		if (state.getConfig().getOccurrenceSourceFilter() != null){
70
			result += " WHERE " +  state.getConfig().getOccurrenceSourceFilter();
71
		}
72
		return result;
73
	}
74

    
75
	@Override
76
	protected String getRecordQuery(BerlinModelImportConfigurator config) {
77
			String strQuery =   //DISTINCT because otherwise emOccurrenceSource creates multiple records for a single distribution
78
            " SELECT occ.*, n.nameCache, n.fullNameCache " +
79
                " FROM emOccurrenceSource occ LEFT OUTER JOIN Name n ON n.nameId = occ.oldNameFk " +
80
            " WHERE (OccurrenceSourceId IN (" + ID_LIST_TOKEN + ")  )" +
81
             "";
82
		return strQuery;
83
	}
84

    
85
	@Override
86
	protected void doInvoke(BerlinModelImportState state) {
87
		notFoundReferences = new HashSet<>();
88

    
89
		try {
90
			sourceNumberRefIdMap = makeSourceNumberReferenceIdMap(state);
91
			nameCache2NameIdMap = makeNameCache2NameIdMap(state);
92
		} catch (SQLException e) {
93
			e.printStackTrace();
94
			throw new RuntimeException(e);
95
		}
96
		super.doInvoke(state);
97
		sourceNumberRefIdMap = null;
98
		nameCache2NameIdMap = null;
99
		if (notFoundReferences.size()>0){
100
			String unfound = "'" + CdmUtils.concat("','", notFoundReferences.toArray(new String[]{})) + "'";
101
			logger.warn("Not found references: " + unfound);
102
		}
103
		return;
104
	}
105

    
106
	@Override
107
	public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, BerlinModelImportState state) {
108
		boolean success = true;
109
		ResultSet rs = partitioner.getResultSet();
110
		@SuppressWarnings("unchecked")
111
        Map<String, Reference> refMap = partitioner.getObjectMap(BerlinModelReferenceImport.REFERENCE_NAMESPACE);
112

    
113
		Set<DescriptionElementBase> objectsToSave = new HashSet<>();
114
		try {
115
			int i = 0;
116
			//for each reference
117
            while (rs.next()){
118

    
119
                if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("occurrence sources handled: " + (i-1));}
120

    
121
                Integer occurrenceSourceId = rs.getInt("OccurrenceSourceId");
122
                Integer occurrenceFk =nullSafeInt(rs, "OccurrenceFk");
123
    			String sourceNumber = rs.getString("SourceNumber");
124
    			String oldName = rs.getString("OldName");
125
    			Integer oldNameFk = nullSafeInt(rs, "OldNameFk");
126
    			String oldNameFkCache = rs.getString("nameCache");
127
    			String oldNameFkFullCache = rs.getString("fullNameCache");
128

    
129
    			Distribution distribution = (Distribution)state.getRelatedObject(BerlinModelOccurrenceImport.NAMESPACE, String.valueOf(occurrenceFk));
130

    
131
    			if (distribution == null){
132
    				//distribution = duplicateMap.get(occurrenceFk);
133
    			}
134
    			if (distribution != null){
135
    				Integer refId = sourceNumberRefIdMap.get(sourceNumber);
136
    				Reference ref = refMap.get(String.valueOf(refId));
137

    
138
    				if (ref != null){
139
    					DescriptionElementSource originalSource = DescriptionElementSource.NewInstance(OriginalSourceType.PrimaryTaxonomicSource);
140
    					originalSource.setCitation(ref);
141
    					TaxonName taxonName = getName(state, oldName, oldNameFk, oldNameFkFullCache, oldNameFkCache, occurrenceSourceId, distribution);
142
						if (taxonName != null){
143
						    if(isNotBlank(oldName) && !oldName.equals(taxonName.getNameCache())){
144
	                            originalSource.setOriginalNameString(oldName);
145
	                        }
146
						    originalSource.setNameUsedInSource(taxonName);
147
    					}else if(isNotBlank(oldName)){
148
    						originalSource.setOriginalNameString(oldName);
149
    					}
150
    					distribution.addSource(originalSource);
151
    				}else{
152
    					logger.warn("reference for sourceNumber "+sourceNumber+" could not be found. OccurrenceSourceId: " + occurrenceSourceId );
153
    					notFoundReferences.add(sourceNumber);
154
    				}
155
    			}else{
156
    				logger.warn("distribution ("+occurrenceFk+") for occurrence source (" + occurrenceSourceId + ") could not be found." );
157
    			}
158

    
159
            }
160
			logger.info("Distributions to save: " + objectsToSave.size());
161
			getDescriptionElementService().save(objectsToSave);
162

    
163
			return success;
164
		} catch (SQLException e) {
165
			logger.error("SQLException:" +  e);
166
			return false;
167
		}
168
	}
169

    
170
	@Override
171
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, BerlinModelImportState state) {
172

    
173
	    String nameSpace;
174
		Set<String> idSet;
175
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
176

    
177
		try{
178
			Set<String> occurrenceIdSet = new HashSet<>();
179
			Set<String> nameIdSet = new HashSet<>();
180
			Set<String> sourceNumberSet = new HashSet<>();
181
			Set<String> oldNamesSet = new HashSet<>();
182
			while (rs.next()){
183
				handleForeignKey(rs, occurrenceIdSet, "occurrenceFk");
184
				handleForeignKey(rs, nameIdSet, "oldNameFk");
185
				sourceNumberSet.add(CdmUtils.NzTrim(rs.getString("SourceNumber")));
186
				oldNamesSet.add(CdmUtils.NzTrim(rs.getString("oldName")));
187
				oldNamesSet.add(CdmUtils.NzTrim(rs.getString("nameCache")));
188
				oldNamesSet.add(CdmUtils.NzTrim(rs.getString("fullNameCache")));
189
			}
190

    
191
			sourceNumberSet.remove("");
192
			Set<String> referenceIdSet = handleSourceNumber(sourceNumberSet);
193
            oldNamesSet.remove("");
194
            Set<String> oldNameIdSet = handleRelatedOldNames(oldNamesSet);
195
            nameIdSet.addAll(oldNameIdSet);
196

    
197
			//occurrence map
198
			nameSpace = BerlinModelOccurrenceImport.NAMESPACE;
199
			idSet = occurrenceIdSet;
200
            Map<String, Distribution> occurrenceMap = getCommonService().getSourcedObjectsByIdInSourceC(Distribution.class, idSet, nameSpace);
201
			result.put(nameSpace, occurrenceMap);
202

    
203
			//name map
204
			nameSpace = BerlinModelTaxonNameImport.NAMESPACE;
205
			idSet =nameIdSet;
206
            Map<String, TaxonName> nameMap = getCommonService().getSourcedObjectsByIdInSourceC(TaxonName.class, idSet, nameSpace);
207
			result.put(nameSpace, nameMap);
208

    
209
			//reference map
210
			nameSpace = BerlinModelReferenceImport.REFERENCE_NAMESPACE;
211
            Map<String, Reference> referenceMap = getCommonService().getSourcedObjectsByIdInSourceC(Reference.class, idSet, nameSpace);
212
			result.put(nameSpace, referenceMap);
213

    
214
		} catch (SQLException e) {
215
			throw new RuntimeException(e);
216
		}
217
		return result;
218
	}
219

    
220
	private Set<String> handleSourceNumber(Set<String> sourceNumberSet) {
221
		Map<String, Integer> sourceNumberReferenceIdMap = this.sourceNumberRefIdMap;
222
		Set<String> referenceIdSet = new HashSet<>();
223

    
224
		for(String sourceNumber : sourceNumberSet){
225
			Integer refId = sourceNumberReferenceIdMap.get(sourceNumber);
226
			referenceIdSet.add(String.valueOf(refId));
227
		}
228
		return referenceIdSet;
229
	}
230

    
231
    private Set<String> handleRelatedOldNames(Set<String> oldNamesSet) {
232
        Set<String> oldNameIdSet = new HashSet<>();
233

    
234
        try {
235
            for(String oldName : oldNamesSet){
236
                if (isNotBlank(oldName)){
237
                    Set<Integer> nameIds = nameCache2NameIdMap.get(oldName);
238
                    if (nameIds != null){
239
                        for (Integer nameId : nameIds){
240
                            oldNameIdSet.add(String.valueOf(nameId));
241
                        }
242
                    }
243
                }
244
            }
245
        } catch (Exception e) {
246
            e.printStackTrace();
247
            logger.error("Exception in handleOldNames" + e.getMessage());
248
        }
249
        return oldNameIdSet;
250
    }
251

    
252
	private TaxonName getName(BerlinModelImportState state, String oldNameStr, Integer oldNameFk,
253
	        String oldNameFkFullCache, String oldNameFkCache,
254
	        Integer occSourceId, Distribution distribution) {
255
		if (oldNameStr == null && oldNameFk == null){
256
		    return null;
257
		}
258
		boolean includeMisapplications = state.getConfig().isIncludeMANsForOldNameCheck();
259

    
260
	    TaxonName taxonName = (TaxonName)state.getRelatedObject(BerlinModelTaxonNameImport.NAMESPACE, String.valueOf(oldNameFk));
261
		if (oldNameFk != null && taxonName == null){
262
		    //move down if occ source names are not loaded in name view
263
		    taxonName = handleOldFreetextNameOnly(state, oldNameFkFullCache, occSourceId, distribution);
264
		    if (taxonName == null){
265
		        taxonName = handleOldFreetextNameOnly(state, oldNameFkCache, occSourceId, distribution);
266
		    }
267
		    if (taxonName == null ){
268
		        logger.warn("WARN: OldNameFk "+oldNameFk+" exists but taxonName not found and also search by string not successful for occSource: " + occSourceId +"; Taxon: "+getTaxonStr(distribution));
269
		        oldNameStr = oldNameFkFullCache;
270
		    }
271
		}else if (taxonName != null){
272
            taxonName = checkSynonymy(state, oldNameFk, occSourceId, distribution, taxonName, includeMisapplications);
273
		}
274
		if (isNotBlank(oldNameStr) && oldNameStr != null){
275
		    if (taxonName == null){
276
		        return handleOldFreetextNameOnly(state, oldNameStr, occSourceId, distribution);
277
		    }else if (!oldNameStr.equals(taxonName.getNameCache())){
278
		        logger.info("INFO: Old name freetext and linked name nameCache are not equal: " + oldNameStr + "/" + taxonName.getNameCache() +"; Taxon: "+getTaxonStr(distribution) +  "; occSourceId: " +  occSourceId);
279
		        checkSynonymy(state, oldNameFk, occSourceId, distribution, taxonName, includeMisapplications);
280
	            return taxonName;
281
		    }else{
282
		        checkSynonymy(state, oldNameFk, occSourceId, distribution, taxonName, includeMisapplications);
283
	            return taxonName;
284
		    }
285
		}else{ //taxonName != null
286
		    if (taxonName != null){
287
		        checkSynonymy(state, oldNameFk, occSourceId, distribution, taxonName, includeMisapplications);
288
		    }
289
		    return taxonName;
290
		}
291
	}
292

    
293
    /**
294
     * @param state
295
     * @param oldName
296
     * @param occSourceId
297
     * @param distribution
298
     * @return
299
     */
300
    protected TaxonName handleOldFreetextNameOnly(BerlinModelImportState state, String oldName, Integer occSourceId,
301
            Distribution distribution) {
302
        Set<TaxonName> names = getOldNames(state, oldName);
303
        if (names.isEmpty()){
304
            if (getNameIds(oldName).isEmpty()){
305
                if (state.getConfig().isLogNotMatchingOldNames()){
306
                    logger.warn("No name found for freetext oldName '"+oldName+"'; occSourceId: " + occSourceId);
307
                }
308
            }else{
309
                if (state.getConfig().isLogMatchingNotExportedOldNames()){
310
                    logger.warn("Matching name exists in BM but not in CDM. OldName: " + oldName + "; Taxon: "+getTaxonStr(distribution)+"; occSourceId: " + occSourceId);
311
                }
312
            }
313
            return null;
314
        }else {
315
            TaxonName result = names.iterator().next();
316
            boolean checkOldNameIsSynonym = state.getConfig().isCheckOldNameIsSynonym();
317
            if (names.size()> 1){
318
                TaxonName synName = getFirstSynonymName(state, names, distribution, null, occSourceId, true);
319
                if (synName == null){
320
                    //TODO should we really use a name if not available in synonymy?
321
                    String message = "INFO: There is more than one matching oldName for '"+oldName+"' but none of them is a synonym of the accepted taxon '"+getTaxonStr(distribution)+"'.";
322
                    message += (checkOldNameIsSynonym ? "":"Take arbitrary one. ") + "OccSourceId: " + occSourceId;
323
                    logger.info(message);
324
                    return checkOldNameIsSynonym ? null : result;
325
                }else{
326
                    return synName;
327
                }
328
            }else{
329
                //names.size() = 1
330
                if (checkOldNameIsSynonym){
331
                    TaxonName synName = getFirstSynonymName(state, names, distribution, null, occSourceId, true);
332
                    if (synName == null){
333
                        if (state.getConfig().isCheckOldNameIsSynonym()){
334
                            logger.warn("There is a matching oldName for '"+oldName+"' but it is not a synonym/misapplication of the accepted taxon '"+getTaxonStr(distribution)+"'. OccSourceId: " + occSourceId);
335
                            return null;
336
                        }else{
337
                            return result;
338
                        }
339
                    }else if (!synName.equals(result)){
340
                        //TODO strange, how can this happen if it is the only matching?
341
                        logger.warn("There is a matching oldName for '"+oldName+"'("+result.getUuid()+") but another matching name "+synName.getUuid()+"exists in the synonymy of the accepted taxon '"+getTaxonStr(distribution)+"'. OccSourceId: " + occSourceId);
342
                        return synName;
343
                    }else{
344
                        return result;
345
                    }
346
                }else{
347
                    return result;
348
                }
349
            }
350
        }
351
    }
352

    
353
    protected TaxonName checkSynonymy(BerlinModelImportState state, Integer oldNameFk, Integer occSourceId,
354
            Distribution distribution, TaxonName taxonName, boolean includeMisapplications) {
355

    
356
        if (!state.getConfig().isCheckOldNameIsSynonym()){
357
            return taxonName;
358
        }else{
359
            Set<TaxonName> names = new HashSet<>();
360
            names.add(taxonName);
361
            TaxonName synName = getFirstSynonymName(state, names, distribution, null, occSourceId, includeMisapplications);
362
            if (synName != null){
363
                return synName;  //same as taxonName?
364
            }else{
365
                boolean hasTaxon = !taxonName.getTaxonBases().isEmpty();
366
                String orphaned = hasTaxon ? "" : "Orphaned name: ";
367
                Set<TaxonName> existingNames = getOldNames(state, taxonName.getNameCache());
368
                existingNames.remove(taxonName);
369
                if (existingNames.isEmpty()){
370
                    logger.info("INFO:" + orphaned + "NameInSource (" + oldNameFk + " - " +taxonName.getTitleCache() + ") could not be found in synonymy. Similar name does not exist. Use the not in synonymy name. "+getTaxonStr(distribution)+". OccSourceId: " + occSourceId);
371
                    return taxonName;
372
                }else{
373
                    TaxonName existingSynonym = getFirstSynonymName(state, existingNames, distribution, null, occSourceId, false);
374
                    if (existingSynonym != null){
375
                        boolean isExact = CdmUtils.nullSafeEqual(existingSynonym.getTitleCache(),taxonName.getTitleCache());
376
                        String exact = isExact ? EXACT : "";
377
                        logger.info("INFO: " + exact + orphaned + "A similar name ("+existingSynonym.getUuid()+") was found in synonymy but is not the nameInSource. Use synonymie name (" + oldNameFk + " - " +taxonName.getTitleCache() + "); Taxon: "+getTaxonStr(distribution)+". OccSourceId: " + occSourceId);
378
                        return existingSynonym;
379
                    }else{
380
                        TaxonName existingMisapplication = getFirstMisapplication(state, existingNames, distribution, occSourceId);
381
                        if (existingMisapplication != null){
382
                            boolean isExact = CdmUtils.nullSafeEqual(existingMisapplication.getTitleCache(),taxonName.getTitleCache());
383
                            String exact = isExact ? EXACT : "";
384
                            logger.info("INFO: " + exact + orphaned + "A similar misapplied name ("+existingMisapplication.getUuid()+") can be found in misapplications but is not the nameInSource. Use synonymie name (" + oldNameFk + " - " +taxonName.getTitleCache() + "); Taxon: "+getTaxonStr(distribution)+". OccSourceId: " + occSourceId);
385
                            return existingMisapplication;
386
                        }else{
387
                            logger.info("INFO: NameInSource not found in synonymy. Similar names exist but also not in synonymy. Use name in source (" + oldNameFk + " - " +taxonName.getTitleCache() + "); Taxon: "+getTaxonStr(distribution)+". OccSourceId: " + occSourceId);
388
                            return taxonName;
389
                        }
390
                    }
391
                }
392
            }
393
        }
394
    }
395

    
396
    private TaxonName getFirstSynonymName(BerlinModelImportState state, Set<TaxonName> names, Distribution distribution,
397
            Taxon taxon, Integer occSourceId, boolean includeMisapplications) {
398
        TaxonName result = null;
399
        taxon = (taxon == null) ? getTaxon(distribution): taxon;
400
        Set<Synonym> synonyms = taxon.getSynonyms();
401
        Set<TaxonName> synonymNames = new HashSet<>();
402

    
403
        //taxon, orthvars, synonyms and their orthvars
404
        synonymNames.add(taxon.getName());
405
        synonymNames.addAll(getOrthographicVariants(taxon));
406

    
407
        for (Synonym synonym : synonyms){
408
            synonymNames.add(synonym.getName());
409
            synonymNames.addAll(getOrthographicVariants(synonym));
410
        }
411
        for (TaxonName name : names){
412
            if (synonymNames.contains(name)){
413
                if (result != null){
414
                    logger.warn("There is more than 1 matching synonym/taxon for " + name.getNameCache() + "; occSourceId: " + occSourceId);
415
                }
416
                result = name;
417
            }
418
        }
419

    
420
        //parent
421
        if (result == null){
422
            if (taxon.getName().isInfraSpecific()){
423
                if (!taxon.getTaxonNodes().isEmpty()){
424
                    TaxonNode parent = taxon.getTaxonNodes().iterator().next().getParent();
425
                    if (parent != null && parent.getTaxon() != null){
426
                        Set<TaxonName> parentNames = new HashSet<>();
427
                        TaxonName parentName = parent.getTaxon().getName();
428
                        parentNames.add(parentName);
429
                        parentNames.addAll(getOrthographicVariants(parent.getTaxon()));
430

    
431
                        for (TaxonName name : names){
432
                            if (parentNames.contains(name)){
433
                                if (result != null){
434
                                    logger.warn("There is more than 1 matching parent for " + name.getNameCache() + "; occSourceId: " + occSourceId);
435
                                }
436
                                result = name;
437
                            }
438
                        }
439
                        if (result == null){
440
                            TaxonName parentSyn = getFirstSynonymName(state, names, distribution, parent.getTaxon(), occSourceId, includeMisapplications);
441
                            if (parentSyn != null){
442
                                result = parentSyn;
443
                            }
444
                        }
445
                    }
446
                }
447
            }
448
        }
449

    
450
        //child
451
        if (result == null){
452
            if (taxon.getName().isSpecies() || taxon.getName().isSupraSpecific()){
453
                if (!taxon.getTaxonNodes().isEmpty()){
454
                    List<TaxonNode> children = taxon.getTaxonNodes().iterator().next().getChildNodes();
455
                    Set<TaxonName> childNames = new HashSet<>();
456
                    for (TaxonNode child : children){
457
                        childNames.add(child.getTaxon().getName());
458
                        childNames.addAll(getOrthographicVariants(child.getTaxon()));
459
                    }
460
                    for (TaxonName name : names){
461
                        if (childNames.contains(name)){
462
                            if (result != null){
463
                                logger.warn("There is more than 1 matching child for " + name.getNameCache() + "; occSourceId: " + occSourceId);
464
                            }
465
                            result = name;
466
                        }
467
                    }
468
                }
469
            }
470
        }
471

    
472
        if (result == null && includeMisapplications){
473
            result = getFirstMisapplication(state, names, distribution, occSourceId);
474
        }
475

    
476
        return result;
477
    }
478

    
479
    private TaxonName getFirstMisapplication(BerlinModelImportState state, Set<TaxonName> names, Distribution distribution, Integer occSourceId) {
480
        TaxonName result = null;
481
        Taxon taxon = getTaxon(distribution);
482

    
483
        //MAN
484
        Set<Taxon> misappliedTaxa = taxon.getMisappliedNames(true);
485
        Set<TaxonName> misappliedNames = new HashSet<>();
486
        for (Taxon misTaxon : misappliedTaxa){
487
            misappliedNames.add(misTaxon.getName());
488
            misappliedNames.addAll(getOrthographicVariants(misTaxon));
489
        }
490

    
491
        for (TaxonName name : names){
492
            if (misappliedNames.contains(name)){
493
                if (result != null){
494
                    logger.info("INFO: There is more than 1 matching misapplied name or invalid designation for " + name.getNameCache() + ". Take arbitrary one.; occSourceId: " + occSourceId);
495
                }
496
                result = name;
497
            }
498
        }
499
        return result;
500
    }
501

    
502
    protected Set<TaxonName> getOrthographicVariants(TaxonBase<?> taxonBase) {
503
        Set<TaxonName> result = taxonBase.getName().getRelatedNames(Direction.relatedTo, NameRelationshipType.ORTHOGRAPHIC_VARIANT());
504
        result.addAll(taxonBase.getName().getRelatedNames(Direction.relatedTo, NameRelationshipType.MISSPELLING()));
505
        result.add(taxonBase.getName().getOriginalSpelling());
506
        return result;
507
    }
508

    
509
    protected String getTaxonStr(Distribution distribution) {
510
        Taxon taxon = CdmBase.deproxy(distribution.getInDescription(), TaxonDescription.class).getTaxon();
511
        String areaStr = distribution.getArea().getIdInVocabulary();
512
        return areaStr + ": " + taxon.getName().getTitleCache();
513
    }
514

    
515
    protected Taxon getTaxon(Distribution distribution) {
516
        Taxon taxon = CdmBase.deproxy(distribution.getInDescription(), TaxonDescription.class).getTaxon();
517
        return taxon;
518
    }
519

    
520
    /**
521
     * returns all names in DB matching the given name string.
522
     * The name needs to be loaded via related objects previously.
523
     */
524
    private Set<TaxonName> getOldNames(BerlinModelImportState state, String nameStr) {
525
        Set<TaxonName> names = new HashSet<>();
526
        Set<Integer> nameIds = getNameIds(nameStr);
527
        for (Integer id : nameIds){
528
            TaxonName name = (TaxonName)state.getRelatedObject(BerlinModelTaxonNameImport.NAMESPACE, String.valueOf(id));
529
            if (name != null){
530
                names.add(name);
531
            }else{
532
//                logger.warn("Name for existing id "+id+" not found in related objects: " + nameStr);
533
            }
534
        }
535
        return names;
536
    }
537

    
538
    private Set<Integer> getNameIds(String oldName) {
539
        Set<Integer> result = nameCache2NameIdMap.get(oldName);
540
        return result == null ? new HashSet<>(): result;
541
    }
542

    
543
    /**
544
	 * Creates a map which maps source numbers on references
545
	 */
546
	private Map<String, Integer> makeSourceNumberReferenceIdMap(BerlinModelImportState state) throws SQLException {
547
		Map<String, Integer> result = new HashMap<>();
548

    
549
		Source source = state.getConfig().getSource();
550
		String strQuery = " SELECT RefId, IdInSource " +
551
						  " FROM Reference " +
552
						  " WHERE (IdInSource IS NOT NULL) AND (IdInSource NOT LIKE '') ";
553

    
554
		ResultSet rs = source.getResultSet(strQuery) ;
555
		while (rs.next()){
556
			int refId = rs.getInt("RefId");
557
			String idInSource = rs.getString("IdInSource");
558
			if (idInSource != null){
559
				String[] singleSources = idInSource.split("\\|");
560
				for (String singleSource : singleSources){
561
					singleSource = singleSource.trim();
562
					result.put(singleSource, refId);
563
				}
564
			}
565
		}
566
		return result;
567
	}
568

    
569
	   /**
570
     * Creates a map which maps nameCaches to nameIDs numbers on references
571
     * @param state
572
     * @return
573
     * @throws SQLException
574
     */
575
    private Map<String, Set<Integer>> makeNameCache2NameIdMap(BerlinModelImportState state) throws SQLException {
576
        Map<String, Set<Integer>> result = new HashMap<>();
577
        try {
578

    
579
            Source source = state.getConfig().getSource();
580
            String strQuery = " SELECT NameId, nameCache " +
581
                              " FROM Name " +
582
                              " WHERE (nameCache IS NOT NULL) AND (nameCache NOT LIKE '') ";
583

    
584
            ResultSet rs = source.getResultSet(strQuery) ;
585
            while (rs.next()){
586
                int nameId = rs.getInt("NameId");
587
                String nameCache = rs.getString("nameCache");
588
                if (isNotBlank(nameCache)){
589
                    nameCache = nameCache.trim();
590
                    Set<Integer> set = result.get(nameCache);
591
                    if (set == null){
592
                        set = new HashSet<>();
593
                        result.put(nameCache, set);
594
                    }
595
                    set.add(nameId);
596
                }
597
            }
598
        } catch (Exception e) {
599
            e.printStackTrace();
600
            logger.error("Exception in makeNameCache2NameIdMap" + e.getMessage());
601
        }
602
        return result;
603
    }
604

    
605
	@Override
606
	protected boolean doCheck(BerlinModelImportState state){
607
		IOValidator<BerlinModelImportState> validator = new BerlinModelOccurrenceSourceImportValidator();
608
		return validator.validate(state);
609
	}
610

    
611
	@Override
612
	protected boolean isIgnore(BerlinModelImportState state){
613
		if (! state.getConfig().isDoOccurrenceSources()){
614
			return true;
615
		}else{
616
			if (!this.checkSqlServerColumnExists(state.getConfig().getSource(), "emOccurrenceSource", "OccurrenceSourceId")){
617
				logger.error("emOccurrenceSource table or emOccurrenceSourceId does not exist. Must ignore occurrence import");
618
				return true;
619
			}else{
620
				return false;
621
			}
622
		}
623
	}
624
}
(12-12/22)