Project

General

Profile

Download (17.7 KB) Statistics
| Branch: | Revision:
/**
* Copyright (C) 2007 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/

    
10
package eu.etaxonomy.cdm.io.berlinModel.in;
11

    
12
import java.sql.ResultSet;
13
import java.sql.SQLException;
14
import java.util.ArrayList;
15
import java.util.HashMap;
16
import java.util.HashSet;
17
import java.util.List;
18
import java.util.Map;
19
import java.util.Set;
20

    
21
import org.apache.commons.lang.StringUtils;
22
import org.apache.log4j.Logger;
23
import org.springframework.stereotype.Component;
24

    
25
import eu.etaxonomy.cdm.common.CdmUtils;
26
import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper;
27
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
28
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelOccurrenceImportValidator;
29
import eu.etaxonomy.cdm.io.common.IOValidator;
30
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
31
import eu.etaxonomy.cdm.io.common.TdwgAreaProvider;
32
import eu.etaxonomy.cdm.model.common.Annotation;
33
import eu.etaxonomy.cdm.model.common.AnnotationType;
34
import eu.etaxonomy.cdm.model.common.CdmBase;
35
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
36
import eu.etaxonomy.cdm.model.common.Language;
37
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
38
import eu.etaxonomy.cdm.model.description.Distribution;
39
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
40
import eu.etaxonomy.cdm.model.description.TaxonDescription;
41
import eu.etaxonomy.cdm.model.location.NamedArea;
42
import eu.etaxonomy.cdm.model.reference.Reference;
43
import eu.etaxonomy.cdm.model.taxon.Taxon;
44
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
45
import eu.etaxonomy.cdm.model.term.OrderedTermVocabulary;
46
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
47

    
48

    
/**
 * @author a.mueller
 * @since 20.03.2008
 */
53
@Component
54
public class BerlinModelOccurrenceImport  extends BerlinModelImportBase {
55

    
56
    private static final long serialVersionUID = -7918122767284077183L;

    private static final Logger logger = Logger.getLogger(BerlinModelOccurrenceImport.class);

    /** Namespace stored together with the original occurrence id in the import source of each distribution. */
    public static final String NAMESPACE = "Occurrence";

    /** Number of handled records after which a progress message is logged. */
    private static final int modCount = 5000;
    /** Plural label handed to the base class constructor. */
    private static final String pluralString = "occurrences";
    /** Source table name handed to the base class constructor. */
    private static final String dbTableName = "emOccurrence";  //?? (flagged as uncertain by the original author)

    /**
     * Creates the import, passing the source table name and the plural label
     * to the base class.
     */
    public BerlinModelOccurrenceImport(){
        super(dbTableName, pluralString);
    }
69

    
70
	@Override
71
	protected String getIdQuery(BerlinModelImportState state) {
72
		String result = " SELECT occurrenceId FROM " + getTableName();
73
		if (StringUtils.isNotBlank(state.getConfig().getOccurrenceFilter())){
74
			result += " WHERE " +  state.getConfig().getOccurrenceFilter();
75
		}
76
		return result;
77
	}
78

    
79
	@Override
80
	protected String getRecordQuery(BerlinModelImportConfigurator config) {
81
			String emCode = config.isIncludesAreaEmCode()? ", ar.EMCode" : "";
82
			String strQuery =   //DISTINCT because otherwise emOccurrenceSource creates multiple records for a single distribution
83
                " SELECT DISTINCT pt.RIdentifier AS taxonId, occ.OccurrenceId, occ.Native, occ.Introduced, " +
84
            		" occ.Cultivated, occ.StatusUnknown, occ.WorldDistCompl, occ.Notes occNotes, " +
85
            		" sumcat.emOccurSumCatId, sumcat.Short, sumcat.Description, " +
86
                	" sumcat.OutputCode, ar.AreaId, ar.TDWGCode " + emCode +
87
                " FROM emOccurrence occ " +
88
                	" INNER JOIN emArea ar ON occ.AreaFk = ar.AreaId " +
89
                	" INNER JOIN PTaxon pt ON occ.PTNameFk = pt.PTNameFk AND occ.PTRefFk = pt.PTRefFk " +
90
                	" LEFT OUTER JOIN emOccurSumCat sumcat ON occ.SummaryStatus = sumcat.emOccurSumCatId " +
91
                	" LEFT OUTER JOIN emOccurrenceSource ocs ON occ.OccurrenceId = ocs.OccurrenceFk " +
92
                " WHERE (occ.OccurrenceId IN (" + ID_LIST_TOKEN + ")  )" +
93
                " ORDER BY pt.RIdentifier";
94
		return strQuery;
95
	}
96

    
97

    
98
	@Override
99
	public void doInvoke(BerlinModelImportState state) {
100
		super.doInvoke(state);
101
	}
102

    
103
    private NamedArea getAreaByAreaId(int areaId) {
104
        NamedArea result = null;
105
        String areaIdStr = String.valueOf(areaId);
106
        OrderedTermVocabulary<NamedArea> voc = getAreaVoc();
107
        for (NamedArea area : voc.getTerms()){
108
            for (IdentifiableSource source : area.getSources()){
109
                if (areaIdStr.equals(source.getIdInSource()) && BerlinModelAreaImport.NAMESPACE.equals(source.getIdNamespace())){
110
                    if (result != null){
111
                        logger.warn("Result for areaId already exists. areaId: " + areaId);
112
                    }
113
                    result = area;
114
                }
115
            }
116
        }
117
        return result;
118
    }
119

    
120
    private OrderedTermVocabulary<NamedArea> areaVoc;
121
    @SuppressWarnings("unchecked")
122
    private OrderedTermVocabulary<NamedArea> getAreaVoc(){
123
        if (areaVoc == null){
124
            areaVoc = (OrderedTermVocabulary<NamedArea>)getVocabularyService().find(BerlinModelTransformer.uuidVocEuroMedAreas);
125
        }
126
        return areaVoc;
127
    }
128

    
129

    
130
	private String nullSafeTrim(String string) {
131
		if (string == null){
132
			return null;
133
		}else{
134
			return string.trim();
135
		}
136
	}
137

    
138
	@Override
139
	public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, BerlinModelImportState state) {
140
		boolean success = true;
141
		@SuppressWarnings("rawtypes")
142
        Set<TaxonBase> taxaToSave = new HashSet<>();
143

    
144
		@SuppressWarnings("unchecked")
145
        Map<String, TaxonBase<?>> taxonMap = partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE);
146

    
147
		ResultSet rs = partitioner.getResultSet();
148

    
149
		try {
150
			//map to store the mapping of duplicate berlin model occurrences to their real distributions
151
			//duplicated may occur due to area mappings from BM areas to TDWG areas
152
			Map<Integer, String> duplicateMap = new HashMap<>();
153
			int oldTaxonId = -1;
154
			TaxonDescription oldDescription = null;
155
			int i = 0;
156
			int countDescriptions = 0;
157
			int countDistributions = 0;
158
			int countDuplicates = 0;
159
			//for each reference
160
            while (rs.next()){
161

    
162
            	if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("Facts handled: " + (i-1));}
163

    
164
                int occurrenceId = rs.getInt("OccurrenceId");
165
                int newTaxonId = rs.getInt("taxonId");
166
                String notes = nullSafeTrim(rs.getString("occNotes"));
167

    
168
                Integer emStatusId = nullSafeInt(rs, "emOccurSumCatId");
169

    
170
                try {
171
                    //area(s)
172
                    List<NamedArea> areas = makeAreaList(state, partitioner, rs, occurrenceId);
173
                    if (areas.size() != 1){
174
                        logger.warn("Exactly 1 area expected but was " + areas.size() + ". OccId: " + occurrenceId);
175
                        if (areas.isEmpty()){
176
                            continue;
177
                        }
178
                    }
179

    
180
                    //status
181
                	PresenceAbsenceTerm status = null;
182
                	String alternativeStatusString = null;
183
					if (emStatusId != null){
184
						status = BerlinModelTransformer.occStatus2PresenceAbsence(emStatusId);
185
					}else{
186
						//EM
187
					    if (state.getConfig().isEuroMed() && areas.get(0).getUuid().equals(BerlinModelTransformer.uuidEM)){
188
						    String complete = rs.getString("WorldDistCompl");
189
						    if (complete == null){
190
						        //FIXME
191
                                status = PresenceAbsenceTerm.ENDEMISM_UNKNOWN();
192
                                alternativeStatusString = getStatusAnnotation(rs);
193
						    }else if (complete.equals("C")){
194
                                status = PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA();
195
                                logger.warn("EmStatusId undefined though WorldDistCompl is 'C'. This is an unexpected state. OccID: " + occurrenceId);
196
						    }else if (complete.equals("I")){
197
						        status = PresenceAbsenceTerm.NOT_ENDEMIC_FOR_THE_RELEVANT_AREA();
198
                            }else{
199
                                status = PresenceAbsenceTerm.ENDEMISM_UNKNOWN();
200
                                alternativeStatusString = getStatusAnnotation(rs);
201
                            }
202
						}else{ //other areas
203
						    alternativeStatusString = getStatusAnnotation(rs);
204
						    status = getPresenceTerm(state, BerlinModelTransformer.uuidStatusUndefined, "Undefined", "Undefined status as status was not computed in Berlin Model", "none", false, null);
205
						}
206
					}
207

    
208
					Reference sourceRef = state.getTransactionalSourceReference();
209

    
210

    
211
                    //create description(elements)
212
                    TaxonDescription taxonDescription = getTaxonDescription(newTaxonId, oldTaxonId, oldDescription, taxonMap, occurrenceId, sourceRef);
213
                    for (NamedArea area : areas){
214
                    	Distribution distribution = Distribution.NewInstance(area, status);
215
                        if (StringUtils.isNotBlank(alternativeStatusString)){
216
                            AnnotationType type = getAnnotationType(state, BerlinModelTransformer.uuidAnnoTypeDistributionStatus, "Original distribution status", "Original distribution status", null, null);
217
                            Annotation annotation = Annotation.NewInstance(alternativeStatusString, type, null);
218
                            distribution.addAnnotation(annotation);
219
                        }
220

    
221
//                      distribution.setCitation(sourceRef);
222
                        if (taxonDescription != null) {
223
                        	Distribution duplicate = checkIsNoDuplicate(taxonDescription, distribution, duplicateMap , occurrenceId);
224
                            if (duplicate == null){
225
                            	taxonDescription.addElement(distribution);
226
	                            distribution.addImportSource(String.valueOf(occurrenceId), NAMESPACE, state.getTransactionalSourceReference(), null);
227
	                        	countDistributions++;
228
	                            if (taxonDescription != oldDescription){
229
	                            	taxaToSave.add(taxonDescription.getTaxon());
230
	                                oldDescription = taxonDescription;
231
	                                countDescriptions++;
232
	                            }
233
                            }else{
234
                            	countDuplicates++;
235
                            	duplicate.addImportSource(String.valueOf(occurrenceId), NAMESPACE, state.getTransactionalSourceReference(), null);
236
                            	logger.info("Distribution is duplicate");	                           }
237
                        } else {
238
                        	logger.warn("Distribution " + area.getLabel() + " ignored. OccurrenceId = " + occurrenceId);
239
	                       	success = false;
240
	                    }
241
                        //notes
242
                        if (isNotBlank(notes)){
243
                        	Annotation annotation = Annotation.NewInstance(notes, Language.DEFAULT());
244
                        	distribution.addAnnotation(annotation);
245
                        }
246
                    }
247
                } catch (UnknownCdmTypeException e) {
248
                     logger.error("Unknown presenceAbsence status id: " + emStatusId);
249
                	e.printStackTrace();
250
                     success = false;
251
                }
252
            }
253

    
254
            logger.info("Distributions: " + countDistributions + ", Descriptions: " + countDescriptions );
255
			logger.info("Duplicate occurrences: "  + (countDuplicates));
256

    
257
			logger.info("Taxa to save: " + taxaToSave.size());
258
			getTaxonService().save(taxaToSave);
259

    
260
			return success;
261
		} catch (SQLException e) {
262
			logger.error("SQLException:" +  e);
263
			return false;
264
		}
265
	}
266

    
267
    /**
268
     * @param rs
269
     * @return
270
     * @throws SQLException
271
     */
272
    protected String getStatusAnnotation(ResultSet rs) throws SQLException {
273
        String alternativeStatusString;
274
        String[] stringArray = new String[]{"Native: " + rs.getString("Native"), "Introduced: "+ rs.getString("Introduced"),
275
                "Cultivated: " + rs.getString("Cultivated"), "StatusUnknown: " + rs.getString("StatusUnknown"),
276
                "WorldDistCompl: " + rs.getString("WorldDistCompl")};
277
        alternativeStatusString = CdmUtils.concat("; ", stringArray);
278
        return alternativeStatusString;
279
    }
280

    
281
	/**
282
	 * @param state
283
	 * @param partitioner
284
	 * @param rs
285
	 * @param occurrenceId
286
	 * @param tdwgCodeString
287
	 * @param emCodeString
288
	 * @return
289
	 * @throws SQLException
290
	 */
291
	//Create area list
292
	private List<NamedArea> makeAreaList(BerlinModelImportState state,
293
	        @SuppressWarnings("rawtypes") ResultSetPartitioner partitioner,
294
	        ResultSet rs, int occurrenceId) throws SQLException {
295

    
296
	    List<NamedArea> areas = new ArrayList<>();
297

    
298
		if (state.getConfig().isUseEmAreaVocabulary()){
299
		    Integer areaId = rs.getInt("AreaId");
300
			NamedArea area = getAreaByAreaId(areaId);
301
			if (area == null){
302
			    logger.warn("Area for areaId " + areaId + " not found.");
303
			}
304
			areas.add(area);
305
		}else{
306
	        String tdwgCodeString = rs.getString("TDWGCode");
307
	        String emCodeString = state.getConfig().isIncludesAreaEmCode() ? rs.getString("EMCode") : null;
308

    
309
			if (tdwgCodeString != null){
310

    
311
				String[] tdwgCodes = new String[]{tdwgCodeString};
312
				if (state.getConfig().isSplitTdwgCodes()){
313
					tdwgCodes = tdwgCodeString.split(";");
314
				}
315

    
316
				for (String tdwgCode : tdwgCodes){
317
					NamedArea area = TdwgAreaProvider.getAreaByTdwgAbbreviation(tdwgCode.trim());
318
			    	if (area == null){
319
			    		area = getOtherAreas(state, emCodeString, tdwgCodeString);
320
			    	}
321
			    	if (area != null){
322
			    		areas.add(area);
323
			    	}
324
				}
325
			 }
326

    
327
			 if (areas.size()== 0){
328
				 NamedArea area = getOtherAreas(state, emCodeString, tdwgCodeString);
329
				 if (area != null){
330
			         areas.add(area);
331
			   }
332
			 }
333
			 if (areas.size() == 0){
334
				 String areaId = rs.getString("AreaId");
335
				 logger.warn("No areas defined for occurrence " + occurrenceId + ". EMCode: " + CdmUtils.Nz(emCodeString).trim() + ". AreaId: " + areaId );
336
			 }
337
		}
338
		return areas;
339
	}
340

    
341
	@Override
342
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, BerlinModelImportState state) {
343

    
344
		try{
345

    
346
		    Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
347
			Set<String> taxonIdSet = new HashSet<String>();
348
			while (rs.next()){
349
				handleForeignKey(rs, taxonIdSet, "taxonId");
350
			}
351

    
352
			//taxon map
353
			String nameSpace = BerlinModelTaxonImport.NAMESPACE;
354
			Class<?> cdmClass = TaxonBase.class;
355
			Set<String> idSet = taxonIdSet;
356
			@SuppressWarnings("unchecked")
357
            Map<String, ? extends CdmBase> objectMap = (Map<String, TaxonBase<?>>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
358
			result.put(nameSpace, objectMap);
359

    
360
			return result;
361
		} catch (SQLException e) {
362
			throw new RuntimeException(e);
363
		}
364
	}
365

    
366

    
367
	/**
368
     * Tests if a distribution with the same tdwgArea and the same status already exists in the description.
369
     * If so the old distribution is returned
370
     * @param description
371
     * @param tdwgArea
372
     * @return false, if dupplicate exists. True otherwise.
373
     */
374
    private Distribution checkIsNoDuplicate(TaxonDescription description, Distribution distribution, Map<Integer, String> duplicateMap, Integer bmDistributionId){
375
    	for (DescriptionElementBase descElBase : description.getElements()){
376
    		if (descElBase.isInstanceOf(Distribution.class)){
377
    			Distribution oldDistr = HibernateProxyHelper.deproxy(descElBase, Distribution.class);
378
    			NamedArea oldArea = oldDistr.getArea();
379
    			if (oldArea != null && oldArea.equals(distribution.getArea())){
380
    				PresenceAbsenceTerm oldStatus = oldDistr.getStatus();
381
    				if (oldStatus != null && oldStatus.equals(distribution.getStatus())){
382
    					duplicateMap.put(bmDistributionId, oldDistr.getSources().iterator().next().getIdInSource());
383
    					return oldDistr;
384
    				}
385
    			}
386
    		}
387
    	}
388
    	return null;
389
    }
390

    
391
	/**
392
	 * Use same TaxonDescription if two records belong to the same taxon
393
	 * @param newTaxonId
394
	 * @param oldTaxonId
395
	 * @param oldDescription
396
	 * @param taxonMap
397
	 * @return
398
	 */
399
	private TaxonDescription getTaxonDescription(int newTaxonId, int oldTaxonId, TaxonDescription oldDescription, Map<String, TaxonBase<?>> taxonMap, int occurrenceId, Reference sourceSec){
400
		TaxonDescription result = null;
401
		if (oldDescription == null || newTaxonId != oldTaxonId){
402
			TaxonBase<?> taxonBase = taxonMap.get(String.valueOf(newTaxonId));
403
			//TODO for testing
404
			//TaxonBase taxonBase = Taxon.NewInstance(TaxonNameFactory.NewBotanicalInstance(Rank.SPECIES()), null);
405
			Taxon taxon;
406
			if ( taxonBase instanceof Taxon ) {
407
				taxon = (Taxon) taxonBase;
408
			} else if (taxonBase != null) {
409
				logger.warn("TaxonBase for Occurrence " + occurrenceId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName());
410
				return null;
411
			} else {
412
				logger.warn("TaxonBase for Occurrence " + occurrenceId + " is null.");
413
				return null;
414
			}
415
			Set<TaxonDescription> descriptionSet= taxon.getDescriptions();
416
			if (descriptionSet.size() > 0) {
417
				result = descriptionSet.iterator().next();
418
			}else{
419
				result = TaxonDescription.NewInstance();
420
				result.setTitleCache(sourceSec.getTitleCache(), true);
421
				taxon.addDescription(result);
422
			}
423
		}else{
424
			result = oldDescription;
425
		}
426
		return result;
427
	}
428

    
429
	@Override
430
	protected boolean doCheck(BerlinModelImportState state){
431
		IOValidator<BerlinModelImportState> validator = new BerlinModelOccurrenceImportValidator();
432
		return validator.validate(state);
433
	}
434

    
435

    
436
	@Override
437
	protected boolean isIgnore(BerlinModelImportState state){
438
		if (! state.getConfig().isDoOccurrence()){
439
			return true;
440
		}else{
441
			if (!this.checkSqlServerColumnExists(state.getConfig().getSource(), "emOccurrence", "OccurrenceId")){
442
				logger.error("emOccurrence table or emOccurrenceId does not exist. Must ignore occurrence import");
443
				return true;
444
			}else{
445
				return false;
446
			}
447
		}
448
	}
449

    
450
}
(11-11/22)