Project

General

Profile

Download (18 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.io.berlinModel.in;
11

    
12
import java.sql.ResultSet;
13
import java.sql.SQLException;
14
import java.util.ArrayList;
15
import java.util.HashMap;
16
import java.util.HashSet;
17
import java.util.List;
18
import java.util.Map;
19
import java.util.Set;
20

    
21
import org.apache.commons.lang.StringUtils;
22
import org.apache.log4j.Logger;
23
import org.springframework.stereotype.Component;
24

    
25
import eu.etaxonomy.cdm.common.CdmUtils;
26
import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper;
27
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
28
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelOccurrenceImportValidator;
29
import eu.etaxonomy.cdm.io.common.IOValidator;
30
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
31
import eu.etaxonomy.cdm.io.common.TdwgAreaProvider;
32
import eu.etaxonomy.cdm.model.common.Annotation;
33
import eu.etaxonomy.cdm.model.common.AnnotationType;
34
import eu.etaxonomy.cdm.model.common.CdmBase;
35
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
36
import eu.etaxonomy.cdm.model.common.Language;
37
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
38
import eu.etaxonomy.cdm.model.description.Distribution;
39
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
40
import eu.etaxonomy.cdm.model.description.TaxonDescription;
41
import eu.etaxonomy.cdm.model.location.NamedArea;
42
import eu.etaxonomy.cdm.model.reference.Reference;
43
import eu.etaxonomy.cdm.model.taxon.Taxon;
44
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
45
import eu.etaxonomy.cdm.model.term.OrderedTermVocabulary;
46
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
47

    
48

    
49
/**
50
 * @author a.mueller
51
 * @since 20.03.2008
52
 */
53
@Component
54
public class BerlinModelOccurrenceImport  extends BerlinModelImportBase {
55

    
56
    private static final long serialVersionUID = -7918122767284077183L;
57

    
58
    private static final Logger logger = Logger.getLogger(BerlinModelOccurrenceImport.class);
59

    
60
	public static final String NAMESPACE = "Occurrence";
61

    
62
	private static int modCount = 5000;
63
	private static final String pluralString = "occurrences";
64
	private static final String dbTableName = "emOccurrence";  //??
65

    
66
	/**
	 * Creates the import and registers the table name and the plural label
	 * with the base class.
	 */
	public BerlinModelOccurrenceImport() {
		super(dbTableName, pluralString);
	}
69

    
70
	@Override
71
	protected String getIdQuery(BerlinModelImportState state) {
72
		String result = " SELECT occurrenceId FROM " + getTableName();
73
		if (StringUtils.isNotBlank(state.getConfig().getOccurrenceFilter())){
74
			result += " WHERE " +  state.getConfig().getOccurrenceFilter();
75
		}
76
		return result;
77
	}
78

    
79
	@Override
80
	protected String getRecordQuery(BerlinModelImportConfigurator config) {
81
			String emCode = config.isIncludesAreaEmCode()? ", ar.EMCode" : "";
82
			String strQuery =   //DISTINCT because otherwise emOccurrenceSource creates multiple records for a single distribution
83
                " SELECT DISTINCT pt.RIdentifier AS taxonId, occ.OccurrenceId, occ.Native, occ.Introduced, " +
84
            		" occ.Cultivated, occ.StatusUnknown, occ.WorldDistCompl, occ.Notes occNotes, " +
85
            		" sumcat.emOccurSumCatId, sumcat.Short, sumcat.Description, " +
86
                	" sumcat.OutputCode, ar.AreaId, ar.TDWGCode, "
87
                	+ " occ.Created_When , occ.Updated_When, occ.Created_Who , occ.Updated_Who, occ.notes " + emCode +
88
                " FROM emOccurrence occ " +
89
                	" INNER JOIN emArea ar ON occ.AreaFk = ar.AreaId " +
90
                	" INNER JOIN PTaxon pt ON occ.PTNameFk = pt.PTNameFk AND occ.PTRefFk = pt.PTRefFk " +
91
                	" LEFT OUTER JOIN emOccurSumCat sumcat ON occ.SummaryStatus = sumcat.emOccurSumCatId " +
92
                	" LEFT OUTER JOIN emOccurrenceSource ocs ON occ.OccurrenceId = ocs.OccurrenceFk " +
93
                " WHERE (occ.OccurrenceId IN (" + ID_LIST_TOKEN + ")  )" +
94
                " ORDER BY pt.RIdentifier";
95
		return strQuery;
96
	}
97

    
98

    
99
	@Override
100
	public void doInvoke(BerlinModelImportState state) {
101
		super.doInvoke(state);
102
	}
103

    
104
    private NamedArea getAreaByAreaId(int areaId) {
105
        NamedArea result = null;
106
        String areaIdStr = String.valueOf(areaId);
107
        OrderedTermVocabulary<NamedArea> voc = getAreaVoc();
108
        for (NamedArea area : voc.getTerms()){
109
            for (IdentifiableSource source : area.getSources()){
110
                if (areaIdStr.equals(source.getIdInSource()) && BerlinModelAreaImport.NAMESPACE.equals(source.getIdNamespace())){
111
                    if (result != null){
112
                        logger.warn("Result for areaId already exists. areaId: " + areaId);
113
                    }
114
                    result = area;
115
                }
116
            }
117
        }
118
        return result;
119
    }
120

    
121
    private OrderedTermVocabulary<NamedArea> areaVoc;
122
    @SuppressWarnings("unchecked")
123
    private OrderedTermVocabulary<NamedArea> getAreaVoc(){
124
        if (areaVoc == null){
125
            areaVoc = (OrderedTermVocabulary<NamedArea>)getVocabularyService().find(BerlinModelTransformer.uuidVocEuroMedAreas);
126
        }
127
        return areaVoc;
128
    }
129

    
130

    
131
	private String nullSafeTrim(String string) {
132
		if (string == null){
133
			return null;
134
		}else{
135
			return string.trim();
136
		}
137
	}
138

    
139
	@Override
140
	public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, BerlinModelImportState state) {
141
		boolean success = true;
142
		@SuppressWarnings("rawtypes")
143
        Set<TaxonBase> taxaToSave = new HashSet<>();
144

    
145
		@SuppressWarnings("unchecked")
146
        Map<String, TaxonBase<?>> taxonMap = partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE);
147

    
148
		ResultSet rs = partitioner.getResultSet();
149

    
150
		try {
151
			//map to store the mapping of duplicate berlin model occurrences to their real distributions
152
			//duplicated may occur due to area mappings from BM areas to TDWG areas
153
			Map<Integer, String> duplicateMap = new HashMap<>();
154
			int oldTaxonId = -1;
155
			TaxonDescription oldDescription = null;
156
			int i = 0;
157
			int countDescriptions = 0;
158
			int countDistributions = 0;
159
			int countDuplicates = 0;
160
			//for each reference
161
            while (rs.next()){
162

    
163
            	if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("Facts handled: " + (i-1));}
164

    
165
                int occurrenceId = rs.getInt("OccurrenceId");
166
                int newTaxonId = rs.getInt("taxonId");
167
                String notes = nullSafeTrim(rs.getString("occNotes"));
168

    
169
                Integer emStatusId = nullSafeInt(rs, "emOccurSumCatId");
170

    
171
                try {
172
                    //area(s)
173
                    List<NamedArea> areas = makeAreaList(state, partitioner, rs, occurrenceId);
174
                    if (areas.size() != 1){
175
                        logger.warn("Exactly 1 area expected but was " + areas.size() + ". OccId: " + occurrenceId);
176
                        if (areas.isEmpty()){
177
                            continue;
178
                        }
179
                    }
180

    
181
                    //status
182
                	PresenceAbsenceTerm status = null;
183
                	String alternativeStatusString = null;
184
					if (emStatusId != null){
185
						status = BerlinModelTransformer.occStatus2PresenceAbsence(emStatusId);
186
						if (state.getConfig().isEuroMed() && emStatusId == 250){
187
						    String introduced = nullSafeTrim(rs.getString("Introduced"));
188
						    if (emStatusId == 250 && "I(P)".equals(introduced)){
189
						        status = PresenceAbsenceTerm.INTRODUCED_UNCERTAIN_DEGREE_OF_NATURALISATION();
190
						    }
191
						}
192
					}else{
193
						//EM
194
					    if (state.getConfig().isEuroMed() && areas.get(0).getUuid().equals(BerlinModelTransformer.uuidEM)){
195
						    String complete = rs.getString("WorldDistCompl");
196
						    if (complete == null){
197
						        //FIXME
198
                                status = PresenceAbsenceTerm.ENDEMISM_UNKNOWN();
199
                                alternativeStatusString = getStatusAnnotation(rs);
200
						    }else if (complete.equals("C")){
201
                                status = PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA();
202
                                logger.warn("EmStatusId undefined though WorldDistCompl is 'C'. This is an unexpected state. OccID: " + occurrenceId);
203
						    }else if (complete.equals("I")){
204
						        status = PresenceAbsenceTerm.NOT_ENDEMIC_FOR_THE_RELEVANT_AREA();
205
                            }else{
206
                                status = PresenceAbsenceTerm.ENDEMISM_UNKNOWN();
207
                                alternativeStatusString = getStatusAnnotation(rs);
208
                            }
209
						}else{ //other areas
210
						    alternativeStatusString = getStatusAnnotation(rs);
211
						    status = getPresenceTerm(state, BerlinModelTransformer.uuidStatusUndefined, "Undefined", "Undefined status as status was not computed in Berlin Model", "none", false, null);
212
						}
213
					}
214

    
215
					Reference sourceRef = state.getTransactionalSourceReference();
216

    
217

    
218
                    //create description(elements)
219
                    TaxonDescription taxonDescription = getTaxonDescription(newTaxonId, oldTaxonId, oldDescription, taxonMap, occurrenceId, sourceRef);
220
                    for (NamedArea area : areas){
221
                    	Distribution distribution = Distribution.NewInstance(area, status);
222
                    	boolean excludeNotes = true;
223
                    	doCreatedUpdatedNotes(state, distribution, rs, false, excludeNotes);
224
                        if (StringUtils.isNotBlank(alternativeStatusString)){
225
                            AnnotationType type = getAnnotationType(state, BerlinModelTransformer.uuidAnnoTypeDistributionStatus, "Original distribution status", "Original distribution status", null, null);
226
                            Annotation annotation = Annotation.NewInstance(alternativeStatusString, type, null);
227
                            distribution.addAnnotation(annotation);
228
                        }
229

    
230
//                      distribution.setCitation(sourceRef);
231
                        if (taxonDescription != null) {
232
                        	Distribution duplicate = checkIsNoDuplicate(taxonDescription, distribution, duplicateMap , occurrenceId);
233
                            if (duplicate == null){
234
                            	taxonDescription.addElement(distribution);
235
	                            distribution.addImportSource(String.valueOf(occurrenceId), NAMESPACE, state.getTransactionalSourceReference(), null);
236
	                        	countDistributions++;
237
	                            if (taxonDescription != oldDescription){
238
	                            	taxaToSave.add(taxonDescription.getTaxon());
239
	                                oldDescription = taxonDescription;
240
	                                countDescriptions++;
241
	                            }
242
                            }else{
243
                            	countDuplicates++;
244
                            	distribution = duplicate;
245
                            	duplicate.addImportSource(String.valueOf(occurrenceId), NAMESPACE, state.getTransactionalSourceReference(), null);
246
                            	logger.info("Distribution is duplicate");	                           }
247
                        } else {
248
                        	logger.warn("Distribution " + area.getLabel() + " ignored. OccurrenceId = " + occurrenceId);
249
	                       	success = false;
250
	                    }
251
                        //notes
252
                        if (isNotBlank(notes)){
253
                        	Annotation annotation = Annotation.NewInstance(notes, Language.DEFAULT());
254
                        	distribution.addAnnotation(annotation);
255
                        }
256
                    }
257
                } catch (UnknownCdmTypeException e) {
258
                     logger.error("Unknown presenceAbsence status id: " + emStatusId);
259
                	e.printStackTrace();
260
                     success = false;
261
                }
262
            }
263

    
264
            logger.info("Distributions: " + countDistributions + ", Descriptions: " + countDescriptions );
265
			logger.info("Duplicate occurrences: "  + (countDuplicates));
266

    
267
			logger.info("Taxa to save: " + taxaToSave.size());
268
			getTaxonService().save(taxaToSave);
269

    
270
			return success;
271
		} catch (SQLException e) {
272
			logger.error("SQLException:" +  e);
273
			return false;
274
		}
275
	}
276

    
277
    /**
278
     * @param rs
279
     * @return
280
     * @throws SQLException
281
     */
282
    protected String getStatusAnnotation(ResultSet rs) throws SQLException {
283
        String alternativeStatusString;
284
        String[] stringArray = new String[]{"Native: " + rs.getString("Native"), "Introduced: "+ rs.getString("Introduced"),
285
                "Cultivated: " + rs.getString("Cultivated"), "StatusUnknown: " + rs.getString("StatusUnknown"),
286
                "WorldDistCompl: " + rs.getString("WorldDistCompl")};
287
        alternativeStatusString = CdmUtils.concat("; ", stringArray);
288
        return alternativeStatusString;
289
    }
290

    
291
	//Create area list
292
	private List<NamedArea> makeAreaList(BerlinModelImportState state,
293
	        @SuppressWarnings("rawtypes") ResultSetPartitioner partitioner,
294
	        ResultSet rs, int occurrenceId) throws SQLException {
295

    
296
	    List<NamedArea> areas = new ArrayList<>();
297

    
298
		if (state.getConfig().isUseEmAreaVocabulary()){
299
		    Integer areaId = rs.getInt("AreaId");
300
			NamedArea area = getAreaByAreaId(areaId);
301
			if (area == null){
302
			    logger.warn("Area for areaId " + areaId + " not found.");
303
			}
304
			areas.add(area);
305
		}else{
306
	        String tdwgCodeString = rs.getString("TDWGCode");
307
	        String emCodeString = state.getConfig().isIncludesAreaEmCode() ? rs.getString("EMCode") : null;
308

    
309
			if (tdwgCodeString != null){
310

    
311
				String[] tdwgCodes = new String[]{tdwgCodeString};
312
				if (state.getConfig().isSplitTdwgCodes()){
313
					tdwgCodes = tdwgCodeString.split(";");
314
				}
315

    
316
				for (String tdwgCode : tdwgCodes){
317
					NamedArea area = TdwgAreaProvider.getAreaByTdwgAbbreviation(tdwgCode.trim());
318
			    	if (area == null){
319
			    		area = getOtherAreas(state, emCodeString, tdwgCodeString);
320
			    	}
321
			    	if (area != null){
322
			    		areas.add(area);
323
			    	}
324
				}
325
			 }
326

    
327
			 if (areas.size()== 0){
328
				 NamedArea area = getOtherAreas(state, emCodeString, tdwgCodeString);
329
				 if (area != null){
330
			         areas.add(area);
331
			   }
332
			 }
333
			 if (areas.size() == 0){
334
				 String areaId = rs.getString("AreaId");
335
				 logger.warn("No areas defined for occurrence " + occurrenceId + ". EMCode: " + CdmUtils.Nz(emCodeString).trim() + ". AreaId: " + areaId );
336
			 }
337
		}
338
		return areas;
339
	}
340

    
341
	@Override
342
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, BerlinModelImportState state) {
343

    
344
		try{
345
		    Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
346
			Set<String> taxonIdSet = new HashSet<String>();
347
			while (rs.next()){
348
				handleForeignKey(rs, taxonIdSet, "taxonId");
349
			}
350

    
351
			//taxon map
352
			String nameSpace = BerlinModelTaxonImport.NAMESPACE;
353
			Set<String> idSet = taxonIdSet;
354
            @SuppressWarnings("rawtypes")
355
            Map<String, TaxonBase> objectMap = getCommonService().getSourcedObjectsByIdInSourceC(TaxonBase.class, idSet, nameSpace);
356
			result.put(nameSpace, objectMap);
357

    
358
			return result;
359
		} catch (SQLException e) {
360
			throw new RuntimeException(e);
361
		}
362
	}
363

    
364

    
365
	/**
366
     * Tests if a distribution with the same tdwgArea and the same status already exists in the description.
367
     * If so the old distribution is returned
368
     * @return false, if duplicate exists. True otherwise.
369
     */
370
    private Distribution checkIsNoDuplicate(TaxonDescription description, Distribution distribution, Map<Integer, String> duplicateMap, Integer bmDistributionId){
371
    	for (DescriptionElementBase descElBase : description.getElements()){
372
    		if (descElBase.isInstanceOf(Distribution.class)){
373
    			Distribution oldDistr = HibernateProxyHelper.deproxy(descElBase, Distribution.class);
374
    			NamedArea oldArea = oldDistr.getArea();
375
    			if (oldArea != null && oldArea.equals(distribution.getArea())){
376
    				PresenceAbsenceTerm oldStatus = oldDistr.getStatus();
377
    				if (oldStatus != null && oldStatus.equals(distribution.getStatus())){
378
    					duplicateMap.put(bmDistributionId, oldDistr.getSources().iterator().next().getIdInSource());
379
    					return oldDistr;
380
    				}
381
    			}
382
    		}
383
    	}
384
    	return null;
385
    }
386

    
387
	/**
388
	 * Use same TaxonDescription if two records belong to the same taxon
389
	 * @param newTaxonId
390
	 * @param oldTaxonId
391
	 * @param oldDescription
392
	 * @param taxonMap
393
	 * @return
394
	 */
395
	private TaxonDescription getTaxonDescription(int newTaxonId, int oldTaxonId, TaxonDescription oldDescription, Map<String, TaxonBase<?>> taxonMap, int occurrenceId, Reference sourceSec){
396
		TaxonDescription result = null;
397
		if (oldDescription == null || newTaxonId != oldTaxonId){
398
			TaxonBase<?> taxonBase = taxonMap.get(String.valueOf(newTaxonId));
399
			//TODO for testing
400
			//TaxonBase taxonBase = Taxon.NewInstance(TaxonNameFactory.NewBotanicalInstance(Rank.SPECIES()), null);
401
			Taxon taxon;
402
			if ( taxonBase instanceof Taxon ) {
403
				taxon = (Taxon) taxonBase;
404
			} else if (taxonBase != null) {
405
				logger.warn("TaxonBase for Occurrence " + occurrenceId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName());
406
				return null;
407
			} else {
408
				logger.warn("TaxonBase for Occurrence " + occurrenceId + " is null.");
409
				return null;
410
			}
411
			Set<TaxonDescription> descriptionSet= taxon.getDescriptions();
412
			if (descriptionSet.size() > 0) {
413
				result = descriptionSet.iterator().next();
414
			}else{
415
				result = TaxonDescription.NewInstance();
416
				result.setTitleCache(sourceSec.getTitleCache(), true);
417
				taxon.addDescription(result);
418
			}
419
		}else{
420
			result = oldDescription;
421
		}
422
		return result;
423
	}
424

    
425
	@Override
426
	protected boolean doCheck(BerlinModelImportState state){
427
		IOValidator<BerlinModelImportState> validator = new BerlinModelOccurrenceImportValidator();
428
		return validator.validate(state);
429
	}
430

    
431
	@Override
432
	protected boolean isIgnore(BerlinModelImportState state){
433
		if (! state.getConfig().isDoOccurrence()){
434
			return true;
435
		}else{
436
			if (!this.checkSqlServerColumnExists(state.getConfig().getSource(), "emOccurrence", "OccurrenceId")){
437
				logger.error("emOccurrence table or emOccurrenceId does not exist. Must ignore occurrence import");
438
				return true;
439
			}else{
440
				return false;
441
			}
442
		}
443
	}
444
}
(11-11/22)