Project

General

Profile

Download (15.6 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.io.berlinModel.in;
11

    
12
import java.sql.ResultSet;
13
import java.sql.SQLException;
14
import java.util.ArrayList;
15
import java.util.HashMap;
16
import java.util.HashSet;
17
import java.util.List;
18
import java.util.Map;
19
import java.util.Set;
20

    
21
import org.apache.commons.lang.StringUtils;
22
import org.apache.log4j.Logger;
23
import org.springframework.stereotype.Component;
24

    
25
import eu.etaxonomy.cdm.common.CdmUtils;
26
import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper;
27
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
28
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelOccurrenceImportValidator;
29
import eu.etaxonomy.cdm.io.common.IOValidator;
30
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
31
import eu.etaxonomy.cdm.io.common.TdwgAreaProvider;
32
import eu.etaxonomy.cdm.model.common.Annotation;
33
import eu.etaxonomy.cdm.model.common.AnnotationType;
34
import eu.etaxonomy.cdm.model.common.CdmBase;
35
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
36
import eu.etaxonomy.cdm.model.common.Language;
37
import eu.etaxonomy.cdm.model.common.Marker;
38
import eu.etaxonomy.cdm.model.common.MarkerType;
39
import eu.etaxonomy.cdm.model.common.OrderedTermVocabulary;
40
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
41
import eu.etaxonomy.cdm.model.description.Distribution;
42
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
43
import eu.etaxonomy.cdm.model.description.TaxonDescription;
44
import eu.etaxonomy.cdm.model.location.NamedArea;
45
import eu.etaxonomy.cdm.model.reference.Reference;
46
import eu.etaxonomy.cdm.model.taxon.Taxon;
47
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
48
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
49

    
50

    
51
/**
52
 * @author a.mueller
53
 * @since 20.03.2008
54
 */
55
@Component
56
public class BerlinModelOccurrenceImport  extends BerlinModelImportBase {
57

    
58
    /** Serialization version identifier. */
    private static final long serialVersionUID = -7918122767284077183L;

    /** Class logger. */
    private static final Logger logger = Logger.getLogger(BerlinModelOccurrenceImport.class);

    /** Namespace under which the import sources of created distributions are registered. */
    public static final String NAMESPACE = "Occurrence";

    /** Number of handled records between two progress log messages in doPartition. */
    private static int modCount = 5000;
    /** Plural label handed to the base class constructor. */
    private static final String pluralString = "occurrences";
    /** Source table name handed to the base class constructor. */
    private static final String dbTableName = "emOccurrence";  //??

    /**
     * Creates the occurrence import, handing table name and plural label to the base class.
     */
    public BerlinModelOccurrenceImport(){
        super(dbTableName, pluralString);
    }
71

    
72
	@Override
73
	protected String getIdQuery(BerlinModelImportState state) {
74
		String result = " SELECT occurrenceId FROM " + getTableName();
75
		if (StringUtils.isNotBlank(state.getConfig().getOccurrenceFilter())){
76
			result += " WHERE " +  state.getConfig().getOccurrenceFilter();
77
		}
78
		return result;
79
	}
80

    
81
	@Override
82
	protected String getRecordQuery(BerlinModelImportConfigurator config) {
83
			String emCode = config.isIncludesAreaEmCode()? ", ar.EMCode" : "";
84
			String strQuery =   //DISTINCT because otherwise emOccurrenceSource creates multiple records for a single distribution
85
                " SELECT DISTINCT pt.RIdentifier AS taxonId, occ.OccurrenceId, occ.Native, occ.Introduced, " +
86
            		" occ.Cultivated, occ.Notes occNotes, " +
87
            		" sumcat.emOccurSumCatId, sumcat.Short, sumcat.Description, " +
88
                	" sumcat.OutputCode, ar.AreaId, ar.TDWGCode " + emCode +
89
                " FROM emOccurrence occ " +
90
                	" INNER JOIN emArea ar ON occ.AreaFk = ar.AreaId " +
91
                	" INNER JOIN PTaxon pt ON occ.PTNameFk = pt.PTNameFk AND occ.PTRefFk = pt.PTRefFk " +
92
                	" LEFT OUTER JOIN emOccurSumCat sumcat ON occ.SummaryStatus = sumcat.emOccurSumCatId " +
93
                	" LEFT OUTER JOIN emOccurrenceSource ocs ON occ.OccurrenceId = ocs.OccurrenceFk " +
94
                " WHERE (occ.OccurrenceId IN (" + ID_LIST_TOKEN + ")  )" +
95
                " ORDER BY PTaxon.RIdentifier";
96
		return strQuery;
97
	}
98

    
99

    
100
	@Override
101
	public void doInvoke(BerlinModelImportState state) {
102
		super.doInvoke(state);
103
	}
104

    
105
    private NamedArea getAreaByAreaId(int areaId) {
106
        NamedArea result = null;
107
        String areaIdStr = String.valueOf(areaId);
108
        OrderedTermVocabulary<NamedArea> voc = getAreaVoc();
109
        for (NamedArea area : voc.getTerms()){
110
            for (IdentifiableSource source : area.getSources()){
111
                if (areaIdStr.equals(source.getIdInSource()) && BerlinModelAreaImport.NAMESPACE.equals(source.getIdNamespace())){
112
                    if (result != null){
113
                        logger.warn("Result for areaId already exists. areaId: " + areaId);
114
                    }
115
                    result = area;
116
                }
117
            }
118
        }
119
        return result;
120
    }
121

    
122
    private OrderedTermVocabulary<NamedArea> areaVoc;
123
    @SuppressWarnings("unchecked")
124
    private OrderedTermVocabulary<NamedArea> getAreaVoc(){
125
        if (areaVoc == null){
126
            areaVoc = (OrderedTermVocabulary<NamedArea>)getVocabularyService().find(BerlinModelTransformer.uuidVocEuroMedAreas);
127
        }
128
        return areaVoc;
129
    }
130

    
131

    
132
	private String nullSafeTrim(String string) {
133
		if (string == null){
134
			return null;
135
		}else{
136
			return string.trim();
137
		}
138
	}
139

    
140
	@Override
141
	public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, BerlinModelImportState state) {
142
		boolean success = true;
143
		@SuppressWarnings("rawtypes")
144
        Set<TaxonBase> taxaToSave = new HashSet<>();
145

    
146
		@SuppressWarnings("unchecked")
147
        Map<String, TaxonBase<?>> taxonMap = partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE);
148

    
149
		ResultSet rs = partitioner.getResultSet();
150

    
151
		try {
152
			//map to store the mapping of duplicate berlin model occurrences to their real distributions
153
			//duplicated may occur due to area mappings from BM areas to TDWG areas
154
			Map<Integer, String> duplicateMap = new HashMap<>();
155
			int oldTaxonId = -1;
156
			TaxonDescription oldDescription = null;
157
			int i = 0;
158
			int countDescriptions = 0;
159
			int countDistributions = 0;
160
			int countDuplicates = 0;
161
			//for each reference
162
            while (rs.next()){
163

    
164
            	if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("Facts handled: " + (i-1));}
165

    
166
                int occurrenceId = rs.getInt("OccurrenceId");
167
                int newTaxonId = rs.getInt("taxonId");
168
                String notes = nullSafeTrim(rs.getString("occNotes"));
169

    
170
                Integer emStatusId = nullSafeInt(rs, "emOccurSumCatId");
171

    
172
                try {
173
                	//status
174
                	PresenceAbsenceTerm status = null;
175
                	String alternativeStatusString = null;
176
					if (emStatusId != null){
177
						status = BerlinModelTransformer.occStatus2PresenceAbsence(emStatusId);
178
					}else{
179
						String[] stringArray = new String[]{rs.getString("Native"), rs.getString("Introduced"), rs.getString("Cultivated")};
180
						alternativeStatusString = CdmUtils.concat(",", stringArray);
181
					}
182

    
183
					Reference sourceRef = state.getTransactionalSourceReference();
184

    
185
					List<NamedArea> areas = makeAreaList(state, partitioner, rs, occurrenceId);
186

    
187
                    //create description(elements)
188
                    TaxonDescription taxonDescription = getTaxonDescription(newTaxonId, oldTaxonId, oldDescription, taxonMap, occurrenceId, sourceRef);
189
                    for (NamedArea area : areas){
190
                    	Distribution distribution = Distribution.NewInstance(area, status);
191
                        if (status == null){
192
                        	AnnotationType annotationType = AnnotationType.EDITORIAL();
193
                        	Annotation annotation = Annotation.NewInstance(alternativeStatusString, annotationType, null);
194
                        	distribution.addAnnotation(annotation);
195
                        	distribution.addMarker(Marker.NewInstance(MarkerType.PUBLISH(), false));
196
                        }
197
//                      distribution.setCitation(sourceRef);
198
                        if (taxonDescription != null) {
199
                        	Distribution duplicate = checkIsNoDuplicate(taxonDescription, distribution, duplicateMap , occurrenceId);
200
                            if (duplicate == null){
201
                            	taxonDescription.addElement(distribution);
202
	                            distribution.addImportSource(String.valueOf(occurrenceId), NAMESPACE, state.getTransactionalSourceReference(), null);
203
	                        	countDistributions++;
204
	                            if (taxonDescription != oldDescription){
205
	                            	taxaToSave.add(taxonDescription.getTaxon());
206
	                                oldDescription = taxonDescription;
207
	                                countDescriptions++;
208
	                            }
209
                            }else{
210
                            	countDuplicates++;
211
                            	duplicate.addImportSource(String.valueOf(occurrenceId), NAMESPACE, state.getTransactionalSourceReference(), null);
212
                            	logger.info("Distribution is duplicate");	                           }
213
                        } else {
214
                        	logger.warn("Distribution " + area.getLabel() + " ignored. OccurrenceId = " + occurrenceId);
215
	                       	success = false;
216
	                    }
217
                        //notes
218
                        if (isNotBlank(notes)){
219
                        	Annotation annotation = Annotation.NewInstance(notes, Language.DEFAULT());
220
                        	distribution.addAnnotation(annotation);
221
                        }
222
                    }
223
                } catch (UnknownCdmTypeException e) {
224
                     logger.error("Unknown presenceAbsence status id: " + emStatusId);
225
                	e.printStackTrace();
226
                     success = false;
227
                }
228
            }
229

    
230
            logger.info("Distributions: " + countDistributions + ", Descriptions: " + countDescriptions );
231
			logger.info("Duplicate occurrences: "  + (countDuplicates));
232

    
233
			logger.info("Taxa to save: " + taxaToSave.size());
234
			getTaxonService().save(taxaToSave);
235

    
236
			return success;
237
		} catch (SQLException e) {
238
			logger.error("SQLException:" +  e);
239
			return false;
240
		}
241
	}
242

    
243
	/**
244
	 * @param state
245
	 * @param partitioner
246
	 * @param rs
247
	 * @param occurrenceId
248
	 * @param tdwgCodeString
249
	 * @param emCodeString
250
	 * @return
251
	 * @throws SQLException
252
	 */
253
	//Create area list
254
	private List<NamedArea> makeAreaList(BerlinModelImportState state,
255
	        @SuppressWarnings("rawtypes") ResultSetPartitioner partitioner,
256
	        ResultSet rs, int occurrenceId) throws SQLException {
257

    
258
	    List<NamedArea> areas = new ArrayList<>();
259

    
260
		if (state.getConfig().isUseEmAreaVocabulary()){
261
		    Integer areaId = rs.getInt("AreaId");
262
			NamedArea area = getAreaByAreaId(areaId);
263
			if (area == null){
264
			    logger.warn("Area for areaId " + areaId + " not found.");
265
			}
266
			areas.add(area);
267
		}else{
268
	        String tdwgCodeString = rs.getString("TDWGCode");
269
	        String emCodeString = state.getConfig().isIncludesAreaEmCode() ? rs.getString("EMCode") : null;
270

    
271
			if (tdwgCodeString != null){
272

    
273
				String[] tdwgCodes = new String[]{tdwgCodeString};
274
				if (state.getConfig().isSplitTdwgCodes()){
275
					tdwgCodes = tdwgCodeString.split(";");
276
				}
277

    
278
				for (String tdwgCode : tdwgCodes){
279
					NamedArea area = TdwgAreaProvider.getAreaByTdwgAbbreviation(tdwgCode.trim());
280
			    	if (area == null){
281
			    		area = getOtherAreas(state, emCodeString, tdwgCodeString);
282
			    	}
283
			    	if (area != null){
284
			    		areas.add(area);
285
			    	}
286
				}
287
			 }
288

    
289
			 if (areas.size()== 0){
290
				 NamedArea area = getOtherAreas(state, emCodeString, tdwgCodeString);
291
				 if (area != null){
292
			         areas.add(area);
293
			   }
294
			 }
295
			 if (areas.size() == 0){
296
				 String areaId = rs.getString("AreaId");
297
				 logger.warn("No areas defined for occurrence " + occurrenceId + ". EMCode: " + CdmUtils.Nz(emCodeString).trim() + ". AreaId: " + areaId );
298
			 }
299
		}
300
		return areas;
301
	}
302

    
303
	@Override
304
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, BerlinModelImportState state) {
305

    
306
		try{
307

    
308
		    Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
309
			Set<String> taxonIdSet = new HashSet<String>();
310
			while (rs.next()){
311
				handleForeignKey(rs, taxonIdSet, "taxonId");
312
			}
313

    
314
			//taxon map
315
			String nameSpace = BerlinModelTaxonImport.NAMESPACE;
316
			Class<?> cdmClass = TaxonBase.class;
317
			Set<String> idSet = taxonIdSet;
318
			@SuppressWarnings("unchecked")
319
            Map<String, ? extends CdmBase> objectMap = (Map<String, TaxonBase<?>>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
320
			result.put(nameSpace, objectMap);
321

    
322
			return result;
323
		} catch (SQLException e) {
324
			throw new RuntimeException(e);
325
		}
326
	}
327

    
328

    
329
	/**
330
     * Tests if a distribution with the same tdwgArea and the same status already exists in the description.
331
     * If so the old distribution is returned
332
     * @param description
333
     * @param tdwgArea
334
     * @return false, if dupplicate exists. True otherwise.
335
     */
336
    private Distribution checkIsNoDuplicate(TaxonDescription description, Distribution distribution, Map<Integer, String> duplicateMap, Integer bmDistributionId){
337
    	for (DescriptionElementBase descElBase : description.getElements()){
338
    		if (descElBase.isInstanceOf(Distribution.class)){
339
    			Distribution oldDistr = HibernateProxyHelper.deproxy(descElBase, Distribution.class);
340
    			NamedArea oldArea = oldDistr.getArea();
341
    			if (oldArea != null && oldArea.equals(distribution.getArea())){
342
    				PresenceAbsenceTerm oldStatus = oldDistr.getStatus();
343
    				if (oldStatus != null && oldStatus.equals(distribution.getStatus())){
344
    					duplicateMap.put(bmDistributionId, oldDistr.getSources().iterator().next().getIdInSource());
345
    					return oldDistr;
346
    				}
347
    			}
348
    		}
349
    	}
350
    	return null;
351
    }
352

    
353
	/**
354
	 * Use same TaxonDescription if two records belong to the same taxon
355
	 * @param newTaxonId
356
	 * @param oldTaxonId
357
	 * @param oldDescription
358
	 * @param taxonMap
359
	 * @return
360
	 */
361
	private TaxonDescription getTaxonDescription(int newTaxonId, int oldTaxonId, TaxonDescription oldDescription, Map<String, TaxonBase<?>> taxonMap, int occurrenceId, Reference sourceSec){
362
		TaxonDescription result = null;
363
		if (oldDescription == null || newTaxonId != oldTaxonId){
364
			TaxonBase<?> taxonBase = taxonMap.get(String.valueOf(newTaxonId));
365
			//TODO for testing
366
			//TaxonBase taxonBase = Taxon.NewInstance(TaxonNameFactory.NewBotanicalInstance(Rank.SPECIES()), null);
367
			Taxon taxon;
368
			if ( taxonBase instanceof Taxon ) {
369
				taxon = (Taxon) taxonBase;
370
			} else if (taxonBase != null) {
371
				logger.warn("TaxonBase for Occurrence " + occurrenceId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName());
372
				return null;
373
			} else {
374
				logger.warn("TaxonBase for Occurrence " + occurrenceId + " is null.");
375
				return null;
376
			}
377
			Set<TaxonDescription> descriptionSet= taxon.getDescriptions();
378
			if (descriptionSet.size() > 0) {
379
				result = descriptionSet.iterator().next();
380
			}else{
381
				result = TaxonDescription.NewInstance();
382
				result.setTitleCache(sourceSec.getTitleCache(), true);
383
				taxon.addDescription(result);
384
			}
385
		}else{
386
			result = oldDescription;
387
		}
388
		return result;
389
	}
390

    
391
	@Override
392
	protected boolean doCheck(BerlinModelImportState state){
393
		IOValidator<BerlinModelImportState> validator = new BerlinModelOccurrenceImportValidator();
394
		return validator.validate(state);
395
	}
396

    
397

    
398
	@Override
399
	protected boolean isIgnore(BerlinModelImportState state){
400
		if (! state.getConfig().isDoOccurrence()){
401
			return true;
402
		}else{
403
			if (!this.checkSqlServerColumnExists(state.getConfig().getSource(), "emOccurrence", "OccurrenceId")){
404
				logger.error("emOccurrence table or emOccurrenceId does not exist. Must ignore occurrence import");
405
				return true;
406
			}else{
407
				return false;
408
			}
409
		}
410
	}
411

    
412
}
(11-11/22)