Project

General

Profile

« Previous | Next » 

Revision 4a5ee6fe

Added by Andreas Müller almost 9 years ago

Adapt EM area hierarchy to filtering rules #3904

Also moving hidden area functionality from activator to occurrence
import class.

View differences:

app-import/src/main/java/eu/etaxonomy/cdm/io/berlinModel/in/BerlinModelOccurrenceImport.java
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

  
10
package eu.etaxonomy.cdm.io.berlinModel.in;
11

  
12
import java.net.URI;
13
import java.sql.ResultSet;
14
import java.sql.SQLException;
15
import java.util.ArrayList;
16
import java.util.HashMap;
17
import java.util.HashSet;
18
import java.util.List;
19
import java.util.Map;
20
import java.util.Set;
21
import java.util.UUID;
22

  
23
import org.apache.commons.lang.StringUtils;
24
import org.apache.log4j.Logger;
25
import org.springframework.stereotype.Component;
26
import org.springframework.transaction.TransactionStatus;
27

  
28
import eu.etaxonomy.cdm.common.CdmUtils;
29
import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper;
30
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
31
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelOccurrenceImportValidator;
32
import eu.etaxonomy.cdm.io.common.IOValidator;
33
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
34
import eu.etaxonomy.cdm.io.common.Source;
35
import eu.etaxonomy.cdm.io.common.TdwgAreaProvider;
36
import eu.etaxonomy.cdm.model.common.Annotation;
37
import eu.etaxonomy.cdm.model.common.AnnotationType;
38
import eu.etaxonomy.cdm.model.common.CdmBase;
39
import eu.etaxonomy.cdm.model.common.ExtensionType;
40
import eu.etaxonomy.cdm.model.common.Language;
41
import eu.etaxonomy.cdm.model.common.Marker;
42
import eu.etaxonomy.cdm.model.common.MarkerType;
43
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
44
import eu.etaxonomy.cdm.model.common.TermType;
45
import eu.etaxonomy.cdm.model.common.TermVocabulary;
46
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
47
import eu.etaxonomy.cdm.model.description.Distribution;
48
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
49
import eu.etaxonomy.cdm.model.description.TaxonDescription;
50
import eu.etaxonomy.cdm.model.location.NamedArea;
51
import eu.etaxonomy.cdm.model.location.NamedAreaLevel;
52
import eu.etaxonomy.cdm.model.location.NamedAreaType;
53
import eu.etaxonomy.cdm.model.reference.Reference;
54
import eu.etaxonomy.cdm.model.taxon.Taxon;
55
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
56
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
57

  
58

  
59
/**
60
 * @author a.mueller
61
 * @created 20.03.2008
62
 */
63
@Component
64
public class BerlinModelOccurrenceImport  extends BerlinModelImportBase {
65
	private static final String EM_AREA_NAMESPACE = "emArea";
66

  
67
	private static final Logger logger = Logger.getLogger(BerlinModelOccurrenceImport.class);
68

  
69
	public static final String NAMESPACE = "Occurrence";
70

  
71

  
72
	private static int modCount = 5000;
73
	private static final String pluralString = "occurrences";
74
	private static final String dbTableName = "emOccurrence";  //??
75

  
76

  
77
	/**
	 * Creates the occurrence import and hands the source table name and the
	 * plural label of the imported records to the base class.
	 */
	public BerlinModelOccurrenceImport() {
		super(dbTableName, pluralString);
	}
80

  
81
	@Override
82
	protected String getIdQuery(BerlinModelImportState state) {
83
		String result = " SELECT occurrenceId FROM " + getTableName();
84
		if (StringUtils.isNotBlank(state.getConfig().getOccurrenceFilter())){
85
			result += " WHERE " +  state.getConfig().getOccurrenceFilter();
86
		}
87
		return result;
88
	}
89

  
90
	@Override
91
	protected String getRecordQuery(BerlinModelImportConfigurator config) {
92
			String emCode = config.isIncludesAreaEmCode()? ", emArea.EMCode" : "";
93
			String strQuery =   //DISTINCT because otherwise emOccurrenceSource creates multiple records for a single distribution
94
            " SELECT DISTINCT PTaxon.RIdentifier AS taxonId, emOccurrence.OccurrenceId, emOccurrence.Native, emOccurrence.Introduced, " +
95
            		" emOccurrence.Cultivated, emOccurrence.Notes occNotes, " +
96
            		" emOccurSumCat.emOccurSumCatId, emOccurSumCat.Short, emOccurSumCat.Description, " +
97
                	" emOccurSumCat.OutputCode, emArea.AreaId, emArea.TDWGCode " + emCode +
98
                " FROM emOccurrence INNER JOIN " +
99
                	" emArea ON emOccurrence.AreaFk = emArea.AreaId INNER JOIN " +
100
                	" PTaxon ON emOccurrence.PTNameFk = PTaxon.PTNameFk AND emOccurrence.PTRefFk = PTaxon.PTRefFk LEFT OUTER JOIN " +
101
                	" emOccurSumCat ON emOccurrence.SummaryStatus = emOccurSumCat.emOccurSumCatId LEFT OUTER JOIN " +
102
                	" emOccurrenceSource ON emOccurrence.OccurrenceId = emOccurrenceSource.OccurrenceFk " +
103
            " WHERE (emOccurrence.OccurrenceId IN (" + ID_LIST_TOKEN + ")  )" +
104
                " ORDER BY PTaxon.RIdentifier";
105
		return strQuery;
106
	}
107

  
108
	private Map<Integer, NamedArea> euroMedAreas = new HashMap<Integer, NamedArea>();
109

  
110

  
111
	@Override
112
	public void doInvoke(BerlinModelImportState state) {
113
		if (state.getConfig().isUseEmAreaVocabulary()){
114
			try {
115
				createEuroMedAreas(state);
116
			} catch (Exception e) {
117
				logger.error("Exception occurred when trying to create euroMed Areas");
118
				e.printStackTrace();
119
				state.setSuccess(false);
120
			}
121
		}
122
		super.doInvoke(state);
123
		//reset
124
		euroMedAreas = new HashMap<Integer, NamedArea>();
125
	}
126

  
127
	private TermVocabulary<NamedArea> createEuroMedAreas(BerlinModelImportState state) throws SQLException {
128
		logger.warn("Start creating E+M areas");
129
		Source source = state.getConfig().getSource();
130
		Reference<?> sourceReference = state.getConfig().getSourceReference();
131

  
132
		TransactionStatus txStatus = this.startTransaction();
133

  
134
		sourceReference = getSourceReference(sourceReference);
135

  
136
		TermVocabulary<NamedArea> euroMedAreas = makeEmptyEuroMedVocabulary();
137

  
138
		MarkerType eurMarkerType = getMarkerType(state, BerlinModelTransformer.uuidEurArea, "eur", "eur Area", "eur");
139
		MarkerType euroMedAreaMarkerType = getMarkerType(state, BerlinModelTransformer.uuidEurMedArea, "EuroMedArea", "EuroMedArea", "EuroMedArea");
140
		ExtensionType isoCodeExtType = getExtensionType(state, BerlinModelTransformer.uuidIsoCode, "IsoCode", "IsoCode", "iso");
141
		ExtensionType tdwgCodeExtType = getExtensionType(state, BerlinModelTransformer.uuidTdwgAreaCode, "TDWG code", "TDWG Area code", "tdwg");
142
		ExtensionType mclCodeExtType = getExtensionType(state, BerlinModelTransformer.uuidMclCode, "MCL code", "MedCheckList code", "mcl");
143
		NamedAreaLevel areaLevelTop = getNamedAreaLevel(state, BerlinModelTransformer.uuidEuroMedAreaLevelTop, "Euro+Med top area level", "Euro+Med top area level. This level is only to be used for the area representing the complete Euro+Med area", "e+m top", null);
144
		NamedAreaLevel areaLevelEm1 = getNamedAreaLevel(state, BerlinModelTransformer.uuidEuroMedAreaLevelFirst, "Euro+Med 1. area level", "Euro+Med 1. area level", "e+m 1.", null);
145
		NamedAreaLevel areaLevelEm2 = getNamedAreaLevel(state, BerlinModelTransformer.uuidEuroMedAreaLevelSecond, "Euro+Med 2. area level", "Euro+Med 2. area level", "Euro+Med 1. area level", null);
146

  
147

  
148
		String sql = "SELECT * , CASE WHEN EMCode = 'EM' THEN 'a' ELSE 'b' END as isEM " +
149
				" FROM emArea " +
150
				" ORDER BY isEM, EMCode";
151
		ResultSet rs = source.getResultSet(sql);
152

  
153
		NamedArea euroMedArea = null;
154
		NamedArea lastLevel2Area = null;
155

  
156
		//euroMedArea (EMCode = 'EM')
157
		rs.next();
158
		euroMedArea = makeSingleEuroMedArea(rs, eurMarkerType, euroMedAreaMarkerType, isoCodeExtType, tdwgCodeExtType, mclCodeExtType,
159
				areaLevelTop, areaLevelEm1 , areaLevelEm2, sourceReference, euroMedArea, lastLevel2Area);
160
		euroMedAreas.addTerm(euroMedArea);
161

  
162
		//all other areas
163
		while (rs.next()){
164
			NamedArea newArea = makeSingleEuroMedArea(rs, eurMarkerType, euroMedAreaMarkerType,
165
					isoCodeExtType, tdwgCodeExtType, mclCodeExtType,
166
					areaLevelTop, areaLevelEm1 , areaLevelEm2, sourceReference, euroMedArea, lastLevel2Area);
167
			if (newArea != null){
168
    			euroMedAreas.addTerm(newArea);
169
    			if (newArea.getPartOf().equals(euroMedArea)){
170
    				lastLevel2Area = newArea;
171
    			}
172
			}
173
		}
174
		getVocabularyService().saveOrUpdate(euroMedAreas);
175

  
176
		commitTransaction(txStatus);
177
		logger.warn("Created E+M areas");
178

  
179
		return euroMedAreas;
180
	}
181

  
182
	/**
183
	 * @param sourceReference
184
	 * @return
185
	 */
186
	private Reference<?> getSourceReference(Reference<?> sourceReference) {
187
		Reference<?> persistentSourceReference = getReferenceService().find(sourceReference.getUuid());  //just to be sure
188
		if (persistentSourceReference != null){
189
			sourceReference = persistentSourceReference;
190
		}
191
		return sourceReference;
192
	}
193

  
194
	/**
195
	 * @param eurMarkerType
196
	 * @param euroMedAreaMarkerType
197
	 * @param isoCodeExtType
198
	 * @param tdwgCodeExtType
199
	 * @param mclCodeExtType
200
	 * @param rs
201
	 * @param areaLevelEm2
202
	 * @param areaLevelEm1
203
	 * @param areaLevelTop
204
	 * @throws SQLException
205
	 */
206
	private NamedArea makeSingleEuroMedArea(ResultSet rs, MarkerType eurMarkerType,
207
			MarkerType euroMedAreaMarkerType, ExtensionType isoCodeExtType,
208
			ExtensionType tdwgCodeExtType, ExtensionType mclCodeExtType,
209
			NamedAreaLevel areaLevelTop, NamedAreaLevel areaLevelEm1, NamedAreaLevel areaLevelEm2,
210
			Reference<?> sourceReference, NamedArea euroMedArea, NamedArea level2Area) throws SQLException {
211
		Integer areaId = rs.getInt("AreaId");
212
		String emCode = nullSafeTrim(rs.getString("EMCode"));
213
		String isoCode = nullSafeTrim(rs.getString("ISOCode"));
214
		String tdwgCode = nullSafeTrim(rs.getString("TDWGCode"));
215
		String unit = nullSafeTrim(rs.getString("Unit"));
216
//				      ,[Status]
217
//				      ,[OutputOrder]
218
		boolean eurMarker = rs.getBoolean("eur");
219
		boolean euroMedAreaMarker = rs.getBoolean("EuroMedArea");
220
		String notes = nullSafeTrim(rs.getString("Notes"));
221
		String mclCode = nullSafeTrim(rs.getString("MCLCode"));
222
		String geoSearch = nullSafeTrim(rs.getString("NameForGeoSearch"));
223

  
224

  
225

  
226
		if (isBlank(emCode)){
227
			emCode = unit;
228
		}
229

  
230
		//uuid
231
		UUID uuid = BerlinModelTransformer.getEMAreaUuid(emCode);
232
		NamedArea area = (NamedArea)getTermService().find(uuid);
233
		if (area == null){
234
			//label
235
			area = NamedArea.NewInstance(geoSearch, unit, emCode);
236
			if (uuid != null){
237
				area.setUuid(uuid);
238
			}else{
239
			    if (areaId == 211 || areaId == 213){  //Additional Azores and Canary Is. area are merged into primary area, see also area.addSource part below
240
			        return null;
241
			    }
242
				logger.warn("Uuid for emCode could not be defined: " + emCode);
243
			}
244
		}
245

  
246

  
247
		//code
248
		area.setIdInVocabulary(emCode);
249
		//notes
250
		if (StringUtils.isNotEmpty(notes)){
251
			area.addAnnotation(Annotation.NewInstance(notes, AnnotationType.EDITORIAL(), Language.DEFAULT()));
252
		}
253
		//markers
254
		area.addMarker(Marker.NewInstance(eurMarkerType, eurMarker));
255
		area.addMarker(Marker.NewInstance(euroMedAreaMarkerType, euroMedAreaMarker));
256

  
257
		//extensions
258
		if (isNotBlank(isoCode)){
259
			area.addExtension(isoCode, isoCodeExtType);
260
		}
261
		if (isNotBlank(tdwgCode)){
262
			area.addExtension(tdwgCode, tdwgCodeExtType);
263
		}
264
		if (isNotBlank(mclCode)){
265
			area.addExtension(mclCode, mclCodeExtType);
266
		}
267

  
268
		//type
269
		area.setType(NamedAreaType.ADMINISTRATION_AREA());
270

  
271
		//source
272
		area.addSource(OriginalSourceType.Import, String.valueOf(areaId), EM_AREA_NAMESPACE, sourceReference, null);
273
		//add duplicate area ids for canary
274
		if (areaId == 624){ //Canary Is.
275
		    area.addSource(OriginalSourceType.Import, String.valueOf(213), EM_AREA_NAMESPACE, sourceReference, null);
276
		}
277
		if (areaId == 210){//Azores
278
            area.addSource(OriginalSourceType.Import, String.valueOf(211), EM_AREA_NAMESPACE, sourceReference, null);
279
        }
280

  
281
		//parent
282
		if (euroMedArea != null){
283
			if (emCode.contains("(")){
284
				area.setPartOf(level2Area);
285
				area.setLevel(areaLevelEm2);
286
			}else{
287
				area.setPartOf(euroMedArea);
288
				area.setLevel(areaLevelEm1);
289
			}
290
		}else{
291
			area.setLevel(areaLevelTop);
292
		}
293
		this.euroMedAreas.put(areaId, area);
294

  
295
		//save
296
		getTermService().saveOrUpdate(area);
297

  
298
		return area;
299
	}
300

  
301
	private String nullSafeTrim(String string) {
302
		if (string == null){
303
			return null;
304
		}else{
305
			return string.trim();
306
		}
307
	}
308

  
309
	/**
310
	 *
311
	 */
312
	private TermVocabulary<NamedArea> makeEmptyEuroMedVocabulary() {
313
		TermType type = TermType.NamedArea;
314
		String description = "Euro+Med area vocabulary";
315
		String label = "E+M areas";
316
		String abbrev = null;
317
		URI termSourceUri = null;
318
		TermVocabulary<NamedArea> result = TermVocabulary.NewInstance(type, description, label, abbrev, termSourceUri);
319

  
320
		result.setUuid(BerlinModelTransformer.uuidVocEuroMedAreas);
321
		getVocabularyService().save(result);
322
		return result;
323
	}
324

  
325
	@Override
326
	public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) {
327
		boolean success = true;
328
		Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
329

  
330
		Map<String, TaxonBase<?>> taxonMap = partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE);
331

  
332
		ResultSet rs = partitioner.getResultSet();
333

  
334
		try {
335
			//map to store the mapping of duplicate berlin model occurrences to their real distributions
336
			//duplicated may occur due to area mappings from BM areas to TDWG areas
337
			Map<Integer, String> duplicateMap = new HashMap<Integer, String>();
338
			int oldTaxonId = -1;
339
			TaxonDescription oldDescription = null;
340
			int i = 0;
341
			int countDescriptions = 0;
342
			int countDistributions = 0;
343
			int countDuplicates = 0;
344
			//for each reference
345
            while (rs.next()){
346

  
347
            	if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("Facts handled: " + (i-1));}
348

  
349
                int occurrenceId = rs.getInt("OccurrenceId");
350
                int newTaxonId = rs.getInt("taxonId");
351
                String notes = nullSafeTrim(rs.getString("occNotes"));
352

  
353
                Integer emStatusId = nullSafeInt(rs, "emOccurSumCatId");
354

  
355
                try {
356
                	//status
357
                	PresenceAbsenceTerm status = null;
358
                	String alternativeStatusString = null;
359
					if (emStatusId != null){
360
						status = BerlinModelTransformer.occStatus2PresenceAbsence(emStatusId);
361
					}else{
362
						String[] stringArray = new String[]{rs.getString("Native"), rs.getString("Introduced"), rs.getString("Cultivated")};
363
						alternativeStatusString = CdmUtils.concat(",", stringArray);
364
					}
365

  
366
					Reference<?> sourceRef = state.getTransactionalSourceReference();
367

  
368
					List<NamedArea> areas = makeAreaList(state, rs,	occurrenceId);
369

  
370
                    //create description(elements)
371
                    TaxonDescription taxonDescription = getTaxonDescription(newTaxonId, oldTaxonId, oldDescription, taxonMap, occurrenceId, sourceRef);
372
                    for (NamedArea area : areas){
373
                    	Distribution distribution = Distribution.NewInstance(area, status);
374
                        if (status == null){
375
                        	AnnotationType annotationType = AnnotationType.EDITORIAL();
376
                        	Annotation annotation = Annotation.NewInstance(alternativeStatusString, annotationType, null);
377
                        	distribution.addAnnotation(annotation);
378
                        	distribution.addMarker(Marker.NewInstance(MarkerType.PUBLISH(), false));
379
                        }
380
//                      distribution.setCitation(sourceRef);
381
                        if (taxonDescription != null) {
382
                        	Distribution duplicate = checkIsNoDuplicate(taxonDescription, distribution, duplicateMap , occurrenceId);
383
                            if (duplicate == null){
384
                            	taxonDescription.addElement(distribution);
385
	                            distribution.addImportSource(String.valueOf(occurrenceId), NAMESPACE, state.getTransactionalSourceReference(), null);
386
	                        	countDistributions++;
387
	                            if (taxonDescription != oldDescription){
388
	                            	taxaToSave.add(taxonDescription.getTaxon());
389
	                                oldDescription = taxonDescription;
390
	                                countDescriptions++;
391
	                            }
392
                            }else{
393
                            	countDuplicates++;
394
                            	duplicate.addImportSource(String.valueOf(occurrenceId), NAMESPACE, state.getTransactionalSourceReference(), null);
395
                            	logger.info("Distribution is duplicate");	                           }
396
                        } else {
397
                        	logger.warn("Distribution " + area.getLabel() + " ignored. OccurrenceId = " + occurrenceId);
398
	                       	success = false;
399
	                    }
400
                        //notes
401
                        if (isNotBlank(notes)){
402
                        	Annotation annotation = Annotation.NewInstance(notes, Language.DEFAULT());
403
                        	distribution.addAnnotation(annotation);
404
                        }
405
                    }
406
                } catch (UnknownCdmTypeException e) {
407
                     logger.error("Unknown presenceAbsence status id: " + emStatusId);
408
                	e.printStackTrace();
409
                     success = false;
410
                }
411
            }
412

  
413
            logger.info("Distributions: " + countDistributions + ", Descriptions: " + countDescriptions );
414
			logger.info("Duplicate occurrences: "  + (countDuplicates));
415

  
416
			logger.info("Taxa to save: " + taxaToSave.size());
417
			getTaxonService().save(taxaToSave);
418

  
419
			return success;
420
		} catch (SQLException e) {
421
			logger.error("SQLException:" +  e);
422
			return false;
423
		}
424
	}
425

  
426
	/**
427
	 * @param state
428
	 * @param rs
429
	 * @param occurrenceId
430
	 * @param tdwgCodeString
431
	 * @param emCodeString
432
	 * @return
433
	 * @throws SQLException
434
	 */
435
	//Create area list
436
	private List<NamedArea> makeAreaList(BerlinModelImportState state, ResultSet rs, int occurrenceId) throws SQLException {
437
		List<NamedArea> areas = new ArrayList<NamedArea>();
438

  
439
		if (state.getConfig().isUseEmAreaVocabulary()){
440
			Integer areaId = rs.getInt("AreaId");
441
	        NamedArea area = this.euroMedAreas.get(areaId);
442
			areas.add(area);
443
		}else{
444
	        String tdwgCodeString = rs.getString("TDWGCode");
445
	        String emCodeString = state.getConfig().isIncludesAreaEmCode() ? rs.getString("EMCode") : null;
446

  
447
			if (tdwgCodeString != null){
448

  
449
				String[] tdwgCodes = new String[]{tdwgCodeString};
450
				if (state.getConfig().isSplitTdwgCodes()){
451
					tdwgCodes = tdwgCodeString.split(";");
452
				}
453

  
454
				for (String tdwgCode : tdwgCodes){
455
					NamedArea area = TdwgAreaProvider.getAreaByTdwgAbbreviation(tdwgCode.trim());
456
			    	if (area == null){
457
			    		area = getOtherAreas(state, emCodeString, tdwgCodeString);
458
			    	}
459
			    	if (area != null){
460
			    		areas.add(area);
461
			    	}
462
				}
463
			 }
464

  
465
			 if (areas.size()== 0){
466
				 NamedArea area = getOtherAreas(state, emCodeString, tdwgCodeString);
467
				 if (area != null){
468
			         areas.add(area);
469
			   }
470
			 }
471
			 if (areas.size() == 0){
472
				 String areaId = rs.getString("AreaId");
473
				 logger.warn("No areas defined for occurrence " + occurrenceId + ". EMCode: " + CdmUtils.Nz(emCodeString).trim() + ". AreaId: " + areaId );
474
			 }
475
		}
476
		return areas;
477
	}
478

  
479
	@Override
480
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, BerlinModelImportState state) {
481
		String nameSpace;
482
		Class<?> cdmClass;
483
		Set<String> idSet;
484
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
485

  
486
		try{
487
			Set<String> taxonIdSet = new HashSet<String>();
488
			while (rs.next()){
489
				handleForeignKey(rs, taxonIdSet, "taxonId");
490
			}
491

  
492
			//taxon map
493
			nameSpace = BerlinModelTaxonImport.NAMESPACE;
494
			cdmClass = TaxonBase.class;
495
			idSet = taxonIdSet;
496
			Map<String, TaxonBase> objectMap = (Map<String, TaxonBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
497
			result.put(nameSpace, objectMap);
498

  
499
		} catch (SQLException e) {
500
			throw new RuntimeException(e);
501
		}
502
		return result;
503
	}
504

  
505

  
506

  
507
	/**
508
     * Tests if a distribution with the same tdwgArea and the same status already exists in the description.
509
     * If so the old distribution is returned
510
     * @param description
511
     * @param tdwgArea
512
     * @return false, if dupplicate exists. True otherwise.
513
     */
514
    private Distribution checkIsNoDuplicate(TaxonDescription description, Distribution distribution, Map<Integer, String> duplicateMap, Integer bmDistributionId){
515
    	for (DescriptionElementBase descElBase : description.getElements()){
516
    		if (descElBase.isInstanceOf(Distribution.class)){
517
    			Distribution oldDistr = HibernateProxyHelper.deproxy(descElBase, Distribution.class);
518
    			NamedArea oldArea = oldDistr.getArea();
519
    			if (oldArea != null && oldArea.equals(distribution.getArea())){
520
    				PresenceAbsenceTerm oldStatus = oldDistr.getStatus();
521
    				if (oldStatus != null && oldStatus.equals(distribution.getStatus())){
522
    					duplicateMap.put(bmDistributionId, oldDistr.getSources().iterator().next().getIdInSource());
523
    					return oldDistr;
524
    				}
525
    			}
526
    		}
527
    	}
528
    	return null;
529
    }
530

  
531
	/**
532
	 * Use same TaxonDescription if two records belong to the same taxon
533
	 * @param newTaxonId
534
	 * @param oldTaxonId
535
	 * @param oldDescription
536
	 * @param taxonMap
537
	 * @return
538
	 */
539
	private TaxonDescription getTaxonDescription(int newTaxonId, int oldTaxonId, TaxonDescription oldDescription, Map<String, TaxonBase<?>> taxonMap, int occurrenceId, Reference<?> sourceSec){
540
		TaxonDescription result = null;
541
		if (oldDescription == null || newTaxonId != oldTaxonId){
542
			TaxonBase<?> taxonBase = taxonMap.get(String.valueOf(newTaxonId));
543
			//TODO for testing
544
			//TaxonBase taxonBase = Taxon.NewInstance(BotanicalName.NewInstance(Rank.SPECIES()), null);
545
			Taxon taxon;
546
			if ( taxonBase instanceof Taxon ) {
547
				taxon = (Taxon) taxonBase;
548
			} else if (taxonBase != null) {
549
				logger.warn("TaxonBase for Occurrence " + occurrenceId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName());
550
				return null;
551
			} else {
552
				logger.warn("TaxonBase for Occurrence " + occurrenceId + " is null.");
553
				return null;
554
			}
555
			Set<TaxonDescription> descriptionSet= taxon.getDescriptions();
556
			if (descriptionSet.size() > 0) {
557
				result = descriptionSet.iterator().next();
558
			}else{
559
				result = TaxonDescription.NewInstance();
560
				result.setTitleCache(sourceSec.getTitleCache(), true);
561
				taxon.addDescription(result);
562
			}
563
		}else{
564
			result = oldDescription;
565
		}
566
		return result;
567
	}
568

  
569
	@Override
570
	protected boolean doCheck(BerlinModelImportState state){
571
		IOValidator<BerlinModelImportState> validator = new BerlinModelOccurrenceImportValidator();
572
		return validator.validate(state);
573
	}
574

  
575

  
576
	@Override
577
	protected boolean isIgnore(BerlinModelImportState state){
578
		if (! state.getConfig().isDoOccurrence()){
579
			return true;
580
		}else{
581
			if (!this.checkSqlServerColumnExists(state.getConfig().getSource(), "emOccurrence", "OccurrenceId")){
582
				logger.error("emOccurrence table or emOccurrenceId does not exist. Must ignore occurrence import");
583
				return true;
584
			}else{
585
				return false;
586
			}
587
		}
588
	}
589

  
590
}
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

  
10
package eu.etaxonomy.cdm.io.berlinModel.in;
11

  
12
import java.net.URI;
13
import java.sql.ResultSet;
14
import java.sql.SQLException;
15
import java.util.ArrayList;
16
import java.util.HashMap;
17
import java.util.HashSet;
18
import java.util.List;
19
import java.util.Map;
20
import java.util.Set;
21
import java.util.UUID;
22

  
23
import org.apache.commons.lang.StringUtils;
24
import org.apache.log4j.Logger;
25
import org.springframework.stereotype.Component;
26
import org.springframework.transaction.TransactionStatus;
27

  
28
import eu.etaxonomy.cdm.common.CdmUtils;
29
import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper;
30
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
31
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelOccurrenceImportValidator;
32
import eu.etaxonomy.cdm.io.common.CdmImportBase;
33
import eu.etaxonomy.cdm.io.common.IOValidator;
34
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
35
import eu.etaxonomy.cdm.io.common.Source;
36
import eu.etaxonomy.cdm.io.common.TdwgAreaProvider;
37
import eu.etaxonomy.cdm.model.common.Annotation;
38
import eu.etaxonomy.cdm.model.common.AnnotationType;
39
import eu.etaxonomy.cdm.model.common.CdmBase;
40
import eu.etaxonomy.cdm.model.common.ExtensionType;
41
import eu.etaxonomy.cdm.model.common.Language;
42
import eu.etaxonomy.cdm.model.common.Marker;
43
import eu.etaxonomy.cdm.model.common.MarkerType;
44
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
45
import eu.etaxonomy.cdm.model.common.TermType;
46
import eu.etaxonomy.cdm.model.common.TermVocabulary;
47
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
48
import eu.etaxonomy.cdm.model.description.Distribution;
49
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
50
import eu.etaxonomy.cdm.model.description.TaxonDescription;
51
import eu.etaxonomy.cdm.model.location.NamedArea;
52
import eu.etaxonomy.cdm.model.location.NamedAreaLevel;
53
import eu.etaxonomy.cdm.model.location.NamedAreaType;
54
import eu.etaxonomy.cdm.model.reference.Reference;
55
import eu.etaxonomy.cdm.model.taxon.Taxon;
56
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
57
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
58

  
59

  
60
/**
61
 * @author a.mueller
62
 * @created 20.03.2008
63
 */
64
@Component
65
public class BerlinModelOccurrenceImport  extends BerlinModelImportBase {
66
	/** Namespace used when original E+M area ids are added as import sources of named areas. */
	private static final String EM_AREA_NAMESPACE = "emArea";

	/** Namespace used when original occurrence ids are added as import sources of distributions. */
	public static final String NAMESPACE = "Occurrence";

	private static final Logger logger = Logger.getLogger(BerlinModelOccurrenceImport.class);

	/** Table name handed to the base class constructor. */
	private static final String dbTableName = "emOccurrence";  //??

	/** Plural label handed to the base class constructor. */
	private static final String pluralString = "occurrences";

	/** Number of handled records after which a progress message is logged. */
	private static int modCount = 5000;

	/**
	 * Creates the occurrence import, passing table name and plural label to the base class.
	 */
	public BerlinModelOccurrenceImport(){
		super(dbTableName, pluralString);
	}
81

  
82
	@Override
83
	protected String getIdQuery(BerlinModelImportState state) {
84
		String result = " SELECT occurrenceId FROM " + getTableName();
85
		if (StringUtils.isNotBlank(state.getConfig().getOccurrenceFilter())){
86
			result += " WHERE " +  state.getConfig().getOccurrenceFilter();
87
		}
88
		return result;
89
	}
90

  
91
	@Override
92
	protected String getRecordQuery(BerlinModelImportConfigurator config) {
93
			String emCode = config.isIncludesAreaEmCode()? ", emArea.EMCode" : "";
94
			String strQuery =   //DISTINCT because otherwise emOccurrenceSource creates multiple records for a single distribution
95
            " SELECT DISTINCT PTaxon.RIdentifier AS taxonId, emOccurrence.OccurrenceId, emOccurrence.Native, emOccurrence.Introduced, " +
96
            		" emOccurrence.Cultivated, emOccurrence.Notes occNotes, " +
97
            		" emOccurSumCat.emOccurSumCatId, emOccurSumCat.Short, emOccurSumCat.Description, " +
98
                	" emOccurSumCat.OutputCode, emArea.AreaId, emArea.TDWGCode " + emCode +
99
                " FROM emOccurrence INNER JOIN " +
100
                	" emArea ON emOccurrence.AreaFk = emArea.AreaId INNER JOIN " +
101
                	" PTaxon ON emOccurrence.PTNameFk = PTaxon.PTNameFk AND emOccurrence.PTRefFk = PTaxon.PTRefFk LEFT OUTER JOIN " +
102
                	" emOccurSumCat ON emOccurrence.SummaryStatus = emOccurSumCat.emOccurSumCatId LEFT OUTER JOIN " +
103
                	" emOccurrenceSource ON emOccurrence.OccurrenceId = emOccurrenceSource.OccurrenceFk " +
104
            " WHERE (emOccurrence.OccurrenceId IN (" + ID_LIST_TOKEN + ")  )" +
105
                " ORDER BY PTaxon.RIdentifier";
106
		return strQuery;
107
	}
108

  
109
	private Map<Integer, NamedArea> euroMedAreas = new HashMap<Integer, NamedArea>();
110

  
111

  
112
	@Override
113
	public void doInvoke(BerlinModelImportState state) {
114
		if (state.getConfig().isUseEmAreaVocabulary()){
115
			try {
116
				createEuroMedAreas(state);
117
			} catch (Exception e) {
118
				logger.error("Exception occurred when trying to create euroMed Areas");
119
				e.printStackTrace();
120
				state.setSuccess(false);
121
			}
122
		}
123
		super.doInvoke(state);
124
		//reset
125
		euroMedAreas = new HashMap<Integer, NamedArea>();
126
	}
127

  
128
	private TermVocabulary<NamedArea> createEuroMedAreas(BerlinModelImportState state) throws SQLException {
129
		logger.warn("Start creating E+M areas");
130
		Source source = state.getConfig().getSource();
131
		Reference<?> sourceReference = state.getConfig().getSourceReference();
132

  
133
		TransactionStatus txStatus = this.startTransaction();
134

  
135
		sourceReference = getSourceReference(sourceReference);
136

  
137
		TermVocabulary<NamedArea> euroMedAreas = makeEmptyEuroMedVocabulary();
138

  
139
		MarkerType eurMarkerType = getMarkerType(state, BerlinModelTransformer.uuidEurArea, "eur", "eur Area", "eur");
140
		MarkerType euroMedAreaMarkerType = getMarkerType(state, BerlinModelTransformer.uuidEurMedArea, "EuroMedArea", "EuroMedArea", "EuroMedArea");
141
		ExtensionType isoCodeExtType = getExtensionType(state, BerlinModelTransformer.uuidIsoCode, "IsoCode", "IsoCode", "iso");
142
		ExtensionType tdwgCodeExtType = getExtensionType(state, BerlinModelTransformer.uuidTdwgAreaCode, "TDWG code", "TDWG Area code", "tdwg");
143
		ExtensionType mclCodeExtType = getExtensionType(state, BerlinModelTransformer.uuidMclCode, "MCL code", "MedCheckList code", "mcl");
144
		NamedAreaLevel areaLevelTop = getNamedAreaLevel(state, BerlinModelTransformer.uuidEuroMedAreaLevelTop, "Euro+Med top area level", "Euro+Med top area level. This level is only to be used for the area representing the complete Euro+Med area", "e+m top", null);
145
		NamedAreaLevel areaLevelEm1 = getNamedAreaLevel(state, BerlinModelTransformer.uuidEuroMedAreaLevelFirst, "Euro+Med 1. area level", "Euro+Med 1. area level", "e+m 1.", null);
146
		NamedAreaLevel areaLevelEm2 = getNamedAreaLevel(state, BerlinModelTransformer.uuidEuroMedAreaLevelSecond, "Euro+Med 2. area level", "Euro+Med 2. area level", "Euro+Med 1. area level", null);
147

  
148

  
149
		String sql = "SELECT * , CASE WHEN EMCode = 'EM' THEN 'a' ELSE 'b' END as isEM " +
150
				" FROM emArea " +
151
				" ORDER BY isEM, EMCode";
152
		ResultSet rs = source.getResultSet(sql);
153

  
154
		NamedArea euroMedArea = null;
155
		NamedArea lastLevel1Area = null;
156

  
157
		//euroMedArea (EMCode = 'EM')
158
		rs.next();
159
		euroMedArea = makeSingleEuroMedArea(rs, eurMarkerType, euroMedAreaMarkerType, isoCodeExtType, tdwgCodeExtType, mclCodeExtType,
160
				areaLevelTop, areaLevelEm1 , areaLevelEm2, sourceReference, euroMedArea, lastLevel1Area);
161
		euroMedAreas.addTerm(euroMedArea);
162

  
163
		//all other areas
164
		while (rs.next()){
165
			NamedArea newArea = makeSingleEuroMedArea(rs, eurMarkerType, euroMedAreaMarkerType,
166
					isoCodeExtType, tdwgCodeExtType, mclCodeExtType,
167
					areaLevelTop, areaLevelEm1 , areaLevelEm2, sourceReference, euroMedArea, lastLevel1Area);
168
			if (newArea != null){
169
    			euroMedAreas.addTerm(newArea);
170
    			if (newArea.getPartOf().equals(euroMedArea)){
171
    				lastLevel1Area = newArea;
172
    			}
173
			}
174
		}
175
		emAreaFinetuning(euroMedAreas, areaLevelEm2);
176

  
177

  
178
		markAreasAsHidden(state, euroMedAreas);
179

  
180
	    getVocabularyService().saveOrUpdate(euroMedAreas);
181

  
182
		commitTransaction(txStatus);
183
		logger.warn("Created E+M areas");
184

  
185
		return euroMedAreas;
186
	}
187

  
188
	/**
189
     * @param areaLevelEm2
190
	 * @param euroMedAreas2
191
     */
192
    private void emAreaFinetuning(TermVocabulary<NamedArea> euroMedAreas, NamedAreaLevel areaLevelEm2) {
193
        //CZ
194
        NamedArea oldArea = euroMedAreas.getTermByIdInvocabulary("Cz");
195
        makeSubterm(oldArea, euroMedAreas.getTermByIdInvocabulary("Cs"), areaLevelEm2);
196
        makeSubterm(oldArea, euroMedAreas.getTermByIdInvocabulary("Sk"), areaLevelEm2);
197

  
198
        //Ju
199
        oldArea = euroMedAreas.getTermByIdInvocabulary("Ju");
200
        makeSubterm(oldArea, euroMedAreas.getTermByIdInvocabulary("BH"), areaLevelEm2);
201
        makeSubterm(oldArea, euroMedAreas.getTermByIdInvocabulary("Cg"), areaLevelEm2);
202
        makeSubterm(oldArea, euroMedAreas.getTermByIdInvocabulary("Ct"), areaLevelEm2);
203
        makeSubterm(oldArea, euroMedAreas.getTermByIdInvocabulary("Mk"), areaLevelEm2);
204
        makeSubterm(oldArea, euroMedAreas.getTermByIdInvocabulary("Sl"), areaLevelEm2);
205
        makeSubterm(oldArea, euroMedAreas.getTermByIdInvocabulary("Sr"), areaLevelEm2);
206

  
207
        //IJ
208
        oldArea = euroMedAreas.getTermByIdInvocabulary("IJ");
209
        makeSubterm(oldArea, euroMedAreas.getTermByIdInvocabulary("Ir"), areaLevelEm2);
210
        makeSubterm(oldArea, euroMedAreas.getTermByIdInvocabulary("Jo"), areaLevelEm2);
211

  
212
        //LS
213
        oldArea = euroMedAreas.getTermByIdInvocabulary("LS");
214
        makeSubterm(oldArea, euroMedAreas.getTermByIdInvocabulary("Le"), areaLevelEm2);
215
        makeSubterm(oldArea, euroMedAreas.getTermByIdInvocabulary("Sy"), areaLevelEm2);
216

  
217
    }
218

  
219
    //5.Mark areas to be hidden #3979 .5
220
    private void markAreasAsHidden(BerlinModelImportState state, TermVocabulary<NamedArea> euroMedAreasVoc) {
221

  
222
        try {
223

  
224
            @SuppressWarnings("unchecked")
225
            TermVocabulary<MarkerType> vocUserDefinedMarkerTypes = getVocabularyService().find(CdmImportBase.uuidUserDefinedMarkerTypeVocabulary);
226
            if (vocUserDefinedMarkerTypes == null){
227
                String message = "Marker type vocabulary could not be found. Hidden areas not added.";
228
                logger.error(message);
229
                System.out.println(message);
230
            }
231
            MarkerType hiddenAreaMarkerType = getMarkerType(state, BerlinModelTransformer.uuidHiddenArea, "Hidden Area","Used to hide distributions for the named areas in publications", null, vocUserDefinedMarkerTypes);
232

  
233
            //Add hidden area marker to Rs(C) and Rs(N)
234
            hideArea(euroMedAreasVoc, hiddenAreaMarkerType, BerlinModelTransformer.uuidRs);
235
            hideArea(euroMedAreasVoc, hiddenAreaMarkerType, BerlinModelTransformer.uuidRs_B);
236
            hideArea(euroMedAreasVoc, hiddenAreaMarkerType, BerlinModelTransformer.uuidRs_C);
237
            hideArea(euroMedAreasVoc, hiddenAreaMarkerType, BerlinModelTransformer.uuidRs_E);
238
            hideArea(euroMedAreasVoc, hiddenAreaMarkerType, BerlinModelTransformer.uuidRs_N);
239
            hideArea(euroMedAreasVoc, hiddenAreaMarkerType, BerlinModelTransformer.uuidRs_K);
240
            hideArea(euroMedAreasVoc, hiddenAreaMarkerType, BerlinModelTransformer.uuidRs_W);
241
        } catch (Exception e) {
242
            e.printStackTrace();
243
            logger.error("Exception in markAreasAsHidden: " + e.getMessage());
244
        }
245

  
246
    }
247

  
248
    private void hideArea(TermVocabulary<NamedArea> euroMedAreasVoc, MarkerType hiddenAreaMarkerType, UUID areaUuid) {
249
        for (NamedArea namedArea : euroMedAreasVoc){
250
            if (namedArea.getUuid().equals(areaUuid)){
251
                namedArea.addMarker(Marker.NewInstance(hiddenAreaMarkerType, true));
252
                return;
253
            }
254
        }
255
    }
256

  
257
    /**
258
     * @param oldArea
259
     * @param namedArea
260
     * @param areaLevelEm2
261
     */
262
    private void makeSubterm(NamedArea oldArea, NamedArea namedArea, NamedAreaLevel areaLevelEm2) {
263
        namedArea.setLevel(areaLevelEm2);
264
        namedArea.setPartOf(oldArea);
265
    }
266

  
267
    /**
268
	 * @param sourceReference
269
	 * @return
270
	 */
271
	private Reference<?> getSourceReference(Reference<?> sourceReference) {
272
		Reference<?> persistentSourceReference = getReferenceService().find(sourceReference.getUuid());  //just to be sure
273
		if (persistentSourceReference != null){
274
			sourceReference = persistentSourceReference;
275
		}
276
		return sourceReference;
277
	}
278

  
279
	/**
280
	 * @param eurMarkerType
281
	 * @param euroMedAreaMarkerType
282
	 * @param isoCodeExtType
283
	 * @param tdwgCodeExtType
284
	 * @param mclCodeExtType
285
	 * @param rs
286
	 * @param areaLevelEm2
287
	 * @param areaLevelEm1
288
	 * @param areaLevelTop
289
	 * @throws SQLException
290
	 */
291
	private NamedArea makeSingleEuroMedArea(ResultSet rs, MarkerType eurMarkerType,
292
			MarkerType euroMedAreaMarkerType, ExtensionType isoCodeExtType,
293
			ExtensionType tdwgCodeExtType, ExtensionType mclCodeExtType,
294
			NamedAreaLevel areaLevelTop, NamedAreaLevel areaLevelEm1, NamedAreaLevel areaLevelEm2,
295
			Reference<?> sourceReference, NamedArea euroMedArea, NamedArea level1Area) throws SQLException {
296
		Integer areaId = rs.getInt("AreaId");
297
		String emCode = nullSafeTrim(rs.getString("EMCode"));
298
		String isoCode = nullSafeTrim(rs.getString("ISOCode"));
299
		String tdwgCode = nullSafeTrim(rs.getString("TDWGCode"));
300
		String unit = nullSafeTrim(rs.getString("Unit"));
301
//				      ,[Status]
302
//				      ,[OutputOrder]
303
		boolean eurMarker = rs.getBoolean("eur");
304
		boolean euroMedAreaMarker = rs.getBoolean("EuroMedArea");
305
		String notes = nullSafeTrim(rs.getString("Notes"));
306
		String mclCode = nullSafeTrim(rs.getString("MCLCode"));
307
		String geoSearch = nullSafeTrim(rs.getString("NameForGeoSearch"));
308

  
309

  
310

  
311
		if (isBlank(emCode)){
312
			emCode = unit;
313
		}
314

  
315
		//uuid
316
		UUID uuid = BerlinModelTransformer.getEMAreaUuid(emCode);
317
		NamedArea area = (NamedArea)getTermService().find(uuid);
318
		if (area == null){
319
			//label
320
			area = NamedArea.NewInstance(geoSearch, unit, emCode);
321
			if (uuid != null){
322
				area.setUuid(uuid);
323
			}else{
324
			    if (areaId == 211 || areaId == 213){  //Additional Azores and Canary Is. area are merged into primary area, see also area.addSource part below
325
			        return null;
326
			    }
327
				logger.warn("Uuid for emCode could not be defined: " + emCode);
328
			}
329
		}
330

  
331

  
332
		//code
333
		area.setIdInVocabulary(emCode);
334
		//notes
335
		if (StringUtils.isNotEmpty(notes)){
336
			area.addAnnotation(Annotation.NewInstance(notes, AnnotationType.EDITORIAL(), Language.DEFAULT()));
337
		}
338
		//markers
339
		area.addMarker(Marker.NewInstance(eurMarkerType, eurMarker));
340
		area.addMarker(Marker.NewInstance(euroMedAreaMarkerType, euroMedAreaMarker));
341

  
342
		//extensions
343
		if (isNotBlank(isoCode)){
344
			area.addExtension(isoCode, isoCodeExtType);
345
		}
346
		if (isNotBlank(tdwgCode)){
347
			area.addExtension(tdwgCode, tdwgCodeExtType);
348
		}
349
		if (isNotBlank(mclCode)){
350
			area.addExtension(mclCode, mclCodeExtType);
351
		}
352

  
353
		//type
354
		area.setType(NamedAreaType.ADMINISTRATION_AREA());
355

  
356
		//source
357
		area.addSource(OriginalSourceType.Import, String.valueOf(areaId), EM_AREA_NAMESPACE, sourceReference, null);
358
		//add duplicate area ids for canary
359
		if (areaId == 624){ //Canary Is.
360
		    area.addSource(OriginalSourceType.Import, String.valueOf(213), EM_AREA_NAMESPACE, sourceReference, null);
361
		}
362
		if (areaId == 210){//Azores
363
            area.addSource(OriginalSourceType.Import, String.valueOf(211), EM_AREA_NAMESPACE, sourceReference, null);
364
        }
365

  
366
		//parent
367
		if (euroMedArea != null){
368
			if (emCode.contains("(")){
369
				area.setPartOf(level1Area);
370
				area.setLevel(areaLevelEm2);
371
			}else{
372
				area.setPartOf(euroMedArea);
373
				area.setLevel(areaLevelEm1);
374
			}
375
		}else{
376
			area.setLevel(areaLevelTop);
377
		}
378
		this.euroMedAreas.put(areaId, area);
379

  
380
		//save
381
		getTermService().saveOrUpdate(area);
382

  
383
		return area;
384
	}
385

  
386
	private String nullSafeTrim(String string) {
387
		if (string == null){
388
			return null;
389
		}else{
390
			return string.trim();
391
		}
392
	}
393

  
394
	/**
395
	 *
396
	 */
397
	private TermVocabulary<NamedArea> makeEmptyEuroMedVocabulary() {
398
		TermType type = TermType.NamedArea;
399
		String description = "Euro+Med area vocabulary";
400
		String label = "E+M areas";
401
		String abbrev = null;
402
		URI termSourceUri = null;
403
		TermVocabulary<NamedArea> result = TermVocabulary.NewInstance(type, description, label, abbrev, termSourceUri);
404

  
405
		result.setUuid(BerlinModelTransformer.uuidVocEuroMedAreas);
406
		getVocabularyService().save(result);
407
		return result;
408
	}
409

  
410
	@Override
411
	public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) {
412
		boolean success = true;
413
		Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
414

  
415
		Map<String, TaxonBase<?>> taxonMap = partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE);
416

  
417
		ResultSet rs = partitioner.getResultSet();
418

  
419
		try {
420
			//map to store the mapping of duplicate berlin model occurrences to their real distributions
421
			//duplicated may occur due to area mappings from BM areas to TDWG areas
422
			Map<Integer, String> duplicateMap = new HashMap<Integer, String>();
423
			int oldTaxonId = -1;
424
			TaxonDescription oldDescription = null;
425
			int i = 0;
426
			int countDescriptions = 0;
427
			int countDistributions = 0;
428
			int countDuplicates = 0;
429
			//for each reference
430
            while (rs.next()){
431

  
432
            	if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("Facts handled: " + (i-1));}
433

  
434
                int occurrenceId = rs.getInt("OccurrenceId");
435
                int newTaxonId = rs.getInt("taxonId");
436
                String notes = nullSafeTrim(rs.getString("occNotes"));
437

  
438
                Integer emStatusId = nullSafeInt(rs, "emOccurSumCatId");
439

  
440
                try {
441
                	//status
442
                	PresenceAbsenceTerm status = null;
443
                	String alternativeStatusString = null;
444
					if (emStatusId != null){
445
						status = BerlinModelTransformer.occStatus2PresenceAbsence(emStatusId);
446
					}else{
447
						String[] stringArray = new String[]{rs.getString("Native"), rs.getString("Introduced"), rs.getString("Cultivated")};
448
						alternativeStatusString = CdmUtils.concat(",", stringArray);
449
					}
450

  
451
					Reference<?> sourceRef = state.getTransactionalSourceReference();
452

  
453
					List<NamedArea> areas = makeAreaList(state, rs,	occurrenceId);
454

  
455
                    //create description(elements)
456
                    TaxonDescription taxonDescription = getTaxonDescription(newTaxonId, oldTaxonId, oldDescription, taxonMap, occurrenceId, sourceRef);
457
                    for (NamedArea area : areas){
458
                    	Distribution distribution = Distribution.NewInstance(area, status);
459
                        if (status == null){
460
                        	AnnotationType annotationType = AnnotationType.EDITORIAL();
461
                        	Annotation annotation = Annotation.NewInstance(alternativeStatusString, annotationType, null);
462
                        	distribution.addAnnotation(annotation);
463
                        	distribution.addMarker(Marker.NewInstance(MarkerType.PUBLISH(), false));
464
                        }
465
//                      distribution.setCitation(sourceRef);
466
                        if (taxonDescription != null) {
467
                        	Distribution duplicate = checkIsNoDuplicate(taxonDescription, distribution, duplicateMap , occurrenceId);
468
                            if (duplicate == null){
469
                            	taxonDescription.addElement(distribution);
470
	                            distribution.addImportSource(String.valueOf(occurrenceId), NAMESPACE, state.getTransactionalSourceReference(), null);
471
	                        	countDistributions++;
472
	                            if (taxonDescription != oldDescription){
473
	                            	taxaToSave.add(taxonDescription.getTaxon());
474
	                                oldDescription = taxonDescription;
475
	                                countDescriptions++;
476
	                            }
477
                            }else{
478
                            	countDuplicates++;
479
                            	duplicate.addImportSource(String.valueOf(occurrenceId), NAMESPACE, state.getTransactionalSourceReference(), null);
480
                            	logger.info("Distribution is duplicate");	                           }
481
                        } else {
482
                        	logger.warn("Distribution " + area.getLabel() + " ignored. OccurrenceId = " + occurrenceId);
483
	                       	success = false;
484
	                    }
485
                        //notes
486
                        if (isNotBlank(notes)){
487
                        	Annotation annotation = Annotation.NewInstance(notes, Language.DEFAULT());
488
                        	distribution.addAnnotation(annotation);
489
                        }
490
                    }
491
                } catch (UnknownCdmTypeException e) {
492
                     logger.error("Unknown presenceAbsence status id: " + emStatusId);
493
                	e.printStackTrace();
494
                     success = false;
495
                }
496
            }
497

  
498
            logger.info("Distributions: " + countDistributions + ", Descriptions: " + countDescriptions );
499
			logger.info("Duplicate occurrences: "  + (countDuplicates));
500

  
501
			logger.info("Taxa to save: " + taxaToSave.size());
502
			getTaxonService().save(taxaToSave);
503

  
504
			return success;
505
		} catch (SQLException e) {
506
			logger.error("SQLException:" +  e);
507
			return false;
508
		}
509
	}
510

  
511
	/**
512
	 * @param state
513
	 * @param rs
514
	 * @param occurrenceId
515
	 * @param tdwgCodeString
516
	 * @param emCodeString
517
	 * @return
518
	 * @throws SQLException
519
	 */
520
	//Create area list
521
	private List<NamedArea> makeAreaList(BerlinModelImportState state, ResultSet rs, int occurrenceId) throws SQLException {
522
		List<NamedArea> areas = new ArrayList<NamedArea>();
523

  
524
		if (state.getConfig().isUseEmAreaVocabulary()){
525
			Integer areaId = rs.getInt("AreaId");
526
	        NamedArea area = this.euroMedAreas.get(areaId);
527
			areas.add(area);
528
		}else{
529
	        String tdwgCodeString = rs.getString("TDWGCode");
530
	        String emCodeString = state.getConfig().isIncludesAreaEmCode() ? rs.getString("EMCode") : null;
531

  
532
			if (tdwgCodeString != null){
533

  
534
				String[] tdwgCodes = new String[]{tdwgCodeString};
535
				if (state.getConfig().isSplitTdwgCodes()){
536
					tdwgCodes = tdwgCodeString.split(";");
537
				}
538

  
539
				for (String tdwgCode : tdwgCodes){
540
					NamedArea area = TdwgAreaProvider.getAreaByTdwgAbbreviation(tdwgCode.trim());
541
			    	if (area == null){
542
			    		area = getOtherAreas(state, emCodeString, tdwgCodeString);
543
			    	}
544
			    	if (area != null){
545
			    		areas.add(area);
546
			    	}
547
				}
548
			 }
549

  
550
			 if (areas.size()== 0){
551
				 NamedArea area = getOtherAreas(state, emCodeString, tdwgCodeString);
552
				 if (area != null){
553
			         areas.add(area);
554
			   }
555
			 }
556
			 if (areas.size() == 0){
557
				 String areaId = rs.getString("AreaId");
558
				 logger.warn("No areas defined for occurrence " + occurrenceId + ". EMCode: " + CdmUtils.Nz(emCodeString).trim() + ". AreaId: " + areaId );
559
			 }
560
		}
561
		return areas;
562
	}
563

  
564
	@Override
565
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, BerlinModelImportState state) {
566
		String nameSpace;
567
		Class<?> cdmClass;
568
		Set<String> idSet;
569
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
570

  
571
		try{
572
			Set<String> taxonIdSet = new HashSet<String>();
573
			while (rs.next()){
574
				handleForeignKey(rs, taxonIdSet, "taxonId");
575
			}
576

  
577
			//taxon map
578
			nameSpace = BerlinModelTaxonImport.NAMESPACE;
579
			cdmClass = TaxonBase.class;
580
			idSet = taxonIdSet;
581
			Map<String, TaxonBase> objectMap = (Map<String, TaxonBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
582
			result.put(nameSpace, objectMap);
583

  
584
		} catch (SQLException e) {
585
			throw new RuntimeException(e);
586
		}
587
		return result;
588
	}
589

  
590

  
591

  
592
	/**
593
     * Tests if a distribution with the same tdwgArea and the same status already exists in the description.
594
     * If so the old distribution is returned
595
     * @param description
596
     * @param tdwgArea
597
     * @return false, if dupplicate exists. True otherwise.
598
     */
599
    private Distribution checkIsNoDuplicate(TaxonDescription description, Distribution distribution, Map<Integer, String> duplicateMap, Integer bmDistributionId){
600
    	for (DescriptionElementBase descElBase : description.getElements()){
601
    		if (descElBase.isInstanceOf(Distribution.class)){
602
    			Distribution oldDistr = HibernateProxyHelper.deproxy(descElBase, Distribution.class);
603
    			NamedArea oldArea = oldDistr.getArea();
604
    			if (oldArea != null && oldArea.equals(distribution.getArea())){
605
    				PresenceAbsenceTerm oldStatus = oldDistr.getStatus();
606
    				if (oldStatus != null && oldStatus.equals(distribution.getStatus())){
607
    					duplicateMap.put(bmDistributionId, oldDistr.getSources().iterator().next().getIdInSource());
608
    					return oldDistr;
609
    				}
610
    			}
611
    		}
612
    	}
613
    	return null;
614
    }
615

  
616
	/**
617
	 * Use same TaxonDescription if two records belong to the same taxon
618
	 * @param newTaxonId
619
	 * @param oldTaxonId
620
	 * @param oldDescription
621
	 * @param taxonMap
622
	 * @return
623
	 */
624
	private TaxonDescription getTaxonDescription(int newTaxonId, int oldTaxonId, TaxonDescription oldDescription, Map<String, TaxonBase<?>> taxonMap, int occurrenceId, Reference<?> sourceSec){
625
		TaxonDescription result = null;
626
		if (oldDescription == null || newTaxonId != oldTaxonId){
627
			TaxonBase<?> taxonBase = taxonMap.get(String.valueOf(newTaxonId));
628
			//TODO for testing
629
			//TaxonBase taxonBase = Taxon.NewInstance(BotanicalName.NewInstance(Rank.SPECIES()), null);
630
			Taxon taxon;
631
			if ( taxonBase instanceof Taxon ) {
632
				taxon = (Taxon) taxonBase;
633
			} else if (taxonBase != null) {
634
				logger.warn("TaxonBase for Occurrence " + occurrenceId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName());
635
				return null;
636
			} else {
637
				logger.warn("TaxonBase for Occurrence " + occurrenceId + " is null.");
638
				return null;
639
			}
640
			Set<TaxonDescription> descriptionSet= taxon.getDescriptions();
641
			if (descriptionSet.size() > 0) {
642
				result = descriptionSet.iterator().next();
643
			}else{
644
				result = TaxonDescription.NewInstance();
645
				result.setTitleCache(sourceSec.getTitleCache(), true);
646
				taxon.addDescription(result);
647
			}
648
		}else{
649
			result = oldDescription;
650
		}
651
		return result;
652
	}
653

  
654
	@Override
655
	protected boolean doCheck(BerlinModelImportState state){
656
		IOValidator<BerlinModelImportState> validator = new BerlinModelOccurrenceImportValidator();
657
		return validator.validate(state);
658
	}
659

  
660

  
661
	@Override
662
	protected boolean isIgnore(BerlinModelImportState state){
663
		if (! state.getConfig().isDoOccurrence()){
664
			return true;
665
		}else{
666
			if (!this.checkSqlServerColumnExists(state.getConfig().getSource(), "emOccurrence", "OccurrenceId")){
667
				logger.error("emOccurrence table or emOccurrenceId does not exist. Must ignore occurrence import");
668
				return true;
669
			}else{
670
				return false;
671
			}
672
		}
673
	}
674

  
675
}

Also available in: Unified diff