Revision 514f7053
Added by Andreas Müller almost 9 years ago
app-import/src/main/java/eu/etaxonomy/cdm/io/berlinModel/in/BerlinModelOccurrenceImport.java | ||
---|---|---|
1 |
/** |
|
2 |
* Copyright (C) 2007 EDIT |
|
3 |
* European Distributed Institute of Taxonomy |
|
4 |
* http://www.e-taxonomy.eu |
|
5 |
* |
|
6 |
* The contents of this file are subject to the Mozilla Public License Version 1.1 |
|
7 |
* See LICENSE.TXT at the top of this package for the full license terms. |
|
8 |
*/ |
|
9 |
|
|
10 |
package eu.etaxonomy.cdm.io.berlinModel.in; |
|
11 |
|
|
12 |
import java.net.URI; |
|
13 |
import java.sql.ResultSet; |
|
14 |
import java.sql.SQLException; |
|
15 |
import java.util.ArrayList; |
|
16 |
import java.util.HashMap; |
|
17 |
import java.util.HashSet; |
|
18 |
import java.util.List; |
|
19 |
import java.util.Map; |
|
20 |
import java.util.Set; |
|
21 |
import java.util.UUID; |
|
22 |
|
|
23 |
import org.apache.commons.lang.StringUtils; |
|
24 |
import org.apache.log4j.Logger; |
|
25 |
import org.springframework.stereotype.Component; |
|
26 |
import org.springframework.transaction.TransactionStatus; |
|
27 |
|
|
28 |
import eu.etaxonomy.cdm.common.CdmUtils; |
|
29 |
import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper; |
|
30 |
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer; |
|
31 |
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelOccurrenceImportValidator; |
|
32 |
import eu.etaxonomy.cdm.io.common.IOValidator; |
|
33 |
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner; |
|
34 |
import eu.etaxonomy.cdm.io.common.Source; |
|
35 |
import eu.etaxonomy.cdm.io.common.TdwgAreaProvider; |
|
36 |
import eu.etaxonomy.cdm.model.common.Annotation; |
|
37 |
import eu.etaxonomy.cdm.model.common.AnnotationType; |
|
38 |
import eu.etaxonomy.cdm.model.common.CdmBase; |
|
39 |
import eu.etaxonomy.cdm.model.common.ExtensionType; |
|
40 |
import eu.etaxonomy.cdm.model.common.Language; |
|
41 |
import eu.etaxonomy.cdm.model.common.Marker; |
|
42 |
import eu.etaxonomy.cdm.model.common.MarkerType; |
|
43 |
import eu.etaxonomy.cdm.model.common.OriginalSourceType; |
|
44 |
import eu.etaxonomy.cdm.model.common.TermType; |
|
45 |
import eu.etaxonomy.cdm.model.common.TermVocabulary; |
|
46 |
import eu.etaxonomy.cdm.model.description.DescriptionElementBase; |
|
47 |
import eu.etaxonomy.cdm.model.description.Distribution; |
|
48 |
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm; |
|
49 |
import eu.etaxonomy.cdm.model.description.TaxonDescription; |
|
50 |
import eu.etaxonomy.cdm.model.location.NamedArea; |
|
51 |
import eu.etaxonomy.cdm.model.location.NamedAreaLevel; |
|
52 |
import eu.etaxonomy.cdm.model.location.NamedAreaType; |
|
53 |
import eu.etaxonomy.cdm.model.reference.Reference; |
|
54 |
import eu.etaxonomy.cdm.model.taxon.Taxon; |
|
55 |
import eu.etaxonomy.cdm.model.taxon.TaxonBase; |
|
56 |
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException; |
|
57 |
|
|
58 |
|
|
59 |
/** |
|
60 |
* @author a.mueller |
|
61 |
* @created 20.03.2008 |
|
62 |
*/ |
|
63 |
@Component |
|
64 |
public class BerlinModelOccurrenceImport extends BerlinModelImportBase { |
|
65 |
/** Namespace under which the imported emArea records are registered as original sources of the created areas. */
private static final String EM_AREA_NAMESPACE = "emArea";

private static final Logger logger = Logger.getLogger(BerlinModelOccurrenceImport.class);

/** Namespace used for the import sources attached to the distributions created by this import. */
public static final String NAMESPACE = "Occurrence";

/** Number of handled records after which a progress message is logged; never reassigned, hence final. */
private static final int modCount = 5000;
private static final String pluralString = "occurrences";
private static final String dbTableName = "emOccurrence";  //??

/**
 * Creates the import and passes the source table name and the plural label
 * of the imported objects to the base class.
 */
public BerlinModelOccurrenceImport(){
    super(dbTableName, pluralString);
}
|
80 |
|
|
81 |
@Override |
|
82 |
protected String getIdQuery(BerlinModelImportState state) { |
|
83 |
String result = " SELECT occurrenceId FROM " + getTableName(); |
|
84 |
if (StringUtils.isNotBlank(state.getConfig().getOccurrenceFilter())){ |
|
85 |
result += " WHERE " + state.getConfig().getOccurrenceFilter(); |
|
86 |
} |
|
87 |
return result; |
|
88 |
} |
|
89 |
|
|
90 |
@Override |
|
91 |
protected String getRecordQuery(BerlinModelImportConfigurator config) { |
|
92 |
String emCode = config.isIncludesAreaEmCode()? ", emArea.EMCode" : ""; |
|
93 |
String strQuery = //DISTINCT because otherwise emOccurrenceSource creates multiple records for a single distribution |
|
94 |
" SELECT DISTINCT PTaxon.RIdentifier AS taxonId, emOccurrence.OccurrenceId, emOccurrence.Native, emOccurrence.Introduced, " + |
|
95 |
" emOccurrence.Cultivated, emOccurrence.Notes occNotes, " + |
|
96 |
" emOccurSumCat.emOccurSumCatId, emOccurSumCat.Short, emOccurSumCat.Description, " + |
|
97 |
" emOccurSumCat.OutputCode, emArea.AreaId, emArea.TDWGCode " + emCode + |
|
98 |
" FROM emOccurrence INNER JOIN " + |
|
99 |
" emArea ON emOccurrence.AreaFk = emArea.AreaId INNER JOIN " + |
|
100 |
" PTaxon ON emOccurrence.PTNameFk = PTaxon.PTNameFk AND emOccurrence.PTRefFk = PTaxon.PTRefFk LEFT OUTER JOIN " + |
|
101 |
" emOccurSumCat ON emOccurrence.SummaryStatus = emOccurSumCat.emOccurSumCatId LEFT OUTER JOIN " + |
|
102 |
" emOccurrenceSource ON emOccurrence.OccurrenceId = emOccurrenceSource.OccurrenceFk " + |
|
103 |
" WHERE (emOccurrence.OccurrenceId IN (" + ID_LIST_TOKEN + ") )" + |
|
104 |
" ORDER BY PTaxon.RIdentifier"; |
|
105 |
return strQuery; |
|
106 |
} |
|
107 |
|
|
108 |
private Map<Integer, NamedArea> euroMedAreas = new HashMap<Integer, NamedArea>(); |
|
109 |
|
|
110 |
|
|
111 |
@Override |
|
112 |
public void doInvoke(BerlinModelImportState state) { |
|
113 |
if (state.getConfig().isUseEmAreaVocabulary()){ |
|
114 |
try { |
|
115 |
createEuroMedAreas(state); |
|
116 |
} catch (Exception e) { |
|
117 |
logger.error("Exception occurred when trying to create euroMed Areas"); |
|
118 |
e.printStackTrace(); |
|
119 |
state.setSuccess(false); |
|
120 |
} |
|
121 |
} |
|
122 |
super.doInvoke(state); |
|
123 |
//reset |
|
124 |
euroMedAreas = new HashMap<Integer, NamedArea>(); |
|
125 |
} |
|
126 |
|
|
127 |
private TermVocabulary<NamedArea> createEuroMedAreas(BerlinModelImportState state) throws SQLException { |
|
128 |
logger.warn("Start creating E+M areas"); |
|
129 |
Source source = state.getConfig().getSource(); |
|
130 |
Reference<?> sourceReference = state.getConfig().getSourceReference(); |
|
131 |
|
|
132 |
TransactionStatus txStatus = this.startTransaction(); |
|
133 |
|
|
134 |
sourceReference = getSourceReference(sourceReference); |
|
135 |
|
|
136 |
TermVocabulary<NamedArea> euroMedAreas = makeEmptyEuroMedVocabulary(); |
|
137 |
|
|
138 |
MarkerType eurMarkerType = getMarkerType(state, BerlinModelTransformer.uuidEurArea, "eur", "eur Area", "eur"); |
|
139 |
MarkerType euroMedAreaMarkerType = getMarkerType(state, BerlinModelTransformer.uuidEurMedArea, "EuroMedArea", "EuroMedArea", "EuroMedArea"); |
|
140 |
ExtensionType isoCodeExtType = getExtensionType(state, BerlinModelTransformer.uuidIsoCode, "IsoCode", "IsoCode", "iso"); |
|
141 |
ExtensionType tdwgCodeExtType = getExtensionType(state, BerlinModelTransformer.uuidTdwgAreaCode, "TDWG code", "TDWG Area code", "tdwg"); |
|
142 |
ExtensionType mclCodeExtType = getExtensionType(state, BerlinModelTransformer.uuidMclCode, "MCL code", "MedCheckList code", "mcl"); |
|
143 |
NamedAreaLevel areaLevelTop = getNamedAreaLevel(state, BerlinModelTransformer.uuidAreaLevelTop, "Euro+Med top area level", "Euro+Med top area level. This level is only to be used for the area representing the complete Euro+Med area", "e+m top", null); |
|
144 |
NamedAreaLevel areaLevelEm1 = getNamedAreaLevel(state, BerlinModelTransformer.uuidAreaLevelFirst, "Euro+Med 1. area level", "Euro+Med 1. area level", "e+m 1.", null); |
|
145 |
NamedAreaLevel areaLevelEm2 = getNamedAreaLevel(state, BerlinModelTransformer.uuidAreaLevelSecond, "Euro+Med 2. area level", "Euro+Med 2. area level", "Euro+Med 1. area level", null); |
|
146 |
|
|
147 |
|
|
148 |
String sql = "SELECT * , CASE WHEN EMCode = 'EM' THEN 'a' ELSE 'b' END as isEM " + |
|
149 |
" FROM emArea " + |
|
150 |
" ORDER BY isEM, EMCode"; |
|
151 |
ResultSet rs = source.getResultSet(sql); |
|
152 |
|
|
153 |
NamedArea euroMedArea = null; |
|
154 |
NamedArea lastLevel2Area = null; |
|
155 |
|
|
156 |
//euroMedArea (EMCode = 'EM') |
|
157 |
rs.next(); |
|
158 |
euroMedArea = makeSingleEuroMedArea(rs, eurMarkerType, euroMedAreaMarkerType, isoCodeExtType, tdwgCodeExtType, mclCodeExtType, |
|
159 |
areaLevelTop, areaLevelEm1 , areaLevelEm2, sourceReference, euroMedArea, lastLevel2Area); |
|
160 |
euroMedAreas.addTerm(euroMedArea); |
|
161 |
|
|
162 |
//all other areas |
|
163 |
while (rs.next()){ |
|
164 |
NamedArea newArea = makeSingleEuroMedArea(rs, eurMarkerType, euroMedAreaMarkerType, |
|
165 |
isoCodeExtType, tdwgCodeExtType, mclCodeExtType, |
|
166 |
areaLevelTop, areaLevelEm1 , areaLevelEm2, sourceReference, euroMedArea, lastLevel2Area); |
|
167 |
euroMedAreas.addTerm(newArea); |
|
168 |
if (newArea.getPartOf().equals(euroMedArea)){ |
|
169 |
lastLevel2Area = newArea; |
|
170 |
} |
|
171 |
getVocabularyService().saveOrUpdate(euroMedAreas); |
|
172 |
} |
|
173 |
|
|
174 |
commitTransaction(txStatus); |
|
175 |
logger.warn("Created E+M areas"); |
|
176 |
|
|
177 |
return euroMedAreas; |
|
178 |
} |
|
179 |
|
|
180 |
/** |
|
181 |
* @param sourceReference |
|
182 |
* @return |
|
183 |
*/ |
|
184 |
private Reference<?> getSourceReference(Reference<?> sourceReference) { |
|
185 |
Reference<?> persistentSourceReference = getReferenceService().find(sourceReference.getUuid()); //just to be sure |
|
186 |
if (persistentSourceReference != null){ |
|
187 |
sourceReference = persistentSourceReference; |
|
188 |
} |
|
189 |
return sourceReference; |
|
190 |
} |
|
191 |
|
|
192 |
/** |
|
193 |
* @param eurMarkerType |
|
194 |
* @param euroMedAreaMarkerType |
|
195 |
* @param isoCodeExtType |
|
196 |
* @param tdwgCodeExtType |
|
197 |
* @param mclCodeExtType |
|
198 |
* @param rs |
|
199 |
* @param areaLevelEm2 |
|
200 |
* @param areaLevelEm1 |
|
201 |
* @param areaLevelTop |
|
202 |
* @throws SQLException |
|
203 |
*/ |
|
204 |
private NamedArea makeSingleEuroMedArea(ResultSet rs, MarkerType eurMarkerType, |
|
205 |
MarkerType euroMedAreaMarkerType, ExtensionType isoCodeExtType, |
|
206 |
ExtensionType tdwgCodeExtType, ExtensionType mclCodeExtType, |
|
207 |
NamedAreaLevel areaLevelTop, NamedAreaLevel areaLevelEm1, NamedAreaLevel areaLevelEm2, |
|
208 |
Reference<?> sourceReference, NamedArea euroMedArea, NamedArea level2Area) throws SQLException { |
|
209 |
Integer areaId = rs.getInt("AreaId"); |
|
210 |
String emCode = nullSafeTrim(rs.getString("EMCode")); |
|
211 |
String isoCode = nullSafeTrim(rs.getString("ISOCode")); |
|
212 |
String tdwgCode = nullSafeTrim(rs.getString("TDWGCode")); |
|
213 |
String unit = nullSafeTrim(rs.getString("Unit")); |
|
214 |
// ,[Status] |
|
215 |
// ,[OutputOrder] |
|
216 |
boolean eurMarker = rs.getBoolean("eur"); |
|
217 |
boolean euroMedAreaMarker = rs.getBoolean("EuroMedArea"); |
|
218 |
String notes = nullSafeTrim(rs.getString("Notes")); |
|
219 |
String mclCode = nullSafeTrim(rs.getString("MCLCode")); |
|
220 |
String geoSearch = nullSafeTrim(rs.getString("NameForGeoSearch")); |
|
221 |
|
|
222 |
if (isBlank(emCode)){ |
|
223 |
emCode = unit; |
|
224 |
} |
|
225 |
|
|
226 |
//uuid |
|
227 |
UUID uuid = BerlinModelTransformer.getEMAreaUuid(emCode); |
|
228 |
NamedArea area = (NamedArea)getTermService().find(uuid); |
|
229 |
if (area == null){ |
|
230 |
//label |
|
231 |
area = NamedArea.NewInstance(geoSearch, unit, emCode); |
|
232 |
if (uuid != null){ |
|
233 |
area.setUuid(uuid); |
|
234 |
}else{ |
|
235 |
logger.warn("Uuuid for emCode could not be defined: " + emCode); |
|
236 |
} |
|
237 |
} |
|
238 |
|
|
239 |
|
|
240 |
//code |
|
241 |
area.setIdInVocabulary(emCode); |
|
242 |
//notes |
|
243 |
if (StringUtils.isNotEmpty(notes)){ |
|
244 |
area.addAnnotation(Annotation.NewInstance(notes, AnnotationType.EDITORIAL(), Language.DEFAULT())); |
|
245 |
} |
|
246 |
//markers |
|
247 |
area.addMarker(Marker.NewInstance(eurMarkerType, eurMarker)); |
|
248 |
area.addMarker(Marker.NewInstance(euroMedAreaMarkerType, euroMedAreaMarker)); |
|
249 |
|
|
250 |
//extensions |
|
251 |
if (isNotBlank(isoCode)){ |
|
252 |
area.addExtension(isoCode, isoCodeExtType); |
|
253 |
} |
|
254 |
if (isNotBlank(tdwgCode)){ |
|
255 |
area.addExtension(tdwgCode, tdwgCodeExtType); |
|
256 |
} |
|
257 |
if (isNotBlank(mclCode)){ |
|
258 |
area.addExtension(mclCode, mclCodeExtType); |
|
259 |
} |
|
260 |
|
|
261 |
//type |
|
262 |
area.setType(NamedAreaType.ADMINISTRATION_AREA()); |
|
263 |
|
|
264 |
//source |
|
265 |
area.addSource(OriginalSourceType.Import, String.valueOf(areaId), EM_AREA_NAMESPACE, sourceReference, null); |
|
266 |
|
|
267 |
//parent |
|
268 |
if (euroMedArea != null){ |
|
269 |
if (emCode.contains("(")){ |
|
270 |
area.setPartOf(level2Area); |
|
271 |
area.setLevel(areaLevelEm2); |
|
272 |
}else{ |
|
273 |
area.setPartOf(euroMedArea); |
|
274 |
area.setLevel(areaLevelEm1); |
|
275 |
} |
|
276 |
}else{ |
|
277 |
area.setLevel(areaLevelTop); |
|
278 |
} |
|
279 |
this.euroMedAreas.put(areaId, area); |
|
280 |
|
|
281 |
//save |
|
282 |
getTermService().saveOrUpdate(area); |
|
283 |
|
|
284 |
return area; |
|
285 |
} |
|
286 |
|
|
287 |
private String nullSafeTrim(String string) { |
|
288 |
if (string == null){ |
|
289 |
return null; |
|
290 |
}else{ |
|
291 |
return string.trim(); |
|
292 |
} |
|
293 |
} |
|
294 |
|
|
295 |
/** |
|
296 |
* |
|
297 |
*/ |
|
298 |
private TermVocabulary<NamedArea> makeEmptyEuroMedVocabulary() { |
|
299 |
TermType type = TermType.NamedArea; |
|
300 |
String description = "Euro+Med area vocabulary"; |
|
301 |
String label = "E+M areas"; |
|
302 |
String abbrev = null; |
|
303 |
URI termSourceUri = null; |
|
304 |
TermVocabulary<NamedArea> result = TermVocabulary.NewInstance(type, description, label, abbrev, termSourceUri); |
|
305 |
getVocabularyService().save(result); |
|
306 |
return result; |
|
307 |
} |
|
308 |
|
|
309 |
@Override |
|
310 |
public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) { |
|
311 |
boolean success = true; |
|
312 |
Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>(); |
|
313 |
|
|
314 |
Map<String, TaxonBase<?>> taxonMap = (Map<String, TaxonBase<?>>) partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE); |
|
315 |
|
|
316 |
ResultSet rs = partitioner.getResultSet(); |
|
317 |
|
|
318 |
try { |
|
319 |
//map to store the mapping of duplicate berlin model occurrences to their real distributions |
|
320 |
//duplicated may occur due to area mappings from BM areas to TDWG areas |
|
321 |
Map<Integer, String> duplicateMap = new HashMap<Integer, String>(); |
|
322 |
int oldTaxonId = -1; |
|
323 |
TaxonDescription oldDescription = null; |
|
324 |
int i = 0; |
|
325 |
int countDescriptions = 0; |
|
326 |
int countDistributions = 0; |
|
327 |
int countDuplicates = 0; |
|
328 |
//for each reference |
|
329 |
while (rs.next()){ |
|
330 |
|
|
331 |
if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("Facts handled: " + (i-1));} |
|
332 |
|
|
333 |
int occurrenceId = rs.getInt("OccurrenceId"); |
|
334 |
int newTaxonId = rs.getInt("taxonId"); |
|
335 |
String notes = nullSafeTrim(rs.getString("occNotes")); |
|
336 |
|
|
337 |
Integer emStatusId = nullSafeInt(rs, "emOccurSumCatId"); |
|
338 |
|
|
339 |
try { |
|
340 |
//status |
|
341 |
PresenceAbsenceTerm status = null; |
|
342 |
String alternativeStatusString = null; |
|
343 |
if (emStatusId != null){ |
|
344 |
status = BerlinModelTransformer.occStatus2PresenceAbsence(emStatusId); |
|
345 |
}else{ |
|
346 |
String[] stringArray = new String[]{rs.getString("Native"), rs.getString("Introduced"), rs.getString("Cultivated")}; |
|
347 |
alternativeStatusString = CdmUtils.concat(",", stringArray); |
|
348 |
} |
|
349 |
|
|
350 |
Reference<?> sourceRef = state.getTransactionalSourceReference(); |
|
351 |
|
|
352 |
List<NamedArea> areas = makeAreaList(state, rs, occurrenceId); |
|
353 |
|
|
354 |
//create description(elements) |
|
355 |
TaxonDescription taxonDescription = getTaxonDescription(newTaxonId, oldTaxonId, oldDescription, taxonMap, occurrenceId, sourceRef); |
|
356 |
for (NamedArea area : areas){ |
|
357 |
Distribution distribution = Distribution.NewInstance(area, status); |
|
358 |
if (status == null){ |
|
359 |
AnnotationType annotationType = AnnotationType.EDITORIAL(); |
|
360 |
Annotation annotation = Annotation.NewInstance(alternativeStatusString, annotationType, null); |
|
361 |
distribution.addAnnotation(annotation); |
|
362 |
distribution.addMarker(Marker.NewInstance(MarkerType.PUBLISH(), false)); |
|
363 |
} |
|
364 |
// distribution.setCitation(sourceRef); |
|
365 |
if (taxonDescription != null) { |
|
366 |
Distribution duplicate = checkIsNoDuplicate(taxonDescription, distribution, duplicateMap , occurrenceId); |
|
367 |
if (duplicate == null){ |
|
368 |
taxonDescription.addElement(distribution); |
|
369 |
distribution.addImportSource(String.valueOf(occurrenceId), NAMESPACE, state.getTransactionalSourceReference(), null); |
|
370 |
countDistributions++; |
|
371 |
if (taxonDescription != oldDescription){ |
|
372 |
taxaToSave.add(taxonDescription.getTaxon()); |
|
373 |
oldDescription = taxonDescription; |
|
374 |
countDescriptions++; |
|
375 |
} |
|
376 |
}else{ |
|
377 |
countDuplicates++; |
|
378 |
duplicate.addImportSource(String.valueOf(occurrenceId), NAMESPACE, state.getTransactionalSourceReference(), null); |
|
379 |
logger.info("Distribution is duplicate"); } |
|
380 |
} else { |
|
381 |
logger.warn("Distribution " + area.getLabel() + " ignored. OccurrenceId = " + occurrenceId); |
|
382 |
success = false; |
|
383 |
} |
|
384 |
//notes |
|
385 |
if (isNotBlank(notes)){ |
|
386 |
Annotation annotation = Annotation.NewInstance(notes, Language.DEFAULT()); |
|
387 |
distribution.addAnnotation(annotation); |
|
388 |
} |
|
389 |
} |
|
390 |
} catch (UnknownCdmTypeException e) { |
|
391 |
logger.error("Unknown presenceAbsence status id: " + emStatusId); |
|
392 |
e.printStackTrace(); |
|
393 |
success = false; |
|
394 |
} |
|
395 |
} |
|
396 |
|
|
397 |
logger.info("Distributions: " + countDistributions + ", Descriptions: " + countDescriptions ); |
|
398 |
logger.info("Duplicate occurrences: " + (countDuplicates)); |
|
399 |
|
|
400 |
logger.info("Taxa to save: " + taxaToSave.size()); |
|
401 |
getTaxonService().save(taxaToSave); |
|
402 |
|
|
403 |
return success; |
|
404 |
} catch (SQLException e) { |
|
405 |
logger.error("SQLException:" + e); |
|
406 |
return false; |
|
407 |
} |
|
408 |
} |
|
409 |
|
|
410 |
/** |
|
411 |
* @param state |
|
412 |
* @param rs |
|
413 |
* @param occurrenceId |
|
414 |
* @param tdwgCodeString |
|
415 |
* @param emCodeString |
|
416 |
* @return |
|
417 |
* @throws SQLException |
|
418 |
*/ |
|
419 |
//Create area list |
|
420 |
private List<NamedArea> makeAreaList(BerlinModelImportState state, ResultSet rs, int occurrenceId) throws SQLException { |
|
421 |
List<NamedArea> areas = new ArrayList<NamedArea>(); |
|
422 |
|
|
423 |
if (state.getConfig().isUseEmAreaVocabulary()){ |
|
424 |
Integer areaId = rs.getInt("AreaId"); |
|
425 |
NamedArea area = this.euroMedAreas.get(areaId); |
|
426 |
areas.add(area); |
|
427 |
}else{ |
|
428 |
String tdwgCodeString = rs.getString("TDWGCode"); |
|
429 |
String emCodeString = state.getConfig().isIncludesAreaEmCode() ? rs.getString("EMCode") : null; |
|
430 |
|
|
431 |
if (tdwgCodeString != null){ |
|
432 |
|
|
433 |
String[] tdwgCodes = new String[]{tdwgCodeString}; |
|
434 |
if (state.getConfig().isSplitTdwgCodes()){ |
|
435 |
tdwgCodes = tdwgCodeString.split(";"); |
|
436 |
} |
|
437 |
|
|
438 |
for (String tdwgCode : tdwgCodes){ |
|
439 |
NamedArea area = TdwgAreaProvider.getAreaByTdwgAbbreviation(tdwgCode.trim()); |
|
440 |
if (area == null){ |
|
441 |
area = getOtherAreas(state, emCodeString, tdwgCodeString); |
|
442 |
} |
|
443 |
if (area != null){ |
|
444 |
areas.add(area); |
|
445 |
} |
|
446 |
} |
|
447 |
} |
|
448 |
|
|
449 |
if (areas.size()== 0){ |
|
450 |
NamedArea area = getOtherAreas(state, emCodeString, tdwgCodeString); |
|
451 |
if (area != null){ |
|
452 |
areas.add(area); |
|
453 |
} |
|
454 |
} |
|
455 |
if (areas.size() == 0){ |
|
456 |
String areaId = rs.getString("AreaId"); |
|
457 |
logger.warn("No areas defined for occurrence " + occurrenceId + ". EMCode: " + CdmUtils.Nz(emCodeString).trim() + ". AreaId: " + areaId ); |
|
458 |
} |
|
459 |
} |
|
460 |
return areas; |
|
461 |
} |
|
462 |
|
|
463 |
@Override |
|
464 |
public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, BerlinModelImportState state) { |
|
465 |
String nameSpace; |
|
466 |
Class<?> cdmClass; |
|
467 |
Set<String> idSet; |
|
468 |
Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>(); |
|
469 |
|
|
470 |
try{ |
|
471 |
Set<String> taxonIdSet = new HashSet<String>(); |
|
472 |
while (rs.next()){ |
|
473 |
handleForeignKey(rs, taxonIdSet, "taxonId"); |
|
474 |
} |
|
475 |
|
|
476 |
//taxon map |
|
477 |
nameSpace = BerlinModelTaxonImport.NAMESPACE; |
|
478 |
cdmClass = TaxonBase.class; |
|
479 |
idSet = taxonIdSet; |
|
480 |
Map<String, TaxonBase> objectMap = (Map<String, TaxonBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace); |
|
481 |
result.put(nameSpace, objectMap); |
|
482 |
|
|
483 |
} catch (SQLException e) { |
|
484 |
throw new RuntimeException(e); |
|
485 |
} |
|
486 |
return result; |
|
487 |
} |
|
488 |
|
|
489 |
|
|
490 |
|
|
491 |
/** |
|
492 |
* Tests if a distribution with the same tdwgArea and the same status already exists in the description. |
|
493 |
* If so the old distribution is returned |
|
494 |
* @param description |
|
495 |
* @param tdwgArea |
|
496 |
* @return false, if dupplicate exists. True otherwise. |
|
497 |
*/ |
|
498 |
private Distribution checkIsNoDuplicate(TaxonDescription description, Distribution distribution, Map<Integer, String> duplicateMap, Integer bmDistributionId){ |
|
499 |
for (DescriptionElementBase descElBase : description.getElements()){ |
|
500 |
if (descElBase.isInstanceOf(Distribution.class)){ |
|
501 |
Distribution oldDistr = HibernateProxyHelper.deproxy(descElBase, Distribution.class); |
|
502 |
NamedArea oldArea = oldDistr.getArea(); |
|
503 |
if (oldArea != null && oldArea.equals(distribution.getArea())){ |
|
504 |
PresenceAbsenceTerm oldStatus = oldDistr.getStatus(); |
|
505 |
if (oldStatus != null && oldStatus.equals(distribution.getStatus())){ |
|
506 |
duplicateMap.put(bmDistributionId, oldDistr.getSources().iterator().next().getIdInSource()); |
|
507 |
return oldDistr; |
|
508 |
} |
|
509 |
} |
|
510 |
} |
|
511 |
} |
|
512 |
return null; |
|
513 |
} |
|
514 |
|
|
515 |
/** |
|
516 |
* Use same TaxonDescription if two records belong to the same taxon |
|
517 |
* @param newTaxonId |
|
518 |
* @param oldTaxonId |
|
519 |
* @param oldDescription |
|
520 |
* @param taxonMap |
|
521 |
* @return |
|
522 |
*/ |
|
523 |
private TaxonDescription getTaxonDescription(int newTaxonId, int oldTaxonId, TaxonDescription oldDescription, Map<String, TaxonBase<?>> taxonMap, int occurrenceId, Reference<?> sourceSec){ |
|
524 |
TaxonDescription result = null; |
|
525 |
if (oldDescription == null || newTaxonId != oldTaxonId){ |
|
526 |
TaxonBase<?> taxonBase = taxonMap.get(String.valueOf(newTaxonId)); |
|
527 |
//TODO for testing |
|
528 |
//TaxonBase taxonBase = Taxon.NewInstance(BotanicalName.NewInstance(Rank.SPECIES()), null); |
|
529 |
Taxon taxon; |
|
530 |
if ( taxonBase instanceof Taxon ) { |
|
531 |
taxon = (Taxon) taxonBase; |
|
532 |
} else if (taxonBase != null) { |
|
533 |
logger.warn("TaxonBase for Occurrence " + occurrenceId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName()); |
|
534 |
return null; |
|
535 |
} else { |
|
536 |
logger.warn("TaxonBase for Occurrence " + occurrenceId + " is null."); |
|
537 |
return null; |
|
538 |
} |
|
539 |
Set<TaxonDescription> descriptionSet= taxon.getDescriptions(); |
|
540 |
if (descriptionSet.size() > 0) { |
|
541 |
result = descriptionSet.iterator().next(); |
|
542 |
}else{ |
|
543 |
result = TaxonDescription.NewInstance(); |
|
544 |
result.setTitleCache(sourceSec.getTitleCache(), true); |
|
545 |
taxon.addDescription(result); |
|
546 |
} |
|
547 |
}else{ |
|
548 |
result = oldDescription; |
|
549 |
} |
|
550 |
return result; |
|
551 |
} |
|
552 |
|
|
553 |
@Override |
|
554 |
protected boolean doCheck(BerlinModelImportState state){ |
|
555 |
IOValidator<BerlinModelImportState> validator = new BerlinModelOccurrenceImportValidator(); |
|
556 |
return validator.validate(state); |
|
557 |
} |
|
558 |
|
|
559 |
|
|
560 |
@Override |
|
561 |
protected boolean isIgnore(BerlinModelImportState state){ |
|
562 |
if (! state.getConfig().isDoOccurrence()){ |
|
563 |
return true; |
|
564 |
}else{ |
|
565 |
if (!this.checkSqlServerColumnExists(state.getConfig().getSource(), "emOccurrence", "OccurrenceId")){ |
|
566 |
logger.error("emOccurrence table or emOccurrenceId does not exist. Must ignore occurrence import"); |
|
567 |
return true; |
|
568 |
}else{ |
|
569 |
return false; |
|
570 |
} |
|
571 |
} |
|
572 |
} |
|
573 |
|
|
574 |
} |
|
1 |
/** |
|
2 |
* Copyright (C) 2007 EDIT |
|
3 |
* European Distributed Institute of Taxonomy |
|
4 |
* http://www.e-taxonomy.eu |
|
5 |
* |
|
6 |
* The contents of this file are subject to the Mozilla Public License Version 1.1 |
|
7 |
* See LICENSE.TXT at the top of this package for the full license terms. |
|
8 |
*/ |
|
9 |
|
|
10 |
package eu.etaxonomy.cdm.io.berlinModel.in; |
|
11 |
|
|
12 |
import java.net.URI; |
|
13 |
import java.sql.ResultSet; |
|
14 |
import java.sql.SQLException; |
|
15 |
import java.util.ArrayList; |
|
16 |
import java.util.HashMap; |
|
17 |
import java.util.HashSet; |
|
18 |
import java.util.List; |
|
19 |
import java.util.Map; |
|
20 |
import java.util.Set; |
|
21 |
import java.util.UUID; |
|
22 |
|
|
23 |
import org.apache.commons.lang.StringUtils; |
|
24 |
import org.apache.log4j.Logger; |
|
25 |
import org.springframework.stereotype.Component; |
|
26 |
import org.springframework.transaction.TransactionStatus; |
|
27 |
|
|
28 |
import eu.etaxonomy.cdm.common.CdmUtils; |
|
29 |
import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper; |
|
30 |
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer; |
|
31 |
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelOccurrenceImportValidator; |
|
32 |
import eu.etaxonomy.cdm.io.common.IOValidator; |
|
33 |
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner; |
|
34 |
import eu.etaxonomy.cdm.io.common.Source; |
|
35 |
import eu.etaxonomy.cdm.io.common.TdwgAreaProvider; |
|
36 |
import eu.etaxonomy.cdm.model.common.Annotation; |
|
37 |
import eu.etaxonomy.cdm.model.common.AnnotationType; |
|
38 |
import eu.etaxonomy.cdm.model.common.CdmBase; |
|
39 |
import eu.etaxonomy.cdm.model.common.ExtensionType; |
|
40 |
import eu.etaxonomy.cdm.model.common.Language; |
|
41 |
import eu.etaxonomy.cdm.model.common.Marker; |
|
42 |
import eu.etaxonomy.cdm.model.common.MarkerType; |
|
43 |
import eu.etaxonomy.cdm.model.common.OriginalSourceType; |
|
44 |
import eu.etaxonomy.cdm.model.common.TermType; |
|
45 |
import eu.etaxonomy.cdm.model.common.TermVocabulary; |
|
46 |
import eu.etaxonomy.cdm.model.description.DescriptionElementBase; |
|
47 |
import eu.etaxonomy.cdm.model.description.Distribution; |
|
48 |
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm; |
|
49 |
import eu.etaxonomy.cdm.model.description.TaxonDescription; |
|
50 |
import eu.etaxonomy.cdm.model.location.NamedArea; |
|
51 |
import eu.etaxonomy.cdm.model.location.NamedAreaLevel; |
|
52 |
import eu.etaxonomy.cdm.model.location.NamedAreaType; |
|
53 |
import eu.etaxonomy.cdm.model.reference.Reference; |
|
54 |
import eu.etaxonomy.cdm.model.taxon.Taxon; |
|
55 |
import eu.etaxonomy.cdm.model.taxon.TaxonBase; |
|
56 |
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException; |
|
57 |
|
|
58 |
|
|
59 |
/** |
|
60 |
* @author a.mueller |
|
61 |
* @created 20.03.2008 |
|
62 |
*/ |
|
63 |
@Component |
|
64 |
public class BerlinModelOccurrenceImport extends BerlinModelImportBase { |
|
65 |
/** Namespace under which the imported emArea records are registered as original sources of the created areas. */
private static final String EM_AREA_NAMESPACE = "emArea";

private static final Logger logger = Logger.getLogger(BerlinModelOccurrenceImport.class);

/** Namespace used for the import sources attached to the distributions created by this import. */
public static final String NAMESPACE = "Occurrence";

/** Number of handled records after which a progress message is logged; never reassigned, hence final. */
private static final int modCount = 5000;
private static final String pluralString = "occurrences";
private static final String dbTableName = "emOccurrence";  //??

/**
 * Creates the import and passes the source table name and the plural label
 * of the imported objects to the base class.
 */
public BerlinModelOccurrenceImport(){
    super(dbTableName, pluralString);
}
|
80 |
|
|
81 |
@Override |
|
82 |
protected String getIdQuery(BerlinModelImportState state) { |
|
83 |
String result = " SELECT occurrenceId FROM " + getTableName(); |
|
84 |
if (StringUtils.isNotBlank(state.getConfig().getOccurrenceFilter())){ |
|
85 |
result += " WHERE " + state.getConfig().getOccurrenceFilter(); |
|
86 |
} |
|
87 |
return result; |
|
88 |
} |
|
89 |
|
|
90 |
@Override |
|
91 |
protected String getRecordQuery(BerlinModelImportConfigurator config) { |
|
92 |
String emCode = config.isIncludesAreaEmCode()? ", emArea.EMCode" : ""; |
|
93 |
String strQuery = //DISTINCT because otherwise emOccurrenceSource creates multiple records for a single distribution |
|
94 |
" SELECT DISTINCT PTaxon.RIdentifier AS taxonId, emOccurrence.OccurrenceId, emOccurrence.Native, emOccurrence.Introduced, " + |
|
95 |
" emOccurrence.Cultivated, emOccurrence.Notes occNotes, " + |
|
96 |
" emOccurSumCat.emOccurSumCatId, emOccurSumCat.Short, emOccurSumCat.Description, " + |
|
97 |
" emOccurSumCat.OutputCode, emArea.AreaId, emArea.TDWGCode " + emCode + |
|
98 |
" FROM emOccurrence INNER JOIN " + |
|
99 |
" emArea ON emOccurrence.AreaFk = emArea.AreaId INNER JOIN " + |
|
100 |
" PTaxon ON emOccurrence.PTNameFk = PTaxon.PTNameFk AND emOccurrence.PTRefFk = PTaxon.PTRefFk LEFT OUTER JOIN " + |
|
101 |
" emOccurSumCat ON emOccurrence.SummaryStatus = emOccurSumCat.emOccurSumCatId LEFT OUTER JOIN " + |
|
102 |
" emOccurrenceSource ON emOccurrence.OccurrenceId = emOccurrenceSource.OccurrenceFk " + |
|
103 |
" WHERE (emOccurrence.OccurrenceId IN (" + ID_LIST_TOKEN + ") )" + |
|
104 |
" ORDER BY PTaxon.RIdentifier"; |
|
105 |
return strQuery; |
|
106 |
} |
|
107 |
|
|
108 |
private Map<Integer, NamedArea> euroMedAreas = new HashMap<Integer, NamedArea>(); |
|
109 |
|
|
110 |
|
|
111 |
@Override |
|
112 |
public void doInvoke(BerlinModelImportState state) { |
|
113 |
if (state.getConfig().isUseEmAreaVocabulary()){ |
|
114 |
try { |
|
115 |
createEuroMedAreas(state); |
|
116 |
} catch (Exception e) { |
|
117 |
logger.error("Exception occurred when trying to create euroMed Areas"); |
|
118 |
e.printStackTrace(); |
|
119 |
state.setSuccess(false); |
|
120 |
} |
|
121 |
} |
|
122 |
super.doInvoke(state); |
|
123 |
//reset |
|
124 |
euroMedAreas = new HashMap<Integer, NamedArea>(); |
|
125 |
} |
|
126 |
|
|
127 |
private TermVocabulary<NamedArea> createEuroMedAreas(BerlinModelImportState state) throws SQLException { |
|
128 |
logger.warn("Start creating E+M areas"); |
|
129 |
Source source = state.getConfig().getSource(); |
|
130 |
Reference<?> sourceReference = state.getConfig().getSourceReference(); |
|
131 |
|
|
132 |
TransactionStatus txStatus = this.startTransaction(); |
|
133 |
|
|
134 |
sourceReference = getSourceReference(sourceReference); |
|
135 |
|
|
136 |
TermVocabulary<NamedArea> euroMedAreas = makeEmptyEuroMedVocabulary(); |
|
137 |
|
|
138 |
MarkerType eurMarkerType = getMarkerType(state, BerlinModelTransformer.uuidEurArea, "eur", "eur Area", "eur"); |
|
139 |
MarkerType euroMedAreaMarkerType = getMarkerType(state, BerlinModelTransformer.uuidEurMedArea, "EuroMedArea", "EuroMedArea", "EuroMedArea"); |
|
140 |
ExtensionType isoCodeExtType = getExtensionType(state, BerlinModelTransformer.uuidIsoCode, "IsoCode", "IsoCode", "iso"); |
|
141 |
ExtensionType tdwgCodeExtType = getExtensionType(state, BerlinModelTransformer.uuidTdwgAreaCode, "TDWG code", "TDWG Area code", "tdwg"); |
|
142 |
ExtensionType mclCodeExtType = getExtensionType(state, BerlinModelTransformer.uuidMclCode, "MCL code", "MedCheckList code", "mcl"); |
|
143 |
NamedAreaLevel areaLevelTop = getNamedAreaLevel(state, BerlinModelTransformer.uuidAreaLevelTop, "Euro+Med top area level", "Euro+Med top area level. This level is only to be used for the area representing the complete Euro+Med area", "e+m top", null); |
|
144 |
NamedAreaLevel areaLevelEm1 = getNamedAreaLevel(state, BerlinModelTransformer.uuidAreaLevelFirst, "Euro+Med 1. area level", "Euro+Med 1. area level", "e+m 1.", null); |
|
145 |
NamedAreaLevel areaLevelEm2 = getNamedAreaLevel(state, BerlinModelTransformer.uuidAreaLevelSecond, "Euro+Med 2. area level", "Euro+Med 2. area level", "Euro+Med 1. area level", null); |
|
146 |
|
|
147 |
|
|
148 |
String sql = "SELECT * , CASE WHEN EMCode = 'EM' THEN 'a' ELSE 'b' END as isEM " + |
|
149 |
" FROM emArea " + |
|
150 |
" ORDER BY isEM, EMCode"; |
|
151 |
ResultSet rs = source.getResultSet(sql); |
|
152 |
|
|
153 |
NamedArea euroMedArea = null; |
|
154 |
NamedArea lastLevel2Area = null; |
|
155 |
|
|
156 |
//euroMedArea (EMCode = 'EM') |
|
157 |
rs.next(); |
|
158 |
euroMedArea = makeSingleEuroMedArea(rs, eurMarkerType, euroMedAreaMarkerType, isoCodeExtType, tdwgCodeExtType, mclCodeExtType, |
|
159 |
areaLevelTop, areaLevelEm1 , areaLevelEm2, sourceReference, euroMedArea, lastLevel2Area); |
|
160 |
euroMedAreas.addTerm(euroMedArea); |
|
161 |
|
|
162 |
//all other areas |
|
163 |
while (rs.next()){ |
|
164 |
NamedArea newArea = makeSingleEuroMedArea(rs, eurMarkerType, euroMedAreaMarkerType, |
|
165 |
isoCodeExtType, tdwgCodeExtType, mclCodeExtType, |
|
166 |
areaLevelTop, areaLevelEm1 , areaLevelEm2, sourceReference, euroMedArea, lastLevel2Area); |
|
167 |
euroMedAreas.addTerm(newArea); |
|
168 |
if (newArea.getPartOf().equals(euroMedArea)){ |
|
169 |
lastLevel2Area = newArea; |
|
170 |
} |
|
171 |
getVocabularyService().saveOrUpdate(euroMedAreas); |
|
172 |
} |
|
173 |
|
|
174 |
commitTransaction(txStatus); |
|
175 |
logger.warn("Created E+M areas"); |
|
176 |
|
|
177 |
return euroMedAreas; |
|
178 |
} |
|
179 |
|
|
180 |
/** |
|
181 |
* @param sourceReference |
|
182 |
* @return |
|
183 |
*/ |
|
184 |
private Reference<?> getSourceReference(Reference<?> sourceReference) { |
|
185 |
Reference<?> persistentSourceReference = getReferenceService().find(sourceReference.getUuid()); //just to be sure |
|
186 |
if (persistentSourceReference != null){ |
|
187 |
sourceReference = persistentSourceReference; |
|
188 |
} |
|
189 |
return sourceReference; |
|
190 |
} |
|
191 |
|
|
192 |
/** |
|
193 |
* @param eurMarkerType |
|
194 |
* @param euroMedAreaMarkerType |
|
195 |
* @param isoCodeExtType |
|
196 |
* @param tdwgCodeExtType |
|
197 |
* @param mclCodeExtType |
|
198 |
* @param rs |
|
199 |
* @param areaLevelEm2 |
|
200 |
* @param areaLevelEm1 |
|
201 |
* @param areaLevelTop |
|
202 |
* @throws SQLException |
|
203 |
*/ |
|
204 |
private NamedArea makeSingleEuroMedArea(ResultSet rs, MarkerType eurMarkerType, |
|
205 |
MarkerType euroMedAreaMarkerType, ExtensionType isoCodeExtType, |
|
206 |
ExtensionType tdwgCodeExtType, ExtensionType mclCodeExtType, |
|
207 |
NamedAreaLevel areaLevelTop, NamedAreaLevel areaLevelEm1, NamedAreaLevel areaLevelEm2, |
|
208 |
Reference<?> sourceReference, NamedArea euroMedArea, NamedArea level2Area) throws SQLException { |
|
209 |
Integer areaId = rs.getInt("AreaId"); |
|
210 |
String emCode = nullSafeTrim(rs.getString("EMCode")); |
|
211 |
String isoCode = nullSafeTrim(rs.getString("ISOCode")); |
|
212 |
String tdwgCode = nullSafeTrim(rs.getString("TDWGCode")); |
|
213 |
String unit = nullSafeTrim(rs.getString("Unit")); |
|
214 |
// ,[Status] |
|
215 |
// ,[OutputOrder] |
|
216 |
boolean eurMarker = rs.getBoolean("eur"); |
|
217 |
boolean euroMedAreaMarker = rs.getBoolean("EuroMedArea"); |
|
218 |
String notes = nullSafeTrim(rs.getString("Notes")); |
|
219 |
String mclCode = nullSafeTrim(rs.getString("MCLCode")); |
|
220 |
String geoSearch = nullSafeTrim(rs.getString("NameForGeoSearch")); |
|
221 |
|
|
222 |
if (isBlank(emCode)){ |
|
223 |
emCode = unit; |
|
224 |
} |
|
225 |
|
|
226 |
//uuid |
|
227 |
UUID uuid = BerlinModelTransformer.getEMAreaUuid(emCode); |
|
228 |
NamedArea area = (NamedArea)getTermService().find(uuid); |
|
229 |
if (area == null){ |
|
230 |
//label |
|
231 |
area = NamedArea.NewInstance(geoSearch, unit, emCode); |
|
232 |
if (uuid != null){ |
|
233 |
area.setUuid(uuid); |
|
234 |
}else{ |
|
235 |
logger.warn("Uuuid for emCode could not be defined: " + emCode); |
|
236 |
} |
|
237 |
} |
|
238 |
|
|
239 |
|
|
240 |
//code |
|
241 |
area.setIdInVocabulary(emCode); |
|
242 |
//notes |
|
243 |
if (StringUtils.isNotEmpty(notes)){ |
|
244 |
area.addAnnotation(Annotation.NewInstance(notes, AnnotationType.EDITORIAL(), Language.DEFAULT())); |
|
245 |
} |
|
246 |
//markers |
|
247 |
area.addMarker(Marker.NewInstance(eurMarkerType, eurMarker)); |
|
248 |
area.addMarker(Marker.NewInstance(euroMedAreaMarkerType, euroMedAreaMarker)); |
|
249 |
|
|
250 |
//extensions |
|
251 |
if (isNotBlank(isoCode)){ |
|
252 |
area.addExtension(isoCode, isoCodeExtType); |
|
253 |
} |
|
254 |
if (isNotBlank(tdwgCode)){ |
|
255 |
area.addExtension(tdwgCode, tdwgCodeExtType); |
|
256 |
} |
|
257 |
if (isNotBlank(mclCode)){ |
|
258 |
area.addExtension(mclCode, mclCodeExtType); |
|
259 |
} |
|
260 |
|
|
261 |
//type |
|
262 |
area.setType(NamedAreaType.ADMINISTRATION_AREA()); |
|
263 |
|
|
264 |
//source |
|
265 |
area.addSource(OriginalSourceType.Import, String.valueOf(areaId), EM_AREA_NAMESPACE, sourceReference, null); |
|
266 |
|
|
267 |
//parent |
|
268 |
if (euroMedArea != null){ |
|
269 |
if (emCode.contains("(")){ |
|
270 |
area.setPartOf(level2Area); |
|
271 |
area.setLevel(areaLevelEm2); |
|
272 |
}else{ |
|
273 |
area.setPartOf(euroMedArea); |
|
274 |
area.setLevel(areaLevelEm1); |
|
275 |
} |
|
276 |
}else{ |
|
277 |
area.setLevel(areaLevelTop); |
|
278 |
} |
|
279 |
this.euroMedAreas.put(areaId, area); |
|
280 |
|
|
281 |
//save |
|
282 |
getTermService().saveOrUpdate(area); |
|
283 |
|
|
284 |
return area; |
|
285 |
} |
|
286 |
|
|
287 |
private String nullSafeTrim(String string) { |
|
288 |
if (string == null){ |
|
289 |
return null; |
|
290 |
}else{ |
|
291 |
return string.trim(); |
|
292 |
} |
|
293 |
} |
|
294 |
|
|
295 |
/** |
|
296 |
* |
|
297 |
*/ |
|
298 |
private TermVocabulary<NamedArea> makeEmptyEuroMedVocabulary() { |
|
299 |
TermType type = TermType.NamedArea; |
|
300 |
String description = "Euro+Med area vocabulary"; |
|
301 |
String label = "E+M areas"; |
|
302 |
String abbrev = null; |
|
303 |
URI termSourceUri = null; |
|
304 |
TermVocabulary<NamedArea> result = TermVocabulary.NewInstance(type, description, label, abbrev, termSourceUri); |
|
305 |
|
|
306 |
result.setUuid(BerlinModelTransformer.uuidVocEuroMedAreas); |
|
307 |
getVocabularyService().save(result); |
|
308 |
return result; |
|
309 |
} |
|
310 |
|
|
311 |
@Override |
|
312 |
public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) { |
|
313 |
boolean success = true; |
|
314 |
Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>(); |
|
315 |
|
|
316 |
Map<String, TaxonBase<?>> taxonMap = partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE); |
|
317 |
|
|
318 |
ResultSet rs = partitioner.getResultSet(); |
|
319 |
|
|
320 |
try { |
|
321 |
//map to store the mapping of duplicate berlin model occurrences to their real distributions |
|
322 |
//duplicated may occur due to area mappings from BM areas to TDWG areas |
|
323 |
Map<Integer, String> duplicateMap = new HashMap<Integer, String>(); |
|
324 |
int oldTaxonId = -1; |
|
325 |
TaxonDescription oldDescription = null; |
|
326 |
int i = 0; |
|
327 |
int countDescriptions = 0; |
|
328 |
int countDistributions = 0; |
|
329 |
int countDuplicates = 0; |
|
330 |
//for each reference |
|
331 |
while (rs.next()){ |
|
332 |
|
|
333 |
if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("Facts handled: " + (i-1));} |
|
334 |
|
|
335 |
int occurrenceId = rs.getInt("OccurrenceId"); |
|
336 |
int newTaxonId = rs.getInt("taxonId"); |
|
337 |
String notes = nullSafeTrim(rs.getString("occNotes")); |
|
338 |
|
|
339 |
Integer emStatusId = nullSafeInt(rs, "emOccurSumCatId"); |
|
340 |
|
|
341 |
try { |
|
342 |
//status |
|
343 |
PresenceAbsenceTerm status = null; |
|
344 |
String alternativeStatusString = null; |
|
345 |
if (emStatusId != null){ |
|
346 |
status = BerlinModelTransformer.occStatus2PresenceAbsence(emStatusId); |
|
347 |
}else{ |
|
348 |
String[] stringArray = new String[]{rs.getString("Native"), rs.getString("Introduced"), rs.getString("Cultivated")}; |
|
349 |
alternativeStatusString = CdmUtils.concat(",", stringArray); |
|
350 |
} |
|
351 |
|
|
352 |
Reference<?> sourceRef = state.getTransactionalSourceReference(); |
|
353 |
|
|
354 |
List<NamedArea> areas = makeAreaList(state, rs, occurrenceId); |
|
355 |
|
|
356 |
//create description(elements) |
|
357 |
TaxonDescription taxonDescription = getTaxonDescription(newTaxonId, oldTaxonId, oldDescription, taxonMap, occurrenceId, sourceRef); |
|
358 |
for (NamedArea area : areas){ |
|
359 |
Distribution distribution = Distribution.NewInstance(area, status); |
|
360 |
if (status == null){ |
|
361 |
AnnotationType annotationType = AnnotationType.EDITORIAL(); |
|
362 |
Annotation annotation = Annotation.NewInstance(alternativeStatusString, annotationType, null); |
|
363 |
distribution.addAnnotation(annotation); |
|
364 |
distribution.addMarker(Marker.NewInstance(MarkerType.PUBLISH(), false)); |
|
365 |
} |
|
366 |
// distribution.setCitation(sourceRef); |
|
367 |
if (taxonDescription != null) { |
|
368 |
Distribution duplicate = checkIsNoDuplicate(taxonDescription, distribution, duplicateMap , occurrenceId); |
|
369 |
if (duplicate == null){ |
|
370 |
taxonDescription.addElement(distribution); |
|
371 |
distribution.addImportSource(String.valueOf(occurrenceId), NAMESPACE, state.getTransactionalSourceReference(), null); |
|
372 |
countDistributions++; |
|
373 |
if (taxonDescription != oldDescription){ |
|
374 |
taxaToSave.add(taxonDescription.getTaxon()); |
|
375 |
oldDescription = taxonDescription; |
|
376 |
countDescriptions++; |
|
377 |
} |
|
378 |
}else{ |
|
379 |
countDuplicates++; |
|
380 |
duplicate.addImportSource(String.valueOf(occurrenceId), NAMESPACE, state.getTransactionalSourceReference(), null); |
|
381 |
logger.info("Distribution is duplicate"); } |
|
382 |
} else { |
|
383 |
logger.warn("Distribution " + area.getLabel() + " ignored. OccurrenceId = " + occurrenceId); |
|
384 |
success = false; |
|
385 |
} |
|
386 |
//notes |
|
387 |
if (isNotBlank(notes)){ |
|
388 |
Annotation annotation = Annotation.NewInstance(notes, Language.DEFAULT()); |
|
389 |
distribution.addAnnotation(annotation); |
|
390 |
} |
|
391 |
} |
|
392 |
} catch (UnknownCdmTypeException e) { |
|
393 |
logger.error("Unknown presenceAbsence status id: " + emStatusId); |
|
394 |
e.printStackTrace(); |
|
395 |
success = false; |
|
396 |
} |
|
397 |
} |
|
398 |
|
|
399 |
logger.info("Distributions: " + countDistributions + ", Descriptions: " + countDescriptions ); |
|
400 |
logger.info("Duplicate occurrences: " + (countDuplicates)); |
|
401 |
|
|
402 |
logger.info("Taxa to save: " + taxaToSave.size()); |
|
403 |
getTaxonService().save(taxaToSave); |
|
404 |
|
|
405 |
return success; |
|
406 |
} catch (SQLException e) { |
|
407 |
logger.error("SQLException:" + e); |
|
408 |
return false; |
|
409 |
} |
|
410 |
} |
|
411 |
|
|
412 |
/** |
|
413 |
* @param state |
|
414 |
* @param rs |
|
415 |
* @param occurrenceId |
|
416 |
* @param tdwgCodeString |
|
417 |
* @param emCodeString |
|
418 |
* @return |
|
419 |
* @throws SQLException |
|
420 |
*/ |
|
421 |
//Create area list |
|
422 |
private List<NamedArea> makeAreaList(BerlinModelImportState state, ResultSet rs, int occurrenceId) throws SQLException { |
|
423 |
List<NamedArea> areas = new ArrayList<NamedArea>(); |
|
424 |
|
|
425 |
if (state.getConfig().isUseEmAreaVocabulary()){ |
|
426 |
Integer areaId = rs.getInt("AreaId"); |
|
427 |
NamedArea area = this.euroMedAreas.get(areaId); |
|
428 |
areas.add(area); |
|
429 |
}else{ |
|
430 |
String tdwgCodeString = rs.getString("TDWGCode"); |
|
431 |
String emCodeString = state.getConfig().isIncludesAreaEmCode() ? rs.getString("EMCode") : null; |
|
432 |
|
|
433 |
if (tdwgCodeString != null){ |
|
434 |
|
|
435 |
String[] tdwgCodes = new String[]{tdwgCodeString}; |
|
436 |
if (state.getConfig().isSplitTdwgCodes()){ |
|
437 |
tdwgCodes = tdwgCodeString.split(";"); |
|
438 |
} |
|
439 |
|
|
440 |
for (String tdwgCode : tdwgCodes){ |
|
441 |
NamedArea area = TdwgAreaProvider.getAreaByTdwgAbbreviation(tdwgCode.trim()); |
|
442 |
if (area == null){ |
|
443 |
area = getOtherAreas(state, emCodeString, tdwgCodeString); |
|
444 |
} |
|
445 |
if (area != null){ |
|
446 |
areas.add(area); |
|
447 |
} |
|
448 |
} |
|
449 |
} |
|
450 |
|
|
451 |
if (areas.size()== 0){ |
|
452 |
NamedArea area = getOtherAreas(state, emCodeString, tdwgCodeString); |
|
453 |
if (area != null){ |
|
454 |
areas.add(area); |
|
455 |
} |
|
456 |
} |
|
457 |
if (areas.size() == 0){ |
|
458 |
String areaId = rs.getString("AreaId"); |
|
459 |
logger.warn("No areas defined for occurrence " + occurrenceId + ". EMCode: " + CdmUtils.Nz(emCodeString).trim() + ". AreaId: " + areaId ); |
|
460 |
} |
|
461 |
} |
|
462 |
return areas; |
|
463 |
} |
|
464 |
|
|
465 |
@Override |
|
466 |
public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, BerlinModelImportState state) { |
|
467 |
String nameSpace; |
|
468 |
Class<?> cdmClass; |
|
469 |
Set<String> idSet; |
|
470 |
Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>(); |
|
471 |
|
|
472 |
try{ |
|
473 |
Set<String> taxonIdSet = new HashSet<String>(); |
|
474 |
while (rs.next()){ |
|
475 |
handleForeignKey(rs, taxonIdSet, "taxonId"); |
|
476 |
} |
|
477 |
|
|
478 |
//taxon map |
|
479 |
nameSpace = BerlinModelTaxonImport.NAMESPACE; |
|
480 |
cdmClass = TaxonBase.class; |
|
481 |
idSet = taxonIdSet; |
|
482 |
Map<String, TaxonBase> objectMap = (Map<String, TaxonBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace); |
|
483 |
result.put(nameSpace, objectMap); |
|
484 |
|
|
485 |
} catch (SQLException e) { |
|
486 |
throw new RuntimeException(e); |
|
487 |
} |
|
488 |
return result; |
|
489 |
} |
|
490 |
|
|
491 |
|
|
492 |
|
|
493 |
/** |
|
494 |
* Tests if a distribution with the same tdwgArea and the same status already exists in the description. |
|
495 |
* If so the old distribution is returned |
|
496 |
* @param description |
|
497 |
* @param tdwgArea |
|
498 |
* @return false, if dupplicate exists. True otherwise. |
|
499 |
*/ |
|
500 |
private Distribution checkIsNoDuplicate(TaxonDescription description, Distribution distribution, Map<Integer, String> duplicateMap, Integer bmDistributionId){ |
|
501 |
for (DescriptionElementBase descElBase : description.getElements()){ |
|
502 |
if (descElBase.isInstanceOf(Distribution.class)){ |
|
503 |
Distribution oldDistr = HibernateProxyHelper.deproxy(descElBase, Distribution.class); |
|
504 |
NamedArea oldArea = oldDistr.getArea(); |
|
505 |
if (oldArea != null && oldArea.equals(distribution.getArea())){ |
|
506 |
PresenceAbsenceTerm oldStatus = oldDistr.getStatus(); |
|
507 |
if (oldStatus != null && oldStatus.equals(distribution.getStatus())){ |
|
508 |
duplicateMap.put(bmDistributionId, oldDistr.getSources().iterator().next().getIdInSource()); |
|
509 |
return oldDistr; |
|
510 |
} |
|
511 |
} |
|
512 |
} |
|
513 |
} |
|
514 |
return null; |
|
515 |
} |
|
516 |
|
|
517 |
/** |
|
518 |
* Use same TaxonDescription if two records belong to the same taxon |
|
519 |
* @param newTaxonId |
|
520 |
* @param oldTaxonId |
|
521 |
* @param oldDescription |
|
522 |
* @param taxonMap |
|
523 |
* @return |
|
524 |
*/ |
|
525 |
private TaxonDescription getTaxonDescription(int newTaxonId, int oldTaxonId, TaxonDescription oldDescription, Map<String, TaxonBase<?>> taxonMap, int occurrenceId, Reference<?> sourceSec){ |
|
526 |
TaxonDescription result = null; |
|
527 |
if (oldDescription == null || newTaxonId != oldTaxonId){ |
|
528 |
TaxonBase<?> taxonBase = taxonMap.get(String.valueOf(newTaxonId)); |
|
529 |
//TODO for testing |
|
530 |
//TaxonBase taxonBase = Taxon.NewInstance(BotanicalName.NewInstance(Rank.SPECIES()), null); |
|
531 |
Taxon taxon; |
|
532 |
if ( taxonBase instanceof Taxon ) { |
|
533 |
taxon = (Taxon) taxonBase; |
|
534 |
} else if (taxonBase != null) { |
|
535 |
logger.warn("TaxonBase for Occurrence " + occurrenceId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName()); |
|
536 |
return null; |
|
537 |
} else { |
|
538 |
logger.warn("TaxonBase for Occurrence " + occurrenceId + " is null."); |
|
539 |
return null; |
|
540 |
} |
|
541 |
Set<TaxonDescription> descriptionSet= taxon.getDescriptions(); |
|
542 |
if (descriptionSet.size() > 0) { |
|
543 |
result = descriptionSet.iterator().next(); |
|
544 |
}else{ |
|
545 |
result = TaxonDescription.NewInstance(); |
|
546 |
result.setTitleCache(sourceSec.getTitleCache(), true); |
|
547 |
taxon.addDescription(result); |
|
548 |
} |
|
549 |
}else{ |
|
550 |
result = oldDescription; |
|
551 |
} |
|
552 |
return result; |
|
553 |
} |
|
554 |
|
|
555 |
@Override |
|
556 |
protected boolean doCheck(BerlinModelImportState state){ |
|
557 |
IOValidator<BerlinModelImportState> validator = new BerlinModelOccurrenceImportValidator(); |
|
558 |
return validator.validate(state); |
|
559 |
} |
|
560 |
|
|
561 |
|
|
562 |
@Override |
|
563 |
protected boolean isIgnore(BerlinModelImportState state){ |
|
564 |
if (! state.getConfig().isDoOccurrence()){ |
|
565 |
return true; |
|
566 |
}else{ |
|
567 |
if (!this.checkSqlServerColumnExists(state.getConfig().getSource(), "emOccurrence", "OccurrenceId")){ |
|
568 |
logger.error("emOccurrence table or emOccurrenceId does not exist. Must ignore occurrence import"); |
|
569 |
return true; |
|
570 |
}else{ |
|
571 |
return false; |
|
572 |
} |
|
573 |
} |
|
574 |
} |
|
575 |
|
|
576 |
} |
Also available in: Unified diff
Add E+M shapefile attributes import to E+M import #3979