/**
* Copyright (C) 2007 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
9
|
|
10
|
package eu.etaxonomy.cdm.io.algaterra;
|
11
|
|
12
|
import java.sql.ResultSet;
|
13
|
import java.sql.SQLException;
|
14
|
import java.util.HashMap;
|
15
|
import java.util.HashSet;
|
16
|
import java.util.Map;
|
17
|
import java.util.Set;
|
18
|
|
19
|
import org.apache.commons.lang.StringUtils;
|
20
|
import org.apache.log4j.Logger;
|
21
|
import org.springframework.stereotype.Component;
|
22
|
|
23
|
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
|
24
|
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade.DerivedUnitType;
|
25
|
import eu.etaxonomy.cdm.io.algaterra.validation.AlgaTerraSpecimenImportValidator;
|
26
|
import eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase;
|
27
|
import eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator;
|
28
|
import eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState;
|
29
|
import eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelTaxonImport;
|
30
|
import eu.etaxonomy.cdm.io.common.IOValidator;
|
31
|
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
|
32
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
33
|
import eu.etaxonomy.cdm.model.description.Feature;
|
34
|
import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
|
35
|
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
36
|
import eu.etaxonomy.cdm.model.location.Point;
|
37
|
import eu.etaxonomy.cdm.model.location.ReferenceSystem;
|
38
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
39
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
40
|
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
41
|
|
42
|
|
43
|
/**
|
44
|
* @author a.mueller
|
45
|
* @created 20.03.2008
|
46
|
* @version 1.0
|
47
|
*/
|
48
|
@Component
|
49
|
public class AlgaTerraSpecimenImport extends BerlinModelImportBase {
|
50
|
private static final Logger logger = Logger.getLogger(AlgaTerraSpecimenImport.class);
|
51
|
|
52
|
public static final String NAMESPACE = "Occurrence";
|
53
|
|
54
|
|
55
|
private static int modCount = 5000;
|
56
|
private static final String pluralString = "specimen and observation";
|
57
|
private static final String dbTableName = "Fact"; //??
|
58
|
|
59
|
|
60
|
public AlgaTerraSpecimenImport(){
|
61
|
super();
|
62
|
}
|
63
|
|
64
|
/* (non-Javadoc)
|
65
|
* @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getIdQuery()
|
66
|
*/
|
67
|
@Override
|
68
|
protected String getIdQuery(BerlinModelImportState state) {
|
69
|
String result = " SELECT factId " +
|
70
|
" FROM " + getTableName() + " INNER JOIN PTaxon ON Fact.PTNameFk = PTaxon.PTNameFk AND Fact.PTRefFk = PTaxon.PTRefFk "
|
71
|
+ " WHERE FactCategoryFk = 202 "
|
72
|
+ " ORDER BY PTaxon.RIdentifier, Fact.FactId ";
|
73
|
return result;
|
74
|
}
|
75
|
|
76
|
/* (non-Javadoc)
|
77
|
* @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
|
78
|
*/
|
79
|
@Override
|
80
|
protected String getRecordQuery(BerlinModelImportConfigurator config) {
|
81
|
String strQuery = //DISTINCT because otherwise emOccurrenceSource creates multiple records for a single distribution
|
82
|
" SELECT PTaxon.RIdentifier as taxonId, Fact.FactId, Fact.RecordBasis, EcoFact.* " +
|
83
|
" FROM Fact " +
|
84
|
" INNER JOIN EcoFact ON Fact.ExtensionFk = EcoFact.EcoFactId " +
|
85
|
" INNER JOIN PTaxon ON dbo.Fact.PTNameFk = dbo.PTaxon.PTNameFk AND dbo.Fact.PTRefFk = dbo.PTaxon.PTRefFk " +
|
86
|
" WHERE Fact.FactCategoryFk = 202 AND (Fact.FactId IN (" + ID_LIST_TOKEN + ") )"
|
87
|
+ " ORDER BY PTaxon.RIdentifier, Fact.FactId "
|
88
|
;
|
89
|
return strQuery;
|
90
|
}
|
91
|
|
92
|
/* (non-Javadoc)
|
93
|
* @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#doPartition(eu.etaxonomy.cdm.io.berlinModel.in.ResultSetPartitioner, eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
|
94
|
*/
|
95
|
public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) {
|
96
|
boolean success = true;
|
97
|
Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
|
98
|
|
99
|
Map<String, TaxonBase> taxonMap = (Map<String, TaxonBase>) partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE);
|
100
|
|
101
|
ResultSet rs = partitioner.getResultSet();
|
102
|
|
103
|
try {
|
104
|
int oldTaxonId = -1;
|
105
|
TaxonDescription oldDescription = null;
|
106
|
int i = 0;
|
107
|
int countDescriptions = 0;
|
108
|
int countSpecimen = 0;
|
109
|
//for each reference
|
110
|
while (rs.next()){
|
111
|
|
112
|
if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("Specimen facts handled: " + (i-1));}
|
113
|
|
114
|
int newTaxonId = rs.getInt("taxonId");
|
115
|
int factId = rs.getInt("FactId");
|
116
|
try {
|
117
|
|
118
|
String recordBasis = rs.getString("RecordBasis");
|
119
|
|
120
|
Reference<?> sourceRef = state.getTransactionalSourceReference();
|
121
|
//create description(elements)
|
122
|
TaxonDescription taxonDescription = getTaxonDescription(newTaxonId, oldTaxonId, oldDescription, taxonMap, factId, sourceRef);
|
123
|
|
124
|
DerivedUnitType type = makeDerivedUnitType(recordBasis);
|
125
|
DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(type);
|
126
|
|
127
|
handleSingleSpecimen(rs, facade);
|
128
|
|
129
|
IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
|
130
|
Feature feature = makeFeature(type);
|
131
|
indAssociation.setAssociatedSpecimenOrObservation(facade.innerDerivedUnit());
|
132
|
indAssociation.setFeature(feature);
|
133
|
taxonDescription.addElement(indAssociation);
|
134
|
|
135
|
if (taxonDescription != oldDescription){
|
136
|
taxaToSave.add(taxonDescription.getTaxon());
|
137
|
oldDescription = taxonDescription;
|
138
|
countDescriptions++;
|
139
|
}
|
140
|
} catch (Exception e) {
|
141
|
logger.warn("Exception in ecoFact: FactId " + factId + ". " + e.getMessage());
|
142
|
// e.printStackTrace();
|
143
|
}
|
144
|
|
145
|
}
|
146
|
|
147
|
logger.warn("Specimen: " + countSpecimen + ", Descriptions: " + countDescriptions );
|
148
|
|
149
|
logger.warn("Taxa to save: " + taxaToSave.size());
|
150
|
getTaxonService().save(taxaToSave);
|
151
|
|
152
|
return success;
|
153
|
} catch (SQLException e) {
|
154
|
logger.error("SQLException:" + e);
|
155
|
return false;
|
156
|
}
|
157
|
}
|
158
|
|
159
|
|
160
|
private void handleSingleSpecimen(ResultSet rs, DerivedUnitFacade facade) throws SQLException {
|
161
|
//TDWGGazetteerFK, CollectionFk, Collector, GeoCodeMethod, Prec, AltitudeMethod, Depth,
|
162
|
//ISOCountrySub, CollectionDate/End, WaterBody,
|
163
|
//CreatedWhen/Who/Updated/who
|
164
|
|
165
|
//P1-10Value/Unit/Parameter/Method
|
166
|
|
167
|
// int factId = rs.getInt("factId");
|
168
|
String locality = rs.getString("Locality");
|
169
|
Double latitude = rs.getDouble("Latitude");
|
170
|
Double longitude = rs.getDouble("Longitude");
|
171
|
int errorRadius = rs.getInt("Prec");
|
172
|
Integer altitude = rs.getInt("Altitude");
|
173
|
String altitudeUnit = rs.getString("AltitudeUnit");
|
174
|
String collectorsNumber = rs.getString("CollectorsNumber");
|
175
|
|
176
|
//location
|
177
|
facade.setLocality(locality);
|
178
|
|
179
|
//exact location
|
180
|
ReferenceSystem referenceSystem = null;
|
181
|
Point exactLocation = Point.NewInstance(longitude, latitude, referenceSystem, errorRadius);
|
182
|
facade.setExactLocation(exactLocation);
|
183
|
|
184
|
//altitude
|
185
|
if (StringUtils.isNotBlank(altitudeUnit) && ! altitudeUnit.trim().equalsIgnoreCase("m")){
|
186
|
logger.warn("Altitude unit is not [m] but: " + altitudeUnit);
|
187
|
}
|
188
|
facade.setAbsoluteElevationRange(altitude, altitude); //TODO
|
189
|
|
190
|
//field
|
191
|
facade.setFieldNumber(collectorsNumber);
|
192
|
|
193
|
}
|
194
|
|
195
|
private Feature makeFeature(DerivedUnitType type) {
|
196
|
if (type.equals(DerivedUnitType.DerivedUnit)){
|
197
|
return Feature.INDIVIDUALS_ASSOCIATION();
|
198
|
}else if (type.equals(DerivedUnitType.FieldObservation) || type.equals(DerivedUnitType.Observation) ){
|
199
|
return Feature.OBSERVATION();
|
200
|
}else if (type.equals(DerivedUnitType.Fossil) || type.equals(DerivedUnitType.LivingBeing) || type.equals(DerivedUnitType.Specimen )){
|
201
|
return Feature.SPECIMEN();
|
202
|
}
|
203
|
logger.warn("No feature defined for derived unit type: " + type);
|
204
|
return null;
|
205
|
}
|
206
|
|
207
|
|
208
|
private DerivedUnitType makeDerivedUnitType(String recordBasis) {
|
209
|
DerivedUnitType result = null;
|
210
|
if (StringUtils.isBlank(recordBasis)){
|
211
|
result = DerivedUnitType.DerivedUnit;
|
212
|
} else if (recordBasis.equalsIgnoreCase("FossileSpecimen")){
|
213
|
result = DerivedUnitType.Fossil;
|
214
|
}else if (recordBasis.equalsIgnoreCase("HumanObservation")){
|
215
|
result = DerivedUnitType.Observation;
|
216
|
}else if (recordBasis.equalsIgnoreCase("Literature")){
|
217
|
logger.warn("Literature record basis not yet supported");
|
218
|
result = DerivedUnitType.DerivedUnit;
|
219
|
}else if (recordBasis.equalsIgnoreCase("LivingSpecimen")){
|
220
|
result = DerivedUnitType.LivingBeing;
|
221
|
}else if (recordBasis.equalsIgnoreCase("MachineObservation")){
|
222
|
logger.warn("MachineObservation record basis not yet supported");
|
223
|
result = DerivedUnitType.Observation;
|
224
|
}else if (recordBasis.equalsIgnoreCase("PreservedSpecimen")){
|
225
|
result = DerivedUnitType.Specimen;
|
226
|
}
|
227
|
return result;
|
228
|
}
|
229
|
|
230
|
/* (non-Javadoc)
|
231
|
* @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
|
232
|
*/
|
233
|
public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
|
234
|
String nameSpace;
|
235
|
Class cdmClass;
|
236
|
Set<String> idSet;
|
237
|
Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
|
238
|
|
239
|
try{
|
240
|
Set<String> taxonIdSet = new HashSet<String>();
|
241
|
while (rs.next()){
|
242
|
handleForeignKey(rs, taxonIdSet, "taxonId");
|
243
|
}
|
244
|
|
245
|
//taxon map
|
246
|
nameSpace = BerlinModelTaxonImport.NAMESPACE;
|
247
|
cdmClass = TaxonBase.class;
|
248
|
idSet = taxonIdSet;
|
249
|
Map<String, TaxonBase> objectMap = (Map<String, TaxonBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
|
250
|
result.put(nameSpace, objectMap);
|
251
|
|
252
|
} catch (SQLException e) {
|
253
|
throw new RuntimeException(e);
|
254
|
}
|
255
|
return result;
|
256
|
}
|
257
|
|
258
|
|
259
|
/**
|
260
|
* Use same TaxonDescription if two records belong to the same taxon
|
261
|
* @param newTaxonId
|
262
|
* @param oldTaxonId
|
263
|
* @param oldDescription
|
264
|
* @param taxonMap
|
265
|
* @return
|
266
|
*/
|
267
|
private TaxonDescription getTaxonDescription(int newTaxonId, int oldTaxonId, TaxonDescription oldDescription, Map<String, TaxonBase> taxonMap, int factId, Reference<?> sourceSec){
|
268
|
TaxonDescription result = null;
|
269
|
if (oldDescription == null || newTaxonId != oldTaxonId){
|
270
|
TaxonBase<?> taxonBase = taxonMap.get(String.valueOf(newTaxonId));
|
271
|
//TODO for testing
|
272
|
//TaxonBase taxonBase = Taxon.NewInstance(BotanicalName.NewInstance(Rank.SPECIES()), null);
|
273
|
Taxon taxon;
|
274
|
if ( taxonBase instanceof Taxon ) {
|
275
|
taxon = (Taxon) taxonBase;
|
276
|
} else if (taxonBase != null) {
|
277
|
logger.warn("TaxonBase for Fact(Specimen) with factId" + factId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName());
|
278
|
return null;
|
279
|
} else {
|
280
|
logger.warn("TaxonBase for Fact(Specimen) " + factId + " is null.");
|
281
|
return null;
|
282
|
}
|
283
|
Set<TaxonDescription> descriptionSet= taxon.getDescriptions();
|
284
|
if (descriptionSet.size() > 0) {
|
285
|
result = descriptionSet.iterator().next();
|
286
|
}else{
|
287
|
result = TaxonDescription.NewInstance();
|
288
|
result.setTitleCache(sourceSec.getTitleCache(), true);
|
289
|
taxon.addDescription(result);
|
290
|
}
|
291
|
}else{
|
292
|
result = oldDescription;
|
293
|
}
|
294
|
return result;
|
295
|
}
|
296
|
|
297
|
|
298
|
/* (non-Javadoc)
|
299
|
* @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
|
300
|
*/
|
301
|
@Override
|
302
|
protected boolean doCheck(BerlinModelImportState state){
|
303
|
IOValidator<BerlinModelImportState> validator = new AlgaTerraSpecimenImportValidator();
|
304
|
return validator.validate(state);
|
305
|
}
|
306
|
|
307
|
/* (non-Javadoc)
|
308
|
* @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getTableName()
|
309
|
*/
|
310
|
@Override
|
311
|
protected String getTableName() {
|
312
|
return dbTableName;
|
313
|
}
|
314
|
|
315
|
/* (non-Javadoc)
|
316
|
* @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getPluralString()
|
317
|
*/
|
318
|
@Override
|
319
|
public String getPluralString() {
|
320
|
return pluralString;
|
321
|
}
|
322
|
|
323
|
/* (non-Javadoc)
|
324
|
* @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
|
325
|
*/
|
326
|
protected boolean isIgnore(BerlinModelImportState state){
|
327
|
return ! ((AlgaTerraImportState)state).getAlgaTerraConfigurator().isDoSpecimen();
|
328
|
}
|
329
|
|
330
|
}
|