|
1 |
/**
|
|
2 |
* Copyright (C) 2007 EDIT
|
|
3 |
* European Distributed Institute of Taxonomy
|
|
4 |
* http://www.e-taxonomy.eu
|
|
5 |
*
|
|
6 |
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
|
7 |
* See LICENSE.TXT at the top of this package for the full license terms.
|
|
8 |
*/
|
|
9 |
|
|
10 |
package eu.etaxonomy.cdm.io.globis;
|
|
11 |
|
|
12 |
import java.sql.ResultSet;
|
|
13 |
import java.sql.SQLException;
|
|
14 |
import java.util.HashMap;
|
|
15 |
import java.util.HashSet;
|
|
16 |
import java.util.Map;
|
|
17 |
import java.util.Set;
|
|
18 |
|
|
19 |
import org.apache.commons.lang.StringUtils;
|
|
20 |
import org.apache.log4j.Logger;
|
|
21 |
import org.springframework.stereotype.Component;
|
|
22 |
|
|
23 |
import eu.etaxonomy.cdm.common.CdmUtils;
|
|
24 |
import eu.etaxonomy.cdm.io.common.IOValidator;
|
|
25 |
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
|
|
26 |
import eu.etaxonomy.cdm.io.globis.validation.GlobisCurrentSpeciesImportValidator;
|
|
27 |
import eu.etaxonomy.cdm.model.common.CdmBase;
|
|
28 |
import eu.etaxonomy.cdm.model.common.Language;
|
|
29 |
import eu.etaxonomy.cdm.model.description.Distribution;
|
|
30 |
import eu.etaxonomy.cdm.model.description.PresenceTerm;
|
|
31 |
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
|
32 |
import eu.etaxonomy.cdm.model.location.WaterbodyOrCountry;
|
|
33 |
import eu.etaxonomy.cdm.model.name.Rank;
|
|
34 |
import eu.etaxonomy.cdm.model.name.ZoologicalName;
|
|
35 |
import eu.etaxonomy.cdm.model.reference.Reference;
|
|
36 |
import eu.etaxonomy.cdm.model.taxon.Classification;
|
|
37 |
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
|
38 |
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
|
39 |
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
|
|
40 |
|
|
41 |
|
|
42 |
/**
|
|
43 |
* @author a.mueller
|
|
44 |
* @created 20.02.2010
|
|
45 |
* @version 1.0
|
|
46 |
*/
|
|
47 |
@Component
|
|
48 |
public class GlobisCommonNameImport extends GlobisImportBase<Taxon> {
|
|
49 |
/** Logger of this import class. */
private static final Logger logger = Logger.getLogger(GlobisCommonNameImport.class);

/** Name of the source table; used to build the id query. */
private static final String dbTableName = "species_language";

/** Label for the imported records; used in log messages and passed to the base class. */
private static final String pluralString = "common names";

/** Target class handed to the base class constructor. */
private static final Class cdmTargetClass = Taxon.class;  //not needed

/** Number of handled records between two progress log messages. */
private int modCount = 10000;

/**
 * Creates the import and hands the record label, the source table name
 * and the target class to the base class.
 */
public GlobisCommonNameImport(){
    super(pluralString, dbTableName, cdmTargetClass);
}
|
|
59 |
|
|
60 |
|
|
61 |
|
|
62 |
|
|
63 |
/* (non-Javadoc)
|
|
64 |
* @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#getIdQuery()
|
|
65 |
*/
|
|
66 |
@Override
|
|
67 |
protected String getIdQuery() {
|
|
68 |
String strRecordQuery =
|
|
69 |
" SELECT ID " +
|
|
70 |
" FROM " + dbTableName;
|
|
71 |
return strRecordQuery;
|
|
72 |
}
|
|
73 |
|
|
74 |
|
|
75 |
|
|
76 |
|
|
77 |
/* (non-Javadoc)
|
|
78 |
* @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
|
|
79 |
*/
|
|
80 |
@Override
|
|
81 |
protected String getRecordQuery(GlobisImportConfigurator config) {
|
|
82 |
String strRecordQuery =
|
|
83 |
" SELECT * " +
|
|
84 |
" FROM " + getTableName() + " sl " +
|
|
85 |
" WHERE ( sl.ID IN (" + ID_LIST_TOKEN + ") )";
|
|
86 |
return strRecordQuery;
|
|
87 |
}
|
|
88 |
|
|
89 |
|
|
90 |
|
|
91 |
/* (non-Javadoc)
|
|
92 |
* @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doPartition(eu.etaxonomy.cdm.io.common.ResultSetPartitioner, eu.etaxonomy.cdm.io.globis.GlobisImportState)
|
|
93 |
*/
|
|
94 |
@Override
|
|
95 |
public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
|
|
96 |
boolean success = true;
|
|
97 |
|
|
98 |
Set<TaxonBase> objectsToSave = new HashSet<TaxonBase>();
|
|
99 |
|
|
100 |
Map<String, Taxon> taxonMap = (Map<String, Taxon>) partitioner.getObjectMap(TAXON_NAMESPACE);
|
|
101 |
// Map<String, DerivedUnit> ecoFactDerivedUnitMap = (Map<String, DerivedUnit>) partitioner.getObjectMap(ECO_FACT_DERIVED_UNIT_NAMESPACE);
|
|
102 |
|
|
103 |
ResultSet rs = partitioner.getResultSet();
|
|
104 |
|
|
105 |
try {
|
|
106 |
|
|
107 |
int i = 0;
|
|
108 |
|
|
109 |
//for each common name
|
|
110 |
while (rs.next()){
|
|
111 |
|
|
112 |
if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
|
|
113 |
|
|
114 |
Integer taxonId = rs.getInt("IdCrrentSpec");
|
|
115 |
|
|
116 |
|
|
117 |
//String dtSpcJahr -> ignore !
|
|
118 |
//empty: fiSpcLiteratur
|
|
119 |
|
|
120 |
//TODO
|
|
121 |
//fiSpcspcgrptax
|
|
122 |
|
|
123 |
|
|
124 |
|
|
125 |
try {
|
|
126 |
|
|
127 |
//source ref
|
|
128 |
Reference<?> sourceRef = state.getTransactionalSourceReference();
|
|
129 |
|
|
130 |
//species
|
|
131 |
Taxon species = createObject(rs, state);
|
|
132 |
|
|
133 |
|
|
134 |
handleCountries(state, rs, species);
|
|
135 |
|
|
136 |
this.doIdCreatedUpdatedNotes(state, species, rs, taxonId, TAXON_NAMESPACE);
|
|
137 |
|
|
138 |
objectsToSave.add(species);
|
|
139 |
|
|
140 |
|
|
141 |
} catch (Exception e) {
|
|
142 |
logger.warn("Exception in current_species: IDcurrentspec " + taxonId + ". " + e.getMessage());
|
|
143 |
// e.printStackTrace();
|
|
144 |
}
|
|
145 |
|
|
146 |
}
|
|
147 |
|
|
148 |
// logger.warn("Specimen: " + countSpecimen + ", Descriptions: " + countDescriptions );
|
|
149 |
|
|
150 |
logger.warn(pluralString + " to save: " + objectsToSave.size());
|
|
151 |
getTaxonService().save(objectsToSave);
|
|
152 |
|
|
153 |
return success;
|
|
154 |
} catch (SQLException e) {
|
|
155 |
logger.error("SQLException:" + e);
|
|
156 |
return false;
|
|
157 |
}
|
|
158 |
}
|
|
159 |
|
|
160 |
private void handleCountries(GlobisImportState state, ResultSet rs, Taxon species) throws SQLException {
|
|
161 |
String countriesStr = rs.getString("dtSpcCountries");
|
|
162 |
if (isBlank(countriesStr)){
|
|
163 |
return;
|
|
164 |
}
|
|
165 |
String[] countriesSplit = countriesStr.split(";");
|
|
166 |
for (String countryStr : countriesSplit){
|
|
167 |
if (isBlank(countryStr)){
|
|
168 |
continue;
|
|
169 |
}
|
|
170 |
countryStr = countryStr.trim();
|
|
171 |
|
|
172 |
//TODO use isComplete
|
|
173 |
boolean isComplete = countryStr.endsWith(".");
|
|
174 |
if (isComplete){
|
|
175 |
countryStr = countryStr.substring(0,countryStr.length() - 1).trim();
|
|
176 |
}
|
|
177 |
boolean isDoubtful = countryStr.endsWith("[?]");
|
|
178 |
if (isDoubtful){
|
|
179 |
countryStr = countryStr.substring(0,countryStr.length() - 3).trim();
|
|
180 |
}
|
|
181 |
if (countryStr.startsWith("?")){
|
|
182 |
isDoubtful = true;
|
|
183 |
countryStr = countryStr.substring(1).trim();
|
|
184 |
}
|
|
185 |
|
|
186 |
|
|
187 |
|
|
188 |
countryStr = normalizeCountry(countryStr);
|
|
189 |
|
|
190 |
WaterbodyOrCountry country = getCountry(state, countryStr);
|
|
191 |
|
|
192 |
PresenceTerm status;
|
|
193 |
if (isDoubtful){
|
|
194 |
status = PresenceTerm.PRESENT_DOUBTFULLY();
|
|
195 |
}else{
|
|
196 |
status = PresenceTerm.PRESENT();
|
|
197 |
}
|
|
198 |
|
|
199 |
if (country != null){
|
|
200 |
TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
|
|
201 |
Distribution distribution = Distribution.NewInstance(country, status);
|
|
202 |
desc.addElement(distribution);
|
|
203 |
}else{
|
|
204 |
logger.warn("Country string not recognized: " + countryStr);
|
|
205 |
}
|
|
206 |
}
|
|
207 |
}
|
|
208 |
|
|
209 |
|
|
210 |
|
|
211 |
/**
|
|
212 |
* @param countryStr
|
|
213 |
* @return
|
|
214 |
*/
|
|
215 |
private String normalizeCountry(String countryStr) {
|
|
216 |
String result = countryStr.trim();
|
|
217 |
if (result.endsWith(".")){
|
|
218 |
result = result.substring(0,result.length() - 1);
|
|
219 |
}
|
|
220 |
return result;
|
|
221 |
}
|
|
222 |
|
|
223 |
|
|
224 |
/* (non-Javadoc)
|
|
225 |
* @see eu.etaxonomy.cdm.io.common.mapping.IMappingImport#createObject(java.sql.ResultSet, eu.etaxonomy.cdm.io.common.ImportStateBase)
|
|
226 |
*/
|
|
227 |
public Taxon createObject(ResultSet rs, GlobisImportState state)
|
|
228 |
throws SQLException {
|
|
229 |
String speciesEpi = rs.getString("dtSpcSpcakt");
|
|
230 |
String subGenusEpi = rs.getString("dtSpcSubgenakt");
|
|
231 |
String genusEpi = rs.getString("dtSpcGenusakt");
|
|
232 |
String author = rs.getString("dtSpcAutor");
|
|
233 |
|
|
234 |
|
|
235 |
ZoologicalName zooName = ZoologicalName.NewInstance(Rank.SPECIES());
|
|
236 |
zooName.setSpecificEpithet(speciesEpi);
|
|
237 |
if (StringUtils.isNotBlank(subGenusEpi)){
|
|
238 |
zooName.setInfraGenericEpithet(subGenusEpi);
|
|
239 |
}
|
|
240 |
zooName.setGenusOrUninomial(genusEpi);
|
|
241 |
handleAuthorAndYear(author, zooName);
|
|
242 |
|
|
243 |
Taxon taxon = Taxon.NewInstance(zooName, state.getTransactionalSourceReference());
|
|
244 |
|
|
245 |
return taxon;
|
|
246 |
}
|
|
247 |
|
|
248 |
|
|
249 |
|
|
250 |
|
|
251 |
|
|
252 |
/* (non-Javadoc)
|
|
253 |
* @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
|
|
254 |
*/
|
|
255 |
public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
|
|
256 |
String nameSpace;
|
|
257 |
Class cdmClass;
|
|
258 |
Set<String> idSet;
|
|
259 |
Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
|
|
260 |
try{
|
|
261 |
Set<String> taxonIdSet = new HashSet<String>();
|
|
262 |
|
|
263 |
while (rs.next()){
|
|
264 |
// handleForeignKey(rs, taxonIdSet, "taxonId");
|
|
265 |
}
|
|
266 |
|
|
267 |
//taxon map
|
|
268 |
nameSpace = TAXON_NAMESPACE;
|
|
269 |
cdmClass = Taxon.class;
|
|
270 |
idSet = taxonIdSet;
|
|
271 |
Map<String, Taxon> objectMap = (Map<String, Taxon>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
|
|
272 |
result.put(nameSpace, objectMap);
|
|
273 |
|
|
274 |
|
|
275 |
} catch (SQLException e) {
|
|
276 |
throw new RuntimeException(e);
|
|
277 |
}
|
|
278 |
return result;
|
|
279 |
}
|
|
280 |
|
|
281 |
/* (non-Javadoc)
|
|
282 |
* @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
|
|
283 |
*/
|
|
284 |
@Override
|
|
285 |
protected boolean doCheck(GlobisImportState state){
|
|
286 |
IOValidator<GlobisImportState> validator = new GlobisCurrentSpeciesImportValidator();
|
|
287 |
return validator.validate(state);
|
|
288 |
}
|
|
289 |
|
|
290 |
|
|
291 |
/* (non-Javadoc)
|
|
292 |
* @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
|
|
293 |
*/
|
|
294 |
protected boolean isIgnore(GlobisImportState state){
|
|
295 |
return ! state.getConfig().isDoCurrentTaxa();
|
|
296 |
}
|
|
297 |
|
|
298 |
|
|
299 |
|
|
300 |
|
|
301 |
|
|
302 |
}
|
raw common name import for globis