1
|
/**
|
2
|
* Copyright (C) 2007 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
|
10
|
package eu.etaxonomy.cdm.io.pesi.indexFungorum;
|
11
|
|
12
|
import java.sql.ResultSet;
|
13
|
import java.sql.ResultSetMetaData;
|
14
|
import java.sql.SQLException;
|
15
|
import java.util.HashMap;
|
16
|
import java.util.HashSet;
|
17
|
import java.util.Map;
|
18
|
import java.util.Set;
|
19
|
import java.util.UUID;
|
20
|
|
21
|
import org.apache.commons.lang.StringUtils;
|
22
|
import org.apache.log4j.Logger;
|
23
|
|
24
|
import com.ibm.lsid.MalformedLSIDException;
|
25
|
|
26
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
27
|
import eu.etaxonomy.cdm.io.common.CdmImportBase;
|
28
|
import eu.etaxonomy.cdm.io.common.ICdmIO;
|
29
|
import eu.etaxonomy.cdm.io.common.IPartitionedIO;
|
30
|
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
|
31
|
import eu.etaxonomy.cdm.io.common.Source;
|
32
|
import eu.etaxonomy.cdm.io.common.mapping.out.DbLastActionMapper;
|
33
|
import eu.etaxonomy.cdm.io.pesi.out.PesiTransformer;
|
34
|
import eu.etaxonomy.cdm.model.agent.Team;
|
35
|
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
|
36
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
37
|
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
|
38
|
import eu.etaxonomy.cdm.model.common.LSID;
|
39
|
import eu.etaxonomy.cdm.model.common.Marker;
|
40
|
import eu.etaxonomy.cdm.model.common.MarkerType;
|
41
|
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
|
42
|
import eu.etaxonomy.cdm.model.common.TimePeriod;
|
43
|
import eu.etaxonomy.cdm.model.name.NonViralName;
|
44
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
45
|
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
|
46
|
import eu.etaxonomy.cdm.model.taxon.Classification;
|
47
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
48
|
import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
|
49
|
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
|
50
|
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
|
51
|
|
52
|
/**
|
53
|
* @author a.mueller
|
54
|
* @created 27.02.2012
|
55
|
*/
|
56
|
public abstract class IndexFungorumImportBase extends CdmImportBase<IndexFungorumImportConfigurator, IndexFungorumImportState> implements ICdmIO<IndexFungorumImportState>, IPartitionedIO<IndexFungorumImportState> {
|
57
|
private static final Logger logger = Logger.getLogger(IndexFungorumImportBase.class);
|
58
|
|
59
|
//NAMESPACES
|
60
|
protected static final String NAMESPACE_REFERENCE = "reference";
|
61
|
protected static final String NAMESPACE_TAXON = "Taxon";
|
62
|
protected static final String NAMESPACE_SUPRAGENERIC_NAMES = "SupragenericNames";
|
63
|
protected static final String NAMESPACE_GENERA = "Genera";
|
64
|
protected static final String NAMESPACE_SPECIES = "Species";
|
65
|
|
66
|
|
67
|
protected static final String INCERTAE_SEDIS = "Incertae sedis";
|
68
|
protected static final String FOSSIL_FUNGI = "Fossil Fungi";
|
69
|
|
70
|
protected static final String SOURCE_REFERENCE = "SOURCE_REFERENCE";
|
71
|
|
72
|
|
73
|
|
74
|
|
75
|
private String pluralString;
|
76
|
private String dbTableName;
|
77
|
//TODO needed?
|
78
|
private Class cdmTargetClass;
|
79
|
|
80
|
|
81
|
|
82
|
/**
 * Stores the settings that are shared by all Index Fungorum import classes.
 *
 * @param pluralString plural label of the imported records; it is returned by
 *        {@link #getPluralString()} and shows up in the log messages
 * @param dbTableName name of the source table; it is returned by
 *        {@link #getTableName()} and used to build the default id query
 * @param cdmTargetClass class handed in by the subclass; it is only stored here
 */
public IndexFungorumImportBase(String pluralString, String dbTableName, Class cdmTargetClass) {
    this.cdmTargetClass = cdmTargetClass;
    this.dbTableName = dbTableName;
    this.pluralString = pluralString;
}
|
91
|
|
92
|
protected void doInvoke(IndexFungorumImportState state){
|
93
|
logger.info("start make " + getPluralString() + " ...");
|
94
|
IndexFungorumImportConfigurator config = state.getConfig();
|
95
|
Source source = config.getSource();
|
96
|
|
97
|
String strIdQuery = getIdQuery();
|
98
|
String strRecordQuery = getRecordQuery(config);
|
99
|
|
100
|
int recordsPerTransaction = config.getRecordsPerTransaction();
|
101
|
try{
|
102
|
ResultSetPartitioner partitioner = ResultSetPartitioner.NewInstance(source, strIdQuery, strRecordQuery, recordsPerTransaction);
|
103
|
while (partitioner.nextPartition()){
|
104
|
partitioner.doPartition(this, state);
|
105
|
}
|
106
|
} catch (SQLException e) {
|
107
|
logger.error("SQLException:" + e);
|
108
|
state.setUnsuccessfull();
|
109
|
return;
|
110
|
}
|
111
|
|
112
|
logger.info("end make " + getPluralString() + " ... " + getSuccessString(true));
|
113
|
return;
|
114
|
}
|
115
|
|
116
|
|
117
|
|
118
|
|
119
|
|
120
|
public boolean doPartition(ResultSetPartitioner partitioner, IndexFungorumImportState state) {
|
121
|
boolean success = true ;
|
122
|
Set objectsToSave = new HashSet<CdmBase>();
|
123
|
|
124
|
// DbImportMapping<?, ?> mapping = getMapping();
|
125
|
// mapping.initialize(state, cdmTargetClass);
|
126
|
|
127
|
ResultSet rs = partitioner.getResultSet();
|
128
|
try{
|
129
|
while (rs.next()){
|
130
|
// success &= mapping.invoke(rs,objectsToSave);
|
131
|
}
|
132
|
} catch (SQLException e) {
|
133
|
logger.error("SQLException:" + e);
|
134
|
return false;
|
135
|
}
|
136
|
|
137
|
partitioner.startDoSave();
|
138
|
getCommonService().save(objectsToSave);
|
139
|
return success;
|
140
|
}
|
141
|
|
142
|
|
143
|
/**
|
144
|
* @return
|
145
|
*/
|
146
|
protected abstract String getRecordQuery(IndexFungorumImportConfigurator config);
|
147
|
|
148
|
/**
|
149
|
* @return
|
150
|
*/
|
151
|
protected String getIdQuery(){
|
152
|
String result = " SELECT id FROM " + getTableName();
|
153
|
return result;
|
154
|
}
|
155
|
|
156
|
/* (non-Javadoc)
|
157
|
* @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getPluralString()
|
158
|
*/
|
159
|
public String getPluralString(){
|
160
|
return pluralString;
|
161
|
}
|
162
|
|
163
|
/**
|
164
|
* @return
|
165
|
*/
|
166
|
protected String getTableName(){
|
167
|
return this.dbTableName;
|
168
|
}
|
169
|
|
170
|
|
171
|
protected boolean resultSetHasColumn(ResultSet rs, String columnName){
|
172
|
try {
|
173
|
ResultSetMetaData metaData = rs.getMetaData();
|
174
|
for (int i = 0; i < metaData.getColumnCount(); i++){
|
175
|
if (metaData.getColumnName(i + 1).equalsIgnoreCase(columnName)){
|
176
|
return true;
|
177
|
}
|
178
|
}
|
179
|
return false;
|
180
|
} catch (SQLException e) {
|
181
|
logger.warn("Exception in resultSetHasColumn");
|
182
|
return false;
|
183
|
}
|
184
|
}
|
185
|
|
186
|
protected boolean checkSqlServerColumnExists(Source source, String tableName, String columnName){
|
187
|
String strQuery = "SELECT Count(t.id) as n " +
|
188
|
" FROM sysobjects AS t " +
|
189
|
" INNER JOIN syscolumns AS c ON t.id = c.id " +
|
190
|
" WHERE (t.xtype = 'U') AND " +
|
191
|
" (t.name = '" + tableName + "') AND " +
|
192
|
" (c.name = '" + columnName + "')";
|
193
|
ResultSet rs = source.getResultSet(strQuery) ;
|
194
|
int n;
|
195
|
try {
|
196
|
rs.next();
|
197
|
n = rs.getInt("n");
|
198
|
return n>0;
|
199
|
} catch (SQLException e) {
|
200
|
e.printStackTrace();
|
201
|
return false;
|
202
|
}
|
203
|
|
204
|
}
|
205
|
|
206
|
/**
|
207
|
* Returns a map that holds all values of a ResultSet. This is needed if a value needs to
|
208
|
* be accessed twice
|
209
|
* @param rs
|
210
|
* @return
|
211
|
* @throws SQLException
|
212
|
*/
|
213
|
protected Map<String, Object> getValueMap(ResultSet rs) throws SQLException{
|
214
|
try{
|
215
|
Map<String, Object> valueMap = new HashMap<String, Object>();
|
216
|
int colCount = rs.getMetaData().getColumnCount();
|
217
|
for (int c = 0; c < colCount ; c++){
|
218
|
Object value = rs.getObject(c+1);
|
219
|
String label = rs.getMetaData().getColumnLabel(c+1).toLowerCase();
|
220
|
if (value != null && ! CdmUtils.Nz(value.toString()).trim().equals("")){
|
221
|
valueMap.put(label, value);
|
222
|
}
|
223
|
}
|
224
|
return valueMap;
|
225
|
}catch(SQLException e){
|
226
|
throw e;
|
227
|
}
|
228
|
}
|
229
|
|
230
|
|
231
|
/**
|
232
|
* Reads a foreign key field from the result set and adds its value to the idSet.
|
233
|
* @param rs
|
234
|
* @param teamIdSet
|
235
|
* @throws SQLException
|
236
|
*/
|
237
|
protected void handleForeignKey(ResultSet rs, Set<String> idSet, String attributeName)
|
238
|
throws SQLException {
|
239
|
Object idObj = rs.getObject(attributeName);
|
240
|
if (idObj != null){
|
241
|
String id = String.valueOf(idObj);
|
242
|
idSet.add(id);
|
243
|
}
|
244
|
}
|
245
|
|
246
|
/**
|
247
|
* Returns true if i is a multiple of recordsPerTransaction
|
248
|
* @param i
|
249
|
* @param recordsPerTransaction
|
250
|
* @return
|
251
|
*/
|
252
|
protected boolean loopNeedsHandling(int i, int recordsPerLoop) {
|
253
|
startTransaction();
|
254
|
return (i % recordsPerLoop) == 0;
|
255
|
}
|
256
|
|
257
|
protected void doLogPerLoop(int count, int recordsPerLog, String pluralString){
|
258
|
if ((count % recordsPerLog ) == 0 && count!= 0 ){ logger.info(pluralString + " handled: " + (count));}
|
259
|
}
|
260
|
|
261
|
|
262
|
protected void makeAuthorAndPublication(IndexFungorumImportState state, ResultSet rs, NonViralName name) throws SQLException {
|
263
|
//authors
|
264
|
NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
|
265
|
String authorStr = rs.getString("AUTHORS");
|
266
|
if (StringUtils.isNotBlank(authorStr)){
|
267
|
try {
|
268
|
parser.parseAuthors(name, authorStr);
|
269
|
} catch (StringNotParsableException e){
|
270
|
logger.warn("Authorstring not parsable: " + authorStr);
|
271
|
name.setAuthorshipCache(authorStr);
|
272
|
}
|
273
|
}
|
274
|
|
275
|
//page
|
276
|
String page = rs.getString("PAGE");
|
277
|
if (StringUtils.isNotBlank(page)){
|
278
|
name.setNomenclaturalMicroReference(page);
|
279
|
}
|
280
|
|
281
|
//Reference
|
282
|
Reference<?> ref = ReferenceFactory.newGeneric();
|
283
|
boolean hasInReference = false;
|
284
|
//publishing authors
|
285
|
Team pubAuthor = null;
|
286
|
String pubAuthorStr = rs.getString("PUBLISHING AUTHORS");
|
287
|
if (StringUtils.isNotBlank(pubAuthorStr)){
|
288
|
if (StringUtils.isNotBlank(authorStr)){
|
289
|
if (! pubAuthorStr.equals(authorStr)){
|
290
|
pubAuthor = Team.NewTitledInstance(pubAuthorStr,pubAuthorStr);
|
291
|
}
|
292
|
}else{
|
293
|
logger.warn("'AUTHORS' is blank for not empty PUBLISHING_AUTHORS. This is not yet handled.");
|
294
|
}
|
295
|
}
|
296
|
|
297
|
//inRef + inRefAuthor
|
298
|
if (pubAuthor != null){
|
299
|
Reference<?> inRef = ReferenceFactory.newGeneric();
|
300
|
inRef.setAuthorTeam(pubAuthor);
|
301
|
ref.setInReference(inRef);
|
302
|
hasInReference = true;
|
303
|
}
|
304
|
|
305
|
//refAuthor
|
306
|
TeamOrPersonBase<?> refAuthor = CdmBase.deproxy(name.getCombinationAuthorTeam(), TeamOrPersonBase.class);
|
307
|
if (refAuthor == null){
|
308
|
refAuthor = Team.NewTitledInstance(authorStr, authorStr);
|
309
|
}
|
310
|
ref.setAuthorTeam(refAuthor);
|
311
|
//location
|
312
|
String location = rs.getString("pubIMIAbbrLoc");
|
313
|
if (StringUtils.isNotBlank(location)){
|
314
|
if (hasInReference){
|
315
|
ref.getInReference().setPlacePublished(location);
|
316
|
}else{
|
317
|
ref.setPlacePublished(location);
|
318
|
}
|
319
|
}
|
320
|
//title
|
321
|
String titleMain = rs.getString("pubIMIAbbr");
|
322
|
String supTitle = rs.getString("pubIMISupAbbr");
|
323
|
String title = CdmUtils.concat(", ", titleMain, supTitle);
|
324
|
//preliminary to comply with current Index Fungorum display
|
325
|
if (StringUtils.isNotBlank(location)){
|
326
|
title += " (" + location +")";
|
327
|
}
|
328
|
//end preliminary
|
329
|
if (StringUtils.isNotBlank(title)){
|
330
|
if (hasInReference){
|
331
|
ref.getInReference().setTitle(title);
|
332
|
}else{
|
333
|
ref.setTitle(title);
|
334
|
}
|
335
|
}
|
336
|
//Volume
|
337
|
String volume = CdmUtils.Nz(rs.getString("VOLUME")).trim();
|
338
|
String part = rs.getString("PART");
|
339
|
if (StringUtils.isNotBlank(part)){
|
340
|
volume = volume + "(" + part + ")";
|
341
|
if (StringUtils.isBlank(volume)){
|
342
|
logger.warn("'Part' is not blank for blank volume. This may be an inconsistency.");
|
343
|
}
|
344
|
}
|
345
|
ref.setVolume(volume);
|
346
|
|
347
|
//year
|
348
|
String yearOfPubl = rs.getString("YEAR OF PUBLICATION");
|
349
|
String yearOnPubl = rs.getString("YEAR ON PUBLICATION");
|
350
|
String year = null;
|
351
|
if (StringUtils.isNotBlank(yearOfPubl)){
|
352
|
year = yearOfPubl.trim();
|
353
|
}
|
354
|
if (StringUtils.isNotBlank(yearOnPubl)){
|
355
|
year = CdmUtils.concat(" ", year, "[" + yearOnPubl + "]");
|
356
|
}
|
357
|
if (year != null){
|
358
|
ref.setDatePublished(TimePeriodParser.parseString(year));
|
359
|
}
|
360
|
|
361
|
//preliminary, set protected titlecache as Generic Cache Generation with in references currently doesn't fully work yet
|
362
|
String titleCache = CdmUtils.concat(", ", pubAuthorStr, title);
|
363
|
if ( StringUtils.isNotBlank(pubAuthorStr)){
|
364
|
titleCache = "in " + titleCache;
|
365
|
}
|
366
|
titleCache = CdmUtils.concat(" ", titleCache, volume);
|
367
|
titleCache = CdmUtils.concat(": ", titleCache, page);
|
368
|
titleCache = CdmUtils.concat(". ", titleCache, year);
|
369
|
ref.setTitleCache(titleCache, true);
|
370
|
|
371
|
//set nom ref
|
372
|
if (StringUtils.isNotBlank(titleCache)){
|
373
|
name.setNomenclaturalReference(ref);
|
374
|
}
|
375
|
}
|
376
|
|
377
|
|
378
|
protected MarkerType getNoLastActionMarkerType(IndexFungorumImportState state) {
|
379
|
return getMarkerType(state, DbLastActionMapper.uuidMarkerTypeHasNoLastAction,
|
380
|
"has no last action", "No last action information available", "no last action");
|
381
|
}
|
382
|
|
383
|
|
384
|
protected void makeSource(IndexFungorumImportState state, Taxon taxon, Integer id, String namespace) {
|
385
|
//source reference
|
386
|
Reference<?> sourceReference = state.getRelatedObject(NAMESPACE_REFERENCE, SOURCE_REFERENCE, Reference.class);
|
387
|
//source
|
388
|
String strId = (id == null ? null : String.valueOf(id));
|
389
|
IdentifiableSource source = IdentifiableSource.NewInstance(OriginalSourceType.Import, strId, namespace, sourceReference, null);
|
390
|
taxon.addSource(source);
|
391
|
|
392
|
//no last action
|
393
|
MarkerType hasNoLastAction = getNoLastActionMarkerType(state);
|
394
|
taxon.addMarker(Marker.NewInstance(hasNoLastAction, true));
|
395
|
//LSID
|
396
|
makeLSID(taxon, strId, state);
|
397
|
}
|
398
|
|
399
|
private void makeLSID(Taxon taxon, String strId, IndexFungorumImportState state) {
|
400
|
try {
|
401
|
if (StringUtils.isNotBlank(strId) && strId != null){
|
402
|
LSID lsid = new LSID(IndexFungorumTransformer.LSID_PREFIX + strId);
|
403
|
taxon.setLsid(lsid);
|
404
|
}else{
|
405
|
logger.warn("No ID available for taxon " + taxon.getTitleCache() + ", " + taxon.getUuid());
|
406
|
MarkerType missingGUID = getMissingGUIDMarkerType(state);
|
407
|
taxon.addMarker(Marker.NewInstance(missingGUID, true));
|
408
|
}
|
409
|
} catch (MalformedLSIDException e) {
|
410
|
logger.error(e.getMessage());
|
411
|
}
|
412
|
}
|
413
|
|
414
|
protected MarkerType getMissingGUIDMarkerType(IndexFungorumImportState state) {
|
415
|
MarkerType missingGUID = getMarkerType(state, PesiTransformer.uuidMarkerGuidIsMissing, "GUID is missing", "GUID is missing", null);
|
416
|
return missingGUID;
|
417
|
}
|
418
|
|
419
|
|
420
|
protected Classification getClassification(IndexFungorumImportState state) {
|
421
|
Classification result;
|
422
|
UUID classificationUuid = state.getTreeUuid(state.getConfig().getSourceReference());
|
423
|
if (classificationUuid == null){
|
424
|
Reference<?> sourceReference = state.getRelatedObject(NAMESPACE_REFERENCE, SOURCE_REFERENCE, Reference.class);
|
425
|
result = makeTreeMemSave(state, sourceReference);
|
426
|
} else {
|
427
|
result = getClassificationService().find(classificationUuid);
|
428
|
}
|
429
|
return result;
|
430
|
}
|
431
|
|
432
|
|
433
|
|
434
|
|
435
|
|
436
|
}
|