1
|
/**
|
2
|
* Copyright (C) 2007 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
|
10
|
package eu.etaxonomy.cdm.io.globis;
|
11
|
|
12
|
import java.net.URI;
|
13
|
import java.sql.ResultSet;
|
14
|
import java.sql.SQLException;
|
15
|
import java.util.HashMap;
|
16
|
import java.util.HashSet;
|
17
|
import java.util.Map;
|
18
|
import java.util.Set;
|
19
|
|
20
|
import org.apache.log4j.Logger;
|
21
|
import org.springframework.stereotype.Component;
|
22
|
|
23
|
import eu.etaxonomy.cdm.io.common.IImportConfigurator;
|
24
|
import eu.etaxonomy.cdm.io.common.IOValidator;
|
25
|
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
|
26
|
import eu.etaxonomy.cdm.io.common.mapping.IMappingImport;
|
27
|
import eu.etaxonomy.cdm.io.globis.validation.GlobisReferenceImportValidator;
|
28
|
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
|
29
|
import eu.etaxonomy.cdm.model.common.Annotation;
|
30
|
import eu.etaxonomy.cdm.model.common.AnnotationType;
|
31
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
32
|
import eu.etaxonomy.cdm.model.reference.IArticle;
|
33
|
import eu.etaxonomy.cdm.model.reference.IBook;
|
34
|
import eu.etaxonomy.cdm.model.reference.IBookSection;
|
35
|
import eu.etaxonomy.cdm.model.reference.IJournal;
|
36
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
37
|
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
|
38
|
import eu.etaxonomy.cdm.model.reference.ReferenceType;
|
39
|
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
|
40
|
|
41
|
|
42
|
/**
|
43
|
* @author a.mueller
|
44
|
* @since 20.02.2010
|
45
|
*/
|
46
|
@Component
|
47
|
public class GlobisReferenceImport extends GlobisImportBase<Reference> implements IMappingImport<Reference, GlobisImportState>{
|
48
|
private static final Logger logger = Logger.getLogger(GlobisReferenceImport.class);
|
49
|
|
50
|
private int modCount = 10000;
|
51
|
private static final String pluralString = "references";
|
52
|
private static final String dbTableName = "Literatur";
|
53
|
private static final Class<?> cdmTargetClass = Reference.class;
|
54
|
|
55
|
/**
 * Creates the import and passes the plural label, the source table name
 * and the target class on to the base class.
 */
public GlobisReferenceImport() {
    super(pluralString, dbTableName, cdmTargetClass);
}
|
58
|
|
59
|
@Override
|
60
|
protected String getIdQuery() {
|
61
|
String strRecordQuery =
|
62
|
" SELECT refID " +
|
63
|
" FROM " + dbTableName
|
64
|
+ " WHERE RefSource like 'Original' or refID in (SELECT fiSpecRefId FROM specTax)";
|
65
|
return strRecordQuery;
|
66
|
}
|
67
|
|
68
|
@Override
|
69
|
protected String getRecordQuery(GlobisImportConfigurator config) {
|
70
|
String strRecordQuery =
|
71
|
" SELECT l.*, l.DateCreated as Created_When, l.CreatedBy as Created_Who," +
|
72
|
" l.ModifiedBy as Updated_who, l.DateModified as Updated_When, l.RefRemarks as Notes " +
|
73
|
" FROM " + getTableName() + " l " +
|
74
|
" WHERE ( l.refId IN (" + ID_LIST_TOKEN + ") )";
|
75
|
return strRecordQuery;
|
76
|
}
|
77
|
|
78
|
@Override
|
79
|
public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
|
80
|
boolean success = true;
|
81
|
|
82
|
Set<Reference> objectsToSave = new HashSet<Reference>();
|
83
|
|
84
|
ResultSet rs = partitioner.getResultSet();
|
85
|
|
86
|
try {
|
87
|
|
88
|
int i = 0;
|
89
|
|
90
|
//for each reference
|
91
|
while (rs.next()){
|
92
|
|
93
|
if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
|
94
|
|
95
|
|
96
|
handleSingleRecord(state, objectsToSave, rs);
|
97
|
|
98
|
}
|
99
|
|
100
|
// logger.warn("Specimen: " + countSpecimen + ", Descriptions: " + countDescriptions );
|
101
|
|
102
|
logger.warn(pluralString + " to save: " + objectsToSave.size());
|
103
|
getReferenceService().save(objectsToSave);
|
104
|
|
105
|
return success;
|
106
|
} catch (SQLException e) {
|
107
|
logger.error("SQLException:" + e);
|
108
|
return false;
|
109
|
}
|
110
|
}
|
111
|
|
112
|
/**
|
113
|
* @param state
|
114
|
* @param objectsToSave
|
115
|
* @param rs
|
116
|
* @throws SQLException
|
117
|
*/
|
118
|
private void handleSingleRecord(GlobisImportState state,
|
119
|
Set<Reference> objectsToSave, ResultSet rs) throws SQLException {
|
120
|
|
121
|
Integer refId = rs.getInt("RefId");
|
122
|
|
123
|
try {
|
124
|
|
125
|
String title = rs.getString("RefTitle");
|
126
|
String refJournal = rs.getString("RefJournal");
|
127
|
refJournal = normalizeRefJournal(refJournal);
|
128
|
String refBookTitle = rs.getString("RefBookTitle");
|
129
|
|
130
|
String refUrl = rs.getString("RefURL");
|
131
|
String refVolume = rs.getString("RefVolume");
|
132
|
String refYear = rs.getString("RefYear");
|
133
|
String refIssn = rs.getString("RefISSN");
|
134
|
String refRemarks = rs.getString("RefRemarks");
|
135
|
String refPublisher = rs.getString("RefPublisher");
|
136
|
String refPlace = rs.getString("RefPlace");
|
137
|
String refEdition = rs.getString("RefEdition");
|
138
|
String refEditor = rs.getString("RefEditor");
|
139
|
String refAuthor = rs.getString("RefAuthor");
|
140
|
String refPages = rs.getString("RefPages");
|
141
|
|
142
|
String isbn = null;
|
143
|
String issn = null;
|
144
|
if (isNotBlank(refIssn)){
|
145
|
refIssn = refIssn.trim();
|
146
|
if (refIssn.startsWith("ISBN")){
|
147
|
isbn = refIssn.replace("ISBN", "").trim();
|
148
|
}else if (refIssn.startsWith("ISSN")){
|
149
|
issn = refIssn.replace("ISSN", "").trim();
|
150
|
}else{
|
151
|
String pureNumbers = refIssn.replace("-", "").replace(" ", "");
|
152
|
if (pureNumbers.length() == 8){
|
153
|
issn = refIssn;
|
154
|
}else if (pureNumbers.length() == 10){
|
155
|
isbn = refIssn;
|
156
|
}else{
|
157
|
logger.warn("RefISSN could not be parsed: " + refIssn + ", refId: " + refId);
|
158
|
}
|
159
|
}
|
160
|
}
|
161
|
|
162
|
|
163
|
//source ref //TODO
|
164
|
Reference sourceRef = state.getTransactionalSourceReference();
|
165
|
|
166
|
Reference ref = createObject(rs, state);
|
167
|
testIsxnType(ref, isbn, issn, refId);
|
168
|
ref.setTitle(title);
|
169
|
|
170
|
//refAuthor
|
171
|
TeamOrPersonBase<?> author = makeAuthor(refAuthor, state);
|
172
|
ref.setAuthorship(author);
|
173
|
|
174
|
//inRef
|
175
|
if (isNotBlank(refJournal)){
|
176
|
//Article
|
177
|
if (ref.getType().equals(ReferenceType.Article) ){
|
178
|
Reference journal = getJournal(state, rs, refJournal);
|
179
|
ref.setInJournal(journal);
|
180
|
}else{
|
181
|
logger.warn("Reference type not supported for RefJournal. Type: " + ref.getType().toString() + ", refId: " + refId );
|
182
|
}
|
183
|
}
|
184
|
if (isNotBlank(refBookTitle)){
|
185
|
//BookSection
|
186
|
//TODO RefSerial
|
187
|
if (ref.getType().equals(ReferenceType.BookSection) ){
|
188
|
IBook book = getBook(state, rs, refBookTitle);
|
189
|
ref.setInBook(book);
|
190
|
}else if (ref.getType().equals(ReferenceType.Book)) {
|
191
|
ref.setTitle(refBookTitle);
|
192
|
}else{
|
193
|
logger.warn("Reference type not supported for RefBookTitle. Type: " + ref.getType().toString() + ", refId: " + refId );
|
194
|
}
|
195
|
}
|
196
|
|
197
|
IBookSection bookSection;
|
198
|
IBook book;
|
199
|
IArticle article;
|
200
|
IJournal journal;
|
201
|
|
202
|
|
203
|
//RefVolume
|
204
|
if (isNotBlank(refVolume)){
|
205
|
if (ref.getType().isVolumeReference()){
|
206
|
ref.setVolume(refVolume);
|
207
|
}else if(ref.getInReference() != null && ref.getInReference().getType().isVolumeReference()){
|
208
|
ref.getInReference().setVolume(refVolume);
|
209
|
}else{
|
210
|
logger.warn(ref.getType() + " does not support volume but volume exists, refId: " + refId);
|
211
|
}
|
212
|
}
|
213
|
|
214
|
//RefYear
|
215
|
//TODO check correct parsing for [] and full dates
|
216
|
if (isNotBlank(refYear)){
|
217
|
ref.setDatePublished(TimePeriodParser.parseStringVerbatim(refYear));
|
218
|
}
|
219
|
|
220
|
//refPages
|
221
|
if (isNotBlank(refPages)){
|
222
|
refPages = refPages.trim();
|
223
|
if (refPages.endsWith(".")){
|
224
|
refPages = refPages.substring(0, refPages.length()-1).trim();
|
225
|
}
|
226
|
ref.setPages(refPages);
|
227
|
}
|
228
|
|
229
|
//ISXN
|
230
|
if (isbn != null){
|
231
|
Reference isbnRef = getIsbnReference(ref, refId);
|
232
|
if (isbnRef != null){
|
233
|
isbnRef.setIsbn(isbn);
|
234
|
}
|
235
|
}
|
236
|
if(issn != null){
|
237
|
Reference issnRef = getIssnReference(ref, refId);
|
238
|
if (issnRef != null){
|
239
|
issnRef.setIssn(issn);
|
240
|
}
|
241
|
}
|
242
|
|
243
|
//refURL
|
244
|
if (isNotBlank(refUrl)){
|
245
|
URI uri = URI.create(refUrl);
|
246
|
ref.setUri(uri);
|
247
|
}
|
248
|
|
249
|
//refRemarks
|
250
|
if (isNotBlank(refRemarks)){
|
251
|
Annotation anno = Annotation.NewDefaultLanguageInstance(refRemarks);
|
252
|
anno.setAnnotationType(AnnotationType.EDITORIAL());
|
253
|
ref.addAnnotation(anno);
|
254
|
}
|
255
|
|
256
|
//Publisher + Place
|
257
|
handlePublisherAndPlace(refId, refPublisher, refPlace, ref);
|
258
|
|
259
|
//refEdition
|
260
|
if (isNotBlank(refEdition)){
|
261
|
Reference edRef = ref;
|
262
|
if (ref.getType() == ReferenceType.BookSection){
|
263
|
edRef = ref.getInReference();
|
264
|
}
|
265
|
if (edRef == null || edRef.getType() != ReferenceType.Book){
|
266
|
logger.warn("Incorrect refType " + ref.getType() + " for refererence with edition or inRef is null, " + refId);
|
267
|
}
|
268
|
}
|
269
|
|
270
|
//refEditor
|
271
|
if (isNotBlank(refEditor)){
|
272
|
Reference edsRef = ref;
|
273
|
if (ref.getType() == ReferenceType.BookSection){
|
274
|
edsRef = ref.getInReference();
|
275
|
}
|
276
|
if (edsRef == null || edsRef.getType() != ReferenceType.Book){
|
277
|
logger.warn("Reference type for RefEditor must be Book or Booksection but was " + ref.getType() + " or inRef was null, refId " + refId);
|
278
|
}
|
279
|
}
|
280
|
|
281
|
//id, created, notes
|
282
|
this.doIdCreatedUpdatedNotes(state, ref, rs, refId, REFERENCE_NAMESPACE);
|
283
|
|
284
|
|
285
|
|
286
|
//DONE
|
287
|
//RefType, RefTitle, RefJournal,
|
288
|
//RefId, ...
|
289
|
|
290
|
//TODO
|
291
|
|
292
|
//RefBookTitle, RefJournal, RefSerial, - mostly done
|
293
|
|
294
|
//RefIll only, RefPages,RefPages only,
|
295
|
|
296
|
//unclear
|
297
|
//RefDatePublished, RefVolPageFig,
|
298
|
//RefSource,
|
299
|
//RefLibrary, RefMarker,
|
300
|
//RefGeneralKeywords, RefGeoKeywords, RefSpecificKeywords, RefTaxKeywords, SpecificKeywordDummy
|
301
|
|
302
|
|
303
|
//no data
|
304
|
//CountryDummy
|
305
|
|
306
|
objectsToSave.add(ref);
|
307
|
|
308
|
|
309
|
} catch (Exception e) {
|
310
|
logger.warn("Exception in literature: RefId " + refId + ". " + e.getMessage());
|
311
|
e.printStackTrace();
|
312
|
}
|
313
|
}
|
314
|
|
315
|
private TeamOrPersonBase<?> makeAuthor(String refAuthor, GlobisImportState state) {
|
316
|
TeamOrPersonBase<?> author = GlobisAuthorImport.makeAuthor(refAuthor, state, getAgentService());
|
317
|
// getAgentService().update(author);
|
318
|
return author;
|
319
|
}
|
320
|
|
321
|
// private TeamOrPersonBase<?> makeAuthor(String refAuthor) {
|
322
|
// String[] split = refAuthor.split(";");
|
323
|
// List<String> singleAuthorStrings = new ArrayList<String>();
|
324
|
// for (String single : split){
|
325
|
// single = single.trim();
|
326
|
// if (single.startsWith("&")){
|
327
|
// single = single.substring(1).trim();
|
328
|
// }
|
329
|
// String[] split2 = single.split("&");
|
330
|
// for (String single2 : split2){
|
331
|
// singleAuthorStrings.add(single2);
|
332
|
// }
|
333
|
// }
|
334
|
//
|
335
|
// TeamOrPersonBase<?> result;
|
336
|
// if (singleAuthorStrings.size() > 1){
|
337
|
// Team team= Team.NewInstance();
|
338
|
// for (String str : singleAuthorStrings){
|
339
|
// Person person = makePerson(str);
|
340
|
// team.addTeamMember(person);
|
341
|
// }
|
342
|
// result = team;
|
343
|
// }else{
|
344
|
// result = makePerson(singleAuthorStrings.get(0));
|
345
|
// }
|
346
|
//
|
347
|
// //TODO deduplicate
|
348
|
// return result;
|
349
|
// }
|
350
|
//
|
351
|
// private Person makePerson(String string) {
|
352
|
// Person person = Person.NewTitledInstance(string.trim());
|
353
|
// //TODO deduplicate
|
354
|
// return person;
|
355
|
// }
|
356
|
|
357
|
/**
|
358
|
* @param refId
|
359
|
* @param refPublisher
|
360
|
* @param refPlace
|
361
|
* @param ref
|
362
|
*/
|
363
|
private void handlePublisherAndPlace(Integer refId, String refPublisher,
|
364
|
String refPlace, Reference ref) {
|
365
|
//refPublisher
|
366
|
if (isNotBlank(refPublisher)){
|
367
|
if (ref.getType().isPublication()){
|
368
|
ref.setPublisher(refPublisher);
|
369
|
}else if (ref.getInReference() != null && ref.getInReference().getType().isPublication()){
|
370
|
ref.getInReference().setPublisher(refPublisher);
|
371
|
}else{
|
372
|
logger.warn("RefPublisher can not be set, " + ref.getType() + ", refId " + refId);
|
373
|
}
|
374
|
}
|
375
|
|
376
|
//refPlace
|
377
|
if (isNotBlank(refPlace)){
|
378
|
if (ref.getType().isPublication()){
|
379
|
ref.setPlacePublished(refPlace);
|
380
|
}else if (ref.getInReference() != null && ref.getInReference().getType().isPublication()){
|
381
|
//TODO handle if not empty
|
382
|
ref.getInReference().setPlacePublished(refPlace);
|
383
|
}else{
|
384
|
logger.warn("RefPlace can not be set, " + ref.getType() + ", refId " + refId);
|
385
|
}
|
386
|
}
|
387
|
}
|
388
|
|
389
|
private Reference getIssnReference(Reference ref, int refId) {
|
390
|
if (ref == null){
|
391
|
return null;
|
392
|
}
|
393
|
if (ref.getType() == ReferenceType.Article){
|
394
|
ref = ref.getInReference();
|
395
|
}
|
396
|
if (ref.getType() != ReferenceType.Journal && ref.getType() != ReferenceType.Generic){
|
397
|
logger.warn("Invalid refType for issn, refId " + refId);
|
398
|
return null;
|
399
|
}else{
|
400
|
return ref;
|
401
|
}
|
402
|
}
|
403
|
|
404
|
private Reference getIsbnReference(Reference ref, int refId) {
|
405
|
if (ref == null){
|
406
|
return null;
|
407
|
}
|
408
|
if (ref.getType() == ReferenceType.BookSection){
|
409
|
ref = ref.getInReference();
|
410
|
}
|
411
|
if (ref.getType() != ReferenceType.Book && ref.getType() != ReferenceType.Generic){
|
412
|
logger.warn("Invalid refType for isbn, refId " + refId);
|
413
|
return null;
|
414
|
}else{
|
415
|
return ref;
|
416
|
}
|
417
|
}
|
418
|
|
419
|
private void testIsxnType(Reference ref, String isbn, String issn, int refID) {
|
420
|
if (isbn != null && ref.getType() != ReferenceType.Book && ref.getType() != ReferenceType.BookSection ){
|
421
|
logger.warn("Reference has isbn but is not a book type, type " + ref.getType() + ", row " + refID);
|
422
|
}else if (issn != null && ref.getType() != ReferenceType.Article){
|
423
|
logger.warn("Reference has issn but is not an article, row " + refID);
|
424
|
}
|
425
|
}
|
426
|
|
427
|
/**
|
428
|
* @param refJournal
|
429
|
* @return
|
430
|
*/
|
431
|
private String normalizeRefJournal(String refJournal) {
|
432
|
if (refJournal != null){
|
433
|
refJournal = refJournal.trim();
|
434
|
if (refJournal.equals(".")){
|
435
|
refJournal = null;
|
436
|
}
|
437
|
}
|
438
|
return refJournal;
|
439
|
}
|
440
|
|
441
|
|
442
|
|
443
|
|
444
|
private Reference getJournal(GlobisImportState state, ResultSet rs, String refJournal) throws SQLException {
|
445
|
|
446
|
Reference journal = ReferenceFactory.newJournal();
|
447
|
//TODO deduplicate
|
448
|
journal.setTitle(refJournal);
|
449
|
return journal;
|
450
|
}
|
451
|
|
452
|
private IBook getBook(GlobisImportState state, ResultSet rs, String refBookTitle) throws SQLException {
|
453
|
|
454
|
Reference book = ReferenceFactory.newBook();
|
455
|
//TODO deduplicate
|
456
|
book.setTitle(refBookTitle);
|
457
|
return book;
|
458
|
}
|
459
|
|
460
|
@Override
|
461
|
public Reference createObject(ResultSet rs, GlobisImportState state)
|
462
|
throws SQLException {
|
463
|
String refJournal = rs.getString("RefJournal");
|
464
|
boolean isInJournal =isNotBlank(refJournal);
|
465
|
String refBookTitle = rs.getString("RefBookTitle");
|
466
|
boolean isInBook =isNotBlank(refBookTitle);
|
467
|
|
468
|
|
469
|
|
470
|
Reference ref;
|
471
|
String refType = rs.getString("RefType");
|
472
|
if (refType == null){
|
473
|
if (isInJournal && ! isInBook){
|
474
|
ref = ReferenceFactory.newArticle();
|
475
|
}else{
|
476
|
ref = ReferenceFactory.newGeneric();
|
477
|
}
|
478
|
}else if (refType.equals("book")){
|
479
|
ref = ReferenceFactory.newBook();
|
480
|
}else if (refType.equals("paper in journal")){
|
481
|
ref = ReferenceFactory.newArticle();
|
482
|
}else if (refType.startsWith("unpublished") ){
|
483
|
ref = ReferenceFactory.newGeneric();
|
484
|
}else if (refType.endsWith("paper in journal")){
|
485
|
ref = ReferenceFactory.newArticle();
|
486
|
}else if (refType.equals("paper in book")){
|
487
|
ref = ReferenceFactory.newBookSection();
|
488
|
}else if (refType.matches("paper in journal.*website.*")){
|
489
|
ref = ReferenceFactory.newArticle();
|
490
|
}else{
|
491
|
logger.warn("Unknown reference type: " + refType);
|
492
|
ref = ReferenceFactory.newGeneric();
|
493
|
}
|
494
|
return ref;
|
495
|
}
|
496
|
|
497
|
|
498
|
@Override
|
499
|
public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, GlobisImportState state) {
|
500
|
Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
|
501
|
return result; //not needed
|
502
|
}
|
503
|
|
504
|
@Override
|
505
|
protected boolean doCheck(GlobisImportState state){
|
506
|
IOValidator<GlobisImportState> validator = new GlobisReferenceImportValidator();
|
507
|
return validator.validate(state);
|
508
|
}
|
509
|
|
510
|
@Override
|
511
|
protected boolean isIgnore(GlobisImportState state){
|
512
|
//TODO
|
513
|
return state.getConfig().getDoReferences() != IImportConfigurator.DO_REFERENCES.ALL;
|
514
|
}
|
515
|
|
516
|
|
517
|
|
518
|
|
519
|
|
520
|
}
|