Project

General

Profile

Download (15.3 KB) Statistics
| Branch: | Revision:
/**
* Copyright (C) 2007 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
9

    
10
package eu.etaxonomy.cdm.io.globis;
11

    
12
import java.net.URI;
13
import java.sql.ResultSet;
14
import java.sql.SQLException;
15
import java.util.HashMap;
16
import java.util.HashSet;
17
import java.util.Map;
18
import java.util.Set;
19

    
20
import org.apache.log4j.Logger;
21
import org.springframework.stereotype.Component;
22

    
23
import eu.etaxonomy.cdm.io.common.IImportConfigurator;
24
import eu.etaxonomy.cdm.io.common.IOValidator;
25
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
26
import eu.etaxonomy.cdm.io.common.mapping.IMappingImport;
27
import eu.etaxonomy.cdm.io.globis.validation.GlobisReferenceImportValidator;
28
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
29
import eu.etaxonomy.cdm.model.common.Annotation;
30
import eu.etaxonomy.cdm.model.common.AnnotationType;
31
import eu.etaxonomy.cdm.model.common.CdmBase;
32
import eu.etaxonomy.cdm.model.reference.IArticle;
33
import eu.etaxonomy.cdm.model.reference.IBook;
34
import eu.etaxonomy.cdm.model.reference.IBookSection;
35
import eu.etaxonomy.cdm.model.reference.IJournal;
36
import eu.etaxonomy.cdm.model.reference.Reference;
37
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
38
import eu.etaxonomy.cdm.model.reference.ReferenceType;
39
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
40

    
41

    
42
/**
 * @author a.mueller
 * @since 20.02.2010
 */
46
@Component
47
public class GlobisReferenceImport  extends GlobisImportBase<Reference> implements IMappingImport<Reference, GlobisImportState>{
48
	private static final Logger logger = Logger.getLogger(GlobisReferenceImport.class);

	/** Progress is logged once every <code>modCount</code> handled records. */
	private static final int modCount = 10000;
	/** Plural name of the imported objects, used in log messages. */
	private static final String pluralString = "references";
	/** Name of the source database table the references are read from. */
	private static final String dbTableName = "Literatur";
	/** CDM class that is created by this import. */
	private static final Class<?> cdmTargetClass = Reference.class;

	/**
	 * Creates the import and passes plural name, source table name and
	 * target class on to the base class.
	 */
	public GlobisReferenceImport(){
		super(pluralString, dbTableName, cdmTargetClass);
	}
58

    
59
	@Override
60
	protected String getIdQuery() {
61
		String strRecordQuery =
62
			" SELECT refID " +
63
			" FROM " + dbTableName
64
			+ " WHERE RefSource like 'Original' or refID in (SELECT fiSpecRefId FROM specTax)";
65
		return strRecordQuery;
66
	}
67

    
68
	@Override
69
	protected String getRecordQuery(GlobisImportConfigurator config) {
70
		String strRecordQuery =
71
			" SELECT l.*, l.DateCreated as Created_When, l.CreatedBy as Created_Who," +
72
			"        l.ModifiedBy as Updated_who, l.DateModified as Updated_When, l.RefRemarks as Notes " +
73
			" FROM " + getTableName() + " l " +
74
			" WHERE ( l.refId IN (" + ID_LIST_TOKEN + ") )";
75
		return strRecordQuery;
76
	}
77

    
78
	@Override
79
	public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
80
		boolean success = true;
81

    
82
		Set<Reference> objectsToSave = new HashSet<Reference>();
83

    
84
		ResultSet rs = partitioner.getResultSet();
85

    
86
		try {
87

    
88
			int i = 0;
89

    
90
			//for each reference
91
            while (rs.next()){
92

    
93
        		if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
94

    
95

    
96
				handleSingleRecord(state, objectsToSave, rs);
97

    
98
            }
99

    
100
//            logger.warn("Specimen: " + countSpecimen + ", Descriptions: " + countDescriptions );
101

    
102
			logger.warn(pluralString + " to save: " + objectsToSave.size());
103
			getReferenceService().save(objectsToSave);
104

    
105
			return success;
106
		} catch (SQLException e) {
107
			logger.error("SQLException:" +  e);
108
			return false;
109
		}
110
	}
111

    
112
	/**
113
	 * @param state
114
	 * @param objectsToSave
115
	 * @param rs
116
	 * @throws SQLException
117
	 */
118
	private void handleSingleRecord(GlobisImportState state,
119
			Set<Reference> objectsToSave, ResultSet rs) throws SQLException {
120

    
121
		Integer refId = rs.getInt("RefId");
122

    
123
		try {
124

    
125
			String title = rs.getString("RefTitle");
126
			String refJournal = rs.getString("RefJournal");
127
			refJournal = normalizeRefJournal(refJournal);
128
			String refBookTitle = rs.getString("RefBookTitle");
129

    
130
			String refUrl = rs.getString("RefURL");
131
			String refVolume = rs.getString("RefVolume");
132
			String refYear = rs.getString("RefYear");
133
			String refIssn = rs.getString("RefISSN");
134
			String refRemarks = rs.getString("RefRemarks");
135
			String refPublisher = rs.getString("RefPublisher");
136
			String refPlace = rs.getString("RefPlace");
137
			String refEdition = rs.getString("RefEdition");
138
			String refEditor = rs.getString("RefEditor");
139
			String refAuthor = rs.getString("RefAuthor");
140
			String refPages = rs.getString("RefPages");
141

    
142
			String isbn = null;
143
			String issn = null;
144
			if (isNotBlank(refIssn)){
145
				refIssn = refIssn.trim();
146
				if (refIssn.startsWith("ISBN")){
147
					isbn = refIssn.replace("ISBN", "").trim();
148
				}else if (refIssn.startsWith("ISSN")){
149
					issn = refIssn.replace("ISSN", "").trim();
150
				}else{
151
					String pureNumbers = refIssn.replace("-", "").replace(" ", "");
152
					if (pureNumbers.length() == 8){
153
						issn = refIssn;
154
					}else if (pureNumbers.length() == 10){
155
						isbn = refIssn;
156
					}else{
157
						logger.warn("RefISSN could not be parsed: " + refIssn + ",  refId: " + refId);
158
					}
159
				}
160
			}
161

    
162

    
163
			//source ref   //TODO
164
			Reference sourceRef = state.getTransactionalSourceReference();
165

    
166
			Reference ref = createObject(rs, state);
167
			testIsxnType(ref, isbn, issn, refId);
168
			ref.setTitle(title);
169

    
170
			//refAuthor
171
			TeamOrPersonBase<?> author = makeAuthor(refAuthor, state);
172
			ref.setAuthorship(author);
173

    
174
			//inRef
175
			if (isNotBlank(refJournal)){
176
				//Article
177
				if (ref.getType().equals(ReferenceType.Article) ){
178
					Reference journal = getJournal(state, rs, refJournal);
179
					ref.setInJournal(journal);
180
				}else{
181
					logger.warn("Reference type not supported for RefJournal. Type: " + ref.getType().toString() + ", refId: " + refId );
182
				}
183
			}
184
			if (isNotBlank(refBookTitle)){
185
				//BookSection
186
				//TODO RefSerial
187
				if (ref.getType().equals(ReferenceType.BookSection) ){
188
					IBook book = getBook(state, rs, refBookTitle);
189
					ref.setInBook(book);
190
				}else if (ref.getType().equals(ReferenceType.Book)) {
191
					ref.setTitle(refBookTitle);
192
				}else{
193
					logger.warn("Reference type not supported for RefBookTitle. Type: " + ref.getType().toString() + ", refId: " + refId );
194
				}
195
			}
196

    
197
			IBookSection bookSection;
198
			IBook book;
199
			IArticle article;
200
			IJournal journal;
201

    
202

    
203
			//RefVolume
204
			if (isNotBlank(refVolume)){
205
				if (ref.getType().isVolumeReference()){
206
					ref.setVolume(refVolume);
207
				}else if(ref.getInReference() != null && ref.getInReference().getType().isVolumeReference()){
208
					ref.getInReference().setVolume(refVolume);
209
				}else{
210
					logger.warn(ref.getType() + " does not support volume but volume exists, refId: " + refId);
211
				}
212
			}
213

    
214
			//RefYear
215
			//TODO check correct parsing for [] and full dates
216
			if (isNotBlank(refYear)){
217
				ref.setDatePublished(TimePeriodParser.parseStringVerbatim(refYear));
218
			}
219

    
220
			//refPages
221
			if (isNotBlank(refPages)){
222
				refPages = refPages.trim();
223
				if (refPages.endsWith(".")){
224
					refPages = refPages.substring(0, refPages.length()-1).trim();
225
				}
226
				ref.setPages(refPages);
227
			}
228

    
229
			//ISXN
230
			if (isbn != null){
231
				Reference isbnRef = getIsbnReference(ref, refId);
232
				if (isbnRef != null){
233
					isbnRef.setIsbn(isbn);
234
				}
235
			}
236
			if(issn != null){
237
				Reference issnRef = getIssnReference(ref, refId);
238
				if (issnRef != null){
239
					issnRef.setIssn(issn);
240
				}
241
			}
242

    
243
			//refURL
244
			if (isNotBlank(refUrl)){
245
				URI uri = URI.create(refUrl);
246
				ref.setUri(uri);
247
			}
248

    
249
			//refRemarks
250
			if (isNotBlank(refRemarks)){
251
				Annotation anno = Annotation.NewDefaultLanguageInstance(refRemarks);
252
				anno.setAnnotationType(AnnotationType.EDITORIAL());
253
				ref.addAnnotation(anno);
254
			}
255

    
256
			//Publisher + Place
257
			handlePublisherAndPlace(refId, refPublisher, refPlace, ref);
258

    
259
			//refEdition
260
			if (isNotBlank(refEdition)){
261
				Reference edRef = ref;
262
				if (ref.getType() == ReferenceType.BookSection){
263
					edRef = ref.getInReference();
264
				}
265
				if (edRef == null || edRef.getType() != ReferenceType.Book){
266
					logger.warn("Incorrect refType " + ref.getType() + " for refererence with edition or inRef is null, " + refId);
267
				}
268
			}
269

    
270
			//refEditor
271
			if (isNotBlank(refEditor)){
272
				Reference edsRef = ref;
273
				if (ref.getType() == ReferenceType.BookSection){
274
					edsRef = ref.getInReference();
275
				}
276
				if (edsRef == null || edsRef.getType() != ReferenceType.Book){
277
					logger.warn("Reference type for RefEditor must be Book or Booksection but was " + ref.getType() + " or inRef was null, refId " + refId);
278
				}
279
			}
280

    
281
			//id, created, notes
282
			this.doIdCreatedUpdatedNotes(state, ref, rs, refId, REFERENCE_NAMESPACE);
283

    
284

    
285

    
286
			//DONE
287
			//RefType, RefTitle, RefJournal,
288
			//RefId, ...
289

    
290
			//TODO
291

    
292
			//RefBookTitle, RefJournal, RefSerial, - mostly done
293

    
294
			//RefIll only, RefPages,RefPages only,
295

    
296
			//unclear
297
			//RefDatePublished, RefVolPageFig,
298
			//RefSource,
299
			//RefLibrary, RefMarker,
300
			//RefGeneralKeywords, RefGeoKeywords,	RefSpecificKeywords, RefTaxKeywords, SpecificKeywordDummy
301

    
302

    
303
			//no data
304
				//CountryDummy
305

    
306
			objectsToSave.add(ref);
307

    
308

    
309
		} catch (Exception e) {
310
			logger.warn("Exception in literature: RefId " + refId + ". " + e.getMessage());
311
			e.printStackTrace();
312
		}
313
	}
314

    
315
	private TeamOrPersonBase<?> makeAuthor(String refAuthor, GlobisImportState state) {
316
		TeamOrPersonBase<?> author = GlobisAuthorImport.makeAuthor(refAuthor, state, getAgentService());
317
//		getAgentService().update(author);
318
		return author;
319
	}
320

    
321
//	private TeamOrPersonBase<?> makeAuthor(String refAuthor) {
322
//		String[] split = refAuthor.split(";");
323
//		List<String> singleAuthorStrings = new ArrayList<String>();
324
//		for (String single : split){
325
//			single = single.trim();
326
//			if (single.startsWith("&")){
327
//				single = single.substring(1).trim();
328
//			}
329
//			String[] split2 = single.split("&");
330
//			for (String single2 : split2){
331
//				singleAuthorStrings.add(single2);
332
//			}
333
//		}
334
//
335
//		TeamOrPersonBase<?> result;
336
//		if (singleAuthorStrings.size() > 1){
337
//			Team team= Team.NewInstance();
338
//			for (String str : singleAuthorStrings){
339
//				Person person = makePerson(str);
340
//				team.addTeamMember(person);
341
//			}
342
//			result = team;
343
//		}else{
344
//			result = makePerson(singleAuthorStrings.get(0));
345
//		}
346
//
347
//		//TODO deduplicate
348
//		return result;
349
//	}
350
//
351
//	private Person makePerson(String string) {
352
//		Person person = Person.NewTitledInstance(string.trim());
353
//		//TODO deduplicate
354
//		return person;
355
//	}
356

    
357
	/**
358
	 * @param refId
359
	 * @param refPublisher
360
	 * @param refPlace
361
	 * @param ref
362
	 */
363
	private void handlePublisherAndPlace(Integer refId, String refPublisher,
364
			String refPlace, Reference ref) {
365
		//refPublisher
366
		if (isNotBlank(refPublisher)){
367
			if (ref.getType().isPublication()){
368
				ref.setPublisher(refPublisher);
369
			}else if (ref.getInReference() != null && ref.getInReference().getType().isPublication()){
370
				ref.getInReference().setPublisher(refPublisher);
371
			}else{
372
				logger.warn("RefPublisher can not be set, " +  ref.getType() + ", refId " + refId);
373
			}
374
		}
375

    
376
		//refPlace
377
		if (isNotBlank(refPlace)){
378
			if (ref.getType().isPublication()){
379
				ref.setPlacePublished(refPlace);
380
			}else if (ref.getInReference() != null && ref.getInReference().getType().isPublication()){
381
				//TODO handle if not empty
382
				ref.getInReference().setPlacePublished(refPlace);
383
			}else{
384
				logger.warn("RefPlace can not be set, " +  ref.getType() + ", refId " + refId);
385
			}
386
		}
387
	}
388

    
389
	private Reference getIssnReference(Reference ref, int refId) {
390
		if (ref == null){
391
			return null;
392
		}
393
		if (ref.getType() == ReferenceType.Article){
394
			ref = ref.getInReference();
395
		}
396
		if (ref.getType() != ReferenceType.Journal && ref.getType() != ReferenceType.Generic){
397
			logger.warn("Invalid refType for issn, refId " + refId);
398
			return null;
399
		}else{
400
			return ref;
401
		}
402
	}
403

    
404
	private Reference getIsbnReference(Reference ref, int refId) {
405
		if (ref == null){
406
			return null;
407
		}
408
		if (ref.getType() == ReferenceType.BookSection){
409
			ref = ref.getInReference();
410
		}
411
		if (ref.getType() != ReferenceType.Book && ref.getType() != ReferenceType.Generic){
412
			logger.warn("Invalid refType for isbn, refId " + refId);
413
			return null;
414
		}else{
415
			return ref;
416
		}
417
	}
418

    
419
	private void testIsxnType(Reference ref, String isbn, String issn, int refID) {
420
		if (isbn != null && ref.getType() != ReferenceType.Book && ref.getType() != ReferenceType.BookSection ){
421
			logger.warn("Reference has isbn but is not a book type, type " + ref.getType() + ", row " + refID);
422
		}else if (issn != null && ref.getType() != ReferenceType.Article){
423
			logger.warn("Reference has issn but is not an article, row " + refID);
424
		}
425
	}
426

    
427
	/**
428
	 * @param refJournal
429
	 * @return
430
	 */
431
	private String normalizeRefJournal(String refJournal) {
432
		if (refJournal != null){
433
			refJournal = refJournal.trim();
434
			if (refJournal.equals(".")){
435
				refJournal = null;
436
			}
437
		}
438
		return refJournal;
439
	}
440

    
441

    
442

    
443

    
444
	private Reference getJournal(GlobisImportState state, ResultSet rs, String refJournal) throws SQLException {
445

    
446
		Reference journal = ReferenceFactory.newJournal();
447
		//TODO deduplicate
448
		journal.setTitle(refJournal);
449
		return journal;
450
	}
451

    
452
	private IBook getBook(GlobisImportState state, ResultSet rs, String refBookTitle) throws SQLException {
453

    
454
		Reference book = ReferenceFactory.newBook();
455
		//TODO deduplicate
456
		book.setTitle(refBookTitle);
457
		return book;
458
	}
459

    
460
	@Override
461
	public Reference createObject(ResultSet rs, GlobisImportState state)
462
			throws SQLException {
463
		String refJournal = rs.getString("RefJournal");
464
		boolean isInJournal =isNotBlank(refJournal);
465
		String refBookTitle = rs.getString("RefBookTitle");
466
		boolean isInBook =isNotBlank(refBookTitle);
467

    
468

    
469

    
470
		Reference ref;
471
		String refType = rs.getString("RefType");
472
		if (refType == null){
473
			if (isInJournal && ! isInBook){
474
				ref = ReferenceFactory.newArticle();
475
			}else{
476
				ref = ReferenceFactory.newGeneric();
477
			}
478
		}else if (refType.equals("book")){
479
			ref = ReferenceFactory.newBook();
480
		}else if (refType.equals("paper in journal")){
481
			ref = ReferenceFactory.newArticle();
482
		}else if (refType.startsWith("unpublished") ){
483
			ref = ReferenceFactory.newGeneric();
484
		}else if (refType.endsWith("paper in journal")){
485
			ref = ReferenceFactory.newArticle();
486
		}else if (refType.equals("paper in book")){
487
			ref = ReferenceFactory.newBookSection();
488
		}else if (refType.matches("paper in journal.*website.*")){
489
			ref = ReferenceFactory.newArticle();
490
		}else{
491
			logger.warn("Unknown reference type: " + refType);
492
			ref = ReferenceFactory.newGeneric();
493
		}
494
		return ref;
495
	}
496

    
497

    
498
	@Override
499
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, GlobisImportState state) {
500
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
501
		return result;  //not needed
502
	}
503

    
504
	@Override
505
	protected boolean doCheck(GlobisImportState state){
506
		IOValidator<GlobisImportState> validator = new GlobisReferenceImportValidator();
507
		return validator.validate(state);
508
	}
509

    
510
	@Override
511
	protected boolean isIgnore(GlobisImportState state){
512
		//TODO
513
		return state.getConfig().getDoReferences() != IImportConfigurator.DO_REFERENCES.ALL;
514
	}
515

    
516

    
517

    
518

    
519

    
520
}
(8-8/10)