Unify name and taxon creation
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / globis / GlobisReferenceImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.globis;
11
12 import java.net.URI;
13 import java.sql.ResultSet;
14 import java.sql.SQLException;
15 import java.util.HashMap;
16 import java.util.HashSet;
17 import java.util.Map;
18 import java.util.Set;
19
20 import org.apache.log4j.Logger;
21 import org.springframework.stereotype.Component;
22
23 import eu.etaxonomy.cdm.io.common.IImportConfigurator;
24 import eu.etaxonomy.cdm.io.common.IOValidator;
25 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
26 import eu.etaxonomy.cdm.io.common.mapping.IMappingImport;
27 import eu.etaxonomy.cdm.io.globis.validation.GlobisReferenceImportValidator;
28 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
29 import eu.etaxonomy.cdm.model.common.Annotation;
30 import eu.etaxonomy.cdm.model.common.AnnotationType;
31 import eu.etaxonomy.cdm.model.common.CdmBase;
32 import eu.etaxonomy.cdm.model.reference.IArticle;
33 import eu.etaxonomy.cdm.model.reference.IBook;
34 import eu.etaxonomy.cdm.model.reference.IBookSection;
35 import eu.etaxonomy.cdm.model.reference.IJournal;
36 import eu.etaxonomy.cdm.model.reference.Reference;
37 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
38 import eu.etaxonomy.cdm.model.reference.ReferenceType;
39 import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
40
41
42 /**
43 * @author a.mueller
44 * @created 20.02.2010
45 */
46 @Component
47 public class GlobisReferenceImport extends GlobisImportBase<Reference> implements IMappingImport<Reference, GlobisImportState>{
48 private static final Logger logger = Logger.getLogger(GlobisReferenceImport.class);
49
50 private int modCount = 10000;
51 private static final String pluralString = "references";
52 private static final String dbTableName = "Literatur";
53 private static final Class<?> cdmTargetClass = Reference.class;
54
/**
 * Creates the import for GloBIS references.
 * Hands the plural label, the source table name and the CDM target class
 * declared as constants on this class to the base import.
 */
public GlobisReferenceImport(){
    super(pluralString, dbTableName, cdmTargetClass);
}
58
59 @Override
60 protected String getIdQuery() {
61 String strRecordQuery =
62 " SELECT refID " +
63 " FROM " + dbTableName
64 + " WHERE RefSource like 'Original' or refID in (SELECT fiSpecRefId FROM specTax)";
65 return strRecordQuery;
66 }
67
68 @Override
69 protected String getRecordQuery(GlobisImportConfigurator config) {
70 String strRecordQuery =
71 " SELECT l.*, l.DateCreated as Created_When, l.CreatedBy as Created_Who," +
72 " l.ModifiedBy as Updated_who, l.DateModified as Updated_When, l.RefRemarks as Notes " +
73 " FROM " + getTableName() + " l " +
74 " WHERE ( l.refId IN (" + ID_LIST_TOKEN + ") )";
75 return strRecordQuery;
76 }
77
78 @Override
79 public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
80 boolean success = true;
81
82 Set<Reference> objectsToSave = new HashSet<Reference>();
83
84 ResultSet rs = partitioner.getResultSet();
85
86 try {
87
88 int i = 0;
89
90 //for each reference
91 while (rs.next()){
92
93 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
94
95
96 handleSingleRecord(state, objectsToSave, rs);
97
98 }
99
100 // logger.warn("Specimen: " + countSpecimen + ", Descriptions: " + countDescriptions );
101
102 logger.warn(pluralString + " to save: " + objectsToSave.size());
103 getReferenceService().save(objectsToSave);
104
105 return success;
106 } catch (SQLException e) {
107 logger.error("SQLException:" + e);
108 return false;
109 }
110 }
111
112 /**
113 * @param state
114 * @param objectsToSave
115 * @param rs
116 * @throws SQLException
117 */
118 private void handleSingleRecord(GlobisImportState state,
119 Set<Reference> objectsToSave, ResultSet rs) throws SQLException {
120
121 Integer refId = rs.getInt("RefId");
122
123 try {
124
125 String title = rs.getString("RefTitle");
126 String refJournal = rs.getString("RefJournal");
127 refJournal = normalizeRefJournal(refJournal);
128 String refBookTitle = rs.getString("RefBookTitle");
129
130 String refUrl = rs.getString("RefURL");
131 String refVolume = rs.getString("RefVolume");
132 String refYear = rs.getString("RefYear");
133 String refIssn = rs.getString("RefISSN");
134 String refRemarks = rs.getString("RefRemarks");
135 String refPublisher = rs.getString("RefPublisher");
136 String refPlace = rs.getString("RefPlace");
137 String refEdition = rs.getString("RefEdition");
138 String refEditor = rs.getString("RefEditor");
139 String refAuthor = rs.getString("RefAuthor");
140 String refPages = rs.getString("RefPages");
141
142 String isbn = null;
143 String issn = null;
144 if (isNotBlank(refIssn)){
145 refIssn = refIssn.trim();
146 if (refIssn.startsWith("ISBN")){
147 isbn = refIssn.replace("ISBN", "").trim();
148 }else if (refIssn.startsWith("ISSN")){
149 issn = refIssn.replace("ISSN", "").trim();
150 }else{
151 String pureNumbers = refIssn.replace("-", "").replace(" ", "");
152 if (pureNumbers.length() == 8){
153 issn = refIssn;
154 }else if (pureNumbers.length() == 10){
155 isbn = refIssn;
156 }else{
157 logger.warn("RefISSN could not be parsed: " + refIssn + ", refId: " + refId);
158 }
159 }
160 }
161
162
163 //source ref //TODO
164 Reference<?> sourceRef = state.getTransactionalSourceReference();
165
166 Reference<?> ref = createObject(rs, state);
167 testIsxnType(ref, isbn, issn, refId);
168 ref.setTitle(title);
169
170 //refAuthor
171 TeamOrPersonBase<?> author = makeAuthor(refAuthor, state);
172 ref.setAuthorship(author);
173
174 //inRef
175 if (isNotBlank(refJournal)){
176 //Article
177 if (ref.getType().equals(ReferenceType.Article) ){
178 Reference<?> journal = getJournal(state, rs, refJournal);
179 ref.setInJournal(journal);
180 }else{
181 logger.warn("Reference type not supported for RefJournal. Type: " + ref.getType().toString() + ", refId: " + refId );
182 }
183 }
184 if (isNotBlank(refBookTitle)){
185 //BookSection
186 //TODO RefSerial
187 if (ref.getType().equals(ReferenceType.BookSection) ){
188 IBook book = getBook(state, rs, refBookTitle);
189 ref.setInBook(book);
190 }else if (ref.getType().equals(ReferenceType.Book)) {
191 ref.setTitle(refBookTitle);
192 }else{
193 logger.warn("Reference type not supported for RefBookTitle. Type: " + ref.getType().toString() + ", refId: " + refId );
194 }
195 }
196
197 IBookSection bookSection;
198 IBook book;
199 IArticle article;
200 IJournal journal;
201
202
203 //RefVolume
204 if (isNotBlank(refVolume)){
205 if (ref.getType().isVolumeReference()){
206 ref.setVolume(refVolume);
207 }else if(ref.getInReference() != null && ref.getInReference().getType().isVolumeReference()){
208 ref.getInReference().setVolume(refVolume);
209 }else{
210 logger.warn(ref.getType() + " does not support volume but volume exists, refId: " + refId);
211 }
212 }
213
214 //RefYear
215 //TODO check correct parsing for [] and full dates
216 if (isNotBlank(refYear)){
217 ref.setDatePublished(TimePeriodParser.parseString(refYear));
218 }
219
220 //refPages
221 if (isNotBlank(refPages)){
222 refPages = refPages.trim();
223 if (refPages.endsWith(".")){
224 refPages = refPages.substring(0, refPages.length()-1).trim();
225 }
226 ref.setPages(refPages);
227 }
228
229 //ISXN
230 if (isbn != null){
231 Reference<?> isbnRef = getIsbnReference(ref, refId);
232 if (isbnRef != null){
233 isbnRef.setIsbn(isbn);
234 }
235 }
236 if(issn != null){
237 Reference<?> issnRef = getIssnReference(ref, refId);
238 if (issnRef != null){
239 issnRef.setIssn(issn);
240 }
241 }
242
243 //refURL
244 if (isNotBlank(refUrl)){
245 URI uri = URI.create(refUrl);
246 ref.setUri(uri);
247 }
248
249 //refRemarks
250 if (isNotBlank(refRemarks)){
251 Annotation anno = Annotation.NewDefaultLanguageInstance(refRemarks);
252 anno.setAnnotationType(AnnotationType.EDITORIAL());
253 ref.addAnnotation(anno);
254 }
255
256 //Publisher + Place
257 handlePublisherAndPlace(refId, refPublisher, refPlace, ref);
258
259 //refEdition
260 if (isNotBlank(refEdition)){
261 Reference<?> edRef = ref;
262 if (ref.getType() == ReferenceType.BookSection){
263 edRef = ref.getInReference();
264 }
265 if (edRef == null || edRef.getType() != ReferenceType.Book){
266 logger.warn("Incorrect refType " + ref.getType() + " for refererence with edition or inRef is null, " + refId);
267 }
268 }
269
270 //refEditor
271 if (isNotBlank(refEditor)){
272 Reference<?> edsRef = ref;
273 if (ref.getType() == ReferenceType.BookSection){
274 edsRef = ref.getInReference();
275 }
276 if (edsRef == null || edsRef.getType() != ReferenceType.Book){
277 logger.warn("Reference type for RefEditor must be Book or Booksection but was " + ref.getType() + " or inRef was null, refId " + refId);
278 }
279 }
280
281 //id, created, notes
282 this.doIdCreatedUpdatedNotes(state, ref, rs, refId, REFERENCE_NAMESPACE);
283
284
285
286 //DONE
287 //RefType, RefTitle, RefJournal,
288 //RefId, ...
289
290 //TODO
291
292 //RefBookTitle, RefJournal, RefSerial, - mostly done
293
294 //RefIll only, RefPages,RefPages only,
295
296 //unclear
297 //RefDatePublished, RefVolPageFig,
298 //RefSource,
299 //RefLibrary, RefMarker,
300 //RefGeneralKeywords, RefGeoKeywords, RefSpecificKeywords, RefTaxKeywords, SpecificKeywordDummy
301
302
303 //no data
304 //CountryDummy
305
306 objectsToSave.add(ref);
307
308
309 } catch (Exception e) {
310 logger.warn("Exception in literature: RefId " + refId + ". " + e.getMessage());
311 e.printStackTrace();
312 }
313 }
314
315 private TeamOrPersonBase<?> makeAuthor(String refAuthor, GlobisImportState state) {
316 TeamOrPersonBase<?> author = GlobisAuthorImport.makeAuthor(refAuthor, state, getAgentService());
317 // getAgentService().update(author);
318 return author;
319 }
320
321 // private TeamOrPersonBase<?> makeAuthor(String refAuthor) {
322 // String[] split = refAuthor.split(";");
323 // List<String> singleAuthorStrings = new ArrayList<String>();
324 // for (String single : split){
325 // single = single.trim();
326 // if (single.startsWith("&")){
327 // single = single.substring(1).trim();
328 // }
329 // String[] split2 = single.split("&");
330 // for (String single2 : split2){
331 // singleAuthorStrings.add(single2);
332 // }
333 // }
334 //
335 // TeamOrPersonBase<?> result;
336 // if (singleAuthorStrings.size() > 1){
337 // Team team= Team.NewInstance();
338 // for (String str : singleAuthorStrings){
339 // Person person = makePerson(str);
340 // team.addTeamMember(person);
341 // }
342 // result = team;
343 // }else{
344 // result = makePerson(singleAuthorStrings.get(0));
345 // }
346 //
347 // //TODO deduplicate
348 // return result;
349 // }
350 //
351 // private Person makePerson(String string) {
352 // Person person = Person.NewTitledInstance(string.trim());
353 // //TODO deduplicate
354 // return person;
355 // }
356
357 /**
358 * @param refId
359 * @param refPublisher
360 * @param refPlace
361 * @param ref
362 */
363 private void handlePublisherAndPlace(Integer refId, String refPublisher,
364 String refPlace, Reference<?> ref) {
365 //refPublisher
366 if (isNotBlank(refPublisher)){
367 if (ref.getType().isPublication()){
368 ref.setPublisher(refPublisher);
369 }else if (ref.getInReference() != null && ref.getInReference().getType().isPublication()){
370 ref.getInReference().setPublisher(refPublisher);
371 }else{
372 logger.warn("RefPublisher can not be set, " + ref.getType() + ", refId " + refId);
373 }
374 }
375
376 //refPlace
377 if (isNotBlank(refPlace)){
378 if (ref.getType().isPublication()){
379 ref.setPlacePublished(refPlace);
380 }else if (ref.getInReference() != null && ref.getInReference().getType().isPublication()){
381 //TODO handle if not empty
382 ref.getInReference().setPlacePublished(refPlace);
383 }else{
384 logger.warn("RefPlace can not be set, " + ref.getType() + ", refId " + refId);
385 }
386 }
387 }
388
389 private Reference<?> getIssnReference(Reference<?> ref, int refId) {
390 if (ref == null){
391 return null;
392 }
393 if (ref.getType() == ReferenceType.Article){
394 ref = ref.getInReference();
395 }
396 if (ref.getType() != ReferenceType.Journal && ref.getType() != ReferenceType.Generic){
397 logger.warn("Invalid refType for issn, refId " + refId);
398 return null;
399 }else{
400 return ref;
401 }
402 }
403
404 private Reference<?> getIsbnReference(Reference<?> ref, int refId) {
405 if (ref == null){
406 return null;
407 }
408 if (ref.getType() == ReferenceType.BookSection){
409 ref = ref.getInReference();
410 }
411 if (ref.getType() != ReferenceType.Book && ref.getType() != ReferenceType.Generic){
412 logger.warn("Invalid refType for isbn, refId " + refId);
413 return null;
414 }else{
415 return ref;
416 }
417 }
418
419 private void testIsxnType(Reference<?> ref, String isbn, String issn, int refID) {
420 if (isbn != null && ref.getType() != ReferenceType.Book && ref.getType() != ReferenceType.BookSection ){
421 logger.warn("Reference has isbn but is not a book type, type " + ref.getType() + ", row " + refID);
422 }else if (issn != null && ref.getType() != ReferenceType.Article){
423 logger.warn("Reference has issn but is not an article, row " + refID);
424 }
425 }
426
427 /**
428 * @param refJournal
429 * @return
430 */
431 private String normalizeRefJournal(String refJournal) {
432 if (refJournal != null){
433 refJournal = refJournal.trim();
434 if (refJournal.equals(".")){
435 refJournal = null;
436 }
437 }
438 return refJournal;
439 }
440
441
442
443
444 private Reference<?> getJournal(GlobisImportState state, ResultSet rs, String refJournal) throws SQLException {
445
446 Reference<?> journal = ReferenceFactory.newJournal();
447 //TODO deduplicate
448 journal.setTitle(refJournal);
449 return journal;
450 }
451
452 private IBook getBook(GlobisImportState state, ResultSet rs, String refBookTitle) throws SQLException {
453
454 Reference<?> book = ReferenceFactory.newBook();
455 //TODO deduplicate
456 book.setTitle(refBookTitle);
457 return book;
458 }
459
460 @Override
461 public Reference<?> createObject(ResultSet rs, GlobisImportState state)
462 throws SQLException {
463 String refJournal = rs.getString("RefJournal");
464 boolean isInJournal =isNotBlank(refJournal);
465 String refBookTitle = rs.getString("RefBookTitle");
466 boolean isInBook =isNotBlank(refBookTitle);
467
468
469
470 Reference<?> ref;
471 String refType = rs.getString("RefType");
472 if (refType == null){
473 if (isInJournal && ! isInBook){
474 ref = ReferenceFactory.newArticle();
475 }else{
476 ref = ReferenceFactory.newGeneric();
477 }
478 }else if (refType.equals("book")){
479 ref = ReferenceFactory.newBook();
480 }else if (refType.equals("paper in journal")){
481 ref = ReferenceFactory.newArticle();
482 }else if (refType.startsWith("unpublished") ){
483 ref = ReferenceFactory.newGeneric();
484 }else if (refType.endsWith("paper in journal")){
485 ref = ReferenceFactory.newArticle();
486 }else if (refType.equals("paper in book")){
487 ref = ReferenceFactory.newBookSection();
488 }else if (refType.matches("paper in journal.*website.*")){
489 ref = ReferenceFactory.newArticle();
490 }else{
491 logger.warn("Unknown reference type: " + refType);
492 ref = ReferenceFactory.newGeneric();
493 }
494 return ref;
495 }
496
497
498 @Override
499 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, GlobisImportState state) {
500 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
501 return result; //not needed
502 }
503
504 @Override
505 protected boolean doCheck(GlobisImportState state){
506 IOValidator<GlobisImportState> validator = new GlobisReferenceImportValidator();
507 return validator.validate(state);
508 }
509
510 @Override
511 protected boolean isIgnore(GlobisImportState state){
512 //TODO
513 return state.getConfig().getDoReferences() != IImportConfigurator.DO_REFERENCES.ALL;
514 }
515
516
517
518
519
520 }