2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.globis
;
13 import java
.sql
.ResultSet
;
14 import java
.sql
.SQLException
;
15 import java
.util
.HashMap
;
16 import java
.util
.HashSet
;
20 import org
.apache
.log4j
.Logger
;
21 import org
.springframework
.stereotype
.Component
;
23 import eu
.etaxonomy
.cdm
.io
.common
.IImportConfigurator
;
24 import eu
.etaxonomy
.cdm
.io
.common
.IOValidator
;
25 import eu
.etaxonomy
.cdm
.io
.common
.ResultSetPartitioner
;
26 import eu
.etaxonomy
.cdm
.io
.common
.mapping
.IMappingImport
;
27 import eu
.etaxonomy
.cdm
.io
.globis
.validation
.GlobisReferenceImportValidator
;
28 import eu
.etaxonomy
.cdm
.model
.agent
.TeamOrPersonBase
;
29 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
30 import eu
.etaxonomy
.cdm
.model
.common
.AnnotationType
;
31 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
32 import eu
.etaxonomy
.cdm
.model
.reference
.IArticle
;
33 import eu
.etaxonomy
.cdm
.model
.reference
.IBook
;
34 import eu
.etaxonomy
.cdm
.model
.reference
.IBookSection
;
35 import eu
.etaxonomy
.cdm
.model
.reference
.IJournal
;
36 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
37 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
38 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceType
;
39 import eu
.etaxonomy
.cdm
.strategy
.parser
.TimePeriodParser
;
47 public class GlobisReferenceImport
extends GlobisImportBase
<Reference
> implements IMappingImport
<Reference
, GlobisImportState
>{
48 private static final Logger logger
= Logger
.getLogger(GlobisReferenceImport
.class);
50 private int modCount
= 10000;
51 private static final String pluralString
= "references";
52 private static final String dbTableName
= "Literatur";
53 private static final Class
<?
> cdmTargetClass
= Reference
.class;
/**
 * Creates the reference import step, registering the plural label, the source table name
 * and the target class with the base class.
 */
public GlobisReferenceImport() {
    super(pluralString, dbTableName, cdmTargetClass);
}
// Builds the SQL used to determine which records of the source table are imported:
// rows whose RefSource is like 'Original' or whose refID is referenced by specTax.fiSpecRefId.
// NOTE(review): as visible here the expression starts with " FROM "; the SELECT fragment
// is not part of this view - confirm the selected column against the repository version.
60 protected String
getIdQuery() {
61 String strRecordQuery
=
63 " FROM " + dbTableName
64 + " WHERE RefSource like 'Original' or refID in (SELECT fiSpecRefId FROM specTax)";
65 return strRecordQuery
;
69 protected String
getRecordQuery(GlobisImportConfigurator config
) {
70 String strRecordQuery
=
71 " SELECT l.*, l.DateCreated as Created_When, l.CreatedBy as Created_Who," +
72 " l.ModifiedBy as Updated_who, l.DateModified as Updated_When, l.RefRemarks as Notes " +
73 " FROM " + getTableName() + " l " +
74 " WHERE ( l.refId IN (" + ID_LIST_TOKEN
+ ") )";
75 return strRecordQuery
;
// Imports one partition of records: each record of the partition's result set is passed to
// handleSingleRecord, which collects the created references in objectsToSave; afterwards the
// collected references are saved through the reference service.
// NOTE(review): the loop over the result set, the declaration of the counter i, the opening
// of the try block and the return statement are not visible in this view.
79 public boolean doPartition(ResultSetPartitioner partitioner
, GlobisImportState state
) {
80 boolean success
= true;
// References created for this partition; saved in one call at the end.
82 Set
<Reference
> objectsToSave
= new HashSet
<Reference
>();
84 ResultSet rs
= partitioner
.getResultSet();
// Progress log whenever the counter hits a multiple of modCount (skipped for the very first record).
93 if ((i
++ % modCount
) == 0 && i
!= 1 ){ logger
.info(pluralString
+ " handled: " + (i
-1));}
96 handleSingleRecord(state
, objectsToSave
, rs
);
100 // logger.warn("Specimen: " + countSpecimen + ", Descriptions: " + countDescriptions );
// Logged at warn level although it is informational.
102 logger
.warn(pluralString
+ " to save: " + objectsToSave
.size());
103 getReferenceService().save(objectsToSave
);
// SQL problems are logged; NOTE(review): how 'success' is affected is not visible here.
106 } catch (SQLException e
) {
107 logger
.error("SQLException:" + e
);
114 * @param objectsToSave
116 * @throws SQLException
// Converts the current row of rs into a Reference and adds it to objectsToSave.
// The row's columns are read into locals, the reference is created via createObject and then
// enriched with author, journal/book, volume, publication date, pages, ISBN/ISSN, remarks,
// publisher/place, edition and editor data. Exceptions raised while handling the row are
// caught and logged as warnings together with the RefId.
// NOTE(review): several lines of this method (e.g. the declarations of isbn/issn, the try
// opener, else branches and some setter calls) are not visible in this view.
118 private void handleSingleRecord(GlobisImportState state
,
119 Set
<Reference
> objectsToSave
, ResultSet rs
) throws SQLException
{
// --- read the raw column values of the current row ---
121 Integer refId
= rs
.getInt("RefId");
125 String title
= rs
.getString("RefTitle");
126 String refJournal
= rs
.getString("RefJournal");
127 refJournal
= normalizeRefJournal(refJournal
);
128 String refBookTitle
= rs
.getString("RefBookTitle");
130 String refUrl
= rs
.getString("RefURL");
131 String refVolume
= rs
.getString("RefVolume");
132 String refYear
= rs
.getString("RefYear");
133 String refIssn
= rs
.getString("RefISSN");
134 String refRemarks
= rs
.getString("RefRemarks");
135 String refPublisher
= rs
.getString("RefPublisher");
136 String refPlace
= rs
.getString("RefPlace");
137 String refEdition
= rs
.getString("RefEdition");
138 String refEditor
= rs
.getString("RefEditor");
139 String refAuthor
= rs
.getString("RefAuthor");
140 String refPages
= rs
.getString("RefPages");
// --- RefISSN may hold an ISBN or an ISSN: an explicit "ISBN"/"ISSN" prefix decides, otherwise
// the value with '-' and ' ' removed is classified by its length (8 or 10); anything else is
// reported. NOTE(review): the assignments for the length-based cases are not visible here.
144 if (isNotBlank(refIssn
)){
145 refIssn
= refIssn
.trim();
146 if (refIssn
.startsWith("ISBN")){
147 isbn
= refIssn
.replace("ISBN", "").trim();
148 }else if (refIssn
.startsWith("ISSN")){
149 issn
= refIssn
.replace("ISSN", "").trim();
151 String pureNumbers
= refIssn
.replace("-", "").replace(" ", "");
152 if (pureNumbers
.length() == 8){
154 }else if (pureNumbers
.length() == 10){
157 logger
.warn("RefISSN could not be parsed: " + refIssn
+ ", refId: " + refId
);
164 Reference
<?
> sourceRef
= state
.getTransactionalSourceReference();
// --- create the reference (type chosen by createObject) and check the ISBN/ISSN against its type ---
166 Reference
<?
> ref
= createObject(rs
, state
);
167 testIsxnType(ref
, isbn
, issn
, refId
);
171 TeamOrPersonBase
<?
> author
= makeAuthor(refAuthor
, state
);
172 ref
.setAuthorship(author
);
// --- journal: only articles get an in-journal; other types are reported ---
175 if (isNotBlank(refJournal
)){
177 if (ref
.getType().equals(ReferenceType
.Article
) ){
178 Reference
<?
> journal
= getJournal(state
, rs
, refJournal
);
179 ref
.setInJournal(journal
);
181 logger
.warn("Reference type not supported for RefJournal. Type: " + ref
.getType().toString() + ", refId: " + refId
);
// --- book title: a book section gets a book created from the title, a book takes it as its own
// title; other types are reported ---
184 if (isNotBlank(refBookTitle
)){
187 if (ref
.getType().equals(ReferenceType
.BookSection
) ){
188 IBook book
= getBook(state
, rs
, refBookTitle
);
190 }else if (ref
.getType().equals(ReferenceType
.Book
)) {
191 ref
.setTitle(refBookTitle
);
193 logger
.warn("Reference type not supported for RefBookTitle. Type: " + ref
.getType().toString() + ", refId: " + refId
);
197 IBookSection bookSection
;
// --- volume: set on the reference itself if its type supports volumes, otherwise on a
// volume-capable in-reference; otherwise reported ---
204 if (isNotBlank(refVolume
)){
205 if (ref
.getType().isVolumeReference()){
206 ref
.setVolume(refVolume
);
207 }else if(ref
.getInReference() != null && ref
.getInReference().getType().isVolumeReference()){
208 ref
.getInReference().setVolume(refVolume
);
210 logger
.warn(ref
.getType() + " does not support volume but volume exists, refId: " + refId
);
215 //TODO check correct parsing for [] and full dates
216 if (isNotBlank(refYear
)){
217 ref
.setDatePublished(TimePeriodParser
.parseString(refYear
));
// --- pages: trimmed, one trailing '.' removed ---
221 if (isNotBlank(refPages
)){
222 refPages
= refPages
.trim();
223 if (refPages
.endsWith(".")){
224 refPages
= refPages
.substring(0, refPages
.length()-1).trim();
226 ref
.setPages(refPages
);
// --- ISBN / ISSN are stored on the reference selected by getIsbnReference / getIssnReference ---
231 Reference
<?
> isbnRef
= getIsbnReference(ref
, refId
);
232 if (isbnRef
!= null){
233 isbnRef
.setIsbn(isbn
);
237 Reference
<?
> issnRef
= getIssnReference(ref
, refId
);
238 if (issnRef
!= null){
239 issnRef
.setIssn(issn
);
// --- URL: parsed with URI.create (an unparsable value raises an exception handled by the catch below) ---
244 if (isNotBlank(refUrl
)){
245 URI uri
= URI
.create(refUrl
);
// --- remarks become an editorial annotation in the default language ---
250 if (isNotBlank(refRemarks
)){
251 Annotation anno
= Annotation
.NewDefaultLanguageInstance(refRemarks
);
252 anno
.setAnnotationType(AnnotationType
.EDITORIAL());
253 ref
.addAnnotation(anno
);
257 handlePublisherAndPlace(refId
, refPublisher
, refPlace
, ref
);
// --- edition: for a book section the in-reference is inspected instead; a missing or non-Book
// target is reported ---
260 if (isNotBlank(refEdition
)){
261 Reference
<?
> edRef
= ref
;
262 if (ref
.getType() == ReferenceType
.BookSection
){
263 edRef
= ref
.getInReference();
265 if (edRef
== null || edRef
.getType() != ReferenceType
.Book
){
266 logger
.warn("Incorrect refType " + ref
.getType() + " for refererence with edition or inRef is null, " + refId
);
// --- editor: same target selection as for the edition ---
271 if (isNotBlank(refEditor
)){
272 Reference
<?
> edsRef
= ref
;
273 if (ref
.getType() == ReferenceType
.BookSection
){
274 edsRef
= ref
.getInReference();
276 if (edsRef
== null || edsRef
.getType() != ReferenceType
.Book
){
277 logger
.warn("Reference type for RefEditor must be Book or Booksection but was " + ref
.getType() + " or inRef was null, refId " + refId
);
// --- id, created/updated and notes handling is delegated, using the reference namespace ---
282 this.doIdCreatedUpdatedNotes(state
, ref
, rs
, refId
, REFERENCE_NAMESPACE
);
// Source columns listed by the original author (presumably a checklist of handled/open columns).
287 //RefType, RefTitle, RefJournal,
292 //RefBookTitle, RefJournal, RefSerial, - mostly done
294 //RefIll only, RefPages,RefPages only,
297 //RefDatePublished, RefVolPageFig,
299 //RefLibrary, RefMarker,
300 //RefGeneralKeywords, RefGeoKeywords, RefSpecificKeywords, RefTaxKeywords, SpecificKeywordDummy
306 objectsToSave
.add(ref
);
// Any exception for this row is logged with its message only; the row is then not added above.
309 } catch (Exception e
) {
310 logger
.warn("Exception in literature: RefId " + refId
+ ". " + e
.getMessage());
315 private TeamOrPersonBase
<?
> makeAuthor(String refAuthor
, GlobisImportState state
) {
316 TeamOrPersonBase
<?
> author
= GlobisAuthorImport
.makeAuthor(refAuthor
, state
, getAgentService());
317 // getAgentService().update(author);
321 // private TeamOrPersonBase<?> makeAuthor(String refAuthor) {
322 // String[] split = refAuthor.split(";");
323 // List<String> singleAuthorStrings = new ArrayList<String>();
324 // for (String single : split){
325 // single = single.trim();
326 // if (single.startsWith("&")){
327 // single = single.substring(1).trim();
329 // String[] split2 = single.split("&");
330 // for (String single2 : split2){
331 // singleAuthorStrings.add(single2);
335 // TeamOrPersonBase<?> result;
336 // if (singleAuthorStrings.size() > 1){
337 // Team team= Team.NewInstance();
338 // for (String str : singleAuthorStrings){
339 // Person person = makePerson(str);
340 // team.addTeamMember(person);
344 // result = makePerson(singleAuthorStrings.get(0));
347 // //TODO deduplicate
351 // private Person makePerson(String string) {
352 // Person person = Person.NewTitledInstance(string.trim());
353 // //TODO deduplicate
359 * @param refPublisher
363 private void handlePublisherAndPlace(Integer refId
, String refPublisher
,
364 String refPlace
, Reference
<?
> ref
) {
366 if (isNotBlank(refPublisher
)){
367 if (ref
.getType().isPublication()){
368 ref
.setPublisher(refPublisher
);
369 }else if (ref
.getInReference() != null && ref
.getInReference().getType().isPublication()){
370 ref
.getInReference().setPublisher(refPublisher
);
372 logger
.warn("RefPublisher can not be set, " + ref
.getType() + ", refId " + refId
);
377 if (isNotBlank(refPlace
)){
378 if (ref
.getType().isPublication()){
379 ref
.setPlacePublished(refPlace
);
380 }else if (ref
.getInReference() != null && ref
.getInReference().getType().isPublication()){
381 //TODO handle if not empty
382 ref
.getInReference().setPlacePublished(refPlace
);
384 logger
.warn("RefPlace can not be set, " + ref
.getType() + ", refId " + refId
);
// Determines the reference that should carry the ISSN. For an Article the in-reference
// (ref.getInReference()) is examined instead of the article itself; a warning with the refId
// is logged when the examined reference is neither a Journal nor a Generic reference.
// NOTE(review): the return statements and any null handling are not visible in this view;
// as visible, the in-reference is dereferenced without a null check - confirm.
389 private Reference
<?
> getIssnReference(Reference
<?
> ref
, int refId
) {
393 if (ref
.getType() == ReferenceType
.Article
){
394 ref
= ref
.getInReference();
396 if (ref
.getType() != ReferenceType
.Journal
&& ref
.getType() != ReferenceType
.Generic
){
397 logger
.warn("Invalid refType for issn, refId " + refId
);
// Determines the reference that should carry the ISBN. For a BookSection the in-reference
// (ref.getInReference()) is examined instead of the section itself; a warning with the refId
// is logged when the examined reference is neither a Book nor a Generic reference.
// NOTE(review): the return statements and any null handling are not visible in this view;
// as visible, the in-reference is dereferenced without a null check - confirm.
404 private Reference
<?
> getIsbnReference(Reference
<?
> ref
, int refId
) {
408 if (ref
.getType() == ReferenceType
.BookSection
){
409 ref
= ref
.getInReference();
411 if (ref
.getType() != ReferenceType
.Book
&& ref
.getType() != ReferenceType
.Generic
){
412 logger
.warn("Invalid refType for isbn, refId " + refId
);
419 private void testIsxnType(Reference
<?
> ref
, String isbn
, String issn
, int refID
) {
420 if (isbn
!= null && ref
.getType() != ReferenceType
.Book
&& ref
.getType() != ReferenceType
.BookSection
){
421 logger
.warn("Reference has isbn but is not a book type, type " + ref
.getType() + ", row " + refID
);
422 }else if (issn
!= null && ref
.getType() != ReferenceType
.Article
){
423 logger
.warn("Reference has issn but is not an article, row " + refID
);
// Normalizes the raw RefJournal value: a non-null value is trimmed and then compared to ".".
// NOTE(review): what is done for "." and the return statement are not visible in this view -
// presumably "." is treated as "no journal"; confirm against the repository version.
431 private String
normalizeRefJournal(String refJournal
) {
432 if (refJournal
!= null){
433 refJournal
= refJournal
.trim();
434 if (refJournal
.equals(".")){
444 private Reference
<?
> getJournal(GlobisImportState state
, ResultSet rs
, String refJournal
) throws SQLException
{
446 Reference
<?
> journal
= ReferenceFactory
.newJournal();
448 journal
.setTitle(refJournal
);
452 private IBook
getBook(GlobisImportState state
, ResultSet rs
, String refBookTitle
) throws SQLException
{
454 Reference
<?
> book
= ReferenceFactory
.newBook();
456 book
.setTitle(refBookTitle
);
461 public Reference
<?
> createObject(ResultSet rs
, GlobisImportState state
)
462 throws SQLException
{
463 String refJournal
= rs
.getString("RefJournal");
464 boolean isInJournal
=isNotBlank(refJournal
);
465 String refBookTitle
= rs
.getString("RefBookTitle");
466 boolean isInBook
=isNotBlank(refBookTitle
);
471 String refType
= rs
.getString("RefType");
472 if (refType
== null){
473 if (isInJournal
&& ! isInBook
){
474 ref
= ReferenceFactory
.newArticle();
476 ref
= ReferenceFactory
.newGeneric();
478 }else if (refType
.equals("book")){
479 ref
= ReferenceFactory
.newBook();
480 }else if (refType
.equals("paper in journal")){
481 ref
= ReferenceFactory
.newArticle();
482 }else if (refType
.startsWith("unpublished") ){
483 ref
= ReferenceFactory
.newGeneric();
484 }else if (refType
.endsWith("paper in journal")){
485 ref
= ReferenceFactory
.newArticle();
486 }else if (refType
.equals("paper in book")){
487 ref
= ReferenceFactory
.newBookSection();
488 }else if (refType
.matches("paper in journal.*website.*")){
489 ref
= ReferenceFactory
.newArticle();
491 logger
.warn("Unknown reference type: " + refType
);
492 ref
= ReferenceFactory
.newGeneric();
499 public Map
<Object
, Map
<String
, ?
extends CdmBase
>> getRelatedObjectsForPartition(ResultSet rs
, GlobisImportState state
) {
500 Map
<Object
, Map
<String
, ?
extends CdmBase
>> result
= new HashMap
<Object
, Map
<String
, ?
extends CdmBase
>>();
501 return result
; //not needed
505 protected boolean doCheck(GlobisImportState state
){
506 IOValidator
<GlobisImportState
> validator
= new GlobisReferenceImportValidator();
507 return validator
.validate(state
);
511 protected boolean isIgnore(GlobisImportState state
){
513 return state
.getConfig().getDoReferences() != IImportConfigurator
.DO_REFERENCES
.ALL
;