Ordered input for easier comparison
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / edaphobase / EdaphobaseReferenceImport.java
1 // $Id$
2 /**
3 * Copyright (C) 2015 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10 package eu.etaxonomy.cdm.io.edaphobase;
11
12 import java.sql.ResultSet;
13 import java.sql.SQLException;
14 import java.util.HashMap;
15 import java.util.HashSet;
16 import java.util.Map;
17 import java.util.Set;
18
19 import org.apache.commons.lang3.StringUtils;
20 import org.apache.log4j.Logger;
21 import org.springframework.stereotype.Component;
22
23 import eu.etaxonomy.cdm.common.DOI;
24 import eu.etaxonomy.cdm.io.common.IPartitionedIO;
25 import eu.etaxonomy.cdm.io.common.ImportHelper;
26 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
27 import eu.etaxonomy.cdm.model.common.CdmBase;
28 import eu.etaxonomy.cdm.model.common.TimePeriod;
29 import eu.etaxonomy.cdm.model.reference.Reference;
30 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
31
32 /**
33 * @author a.mueller
34 * @date 18.12.2015
35 *
36 */
37 @Component
38 public class EdaphobaseReferenceImport extends EdaphobaseImportBase {
39 private static final long serialVersionUID = 6895687693249076160L;
40
41 @SuppressWarnings("unused")
42 private static final Logger logger = Logger.getLogger(EdaphobaseReferenceImport.class);
43
44 private static final String tableName = "lit_document";
45
46 private static final String pluralString = "documents";
47 //
48 // private static final Object AUTHOR_NAMESPACE = "tax_author_name";
49
50 /**
51 * @param tableName
52 * @param pluralString
53 */
54 public EdaphobaseReferenceImport() {
55 super(tableName, pluralString);
56 }
57
58 @Override
59 protected String getIdQuery(EdaphobaseImportState state) {
60 return " SELECT DISTINCT document_id "
61 + " FROM lit_document ld INNER JOIN tax_taxon t ON t.tax_document = ld.document_id "
62 + " UNION "
63 + " SELECT DISTINCT pd.document_id "
64 + " FROM lit_document ld INNER JOIN tax_taxon t ON t.tax_document = ld.document_id "
65 + " INNER JOIN lit_document pd ON pd.document_id = ld.parent_document_fk_document_id "
66 + " ORDER BY document_id ";
67 }
68
69 @Override
70 protected String getRecordQuery(EdaphobaseImportConfigurator config) {
71 String result = " SELECT * "
72 + " FROM lit_document ld "
73 + " WHERE ld.document_id IN (@IDSET)";
74 result = result.replace("@IDSET", IPartitionedIO.ID_LIST_TOKEN);
75 return result;
76 }
77
78 @Override
79 public boolean doPartition(ResultSetPartitioner partitioner, EdaphobaseImportState state) {
80 ResultSet rs = partitioner.getResultSet();
81 Set<Reference> referencesToSave = new HashSet<>();
82 try {
83 while (rs.next()){
84
85 handleSingleReference(state, rs, referencesToSave);
86
87 }
88 } catch (SQLException e) {
89 // TODO Auto-generated catch block
90 e.printStackTrace();
91 }
92
93 getReferenceService().saveOrUpdate(referencesToSave);
94
95 return true;
96 }
97
98 /**
99 * @param state
100 * @param rs
101 * @param referencesToSave
102 * @throws SQLException
103 */
104 private void handleSingleReference(EdaphobaseImportState state, ResultSet rs, Set<Reference> referencesToSave) throws SQLException {
105 Integer id = nullSafeInt(rs, "document_id");
106 String dtype = rs.getString("dtype");
107 String issue = rs.getString("issue");
108 String orderer = rs.getString("orderer");
109 String place = rs.getString("place");
110 Integer pageFrom = nullSafeInt(rs, "page_from");
111 Integer pageTo = nullSafeInt(rs, "page_to");
112 String subtitle = rs.getString("subtitle");
113 Integer year = nullSafeInt(rs, "year");
114 String isbn = rs.getString("isbn");
115 //refers_to_literature
116 //refers_to_collection
117 //refers_to_observation
118 String remark = rs.getString("remark");
119 String volume = rs.getString("volume");
120 //abbreviation (no record)
121 String title = rs.getString("title");
122 String issn = rs.getString("issn");
123 //circulation //2 records
124 String keywords = rs.getString("keywords");
125 String abstractt = rs.getString("abstract");
126 String parallel_title = rs.getString("parallel_title");
127 //language_fk_language_id
128 //document_type_fk_document_type_id
129 //editor_fk_person_id
130 Integer editorFk = nullSafeInt(rs, "editor_fk_person_id");
131
132 // Integer parentFk = nullSafeInt(rs, "parent_document_fk_document_id");
133 //publisher_fk_publisher_id
134 //deleted
135 //chapter_no
136 //versionfield
137 String doi = rs.getString("doi");
138 String displayString = rs.getString("display_string");
139 //aquisistion_date, aquisition_type, adoption_date, ex_colletion, barcode_prefix, barcode_org_prefix
140 //barcode_type, collection_status, barcode, typus_form,
141
142 //taxon_for_scope, taxon_is_scope
143 //language_fk, document_type_backup
144
145 Integer documentType = nullSafeInt(rs, "document_type");
146 //normalized_title, normalized_abk_official_remark
147
148 Reference<?> ref = makeReferenceType(documentType, dtype);
149 ref.setTitle(title);
150 ref.setPlacePublished(place);
151 ref.setIssn(issn);
152 ref.setIsbn(isbn);
153 if (pageFrom != null || pageTo != null){
154 String pageStr = pageFrom == null ? "" : String.valueOf(pageFrom);
155 pageStr = pageTo == null ? pageStr : "-" + pageTo;
156 ref.setPages(pageStr);
157 }
158 if (year != null){
159 ref.setDatePublished(TimePeriod.NewInstance(year));
160 }
161 ref.setVolume(volume);
162 ref.setReferenceAbstract(abstractt);
163 if (StringUtils.isNotBlank(doi)){
164 try {
165 String doiStr = doi;
166 if (doiStr.startsWith("dx.doi.org/")){
167 doiStr = doiStr.substring(11);
168 }
169 ref.setDoi(DOI.fromString(doiStr));
170 } catch (IllegalArgumentException e) {
171 logger.warn("DOI could not be parsed: " + doi);
172 }
173 }
174 ref.setEdition(issue);
175
176 //id
177 ImportHelper.setOriginalSource(ref, state.getTransactionalSourceReference(), id, REFERENCE_NAMESPACE);
178
179 referencesToSave.add(ref);
180 }
181
182
183 /**
184 * @param documentType
185 * @return
186 */
187 private Reference<?> makeReferenceType(Integer documentType, String dtype) {
188 if (documentType == 11914){
189 return ReferenceFactory.newArticle();
190 } else if (documentType == 11916){
191 return ReferenceFactory.newBook();
192 } else if (documentType == 11915){
193 return ReferenceFactory.newPrintSeries();
194 } else if (documentType == 11913){
195 return ReferenceFactory.newJournal();
196 } else if (documentType == 11917){
197 return ReferenceFactory.newBookSection();
198 } else if (documentType == 11912 || documentType == 11919 || documentType == 11924 ){
199 Reference<?> ref = ReferenceFactory.newGeneric();
200 return ref;
201 } else {
202 throw new RuntimeException("DocumentType not yet supported: " + documentType + ", " + dtype);
203 }
204 }
205
206 @Override
207 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs,
208 EdaphobaseImportState state) {
209 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
210 // Map<String, TeamOrPersonBase<?>> authorMap = new HashMap<>();
211 // Set<String> authorSet = new HashSet<>();
212 // try {
213 // while (rs.next()){
214 // String authorStr = rs.getString("tax_author_name");
215 // authorSet.add(authorStr);
216 // }
217 // } catch (SQLException e) {
218 // e.printStackTrace();
219 // }
220 //
221 // //Authors
222 // Set<UUID> uuidSet = new HashSet<>();
223 // for (String authorStr : authorSet){
224 // UUID uuid = state.getAuthorUuid(authorStr);
225 // uuidSet.add(uuid);
226 // }
227 // List<TeamOrPersonBase<?>> authors = (List)getAgentService().find(uuidSet);
228 // Map<UUID, TeamOrPersonBase<?>> authorUuidMap = new HashMap<>();
229 // for (TeamOrPersonBase<?> author : authors){
230 // authorUuidMap.put(author.getUuid(), author);
231 // }
232 //
233 // for (String authorStr : authorSet){
234 // UUID uuid = state.getAuthorUuid(authorStr);
235 // TeamOrPersonBase<?> author = authorUuidMap.get(uuid);
236 // authorMap.put(authorStr, author);
237 // }
238 // result.put(AUTHOR_NAMESPACE, authorMap);
239
240 return result;
241 }
242
243
244
245 @Override
246 protected boolean doCheck(EdaphobaseImportState state) {
247 return false;
248 }
249
250 @Override
251 protected boolean isIgnore(EdaphobaseImportState state) {
252 return ! state.getConfig().isDoReferences();
253 }
254
255 }