latest globis import changes and update for IPartitionedIO signature for app-import
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / globis / GlobisImportBase.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.globis;
11
12 import java.lang.reflect.Method;
13 import java.sql.ResultSet;
14 import java.sql.SQLException;
15 import java.sql.Timestamp;
16 import java.util.HashSet;
17 import java.util.Set;
18 import java.util.UUID;
19
20 import org.apache.commons.lang.StringUtils;
21 import org.apache.log4j.Logger;
22 import org.hibernate.NonUniqueObjectException;
23 import org.joda.time.DateTime;
24
25 import eu.etaxonomy.cdm.api.service.AgentServiceImpl;
26 import eu.etaxonomy.cdm.io.common.CdmImportBase;
27 import eu.etaxonomy.cdm.io.common.ICdmIO;
28 import eu.etaxonomy.cdm.io.common.IImportConfigurator.EDITOR;
29 import eu.etaxonomy.cdm.io.common.IPartitionedIO;
30 import eu.etaxonomy.cdm.io.common.ImportHelper;
31 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
32 import eu.etaxonomy.cdm.io.common.Source;
33 import eu.etaxonomy.cdm.io.common.mapping.DbImportMapping;
34 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
35 import eu.etaxonomy.cdm.model.agent.INomenclaturalAuthor;
36 import eu.etaxonomy.cdm.model.agent.Person;
37 import eu.etaxonomy.cdm.model.agent.Team;
38 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
39 import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
40 import eu.etaxonomy.cdm.model.common.Annotation;
41 import eu.etaxonomy.cdm.model.common.AnnotationType;
42 import eu.etaxonomy.cdm.model.common.CdmBase;
43 import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
44 import eu.etaxonomy.cdm.model.common.Language;
45 import eu.etaxonomy.cdm.model.common.User;
46 import eu.etaxonomy.cdm.model.location.NamedArea;
47 import eu.etaxonomy.cdm.model.location.Country;
48 import eu.etaxonomy.cdm.model.name.ZoologicalName;
49 import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
50 import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
51 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
52
53 /**
54 * @author a.mueller
55 * @created 20.03.2008
56 */
57 public abstract class GlobisImportBase<CDM_BASE extends CdmBase> extends CdmImportBase<GlobisImportConfigurator, GlobisImportState> implements ICdmIO<GlobisImportState>, IPartitionedIO<GlobisImportState> {
/** Logger used by all methods of this class. */
private static final Logger logger = Logger.getLogger(GlobisImportBase.class);

/**
 * Fixed UUID constant, not referenced within this class.
 * NOTE(review): the name suggests an "id in source" extension type - confirm against the subclasses that use it.
 */
public static final UUID ID_IN_SOURCE_EXT_UUID = UUID.fromString("23dac094-e793-40a4-bad9-649fc4fcfd44");

//NAMESPACES
// String constants available to subclasses; not referenced within this class.
// NOTE(review): presumably passed as the "namespace" argument when original sources are set - confirm against callers.

protected static final String REFERENCE_NAMESPACE = "Literatur";
protected static final String TAXON_NAMESPACE = "current_species";
protected static final String COLLECTION_NAMESPACE = "Collection";
protected static final String IMAGE_NAMESPACE = "Einzelbilder";
protected static final String SPEC_TAX_NAMESPACE = "specTax";
protected static final String TYPE_NAMESPACE = "specTax.SpecTypeDepository";

/** Plural name of the imported records; returned by getPluralString() and used in log output. */
private String pluralString;
/** Name of the source table; returned by getTableName() and used by the default id query. */
private String dbTableName;
/** Target class handed to the mapping's initialize call in doPartition. */
private Class cdmTargetClass;

/** Name parser used by handleAuthorAndYear to parse author strings. */
private INonViralNameParser<?> parser = NonViralNameParserImpl.NewInstance();
76
77
/**
 * Creates an import step bound to one source table.
 *
 * @param pluralString   plural name of the imported records, used in log messages
 * @param dbTableName    name of the source table the default id query selects from
 * @param cdmTargetClass class passed to the mapping when a partition is processed
 */
public GlobisImportBase(String pluralString, String dbTableName, Class<?> cdmTargetClass) {
    this.cdmTargetClass = cdmTargetClass;
    this.dbTableName = dbTableName;
    this.pluralString = pluralString;
}
87
88 protected void doInvoke(GlobisImportState state){
89 logger.info("start make " + getPluralString() + " ...");
90 GlobisImportConfigurator config = state.getConfig();
91 Source source = config.getSource();
92
93 String strIdQuery = getIdQuery();
94 String strRecordQuery = getRecordQuery(config);
95
96 int recordsPerTransaction = config.getRecordsPerTransaction();
97 try{
98 ResultSetPartitioner<GlobisImportState> partitioner = ResultSetPartitioner.NewInstance(source, strIdQuery, strRecordQuery, recordsPerTransaction);
99 while (partitioner.nextPartition()){
100 partitioner.doPartition(this, state);
101 }
102 } catch (SQLException e) {
103 logger.error("SQLException:" + e);
104 state.setUnsuccessfull();
105 }
106
107 logger.info("end make " + getPluralString() + " ... " + getSuccessString(true));
108 return;
109 }
110
111 /**
112 * @param authorAndYear
113 * @param zooName
114 */
115 protected void handleAuthorAndYear(String authorAndYear, ZoologicalName zooName, Integer id, GlobisImportState state) {
116 if (isBlank(authorAndYear)){
117 return;
118 }
119 try {
120 String doubtfulAuthorAndYear = null;
121 if(authorAndYear.matches(".+\\,\\s\\[\\d{4}\\].*")){
122 doubtfulAuthorAndYear = authorAndYear;
123 authorAndYear = authorAndYear.replace("[", "").replace("]", "");
124 }
125 // if (authorAndYear.contains("?")){
126 // authorAndYear = authorAndYear.replace("H?bner", "H\u00fcbner");
127 // authorAndYear = authorAndYear.replace("Oberth?r", "Oberth\u00fcr");
128 // authorAndYear = authorAndYear.replace("M?n?tri?s","M\u00E9n\u00E9tri\u00E9s");
129 // authorAndYear = authorAndYear.replace("Schifferm?ller","Schifferm\u00fcller");
130 //
131 // //TODO remove
132 // authorAndYear = authorAndYear.replace("?", "");
133 //
134 // }
135
136 parser.parseAuthors(zooName, authorAndYear);
137 deduplicateAuthors(zooName, state);
138
139 if (doubtfulAuthorAndYear != null){
140 zooName.setAuthorshipCache(doubtfulAuthorAndYear, true);
141 }
142
143 } catch (StringNotParsableException e) {
144 logger.warn("Author could not be parsed: " + authorAndYear + " for id " +id);
145 zooName.setAuthorshipCache(authorAndYear, true);
146 }
147 }
148
149
150 private void deduplicateAuthors(ZoologicalName zooName, GlobisImportState state) {
151 zooName.setCombinationAuthorTeam(getExistingAuthor(zooName.getCombinationAuthorTeam(), state));
152 zooName.setExCombinationAuthorTeam(getExistingAuthor(zooName.getExCombinationAuthorTeam(), state));
153 zooName.setBasionymAuthorTeam(getExistingAuthor(zooName.getBasionymAuthorTeam(), state));
154 zooName.setExBasionymAuthorTeam(getExistingAuthor(zooName.getExBasionymAuthorTeam(), state));
155 }
156
157 private INomenclaturalAuthor getExistingAuthor(INomenclaturalAuthor nomAuthor, GlobisImportState state) {
158 TeamOrPersonBase<?> author = (TeamOrPersonBase<?>)nomAuthor;
159 if (author == null){
160 return null;
161 }
162 if (author instanceof Person){
163 Person person = state.getPerson(author.getTitleCache());
164 return saveAndDecide(person, author, author.getTitleCache(), state);
165 }else if (author instanceof Team){
166 String key = GlobisAuthorImport.makeTeamKey((Team)author, state, getAgentService());
167 Team existingTeam = state.getTeam(key);
168 if (existingTeam == null){
169 Team newTeam = Team.NewInstance();
170 for (Person member :((Team) author).getTeamMembers()){
171 Person existingPerson = state.getPerson(member.getTitleCache());
172 if (existingPerson != null){
173 try {
174 getAgentService().update(existingPerson);
175 } catch (NonUniqueObjectException nuoe){
176 // person already exists in
177 existingPerson = CdmBase.deproxy(getAgentService().find(existingPerson.getUuid()), Person.class);
178 state.putPerson(existingPerson.getTitleCache(), existingPerson);
179 } catch (Exception e) {
180 throw new RuntimeException (e);
181 }
182 newTeam.addTeamMember(existingPerson);
183
184 logger.warn("newTeam " + newTeam.getId());
185 }else{
186 newTeam.addTeamMember(member);
187 }
188 }
189 author = newTeam;
190 }
191
192 return saveAndDecide(existingTeam, author, key, state);
193 }else{
194 logger.warn("Author type not supported: " + author.getClass().getName());
195 return author;
196 }
197 }
198
199 private TeamOrPersonBase<?> saveAndDecide(TeamOrPersonBase<?> existing, TeamOrPersonBase<?> author, String key, GlobisImportState state) {
200 if (existing != null){
201 try {
202 getAgentService().update(existing);
203 } catch (NonUniqueObjectException nuoe){
204 // person already exists in
205 existing = CdmBase.deproxy(getAgentService().find(existing.getUuid()), TeamOrPersonBase.class);
206 putAgent(existing, key, state);
207 } catch (Exception e) {
208 throw new RuntimeException (e);
209 }
210 return existing;
211 }else{
212 getAgentService().save(author);
213 putAgent(author, key, state);
214 return author;
215 }
216 }
217
218 /**
219 * @param author
220 * @param key
221 * @param state
222 */
223 private void putAgent(TeamOrPersonBase<?> agent, String key, GlobisImportState state) {
224 if (agent instanceof Team){
225 state.putTeam(key, (Team)agent);
226 }else{
227 state.putPerson(key, (Person)agent);
228 }
229 }
230
231 /**
232 * @param state
233 * @param countryStr
234 * @return
235 */
236 protected NamedArea getCountry(GlobisImportState state, String countryStr) {
237 NamedArea country = Country.getCountryByLabel(countryStr);
238 if (country == null){
239 try {
240 country = (NamedArea)state.getTransformer().getNamedAreaByKey(countryStr);
241 } catch (UndefinedTransformerMethodException e) {
242 e.printStackTrace();
243 }
244 }
245 return country;
246 }
247
248
249
250 public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
251 boolean success = true ;
252 Set objectsToSave = new HashSet();
253
254 DbImportMapping<?, ?> mapping = getMapping();
255 mapping.initialize(state, cdmTargetClass);
256
257 ResultSet rs = partitioner.getResultSet();
258 try{
259 while (rs.next()){
260 success &= mapping.invoke(rs,objectsToSave);
261 }
262 } catch (SQLException e) {
263 logger.error("SQLException:" + e);
264 return false;
265 }
266
267 partitioner.startDoSave();
268 getCommonService().save(objectsToSave);
269 return success;
270 }
271
272
273
/**
 * Returns the mapping that doPartition initializes and invokes for every record.
 * <p>
 * This default implementation returns <code>null</code>; the method is marked as
 * intended-abstract in the signature below, so subclasses relying on the default
 * doPartition are expected to override it.
 *
 * @return the mapping for this import step, <code>null</code> in this base implementation
 */
protected /*abstract*/ DbImportMapping<?, ?> getMapping(){
    return null;
}
280
/**
 * Returns the record query that doInvoke passes, together with the id query, to the
 * result set partitioner.
 * NOTE(review): the exact format expected by the partitioner (e.g. an id placeholder) is
 * not visible in this class - confirm against ResultSetPartitioner.
 *
 * @param config the import configuration
 * @return the record query string
 */
protected abstract String getRecordQuery(GlobisImportConfigurator config);
285
286 /**
287 * @return
288 */
289 protected String getIdQuery(){
290 String result = " SELECT id FROM " + getTableName();
291 return result;
292 }
293
294 /* (non-Javadoc)
295 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getPluralString()
296 */
297 public String getPluralString(){
298 return pluralString;
299 }
300
301 /**
302 * @return
303 */
304 protected String getTableName(){
305 return this.dbTableName;
306 }
307
308 protected boolean doIdCreatedUpdatedNotes(GlobisImportState state, IdentifiableEntity identifiableEntity, ResultSet rs, long id, String namespace)
309 throws SQLException{
310 boolean success = true;
311 //id
312 success &= ImportHelper.setOriginalSource(identifiableEntity, state.getTransactionalSourceReference(), id, namespace);
313 //createdUpdateNotes
314 success &= doCreatedUpdatedNotes(state, identifiableEntity, rs, namespace);
315 return success;
316 }
317
318
319 protected boolean doCreatedUpdatedNotes(GlobisImportState state, AnnotatableEntity annotatableEntity, ResultSet rs, String namespace)
320 throws SQLException{
321
322 GlobisImportConfigurator config = state.getConfig();
323 Object createdWhen = rs.getObject("Created_When");
324 String createdWho = rs.getString("Created_Who");
325 Object updatedWhen = rs.getObject("Updated_When");
326 String updatedWho = rs.getString("Updated_who");
327 String notes = rs.getString("notes");
328
329 boolean success = true;
330
331 //Created When, Who, Updated When Who
332 if (config.getEditor() == null || config.getEditor().equals(EDITOR.NO_EDITORS)){
333 //do nothing
334 }else if (config.getEditor().equals(EDITOR.EDITOR_AS_ANNOTATION)){
335 String createdAnnotationString = "Berlin Model record was created By: " + String.valueOf(createdWho) + " (" + String.valueOf(createdWhen) + ") ";
336 if (updatedWhen != null && updatedWho != null){
337 createdAnnotationString += " and updated By: " + String.valueOf(updatedWho) + " (" + String.valueOf(updatedWhen) + ")";
338 }
339 Annotation annotation = Annotation.NewInstance(createdAnnotationString, Language.DEFAULT());
340 annotation.setCommentator(config.getCommentator());
341 annotation.setAnnotationType(AnnotationType.TECHNICAL());
342 annotatableEntity.addAnnotation(annotation);
343 }else if (config.getEditor().equals(EDITOR.EDITOR_AS_EDITOR)){
344 User creator = getUser(createdWho, state);
345 User updator = getUser(updatedWho, state);
346 DateTime created = getDateTime(createdWhen);
347 DateTime updated = getDateTime(updatedWhen);
348 annotatableEntity.setCreatedBy(creator);
349 annotatableEntity.setUpdatedBy(updator);
350 annotatableEntity.setCreated(created);
351 annotatableEntity.setUpdated(updated);
352 }else {
353 logger.warn("Editor type not yet implemented: " + config.getEditor());
354 }
355
356
357 //notes
358 if (StringUtils.isNotBlank(notes)){
359 String notesString = String.valueOf(notes);
360 if (notesString.length() > 65530 ){
361 notesString = notesString.substring(0, 65530) + "...";
362 logger.warn("Notes string is longer than 65530 and was truncated: " + annotatableEntity);
363 }
364 Annotation notesAnnotation = Annotation.NewInstance(notesString, null);
365 //notesAnnotation.setAnnotationType(AnnotationType.EDITORIAL());
366 //notes.setCommentator(bmiConfig.getCommentator());
367 annotatableEntity.addAnnotation(notesAnnotation);
368 }
369 return success;
370 }
371
372 private User getUser(String userString, GlobisImportState state){
373 if (isBlank(userString)){
374 return null;
375 }
376 userString = userString.trim();
377
378 User user = state.getUser(userString);
379 if (user == null){
380 user = getTransformedUser(userString,state);
381 }
382 if (user == null){
383 user = makeNewUser(userString, state);
384 }
385 if (user == null){
386 logger.warn("User is null");
387 }
388 return user;
389 }
390
391 private User getTransformedUser(String userString, GlobisImportState state){
392 Method method = state.getConfig().getUserTransformationMethod();
393 if (method == null){
394 return null;
395 }
396 try {
397 userString = (String)state.getConfig().getUserTransformationMethod().invoke(null, userString);
398 } catch (Exception e) {
399 logger.warn("Error when trying to transform userString " + userString + ". No transformation done.");
400 }
401 User user = state.getUser(userString);
402 return user;
403 }
404
405 private User makeNewUser(String userString, GlobisImportState state){
406 String pwd = getPassword();
407 User user = User.NewInstance(userString, pwd);
408 state.putUser(userString, user);
409 getUserService().save(user);
410 logger.info("Added new user: " + userString);
411 return user;
412 }
413
414 private String getPassword(){
415 String result = UUID.randomUUID().toString();
416 return result;
417 }
418
419 private DateTime getDateTime(Object timeString){
420 if (timeString == null){
421 return null;
422 }
423 DateTime dateTime = null;
424 if (timeString instanceof Timestamp){
425 Timestamp timestamp = (Timestamp)timeString;
426 dateTime = new DateTime(timestamp);
427 }else{
428 logger.warn("time ("+timeString+") is not a timestamp. Datetime set to current date. ");
429 dateTime = new DateTime();
430 }
431 return dateTime;
432 }
433
434
435
436 /**
437 * Reads a foreign key field from the result set and adds its value to the idSet.
438 * @param rs
439 * @param teamIdSet
440 * @throws SQLException
441 */
442 protected void handleForeignKey(ResultSet rs, Set<String> idSet, String attributeName)
443 throws SQLException {
444 Object idObj = rs.getObject(attributeName);
445 if (idObj != null){
446 String id = String.valueOf(idObj);
447 idSet.add(id);
448 }
449 }
450
451
452
453
454 /**
455 * Returns true if i is a multiple of recordsPerTransaction
456 * @param i
457 * @param recordsPerTransaction
458 * @return
459 */
460 protected boolean loopNeedsHandling(int i, int recordsPerLoop) {
461 startTransaction();
462 return (i % recordsPerLoop) == 0;
463 }
464
465 protected void doLogPerLoop(int count, int recordsPerLog, String pluralString){
466 if ((count % recordsPerLog ) == 0 && count!= 0 ){ logger.info(pluralString + " handled: " + (count));}
467 }
468
469
470
471
472 }