Unify name and taxon creation
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / globis / GlobisImportBase.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.globis;
11
12 import java.lang.reflect.Method;
13 import java.sql.ResultSet;
14 import java.sql.SQLException;
15 import java.sql.Timestamp;
16 import java.util.HashSet;
17 import java.util.Set;
18 import java.util.UUID;
19
20 import org.apache.commons.lang.StringUtils;
21 import org.apache.log4j.Logger;
22 import org.hibernate.NonUniqueObjectException;
23 import org.joda.time.DateTime;
24
25 import eu.etaxonomy.cdm.io.common.CdmImportBase;
26 import eu.etaxonomy.cdm.io.common.ICdmIO;
27 import eu.etaxonomy.cdm.io.common.IImportConfigurator.EDITOR;
28 import eu.etaxonomy.cdm.io.common.IPartitionedIO;
29 import eu.etaxonomy.cdm.io.common.ImportHelper;
30 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
31 import eu.etaxonomy.cdm.io.common.Source;
32 import eu.etaxonomy.cdm.io.common.mapping.DbImportMapping;
33 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
34 import eu.etaxonomy.cdm.model.agent.INomenclaturalAuthor;
35 import eu.etaxonomy.cdm.model.agent.Person;
36 import eu.etaxonomy.cdm.model.agent.Team;
37 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
38 import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
39 import eu.etaxonomy.cdm.model.common.Annotation;
40 import eu.etaxonomy.cdm.model.common.AnnotationType;
41 import eu.etaxonomy.cdm.model.common.CdmBase;
42 import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
43 import eu.etaxonomy.cdm.model.common.Language;
44 import eu.etaxonomy.cdm.model.common.User;
45 import eu.etaxonomy.cdm.model.location.Country;
46 import eu.etaxonomy.cdm.model.location.NamedArea;
47 import eu.etaxonomy.cdm.model.name.ZoologicalName;
48 import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
49 import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
50 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
51
52 /**
53 * @author a.mueller
54 * @created 20.03.2008
55 */
56 public abstract class GlobisImportBase<CDM_BASE extends CdmBase> extends CdmImportBase<GlobisImportConfigurator, GlobisImportState> implements ICdmIO<GlobisImportState>, IPartitionedIO<GlobisImportState> {
57 private static final Logger logger = Logger.getLogger(GlobisImportBase.class);
58
59 public static final UUID ID_IN_SOURCE_EXT_UUID = UUID.fromString("23dac094-e793-40a4-bad9-649fc4fcfd44");
60
61 //NAMESPACES
62
63 protected static final String REFERENCE_NAMESPACE = "Literatur";
64 protected static final String TAXON_NAMESPACE = "current_species";
65 protected static final String COLLECTION_NAMESPACE = "Collection";
66 protected static final String IMAGE_NAMESPACE = "Einzelbilder";
67 protected static final String SPEC_TAX_NAMESPACE = "specTax";
68 protected static final String TYPE_NAMESPACE = "specTax.SpecTypeDepository";
69
70 private final String pluralString;
71 private final String dbTableName;
72 private final Class cdmTargetClass;
73
74 private final INonViralNameParser<?> parser = NonViralNameParserImpl.NewInstance();
75
76
77 /**
78 * @param dbTableName
79 * @param dbTableName2
80 */
81 public GlobisImportBase(String pluralString, String dbTableName, Class<?> cdmTargetClass) {
82 this.pluralString = pluralString;
83 this.dbTableName = dbTableName;
84 this.cdmTargetClass = cdmTargetClass;
85 }
86
87 @Override
88 protected void doInvoke(GlobisImportState state){
89 logger.info("start make " + getPluralString() + " ...");
90 GlobisImportConfigurator config = state.getConfig();
91 Source source = config.getSource();
92
93 String strIdQuery = getIdQuery();
94 String strRecordQuery = getRecordQuery(config);
95
96 int recordsPerTransaction = config.getRecordsPerTransaction();
97 try{
98 ResultSetPartitioner<GlobisImportState> partitioner = ResultSetPartitioner.NewInstance(source, strIdQuery, strRecordQuery, recordsPerTransaction);
99 while (partitioner.nextPartition()){
100 partitioner.doPartition(this, state);
101 }
102 } catch (SQLException e) {
103 logger.error("SQLException:" + e);
104 state.setUnsuccessfull();
105 }
106
107 logger.info("end make " + getPluralString() + " ... " + getSuccessString(true));
108 return;
109 }
110
111 /**
112 * @param authorAndYear
113 * @param zooName
114 */
115 protected void handleAuthorAndYear(String authorAndYear, ZoologicalName zooName, Integer id, GlobisImportState state) {
116 if (isBlank(authorAndYear)){
117 return;
118 }else if ("[Denis & Schifferm\u00FCller], 1775".equals(authorAndYear)){
119 handleDenisSchiffermueller(zooName, state);
120 return;
121 }else{
122 try {
123 String doubtfulAuthorAndYear = null;
124 if(authorAndYear.matches(".+\\,\\s\\[\\d{4}\\].*")){
125 doubtfulAuthorAndYear = authorAndYear;
126 authorAndYear = authorAndYear.replace("[", "").replace("]", "");
127 }
128
129 parser.parseAuthors(zooName, authorAndYear);
130 deduplicateAuthors(zooName, state);
131
132 if (doubtfulAuthorAndYear != null){
133 zooName.setAuthorshipCache(doubtfulAuthorAndYear, true);
134 }
135
136 } catch (StringNotParsableException e) {
137 logger.warn("Author could not be parsed: " + authorAndYear + " for id " +id);
138 zooName.setAuthorshipCache(authorAndYear, true);
139 }
140 }
141 }
142
143 /**
144 * @param zooName
145 * @param state
146 */
147 private void handleDenisSchiffermueller(ZoologicalName zooName,
148 GlobisImportState state) {
149 String teamStr = "Denis & Schifferm\u00FCller";
150 Team team = state.getTeam(teamStr);
151 if (team == null){
152 team = Team.NewInstance();
153 state.putTeam(teamStr, team);
154 getAgentService().save(team);
155 }
156 zooName.setCombinationAuthorship(team);
157 zooName.setPublicationYear(1775);
158 zooName.setAuthorshipCache("[Denis & Schifferm\u00FCller], 1775", true);
159 }
160
161
162 private void deduplicateAuthors(ZoologicalName zooName, GlobisImportState state) {
163 zooName.setCombinationAuthorship(getExistingAuthor(zooName.getCombinationAuthorship(), state));
164 zooName.setExCombinationAuthorship(getExistingAuthor(zooName.getExCombinationAuthorship(), state));
165 zooName.setBasionymAuthorship(getExistingAuthor(zooName.getBasionymAuthorship(), state));
166 zooName.setExBasionymAuthorship(getExistingAuthor(zooName.getExBasionymAuthorship(), state));
167 }
168
169 private TeamOrPersonBase<?> getExistingAuthor(INomenclaturalAuthor nomAuthor, GlobisImportState state) {
170 TeamOrPersonBase<?> author = (TeamOrPersonBase<?>)nomAuthor;
171 if (author == null){
172 return null;
173 }
174 if (author instanceof Person){
175 Person person = state.getPerson(author.getTitleCache());
176 return saveAndDecide(person, author, author.getTitleCache(), state);
177 }else if (author instanceof Team){
178 String key = GlobisAuthorImport.makeTeamKey((Team)author, state, getAgentService());
179 Team existingTeam = state.getTeam(key);
180 if (existingTeam == null){
181 Team newTeam = Team.NewInstance();
182 for (Person member :((Team) author).getTeamMembers()){
183 Person existingPerson = state.getPerson(member.getTitleCache());
184 if (existingPerson != null){
185 try {
186 getAgentService().update(existingPerson);
187 } catch (NonUniqueObjectException nuoe){
188 // person already exists in
189 existingPerson = CdmBase.deproxy(getAgentService().find(existingPerson.getUuid()), Person.class);
190 state.putPerson(existingPerson.getTitleCache(), existingPerson);
191 } catch (Exception e) {
192 throw new RuntimeException (e);
193 }
194 newTeam.addTeamMember(existingPerson);
195 //
196 // logger.warn("newTeam " + newTeam.getId());
197 }else{
198 newTeam.addTeamMember(member);
199 }
200 }
201 author = newTeam;
202 }
203
204 return saveAndDecide(existingTeam, author, key, state);
205 }else{
206 logger.warn("Author type not supported: " + author.getClass().getName());
207 return author;
208 }
209 }
210
211 private TeamOrPersonBase<?> saveAndDecide(TeamOrPersonBase<?> existing, TeamOrPersonBase<?> author, String key, GlobisImportState state) {
212 if (existing != null){
213 try {
214 getAgentService().update(existing);
215 } catch (NonUniqueObjectException nuoe){
216 // person already exists in
217 existing = CdmBase.deproxy(getAgentService().find(existing.getUuid()), TeamOrPersonBase.class);
218 putAgent(existing, key, state);
219 } catch (Exception e) {
220 throw new RuntimeException (e);
221 }
222 return existing;
223 }else{
224 getAgentService().save(author);
225 putAgent(author, key, state);
226 return author;
227 }
228 }
229
230 /**
231 * @param author
232 * @param key
233 * @param state
234 */
235 private void putAgent(TeamOrPersonBase<?> agent, String key, GlobisImportState state) {
236 if (agent instanceof Team){
237 state.putTeam(key, (Team)agent);
238 }else{
239 state.putPerson(key, (Person)agent);
240 }
241 }
242
243 /**
244 * @param state
245 * @param countryStr
246 * @return
247 */
248 protected NamedArea getCountry(GlobisImportState state, String countryStr) {
249 NamedArea country = Country.getCountryByLabel(countryStr);
250 if (country == null){
251 try {
252 country = state.getTransformer().getNamedAreaByKey(countryStr);
253 } catch (UndefinedTransformerMethodException e) {
254 e.printStackTrace();
255 }
256 }
257 return country;
258 }
259
260
261
262 @Override
263 public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
264 boolean success = true ;
265 Set objectsToSave = new HashSet();
266
267 DbImportMapping<?, ?> mapping = getMapping();
268 mapping.initialize(state, cdmTargetClass);
269
270 ResultSet rs = partitioner.getResultSet();
271 try{
272 while (rs.next()){
273 success &= mapping.invoke(rs,objectsToSave);
274 }
275 } catch (SQLException e) {
276 logger.error("SQLException:" + e);
277 return false;
278 }
279
280 partitioner.startDoSave();
281 getCommonService().save(objectsToSave);
282 return success;
283 }
284
285
286
287 /**
288 * @return
289 */
290 protected /*abstract*/ DbImportMapping<?, ?> getMapping(){
291 return null;
292 }
293
294 /**
295 * @return
296 */
297 protected abstract String getRecordQuery(GlobisImportConfigurator config);
298
299 /**
300 * @return
301 */
302 protected String getIdQuery(){
303 String result = " SELECT id FROM " + getTableName();
304 return result;
305 }
306
307 /* (non-Javadoc)
308 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getPluralString()
309 */
310 @Override
311 public String getPluralString(){
312 return pluralString;
313 }
314
315 /**
316 * @return
317 */
318 protected String getTableName(){
319 return this.dbTableName;
320 }
321
322 protected boolean doIdCreatedUpdatedNotes(GlobisImportState state, IdentifiableEntity identifiableEntity, ResultSet rs, long id, String namespace)
323 throws SQLException{
324 boolean success = true;
325 //id
326 success &= ImportHelper.setOriginalSource(identifiableEntity, state.getTransactionalSourceReference(), id, namespace);
327 //createdUpdateNotes
328 success &= doCreatedUpdatedNotes(state, identifiableEntity, rs, namespace);
329 return success;
330 }
331
332
333 protected boolean doCreatedUpdatedNotes(GlobisImportState state, AnnotatableEntity annotatableEntity, ResultSet rs, String namespace)
334 throws SQLException{
335
336 GlobisImportConfigurator config = state.getConfig();
337 Object createdWhen = rs.getObject("Created_When");
338 String createdWho = rs.getString("Created_Who");
339 Object updatedWhen = rs.getObject("Updated_When");
340 String updatedWho = rs.getString("Updated_who");
341 String notes = rs.getString("notes");
342
343 boolean success = true;
344
345 //Created When, Who, Updated When Who
346 if (config.getEditor() == null || config.getEditor().equals(EDITOR.NO_EDITORS)){
347 //do nothing
348 }else if (config.getEditor().equals(EDITOR.EDITOR_AS_ANNOTATION)){
349 String createdAnnotationString = "Berlin Model record was created By: " + String.valueOf(createdWho) + " (" + String.valueOf(createdWhen) + ") ";
350 if (updatedWhen != null && updatedWho != null){
351 createdAnnotationString += " and updated By: " + String.valueOf(updatedWho) + " (" + String.valueOf(updatedWhen) + ")";
352 }
353 Annotation annotation = Annotation.NewInstance(createdAnnotationString, Language.DEFAULT());
354 annotation.setCommentator(config.getCommentator());
355 annotation.setAnnotationType(AnnotationType.TECHNICAL());
356 annotatableEntity.addAnnotation(annotation);
357 }else if (config.getEditor().equals(EDITOR.EDITOR_AS_EDITOR)){
358 User creator = getUser(createdWho, state);
359 User updator = getUser(updatedWho, state);
360 DateTime created = getDateTime(createdWhen);
361 DateTime updated = getDateTime(updatedWhen);
362 annotatableEntity.setCreatedBy(creator);
363 annotatableEntity.setUpdatedBy(updator);
364 annotatableEntity.setCreated(created);
365 annotatableEntity.setUpdated(updated);
366 }else {
367 logger.warn("Editor type not yet implemented: " + config.getEditor());
368 }
369
370
371 //notes
372 if (StringUtils.isNotBlank(notes)){
373 String notesString = String.valueOf(notes);
374 if (notesString.length() > 65530 ){
375 notesString = notesString.substring(0, 65530) + "...";
376 logger.warn("Notes string is longer than 65530 and was truncated: " + annotatableEntity);
377 }
378 Annotation notesAnnotation = Annotation.NewInstance(notesString, null);
379 //notesAnnotation.setAnnotationType(AnnotationType.EDITORIAL());
380 //notes.setCommentator(bmiConfig.getCommentator());
381 annotatableEntity.addAnnotation(notesAnnotation);
382 }
383 return success;
384 }
385
386 private User getUser(String userString, GlobisImportState state){
387 if (isBlank(userString)){
388 return null;
389 }
390 userString = userString.trim();
391
392 User user = state.getUser(userString);
393 if (user == null){
394 user = getTransformedUser(userString,state);
395 }
396 if (user == null){
397 user = makeNewUser(userString, state);
398 }
399 if (user == null){
400 logger.warn("User is null");
401 }
402 return user;
403 }
404
405 private User getTransformedUser(String userString, GlobisImportState state){
406 Method method = state.getConfig().getUserTransformationMethod();
407 if (method == null){
408 return null;
409 }
410 try {
411 userString = (String)state.getConfig().getUserTransformationMethod().invoke(null, userString);
412 } catch (Exception e) {
413 logger.warn("Error when trying to transform userString " + userString + ". No transformation done.");
414 }
415 User user = state.getUser(userString);
416 return user;
417 }
418
419 private User makeNewUser(String userString, GlobisImportState state){
420 String pwd = getPassword();
421 User user = User.NewInstance(userString, pwd);
422 state.putUser(userString, user);
423 getUserService().save(user);
424 logger.info("Added new user: " + userString);
425 return user;
426 }
427
428 private String getPassword(){
429 String result = UUID.randomUUID().toString();
430 return result;
431 }
432
433 private DateTime getDateTime(Object timeString){
434 if (timeString == null){
435 return null;
436 }
437 DateTime dateTime = null;
438 if (timeString instanceof Timestamp){
439 Timestamp timestamp = (Timestamp)timeString;
440 dateTime = new DateTime(timestamp);
441 }else{
442 logger.warn("time ("+timeString+") is not a timestamp. Datetime set to current date. ");
443 dateTime = new DateTime();
444 }
445 return dateTime;
446 }
447
448
449
450 /**
451 * Reads a foreign key field from the result set and adds its value to the idSet.
452 * @param rs
453 * @param teamIdSet
454 * @throws SQLException
455 */
456 protected void handleForeignKey(ResultSet rs, Set<String> idSet, String attributeName)
457 throws SQLException {
458 Object idObj = rs.getObject(attributeName);
459 if (idObj != null){
460 String id = String.valueOf(idObj);
461 idSet.add(id);
462 }
463 }
464
465
466
467
468 /**
469 * Returns true if i is a multiple of recordsPerTransaction
470 * @param i
471 * @param recordsPerTransaction
472 * @return
473 */
474 protected boolean loopNeedsHandling(int i, int recordsPerLoop) {
475 startTransaction();
476 return (i % recordsPerLoop) == 0;
477 }
478
479 protected void doLogPerLoop(int count, int recordsPerLog, String pluralString){
480 if ((count % recordsPerLog ) == 0 && count!= 0 ){ logger.info(pluralString + " handled: " + (count));}
481 }
482
483
484
485
486 }