1
|
/**
|
2
|
* Copyright (C) 2007 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.io.berlinModel.in;
|
10
|
|
11
|
import java.sql.ResultSet;
|
12
|
import java.sql.SQLException;
|
13
|
import java.util.Collection;
|
14
|
import java.util.HashMap;
|
15
|
import java.util.HashSet;
|
16
|
import java.util.Map;
|
17
|
import java.util.Set;
|
18
|
|
19
|
import org.apache.log4j.Logger;
|
20
|
import org.springframework.stereotype.Component;
|
21
|
|
22
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
23
|
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelAuthorTeamImportValidator;
|
24
|
import eu.etaxonomy.cdm.io.common.IOValidator;
|
25
|
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
|
26
|
import eu.etaxonomy.cdm.io.common.Source;
|
27
|
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
|
28
|
import eu.etaxonomy.cdm.model.agent.Person;
|
29
|
import eu.etaxonomy.cdm.model.agent.Team;
|
30
|
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
|
31
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
32
|
|
33
|
|
34
|
/**
|
35
|
* @author a.mueller
|
36
|
* @since 20.03.2008
|
37
|
*/
|
38
|
@Component
|
39
|
public class BerlinModelAuthorTeamImport extends BerlinModelImportBase {
|
40
|
|
41
|
private static final long serialVersionUID = -4318481607033688522L;
|
42
|
private static final Logger logger = Logger.getLogger(BerlinModelAuthorTeamImport.class);
|
43
|
|
44
|
public static final String NAMESPACE = "AuthorTeam";
|
45
|
|
46
|
private static final String pluralString = "AuthorTeams";
|
47
|
private static final String dbTableName = "AuthorTeam";
|
48
|
|
49
|
private ResultSet rsSequence;
|
50
|
private Source source;
|
51
|
|
52
|
private ImportDeduplicationHelper<BerlinModelImportState> deduplicationHelper;
|
53
|
|
54
|
|
55
|
/**
 * Creates the author team import step and hands the source table name
 * ({@code dbTableName}) and the plural label used in log messages
 * ({@code pluralString}) to the base class constructor.
 */
public BerlinModelAuthorTeamImport(){
    super(dbTableName, pluralString);
}
|
58
|
|
59
|
|
60
|
@Override
|
61
|
protected void doInvoke(BerlinModelImportState state){
|
62
|
BerlinModelImportConfigurator config = state.getConfig();
|
63
|
source = config.getSource();
|
64
|
this.deduplicationHelper = ImportDeduplicationHelper.NewInstance(this, state);
|
65
|
|
66
|
logger.info("start make " + pluralString + " ...");
|
67
|
|
68
|
//queryStrings
|
69
|
String strIdQuery = getIdQuery(state);
|
70
|
|
71
|
String strRecordQuery = getRecordQuery(config);
|
72
|
String strWhere = " WHERE (1=1) ";
|
73
|
if (state.getConfig().getAuthorTeamFilter() != null){
|
74
|
strWhere += " AND " + state.getConfig().getAuthorTeamFilter();
|
75
|
strWhere = strWhere.replaceFirst("authorTeamId", "authorTeamFk");
|
76
|
}
|
77
|
String strQuerySequence =
|
78
|
" SELECT * " +
|
79
|
" FROM AuthorTeamSequence " +
|
80
|
(state.getConfig().isEuroMed() ? "" : strWhere) +
|
81
|
" ORDER By authorTeamFk, Sequence ";
|
82
|
|
83
|
int recordsPerTransaction = config.getRecordsPerTransaction();
|
84
|
try{
|
85
|
ResultSetPartitioner<BerlinModelImportState> partitioner = ResultSetPartitioner.NewInstance(source, strIdQuery, strRecordQuery, recordsPerTransaction);
|
86
|
rsSequence = source.getResultSet(strQuerySequence) ; //only here, to reduce deadlock/timeout probability
|
87
|
while (partitioner.nextPartition()){
|
88
|
partitioner.doPartition(this, state);
|
89
|
}
|
90
|
} catch (SQLException e) {
|
91
|
logger.error("SQLException:" + e);
|
92
|
state.setUnsuccessfull();
|
93
|
return;
|
94
|
}
|
95
|
|
96
|
logger.info("end make " + pluralString + " ... " + getSuccessString(true));
|
97
|
this.deduplicationHelper = null;
|
98
|
return;
|
99
|
}
|
100
|
|
101
|
@Override
|
102
|
protected String getIdQuery(BerlinModelImportState state){
|
103
|
if (state.getConfig().isEuroMed()){
|
104
|
return " SELECT authorTeamId "
|
105
|
+ " FROM v_cdm_exp_authorTeamsAll ORDER BY authorTeamId "
|
106
|
;
|
107
|
}
|
108
|
|
109
|
String strWhere = " WHERE (1=1) ";
|
110
|
if (state.getConfig().getAuthorTeamFilter() != null){
|
111
|
strWhere += " AND " + state.getConfig().getAuthorTeamFilter();
|
112
|
}
|
113
|
String idQuery =
|
114
|
" SELECT authorTeamId " +
|
115
|
" FROM AuthorTeam " +
|
116
|
strWhere +
|
117
|
" ORDER BY authorTeamId ";
|
118
|
return idQuery;
|
119
|
}
|
120
|
|
121
|
|
122
|
@Override
|
123
|
protected String getRecordQuery(BerlinModelImportConfigurator config) {
|
124
|
String strRecordQuery =
|
125
|
" SELECT * " +
|
126
|
" FROM AuthorTeam " +
|
127
|
" WHERE authorTeamId IN ( " + ID_LIST_TOKEN + " )" +
|
128
|
" ORDER By authorTeamId ";
|
129
|
return strRecordQuery;
|
130
|
}
|
131
|
|
132
|
|
133
|
@Override
|
134
|
public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, BerlinModelImportState state) {
|
135
|
boolean success = true ;
|
136
|
deduplicationHelper.restartSession();
|
137
|
BerlinModelImportConfigurator config = state.getConfig();
|
138
|
Set<TeamOrPersonBase<?>> authorsToSave = new HashSet<>();
|
139
|
@SuppressWarnings("unchecked")
|
140
|
Map<String, Person> personMap = partitioner.getObjectMap(BerlinModelAuthorImport.NAMESPACE);
|
141
|
|
142
|
ResultSet rs = partitioner.getResultSet();
|
143
|
//for each reference
|
144
|
try{
|
145
|
while (rs.next()){
|
146
|
try{
|
147
|
//if ((i++ % modCount ) == 0 && i!= 1 ){ logger.info(""+pluralString+" handled: " + (i-1));}
|
148
|
|
149
|
//create Agent element
|
150
|
int teamId = rs.getInt("AuthorTeamId");
|
151
|
if (teamId == 0 && config.isIgnore0AuthorTeam()){
|
152
|
continue;
|
153
|
}
|
154
|
|
155
|
Team team = Team.NewInstance();
|
156
|
|
157
|
Boolean preliminaryFlag = rs.getBoolean("PreliminaryFlag");
|
158
|
String authorTeamCache = rs.getString("AuthorTeamCache");
|
159
|
String fullAuthorTeamCache = rs.getString("FullAuthorTeamCache");
|
160
|
if (isBlank(fullAuthorTeamCache)){
|
161
|
// fullAuthorTeamCache = authorTeamCache;
|
162
|
if (isBlank(authorTeamCache) && preliminaryFlag){
|
163
|
logger.warn("authorTeamCache and fullAuthorTeamCache are blank/null and preliminaryFlag is true. This makes no sense and should not happen: " + teamId);
|
164
|
}
|
165
|
}
|
166
|
// team.setTitleCache(fullAuthorTeamCache, preliminaryFlag);
|
167
|
// team.setNomenclaturalTitle(authorTeamCache, preliminaryFlag);
|
168
|
|
169
|
success &= makeSequence(state, team, teamId, rsSequence, personMap);
|
170
|
|
171
|
TeamOrPersonBase<?> author = handleTeam(state, team, authorTeamCache,
|
172
|
fullAuthorTeamCache, preliminaryFlag, teamId);
|
173
|
|
174
|
if (author == team && team.getTeamMembers().size() == 0 && preliminaryFlag == false){
|
175
|
team.setProtectedTitleCache(true);
|
176
|
team.setProtectedNomenclaturalTitleCache(true);
|
177
|
}
|
178
|
|
179
|
//created, notes
|
180
|
doIdCreatedUpdatedNotes(state, author, rs, teamId, NAMESPACE);
|
181
|
|
182
|
authorsToSave.add(author);
|
183
|
}catch(Exception ex){
|
184
|
logger.error(ex.getMessage());
|
185
|
ex.printStackTrace();
|
186
|
success = false;
|
187
|
}
|
188
|
} //while rs.hasNext()
|
189
|
} catch (SQLException e) {
|
190
|
logger.error("SQLException:" + e);
|
191
|
return false;
|
192
|
}
|
193
|
|
194
|
//logger.info(i + " " + pluralString + " handled");
|
195
|
getAgentService().saveOrUpdate((Collection)authorsToSave);
|
196
|
|
197
|
return success;
|
198
|
}
|
199
|
|
200
|
|
201
|
/**
|
202
|
* @param state
|
203
|
* @param team
|
204
|
* @param authorTeamCache
|
205
|
* @param fullAuthorTeamCache
|
206
|
* @param preliminaryFlag
|
207
|
* @return
|
208
|
*/
|
209
|
private TeamOrPersonBase<?> handleTeam(BerlinModelImportState state, Team team, String authorTeamCache,
|
210
|
String fullAuthorTeamCache, boolean preliminaryFlag, int authorTeamId) {
|
211
|
if (!team.getTeamMembers().isEmpty()){
|
212
|
return team;
|
213
|
}
|
214
|
|
215
|
TeamOrPersonBase<?> result = team;
|
216
|
if (isBlank(authorTeamCache)){
|
217
|
logger.warn("Blank authorTeamCache not yet handled: " + authorTeamId);
|
218
|
}
|
219
|
|
220
|
if (!hasTeamSeparator(authorTeamCache) && !hasTeamSeparator(fullAuthorTeamCache)){
|
221
|
Person person = makePerson(fullAuthorTeamCache, authorTeamCache, preliminaryFlag, authorTeamId);
|
222
|
result = deduplicatePerson(state, person);
|
223
|
if (result != person){
|
224
|
logger.debug("Single person team deduplicated: " + authorTeamId);
|
225
|
}else{
|
226
|
person.addImportSource(String.valueOf(authorTeamId), NAMESPACE, state.getTransactionalSourceReference(), null);
|
227
|
|
228
|
}
|
229
|
}else{
|
230
|
String[] fullTeams = splitTeam(fullAuthorTeamCache);
|
231
|
String[] nomTeams = splitTeam(authorTeamCache);
|
232
|
if (fullTeams.length == nomTeams.length || fullTeams.length == 0){
|
233
|
for (int i = 0; i< nomTeams.length ;i++){
|
234
|
String fullTeam = fullTeams.length == 0? null: fullTeams[i].trim();
|
235
|
Person member = makePerson(fullTeam, nomTeams[i].trim(), preliminaryFlag, authorTeamId);
|
236
|
if (member == null){
|
237
|
logger.warn("Unexpected short nom. author: " + nomTeams[i].trim() + "; " + authorTeamId);
|
238
|
continue;
|
239
|
}
|
240
|
if (i == nomTeams.length -1 && isEtAl(member)){
|
241
|
team.setHasMoreMembers(true);
|
242
|
}else{
|
243
|
Person dedupMember = deduplicatePerson(state, member);
|
244
|
if (dedupMember != member){
|
245
|
logger.debug("Member deduplicated: " + authorTeamId);
|
246
|
}
|
247
|
//TODO add idInBM
|
248
|
team.addTeamMember(dedupMember);
|
249
|
}
|
250
|
}
|
251
|
//check nomenclatural title
|
252
|
if (team.getCacheStrategy().getNomenclaturalTitle(team).equals(authorTeamCache)){
|
253
|
team.setProtectedNomenclaturalTitleCache(false);
|
254
|
}else if(team.getCacheStrategy().getNomenclaturalTitle(team).replace(" ,", ",").equals(authorTeamCache)){
|
255
|
//also accept teams with ' , ' as separator as not protected
|
256
|
team.setProtectedTitleCache(false);
|
257
|
}else{
|
258
|
team.setNomenclaturalTitle(authorTeamCache, true);
|
259
|
logger.warn("Creation of nomTitle for team with members did not work: " + authorTeamCache + " <-> " + team.getCacheStrategy().getNomenclaturalTitle(team)+ " : " + authorTeamId);
|
260
|
}
|
261
|
//check titleCache
|
262
|
if (team.generateTitle().equals(fullAuthorTeamCache)){
|
263
|
team.setProtectedTitleCache(false);
|
264
|
}else if(fullAuthorTeamCache == null){
|
265
|
//do nothing
|
266
|
}else if(team.generateTitle().replace(" & ", ", ").equals(fullAuthorTeamCache.replace(" & ", ", "))){
|
267
|
//also accept teams with ', ' as final member separator as not protected
|
268
|
team.setProtectedTitleCache(false);
|
269
|
}else if(team.getFullTitle().replace(" & ", ", ").equals(fullAuthorTeamCache.replace(" & ", ", "))){
|
270
|
//also accept teams with ', ' as final member separator as not protected
|
271
|
team.setProtectedTitleCache(false);
|
272
|
}else{
|
273
|
String fullTitle = team.getFullTitle().replace(" & ", ", ");
|
274
|
team.setTitleCache(fullAuthorTeamCache, true);
|
275
|
logger.warn("Creation of titleCache for team with members did not work: " + fullAuthorTeamCache + " <-> " + team.generateTitle()+ " : " + authorTeamId);
|
276
|
}
|
277
|
}else{
|
278
|
logger.warn("AuthorTeamCache and fullAuthorTeamCache have not the same team size: " + authorTeamCache + " <-> " + fullAuthorTeamCache+ " : " + authorTeamId);
|
279
|
}
|
280
|
}
|
281
|
return result;
|
282
|
}
|
283
|
|
284
|
|
285
|
/**
|
286
|
* @param member
|
287
|
* @return
|
288
|
*/
|
289
|
private Person deduplicatePerson(BerlinModelImportState state, Person person) {
|
290
|
Person result = deduplicationHelper.getExistingAuthor(state, person);
|
291
|
return result;
|
292
|
}
|
293
|
|
294
|
|
295
|
/**
|
296
|
* @param member
|
297
|
* @return
|
298
|
*/
|
299
|
protected static boolean isEtAl(Person member) {
|
300
|
if (member != null && isEtAl(member.getTitleCache()) && isEtAl(member.getNomenclaturalTitle())){
|
301
|
return true;
|
302
|
}
|
303
|
return false;
|
304
|
}
|
305
|
|
306
|
private static boolean isEtAl(String str) {
|
307
|
if (str == null || !str.equals("al.")){
|
308
|
return false;
|
309
|
}else{
|
310
|
return true;
|
311
|
}
|
312
|
}
|
313
|
|
314
|
private Person makePerson(String full, String nom, boolean preliminaryFlag, int authorTeamId) {
|
315
|
Person person = Person.NewInstance(nom, null, null, null);
|
316
|
if (isBlank(full)){
|
317
|
//do nothing
|
318
|
}else if (!full.matches(".*[\\s\\.].*")){
|
319
|
person.setFamilyName(full);
|
320
|
}else if (nom.equals(full)){
|
321
|
parsePerson(person, full, preliminaryFlag);
|
322
|
}else{
|
323
|
parsePerson(person, full, true);
|
324
|
}
|
325
|
if (nom.length() <= 2 || (nom.length() == 3 && nom.endsWith(".")) ){
|
326
|
if (!nom.matches("((L|Sm|DC|al|Sw|Qz|Fr|Ib)\\.|Hu|Ma|Hy|Wu)")){
|
327
|
logger.warn("Unexpected short nom author name part: " + nom + "; " + authorTeamId);
|
328
|
}
|
329
|
}
|
330
|
|
331
|
return person;
|
332
|
}
|
333
|
|
334
|
/**
|
335
|
* @param person
|
336
|
*/
|
337
|
private void parsePerson(Person person, String str, boolean preliminary) {
|
338
|
if (str.matches("\\p{javaUpperCase}\\.(\\s\\p{javaUpperCase}\\.)*\\s\\p{javaUpperCase}\\p{javaLowerCase}{2,}")){
|
339
|
String[] splits = str.split("\\s");
|
340
|
person.setFamilyName(splits[splits.length-1]);
|
341
|
String initials = splits[0];
|
342
|
for (int i = 1; i < splits.length -1; i++ ){
|
343
|
initials += " " + splits[i];
|
344
|
}
|
345
|
person.setInitials(initials);
|
346
|
person.setProtectedTitleCache(false);
|
347
|
}else{
|
348
|
person.setTitleCache(str, preliminary);
|
349
|
}
|
350
|
|
351
|
}
|
352
|
|
353
|
private static final String TEAM_SPLITTER = "(,|;|&| et | Et )";
|
354
|
|
355
|
/**
|
356
|
* @param fullAuthorTeamCache
|
357
|
* @param TEAM_SPLITTER
|
358
|
* @return
|
359
|
*/
|
360
|
protected static String[] splitTeam(String teamCache) {
|
361
|
if (teamCache == null){
|
362
|
return new String[0];
|
363
|
}
|
364
|
return teamCache.split(TEAM_SPLITTER);
|
365
|
}
|
366
|
|
367
|
|
368
|
/**
|
369
|
* @param authorTeamCache
|
370
|
* @return
|
371
|
*/
|
372
|
protected static boolean hasTeamSeparator(String teamCache) {
|
373
|
if (isBlank(teamCache)){
|
374
|
return false;
|
375
|
}else if (teamCache.contains(",") || teamCache.contains("&")||teamCache.contains(" et ")||teamCache.endsWith(" al.")){
|
376
|
return true;
|
377
|
}else{
|
378
|
return false;
|
379
|
}
|
380
|
}
|
381
|
|
382
|
|
383
|
@Override
|
384
|
public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, BerlinModelImportState state) {
|
385
|
String nameSpace;
|
386
|
Class<?> cdmClass;
|
387
|
Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
|
388
|
|
389
|
//person map
|
390
|
Set<String> idInSourceList = makeAuthorIdList(rs);
|
391
|
nameSpace = BerlinModelAuthorImport.NAMESPACE;
|
392
|
cdmClass = Person.class;
|
393
|
Map<String, Person> personMap = (Map<String, Person>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idInSourceList, nameSpace);
|
394
|
result.put(nameSpace, personMap);
|
395
|
|
396
|
return result;
|
397
|
}
|
398
|
|
399
|
/**
|
400
|
* @param rs
|
401
|
* @return
|
402
|
* @throws SQLException
|
403
|
* @throws SQLException
|
404
|
*/
|
405
|
private Set<String> makeAuthorIdList(ResultSet rs) {
|
406
|
Set<String> result = new HashSet<String>();
|
407
|
|
408
|
String authorTeamIdList = "";
|
409
|
try {
|
410
|
while (rs.next()){
|
411
|
int id = rs.getInt("AuthorTeamId");
|
412
|
authorTeamIdList = CdmUtils.concat(",", authorTeamIdList, String.valueOf(id));
|
413
|
}
|
414
|
|
415
|
String strQuerySequence =
|
416
|
" SELECT DISTINCT authorFk " +
|
417
|
" FROM AuthorTeamSequence " +
|
418
|
" WHERE authorTeamFk IN (@) ";
|
419
|
strQuerySequence = strQuerySequence.replace("@", authorTeamIdList);
|
420
|
|
421
|
rs = source.getResultSet(strQuerySequence) ;
|
422
|
while (rs.next()){
|
423
|
int authorFk = rs.getInt("authorFk");
|
424
|
result.add(String.valueOf(authorFk));
|
425
|
}
|
426
|
} catch (SQLException e) {
|
427
|
throw new RuntimeException(e);
|
428
|
}
|
429
|
return result;
|
430
|
}
|
431
|
|
432
|
private boolean makeSequence(BerlinModelImportState state, Team team, int teamId, ResultSet rsSequence, Map<String, Person> personMap){
|
433
|
try {
|
434
|
if (rsSequence.isBeforeFirst()){
|
435
|
rsSequence.next();
|
436
|
}
|
437
|
if (rsSequence.isAfterLast()){
|
438
|
return true;
|
439
|
}
|
440
|
int sequenceTeamFk;
|
441
|
try {
|
442
|
sequenceTeamFk = rsSequence.getInt("AuthorTeamFk");
|
443
|
} catch (SQLException e) {
|
444
|
if (rsSequence.next() == false){
|
445
|
return true;
|
446
|
}else{
|
447
|
throw e;
|
448
|
}
|
449
|
}
|
450
|
while (sequenceTeamFk < teamId){
|
451
|
if (! state.getConfig().isEuroMed()){
|
452
|
logger.warn("Sequence team FK is smaller then team ID. Some teams for a sequence may not be available");
|
453
|
}
|
454
|
rsSequence.next();
|
455
|
sequenceTeamFk = rsSequence.getInt("AuthorTeamFk");
|
456
|
}
|
457
|
while (sequenceTeamFk == teamId){
|
458
|
int authorFk = rsSequence.getInt("AuthorFk");
|
459
|
Person author = personMap.get(String.valueOf(authorFk));
|
460
|
if (author != null){
|
461
|
team.addTeamMember(author);
|
462
|
}else{
|
463
|
logger.error("Author " + authorFk + " was not found for team " + teamId);
|
464
|
}
|
465
|
if (rsSequence.next()){
|
466
|
sequenceTeamFk = rsSequence.getInt("AuthorTeamFk");
|
467
|
}else{
|
468
|
break;
|
469
|
}
|
470
|
}
|
471
|
return true;
|
472
|
} catch (SQLException e) {
|
473
|
e.printStackTrace();
|
474
|
return false;
|
475
|
}
|
476
|
}
|
477
|
|
478
|
|
479
|
@Override
|
480
|
protected boolean doCheck(BerlinModelImportState state){
|
481
|
IOValidator<BerlinModelImportState> validator = new BerlinModelAuthorTeamImportValidator();
|
482
|
return validator.validate(state);
|
483
|
}
|
484
|
|
485
|
|
486
|
@Override
|
487
|
protected boolean isIgnore(BerlinModelImportState state){
|
488
|
return ! state.getConfig().isDoAuthors();
|
489
|
}
|
490
|
|
491
|
|
492
|
|
493
|
}
|