Project

General

Profile

Download (20.1 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.berlinModel.in;
10

    
11
import java.sql.ResultSet;
12
import java.sql.SQLException;
13
import java.util.Collection;
14
import java.util.HashMap;
15
import java.util.HashSet;
16
import java.util.Map;
17
import java.util.Set;
18
import java.util.UUID;
19
import java.util.regex.Matcher;
20
import java.util.regex.Pattern;
21

    
22
import org.apache.log4j.Logger;
23
import org.springframework.stereotype.Component;
24

    
25
import eu.etaxonomy.cdm.common.CdmUtils;
26
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelAuthorTeamImportValidator;
27
import eu.etaxonomy.cdm.io.common.IOValidator;
28
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
29
import eu.etaxonomy.cdm.io.common.Source;
30
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
31
import eu.etaxonomy.cdm.model.agent.Person;
32
import eu.etaxonomy.cdm.model.agent.Team;
33
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
34
import eu.etaxonomy.cdm.model.common.CdmBase;
35
import eu.etaxonomy.cdm.strategy.cache.agent.INomenclaturalAuthorCacheStrategy;
36

    
37
/**
38
 * @author a.mueller
39
 * @since 20.03.2008
40
 */
41
@Component
42
public class BerlinModelAuthorTeamImport extends BerlinModelImportBase {
43

    
44
    private static final long serialVersionUID = -4318481607033688522L;
45
    private static final Logger logger = Logger.getLogger(BerlinModelAuthorTeamImport.class);
46

    
47
	public static final String NAMESPACE = "AuthorTeam";
48
	   public static final String NAMESPACE_SPLIT = "AuthorTeam_Split";
49

    
50
	private static final String pluralString = "AuthorTeams";
51
	private static final String dbTableName = "AuthorTeam";
52

    
53
	private ResultSet rsSequence;
54
	private Source source;
55

    
56
    private ImportDeduplicationHelper<BerlinModelImportState> deduplicationHelper;
57

    
58

    
59
	/**
	 * Creates the import and hands the source table name and the plural label
	 * used in log messages to the base class constructor.
	 */
	public BerlinModelAuthorTeamImport() {
		super(dbTableName, pluralString);
	}
62

    
63

    
64
	@Override
65
    protected void doInvoke(BerlinModelImportState state){
66
		BerlinModelImportConfigurator config = state.getConfig();
67
		source = config.getSource();
68
		this.deduplicationHelper = ImportDeduplicationHelper.NewInstance(this, state);
69

    
70
		logger.info("start make " + pluralString + " ...");
71

    
72
		//queryStrings
73
		String strIdQuery = getIdQuery(state);
74

    
75
		String strRecordQuery = getRecordQuery(config);
76
		String strWhere = " WHERE (1=1) ";
77
		if (state.getConfig().getAuthorTeamFilter() != null){
78
			strWhere += " AND " + state.getConfig().getAuthorTeamFilter();
79
			strWhere = strWhere.replaceFirst("authorTeamId", "authorTeamFk");
80
		}
81
		String strQuerySequence =
82
			" SELECT *  " +
83
            " FROM AuthorTeamSequence " +
84
                (state.getConfig().isEuroMed() ? "" : strWhere) +
85
            " ORDER By authorTeamFk, Sequence ";
86

    
87
		int recordsPerTransaction = config.getRecordsPerTransaction();
88
		try{
89
			ResultSetPartitioner<BerlinModelImportState> partitioner = ResultSetPartitioner.NewInstance(source, strIdQuery, strRecordQuery, recordsPerTransaction);
90
			rsSequence = source.getResultSet(strQuerySequence) ; //only here, to reduce deadlock/timeout probability
91
			while (partitioner.nextPartition()){
92
				partitioner.doPartition(this, state);
93
			}
94
		} catch (SQLException e) {
95
			logger.error("SQLException:" +  e);
96
			state.setUnsuccessfull();
97
			return;
98
		}
99

    
100
		logger.info("end make " + pluralString + " ... " + getSuccessString(true));
101
		this.deduplicationHelper = null;
102
		return;
103
	}
104

    
105
	@Override
106
	protected String getIdQuery(BerlinModelImportState state){
107
		if (state.getConfig().isEuroMed()){
108
		    return " SELECT authorTeamId "
109
		         + " FROM v_cdm_exp_authorTeamsAll ORDER BY authorTeamId "
110
		         ;
111
		}
112

    
113
	    String strWhere = " WHERE (1=1) ";
114
		if (state.getConfig().getAuthorTeamFilter() != null){
115
			strWhere += " AND " + state.getConfig().getAuthorTeamFilter();
116
		}
117
		String idQuery =
118
				" SELECT authorTeamId " +
119
                " FROM AuthorTeam " +
120
                strWhere +
121
                " ORDER BY authorTeamId ";
122
		return idQuery;
123
	}
124

    
125

    
126
	@Override
127
	protected String getRecordQuery(BerlinModelImportConfigurator config) {
128
		String strRecordQuery =
129
			" SELECT *  " +
130
            " FROM AuthorTeam " +
131
            " WHERE authorTeamId IN ( " + ID_LIST_TOKEN + " )" +
132
            " ORDER By authorTeamId ";
133
		return strRecordQuery;
134
	}
135

    
136

    
137
	@Override
138
    public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, BerlinModelImportState state) {
139
		boolean success = true ;
140
		deduplicationHelper.restartSession();
141
		BerlinModelImportConfigurator config = state.getConfig();
142
		Set<TeamOrPersonBase<?>> authorsToSave = new HashSet<>();
143
		@SuppressWarnings("unchecked")
144
        Map<String, Person> personMap = partitioner.getObjectMap(BerlinModelAuthorImport.NAMESPACE);
145

    
146
		ResultSet rs = partitioner.getResultSet();
147
		//for each reference
148
		try{
149
			while (rs.next()){
150
				try{
151
					//if ((i++ % modCount ) == 0 && i!= 1 ){ logger.info(""+pluralString+" handled: " + (i-1));}
152

    
153
					//create Agent element
154
					int teamId = rs.getInt("AuthorTeamId");
155
					if (teamId == 0 && config.isIgnore0AuthorTeam()){
156
						continue;
157
					}
158

    
159
					Team team = Team.NewInstance();
160

    
161
					boolean preliminaryFlag = rs.getBoolean("PreliminaryFlag");
162
					String authorTeamCache = rs.getString("AuthorTeamCache");
163
					String fullAuthorTeamCache = rs.getString("FullAuthorTeamCache");
164
					if (isBlank(fullAuthorTeamCache)){
165
//						fullAuthorTeamCache = authorTeamCache;
166
						if (isBlank(authorTeamCache) && preliminaryFlag){
167
						    logger.warn("authorTeamCache and fullAuthorTeamCache are blank/null and preliminaryFlag is true. This makes no sense and should not happen: " + teamId);
168
						}
169
					}else{
170
					    fullAuthorTeamCache = fullAuthorTeamCache.trim();
171
					}
172
					if (isNotBlank(authorTeamCache)){
173
					    authorTeamCache = authorTeamCache.trim();
174
					}
175
//					team.setTitleCache(fullAuthorTeamCache, preliminaryFlag);
176
//					team.setNomenclaturalTitle(authorTeamCache, preliminaryFlag);
177

    
178
					success &= makeSequence(state, team, teamId, rsSequence, personMap);
179

    
180
					team.setTitleCache(fullAuthorTeamCache, preliminaryFlag);
181
					team.setNomenclaturalTitleCache(authorTeamCache, preliminaryFlag);
182
					//not yet supported by model
183
//	                team.setOriginalNomenclaturalTitle(authorTeamCache);
184

    
185
					TeamOrPersonBase<?> author = handleTeam(state, team, authorTeamCache,
186
					        fullAuthorTeamCache, preliminaryFlag, teamId);
187

    
188
					//in case preliminary flag is set incorrectly in BM
189
					if (author == team && team.getTeamMembers().size() == 0 && preliminaryFlag == false){
190
                        team.setProtectedTitleCache(true);
191
                        team.setProtectedNomenclaturalTitleCache(true);
192
                    }
193

    
194
		            String uuid = null;
195
		            if (resultSetHasColumn(rs,"UUID")){
196
		                uuid = rs.getString("UUID");
197
		                if (uuid != null && !author.isPersited()){
198
		                    author.setUuid(UUID.fromString(uuid));
199
		                }
200
		            }
201

    
202
					//created, notes
203
//					doIdCreatedUpdatedNotes(state, author, rs, teamId, NAMESPACE);
204
					doCreatedUpdatedNotes(state, author, rs);
205
					if (!importSourceExists(author, String.valueOf(teamId), NAMESPACE, state.getTransactionalSourceReference())){
206
					    doId(state, author, teamId, NAMESPACE);
207
					}
208

    
209
					authorsToSave.add(author);
210
				}catch(Exception ex){
211
					logger.error(ex.getMessage());
212
					ex.printStackTrace();
213
					success = false;
214
				}
215
			} //while rs.hasNext()
216
		} catch (SQLException e) {
217
			logger.error("SQLException:" +  e);
218
			return false;
219
		}
220

    
221
		//logger.info(i + " " + pluralString + " handled");
222
		getAgentService().saveOrUpdate((Collection)authorsToSave);
223

    
224
		return success;
225
	}
226

    
227
    private TeamOrPersonBase<?> handleTeam(BerlinModelImportState state, Team team, String authorTeamCache,
228
            String fullAuthorTeamCache, boolean preliminaryFlag, int authorTeamId) {
229

    
230
        if (!team.getTeamMembers().isEmpty()){
231
            return team;
232
        }
233

    
234
        TeamOrPersonBase<?> result = team;
235
        if (isBlank(authorTeamCache)){
236
            logger.warn("Blank authorTeamCache not yet handled: " + authorTeamId);
237
        }
238

    
239
        //single person
240
        if (!hasTeamSeparator(authorTeamCache) && !hasTeamSeparator(fullAuthorTeamCache)){
241
            Person person = makePerson(fullAuthorTeamCache, authorTeamCache, preliminaryFlag, authorTeamId);
242
            result = deduplicatePerson(state, person);
243
            if (result != person){
244
                logger.debug("Single person team deduplicated: " + authorTeamId);
245
            }else{
246
                String idInSource = String.valueOf(authorTeamId);
247
                if (!importSourceExists(person, idInSource, NAMESPACE, state.getTransactionalSourceReference())){
248
                    person.addImportSource(idInSource, NAMESPACE, state.getTransactionalSourceReference(), null);
249
                }
250
            }
251
        //team
252
        }else{
253
            String[] fullTeams = splitTeam(fullAuthorTeamCache);
254
            String[] nomTeams = splitTeam(authorTeamCache);
255
            if (fullTeams.length != nomTeams.length && fullTeams.length != 0){
256
                logger.warn("AuthorTeamCache and fullAuthorTeamCache have not the same team size: " + authorTeamCache + " <-> " + fullAuthorTeamCache+ " ; authorTeamId=" + authorTeamId);
257
            }else{
258
                for (int i = 0; i< nomTeams.length ;i++){
259
                    String fullTeam = fullTeams.length == 0? null: fullTeams[i].trim();
260
                    Person member = makePerson(fullTeam, nomTeams[i].trim(), preliminaryFlag, authorTeamId);
261
                    if (member == null){
262
                        logger.warn("Unexpected short nom. author: " + nomTeams[i].trim() + "; " + authorTeamId);
263
                        continue;
264
                    }
265
                    if (i == nomTeams.length -1 && isEtAl(member)){
266
                        team.setHasMoreMembers(true);
267
                    }else{
268
                        Person dedupMember = deduplicatePerson(state, member);
269
                        if (dedupMember != member){
270
                            logger.debug("Member deduplicated: " + authorTeamId);
271
                        }else{
272
                            String idInSource = String.valueOf(authorTeamId);
273
                            if (!importSourceExists(member, idInSource, NAMESPACE_SPLIT, state.getTransactionalSourceReference())){
274
                                member.addImportSource(idInSource, NAMESPACE_SPLIT, state.getTransactionalSourceReference(), null);
275
                            }
276
                        }
277
                        //TODO add idInBM
278
                        team.addTeamMember(dedupMember);
279
                    }
280
                }
281
                //check nomenclatural title
282
                //TODO
283
                checkTeamNomenclaturalTitle(team, authorTeamCache, authorTeamId);
284
                //check titleCache
285
                checkTeamTitleCache(team, fullAuthorTeamCache, authorTeamId);
286
            }//same size team
287
            result = deduplicateTeam(state, team);
288
            if (result != team){
289
                logger.debug("Dedup team");
290
            }else{
291
                String idInSource = String.valueOf(authorTeamId);
292
                if (!importSourceExists(result, idInSource, NAMESPACE, state.getTransactionalSourceReference())){
293
                    result.addImportSource(idInSource, NAMESPACE, state.getTransactionalSourceReference(), null);
294
                }
295
            }
296
        }//team
297

    
298
        return result;
299
    }
300

    
301
    protected void checkTeamNomenclaturalTitle(Team team, String authorTeamCache, int authorTeamId) {
302
        if (team.getCacheStrategy().getNomenclaturalTitleCache(team).equals(authorTeamCache)){
303
            team.setProtectedNomenclaturalTitleCache(false);
304
        }else if(team.getCacheStrategy().getNomenclaturalTitleCache(team).replace(" ,", ",").equals(authorTeamCache)){
305
            //also accept teams with ' , ' as separator as not protected
306
            team.setProtectedNomenclaturalTitleCache(false);
307
        }else{
308
            team.setNomenclaturalTitleCache(authorTeamCache, true);
309
            logger.warn("Creation of nomTitle for team with members did not work: " + authorTeamCache + " <-> " + team.getCacheStrategy().getNomenclaturalTitleCache(team)+ " : " + authorTeamId);
310
        }
311
    }
312

    
313
    protected void checkTeamTitleCache(Team team, String fullAuthorTeamCache, int authorTeamId) {
314
        INomenclaturalAuthorCacheStrategy<Team> formatter = team.getCacheStrategy();
315
        if (team.generateTitle().equals(fullAuthorTeamCache)){
316
            team.setProtectedTitleCache(false);
317
        }else if(fullAuthorTeamCache == null){
318
            team.setProtectedTitleCache(false);
319
        }else if(team.generateTitle().replace(" & ", ", ").equals(fullAuthorTeamCache.replace(" & ", ", "))){
320
            //also accept teams with ', ' as final member separator as not protected
321
            team.setProtectedTitleCache(false);
322
        }else if(formatter.getFullTitle(team).replace(" & ", ", ").equals(fullAuthorTeamCache.replace(" & ", ", "))){
323
            //also accept teams with ', ' as final member separator as not protected
324
            team.setProtectedTitleCache(false);
325
        }else{
326
            String fullTitle = formatter.getFullTitle(team).replace(" & ", ", ");
327
            team.setTitleCache(fullAuthorTeamCache, true);
328
            logger.warn("Creation of titleCache for team with members did not work: " + fullAuthorTeamCache + " <-> " + team.generateTitle()+ " : " + authorTeamId);
329
        }
330
    }
331

    
332
    private Person deduplicatePerson(BerlinModelImportState state, Person person) {
333
        Person result = deduplicationHelper.getExistingAuthor(state, person);
334
        return result;
335
    }
336

    
337
    private Team deduplicateTeam(BerlinModelImportState state, Team team) {
338
        Team result = deduplicationHelper.getExistingAuthor(state, team);
339
        return result;
340
    }
341

    
342
    protected static boolean isEtAl(Person member) {
343
        if (member != null && isEtAl(member.getTitleCache()) && isEtAl(member.getNomenclaturalTitleCache())){
344
            return true;
345
        }
346
        return false;
347
    }
348

    
349
    private static boolean isEtAl(String str) {
350
        if (str == null || !str.equals("al.")){
351
            return false;
352
        }else{
353
            return true;
354
        }
355
    }
356

    
357
    private Person makePerson(String full, String nom, boolean preliminaryFlag, int authorTeamId) {
358
        Person person = Person.NewInstance(nom, null, null, null);
359
        if (isBlank(full)){
360
            //do nothing
361
        }else if (!full.matches(".*[\\s\\.].*")){
362
            //no whitespace and no . => family name
363
            person.setFamilyName(full);
364
        }else if (nom.equals(full)){
365
            parsePerson(person, full, preliminaryFlag);
366
        }else{
367
            parsePerson(person, full, true);
368
        }
369
        if (nom.length() <= 2 || (nom.length() == 3 && nom.endsWith(".")) ){
370
            if (!nom.matches("((L|Sm|DC|al|Sw|Qz|Fr|Ib)\\.|Hu|Ma|Hy|Wu)")){
371
                logger.warn("Unexpected short nom author name part: " + nom + "; " + authorTeamId);
372
            }
373
        }
374

    
375
        return person;
376
    }
377

    
378
    private void parsePerson(Person person, String str, boolean preliminary) {
379
        String capWord = "\\p{javaUpperCase}\\p{javaLowerCase}{2,}";
380
        String famStart = "(Le |D'|'t |Mc|Mac|Des |d'|Du |De )";
381
        String regEx = "(\\p{javaUpperCase}\\.([\\s-]\\p{javaUpperCase}\\.)*(\\s(de|del|da|von|van|v.|af|zu))?\\s)("
382
                + famStart + "?" + capWord + "((-| y | é | de | de la )" + capWord + ")?)";
383
        Matcher matcher = Pattern.compile(regEx).matcher(str);
384
        if (matcher.matches()){
385

    
386
            person.setProtectedTitleCache(false);
387
            //Initials + family name
388
//            String[] splits = str.split("\\s");
389
//            int n = matcher.groupCount();
390
//            for (int i = 0; i< n; i++){
391
//                String s = matcher.group(i);
392
//                System.out.println(s);
393
//            }
394
            person.setFamilyName(matcher.group(5).trim());
395

    
396
//            String initials = splits[0];
397
//            for (int i = 1; i < splits.length -1; i++ ){
398
//                initials += " " + splits[i];
399
//            }
400
            person.setInitials(matcher.group(1).trim());
401
        }else{
402
            person.setTitleCache(str, preliminary);
403
        }
404
    }
405

    
406
    private static final String TEAM_SPLITTER = "(,|&)";
407

    
408
    protected static String[] splitTeam(String teamCache) {
409
        if (teamCache == null){
410
            return new String[0];
411
        }
412
        return teamCache.split(TEAM_SPLITTER);
413
    }
414

    
415
    protected static boolean hasTeamSeparator(String teamCache) {
416
        if (isBlank(teamCache)){
417
            return false;
418
        }else if (teamCache.contains(",") || teamCache.contains("&")||teamCache.contains(" et ")||teamCache.endsWith(" al.")){
419
            return true;
420
        }else{
421
            return false;
422
        }
423
    }
424

    
425
    @Override
426
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, BerlinModelImportState state)  {
427

    
428
        String nameSpace;
429
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
430

    
431
		//person map
432
		Set<String> idInSourceList = makeAuthorIdList(rs);
433
		nameSpace = BerlinModelAuthorImport.NAMESPACE;
434
		Map<String, Person> personMap = getCommonService().getSourcedObjectsByIdInSourceC(Person.class, idInSourceList, nameSpace);
435
		result.put(nameSpace, personMap);
436

    
437
		return result;
438
	}
439

    
440
	private Set<String> makeAuthorIdList(ResultSet rs) {
441
		Set<String> result = new HashSet<>();
442

    
443
		String authorTeamIdList = "";
444
		try {
445
			while (rs.next()){
446
				int id = rs.getInt("AuthorTeamId");
447
				authorTeamIdList = CdmUtils.concat(",", authorTeamIdList, String.valueOf(id));
448
			}
449

    
450
			String strQuerySequence =
451
				" SELECT DISTINCT authorFk " +
452
	            " FROM AuthorTeamSequence " +
453
	            " WHERE authorTeamFk IN (@) ";
454
			strQuerySequence = strQuerySequence.replace("@", authorTeamIdList);
455

    
456
			rs = source.getResultSet(strQuerySequence) ;
457
			while (rs.next()){
458
				int authorFk = rs.getInt("authorFk");
459
				result.add(String.valueOf(authorFk));
460
			}
461
		} catch (SQLException e) {
462
			throw new RuntimeException(e);
463
		}
464
		return result;
465
	}
466

    
467
	private boolean makeSequence(BerlinModelImportState state, Team team, int teamId, ResultSet rsSequence, Map<String, Person> personMap){
468
		try {
469
			if (rsSequence.isBeforeFirst()){
470
				rsSequence.next();
471
			}
472
			if (rsSequence.isAfterLast()){
473
				return true;
474
			}
475
			int sequenceTeamFk;
476
			try {
477
				sequenceTeamFk = rsSequence.getInt("AuthorTeamFk");
478
			} catch (SQLException e) {
479
				if (rsSequence.next() == false){
480
					return true;
481
				}else{
482
					throw e;
483
				}
484
			}
485
			while (sequenceTeamFk < teamId){
486
				if (! state.getConfig().isEuroMed()){
487
				    logger.warn("Sequence team FK is smaller then team ID. Some teams for a sequence may not be available");
488
				}
489
				rsSequence.next();
490
				sequenceTeamFk = rsSequence.getInt("AuthorTeamFk");
491
			}
492
			while (sequenceTeamFk == teamId){
493
				int authorFk = rsSequence.getInt("AuthorFk");
494
				Person author = personMap.get(String.valueOf(authorFk));
495
				if (author != null){
496
				team.addTeamMember(author);
497
				}else{
498
					logger.error("Author " + authorFk + " was not found for team " + teamId);
499
				}
500
				if (rsSequence.next()){
501
					sequenceTeamFk = rsSequence.getInt("AuthorTeamFk");
502
				}else{
503
					break;
504
				}
505
			}
506
			return true;
507
		} catch (SQLException e) {
508
			e.printStackTrace();
509
			return false;
510
		}
511
	}
512

    
513
	@Override
514
	protected boolean doCheck(BerlinModelImportState state){
515
		IOValidator<BerlinModelImportState> validator = new BerlinModelAuthorTeamImportValidator();
516
		return validator.validate(state);
517
	}
518

    
519
	@Override
520
    protected boolean isIgnore(BerlinModelImportState state){
521
		return ! state.getConfig().isDoAuthors();
522
	}
523
}
(3-3/22)