Project

General

Profile

Download (20.5 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.berlinModel.in;
10

    
11
import java.sql.ResultSet;
12
import java.sql.SQLException;
13
import java.util.Collection;
14
import java.util.HashMap;
15
import java.util.HashSet;
16
import java.util.Map;
17
import java.util.Set;
18
import java.util.regex.Matcher;
19
import java.util.regex.Pattern;
20

    
21
import org.apache.log4j.Logger;
22
import org.springframework.stereotype.Component;
23

    
24
import eu.etaxonomy.cdm.common.CdmUtils;
25
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelAuthorTeamImportValidator;
26
import eu.etaxonomy.cdm.io.common.IOValidator;
27
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
28
import eu.etaxonomy.cdm.io.common.Source;
29
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
30
import eu.etaxonomy.cdm.model.agent.Person;
31
import eu.etaxonomy.cdm.model.agent.Team;
32
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
33
import eu.etaxonomy.cdm.model.common.CdmBase;
34
import eu.etaxonomy.cdm.strategy.cache.agent.INomenclaturalAuthorCacheStrategy;
35

    
36

    
37
/**
38
 * @author a.mueller
39
 * @since 20.03.2008
40
 */
41
@Component
42
public class BerlinModelAuthorTeamImport extends BerlinModelImportBase {
43

    
44
    private static final long serialVersionUID = -4318481607033688522L;
    private static final Logger logger = Logger.getLogger(BerlinModelAuthorTeamImport.class);

    /** Namespace under which imported author teams (and single-person teams) are registered as import sources. */
    public static final String NAMESPACE = "AuthorTeam";
    /** Namespace for persons that were split out of an author team cache string. */
    public static final String NAMESPACE_SPLIT = "AuthorTeam_Split";

    private static final String pluralString = "AuthorTeams";
    private static final String dbTableName = "AuthorTeam";

    /** Result set over AuthorTeamSequence; opened in doInvoke and consumed by makeSequence. */
    private ResultSet rsSequence;
    /** Source taken from the import configurator in doInvoke. */
    private Source source;

    /** Helper used to deduplicate persons and teams; only set while doInvoke is running. */
    private ImportDeduplicationHelper<BerlinModelImportState> deduplicationHelper;

    /**
     * Creates the import and passes the table name and the plural label to the base class.
     */
    public BerlinModelAuthorTeamImport(){
        super(dbTableName, pluralString);
    }
62

    
63

    
64
	@Override
65
    protected void doInvoke(BerlinModelImportState state){
66
		BerlinModelImportConfigurator config = state.getConfig();
67
		source = config.getSource();
68
		this.deduplicationHelper = ImportDeduplicationHelper.NewInstance(this, state);
69

    
70
		logger.info("start make " + pluralString + " ...");
71

    
72
		//queryStrings
73
		String strIdQuery = getIdQuery(state);
74

    
75
		String strRecordQuery = getRecordQuery(config);
76
		String strWhere = " WHERE (1=1) ";
77
		if (state.getConfig().getAuthorTeamFilter() != null){
78
			strWhere += " AND " + state.getConfig().getAuthorTeamFilter();
79
			strWhere = strWhere.replaceFirst("authorTeamId", "authorTeamFk");
80
		}
81
		String strQuerySequence =
82
			" SELECT *  " +
83
            " FROM AuthorTeamSequence " +
84
                (state.getConfig().isEuroMed() ? "" : strWhere) +
85
            " ORDER By authorTeamFk, Sequence ";
86

    
87
		int recordsPerTransaction = config.getRecordsPerTransaction();
88
		try{
89
			ResultSetPartitioner<BerlinModelImportState> partitioner = ResultSetPartitioner.NewInstance(source, strIdQuery, strRecordQuery, recordsPerTransaction);
90
			rsSequence = source.getResultSet(strQuerySequence) ; //only here, to reduce deadlock/timeout probability
91
			while (partitioner.nextPartition()){
92
				partitioner.doPartition(this, state);
93
			}
94
		} catch (SQLException e) {
95
			logger.error("SQLException:" +  e);
96
			state.setUnsuccessfull();
97
			return;
98
		}
99

    
100
		logger.info("end make " + pluralString + " ... " + getSuccessString(true));
101
		this.deduplicationHelper = null;
102
		return;
103
	}
104

    
105
	@Override
106
	protected String getIdQuery(BerlinModelImportState state){
107
		if (state.getConfig().isEuroMed()){
108
		    return " SELECT authorTeamId "
109
		         + " FROM v_cdm_exp_authorTeamsAll ORDER BY authorTeamId "
110
		         ;
111
		}
112

    
113
	    String strWhere = " WHERE (1=1) ";
114
		if (state.getConfig().getAuthorTeamFilter() != null){
115
			strWhere += " AND " + state.getConfig().getAuthorTeamFilter();
116
		}
117
		String idQuery =
118
				" SELECT authorTeamId " +
119
                " FROM AuthorTeam " +
120
                strWhere +
121
                " ORDER BY authorTeamId ";
122
		return idQuery;
123
	}
124

    
125

    
126
	@Override
127
	protected String getRecordQuery(BerlinModelImportConfigurator config) {
128
		String strRecordQuery =
129
			" SELECT *  " +
130
            " FROM AuthorTeam " +
131
            " WHERE authorTeamId IN ( " + ID_LIST_TOKEN + " )" +
132
            " ORDER By authorTeamId ";
133
		return strRecordQuery;
134
	}
135

    
136

    
137
	@Override
138
    public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, BerlinModelImportState state) {
139
		boolean success = true ;
140
		deduplicationHelper.restartSession();
141
		BerlinModelImportConfigurator config = state.getConfig();
142
		Set<TeamOrPersonBase<?>> authorsToSave = new HashSet<>();
143
		@SuppressWarnings("unchecked")
144
        Map<String, Person> personMap = partitioner.getObjectMap(BerlinModelAuthorImport.NAMESPACE);
145

    
146
		ResultSet rs = partitioner.getResultSet();
147
		//for each reference
148
		try{
149
			while (rs.next()){
150
				try{
151
					//if ((i++ % modCount ) == 0 && i!= 1 ){ logger.info(""+pluralString+" handled: " + (i-1));}
152

    
153
					//create Agent element
154
					int teamId = rs.getInt("AuthorTeamId");
155
					if (teamId == 0 && config.isIgnore0AuthorTeam()){
156
						continue;
157
					}
158

    
159
					Team team = Team.NewInstance();
160

    
161
					boolean preliminaryFlag = rs.getBoolean("PreliminaryFlag");
162
					String authorTeamCache = rs.getString("AuthorTeamCache");
163
					String fullAuthorTeamCache = rs.getString("FullAuthorTeamCache");
164
					if (isBlank(fullAuthorTeamCache)){
165
//						fullAuthorTeamCache = authorTeamCache;
166
						if (isBlank(authorTeamCache) && preliminaryFlag){
167
						    logger.warn("authorTeamCache and fullAuthorTeamCache are blank/null and preliminaryFlag is true. This makes no sense and should not happen: " + teamId);
168
						}
169
					}else{
170
					    fullAuthorTeamCache = fullAuthorTeamCache.trim();
171
					}
172
					if (isNotBlank(authorTeamCache)){
173
					    authorTeamCache = authorTeamCache.trim();
174
					}
175
//					team.setTitleCache(fullAuthorTeamCache, preliminaryFlag);
176
//					team.setNomenclaturalTitle(authorTeamCache, preliminaryFlag);
177

    
178
					success &= makeSequence(state, team, teamId, rsSequence, personMap);
179

    
180
					team.setTitleCache(fullAuthorTeamCache, preliminaryFlag);
181
					team.setNomenclaturalTitle(authorTeamCache, preliminaryFlag);
182

    
183
					TeamOrPersonBase<?> author = handleTeam(state, team, authorTeamCache,
184
					        fullAuthorTeamCache, preliminaryFlag, teamId);
185

    
186
					//in case preliminary flag is set incorrectly in BM
187
					if (author == team && team.getTeamMembers().size() == 0 && preliminaryFlag == false){
188
                        team.setProtectedTitleCache(true);
189
                        team.setProtectedNomenclaturalTitleCache(true);
190
                    }
191

    
192
					//created, notes
193
//					doIdCreatedUpdatedNotes(state, author, rs, teamId, NAMESPACE);
194
					doCreatedUpdatedNotes(state, author, rs);
195
					if (!importSourceExists(author, String.valueOf(teamId), NAMESPACE, state.getTransactionalSourceReference())){
196
					    doId(state, author, teamId, NAMESPACE);
197
					}
198

    
199
					authorsToSave.add(author);
200
				}catch(Exception ex){
201
					logger.error(ex.getMessage());
202
					ex.printStackTrace();
203
					success = false;
204
				}
205
			} //while rs.hasNext()
206
		} catch (SQLException e) {
207
			logger.error("SQLException:" +  e);
208
			return false;
209
		}
210

    
211
		//logger.info(i + " " + pluralString + " handled");
212
		getAgentService().saveOrUpdate((Collection)authorsToSave);
213

    
214
		return success;
215
	}
216

    
217

    
218
	/**
219
     * @param state
220
     * @param team
221
     * @param authorTeamCache
222
     * @param fullAuthorTeamCache
223
     * @param preliminaryFlag
224
     * @return
225
     */
226
    private TeamOrPersonBase<?> handleTeam(BerlinModelImportState state, Team team, String authorTeamCache,
227
            String fullAuthorTeamCache, boolean preliminaryFlag, int authorTeamId) {
228
        if (!team.getTeamMembers().isEmpty()){
229
            return team;
230
        }
231

    
232
        TeamOrPersonBase<?> result = team;
233
        if (isBlank(authorTeamCache)){
234
            logger.warn("Blank authorTeamCache not yet handled: " + authorTeamId);
235
        }
236

    
237
        //single person
238
        if (!hasTeamSeparator(authorTeamCache) && !hasTeamSeparator(fullAuthorTeamCache)){
239
            Person person = makePerson(fullAuthorTeamCache, authorTeamCache, preliminaryFlag, authorTeamId);
240
            result = deduplicatePerson(state, person);
241
            if (result != person){
242
                logger.debug("Single person team deduplicated: " + authorTeamId);
243
            }else{
244
                String idInSource = String.valueOf(authorTeamId);
245
                if (!importSourceExists(person, idInSource, NAMESPACE, state.getTransactionalSourceReference())){
246
                    person.addImportSource(idInSource, NAMESPACE, state.getTransactionalSourceReference(), null);
247
                }
248
            }
249
        //team
250
        }else{
251
            String[] fullTeams = splitTeam(fullAuthorTeamCache);
252
            String[] nomTeams = splitTeam(authorTeamCache);
253
            if (fullTeams.length != nomTeams.length && fullTeams.length != 0){
254
                logger.warn("AuthorTeamCache and fullAuthorTeamCache have not the same team size: " + authorTeamCache + " <-> " + fullAuthorTeamCache+ " : " + authorTeamId);
255
            }else{
256
                for (int i = 0; i< nomTeams.length ;i++){
257
                    String fullTeam = fullTeams.length == 0? null: fullTeams[i].trim();
258
                    Person member = makePerson(fullTeam, nomTeams[i].trim(), preliminaryFlag, authorTeamId);
259
                    if (member == null){
260
                        logger.warn("Unexpected short nom. author: " + nomTeams[i].trim() + "; " + authorTeamId);
261
                        continue;
262
                    }
263
                    if (i == nomTeams.length -1 && isEtAl(member)){
264
                        team.setHasMoreMembers(true);
265
                    }else{
266
                        Person dedupMember = deduplicatePerson(state, member);
267
                        if (dedupMember != member){
268
                            logger.debug("Member deduplicated: " + authorTeamId);
269
                        }else{
270
                            String idInSource = String.valueOf(authorTeamId);
271
                            if (!importSourceExists(member, idInSource, NAMESPACE_SPLIT, state.getTransactionalSourceReference())){
272
                                member.addImportSource(idInSource, NAMESPACE_SPLIT, state.getTransactionalSourceReference(), null);
273
                            }
274
                        }
275
                        //TODO add idInBM
276
                        team.addTeamMember(dedupMember);
277
                    }
278
                }
279
                //check nomenclatural title
280
                //TODO
281
                checkTeamNomenclaturalTitle(team, authorTeamCache, authorTeamId);
282
                //check titleCache
283
                checkTeamTitleCache(team, fullAuthorTeamCache, authorTeamId);
284
            }//same size team
285
            result = deduplicateTeam(state, team);
286
            if (result != team){
287
                logger.debug("Dedup team");
288
            }else{
289
                String idInSource = String.valueOf(authorTeamId);
290
                if (!importSourceExists(result, idInSource, NAMESPACE, state.getTransactionalSourceReference())){
291
                    result.addImportSource(idInSource, NAMESPACE, state.getTransactionalSourceReference(), null);
292
                }
293
            }
294
        }//team
295

    
296
        return result;
297
    }
298

    
299

    
300
    /**
301
     * @param team
302
     * @param authorTeamCache
303
     * @param authorTeamId
304
     */
305
    protected void checkTeamNomenclaturalTitle(Team team, String authorTeamCache, int authorTeamId) {
306
        if (team.getCacheStrategy().getNomenclaturalTitle(team).equals(authorTeamCache)){
307
            team.setProtectedNomenclaturalTitleCache(false);
308
        }else if(team.getCacheStrategy().getNomenclaturalTitle(team).replace(" ,", ",").equals(authorTeamCache)){
309
            //also accept teams with ' , ' as separator as not protected
310
            team.setProtectedNomenclaturalTitleCache(false);
311
        }else{
312
            team.setNomenclaturalTitle(authorTeamCache, true);
313
            logger.warn("Creation of nomTitle for team with members did not work: " + authorTeamCache + " <-> " + team.getCacheStrategy().getNomenclaturalTitle(team)+ " : " + authorTeamId);
314
        }
315
    }
316

    
317

    
318
    /**
319
     * @param team
320
     * @param fullAuthorTeamCache
321
     * @param authorTeamId
322
     * @param formatter
323
     */
324
    protected void checkTeamTitleCache(Team team, String fullAuthorTeamCache, int authorTeamId) {
325
        INomenclaturalAuthorCacheStrategy<Team> formatter = team.getCacheStrategy();
326
        if (team.generateTitle().equals(fullAuthorTeamCache)){
327
            team.setProtectedTitleCache(false);
328
        }else if(fullAuthorTeamCache == null){
329
            team.setProtectedTitleCache(false);
330
        }else if(team.generateTitle().replace(" & ", ", ").equals(fullAuthorTeamCache.replace(" & ", ", "))){
331
            //also accept teams with ', ' as final member separator as not protected
332
            team.setProtectedTitleCache(false);
333
        }else if(formatter.getFullTitle(team).replace(" & ", ", ").equals(fullAuthorTeamCache.replace(" & ", ", "))){
334
            //also accept teams with ', ' as final member separator as not protected
335
            team.setProtectedTitleCache(false);
336
        }else{
337
            String fullTitle = formatter.getFullTitle(team).replace(" & ", ", ");
338
            team.setTitleCache(fullAuthorTeamCache, true);
339
            logger.warn("Creation of titleCache for team with members did not work: " + fullAuthorTeamCache + " <-> " + team.generateTitle()+ " : " + authorTeamId);
340
        }
341
    }
342

    
343

    
344
    /**
345
     * @param member
346
     * @return
347
     */
348
    private Person deduplicatePerson(BerlinModelImportState state, Person person) {
349
        Person result = deduplicationHelper.getExistingAuthor(state, person);
350
        return result;
351
    }
352

    
353
    private Team deduplicateTeam(BerlinModelImportState state, Team team) {
354
        Team result = deduplicationHelper.getExistingAuthor(state, team);
355
        return result;
356
    }
357

    
358

    
359
    /**
360
     * @param member
361
     * @return
362
     */
363
    protected static boolean isEtAl(Person member) {
364
        if (member != null && isEtAl(member.getTitleCache()) && isEtAl(member.getNomenclaturalTitle())){
365
            return true;
366
        }
367
        return false;
368
    }
369

    
370
    private static boolean isEtAl(String str) {
371
        if (str == null || !str.equals("al.")){
372
            return false;
373
        }else{
374
            return true;
375
        }
376
    }
377

    
378
    private Person makePerson(String full, String nom, boolean preliminaryFlag, int authorTeamId) {
379
        Person person = Person.NewInstance(nom, null, null, null);
380
        if (isBlank(full)){
381
            //do nothing
382
        }else if (!full.matches(".*[\\s\\.].*")){
383
            //no whitespace and no . => family name
384
            person.setFamilyName(full);
385
        }else if (nom.equals(full)){
386
            parsePerson(person, full, preliminaryFlag);
387
        }else{
388
            parsePerson(person, full, true);
389
        }
390
        if (nom.length() <= 2 || (nom.length() == 3 && nom.endsWith(".")) ){
391
            if (!nom.matches("((L|Sm|DC|al|Sw|Qz|Fr|Ib)\\.|Hu|Ma|Hy|Wu)")){
392
                logger.warn("Unexpected short nom author name part: " + nom + "; " + authorTeamId);
393
            }
394
        }
395

    
396
        return person;
397
    }
398

    
399
    /**
400
     * @param person
401
     */
402
    private void parsePerson(Person person, String str, boolean preliminary) {
403
        String capWord = "\\p{javaUpperCase}\\p{javaLowerCase}{2,}";
404
        String famStart = "(Le |D'|'t |Mc|Mac|Des |d'|Du |De )";
405
        String regEx = "(\\p{javaUpperCase}\\.([\\s-]\\p{javaUpperCase}\\.)*(\\s(de|del|da|von|van|v.|af|zu))?\\s)("
406
                + famStart + "?" + capWord + "((-| y | é | de | de la )" + capWord + ")?)";
407
        Matcher matcher = Pattern.compile(regEx).matcher(str);
408
        if (matcher.matches()){
409

    
410
            person.setProtectedTitleCache(false);
411
            //Initials + family name
412
//            String[] splits = str.split("\\s");
413
//            int n = matcher.groupCount();
414
//            for (int i = 0; i< n; i++){
415
//                String s = matcher.group(i);
416
//                System.out.println(s);
417
//            }
418
            person.setFamilyName(matcher.group(5).trim());
419

    
420
//            String initials = splits[0];
421
//            for (int i = 1; i < splits.length -1; i++ ){
422
//                initials += " " + splits[i];
423
//            }
424
            person.setInitials(matcher.group(1).trim());
425
        }else{
426
            person.setTitleCache(str, preliminary);
427
        }
428
    }
429

    
430
    private static final String TEAM_SPLITTER = "(,|&)";
431

    
432
    /**
433
     * @param fullAuthorTeamCache
434
     * @param TEAM_SPLITTER
435
     * @return
436
     */
437
    protected static String[] splitTeam(String teamCache) {
438
        if (teamCache == null){
439
            return new String[0];
440
        }
441
        return teamCache.split(TEAM_SPLITTER);
442
    }
443

    
444

    
445
    /**
446
     * @param authorTeamCache
447
     * @return
448
     */
449
    protected static boolean hasTeamSeparator(String teamCache) {
450
        if (isBlank(teamCache)){
451
            return false;
452
        }else if (teamCache.contains(",") || teamCache.contains("&")||teamCache.contains(" et ")||teamCache.endsWith(" al.")){
453
            return true;
454
        }else{
455
            return false;
456
        }
457
    }
458

    
459

    
460
    @Override
461
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, BerlinModelImportState state)  {
462
		String nameSpace;
463
		Class<?> cdmClass;
464
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
465

    
466
		//person map
467
		Set<String> idInSourceList = makeAuthorIdList(rs);
468
		nameSpace = BerlinModelAuthorImport.NAMESPACE;
469
		cdmClass = Person.class;
470
		Map<String, Person> personMap = (Map<String, Person>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idInSourceList, nameSpace);
471
		result.put(nameSpace, personMap);
472

    
473
		return result;
474
	}
475

    
476
	/**
477
	 * @param rs
478
	 * @return
479
	 * @throws SQLException
480
	 * @throws SQLException
481
	 */
482
	private Set<String> makeAuthorIdList(ResultSet rs) {
483
		Set<String> result = new HashSet<String>();
484

    
485
		String authorTeamIdList = "";
486
		try {
487
			while (rs.next()){
488
				int id = rs.getInt("AuthorTeamId");
489
				authorTeamIdList = CdmUtils.concat(",", authorTeamIdList, String.valueOf(id));
490
			}
491

    
492
			String strQuerySequence =
493
				" SELECT DISTINCT authorFk " +
494
	            " FROM AuthorTeamSequence " +
495
	            " WHERE authorTeamFk IN (@) ";
496
			strQuerySequence = strQuerySequence.replace("@", authorTeamIdList);
497

    
498
			rs = source.getResultSet(strQuerySequence) ;
499
			while (rs.next()){
500
				int authorFk = rs.getInt("authorFk");
501
				result.add(String.valueOf(authorFk));
502
			}
503
		} catch (SQLException e) {
504
			throw new RuntimeException(e);
505
		}
506
		return result;
507
	}
508

    
509
	private boolean makeSequence(BerlinModelImportState state, Team team, int teamId, ResultSet rsSequence, Map<String, Person> personMap){
510
		try {
511
			if (rsSequence.isBeforeFirst()){
512
				rsSequence.next();
513
			}
514
			if (rsSequence.isAfterLast()){
515
				return true;
516
			}
517
			int sequenceTeamFk;
518
			try {
519
				sequenceTeamFk = rsSequence.getInt("AuthorTeamFk");
520
			} catch (SQLException e) {
521
				if (rsSequence.next() == false){
522
					return true;
523
				}else{
524
					throw e;
525
				}
526
			}
527
			while (sequenceTeamFk < teamId){
528
				if (! state.getConfig().isEuroMed()){
529
				    logger.warn("Sequence team FK is smaller then team ID. Some teams for a sequence may not be available");
530
				}
531
				rsSequence.next();
532
				sequenceTeamFk = rsSequence.getInt("AuthorTeamFk");
533
			}
534
			while (sequenceTeamFk == teamId){
535
				int authorFk = rsSequence.getInt("AuthorFk");
536
				Person author = personMap.get(String.valueOf(authorFk));
537
				if (author != null){
538
				team.addTeamMember(author);
539
				}else{
540
					logger.error("Author " + authorFk + " was not found for team " + teamId);
541
				}
542
				if (rsSequence.next()){
543
					sequenceTeamFk = rsSequence.getInt("AuthorTeamFk");
544
				}else{
545
					break;
546
				}
547
			}
548
			return true;
549
		} catch (SQLException e) {
550
			e.printStackTrace();
551
			return false;
552
		}
553
	}
554

    
555

    
556
	@Override
557
	protected boolean doCheck(BerlinModelImportState state){
558
		IOValidator<BerlinModelImportState> validator = new BerlinModelAuthorTeamImportValidator();
559
		return validator.validate(state);
560
	}
561

    
562

    
563
	@Override
564
    protected boolean isIgnore(BerlinModelImportState state){
565
		return ! state.getConfig().isDoAuthors();
566
	}
567

    
568

    
569

    
570
}
(3-3/22)