Project

General

Profile

« Previous | Next » 

Revision 6943d396

Added by Andreas Müller over 5 years ago

ref #7799 deduplicate and parse authorteams

View differences:

app-import/src/main/java/eu/etaxonomy/cdm/io/berlinModel/in/BerlinModelAuthorTeamImport.java
15 15
import java.util.HashSet;
16 16
import java.util.Map;
17 17
import java.util.Set;
18
import java.util.regex.Matcher;
19
import java.util.regex.Pattern;
18 20

  
19 21
import org.apache.log4j.Logger;
20 22
import org.springframework.stereotype.Component;
......
29 31
import eu.etaxonomy.cdm.model.agent.Team;
30 32
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
31 33
import eu.etaxonomy.cdm.model.common.CdmBase;
34
import eu.etaxonomy.cdm.strategy.cache.agent.INomenclaturalAuthorCacheStrategy;
32 35

  
33 36

  
34 37
/**
......
42 45
    private static final Logger logger = Logger.getLogger(BerlinModelAuthorTeamImport.class);
43 46

  
44 47
	public static final String NAMESPACE = "AuthorTeam";
48
	   public static final String NAMESPACE_SPLIT = "AuthorTeam_Split";
45 49

  
46 50
	private static final String pluralString = "AuthorTeams";
47 51
	private static final String dbTableName = "AuthorTeam";
......
154 158

  
155 159
					Team team = Team.NewInstance();
156 160

  
157
					Boolean preliminaryFlag = rs.getBoolean("PreliminaryFlag");
161
					boolean preliminaryFlag = rs.getBoolean("PreliminaryFlag");
158 162
					String authorTeamCache = rs.getString("AuthorTeamCache");
159 163
					String fullAuthorTeamCache = rs.getString("FullAuthorTeamCache");
160 164
					if (isBlank(fullAuthorTeamCache)){
......
162 166
						if (isBlank(authorTeamCache) && preliminaryFlag){
163 167
						    logger.warn("authorTeamCache and fullAuthorTeamCache are blank/null and preliminaryFlag is true. This makes no sense and should not happen: " + teamId);
164 168
						}
169
					}else{
170
					    fullAuthorTeamCache = fullAuthorTeamCache.trim();
171
					}
172
					if (isNotBlank(authorTeamCache)){
173
					    authorTeamCache = authorTeamCache.trim();
165 174
					}
166 175
//					team.setTitleCache(fullAuthorTeamCache, preliminaryFlag);
167 176
//					team.setNomenclaturalTitle(authorTeamCache, preliminaryFlag);
168 177

  
169 178
					success &= makeSequence(state, team, teamId, rsSequence, personMap);
170 179

  
180
					team.setTitleCache(fullAuthorTeamCache, preliminaryFlag);
181
					team.setNomenclaturalTitle(authorTeamCache, preliminaryFlag);
182

  
171 183
					TeamOrPersonBase<?> author = handleTeam(state, team, authorTeamCache,
172 184
					        fullAuthorTeamCache, preliminaryFlag, teamId);
173 185

  
186
					//in case preliminary flag is set incorrectly in BM
174 187
					if (author == team && team.getTeamMembers().size() == 0 && preliminaryFlag == false){
175 188
                        team.setProtectedTitleCache(true);
176 189
                        team.setProtectedNomenclaturalTitleCache(true);
177 190
                    }
178 191

  
179 192
					//created, notes
180
					doIdCreatedUpdatedNotes(state, author, rs, teamId, NAMESPACE);
193
//					doIdCreatedUpdatedNotes(state, author, rs, teamId, NAMESPACE);
194
					doCreatedUpdatedNotes(state, author, rs);
195
					if (!importSourceExists(author, String.valueOf(teamId), NAMESPACE, state.getTransactionalSourceReference())){
196
					    doId(state, author, teamId, NAMESPACE);
197
					}
181 198

  
182 199
					authorsToSave.add(author);
183 200
				}catch(Exception ex){
......
217 234
            logger.warn("Blank authorTeamCache not yet handled: " + authorTeamId);
218 235
        }
219 236

  
237
        //single person
220 238
        if (!hasTeamSeparator(authorTeamCache) && !hasTeamSeparator(fullAuthorTeamCache)){
221 239
            Person person = makePerson(fullAuthorTeamCache, authorTeamCache, preliminaryFlag, authorTeamId);
222 240
            result = deduplicatePerson(state, person);
223 241
            if (result != person){
224 242
                logger.debug("Single person team deduplicated: " + authorTeamId);
225 243
            }else{
226
                person.addImportSource(String.valueOf(authorTeamId), NAMESPACE, state.getTransactionalSourceReference(), null);
227

  
244
                String idInSource = String.valueOf(authorTeamId);
245
                if (!importSourceExists(person, idInSource, NAMESPACE, state.getTransactionalSourceReference())){
246
                    person.addImportSource(idInSource, NAMESPACE, state.getTransactionalSourceReference(), null);
247
                }
228 248
            }
249
        //team
229 250
        }else{
230 251
            String[] fullTeams = splitTeam(fullAuthorTeamCache);
231 252
            String[] nomTeams = splitTeam(authorTeamCache);
232
            if (fullTeams.length == nomTeams.length || fullTeams.length == 0){
253
            if (fullTeams.length != nomTeams.length && fullTeams.length != 0){
254
                logger.warn("AuthorTeamCache and fullAuthorTeamCache have not the same team size: " + authorTeamCache + " <-> " + fullAuthorTeamCache+ " : " + authorTeamId);
255
            }else{
233 256
                for (int i = 0; i< nomTeams.length ;i++){
234 257
                    String fullTeam = fullTeams.length == 0? null: fullTeams[i].trim();
235 258
                    Person member = makePerson(fullTeam, nomTeams[i].trim(), preliminaryFlag, authorTeamId);
......
243 266
                        Person dedupMember = deduplicatePerson(state, member);
244 267
                        if (dedupMember != member){
245 268
                            logger.debug("Member deduplicated: " + authorTeamId);
269
                        }else{
270
                            String idInSource = String.valueOf(authorTeamId);
271
                            if (!importSourceExists(member, idInSource, NAMESPACE_SPLIT, state.getTransactionalSourceReference())){
272
                                member.addImportSource(idInSource, NAMESPACE_SPLIT, state.getTransactionalSourceReference(), null);
273
                            }
246 274
                        }
247 275
                        //TODO add idInBM
248 276
                        team.addTeamMember(dedupMember);
249 277
                    }
250 278
                }
251 279
                //check nomenclatural title
252
                if (team.getCacheStrategy().getNomenclaturalTitle(team).equals(authorTeamCache)){
253
                    team.setProtectedNomenclaturalTitleCache(false);
254
                }else if(team.getCacheStrategy().getNomenclaturalTitle(team).replace(" ,", ",").equals(authorTeamCache)){
255
                    //also accept teams with ' , ' as separator as not protected
256
                    team.setProtectedTitleCache(false);
257
                }else{
258
                    team.setNomenclaturalTitle(authorTeamCache, true);
259
                    logger.warn("Creation of nomTitle for team with members did not work: " + authorTeamCache + " <-> " + team.getCacheStrategy().getNomenclaturalTitle(team)+ " : " + authorTeamId);
260
                }
280
                //TODO
281
                checkTeamNomenclaturalTitle(team, authorTeamCache, authorTeamId);
261 282
                //check titleCache
262
                if (team.generateTitle().equals(fullAuthorTeamCache)){
263
                    team.setProtectedTitleCache(false);
264
                }else if(fullAuthorTeamCache == null){
265
                    //do nothing
266
                }else if(team.generateTitle().replace(" & ", ", ").equals(fullAuthorTeamCache.replace(" & ", ", "))){
267
                    //also accept teams with ', ' as final member separator as not protected
268
                    team.setProtectedTitleCache(false);
269
                }else if(team.getFullTitle().replace(" & ", ", ").equals(fullAuthorTeamCache.replace(" & ", ", "))){
270
                    //also accept teams with ', ' as final member separator as not protected
271
                    team.setProtectedTitleCache(false);
272
                }else{
273
                    String fullTitle = team.getFullTitle().replace(" & ", ", ");
274
                    team.setTitleCache(fullAuthorTeamCache, true);
275
                    logger.warn("Creation of titleCache for team with members did not work: " + fullAuthorTeamCache + " <-> " + team.generateTitle()+ " : " + authorTeamId);
276
                }
283
                checkTeamTitleCache(team, fullAuthorTeamCache, authorTeamId);
284
            }//same size team
285
            result = deduplicateTeam(state, team);
286
            if (result != team){
287
                logger.debug("Dedup team");
277 288
            }else{
278
                logger.warn("AuthorTeamCache and fullAuthorTeamCache have not the same team size: " + authorTeamCache + " <-> " + fullAuthorTeamCache+ " : " + authorTeamId);
289
                String idInSource = String.valueOf(authorTeamId);
290
                if (!importSourceExists(result, idInSource, NAMESPACE, state.getTransactionalSourceReference())){
291
                    result.addImportSource(idInSource, NAMESPACE, state.getTransactionalSourceReference(), null);
292
                }
279 293
            }
280
        }
294
        }//team
295

  
281 296
        return result;
282 297
    }
283 298

  
284 299

  
300
    /**
301
     * @param team
302
     * @param authorTeamCache
303
     * @param authorTeamId
304
     */
305
    protected void checkTeamNomenclaturalTitle(Team team, String authorTeamCache, int authorTeamId) {
306
        if (team.getCacheStrategy().getNomenclaturalTitle(team).equals(authorTeamCache)){
307
            team.setProtectedNomenclaturalTitleCache(false);
308
        }else if(team.getCacheStrategy().getNomenclaturalTitle(team).replace(" ,", ",").equals(authorTeamCache)){
309
            //also accept teams with ' , ' as separator as not protected
310
            team.setProtectedNomenclaturalTitleCache(false);
311
        }else{
312
            team.setNomenclaturalTitle(authorTeamCache, true);
313
            logger.warn("Creation of nomTitle for team with members did not work: " + authorTeamCache + " <-> " + team.getCacheStrategy().getNomenclaturalTitle(team)+ " : " + authorTeamId);
314
        }
315
    }
316

  
317

  
318
    /**
319
     * @param team
320
     * @param fullAuthorTeamCache
321
     * @param authorTeamId
322
     * @param formatter
323
     */
324
    protected void checkTeamTitleCache(Team team, String fullAuthorTeamCache, int authorTeamId) {
325
        INomenclaturalAuthorCacheStrategy<Team> formatter = team.getCacheStrategy();
326
        if (team.generateTitle().equals(fullAuthorTeamCache)){
327
            team.setProtectedTitleCache(false);
328
        }else if(fullAuthorTeamCache == null){
329
            team.setProtectedTitleCache(false);
330
        }else if(team.generateTitle().replace(" & ", ", ").equals(fullAuthorTeamCache.replace(" & ", ", "))){
331
            //also accept teams with ', ' as final member separator as not protected
332
            team.setProtectedTitleCache(false);
333
        }else if(formatter.getFullTitle(team).replace(" & ", ", ").equals(fullAuthorTeamCache.replace(" & ", ", "))){
334
            //also accept teams with ', ' as final member separator as not protected
335
            team.setProtectedTitleCache(false);
336
        }else{
337
            String fullTitle = formatter.getFullTitle(team).replace(" & ", ", ");
338
            team.setTitleCache(fullAuthorTeamCache, true);
339
            logger.warn("Creation of titleCache for team with members did not work: " + fullAuthorTeamCache + " <-> " + team.generateTitle()+ " : " + authorTeamId);
340
        }
341
    }
342

  
343

  
285 344
    /**
286 345
     * @param member
287 346
     * @return
......
291 350
        return result;
292 351
    }
293 352

  
353
    private Team deduplicateTeam(BerlinModelImportState state, Team team) {
354
        Team result = deduplicationHelper.getExistingAuthor(state, team);
355
        return result;
356
    }
357

  
294 358

  
295 359
    /**
296 360
     * @param member
......
316 380
        if (isBlank(full)){
317 381
            //do nothing
318 382
        }else if (!full.matches(".*[\\s\\.].*")){
383
            //no whitespace and no . => family name
319 384
            person.setFamilyName(full);
320 385
        }else if (nom.equals(full)){
321 386
            parsePerson(person, full, preliminaryFlag);
......
335 400
     * @param person
336 401
     */
337 402
    private void parsePerson(Person person, String str, boolean preliminary) {
338
        if (str.matches("\\p{javaUpperCase}\\.(\\s\\p{javaUpperCase}\\.)*\\s\\p{javaUpperCase}\\p{javaLowerCase}{2,}")){
339
            String[] splits = str.split("\\s");
340
            person.setFamilyName(splits[splits.length-1]);
341
            String initials = splits[0];
342
            for (int i = 1; i < splits.length -1; i++ ){
343
                initials += " " + splits[i];
344
            }
345
            person.setInitials(initials);
403
        String capWord = "\\p{javaUpperCase}\\p{javaLowerCase}{2,}";
404
        String famStart = "(Le |D'|'t |Mc|Mac|Des |d'|Du |De )";
405
        String regEx = "(\\p{javaUpperCase}\\.([\\s-]\\p{javaUpperCase}\\.)*(\\s(de|del|da|von|van|v.|af|zu))?\\s)("
406
                + famStart + "?" + capWord + "((-| y | é | de | de la )" + capWord + ")?)";
407
        Matcher matcher = Pattern.compile(regEx).matcher(str);
408
        if (matcher.matches()){
409

  
346 410
            person.setProtectedTitleCache(false);
411
            //Initials + family name
412
//            String[] splits = str.split("\\s");
413
//            int n = matcher.groupCount();
414
//            for (int i = 0; i< n; i++){
415
//                String s = matcher.group(i);
416
//                System.out.println(s);
417
//            }
418
            person.setFamilyName(matcher.group(5).trim());
419

  
420
//            String initials = splits[0];
421
//            for (int i = 1; i < splits.length -1; i++ ){
422
//                initials += " " + splits[i];
423
//            }
424
            person.setInitials(matcher.group(1).trim());
347 425
        }else{
348 426
            person.setTitleCache(str, preliminary);
349 427
        }
350

  
351 428
    }
352 429

  
353
    private static final String TEAM_SPLITTER = "(,|;|&| et | Et )";
430
    private static final String TEAM_SPLITTER = "(,|&)";
354 431

  
355 432
    /**
356 433
     * @param fullAuthorTeamCache

Also available in: Unified diff