Revision 6943d396
Added by Andreas Müller over 5 years ago
app-import/src/main/java/eu/etaxonomy/cdm/io/berlinModel/in/BerlinModelAuthorTeamImport.java | ||
---|---|---|
15 | 15 |
import java.util.HashSet; |
16 | 16 |
import java.util.Map; |
17 | 17 |
import java.util.Set; |
18 |
import java.util.regex.Matcher; |
|
19 |
import java.util.regex.Pattern; |
|
18 | 20 |
|
19 | 21 |
import org.apache.log4j.Logger; |
20 | 22 |
import org.springframework.stereotype.Component; |
... | ... | |
29 | 31 |
import eu.etaxonomy.cdm.model.agent.Team; |
30 | 32 |
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase; |
31 | 33 |
import eu.etaxonomy.cdm.model.common.CdmBase; |
34 |
import eu.etaxonomy.cdm.strategy.cache.agent.INomenclaturalAuthorCacheStrategy; |
|
32 | 35 |
|
33 | 36 |
|
34 | 37 |
/** |
... | ... | |
42 | 45 |
private static final Logger logger = Logger.getLogger(BerlinModelAuthorTeamImport.class); |
43 | 46 |
|
44 | 47 |
public static final String NAMESPACE = "AuthorTeam"; |
48 |
public static final String NAMESPACE_SPLIT = "AuthorTeam_Split"; |
|
45 | 49 |
|
46 | 50 |
private static final String pluralString = "AuthorTeams"; |
47 | 51 |
private static final String dbTableName = "AuthorTeam"; |
... | ... | |
154 | 158 |
|
155 | 159 |
Team team = Team.NewInstance(); |
156 | 160 |
|
157 |
Boolean preliminaryFlag = rs.getBoolean("PreliminaryFlag");
|
|
161 |
boolean preliminaryFlag = rs.getBoolean("PreliminaryFlag");
|
|
158 | 162 |
String authorTeamCache = rs.getString("AuthorTeamCache"); |
159 | 163 |
String fullAuthorTeamCache = rs.getString("FullAuthorTeamCache"); |
160 | 164 |
if (isBlank(fullAuthorTeamCache)){ |
... | ... | |
162 | 166 |
if (isBlank(authorTeamCache) && preliminaryFlag){ |
163 | 167 |
logger.warn("authorTeamCache and fullAuthorTeamCache are blank/null and preliminaryFlag is true. This makes no sense and should not happen: " + teamId); |
164 | 168 |
} |
169 |
}else{ |
|
170 |
fullAuthorTeamCache = fullAuthorTeamCache.trim(); |
|
171 |
} |
|
172 |
if (isNotBlank(authorTeamCache)){ |
|
173 |
authorTeamCache = authorTeamCache.trim(); |
|
165 | 174 |
} |
166 | 175 |
// team.setTitleCache(fullAuthorTeamCache, preliminaryFlag); |
167 | 176 |
// team.setNomenclaturalTitle(authorTeamCache, preliminaryFlag); |
168 | 177 |
|
169 | 178 |
success &= makeSequence(state, team, teamId, rsSequence, personMap); |
170 | 179 |
|
180 |
team.setTitleCache(fullAuthorTeamCache, preliminaryFlag); |
|
181 |
team.setNomenclaturalTitle(authorTeamCache, preliminaryFlag); |
|
182 |
|
|
171 | 183 |
TeamOrPersonBase<?> author = handleTeam(state, team, authorTeamCache, |
172 | 184 |
fullAuthorTeamCache, preliminaryFlag, teamId); |
173 | 185 |
|
186 |
//in case preliminary flag is set incorrectly in BM |
|
174 | 187 |
if (author == team && team.getTeamMembers().size() == 0 && preliminaryFlag == false){ |
175 | 188 |
team.setProtectedTitleCache(true); |
176 | 189 |
team.setProtectedNomenclaturalTitleCache(true); |
177 | 190 |
} |
178 | 191 |
|
179 | 192 |
//created, notes |
180 |
doIdCreatedUpdatedNotes(state, author, rs, teamId, NAMESPACE); |
|
193 |
// doIdCreatedUpdatedNotes(state, author, rs, teamId, NAMESPACE); |
|
194 |
doCreatedUpdatedNotes(state, author, rs); |
|
195 |
if (!importSourceExists(author, String.valueOf(teamId), NAMESPACE, state.getTransactionalSourceReference())){ |
|
196 |
doId(state, author, teamId, NAMESPACE); |
|
197 |
} |
|
181 | 198 |
|
182 | 199 |
authorsToSave.add(author); |
183 | 200 |
}catch(Exception ex){ |
... | ... | |
217 | 234 |
logger.warn("Blank authorTeamCache not yet handled: " + authorTeamId); |
218 | 235 |
} |
219 | 236 |
|
237 |
//single person |
|
220 | 238 |
if (!hasTeamSeparator(authorTeamCache) && !hasTeamSeparator(fullAuthorTeamCache)){ |
221 | 239 |
Person person = makePerson(fullAuthorTeamCache, authorTeamCache, preliminaryFlag, authorTeamId); |
222 | 240 |
result = deduplicatePerson(state, person); |
223 | 241 |
if (result != person){ |
224 | 242 |
logger.debug("Single person team deduplicated: " + authorTeamId); |
225 | 243 |
}else{ |
226 |
person.addImportSource(String.valueOf(authorTeamId), NAMESPACE, state.getTransactionalSourceReference(), null); |
|
227 |
|
|
244 |
String idInSource = String.valueOf(authorTeamId); |
|
245 |
if (!importSourceExists(person, idInSource, NAMESPACE, state.getTransactionalSourceReference())){ |
|
246 |
person.addImportSource(idInSource, NAMESPACE, state.getTransactionalSourceReference(), null); |
|
247 |
} |
|
228 | 248 |
} |
249 |
//team |
|
229 | 250 |
}else{ |
230 | 251 |
String[] fullTeams = splitTeam(fullAuthorTeamCache); |
231 | 252 |
String[] nomTeams = splitTeam(authorTeamCache); |
232 |
if (fullTeams.length == nomTeams.length || fullTeams.length == 0){ |
|
253 |
if (fullTeams.length != nomTeams.length && fullTeams.length != 0){ |
|
254 |
logger.warn("AuthorTeamCache and fullAuthorTeamCache have not the same team size: " + authorTeamCache + " <-> " + fullAuthorTeamCache+ " : " + authorTeamId); |
|
255 |
}else{ |
|
233 | 256 |
for (int i = 0; i< nomTeams.length ;i++){ |
234 | 257 |
String fullTeam = fullTeams.length == 0? null: fullTeams[i].trim(); |
235 | 258 |
Person member = makePerson(fullTeam, nomTeams[i].trim(), preliminaryFlag, authorTeamId); |
... | ... | |
243 | 266 |
Person dedupMember = deduplicatePerson(state, member); |
244 | 267 |
if (dedupMember != member){ |
245 | 268 |
logger.debug("Member deduplicated: " + authorTeamId); |
269 |
}else{ |
|
270 |
String idInSource = String.valueOf(authorTeamId); |
|
271 |
if (!importSourceExists(member, idInSource, NAMESPACE_SPLIT, state.getTransactionalSourceReference())){ |
|
272 |
member.addImportSource(idInSource, NAMESPACE_SPLIT, state.getTransactionalSourceReference(), null); |
|
273 |
} |
|
246 | 274 |
} |
247 | 275 |
//TODO add idInBM |
248 | 276 |
team.addTeamMember(dedupMember); |
249 | 277 |
} |
250 | 278 |
} |
251 | 279 |
//check nomenclatural title |
252 |
if (team.getCacheStrategy().getNomenclaturalTitle(team).equals(authorTeamCache)){ |
|
253 |
team.setProtectedNomenclaturalTitleCache(false); |
|
254 |
}else if(team.getCacheStrategy().getNomenclaturalTitle(team).replace(" ,", ",").equals(authorTeamCache)){ |
|
255 |
//also accept teams with ' , ' as separator as not protected |
|
256 |
team.setProtectedTitleCache(false); |
|
257 |
}else{ |
|
258 |
team.setNomenclaturalTitle(authorTeamCache, true); |
|
259 |
logger.warn("Creation of nomTitle for team with members did not work: " + authorTeamCache + " <-> " + team.getCacheStrategy().getNomenclaturalTitle(team)+ " : " + authorTeamId); |
|
260 |
} |
|
280 |
//TODO |
|
281 |
checkTeamNomenclaturalTitle(team, authorTeamCache, authorTeamId); |
|
261 | 282 |
//check titleCache |
262 |
if (team.generateTitle().equals(fullAuthorTeamCache)){ |
|
263 |
team.setProtectedTitleCache(false); |
|
264 |
}else if(fullAuthorTeamCache == null){ |
|
265 |
//do nothing |
|
266 |
}else if(team.generateTitle().replace(" & ", ", ").equals(fullAuthorTeamCache.replace(" & ", ", "))){ |
|
267 |
//also accept teams with ', ' as final member separator as not protected |
|
268 |
team.setProtectedTitleCache(false); |
|
269 |
}else if(team.getFullTitle().replace(" & ", ", ").equals(fullAuthorTeamCache.replace(" & ", ", "))){ |
|
270 |
//also accept teams with ', ' as final member separator as not protected |
|
271 |
team.setProtectedTitleCache(false); |
|
272 |
}else{ |
|
273 |
String fullTitle = team.getFullTitle().replace(" & ", ", "); |
|
274 |
team.setTitleCache(fullAuthorTeamCache, true); |
|
275 |
logger.warn("Creation of titleCache for team with members did not work: " + fullAuthorTeamCache + " <-> " + team.generateTitle()+ " : " + authorTeamId); |
|
276 |
} |
|
283 |
checkTeamTitleCache(team, fullAuthorTeamCache, authorTeamId); |
|
284 |
}//same size team |
|
285 |
result = deduplicateTeam(state, team); |
|
286 |
if (result != team){ |
|
287 |
logger.debug("Dedup team"); |
|
277 | 288 |
}else{ |
278 |
logger.warn("AuthorTeamCache and fullAuthorTeamCache have not the same team size: " + authorTeamCache + " <-> " + fullAuthorTeamCache+ " : " + authorTeamId); |
|
289 |
String idInSource = String.valueOf(authorTeamId); |
|
290 |
if (!importSourceExists(result, idInSource, NAMESPACE, state.getTransactionalSourceReference())){ |
|
291 |
result.addImportSource(idInSource, NAMESPACE, state.getTransactionalSourceReference(), null); |
|
292 |
} |
|
279 | 293 |
} |
280 |
} |
|
294 |
}//team |
|
295 |
|
|
281 | 296 |
return result; |
282 | 297 |
} |
283 | 298 |
|
284 | 299 |
|
300 |
/** |
|
301 |
* @param team |
|
302 |
* @param authorTeamCache |
|
303 |
* @param authorTeamId |
|
304 |
*/ |
|
305 |
protected void checkTeamNomenclaturalTitle(Team team, String authorTeamCache, int authorTeamId) { |
|
306 |
if (team.getCacheStrategy().getNomenclaturalTitle(team).equals(authorTeamCache)){ |
|
307 |
team.setProtectedNomenclaturalTitleCache(false); |
|
308 |
}else if(team.getCacheStrategy().getNomenclaturalTitle(team).replace(" ,", ",").equals(authorTeamCache)){ |
|
309 |
//also accept teams with ' , ' as separator as not protected |
|
310 |
team.setProtectedNomenclaturalTitleCache(false); |
|
311 |
}else{ |
|
312 |
team.setNomenclaturalTitle(authorTeamCache, true); |
|
313 |
logger.warn("Creation of nomTitle for team with members did not work: " + authorTeamCache + " <-> " + team.getCacheStrategy().getNomenclaturalTitle(team)+ " : " + authorTeamId); |
|
314 |
} |
|
315 |
} |
|
316 |
|
|
317 |
|
|
318 |
/** |
|
319 |
* @param team |
|
320 |
* @param fullAuthorTeamCache |
|
321 |
* @param authorTeamId |
|
322 |
* @param formatter |
|
323 |
*/ |
|
324 |
protected void checkTeamTitleCache(Team team, String fullAuthorTeamCache, int authorTeamId) { |
|
325 |
INomenclaturalAuthorCacheStrategy<Team> formatter = team.getCacheStrategy(); |
|
326 |
if (team.generateTitle().equals(fullAuthorTeamCache)){ |
|
327 |
team.setProtectedTitleCache(false); |
|
328 |
}else if(fullAuthorTeamCache == null){ |
|
329 |
team.setProtectedTitleCache(false); |
|
330 |
}else if(team.generateTitle().replace(" & ", ", ").equals(fullAuthorTeamCache.replace(" & ", ", "))){ |
|
331 |
//also accept teams with ', ' as final member separator as not protected |
|
332 |
team.setProtectedTitleCache(false); |
|
333 |
}else if(formatter.getFullTitle(team).replace(" & ", ", ").equals(fullAuthorTeamCache.replace(" & ", ", "))){ |
|
334 |
//also accept teams with ', ' as final member separator as not protected |
|
335 |
team.setProtectedTitleCache(false); |
|
336 |
}else{ |
|
337 |
String fullTitle = formatter.getFullTitle(team).replace(" & ", ", "); |
|
338 |
team.setTitleCache(fullAuthorTeamCache, true); |
|
339 |
logger.warn("Creation of titleCache for team with members did not work: " + fullAuthorTeamCache + " <-> " + team.generateTitle()+ " : " + authorTeamId); |
|
340 |
} |
|
341 |
} |
|
342 |
|
|
343 |
|
|
285 | 344 |
/** |
286 | 345 |
* @param member |
287 | 346 |
* @return |
... | ... | |
291 | 350 |
return result; |
292 | 351 |
} |
293 | 352 |
|
353 |
private Team deduplicateTeam(BerlinModelImportState state, Team team) { |
|
354 |
Team result = deduplicationHelper.getExistingAuthor(state, team); |
|
355 |
return result; |
|
356 |
} |
|
357 |
|
|
294 | 358 |
|
295 | 359 |
/** |
296 | 360 |
* @param member |
... | ... | |
316 | 380 |
if (isBlank(full)){ |
317 | 381 |
//do nothing |
318 | 382 |
}else if (!full.matches(".*[\\s\\.].*")){ |
383 |
//no whitespace and no . => family name |
|
319 | 384 |
person.setFamilyName(full); |
320 | 385 |
}else if (nom.equals(full)){ |
321 | 386 |
parsePerson(person, full, preliminaryFlag); |
... | ... | |
335 | 400 |
* @param person |
336 | 401 |
*/ |
337 | 402 |
private void parsePerson(Person person, String str, boolean preliminary) { |
338 |
if (str.matches("\\p{javaUpperCase}\\.(\\s\\p{javaUpperCase}\\.)*\\s\\p{javaUpperCase}\\p{javaLowerCase}{2,}")){ |
|
339 |
String[] splits = str.split("\\s"); |
|
340 |
person.setFamilyName(splits[splits.length-1]); |
|
341 |
String initials = splits[0]; |
|
342 |
for (int i = 1; i < splits.length -1; i++ ){ |
|
343 |
initials += " " + splits[i]; |
|
344 |
} |
|
345 |
person.setInitials(initials); |
|
403 |
String capWord = "\\p{javaUpperCase}\\p{javaLowerCase}{2,}"; |
|
404 |
String famStart = "(Le |D'|'t |Mc|Mac|Des |d'|Du |De )"; |
|
405 |
String regEx = "(\\p{javaUpperCase}\\.([\\s-]\\p{javaUpperCase}\\.)*(\\s(de|del|da|von|van|v.|af|zu))?\\s)(" |
|
406 |
+ famStart + "?" + capWord + "((-| y | é | de | de la )" + capWord + ")?)"; |
|
407 |
Matcher matcher = Pattern.compile(regEx).matcher(str); |
|
408 |
if (matcher.matches()){ |
|
409 |
|
|
346 | 410 |
person.setProtectedTitleCache(false); |
411 |
//Initials + family name |
|
412 |
// String[] splits = str.split("\\s"); |
|
413 |
// int n = matcher.groupCount(); |
|
414 |
// for (int i = 0; i< n; i++){ |
|
415 |
// String s = matcher.group(i); |
|
416 |
// System.out.println(s); |
|
417 |
// } |
|
418 |
person.setFamilyName(matcher.group(5).trim()); |
|
419 |
|
|
420 |
// String initials = splits[0]; |
|
421 |
// for (int i = 1; i < splits.length -1; i++ ){ |
|
422 |
// initials += " " + splits[i]; |
|
423 |
// } |
|
424 |
person.setInitials(matcher.group(1).trim()); |
|
347 | 425 |
}else{ |
348 | 426 |
person.setTitleCache(str, preliminary); |
349 | 427 |
} |
350 |
|
|
351 | 428 |
} |
352 | 429 |
|
353 |
private static final String TEAM_SPLITTER = "(,|;|&| et | Et )";
|
|
430 |
private static final String TEAM_SPLITTER = "(,|&)";
|
|
354 | 431 |
|
355 | 432 |
/** |
356 | 433 |
* @param fullAuthorTeamCache |
Also available in: Unified diff
ref #7799 deduplicate and parse authorteams