Revision f4b9ac06
Added by Andreas Müller over 5 years ago
app-import/src/main/java/eu/etaxonomy/cdm/io/berlinModel/in/BerlinModelAuthorTeamImport.java | ||
---|---|---|
24 | 24 |
import eu.etaxonomy.cdm.io.common.IOValidator; |
25 | 25 |
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner; |
26 | 26 |
import eu.etaxonomy.cdm.io.common.Source; |
27 |
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper; |
|
27 | 28 |
import eu.etaxonomy.cdm.model.agent.Person; |
28 | 29 |
import eu.etaxonomy.cdm.model.agent.Team; |
30 |
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase; |
|
29 | 31 |
import eu.etaxonomy.cdm.model.common.CdmBase; |
30 | 32 |
|
31 | 33 |
|
... | ... | |
47 | 49 |
private ResultSet rsSequence; |
48 | 50 |
private Source source; |
49 | 51 |
|
52 |
private ImportDeduplicationHelper<BerlinModelImportState> deduplicationHelper; |
|
53 |
|
|
54 |
|
|
50 | 55 |
public BerlinModelAuthorTeamImport(){ |
51 | 56 |
super(dbTableName, pluralString); |
52 | 57 |
} |
... | ... | |
56 | 61 |
protected void doInvoke(BerlinModelImportState state){ |
57 | 62 |
BerlinModelImportConfigurator config = state.getConfig(); |
58 | 63 |
source = config.getSource(); |
64 |
this.deduplicationHelper = ImportDeduplicationHelper.NewInstance(this, state); |
|
59 | 65 |
|
60 | 66 |
logger.info("start make " + pluralString + " ..."); |
61 | 67 |
|
... | ... | |
71 | 77 |
String strQuerySequence = |
72 | 78 |
" SELECT * " + |
73 | 79 |
" FROM AuthorTeamSequence " + |
74 |
strWhere +
|
|
80 |
(state.getConfig().isEuroMed() ? "" : strWhere) +
|
|
75 | 81 |
" ORDER By authorTeamFk, Sequence "; |
76 | 82 |
|
77 | 83 |
int recordsPerTransaction = config.getRecordsPerTransaction(); |
... | ... | |
87 | 93 |
return; |
88 | 94 |
} |
89 | 95 |
|
90 |
|
|
91 | 96 |
logger.info("end make " + pluralString + " ... " + getSuccessString(true)); |
97 |
this.deduplicationHelper = null; |
|
92 | 98 |
return; |
93 | 99 |
} |
94 | 100 |
|
95 | 101 |
@Override |
96 | 102 |
protected String getIdQuery(BerlinModelImportState state){ |
97 |
String strWhere = " WHERE (1=1) "; |
|
103 |
if (state.getConfig().isEuroMed()){ |
|
104 |
return " SELECT authorTeamId " |
|
105 |
+ " FROM v_cdm_exp_authorTeamsAll ORDER BY authorTeamId " |
|
106 |
; |
|
107 |
} |
|
108 |
|
|
109 |
String strWhere = " WHERE (1=1) "; |
|
98 | 110 |
if (state.getConfig().getAuthorTeamFilter() != null){ |
99 | 111 |
strWhere += " AND " + state.getConfig().getAuthorTeamFilter(); |
100 | 112 |
} |
... | ... | |
119 | 131 |
|
120 | 132 |
|
121 | 133 |
@Override |
122 |
public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) { |
|
134 |
public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, BerlinModelImportState state) {
|
|
123 | 135 |
boolean success = true ; |
136 |
deduplicationHelper.restartSession(); |
|
124 | 137 |
BerlinModelImportConfigurator config = state.getConfig(); |
125 |
Set<Team> teamsToSave = new HashSet<Team>(); |
|
126 |
Map<String, Person> personMap = partitioner.getObjectMap(BerlinModelAuthorImport.NAMESPACE); |
|
138 |
Set<TeamOrPersonBase<?>> authorsToSave = new HashSet<>(); |
|
139 |
@SuppressWarnings("unchecked") |
|
140 |
Map<String, Person> personMap = partitioner.getObjectMap(BerlinModelAuthorImport.NAMESPACE); |
|
127 | 141 |
|
128 | 142 |
ResultSet rs = partitioner.getResultSet(); |
129 | 143 |
//for each reference |
... | ... | |
143 | 157 |
Boolean preliminaryFlag = rs.getBoolean("PreliminaryFlag"); |
144 | 158 |
String authorTeamCache = rs.getString("AuthorTeamCache"); |
145 | 159 |
String fullAuthorTeamCache = rs.getString("FullAuthorTeamCache"); |
146 |
if (CdmUtils.isBlank(fullAuthorTeamCache)){ |
|
147 |
fullAuthorTeamCache = authorTeamCache; |
|
160 |
if (isBlank(fullAuthorTeamCache)){ |
|
161 |
// fullAuthorTeamCache = authorTeamCache; |
|
162 |
if (isBlank(authorTeamCache) && preliminaryFlag){ |
|
163 |
logger.warn("authorTeamCache and fullAuthorTeamCache are blank/null and preliminaryFlag is true. This makes no sense and should not happen: " + teamId); |
|
164 |
} |
|
148 | 165 |
} |
149 |
team.setTitleCache(fullAuthorTeamCache, preliminaryFlag); |
|
150 |
team.setNomenclaturalTitle(authorTeamCache, preliminaryFlag); |
|
166 |
// team.setTitleCache(fullAuthorTeamCache, preliminaryFlag);
|
|
167 |
// team.setNomenclaturalTitle(authorTeamCache, preliminaryFlag);
|
|
151 | 168 |
|
152 |
success &= makeSequence(team, teamId, rsSequence, personMap); |
|
153 |
if (team.getTeamMembers().size()== 0 && preliminaryFlag == false){ |
|
154 |
team.setProtectedTitleCache(true); |
|
155 |
team.setProtectedNomenclaturalTitleCache(true); |
|
156 |
} |
|
169 |
success &= makeSequence(state, team, teamId, rsSequence, personMap); |
|
170 |
|
|
171 |
TeamOrPersonBase<?> author = handleTeam(state, team, authorTeamCache, |
|
172 |
fullAuthorTeamCache, preliminaryFlag, teamId); |
|
173 |
|
|
174 |
if (author == team && team.getTeamMembers().size() == 0 && preliminaryFlag == false){ |
|
175 |
team.setProtectedTitleCache(true); |
|
176 |
team.setProtectedNomenclaturalTitleCache(true); |
|
177 |
} |
|
157 | 178 |
|
158 | 179 |
//created, notes |
159 |
doIdCreatedUpdatedNotes(state, team, rs, teamId, NAMESPACE);
|
|
180 |
doIdCreatedUpdatedNotes(state, author, rs, teamId, NAMESPACE);
|
|
160 | 181 |
|
161 |
teamsToSave.add(team);
|
|
182 |
authorsToSave.add(author);
|
|
162 | 183 |
}catch(Exception ex){ |
163 | 184 |
logger.error(ex.getMessage()); |
164 | 185 |
ex.printStackTrace(); |
... | ... | |
171 | 192 |
} |
172 | 193 |
|
173 | 194 |
//logger.info(i + " " + pluralString + " handled"); |
174 |
getAgentService().saveOrUpdate((Collection)teamsToSave);
|
|
195 |
getAgentService().saveOrUpdate((Collection)authorsToSave);
|
|
175 | 196 |
|
176 | 197 |
return success; |
177 | 198 |
} |
178 | 199 |
|
179 | 200 |
|
180 |
@Override |
|
201 |
/** |
|
202 |
* @param state |
|
203 |
* @param team |
|
204 |
* @param authorTeamCache |
|
205 |
* @param fullAuthorTeamCache |
|
206 |
* @param preliminaryFlag |
|
207 |
* @return |
|
208 |
*/ |
|
209 |
private TeamOrPersonBase<?> handleTeam(BerlinModelImportState state, Team team, String authorTeamCache, |
|
210 |
String fullAuthorTeamCache, boolean preliminaryFlag, int authorTeamId) { |
|
211 |
if (!team.getTeamMembers().isEmpty()){ |
|
212 |
return team; |
|
213 |
} |
|
214 |
|
|
215 |
TeamOrPersonBase<?> result = team; |
|
216 |
if (isBlank(authorTeamCache)){ |
|
217 |
logger.warn("Blank authorTeamCache not yet handled: " + authorTeamId); |
|
218 |
} |
|
219 |
|
|
220 |
if (!hasTeamSeparator(authorTeamCache) && !hasTeamSeparator(fullAuthorTeamCache)){ |
|
221 |
Person person = makePerson(fullAuthorTeamCache, authorTeamCache, preliminaryFlag, authorTeamId); |
|
222 |
result = deduplicatePerson(state, person); |
|
223 |
if (result != person){ |
|
224 |
logger.debug("Single person team deduplicated: " + authorTeamId); |
|
225 |
}else{ |
|
226 |
person.addImportSource(String.valueOf(authorTeamId), NAMESPACE, state.getTransactionalSourceReference(), null); |
|
227 |
|
|
228 |
} |
|
229 |
}else{ |
|
230 |
String[] fullTeams = splitTeam(fullAuthorTeamCache); |
|
231 |
String[] nomTeams = splitTeam(authorTeamCache); |
|
232 |
if (fullTeams.length == nomTeams.length || fullTeams.length == 0){ |
|
233 |
for (int i = 0; i< nomTeams.length ;i++){ |
|
234 |
String fullTeam = fullTeams.length == 0? null: fullTeams[i].trim(); |
|
235 |
Person member = makePerson(fullTeam, nomTeams[i].trim(), preliminaryFlag, authorTeamId); |
|
236 |
if (member == null){ |
|
237 |
logger.warn("Unexpected short nom. author: " + nomTeams[i].trim() + "; " + authorTeamId); |
|
238 |
continue; |
|
239 |
} |
|
240 |
if (i == nomTeams.length -1 && isEtAl(member)){ |
|
241 |
team.setHasMoreMembers(true); |
|
242 |
}else{ |
|
243 |
Person dedupMember = deduplicatePerson(state, member); |
|
244 |
if (dedupMember != member){ |
|
245 |
logger.debug("Member deduplicated: " + authorTeamId); |
|
246 |
} |
|
247 |
//TODO add idInBM |
|
248 |
team.addTeamMember(dedupMember); |
|
249 |
} |
|
250 |
} |
|
251 |
//check nomenclatural title |
|
252 |
if (team.getCacheStrategy().getNomenclaturalTitle(team).equals(authorTeamCache)){ |
|
253 |
team.setProtectedNomenclaturalTitleCache(false); |
|
254 |
}else if(team.getCacheStrategy().getNomenclaturalTitle(team).replace(" ,", ",").equals(authorTeamCache)){ |
|
255 |
//also accept teams with ' , ' as separator as not protected |
|
256 |
team.setProtectedTitleCache(false); |
|
257 |
}else{ |
|
258 |
team.setNomenclaturalTitle(authorTeamCache, true); |
|
259 |
logger.warn("Creation of nomTitle for team with members did not work: " + authorTeamCache + " <-> " + team.getCacheStrategy().getNomenclaturalTitle(team)+ " : " + authorTeamId); |
|
260 |
} |
|
261 |
//check titleCache |
|
262 |
if (team.generateTitle().equals(fullAuthorTeamCache)){ |
|
263 |
team.setProtectedTitleCache(false); |
|
264 |
}else if(fullAuthorTeamCache == null){ |
|
265 |
//do nothing |
|
266 |
}else if(team.generateTitle().replace(" & ", ", ").equals(fullAuthorTeamCache.replace(" & ", ", "))){ |
|
267 |
//also accept teams with ', ' as final member separator as not protected |
|
268 |
team.setProtectedTitleCache(false); |
|
269 |
}else if(team.getFullTitle().replace(" & ", ", ").equals(fullAuthorTeamCache.replace(" & ", ", "))){ |
|
270 |
//also accept teams with ', ' as final member separator as not protected |
|
271 |
team.setProtectedTitleCache(false); |
|
272 |
}else{ |
|
273 |
String fullTitle = team.getFullTitle().replace(" & ", ", "); |
|
274 |
team.setTitleCache(fullAuthorTeamCache, true); |
|
275 |
logger.warn("Creation of titleCache for team with members did not work: " + fullAuthorTeamCache + " <-> " + team.generateTitle()+ " : " + authorTeamId); |
|
276 |
} |
|
277 |
}else{ |
|
278 |
logger.warn("AuthorTeamCache and fullAuthorTeamCache have not the same team size: " + authorTeamCache + " <-> " + fullAuthorTeamCache+ " : " + authorTeamId); |
|
279 |
} |
|
280 |
} |
|
281 |
return result; |
|
282 |
} |
|
283 |
|
|
284 |
|
|
285 |
/** |
|
286 |
* @param member |
|
287 |
* @return |
|
288 |
*/ |
|
289 |
private Person deduplicatePerson(BerlinModelImportState state, Person person) { |
|
290 |
Person result = deduplicationHelper.getExistingAuthor(state, person); |
|
291 |
return result; |
|
292 |
} |
|
293 |
|
|
294 |
|
|
295 |
/** |
|
296 |
* @param member |
|
297 |
* @return |
|
298 |
*/ |
|
299 |
protected static boolean isEtAl(Person member) { |
|
300 |
if (member != null && isEtAl(member.getTitleCache()) && isEtAl(member.getNomenclaturalTitle())){ |
|
301 |
return true; |
|
302 |
} |
|
303 |
return false; |
|
304 |
} |
|
305 |
|
|
306 |
private static boolean isEtAl(String str) { |
|
307 |
if (str == null || !str.equals("al.")){ |
|
308 |
return false; |
|
309 |
}else{ |
|
310 |
return true; |
|
311 |
} |
|
312 |
} |
|
313 |
|
|
314 |
private Person makePerson(String full, String nom, boolean preliminaryFlag, int authorTeamId) { |
|
315 |
Person person = Person.NewInstance(nom, null, null, null); |
|
316 |
if (isBlank(full)){ |
|
317 |
//do nothing |
|
318 |
}else if (!full.matches(".*[\\s\\.].*")){ |
|
319 |
person.setFamilyName(full); |
|
320 |
}else if (nom.equals(full)){ |
|
321 |
parsePerson(person, full, preliminaryFlag); |
|
322 |
}else{ |
|
323 |
parsePerson(person, full, true); |
|
324 |
} |
|
325 |
if (nom.length() <= 2 || (nom.length() == 3 && nom.endsWith(".")) ){ |
|
326 |
if (!nom.matches("((L|Sm|DC|al|Sw|Qz|Fr|Ib)\\.|Hu|Ma|Hy|Wu)")){ |
|
327 |
logger.warn("Unexpected short nom author name part: " + nom + "; " + authorTeamId); |
|
328 |
} |
|
329 |
} |
|
330 |
|
|
331 |
return person; |
|
332 |
} |
|
333 |
|
|
334 |
/** |
|
335 |
* @param person |
|
336 |
*/ |
|
337 |
private void parsePerson(Person person, String str, boolean preliminary) { |
|
338 |
if (str.matches("\\p{javaUpperCase}\\.(\\s\\p{javaUpperCase}\\.)*\\s\\p{javaUpperCase}\\p{javaLowerCase}{2,}")){ |
|
339 |
String[] splits = str.split("\\s"); |
|
340 |
person.setFamilyName(splits[splits.length-1]); |
|
341 |
String initials = splits[0]; |
|
342 |
for (int i = 1; i < splits.length -1; i++ ){ |
|
343 |
initials += " " + splits[i]; |
|
344 |
} |
|
345 |
person.setInitials(initials); |
|
346 |
person.setProtectedTitleCache(false); |
|
347 |
}else{ |
|
348 |
person.setTitleCache(str, preliminary); |
|
349 |
} |
|
350 |
|
|
351 |
} |
|
352 |
|
|
353 |
private static final String TEAM_SPLITTER = "(,|;|&| et | Et )"; |
|
354 |
|
|
355 |
/** |
|
356 |
* @param fullAuthorTeamCache |
|
357 |
* @param TEAM_SPLITTER |
|
358 |
* @return |
|
359 |
*/ |
|
360 |
protected static String[] splitTeam(String teamCache) { |
|
361 |
if (teamCache == null){ |
|
362 |
return new String[0]; |
|
363 |
} |
|
364 |
return teamCache.split(TEAM_SPLITTER); |
|
365 |
} |
|
366 |
|
|
367 |
|
|
368 |
/** |
|
369 |
* @param authorTeamCache |
|
370 |
* @return |
|
371 |
*/ |
|
372 |
protected static boolean hasTeamSeparator(String teamCache) { |
|
373 |
if (isBlank(teamCache)){ |
|
374 |
return false; |
|
375 |
}else if (teamCache.contains(",") || teamCache.contains("&")||teamCache.contains(" et ")||teamCache.endsWith(" al.")){ |
|
376 |
return true; |
|
377 |
}else{ |
|
378 |
return false; |
|
379 |
} |
|
380 |
} |
|
381 |
|
|
382 |
|
|
383 |
@Override |
|
181 | 384 |
public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, BerlinModelImportState state) { |
182 | 385 |
String nameSpace; |
183 | 386 |
Class<?> cdmClass; |
... | ... | |
226 | 429 |
return result; |
227 | 430 |
} |
228 | 431 |
|
229 |
private boolean makeSequence(Team team, int teamId, ResultSet rsSequence, Map<String, Person> personMap){ |
|
432 |
private boolean makeSequence(BerlinModelImportState state, Team team, int teamId, ResultSet rsSequence, Map<String, Person> personMap){
|
|
230 | 433 |
try { |
231 | 434 |
if (rsSequence.isBeforeFirst()){ |
232 | 435 |
rsSequence.next(); |
... | ... | |
245 | 448 |
} |
246 | 449 |
} |
247 | 450 |
while (sequenceTeamFk < teamId){ |
248 |
logger.warn("Sequence team FK is smaller then team ID. Some teams for a sequence may not be available"); |
|
451 |
if (! state.getConfig().isEuroMed()){ |
|
452 |
logger.warn("Sequence team FK is smaller then team ID. Some teams for a sequence may not be available"); |
|
453 |
} |
|
249 | 454 |
rsSequence.next(); |
250 | 455 |
sequenceTeamFk = rsSequence.getInt("AuthorTeamFk"); |
251 | 456 |
} |
app-import/src/main/java/eu/etaxonomy/cdm/io/berlinModel/in/BerlinModelReferenceImport.java | ||
---|---|---|
57 | 57 |
import eu.etaxonomy.cdm.io.common.mapping.berlinModel.CdmOneToManyMapper; |
58 | 58 |
import eu.etaxonomy.cdm.io.common.mapping.berlinModel.CdmStringMapper; |
59 | 59 |
import eu.etaxonomy.cdm.io.common.mapping.berlinModel.CdmUriMapper; |
60 |
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper; |
|
61 |
import eu.etaxonomy.cdm.model.agent.Person; |
|
60 | 62 |
import eu.etaxonomy.cdm.model.agent.Team; |
61 | 63 |
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase; |
62 | 64 |
import eu.etaxonomy.cdm.model.common.CdmBase; |
... | ... | |
69 | 71 |
import eu.etaxonomy.cdm.model.reference.IPrintSeries; |
70 | 72 |
import eu.etaxonomy.cdm.model.reference.Reference; |
71 | 73 |
import eu.etaxonomy.cdm.model.reference.ReferenceFactory; |
74 |
import eu.etaxonomy.cdm.strategy.cache.agent.TeamDefaultCacheStrategy; |
|
72 | 75 |
|
73 | 76 |
/** |
74 | 77 |
* @author a.mueller |
... | ... | |
89 | 92 |
public static final UUID DATE_STRING_UUID = UUID.fromString("e4130eae-606e-4b0c-be4f-e93dc161be7d"); |
90 | 93 |
public static final UUID IS_PAPER_UUID = UUID.fromString("8a326129-d0d0-4f9d-bbdf-8d86b037c65e"); |
91 | 94 |
|
95 |
private static ImportDeduplicationHelper<BerlinModelImportState> deduplicationHelper; |
|
92 | 96 |
|
93 | 97 |
private final int modCount = 1000; |
94 | 98 |
private static final String pluralString = "references"; |
... | ... | |
179 | 183 |
@Override |
180 | 184 |
protected void doInvoke(BerlinModelImportState state){ |
181 | 185 |
logger.info("start make " + getPluralString() + " ..."); |
186 |
deduplicationHelper = ImportDeduplicationHelper.NewInstance(this, state); |
|
182 | 187 |
|
183 | 188 |
boolean success = true; |
184 | 189 |
initializeMappers(state); |
... | ... | |
251 | 256 |
if (! success){ |
252 | 257 |
state.setUnsuccessfull(); |
253 | 258 |
} |
259 |
deduplicationHelper = null; |
|
254 | 260 |
return; |
255 | 261 |
} |
256 | 262 |
|
... | ... | |
399 | 405 |
|
400 | 406 |
//team map |
401 | 407 |
nameSpace = BerlinModelAuthorTeamImport.NAMESPACE; |
402 |
cdmClass = Team.class; |
|
408 |
cdmClass = TeamOrPersonBase.class;
|
|
403 | 409 |
idSet = teamIdSet; |
404 | 410 |
@SuppressWarnings("unchecked") |
405 | 411 |
Map<String, Team> teamMap = (Map<String, Team>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace); |
... | ... | |
407 | 413 |
|
408 | 414 |
//refAuthor map |
409 | 415 |
nameSpace = REF_AUTHOR_NAMESPACE; |
410 |
cdmClass = Team.class; |
|
416 |
cdmClass = TeamOrPersonBase.class;
|
|
411 | 417 |
idSet = teamStringSet2; |
412 | 418 |
@SuppressWarnings("unchecked") |
413 | 419 |
Map<String, Team> refAuthorMap = (Map<String, Team>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace); |
... | ... | |
550 | 556 |
String nomTitleAbbrev = rs.getString("nomTitleAbbrev"); |
551 | 557 |
boolean isPreliminary = rs.getBoolean("PreliminaryFlag"); |
552 | 558 |
String refAuthorString = rs.getString("refAuthorString"); |
553 |
Integer nomAuthorTeamFk = rs.getInt("NomAuthorTeamFk"); |
|
554 |
String strNomAuthorTeamFk = String.valueOf(nomAuthorTeamFk); |
|
555 |
TeamOrPersonBase<?> nomAuthor = teamMap.get(strNomAuthorTeamFk); |
|
559 |
Integer nomAuthorTeamFk = nullSafeInt(rs, "NomAuthorTeamFk"); |
|
560 |
|
|
561 |
TeamOrPersonBase<?> nomAuthor = null; |
|
562 |
if (nomAuthorTeamFk != null){ |
|
563 |
String strNomAuthorTeamFk = String.valueOf(nomAuthorTeamFk); |
|
564 |
nomAuthor = teamMap.get(strNomAuthorTeamFk); |
|
565 |
if (nomAuthor == null){ |
|
566 |
logger.warn("NomAuthor ("+strNomAuthorTeamFk+") not found in teamMap for " + refId); |
|
567 |
} |
|
568 |
} |
|
556 | 569 |
|
557 | 570 |
Reference sourceReference = state.getTransactionalSourceReference(); |
558 | 571 |
|
... | ... | |
571 | 584 |
} |
572 | 585 |
|
573 | 586 |
//author |
574 |
TeamOrPersonBase<?> author = getAuthorship(state, refAuthorString, nomAuthor); |
|
587 |
TeamOrPersonBase<?> author = getAuthorship(state, refAuthorString, nomAuthor, refId);
|
|
575 | 588 |
ref.setAuthorship(author); |
576 | 589 |
|
577 | 590 |
//save |
... | ... | |
899 | 912 |
} |
900 | 913 |
|
901 | 914 |
|
902 |
private static TeamOrPersonBase<?> getAuthorship(BerlinModelImportState state, String authorString, TeamOrPersonBase<?> nomAuthor){ |
|
915 |
private static TeamOrPersonBase<?> getAuthorship(BerlinModelImportState state, String refAuthorString, |
|
916 |
TeamOrPersonBase<?> nomAuthor, Integer refId){ |
|
903 | 917 |
|
904 | 918 |
TeamOrPersonBase<?> result; |
905 | 919 |
if (nomAuthor != null){ |
906 | 920 |
result = nomAuthor; |
907 |
} else if (StringUtils.isNotBlank(authorString)){ |
|
921 |
if (isNotBlank(refAuthorString) && !nomAuthor.getTitleCache().equals(refAuthorString)){ |
|
922 |
boolean isSimilar = handleSimilarAuthors(state, refAuthorString, nomAuthor); |
|
923 |
if (! isSimilar){ |
|
924 |
logger.warn("refAuthorString differs from nomAuthor.titleCache: " + refAuthorString |
|
925 |
+ " <-> " + nomAuthor.getTitleCache() + "; RefId: " + refId); |
|
926 |
} |
|
927 |
} |
|
928 |
|
|
929 |
} else if (isNotBlank(refAuthorString)){ |
|
930 |
refAuthorString = refAuthorString.trim(); |
|
908 | 931 |
//TODO match with existing Persons/Teams |
909 |
Team team = state.getRelatedObject(REF_AUTHOR_NAMESPACE, authorString, Team.class); |
|
910 |
if (team == null){ |
|
911 |
team = Team.NewInstance(); |
|
912 |
team.setNomenclaturalTitle(authorString); |
|
913 |
team.setTitleCache(authorString, true); |
|
914 |
state.addRelatedObject(REF_AUTHOR_NAMESPACE, authorString, team); |
|
915 |
team.addImportSource(authorString, REF_AUTHOR_NAMESPACE, state.getTransactionalSourceReference(), null); |
|
932 |
TeamOrPersonBase<?> author = state.getRelatedObject(REF_AUTHOR_NAMESPACE, refAuthorString, TeamOrPersonBase.class); |
|
933 |
if (author == null){ |
|
934 |
if (!BerlinModelAuthorTeamImport.hasTeamSeparator(refAuthorString)){ |
|
935 |
author = makePerson(refAuthorString, refId); |
|
936 |
}else{ |
|
937 |
author = makeTeam(state, refAuthorString, refId); |
|
938 |
} |
|
939 |
state.addRelatedObject(REF_AUTHOR_NAMESPACE, refAuthorString, author); |
|
940 |
author.addImportSource(refAuthorString, REF_AUTHOR_NAMESPACE, state.getTransactionalSourceReference(), null); |
|
916 | 941 |
} |
917 |
result = team;
|
|
942 |
result = author;
|
|
918 | 943 |
}else{ |
919 | 944 |
result = null; |
920 | 945 |
} |
... | ... | |
922 | 947 |
return result; |
923 | 948 |
} |
924 | 949 |
|
950 |
/** |
|
951 |
* @param state |
|
952 |
* @param refAuthorString |
|
953 |
* @param refId |
|
954 |
* @return |
|
955 |
*/ |
|
956 |
private static Team makeTeam(BerlinModelImportState state, String refAuthorString, Integer refId) { |
|
957 |
Team team = Team.NewInstance(); |
|
958 |
if (containsEdOrColon(refAuthorString)){ |
|
959 |
team.setTitleCache(refAuthorString, true); |
|
960 |
}else{ |
|
961 |
String[] fullTeams = BerlinModelAuthorTeamImport.splitTeam(refAuthorString); |
|
962 |
boolean lastWasInitials = false; |
|
963 |
for (int i = 0; i< fullTeams.length ;i++){ |
|
964 |
if (lastWasInitials){ |
|
965 |
lastWasInitials = false; |
|
966 |
continue; |
|
967 |
} |
|
968 |
String fullTeam = fullTeams[i].trim(); |
|
969 |
String initials = null; |
|
970 |
if (fullTeams.length > i+1){ |
|
971 |
String nextSplit = fullTeams[i+1].trim(); |
|
972 |
if (isInitial(nextSplit)){ |
|
973 |
lastWasInitials = true; |
|
974 |
initials = nextSplit; |
|
975 |
} |
|
976 |
} |
|
977 |
Person member = makePerson(fullTeam, refId); |
|
978 |
|
|
979 |
if (initials != null && !member.isProtectedTitleCache()){ |
|
980 |
member.setInitials(initials); |
|
981 |
}else if (initials != null){ |
|
982 |
member.setTitleCache(member.getTitleCache() + ", " + initials, true); |
|
983 |
} |
|
984 |
|
|
985 |
if (i == fullTeams.length -1 && BerlinModelAuthorTeamImport.isEtAl(member)){ |
|
986 |
team.setHasMoreMembers(true); |
|
987 |
}else{ |
|
988 |
Person dedupMember = deduplicatePerson(state, member); |
|
989 |
if (dedupMember != member){ |
|
990 |
logger.debug("Member deduplicated: " + refId); |
|
991 |
}else{ |
|
992 |
member.addImportSource(refAuthorString, REF_AUTHOR_NAMESPACE, state.getTransactionalSourceReference(), null); |
|
993 |
} |
|
994 |
//TODO add idInBM |
|
995 |
team.addTeamMember(dedupMember); |
|
996 |
} |
|
997 |
} |
|
998 |
} |
|
925 | 999 |
|
926 |
/** |
|
1000 |
TeamDefaultCacheStrategy formatter = (TeamDefaultCacheStrategy) team.getCacheStrategy(); |
|
1001 |
formatter.setEtAlPosition(100); |
|
1002 |
if (formatter.getTitleCache(team).equals(refAuthorString)){ |
|
1003 |
team.setProtectedTitleCache(false); |
|
1004 |
}else if(formatter.getTitleCache(team).replace(" & ", ", ").equals(refAuthorString.replace(" & ", ", ").replace(" ,", ","))){ |
|
1005 |
//also accept teams with ', ' as final member separator as not protected |
|
1006 |
team.setProtectedTitleCache(false); |
|
1007 |
}else if(formatter.getFullTitle(team).replace(" & ", ", ").equals(refAuthorString.replace(" & ", ", "))){ |
|
1008 |
//.. or teams with initials first |
|
1009 |
team.setProtectedTitleCache(false); |
|
1010 |
}else if (containsEdOrColon(refAuthorString)){ |
|
1011 |
//nothing to do, it is expected to be protected |
|
1012 |
}else{ |
|
1013 |
team.setTitleCache(refAuthorString, true); |
|
1014 |
logger.warn("Creation of titleCache for team with members did not (fully) work: " + refAuthorString + " <-> " + formatter.getTitleCache(team)+ " : " + refId); |
|
1015 |
} |
|
1016 |
return team; |
|
1017 |
} |
|
1018 |
|
|
1019 |
/** |
|
1020 |
* @param refAuthorString |
|
1021 |
* @return |
|
1022 |
*/ |
|
1023 |
private static boolean containsEdOrColon(String str) { |
|
1024 |
if (str.contains(" ed.") || str.contains(" Ed.") || str.contains("(ed.") |
|
1025 |
|| str.contains("[ed.") || str.contains("(Eds)") || str.contains("(Eds.)") || |
|
1026 |
str.contains("(eds.)") || str.contains(":")|| str.contains(";")){ |
|
1027 |
return true; |
|
1028 |
}else{ |
|
1029 |
return false; |
|
1030 |
} |
|
1031 |
} |
|
1032 |
|
|
1033 |
/** |
|
1034 |
* @param nextSplit |
|
1035 |
* @return |
|
1036 |
*/ |
|
1037 |
private static boolean isInitial(String str) { |
|
1038 |
if (str == null){ |
|
1039 |
return false; |
|
1040 |
} |
|
1041 |
boolean matches = str.trim().matches("(\\p{javaUpperCase}|Yu|Th|Ch|Lj|Sz|Dz|Sh)\\.?(\\s*[-\\s]\\s*(\\p{javaUpperCase}|Yu)\\.?)*(\\s+(van|von))?"); |
|
1042 |
return matches; |
|
1043 |
} |
|
1044 |
|
|
1045 |
private static Person deduplicatePerson(BerlinModelImportState state, Person person) { |
|
1046 |
Person result = deduplicationHelper.getExistingAuthor(state, person); |
|
1047 |
return result; |
|
1048 |
} |
|
1049 |
|
|
1050 |
private static Person makePerson(String full, Integer refId) { |
|
1051 |
Person person = Person.NewInstance(); |
|
1052 |
person.setTitleCache(full, true); |
|
1053 |
if (!full.matches(".*[\\s\\.].*")){ |
|
1054 |
person.setFamilyName(full); |
|
1055 |
person.setProtectedTitleCache(false); |
|
1056 |
}else if (full.matches("(\\p{javaUpperCase}|Kh)\\.(\\s\\p{javaUpperCase}\\.)*\\s\\p{javaUpperCase}\\p{javaLowerCase}{2,}")){ |
|
1057 |
String[] splits = full.split("\\s"); |
|
1058 |
person.setFamilyName(splits[splits.length-1]); |
|
1059 |
String initials = splits[0]; |
|
1060 |
for (int i = 1; i < splits.length -1; i++ ){ |
|
1061 |
initials += " " + splits[i]; |
|
1062 |
} |
|
1063 |
person.setInitials(initials); |
|
1064 |
person.setProtectedTitleCache(false); |
|
1065 |
} |
|
1066 |
if ((full.length() <= 2 && !full.matches("(Li|Bo|Em|Ay|Ma)")) || (full.length() == 3 && full.endsWith(".") && !full.equals("al.")) ){ |
|
1067 |
// if (!full.matches("((L|Sm|DC|al|Sw|Qz|Fr|Ib)\\.|Hu|Ma|Hy|Wu)")){ |
|
1068 |
logger.warn("Unexpected short nom author name part: " + full + "; " + refId); |
|
1069 |
// } |
|
1070 |
} |
|
1071 |
|
|
1072 |
return person; |
|
1073 |
} |
|
1074 |
|
|
1075 |
/** |
|
1076 |
* @param state |
|
1077 |
* @param refAuthorString |
|
1078 |
* @param nomAuthor |
|
1079 |
* @return |
|
1080 |
*/ |
|
1081 |
private static boolean handleSimilarAuthors(BerlinModelImportState state, String refAuthorString, |
|
1082 |
TeamOrPersonBase<?> nomAuthor) { |
|
1083 |
if (refAuthorString.equals(nomAuthor.getNomenclaturalTitle())){ |
|
1084 |
//nomTitle equal |
|
1085 |
return true; |
|
1086 |
}else{ |
|
1087 |
String nomTitle = nomAuthor.getTitleCache(); |
|
1088 |
if (refAuthorString.replace(" & ", ", ").equals(nomTitle.replace(" & ", ", "))){ |
|
1089 |
//nomTitle equal except for "&" |
|
1090 |
return true; |
|
1091 |
} |
|
1092 |
|
|
1093 |
if (refAuthorString.replace(" & ", ", ").equals(nomAuthor.getFullTitle().replace(" & ", ", "))){ |
|
1094 |
return true; |
|
1095 |
} |
|
1096 |
|
|
1097 |
if (refAuthorString.contains(",") && !nomTitle.contains(",") && nomAuthor.isInstanceOf(Person.class)){ |
|
1098 |
String[] splits = refAuthorString.split(","); |
|
1099 |
Person person = CdmBase.deproxy(nomAuthor, Person.class); |
|
1100 |
if (splits.length == 2){ |
|
1101 |
String newMatch = splits[1].trim() + " " + splits[0].trim(); |
|
1102 |
if (newMatch.equals(nomTitle)){ |
|
1103 |
if (isBlank(person.getFamilyName())){ |
|
1104 |
person.setFamilyName(splits[0].trim()); |
|
1105 |
} |
|
1106 |
if (isBlank(person.getInitials())){ |
|
1107 |
person.setInitials(splits[1].trim()); |
|
1108 |
} |
|
1109 |
return true; |
|
1110 |
} |
|
1111 |
} |
|
1112 |
} |
|
1113 |
} |
|
1114 |
return false; |
|
1115 |
} |
|
1116 |
|
|
1117 |
/** |
|
927 | 1118 |
* @param lowerCase |
928 | 1119 |
* @param config |
929 | 1120 |
* @return |
Also available in: Unified diff
ref #7799 implement author parsing for AuthorTeam and Reference.RefAuthorString