Project

General

Profile

« Previous | Next » 

Revision f4b9ac06

Added by Andreas Müller over 5 years ago

ref #7799 implement author parsing for AuthorTeam and Reference.RefAuthorString

View differences:

app-import/src/main/java/eu/etaxonomy/cdm/io/berlinModel/in/BerlinModelAuthorTeamImport.java
24 24
import eu.etaxonomy.cdm.io.common.IOValidator;
25 25
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
26 26
import eu.etaxonomy.cdm.io.common.Source;
27
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
27 28
import eu.etaxonomy.cdm.model.agent.Person;
28 29
import eu.etaxonomy.cdm.model.agent.Team;
30
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
29 31
import eu.etaxonomy.cdm.model.common.CdmBase;
30 32

  
31 33

  
......
47 49
	private ResultSet rsSequence;
48 50
	private Source source;
49 51

  
52
    private ImportDeduplicationHelper<BerlinModelImportState> deduplicationHelper;
53

  
54

  
50 55
	public BerlinModelAuthorTeamImport(){
51 56
		super(dbTableName, pluralString);
52 57
	}
......
56 61
    protected void doInvoke(BerlinModelImportState state){
57 62
		BerlinModelImportConfigurator config = state.getConfig();
58 63
		source = config.getSource();
64
		this.deduplicationHelper = ImportDeduplicationHelper.NewInstance(this, state);
59 65

  
60 66
		logger.info("start make " + pluralString + " ...");
61 67

  
......
71 77
		String strQuerySequence =
72 78
			" SELECT *  " +
73 79
            " FROM AuthorTeamSequence " +
74
				strWhere +
80
                (state.getConfig().isEuroMed() ? "" : strWhere) +
75 81
            " ORDER By authorTeamFk, Sequence ";
76 82

  
77 83
		int recordsPerTransaction = config.getRecordsPerTransaction();
......
87 93
			return;
88 94
		}
89 95

  
90

  
91 96
		logger.info("end make " + pluralString + " ... " + getSuccessString(true));
97
		this.deduplicationHelper = null;
92 98
		return;
93 99
	}
94 100

  
95 101
	@Override
96 102
	protected String getIdQuery(BerlinModelImportState state){
97
		String strWhere = " WHERE (1=1) ";
103
		if (state.getConfig().isEuroMed()){
104
		    return " SELECT authorTeamId "
105
		         + " FROM v_cdm_exp_authorTeamsAll ORDER BY authorTeamId "
106
		         ;
107
		}
108

  
109
	    String strWhere = " WHERE (1=1) ";
98 110
		if (state.getConfig().getAuthorTeamFilter() != null){
99 111
			strWhere += " AND " + state.getConfig().getAuthorTeamFilter();
100 112
		}
......
119 131

  
120 132

  
121 133
	@Override
122
    public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) {
134
    public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, BerlinModelImportState state) {
123 135
		boolean success = true ;
136
		deduplicationHelper.restartSession();
124 137
		BerlinModelImportConfigurator config = state.getConfig();
125
		Set<Team> teamsToSave = new HashSet<Team>();
126
		Map<String, Person> personMap = partitioner.getObjectMap(BerlinModelAuthorImport.NAMESPACE);
138
		Set<TeamOrPersonBase<?>> authorsToSave = new HashSet<>();
139
		@SuppressWarnings("unchecked")
140
        Map<String, Person> personMap = partitioner.getObjectMap(BerlinModelAuthorImport.NAMESPACE);
127 141

  
128 142
		ResultSet rs = partitioner.getResultSet();
129 143
		//for each reference
......
143 157
					Boolean preliminaryFlag = rs.getBoolean("PreliminaryFlag");
144 158
					String authorTeamCache = rs.getString("AuthorTeamCache");
145 159
					String fullAuthorTeamCache = rs.getString("FullAuthorTeamCache");
146
					if (CdmUtils.isBlank(fullAuthorTeamCache)){
147
						fullAuthorTeamCache = authorTeamCache;
160
					if (isBlank(fullAuthorTeamCache)){
161
//						fullAuthorTeamCache = authorTeamCache;
162
						if (isBlank(authorTeamCache) && preliminaryFlag){
163
						    logger.warn("authorTeamCache and fullAuthorTeamCache are blank/null and preliminaryFlag is true. This makes no sense and should not happen: " + teamId);
164
						}
148 165
					}
149
					team.setTitleCache(fullAuthorTeamCache, preliminaryFlag);
150
					team.setNomenclaturalTitle(authorTeamCache, preliminaryFlag);
166
//					team.setTitleCache(fullAuthorTeamCache, preliminaryFlag);
167
//					team.setNomenclaturalTitle(authorTeamCache, preliminaryFlag);
151 168

  
152
					success &= makeSequence(team, teamId, rsSequence, personMap);
153
					if (team.getTeamMembers().size()== 0 && preliminaryFlag == false){
154
						team.setProtectedTitleCache(true);
155
						team.setProtectedNomenclaturalTitleCache(true);
156
					}
169
					success &= makeSequence(state, team, teamId, rsSequence, personMap);
170

  
171
					TeamOrPersonBase<?> author = handleTeam(state, team, authorTeamCache,
172
					        fullAuthorTeamCache, preliminaryFlag, teamId);
173

  
174
					if (author == team && team.getTeamMembers().size() == 0 && preliminaryFlag == false){
175
                        team.setProtectedTitleCache(true);
176
                        team.setProtectedNomenclaturalTitleCache(true);
177
                    }
157 178

  
158 179
					//created, notes
159
					doIdCreatedUpdatedNotes(state, team, rs, teamId, NAMESPACE);
180
					doIdCreatedUpdatedNotes(state, author, rs, teamId, NAMESPACE);
160 181

  
161
					teamsToSave.add(team);
182
					authorsToSave.add(author);
162 183
				}catch(Exception ex){
163 184
					logger.error(ex.getMessage());
164 185
					ex.printStackTrace();
......
171 192
		}
172 193

  
173 194
		//logger.info(i + " " + pluralString + " handled");
174
		getAgentService().saveOrUpdate((Collection)teamsToSave);
195
		getAgentService().saveOrUpdate((Collection)authorsToSave);
175 196

  
176 197
		return success;
177 198
	}
178 199

  
179 200

  
180
	@Override
201
	/**
202
     * @param state
203
     * @param team
204
     * @param authorTeamCache
205
     * @param fullAuthorTeamCache
206
     * @param preliminaryFlag
207
     * @return
208
     */
209
    private TeamOrPersonBase<?> handleTeam(BerlinModelImportState state, Team team, String authorTeamCache,
210
            String fullAuthorTeamCache, boolean preliminaryFlag, int authorTeamId) {
211
        if (!team.getTeamMembers().isEmpty()){
212
            return team;
213
        }
214

  
215
        TeamOrPersonBase<?> result = team;
216
        if (isBlank(authorTeamCache)){
217
            logger.warn("Blank authorTeamCache not yet handled: " + authorTeamId);
218
        }
219

  
220
        if (!hasTeamSeparator(authorTeamCache) && !hasTeamSeparator(fullAuthorTeamCache)){
221
            Person person = makePerson(fullAuthorTeamCache, authorTeamCache, preliminaryFlag, authorTeamId);
222
            result = deduplicatePerson(state, person);
223
            if (result != person){
224
                logger.debug("Single person team deduplicated: " + authorTeamId);
225
            }else{
226
                person.addImportSource(String.valueOf(authorTeamId), NAMESPACE, state.getTransactionalSourceReference(), null);
227

  
228
            }
229
        }else{
230
            String[] fullTeams = splitTeam(fullAuthorTeamCache);
231
            String[] nomTeams = splitTeam(authorTeamCache);
232
            if (fullTeams.length == nomTeams.length || fullTeams.length == 0){
233
                for (int i = 0; i< nomTeams.length ;i++){
234
                    String fullTeam = fullTeams.length == 0? null: fullTeams[i].trim();
235
                    Person member = makePerson(fullTeam, nomTeams[i].trim(), preliminaryFlag, authorTeamId);
236
                    if (member == null){
237
                        logger.warn("Unexpected short nom. author: " + nomTeams[i].trim() + "; " + authorTeamId);
238
                        continue;
239
                    }
240
                    if (i == nomTeams.length -1 && isEtAl(member)){
241
                        team.setHasMoreMembers(true);
242
                    }else{
243
                        Person dedupMember = deduplicatePerson(state, member);
244
                        if (dedupMember != member){
245
                            logger.debug("Member deduplicated: " + authorTeamId);
246
                        }
247
                        //TODO add idInBM
248
                        team.addTeamMember(dedupMember);
249
                    }
250
                }
251
                //check nomenclatural title
252
                if (team.getCacheStrategy().getNomenclaturalTitle(team).equals(authorTeamCache)){
253
                    team.setProtectedNomenclaturalTitleCache(false);
254
                }else if(team.getCacheStrategy().getNomenclaturalTitle(team).replace(" ,", ",").equals(authorTeamCache)){
255
                    //also accept teams with ' , ' as separator as not protected
256
                    team.setProtectedTitleCache(false);
257
                }else{
258
                    team.setNomenclaturalTitle(authorTeamCache, true);
259
                    logger.warn("Creation of nomTitle for team with members did not work: " + authorTeamCache + " <-> " + team.getCacheStrategy().getNomenclaturalTitle(team)+ " : " + authorTeamId);
260
                }
261
                //check titleCache
262
                if (team.generateTitle().equals(fullAuthorTeamCache)){
263
                    team.setProtectedTitleCache(false);
264
                }else if(fullAuthorTeamCache == null){
265
                    //do nothing
266
                }else if(team.generateTitle().replace(" & ", ", ").equals(fullAuthorTeamCache.replace(" & ", ", "))){
267
                    //also accept teams with ', ' as final member separator as not protected
268
                    team.setProtectedTitleCache(false);
269
                }else if(team.getFullTitle().replace(" & ", ", ").equals(fullAuthorTeamCache.replace(" & ", ", "))){
270
                    //also accept teams with ', ' as final member separator as not protected
271
                    team.setProtectedTitleCache(false);
272
                }else{
273
                    String fullTitle = team.getFullTitle().replace(" & ", ", ");
274
                    team.setTitleCache(fullAuthorTeamCache, true);
275
                    logger.warn("Creation of titleCache for team with members did not work: " + fullAuthorTeamCache + " <-> " + team.generateTitle()+ " : " + authorTeamId);
276
                }
277
            }else{
278
                logger.warn("AuthorTeamCache and fullAuthorTeamCache have not the same team size: " + authorTeamCache + " <-> " + fullAuthorTeamCache+ " : " + authorTeamId);
279
            }
280
        }
281
        return result;
282
    }
283

  
284

  
285
    /**
286
     * @param member
287
     * @return
288
     */
289
    private Person deduplicatePerson(BerlinModelImportState state, Person person) {
290
        Person result = deduplicationHelper.getExistingAuthor(state, person);
291
        return result;
292
    }
293

  
294

  
295
    /**
296
     * @param member
297
     * @return
298
     */
299
    protected static boolean isEtAl(Person member) {
300
        if (member != null && isEtAl(member.getTitleCache()) && isEtAl(member.getNomenclaturalTitle())){
301
            return true;
302
        }
303
        return false;
304
    }
305

  
306
    private static boolean isEtAl(String str) {
307
        if (str == null || !str.equals("al.")){
308
            return false;
309
        }else{
310
            return true;
311
        }
312
    }
313

  
314
    private Person makePerson(String full, String nom, boolean preliminaryFlag, int authorTeamId) {
315
        Person person = Person.NewInstance(nom, null, null, null);
316
        if (isBlank(full)){
317
            //do nothing
318
        }else if (!full.matches(".*[\\s\\.].*")){
319
            person.setFamilyName(full);
320
        }else if (nom.equals(full)){
321
            parsePerson(person, full, preliminaryFlag);
322
        }else{
323
            parsePerson(person, full, true);
324
        }
325
        if (nom.length() <= 2 || (nom.length() == 3 && nom.endsWith(".")) ){
326
            if (!nom.matches("((L|Sm|DC|al|Sw|Qz|Fr|Ib)\\.|Hu|Ma|Hy|Wu)")){
327
                logger.warn("Unexpected short nom author name part: " + nom + "; " + authorTeamId);
328
            }
329
        }
330

  
331
        return person;
332
    }
333

  
334
    /**
335
     * @param person
336
     */
337
    private void parsePerson(Person person, String str, boolean preliminary) {
338
        if (str.matches("\\p{javaUpperCase}\\.(\\s\\p{javaUpperCase}\\.)*\\s\\p{javaUpperCase}\\p{javaLowerCase}{2,}")){
339
            String[] splits = str.split("\\s");
340
            person.setFamilyName(splits[splits.length-1]);
341
            String initials = splits[0];
342
            for (int i = 1; i < splits.length -1; i++ ){
343
                initials += " " + splits[i];
344
            }
345
            person.setInitials(initials);
346
            person.setProtectedTitleCache(false);
347
        }else{
348
            person.setTitleCache(str, preliminary);
349
        }
350

  
351
    }
352

  
353
    private static final String TEAM_SPLITTER = "(,|;|&| et | Et )";
354

  
355
    /**
356
     * @param fullAuthorTeamCache
357
     * @param TEAM_SPLITTER
358
     * @return
359
     */
360
    protected static String[] splitTeam(String teamCache) {
361
        if (teamCache == null){
362
            return new String[0];
363
        }
364
        return teamCache.split(TEAM_SPLITTER);
365
    }
366

  
367

  
368
    /**
369
     * @param authorTeamCache
370
     * @return
371
     */
372
    protected static boolean hasTeamSeparator(String teamCache) {
373
        if (isBlank(teamCache)){
374
            return false;
375
        }else if (teamCache.contains(",") || teamCache.contains("&")||teamCache.contains(" et ")||teamCache.endsWith(" al.")){
376
            return true;
377
        }else{
378
            return false;
379
        }
380
    }
381

  
382

  
383
    @Override
181 384
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, BerlinModelImportState state)  {
182 385
		String nameSpace;
183 386
		Class<?> cdmClass;
......
226 429
		return result;
227 430
	}
228 431

  
229
	private boolean makeSequence(Team team, int teamId, ResultSet rsSequence, Map<String, Person> personMap){
432
	private boolean makeSequence(BerlinModelImportState state, Team team, int teamId, ResultSet rsSequence, Map<String, Person> personMap){
230 433
		try {
231 434
			if (rsSequence.isBeforeFirst()){
232 435
				rsSequence.next();
......
245 448
				}
246 449
			}
247 450
			while (sequenceTeamFk < teamId){
248
				logger.warn("Sequence team FK is smaller then team ID. Some teams for a sequence may not be available");
451
				if (! state.getConfig().isEuroMed()){
452
				    logger.warn("Sequence team FK is smaller then team ID. Some teams for a sequence may not be available");
453
				}
249 454
				rsSequence.next();
250 455
				sequenceTeamFk = rsSequence.getInt("AuthorTeamFk");
251 456
			}
app-import/src/main/java/eu/etaxonomy/cdm/io/berlinModel/in/BerlinModelReferenceImport.java
57 57
import eu.etaxonomy.cdm.io.common.mapping.berlinModel.CdmOneToManyMapper;
58 58
import eu.etaxonomy.cdm.io.common.mapping.berlinModel.CdmStringMapper;
59 59
import eu.etaxonomy.cdm.io.common.mapping.berlinModel.CdmUriMapper;
60
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
61
import eu.etaxonomy.cdm.model.agent.Person;
60 62
import eu.etaxonomy.cdm.model.agent.Team;
61 63
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
62 64
import eu.etaxonomy.cdm.model.common.CdmBase;
......
69 71
import eu.etaxonomy.cdm.model.reference.IPrintSeries;
70 72
import eu.etaxonomy.cdm.model.reference.Reference;
71 73
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
74
import eu.etaxonomy.cdm.strategy.cache.agent.TeamDefaultCacheStrategy;
72 75

  
73 76
/**
74 77
 * @author a.mueller
......
89 92
	public static final UUID DATE_STRING_UUID = UUID.fromString("e4130eae-606e-4b0c-be4f-e93dc161be7d");
90 93
	public static final UUID IS_PAPER_UUID = UUID.fromString("8a326129-d0d0-4f9d-bbdf-8d86b037c65e");
91 94

  
95
	private static ImportDeduplicationHelper<BerlinModelImportState> deduplicationHelper;
92 96

  
93 97
	private final int modCount = 1000;
94 98
	private static final String pluralString = "references";
......
179 183
	@Override
180 184
	protected void doInvoke(BerlinModelImportState state){
181 185
		logger.info("start make " + getPluralString() + " ...");
186
		deduplicationHelper = ImportDeduplicationHelper.NewInstance(this, state);
182 187

  
183 188
		boolean success = true;
184 189
		initializeMappers(state);
......
251 256
		if (! success){
252 257
			state.setUnsuccessfull();
253 258
		}
259
	    deduplicationHelper = null;
254 260
		return;
255 261
	}
256 262

  
......
399 405

  
400 406
			//team map
401 407
			nameSpace = BerlinModelAuthorTeamImport.NAMESPACE;
402
			cdmClass = Team.class;
408
			cdmClass = TeamOrPersonBase.class;
403 409
			idSet = teamIdSet;
404 410
			@SuppressWarnings("unchecked")
405 411
            Map<String, Team> teamMap = (Map<String, Team>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
......
407 413

  
408 414
            //refAuthor map
409 415
            nameSpace = REF_AUTHOR_NAMESPACE;
410
            cdmClass = Team.class;
416
            cdmClass = TeamOrPersonBase.class;
411 417
            idSet = teamStringSet2;
412 418
            @SuppressWarnings("unchecked")
413 419
            Map<String, Team> refAuthorMap = (Map<String, Team>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
......
550 556
		String nomTitleAbbrev = rs.getString("nomTitleAbbrev");
551 557
		boolean isPreliminary = rs.getBoolean("PreliminaryFlag");
552 558
		String refAuthorString = rs.getString("refAuthorString");
553
		Integer nomAuthorTeamFk = rs.getInt("NomAuthorTeamFk");
554
		String strNomAuthorTeamFk = String.valueOf(nomAuthorTeamFk);
555
		TeamOrPersonBase<?> nomAuthor = teamMap.get(strNomAuthorTeamFk);
559
		Integer nomAuthorTeamFk = nullSafeInt(rs, "NomAuthorTeamFk");
560

  
561
		TeamOrPersonBase<?> nomAuthor = null;
562
		if (nomAuthorTeamFk != null){
563
		    String strNomAuthorTeamFk = String.valueOf(nomAuthorTeamFk);
564
		    nomAuthor = teamMap.get(strNomAuthorTeamFk);
565
		    if (nomAuthor == null){
566
		        logger.warn("NomAuthor ("+strNomAuthorTeamFk+") not found in teamMap for " + refId);
567
		    }
568
		}
556 569

  
557 570
		Reference sourceReference = state.getTransactionalSourceReference();
558 571

  
......
571 584
		}
572 585

  
573 586
		//author
574
		TeamOrPersonBase<?> author = getAuthorship(state, refAuthorString, nomAuthor);
587
		TeamOrPersonBase<?> author = getAuthorship(state, refAuthorString, nomAuthor, refId);
575 588
		ref.setAuthorship(author);
576 589

  
577 590
		//save
......
899 912
	}
900 913

  
901 914

  
902
	private static TeamOrPersonBase<?> getAuthorship(BerlinModelImportState state, String authorString, TeamOrPersonBase<?> nomAuthor){
915
	private static TeamOrPersonBase<?> getAuthorship(BerlinModelImportState state, String refAuthorString,
916
	        TeamOrPersonBase<?> nomAuthor, Integer refId){
903 917

  
904 918
	    TeamOrPersonBase<?> result;
905 919
		if (nomAuthor != null){
906 920
			result = nomAuthor;
907
		} else if (StringUtils.isNotBlank(authorString)){
921
			if (isNotBlank(refAuthorString) && !nomAuthor.getTitleCache().equals(refAuthorString)){
922
			    boolean isSimilar = handleSimilarAuthors(state, refAuthorString, nomAuthor);
923
			    if (! isSimilar){
924
			        logger.warn("refAuthorString differs from nomAuthor.titleCache: " + refAuthorString
925
			                + " <-> " + nomAuthor.getTitleCache() + "; RefId: " + refId);
926
			    }
927
			}
928

  
929
		} else if (isNotBlank(refAuthorString)){
930
		    refAuthorString = refAuthorString.trim();
908 931
			//TODO match with existing Persons/Teams
909
		    Team team = state.getRelatedObject(REF_AUTHOR_NAMESPACE, authorString, Team.class);
910
			if (team == null){
911
			    team = Team.NewInstance();
912
			    team.setNomenclaturalTitle(authorString);
913
			    team.setTitleCache(authorString, true);
914
			    state.addRelatedObject(REF_AUTHOR_NAMESPACE, authorString, team);
915
			    team.addImportSource(authorString, REF_AUTHOR_NAMESPACE, state.getTransactionalSourceReference(), null);
932
		    TeamOrPersonBase<?> author = state.getRelatedObject(REF_AUTHOR_NAMESPACE, refAuthorString, TeamOrPersonBase.class);
933
			if (author == null){
934
			    if (!BerlinModelAuthorTeamImport.hasTeamSeparator(refAuthorString)){
935
			        author = makePerson(refAuthorString, refId);
936
			    }else{
937
			        author = makeTeam(state, refAuthorString, refId);
938
			    }
939
			    state.addRelatedObject(REF_AUTHOR_NAMESPACE, refAuthorString, author);
940
			    author.addImportSource(refAuthorString, REF_AUTHOR_NAMESPACE, state.getTransactionalSourceReference(), null);
916 941
			}
917
			result = team;
942
			result = author;
918 943
		}else{
919 944
			result = null;
920 945
		}
......
922 947
		return result;
923 948
	}
924 949

  
950
    /**
951
     * @param state
952
     * @param refAuthorString
953
     * @param refId
954
     * @return
955
     */
956
    private static Team makeTeam(BerlinModelImportState state, String refAuthorString, Integer refId) {
957
        Team team = Team.NewInstance();
958
        if (containsEdOrColon(refAuthorString)){
959
            team.setTitleCache(refAuthorString, true);
960
        }else{
961
            String[] fullTeams = BerlinModelAuthorTeamImport.splitTeam(refAuthorString);
962
            boolean lastWasInitials = false;
963
            for (int i = 0; i< fullTeams.length ;i++){
964
                if (lastWasInitials){
965
                    lastWasInitials = false;
966
                    continue;
967
                }
968
                String fullTeam = fullTeams[i].trim();
969
                String initials = null;
970
                if (fullTeams.length > i+1){
971
                    String nextSplit = fullTeams[i+1].trim();
972
                    if (isInitial(nextSplit)){
973
                        lastWasInitials = true;
974
                        initials = nextSplit;
975
                    }
976
                }
977
                Person member = makePerson(fullTeam, refId);
978

  
979
                if (initials != null && !member.isProtectedTitleCache()){
980
                    member.setInitials(initials);
981
                }else if (initials != null){
982
                    member.setTitleCache(member.getTitleCache() + ", " + initials, true);
983
                }
984

  
985
                if (i == fullTeams.length -1 && BerlinModelAuthorTeamImport.isEtAl(member)){
986
                    team.setHasMoreMembers(true);
987
                }else{
988
                    Person dedupMember = deduplicatePerson(state, member);
989
                    if (dedupMember != member){
990
                        logger.debug("Member deduplicated: " + refId);
991
                    }else{
992
                        member.addImportSource(refAuthorString, REF_AUTHOR_NAMESPACE, state.getTransactionalSourceReference(), null);
993
                    }
994
                    //TODO add idInBM
995
                    team.addTeamMember(dedupMember);
996
                }
997
            }
998
        }
925 999

  
926
	/**
1000
        TeamDefaultCacheStrategy formatter = (TeamDefaultCacheStrategy) team.getCacheStrategy();
1001
        formatter.setEtAlPosition(100);
1002
        if (formatter.getTitleCache(team).equals(refAuthorString)){
1003
            team.setProtectedTitleCache(false);
1004
        }else if(formatter.getTitleCache(team).replace(" & ", ", ").equals(refAuthorString.replace(" & ", ", ").replace(" ,", ","))){
1005
            //also accept teams with ', ' as final member separator as not protected
1006
            team.setProtectedTitleCache(false);
1007
        }else if(formatter.getFullTitle(team).replace(" & ", ", ").equals(refAuthorString.replace(" & ", ", "))){
1008
            //.. or teams with initials first
1009
            team.setProtectedTitleCache(false);
1010
        }else if (containsEdOrColon(refAuthorString)){
1011
            //nothing to do, it is expected to be protected
1012
        }else{
1013
            team.setTitleCache(refAuthorString, true);
1014
            logger.warn("Creation of titleCache for team with members did not (fully) work: " + refAuthorString + " <-> " + formatter.getTitleCache(team)+ " : " + refId);
1015
        }
1016
        return team;
1017
    }
1018

  
1019
    /**
1020
     * @param refAuthorString
1021
     * @return
1022
     */
1023
    private static boolean containsEdOrColon(String str) {
1024
        if (str.contains(" ed.") || str.contains(" Ed.") || str.contains("(ed.")
1025
                || str.contains("[ed.") || str.contains("(Eds)") || str.contains("(Eds.)") ||
1026
                str.contains("(eds.)") || str.contains(":")|| str.contains(";")){
1027
            return true;
1028
        }else{
1029
            return false;
1030
        }
1031
    }
1032

  
1033
    /**
1034
     * @param nextSplit
1035
     * @return
1036
     */
1037
    private static boolean isInitial(String str) {
1038
        if (str == null){
1039
            return false;
1040
        }
1041
        boolean matches = str.trim().matches("(\\p{javaUpperCase}|Yu|Th|Ch|Lj|Sz|Dz|Sh)\\.?(\\s*[-\\s]\\s*(\\p{javaUpperCase}|Yu)\\.?)*(\\s+(van|von))?");
1042
        return matches;
1043
    }
1044

  
1045
    private static Person deduplicatePerson(BerlinModelImportState state, Person person) {
1046
        Person result = deduplicationHelper.getExistingAuthor(state, person);
1047
        return result;
1048
    }
1049

  
1050
    private static Person makePerson(String full, Integer refId) {
1051
        Person person = Person.NewInstance();
1052
        person.setTitleCache(full, true);
1053
        if (!full.matches(".*[\\s\\.].*")){
1054
            person.setFamilyName(full);
1055
            person.setProtectedTitleCache(false);
1056
        }else if (full.matches("(\\p{javaUpperCase}|Kh)\\.(\\s\\p{javaUpperCase}\\.)*\\s\\p{javaUpperCase}\\p{javaLowerCase}{2,}")){
1057
            String[] splits = full.split("\\s");
1058
            person.setFamilyName(splits[splits.length-1]);
1059
            String initials = splits[0];
1060
            for (int i = 1; i < splits.length -1; i++ ){
1061
                initials += " " + splits[i];
1062
            }
1063
            person.setInitials(initials);
1064
            person.setProtectedTitleCache(false);
1065
        }
1066
        if ((full.length() <= 2 && !full.matches("(Li|Bo|Em|Ay|Ma)")) || (full.length() == 3 && full.endsWith(".") && !full.equals("al.")) ){
1067
//            if (!full.matches("((L|Sm|DC|al|Sw|Qz|Fr|Ib)\\.|Hu|Ma|Hy|Wu)")){
1068
                logger.warn("Unexpected short nom author name part: " + full + "; " + refId);
1069
//            }
1070
        }
1071

  
1072
        return person;
1073
    }
1074

  
1075
    /**
1076
     * @param state
1077
     * @param refAuthorString
1078
     * @param nomAuthor
1079
     * @return
1080
     */
1081
    private static boolean handleSimilarAuthors(BerlinModelImportState state, String refAuthorString,
1082
            TeamOrPersonBase<?> nomAuthor) {
1083
        if (refAuthorString.equals(nomAuthor.getNomenclaturalTitle())){
1084
            //nomTitle equal
1085
            return true;
1086
        }else{
1087
            String nomTitle = nomAuthor.getTitleCache();
1088
            if (refAuthorString.replace(" & ", ", ").equals(nomTitle.replace(" & ", ", "))){
1089
                //nomTitle equal except for "&"
1090
                return true;
1091
            }
1092

  
1093
            if (refAuthorString.replace(" & ", ", ").equals(nomAuthor.getFullTitle().replace(" & ", ", "))){
1094
                return true;
1095
            }
1096

  
1097
            if (refAuthorString.contains(",") && !nomTitle.contains(",") && nomAuthor.isInstanceOf(Person.class)){
1098
                String[] splits = refAuthorString.split(",");
1099
                Person person = CdmBase.deproxy(nomAuthor, Person.class);
1100
                if (splits.length == 2){
1101
                    String newMatch = splits[1].trim() + " " + splits[0].trim();
1102
                    if (newMatch.equals(nomTitle)){
1103
                        if (isBlank(person.getFamilyName())){
1104
                            person.setFamilyName(splits[0].trim());
1105
                        }
1106
                        if (isBlank(person.getInitials())){
1107
                            person.setInitials(splits[1].trim());
1108
                        }
1109
                        return true;
1110
                    }
1111
                }
1112
            }
1113
        }
1114
        return false;
1115
    }
1116

  
1117
    /**
927 1118
	 * @param lowerCase
928 1119
	 * @param config
929 1120
	 * @return

Also available in: Unified diff