1
|
/**
|
2
|
* Copyright (C) 2022 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.strategy.parser;
|
10
|
|
11
|
import java.util.ArrayList;
|
12
|
import java.util.List;
|
13
|
import java.util.regex.Matcher;
|
14
|
import java.util.regex.Pattern;
|
15
|
|
16
|
import org.apache.commons.lang3.StringUtils;
|
17
|
|
18
|
import eu.etaxonomy.cdm.common.UTF8;
|
19
|
import eu.etaxonomy.cdm.model.agent.Person;
|
20
|
import eu.etaxonomy.cdm.model.agent.Team;
|
21
|
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
|
22
|
|
23
|
/**
|
24
|
* @author a.mueller
|
25
|
* @date 05.03.2022
|
26
|
*/
|
27
|
public class BibliographicAuthorParser {
|
28
|
|
29
|
private static final String etAl = "\\set\\s+al\\.?";
|
30
|
private static final String team = ".+\\s*(&.+|"+etAl+")";
|
31
|
private static final Pattern teamRe = Pattern.compile(team);
|
32
|
private static final String initialChars = "[A-Z"+UTF8.CAPITAL_A_ACUTE
|
33
|
+ UTF8.CAPITAL_E_ACUTE
|
34
|
+ UTF8.CAPITAL_I_ACUTE
|
35
|
+ UTF8.CAPITAL_O_ACUTE
|
36
|
+ UTF8.CAPITAL_U_ACUTE
|
37
|
+ "]";
|
38
|
private static String initialsRe = "("+initialChars+"\\.?\\s?|(del?|de la|de los|v[ao]n)\\s*){1,5}";
|
39
|
private static String initialsStrictRe = "((?!"+initialsRe+"\\s).)*\\s+("+initialsRe+")";
|
40
|
private static Pattern pattern = Pattern.compile(initialsStrictRe);
|
41
|
|
42
|
|
43
|
private static BibliographicAuthorParser singleton;
|
44
|
public static final BibliographicAuthorParser Instance() {
|
45
|
if (singleton == null) {
|
46
|
singleton = new BibliographicAuthorParser();
|
47
|
}
|
48
|
return singleton;
|
49
|
}
|
50
|
|
51
|
public TeamOrPersonBase<?> parse(String authorStr) {
|
52
|
TeamOrPersonBase<?> result;
|
53
|
if (StringUtils.isBlank(authorStr)) {
|
54
|
return null;
|
55
|
}
|
56
|
Matcher matcher = teamRe.matcher(authorStr);
|
57
|
if (matcher.matches()) {
|
58
|
Team team = Team.NewInstance();
|
59
|
result = team;
|
60
|
String bracketPart = matcher.group(1);
|
61
|
List<Person> members = getMembers(authorStr.substring(0, authorStr.replace(bracketPart, "").length()));
|
62
|
members.stream().forEach(m->team.addTeamMember(m));
|
63
|
if (bracketPart.matches(etAl) || bracketPart.matches("\\s*&\\s*al\\.?")) {
|
64
|
team.setHasMoreMembers(true);
|
65
|
}else {
|
66
|
bracketPart = bracketPart.substring(1).trim();
|
67
|
members = getMembers(bracketPart);
|
68
|
//TODO this should be only 1 Person so we may call single person directly
|
69
|
members.stream().forEach(m->team.addTeamMember(m));
|
70
|
}
|
71
|
}else {
|
72
|
List<Person> members = getMembers(authorStr);
|
73
|
if (members.size() == 1) {
|
74
|
result = members.get(0);
|
75
|
}else {
|
76
|
Team team = Team.NewInstance();
|
77
|
result = team;
|
78
|
members.stream().forEach(m->team.addTeamMember(m));
|
79
|
}
|
80
|
}
|
81
|
return result;
|
82
|
}
|
83
|
|
84
|
private List<Person> getMembers(String membersStr) {
|
85
|
|
86
|
|
87
|
List<Person> result = new ArrayList<>();
|
88
|
String[] split = membersStr.split(",");
|
89
|
|
90
|
boolean isLast = false;
|
91
|
// boolean lastWasFamily;
|
92
|
for (int i = 0; i<split.length; i++) {
|
93
|
Person person = Person.NewInstance();
|
94
|
isLast = i >= split.length-1;
|
95
|
String str = split[i];
|
96
|
Matcher matcher = pattern.matcher(str);
|
97
|
if (matcher.matches()) {
|
98
|
//initials not separated by comma
|
99
|
String initials = matcher.group(4);
|
100
|
String family = str.replaceAll(initials + "$", "").trim();
|
101
|
person.setFamilyName(family);
|
102
|
person.setInitials(initials.trim());
|
103
|
}else {
|
104
|
if (isLast) {
|
105
|
person.setTitleCache(str.trim(), true);
|
106
|
}else {
|
107
|
String next = split[i+1].trim();
|
108
|
if (next.matches(initialsRe)) {
|
109
|
person.setFamilyName(str.trim());
|
110
|
person.setInitials(next.trim());
|
111
|
i++;
|
112
|
while(i+1 < split.length && split[i+1].trim().matches(initialsRe)) {
|
113
|
next = split[i+1].trim();
|
114
|
person.setInitials(next.trim());
|
115
|
i++;
|
116
|
}
|
117
|
}else {
|
118
|
person.setTitleCache(str.trim(), true);
|
119
|
}
|
120
|
}
|
121
|
}
|
122
|
result.add(person);
|
123
|
}
|
124
|
return result;
|
125
|
}
|
126
|
}
|