Project

General

Profile

Download (4.58 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2022 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.strategy.parser;
10

    
11
import java.util.ArrayList;
12
import java.util.List;
13
import java.util.regex.Matcher;
14
import java.util.regex.Pattern;
15

    
16
import org.apache.commons.lang3.StringUtils;
17

    
18
import eu.etaxonomy.cdm.common.UTF8;
19
import eu.etaxonomy.cdm.model.agent.Person;
20
import eu.etaxonomy.cdm.model.agent.Team;
21
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
22

    
23
/**
24
 * @author a.mueller
25
 * @date 05.03.2022
26
 */
27
public class BibliographicAuthorParser {
28

    
29
    private static final String etAl = "\\set\\s+al\\.?";
30
    private static final String team = ".+\\s*(&.+|"+etAl+")";
31
    private static final Pattern teamRe = Pattern.compile(team);
32
    private static final String initialChars = "[A-Z"+UTF8.CAPITAL_A_ACUTE
33
            + UTF8.CAPITAL_E_ACUTE
34
            + UTF8.CAPITAL_I_ACUTE
35
            + UTF8.CAPITAL_O_ACUTE
36
            + UTF8.CAPITAL_U_ACUTE
37
            + "]";
38
    private static String initialsRe = "("+initialChars+"\\.?\\s?|(del?|de la|de los|v[ao]n)\\s*){1,5}";
39
    private static String initialsStrictRe = "((?!"+initialsRe+"\\s).)*\\s+("+initialsRe+")";
40
    private static Pattern pattern = Pattern.compile(initialsStrictRe);
41

    
42

    
43
    private static BibliographicAuthorParser singleton;
44
    public static final BibliographicAuthorParser Instance() {
45
        if (singleton == null) {
46
            singleton = new BibliographicAuthorParser();
47
        }
48
        return singleton;
49
    }
50

    
51
    public TeamOrPersonBase<?> parse(String authorStr) {
52
        TeamOrPersonBase<?> result;
53
        if (StringUtils.isBlank(authorStr)) {
54
            return null;
55
        }
56
        Matcher matcher = teamRe.matcher(authorStr);
57
        if (matcher.matches()) {
58
            Team team = Team.NewInstance();
59
            result = team;
60
            String bracketPart = matcher.group(1);
61
            List<Person> members = getMembers(authorStr.substring(0, authorStr.replace(bracketPart, "").length()));
62
            members.stream().forEach(m->team.addTeamMember(m));
63
            if (bracketPart.matches(etAl) || bracketPart.matches("\\s*&\\s*al\\.?")) {
64
                team.setHasMoreMembers(true);
65
            }else {
66
                bracketPart = bracketPart.substring(1).trim();
67
                members = getMembers(bracketPart);
68
                //TODO this should be only 1 Person so we may call single person directly
69
                members.stream().forEach(m->team.addTeamMember(m));
70
            }
71
        }else {
72
            List<Person> members = getMembers(authorStr);
73
            if (members.size() == 1) {
74
                result = members.get(0);
75
            }else {
76
                Team team = Team.NewInstance();
77
                result = team;
78
                members.stream().forEach(m->team.addTeamMember(m));
79
            }
80
        }
81
        return result;
82
    }
83

    
84
    private List<Person> getMembers(String membersStr) {
85

    
86

    
87
        List<Person> result = new ArrayList<>();
88
        String[] split = membersStr.split(",");
89

    
90
        boolean isLast = false;
91
//        boolean lastWasFamily;
92
        for (int i = 0; i<split.length; i++) {
93
            Person person = Person.NewInstance();
94
            isLast = i >= split.length-1;
95
            String str = split[i];
96
            Matcher matcher = pattern.matcher(str);
97
            if (matcher.matches()) {
98
                //initials not separated by comma
99
                String initials = matcher.group(4);
100
                String family = str.replaceAll(initials + "$", "").trim();
101
                person.setFamilyName(family);
102
                person.setInitials(initials.trim());
103
            }else {
104
                if (isLast) {
105
                    person.setTitleCache(str.trim(), true);
106
                }else {
107
                    String next = split[i+1].trim();
108
                    if (next.matches(initialsRe)) {
109
                        person.setFamilyName(str.trim());
110
                        person.setInitials(next.trim());
111
                        i++;
112
                        while(i+1 < split.length && split[i+1].trim().matches(initialsRe)) {
113
                             next = split[i+1].trim();
114
                             person.setInitials(next.trim());
115
                             i++;
116
                        }
117
                    }else {
118
                        person.setTitleCache(str.trim(), true);
119
                    }
120
                }
121
            }
122
            result.add(person);
123
        }
124
        return result;
125
    }
126
}
(1-1/9)