/**
 * Copyright (C) 2009 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
9
|
|
10
|
package eu.etaxonomy.cdm.io.eflora.centralAfrica.ericaceae;
|
11
|
|
12
|
import java.util.List;
|
13
|
import java.util.regex.Matcher;
|
14
|
import java.util.regex.Pattern;
|
15
|
|
16
|
import org.apache.log4j.Logger;
|
17
|
import org.jdom.Element;
|
18
|
import org.springframework.stereotype.Component;
|
19
|
|
20
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
21
|
import eu.etaxonomy.cdm.ext.ipni.IpniService;
|
22
|
import eu.etaxonomy.cdm.io.eflora.EfloraImportState;
|
23
|
import eu.etaxonomy.cdm.io.eflora.EfloraTaxonImport;
|
24
|
import eu.etaxonomy.cdm.model.agent.Person;
|
25
|
import eu.etaxonomy.cdm.model.agent.Team;
|
26
|
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
|
27
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
28
|
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
|
29
|
import eu.etaxonomy.cdm.model.common.TimePeriod;
|
30
|
import eu.etaxonomy.cdm.model.description.Feature;
|
31
|
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
32
|
import eu.etaxonomy.cdm.model.description.TextData;
|
33
|
import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
|
34
|
import eu.etaxonomy.cdm.model.name.INonViralName;
|
35
|
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
|
36
|
import eu.etaxonomy.cdm.model.name.TaxonName;
|
37
|
import eu.etaxonomy.cdm.model.name.TypeDesignationBase;
|
38
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
39
|
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
|
40
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
41
|
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
|
42
|
|
43
|
|
44
|
/**
|
45
|
* @author a.mueller
|
46
|
*
|
47
|
*/
|
48
|
@Component
|
49
|
public class CentralAfricaEricaceaeTaxonImport extends EfloraTaxonImport {
|
50
|
private static final long serialVersionUID = 6442665916458420942L;
|
51
|
private static final Logger logger = Logger.getLogger(CentralAfricaEricaceaeTaxonImport.class);
|
52
|
|
53
|
|
54
|
@Override
|
55
|
protected TeamOrPersonBase handleNomenclaturalReference(TaxonName name, String value) {
|
56
|
Reference nomRef = ReferenceFactory.newGeneric();
|
57
|
nomRef.setTitleCache(value, true);
|
58
|
parseNomStatus(nomRef, name);
|
59
|
name.setNomenclaturalReference(nomRef);
|
60
|
|
61
|
String microReference = parseReferenceYearAndDetail(nomRef);
|
62
|
microReference = removeTrailing(microReference, ")");
|
63
|
|
64
|
microReference = parseHomonym(microReference, name);
|
65
|
name.setNomenclaturalMicroReference(microReference);
|
66
|
|
67
|
TeamOrPersonBase<?> nameTeam = CdmBase.deproxy(name.getCombinationAuthorship(), TeamOrPersonBase.class);
|
68
|
TeamOrPersonBase<?> refTeam = nomRef.getAuthorship();
|
69
|
if (nameTeam == null ){
|
70
|
logger.warn("Name has nom. ref. but no author team. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
|
71
|
}else if (refTeam == null ){
|
72
|
logger.warn("Name has nom. ref. but no nom.ref. author. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
|
73
|
}else if (! authorTeamsMatch(refTeam, nameTeam)){
|
74
|
logger.warn("Nom.Ref. author and comb. author do not match: " + nomRef.getTitleCache() + " <-> " + nameTeam.getNomenclaturalTitle());
|
75
|
}else {
|
76
|
nomRef.setAuthorship(nameTeam);
|
77
|
nomRef.setTitle(CdmUtils.Nz(nomRef.getTitle()) + " - no title given yet -");
|
78
|
nameTeam.setTitleCache(refTeam.getTitleCache(), true);
|
79
|
}
|
80
|
return nameTeam;
|
81
|
}
|
82
|
|
83
|
/**
|
84
|
* Extracts the date published part and returns micro reference
|
85
|
* @param ref
|
86
|
* @return
|
87
|
*/
|
88
|
protected String parseReferenceYearAndDetail(Reference ref){
|
89
|
String detailResult = null;
|
90
|
String titleToParse = ref.getTitleCache();
|
91
|
titleToParse = removeReferenceBracket(titleToParse, ref);
|
92
|
|
93
|
int detailStart = titleToParse.indexOf(":");
|
94
|
if (detailStart >= 0){
|
95
|
detailResult = titleToParse.substring(detailStart + 1);
|
96
|
titleToParse = titleToParse.substring(0, titleToParse.length() - detailResult.length() - 1).trim();
|
97
|
detailResult = detailResult.trim();
|
98
|
}
|
99
|
|
100
|
String reYear = "\\s[1-2]{1}[0-9]{3}";
|
101
|
String reYearPeriod = reYear;
|
102
|
//
|
103
|
// //pattern for the whole string
|
104
|
Pattern patReference = Pattern.compile( reYearPeriod );
|
105
|
Matcher matcher = patReference.matcher(titleToParse);
|
106
|
if (matcher.find()){
|
107
|
int start = matcher.start();
|
108
|
int end = matcher.end();
|
109
|
//
|
110
|
String strPeriod = titleToParse.substring(start, end);
|
111
|
TimePeriod datePublished = TimePeriodParser.parseString(strPeriod);
|
112
|
ref.setDatePublished(datePublished);
|
113
|
String author = titleToParse.substring(0, start).trim();
|
114
|
author = parseInRefrence(ref, author);
|
115
|
TeamOrPersonBase team = parseSingleTeam(author);
|
116
|
ref.setAuthorship(team);
|
117
|
ref.setProtectedTitleCache(false);
|
118
|
}else{
|
119
|
logger.warn("Could not parse reference: " + titleToParse);
|
120
|
}
|
121
|
return detailResult;
|
122
|
|
123
|
}
|
124
|
|
125
|
private String parseInRefrence(Reference ref, String author) {
|
126
|
int pos = author.indexOf(" in ");
|
127
|
if (pos > -1){
|
128
|
String inAuthorString = author.substring(pos + 4);
|
129
|
String myAuthorString = author.substring(0, pos);
|
130
|
Reference inReference = ReferenceFactory.newGeneric();
|
131
|
TeamOrPersonBase inAuthor = parseSingleTeam(inAuthorString);
|
132
|
inReference.setAuthorship(inAuthor);
|
133
|
ref.setInReference(inReference);
|
134
|
return myAuthorString;
|
135
|
}else{
|
136
|
return author;
|
137
|
}
|
138
|
|
139
|
}
|
140
|
|
141
|
private String removeReferenceBracket(String refString, Reference ref) {
|
142
|
String titleToParse = refString;
|
143
|
String reBracket = "\\(.*\\).?";
|
144
|
Pattern patBracket = Pattern.compile(reBracket);
|
145
|
Matcher matcher = patBracket.matcher(titleToParse);
|
146
|
|
147
|
if (matcher.matches()){
|
148
|
int start = matcher.start() + 1;
|
149
|
int end = matcher.end() -1 ;
|
150
|
if (! titleToParse.endsWith("")){
|
151
|
end = end - 1;
|
152
|
}
|
153
|
titleToParse = titleToParse.substring(start, end);
|
154
|
|
155
|
ref.setTitleCache(titleToParse);
|
156
|
}
|
157
|
return titleToParse;
|
158
|
}
|
159
|
|
160
|
/**
|
161
|
* @param taxon
|
162
|
* @param name
|
163
|
* @param value
|
164
|
*/
|
165
|
@Override
|
166
|
protected TeamOrPersonBase<?> handleNameUsage(Taxon taxon, TaxonName name,
|
167
|
String referenceTitle, TeamOrPersonBase lastTeam) {
|
168
|
|
169
|
Reference ref = ReferenceFactory.newGeneric();
|
170
|
|
171
|
ref.setTitleCache(referenceTitle, true);
|
172
|
|
173
|
TeamOrPersonBase<?> team = getReferenceAuthor(ref, name);
|
174
|
ref.setAuthorship(team);
|
175
|
|
176
|
String[] multipleReferences = ref.getTitleCache().split("&");
|
177
|
|
178
|
TaxonDescription description = getDescription(taxon);
|
179
|
for (String singleReferenceString : multipleReferences){
|
180
|
Reference singleRef = ReferenceFactory.newGeneric();
|
181
|
singleRef.setTitleCache(singleReferenceString.trim(), true);
|
182
|
singleRef.setAuthorship(team);
|
183
|
|
184
|
String microReference = parseReferenceYearAndDetailForUsage(singleRef);
|
185
|
|
186
|
singleRef.setTitle( CdmUtils.Nz(singleRef.getTitle()) + " - no title given yet -");
|
187
|
|
188
|
// parseReferenceType(ref);
|
189
|
|
190
|
TextData textData = TextData.NewInstance(Feature.CITATION());
|
191
|
textData.addSource(OriginalSourceType.PrimaryTaxonomicSource, null, null, singleRef, microReference,
|
192
|
name, null);
|
193
|
description.addElement(textData);
|
194
|
}
|
195
|
return team;
|
196
|
}
|
197
|
|
198
|
private String parseReferenceYearAndDetailForUsage(Reference ref) {
|
199
|
String detailResult = null;
|
200
|
String titleToParse = ref.getTitleCache().trim();
|
201
|
|
202
|
int detailStart = titleToParse.indexOf(":");
|
203
|
if (detailStart >= 0){
|
204
|
detailResult = titleToParse.substring(detailStart + 1);
|
205
|
titleToParse = titleToParse.substring(0, titleToParse.length() - detailResult.length() - 1).trim();
|
206
|
detailResult = detailResult.trim();
|
207
|
}
|
208
|
|
209
|
String reYear = "^[1-2]{1}[0-9]{3}[a-e]?$";
|
210
|
String reYearPeriod = reYear;
|
211
|
//
|
212
|
// //pattern for the whole string
|
213
|
Pattern patReference = Pattern.compile( reYearPeriod );
|
214
|
Matcher matcher = patReference.matcher(titleToParse);
|
215
|
if (! matcher.find()){
|
216
|
logger.warn("Could not parse year: " + titleToParse);
|
217
|
}else{
|
218
|
if (Pattern.matches("^[1-2]{1}[0-9]{3}[a-e]$", titleToParse)){
|
219
|
String title = titleToParse.substring(4,5);
|
220
|
ref.setTitle(title);
|
221
|
titleToParse = titleToParse.substring(0, 4);
|
222
|
}
|
223
|
ref.setProtectedTitleCache(false);
|
224
|
}
|
225
|
TimePeriod datePublished = TimePeriodParser.parseString(titleToParse);
|
226
|
ref.setDatePublished(datePublished);
|
227
|
return detailResult;
|
228
|
|
229
|
}
|
230
|
|
231
|
protected TeamOrPersonBase getReferenceAuthor (Reference ref, INonViralName name) {
|
232
|
String titleString = ref.getTitleCache();
|
233
|
String re = "\\(.*\\)";
|
234
|
Pattern pattern = Pattern.compile(re);
|
235
|
Matcher matcher = pattern.matcher(titleString);
|
236
|
if (matcher.find()){
|
237
|
int start = matcher.start();
|
238
|
String authorString = titleString.substring(0, start).trim();
|
239
|
String restString = titleString.substring(start + 1 , matcher.end() - 1);
|
240
|
TeamOrPersonBase team = getAuthorTeam(authorString, name);
|
241
|
ref.setTitleCache(restString, true);
|
242
|
return team;
|
243
|
}else{
|
244
|
logger.warn("Title does not match: " + titleString);
|
245
|
return null;
|
246
|
}
|
247
|
|
248
|
}
|
249
|
|
250
|
private TeamOrPersonBase getAuthorTeam(String authorString, INonViralName name) {
|
251
|
//TODO atomize
|
252
|
// TeamOrPersonBase nameTeam = CdmBase.deproxy(name.getCombinationAuthorship(), TeamOrPersonBase.class);
|
253
|
// String nameTeamTitle = nameTeam == null ? "" : nameTeam.getNomenclaturalTitle();
|
254
|
|
255
|
// if (nameTeam == null || ! authorTeamsMatch(authorString, nameTeamTitle)){
|
256
|
// logger.warn("Author teams do not match: " + authorString + " <-> " + nameTeamTitle);
|
257
|
TeamOrPersonBase result = parseSingleTeam(authorString);
|
258
|
result.setTitleCache(authorString, true);
|
259
|
return result;
|
260
|
// }else{
|
261
|
// nameTeam.setTitleCache(authorString, true);
|
262
|
// return nameTeam;
|
263
|
// }
|
264
|
}
|
265
|
|
266
|
/**
|
267
|
* @param refAuthorTeam
|
268
|
* @param nameTeam
|
269
|
* @return
|
270
|
*/
|
271
|
private boolean authorTeamsMatch(TeamOrPersonBase refAuthorTeam, TeamOrPersonBase nameTeam) {
|
272
|
String nameTeamString = nameTeam.getNomenclaturalTitle();
|
273
|
String refAuthorTeamString = refAuthorTeam.getTitleCache();
|
274
|
if (nameTeamString.equalsIgnoreCase(refAuthorTeamString)){
|
275
|
return true;
|
276
|
}
|
277
|
|
278
|
if (nameTeamString.endsWith(".")){
|
279
|
nameTeamString = nameTeamString.substring(0, nameTeamString.length() - 1 );
|
280
|
if (refAuthorTeamString.startsWith(nameTeamString)){
|
281
|
return true;
|
282
|
}else{
|
283
|
return checkSingleAndIpniAuthor(nameTeam, refAuthorTeam);
|
284
|
}
|
285
|
}else{
|
286
|
if (nameTeamString.endsWith(refAuthorTeamString) || refAuthorTeamString.endsWith(nameTeamString)){
|
287
|
return true;
|
288
|
}else{
|
289
|
return checkSingleAndIpniAuthor(nameTeam, refAuthorTeam);
|
290
|
}
|
291
|
}
|
292
|
}
|
293
|
|
294
|
private boolean checkSingleAndIpniAuthor(TeamOrPersonBase nameTeam, TeamOrPersonBase refAuthorTeam) {
|
295
|
if ( nameTeam.isInstanceOf(Team.class) && ((Team)nameTeam).getTeamMembers().size()> 1 ||
|
296
|
refAuthorTeam.isInstanceOf(Team.class) && ((Team)refAuthorTeam).getTeamMembers().size()> 1){
|
297
|
//class
|
298
|
if (! (nameTeam.isInstanceOf(Team.class) && refAuthorTeam.isInstanceOf(Team.class) ) ){
|
299
|
logger.warn("Only one author is a real team");
|
300
|
return false;
|
301
|
}
|
302
|
Team realNameTeam = (Team)nameTeam;
|
303
|
Team realRefAuthorTeam = (Team)refAuthorTeam;
|
304
|
//size
|
305
|
if (realNameTeam.getTeamMembers().size() != realRefAuthorTeam.getTeamMembers().size()){
|
306
|
logger.warn("Teams do not have the same size");
|
307
|
return false;
|
308
|
}
|
309
|
//empty teams
|
310
|
if (realNameTeam.getTeamMembers().size() == 0){
|
311
|
logger.warn("Teams are empty");
|
312
|
return false;
|
313
|
}
|
314
|
//compare each team member
|
315
|
for (int i = 0; i < realNameTeam.getTeamMembers().size(); i++){
|
316
|
Person namePerson = realNameTeam.getTeamMembers().get(i);
|
317
|
Person refPerson = realRefAuthorTeam.getTeamMembers().get(i);
|
318
|
if ( authorTeamsMatch(refPerson, namePerson) == false){
|
319
|
return false;
|
320
|
}
|
321
|
}
|
322
|
return true;
|
323
|
}
|
324
|
boolean result = checkIpniAuthor(nameTeam.getNomenclaturalTitle(), refAuthorTeam);
|
325
|
return result;
|
326
|
}
|
327
|
|
328
|
private boolean checkIpniAuthor(String nameTeamString, TeamOrPersonBase refAuthorTeam) {
|
329
|
IpniService ipniService = new IpniService();
|
330
|
List<Person> ipniAuthors = ipniService.getAuthors(nameTeamString, null, null, null, null, null);
|
331
|
if (ipniAuthors != null){
|
332
|
for (Person ipniAuthor : ipniAuthors){
|
333
|
if (ipniAuthor.getFamilyName() != null && ipniAuthor.getFamilyName().equalsIgnoreCase(refAuthorTeam.getTitleCache())){
|
334
|
return true;
|
335
|
}
|
336
|
logger.warn(ipniAuthor.getTitleCache() + " <-> " + refAuthorTeam.getTitleCache());
|
337
|
}
|
338
|
}else{
|
339
|
logger.warn("IPNI not available");
|
340
|
}
|
341
|
return false;
|
342
|
}
|
343
|
|
344
|
/**
|
345
|
* @param state
|
346
|
* @param elNom
|
347
|
* @param taxon
|
348
|
* @param homotypicalGroup
|
349
|
*/
|
350
|
@Override
|
351
|
protected void handleTypeRef(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
|
352
|
verifyNoChildren(elNom);
|
353
|
String typeRef = elNom.getTextNormalize();
|
354
|
typeRef = removeStartingTypeRefMinus(typeRef);
|
355
|
typeRef = removeTypePrefix(typeRef);
|
356
|
TypeDesignationBase typeDesignation = SpecimenTypeDesignation.NewInstance();
|
357
|
makeSpecimenTypeDesignation(new StringBuffer("Type"), typeRef, typeDesignation);
|
358
|
for (TaxonName name : homotypicalGroup.getTypifiedNames()){
|
359
|
name.addTypeDesignation(typeDesignation, true);
|
360
|
}
|
361
|
}
|
362
|
|
363
|
private String removeTypePrefix(String typeRef) {
|
364
|
typeRef = typeRef.trim().replace("Type: ", "").replace("Types: ", "").trim();
|
365
|
return typeRef;
|
366
|
}
|
367
|
|
368
|
@Override
|
369
|
protected void handleGenus(String value, INonViralName taxonName) {
|
370
|
// do nothing
|
371
|
}
|
372
|
|
373
|
|
374
|
|
375
|
}
|