1
|
/**
|
2
|
* Copyright (C) 2009 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
|
10
|
package eu.etaxonomy.cdm.io.eflora.centralAfrica.ericaceae;
|
11
|
|
12
|
import java.util.List;
|
13
|
import java.util.regex.Matcher;
|
14
|
import java.util.regex.Pattern;
|
15
|
|
16
|
import org.apache.log4j.Logger;
|
17
|
import org.jdom.Element;
|
18
|
import org.springframework.stereotype.Component;
|
19
|
|
20
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
21
|
import eu.etaxonomy.cdm.ext.ipni.IpniService;
|
22
|
import eu.etaxonomy.cdm.io.eflora.EfloraImportState;
|
23
|
import eu.etaxonomy.cdm.io.eflora.EfloraTaxonImport;
|
24
|
import eu.etaxonomy.cdm.model.agent.Person;
|
25
|
import eu.etaxonomy.cdm.model.agent.Team;
|
26
|
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
|
27
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
28
|
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
|
29
|
import eu.etaxonomy.cdm.model.common.TimePeriod;
|
30
|
import eu.etaxonomy.cdm.model.description.Feature;
|
31
|
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
32
|
import eu.etaxonomy.cdm.model.description.TextData;
|
33
|
import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
|
34
|
import eu.etaxonomy.cdm.model.name.NonViralName;
|
35
|
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
|
36
|
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
|
37
|
import eu.etaxonomy.cdm.model.name.TypeDesignationBase;
|
38
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
39
|
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
|
40
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
41
|
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
|
42
|
|
43
|
|
44
|
/**
|
45
|
* @author a.mueller
|
46
|
*
|
47
|
*/
|
48
|
@Component
|
49
|
public class CentralAfricaEricaceaeTaxonImport extends EfloraTaxonImport {
|
50
|
private static final Logger logger = Logger.getLogger(CentralAfricaEricaceaeTaxonImport.class);
|
51
|
|
52
|
|
53
|
|
54
|
|
55
|
/* (non-Javadoc)
|
56
|
* @see eu.etaxonomy.cdm.io.eflora.EfloraTaxonImport#handleNomenclaturalReference(eu.etaxonomy.cdm.model.name.NonViralName, java.lang.String)
|
57
|
*/
|
58
|
@Override
|
59
|
protected TeamOrPersonBase handleNomenclaturalReference(NonViralName name, String value) {
|
60
|
Reference nomRef = ReferenceFactory.newGeneric();
|
61
|
nomRef.setTitleCache(value, true);
|
62
|
parseNomStatus(nomRef, name);
|
63
|
name.setNomenclaturalReference(nomRef);
|
64
|
|
65
|
String microReference = parseReferenceYearAndDetail(nomRef);
|
66
|
microReference = removeTrailing(microReference, ")");
|
67
|
|
68
|
microReference = parseHomonym(microReference, name);
|
69
|
name.setNomenclaturalMicroReference(microReference);
|
70
|
|
71
|
TeamOrPersonBase nameTeam = CdmBase.deproxy(name.getCombinationAuthorship(), TeamOrPersonBase.class);
|
72
|
TeamOrPersonBase refTeam = nomRef.getAuthorship();
|
73
|
if (nameTeam == null ){
|
74
|
logger.warn("Name has nom. ref. but no author team. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
|
75
|
}else if (refTeam == null ){
|
76
|
logger.warn("Name has nom. ref. but no nom.ref. author. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
|
77
|
}else if (! authorTeamsMatch(refTeam, nameTeam)){
|
78
|
logger.warn("Nom.Ref. author and comb. author do not match: " + nomRef.getTitleCache() + " <-> " + nameTeam.getNomenclaturalTitle());
|
79
|
}else {
|
80
|
nomRef.setAuthorship(nameTeam);
|
81
|
nomRef.setTitle(CdmUtils.Nz(nomRef.getTitle()) + " - no title given yet -");
|
82
|
nameTeam.setTitleCache(refTeam.getTitleCache(), true);
|
83
|
}
|
84
|
return nameTeam;
|
85
|
}
|
86
|
|
87
|
/**
|
88
|
* Extracts the date published part and returns micro reference
|
89
|
* @param ref
|
90
|
* @return
|
91
|
*/
|
92
|
protected String parseReferenceYearAndDetail(Reference ref){
|
93
|
String detailResult = null;
|
94
|
String titleToParse = ref.getTitleCache();
|
95
|
titleToParse = removeReferenceBracket(titleToParse, ref);
|
96
|
|
97
|
int detailStart = titleToParse.indexOf(":");
|
98
|
if (detailStart >= 0){
|
99
|
detailResult = titleToParse.substring(detailStart + 1);
|
100
|
titleToParse = titleToParse.substring(0, titleToParse.length() - detailResult.length() - 1).trim();
|
101
|
detailResult = detailResult.trim();
|
102
|
}
|
103
|
|
104
|
String reYear = "\\s[1-2]{1}[0-9]{3}";
|
105
|
String reYearPeriod = reYear;
|
106
|
//
|
107
|
// //pattern for the whole string
|
108
|
Pattern patReference = Pattern.compile( reYearPeriod );
|
109
|
Matcher matcher = patReference.matcher(titleToParse);
|
110
|
if (matcher.find()){
|
111
|
int start = matcher.start();
|
112
|
int end = matcher.end();
|
113
|
//
|
114
|
String strPeriod = titleToParse.substring(start, end);
|
115
|
TimePeriod datePublished = TimePeriodParser.parseString(strPeriod);
|
116
|
ref.setDatePublished(datePublished);
|
117
|
String author = titleToParse.substring(0, start).trim();
|
118
|
author = parseInRefrence(ref, author);
|
119
|
TeamOrPersonBase team = parseSingleTeam(author);
|
120
|
ref.setAuthorship(team);
|
121
|
ref.setProtectedTitleCache(false);
|
122
|
}else{
|
123
|
logger.warn("Could not parse reference: " + titleToParse);
|
124
|
}
|
125
|
return detailResult;
|
126
|
|
127
|
}
|
128
|
|
129
|
private String parseInRefrence(Reference ref, String author) {
|
130
|
int pos = author.indexOf(" in ");
|
131
|
if (pos > -1){
|
132
|
String inAuthorString = author.substring(pos + 4);
|
133
|
String myAuthorString = author.substring(0, pos);
|
134
|
Reference inReference = ReferenceFactory.newGeneric();
|
135
|
TeamOrPersonBase inAuthor = parseSingleTeam(inAuthorString);
|
136
|
inReference.setAuthorship(inAuthor);
|
137
|
ref.setInReference(inReference);
|
138
|
return myAuthorString;
|
139
|
}else{
|
140
|
return author;
|
141
|
}
|
142
|
|
143
|
}
|
144
|
|
145
|
private String removeReferenceBracket(String refString, Reference ref) {
|
146
|
String titleToParse = refString;
|
147
|
String reBracket = "\\(.*\\).?";
|
148
|
Pattern patBracket = Pattern.compile(reBracket);
|
149
|
Matcher matcher = patBracket.matcher(titleToParse);
|
150
|
|
151
|
if (matcher.matches()){
|
152
|
int start = matcher.start() + 1;
|
153
|
int end = matcher.end() -1 ;
|
154
|
if (! titleToParse.endsWith("")){
|
155
|
end = end - 1;
|
156
|
}
|
157
|
titleToParse = titleToParse.substring(start, end);
|
158
|
|
159
|
ref.setTitleCache(titleToParse);
|
160
|
}
|
161
|
return titleToParse;
|
162
|
}
|
163
|
|
164
|
/**
|
165
|
* @param taxon
|
166
|
* @param name
|
167
|
* @param value
|
168
|
*/
|
169
|
@Override
|
170
|
protected TeamOrPersonBase<?> handleNameUsage(Taxon taxon, NonViralName<?> name, String referenceTitle, TeamOrPersonBase lastTeam) {
|
171
|
Reference<?> ref = ReferenceFactory.newGeneric();
|
172
|
|
173
|
ref.setTitleCache(referenceTitle, true);
|
174
|
|
175
|
TeamOrPersonBase<?> team = getReferenceAuthor(ref, name);
|
176
|
ref.setAuthorship(team);
|
177
|
|
178
|
String[] multipleReferences = ref.getTitleCache().split("&");
|
179
|
|
180
|
TaxonDescription description = getDescription(taxon);
|
181
|
for (String singleReferenceString : multipleReferences){
|
182
|
Reference<?> singleRef = ReferenceFactory.newGeneric();
|
183
|
singleRef.setTitleCache(singleReferenceString.trim(), true);
|
184
|
singleRef.setAuthorship(team);
|
185
|
|
186
|
String microReference = parseReferenceYearAndDetailForUsage(singleRef);
|
187
|
|
188
|
singleRef.setTitle( CdmUtils.Nz(singleRef.getTitle()) + " - no title given yet -");
|
189
|
|
190
|
// parseReferenceType(ref);
|
191
|
|
192
|
TextData textData = TextData.NewInstance(Feature.CITATION());
|
193
|
textData.addSource(OriginalSourceType.PrimaryTaxonomicSource, null, null, singleRef, microReference, name, null);
|
194
|
description.addElement(textData);
|
195
|
}
|
196
|
return team;
|
197
|
}
|
198
|
|
199
|
private String parseReferenceYearAndDetailForUsage(Reference ref) {
|
200
|
String detailResult = null;
|
201
|
String titleToParse = ref.getTitleCache().trim();
|
202
|
|
203
|
int detailStart = titleToParse.indexOf(":");
|
204
|
if (detailStart >= 0){
|
205
|
detailResult = titleToParse.substring(detailStart + 1);
|
206
|
titleToParse = titleToParse.substring(0, titleToParse.length() - detailResult.length() - 1).trim();
|
207
|
detailResult = detailResult.trim();
|
208
|
}
|
209
|
|
210
|
String reYear = "^[1-2]{1}[0-9]{3}[a-e]?$";
|
211
|
String reYearPeriod = reYear;
|
212
|
//
|
213
|
// //pattern for the whole string
|
214
|
Pattern patReference = Pattern.compile( reYearPeriod );
|
215
|
Matcher matcher = patReference.matcher(titleToParse);
|
216
|
if (! matcher.find()){
|
217
|
logger.warn("Could not parse year: " + titleToParse);
|
218
|
}else{
|
219
|
if (Pattern.matches("^[1-2]{1}[0-9]{3}[a-e]$", titleToParse)){
|
220
|
String title = titleToParse.substring(4,5);
|
221
|
ref.setTitle(title);
|
222
|
titleToParse = titleToParse.substring(0, 4);
|
223
|
}
|
224
|
ref.setProtectedTitleCache(false);
|
225
|
}
|
226
|
TimePeriod datePublished = TimePeriodParser.parseString(titleToParse);
|
227
|
ref.setDatePublished(datePublished);
|
228
|
return detailResult;
|
229
|
|
230
|
}
|
231
|
|
232
|
protected TeamOrPersonBase getReferenceAuthor (Reference ref, NonViralName name) {
|
233
|
String titleString = ref.getTitleCache();
|
234
|
String re = "\\(.*\\)";
|
235
|
Pattern pattern = Pattern.compile(re);
|
236
|
Matcher matcher = pattern.matcher(titleString);
|
237
|
if (matcher.find()){
|
238
|
int start = matcher.start();
|
239
|
String authorString = titleString.substring(0, start).trim();
|
240
|
String restString = titleString.substring(start + 1 , matcher.end() - 1);
|
241
|
TeamOrPersonBase team = getAuthorTeam(authorString, name);
|
242
|
ref.setTitleCache(restString, true);
|
243
|
return team;
|
244
|
}else{
|
245
|
logger.warn("Title does not match: " + titleString);
|
246
|
return null;
|
247
|
}
|
248
|
|
249
|
}
|
250
|
|
251
|
private TeamOrPersonBase getAuthorTeam(String authorString, NonViralName name) {
|
252
|
//TODO atomize
|
253
|
// TeamOrPersonBase nameTeam = CdmBase.deproxy(name.getCombinationAuthorship(), TeamOrPersonBase.class);
|
254
|
// String nameTeamTitle = nameTeam == null ? "" : nameTeam.getNomenclaturalTitle();
|
255
|
|
256
|
// if (nameTeam == null || ! authorTeamsMatch(authorString, nameTeamTitle)){
|
257
|
// logger.warn("Author teams do not match: " + authorString + " <-> " + nameTeamTitle);
|
258
|
TeamOrPersonBase result = parseSingleTeam(authorString);
|
259
|
result.setTitleCache(authorString, true);
|
260
|
return result;
|
261
|
// }else{
|
262
|
// nameTeam.setTitleCache(authorString, true);
|
263
|
// return nameTeam;
|
264
|
// }
|
265
|
}
|
266
|
|
267
|
/**
|
268
|
* @param refAuthorTeam
|
269
|
* @param nameTeam
|
270
|
* @return
|
271
|
*/
|
272
|
private boolean authorTeamsMatch(TeamOrPersonBase refAuthorTeam, TeamOrPersonBase nameTeam) {
|
273
|
String nameTeamString = nameTeam.getNomenclaturalTitle();
|
274
|
String refAuthorTeamString = refAuthorTeam.getTitleCache();
|
275
|
if (nameTeamString.equalsIgnoreCase(refAuthorTeamString)){
|
276
|
return true;
|
277
|
}
|
278
|
|
279
|
if (nameTeamString.endsWith(".")){
|
280
|
nameTeamString = nameTeamString.substring(0, nameTeamString.length() - 1 );
|
281
|
if (refAuthorTeamString.startsWith(nameTeamString)){
|
282
|
return true;
|
283
|
}else{
|
284
|
return checkSingleAndIpniAuthor(nameTeam, refAuthorTeam);
|
285
|
}
|
286
|
}else{
|
287
|
if (nameTeamString.endsWith(refAuthorTeamString) || refAuthorTeamString.endsWith(nameTeamString)){
|
288
|
return true;
|
289
|
}else{
|
290
|
return checkSingleAndIpniAuthor(nameTeam, refAuthorTeam);
|
291
|
}
|
292
|
}
|
293
|
}
|
294
|
|
295
|
private boolean checkSingleAndIpniAuthor(TeamOrPersonBase nameTeam, TeamOrPersonBase refAuthorTeam) {
|
296
|
if ( nameTeam.isInstanceOf(Team.class) && ((Team)nameTeam).getTeamMembers().size()> 1 ||
|
297
|
refAuthorTeam.isInstanceOf(Team.class) && ((Team)refAuthorTeam).getTeamMembers().size()> 1){
|
298
|
//class
|
299
|
if (! (nameTeam.isInstanceOf(Team.class) && refAuthorTeam.isInstanceOf(Team.class) ) ){
|
300
|
logger.warn("Only one author is a real team");
|
301
|
return false;
|
302
|
}
|
303
|
Team realNameTeam = (Team)nameTeam;
|
304
|
Team realRefAuthorTeam = (Team)refAuthorTeam;
|
305
|
//size
|
306
|
if (realNameTeam.getTeamMembers().size() != realRefAuthorTeam.getTeamMembers().size()){
|
307
|
logger.warn("Teams do not have the same size");
|
308
|
return false;
|
309
|
}
|
310
|
//empty teams
|
311
|
if (realNameTeam.getTeamMembers().size() == 0){
|
312
|
logger.warn("Teams are empty");
|
313
|
return false;
|
314
|
}
|
315
|
//compare each team member
|
316
|
for (int i = 0; i < realNameTeam.getTeamMembers().size(); i++){
|
317
|
Person namePerson = realNameTeam.getTeamMembers().get(i);
|
318
|
Person refPerson = realRefAuthorTeam.getTeamMembers().get(i);
|
319
|
if ( authorTeamsMatch(refPerson, namePerson) == false){
|
320
|
return false;
|
321
|
}
|
322
|
}
|
323
|
return true;
|
324
|
}
|
325
|
boolean result = checkIpniAuthor(nameTeam.getNomenclaturalTitle(), refAuthorTeam);
|
326
|
return result;
|
327
|
}
|
328
|
|
329
|
private boolean checkIpniAuthor(String nameTeamString, TeamOrPersonBase refAuthorTeam) {
|
330
|
IpniService ipniService = new IpniService();
|
331
|
List<Person> ipniAuthors = ipniService.getAuthors(nameTeamString, null, null, null, null, null);
|
332
|
if (ipniAuthors != null){
|
333
|
for (Person ipniAuthor : ipniAuthors){
|
334
|
if (ipniAuthor.getLastname() != null && ipniAuthor.getLastname().equalsIgnoreCase(refAuthorTeam.getTitleCache())){
|
335
|
return true;
|
336
|
}
|
337
|
logger.warn(ipniAuthor.getTitleCache() + " <-> " + refAuthorTeam.getTitleCache());
|
338
|
}
|
339
|
}else{
|
340
|
logger.warn("IPNI not available");
|
341
|
}
|
342
|
return false;
|
343
|
}
|
344
|
|
345
|
/**
|
346
|
* @param state
|
347
|
* @param elNom
|
348
|
* @param taxon
|
349
|
* @param homotypicalGroup
|
350
|
*/
|
351
|
@Override
|
352
|
protected void handleTypeRef(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
|
353
|
verifyNoChildren(elNom);
|
354
|
String typeRef = elNom.getTextNormalize();
|
355
|
typeRef = removeStartingTypeRefMinus(typeRef);
|
356
|
typeRef = removeTypePrefix(typeRef);
|
357
|
TypeDesignationBase typeDesignation = SpecimenTypeDesignation.NewInstance();
|
358
|
makeSpecimenTypeDesignation(new StringBuffer("Type"), typeRef, typeDesignation);
|
359
|
for (TaxonNameBase name : homotypicalGroup.getTypifiedNames()){
|
360
|
name.addTypeDesignation(typeDesignation, true);
|
361
|
}
|
362
|
}
|
363
|
|
364
|
private String removeTypePrefix(String typeRef) {
|
365
|
typeRef = typeRef.trim().replace("Type: ", "").replace("Types: ", "").trim();
|
366
|
return typeRef;
|
367
|
}
|
368
|
|
369
|
@Override
|
370
|
protected void handleGenus(String value, TaxonNameBase taxonName) {
|
371
|
// do nothing
|
372
|
}
|
373
|
|
374
|
|
375
|
|
376
|
}
|