Project

General

Profile

Download (13 KB) Statistics
| Branch: | Revision:
/**
* Copyright (C) 2009 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
9

    
10
package eu.etaxonomy.cdm.io.eflora.centralAfrica.ericaceae;
11

    
12
import java.util.List;
13
import java.util.regex.Matcher;
14
import java.util.regex.Pattern;
15

    
16
import org.apache.log4j.Logger;
17
import org.jdom.Element;
18
import org.springframework.stereotype.Component;
19

    
20
import eu.etaxonomy.cdm.common.CdmUtils;
21
import eu.etaxonomy.cdm.ext.ipni.IpniService;
22
import eu.etaxonomy.cdm.io.eflora.EfloraImportState;
23
import eu.etaxonomy.cdm.io.eflora.EfloraTaxonImport;
24
import eu.etaxonomy.cdm.model.agent.Person;
25
import eu.etaxonomy.cdm.model.agent.Team;
26
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
27
import eu.etaxonomy.cdm.model.common.CdmBase;
28
import eu.etaxonomy.cdm.model.common.TimePeriod;
29
import eu.etaxonomy.cdm.model.description.Feature;
30
import eu.etaxonomy.cdm.model.description.TaxonDescription;
31
import eu.etaxonomy.cdm.model.description.TextData;
32
import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
33
import eu.etaxonomy.cdm.model.name.INonViralName;
34
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
35
import eu.etaxonomy.cdm.model.name.TaxonName;
36
import eu.etaxonomy.cdm.model.name.TypeDesignationBase;
37
import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
38
import eu.etaxonomy.cdm.model.reference.Reference;
39
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
40
import eu.etaxonomy.cdm.model.taxon.Taxon;
41
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
42

    
43
/**
 * @author a.mueller
 */
46
@Component
47
public class CentralAfricaEricaceaeTaxonImport  extends EfloraTaxonImport  {
48

    
49
    private static final long serialVersionUID = 6442665916458420942L;
50
    private static final Logger logger = Logger.getLogger(CentralAfricaEricaceaeTaxonImport.class);
51

    
52
	@Override
53
	protected TeamOrPersonBase<?> handleNomenclaturalReference(TaxonName name, String value) {
54
		Reference nomRef = ReferenceFactory.newGeneric();
55
		nomRef.setTitleCache(value, true);
56
		parseNomStatus(nomRef, name);
57
		name.setNomenclaturalReference(nomRef);
58

    
59
		String microReference = parseReferenceYearAndDetail(nomRef);
60
		microReference = removeTrailing(microReference, ")");
61

    
62
		microReference = parseHomonym(microReference, name);
63
		name.setNomenclaturalMicroReference(microReference);
64

    
65
		TeamOrPersonBase<?>  nameTeam = CdmBase.deproxy(name.getCombinationAuthorship(), TeamOrPersonBase.class);
66
		TeamOrPersonBase<?>  refTeam = nomRef.getAuthorship();
67
		if (nameTeam == null ){
68
			logger.warn("Name has nom. ref. but no author team. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
69
		}else if (refTeam == null ){
70
			logger.warn("Name has nom. ref. but no nom.ref. author. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
71
		}else if (! authorTeamsMatch(refTeam, nameTeam)){
72
			logger.warn("Nom.Ref. author and comb. author do not match: " + nomRef.getTitleCache() + " <-> " + nameTeam.getNomenclaturalTitleCache());
73
		}else {
74
			nomRef.setAuthorship(nameTeam);
75
			nomRef.setTitle(CdmUtils.Nz(nomRef.getTitle()) + " - no title given yet -");
76
			nameTeam.setTitleCache(refTeam.getTitleCache(), true);
77
		}
78
		return nameTeam;
79
	}
80

    
81
	/**
82
	 * Extracts the date published part and returns micro reference
83
	 * @param ref
84
	 * @return
85
	 */
86
	protected String parseReferenceYearAndDetail(Reference ref){
87
		String detailResult = null;
88
		String titleToParse = ref.getTitleCache();
89
		titleToParse = removeReferenceBracket(titleToParse, ref);
90

    
91
		int detailStart = titleToParse.indexOf(":");
92
		if (detailStart >=  0){
93
			detailResult = titleToParse.substring(detailStart + 1);
94
			titleToParse = titleToParse.substring(0, titleToParse.length() -  detailResult.length() - 1).trim();
95
			detailResult = detailResult.trim();
96
		}
97

    
98
		String reYear = "\\s[1-2]{1}[0-9]{3}";
99
		String reYearPeriod = reYear;
100
//
101
//		//pattern for the whole string
102
		Pattern patReference = Pattern.compile( reYearPeriod );
103
		Matcher matcher = patReference.matcher(titleToParse);
104
		if (matcher.find()){
105
			int start = matcher.start();
106
			int end = matcher.end();
107
//
108
			String strPeriod = titleToParse.substring(start, end);
109
			TimePeriod datePublished = TimePeriodParser.parseString(strPeriod);
110
			ref.setDatePublished(datePublished);
111
			String author = titleToParse.substring(0, start).trim();
112
			author = parseInRefrence(ref, author);
113
			TeamOrPersonBase team = parseSingleTeam(author);
114
			ref.setAuthorship(team);
115
			ref.setProtectedTitleCache(false);
116
		}else{
117
			logger.warn("Could not parse reference: " +  titleToParse);
118
		}
119
		return detailResult;
120

    
121
	}
122

    
123
	private String parseInRefrence(Reference ref, String author) {
124
		int pos = author.indexOf(" in ");
125
		if (pos > -1){
126
			String inAuthorString = author.substring(pos + 4);
127
			String myAuthorString = author.substring(0, pos);
128
			Reference inReference = ReferenceFactory.newGeneric();
129
			TeamOrPersonBase inAuthor = parseSingleTeam(inAuthorString);
130
			inReference.setAuthorship(inAuthor);
131
			ref.setInReference(inReference);
132
			return myAuthorString;
133
		}else{
134
			return author;
135
		}
136

    
137
	}
138

    
139
	private String removeReferenceBracket(String refString, Reference ref) {
140
		String titleToParse = refString;
141
		String reBracket = "\\(.*\\).?";
142
		Pattern patBracket = Pattern.compile(reBracket);
143
		Matcher matcher = patBracket.matcher(titleToParse);
144

    
145
		if (matcher.matches()){
146
			int start = matcher.start() + 1;
147
			int end = matcher.end() -1 ;
148
			if (! titleToParse.endsWith("")){
149
				end = end - 1;
150
			}
151
			titleToParse = titleToParse.substring(start, end);
152

    
153
			ref.setTitleCache(titleToParse);
154
		}
155
		return titleToParse;
156
	}
157

    
158
	/**
159
	 * @param taxon
160
	 * @param name
161
	 * @param value
162
	 */
163
	@Override
164
	protected TeamOrPersonBase<?> handleNameUsage(Taxon taxon, TaxonName name,
165
	        String referenceTitle, TeamOrPersonBase lastTeam) {
166

    
167
		Reference ref = ReferenceFactory.newGeneric();
168

    
169
		ref.setTitleCache(referenceTitle, true);
170

    
171
		TeamOrPersonBase<?> team = getReferenceAuthor(ref, name);
172
		ref.setAuthorship(team);
173

    
174
		String[] multipleReferences = ref.getTitleCache().split("&");
175

    
176
		TaxonDescription description = getDescription(taxon);
177
		for (String singleReferenceString : multipleReferences){
178
			Reference singleRef = ReferenceFactory.newGeneric();
179
			singleRef.setTitleCache(singleReferenceString.trim(), true);
180
			singleRef.setAuthorship(team);
181

    
182
			String microReference = parseReferenceYearAndDetailForUsage(singleRef);
183

    
184
			singleRef.setTitle( CdmUtils.Nz(singleRef.getTitle()) + " - no title given yet -");
185

    
186
	//		parseReferenceType(ref);
187

    
188
			TextData textData = TextData.NewInstance(Feature.CITATION());
189
			textData.addSource(OriginalSourceType.PrimaryTaxonomicSource, null, null, singleRef, microReference,
190
			        name, null);
191
			description.addElement(textData);
192
		}
193
		return team;
194
	}
195

    
196
	private String parseReferenceYearAndDetailForUsage(Reference ref) {
197
		String detailResult = null;
198
		String titleToParse = ref.getTitleCache().trim();
199

    
200
		int detailStart = titleToParse.indexOf(":");
201
		if (detailStart >=  0){
202
			detailResult = titleToParse.substring(detailStart + 1);
203
			titleToParse = titleToParse.substring(0, titleToParse.length() -  detailResult.length() - 1).trim();
204
			detailResult = detailResult.trim();
205
		}
206

    
207
		String reYear = "^[1-2]{1}[0-9]{3}[a-e]?$";
208
		String reYearPeriod = reYear;
209
//
210
//			//pattern for the whole string
211
		Pattern patReference = Pattern.compile( reYearPeriod );
212
		Matcher matcher = patReference.matcher(titleToParse);
213
		if (! matcher.find()){
214
			logger.warn("Could not parse year: " +  titleToParse);
215
		}else{
216
			if (Pattern.matches("^[1-2]{1}[0-9]{3}[a-e]$", titleToParse)){
217
				String title = titleToParse.substring(4,5);
218
				ref.setTitle(title);
219
				titleToParse = titleToParse.substring(0, 4);
220
			}
221
			ref.setProtectedTitleCache(false);
222
		}
223
		TimePeriod datePublished = TimePeriodParser.parseString(titleToParse);
224
		ref.setDatePublished(datePublished);
225
		return detailResult;
226

    
227
	}
228

    
229
	protected TeamOrPersonBase getReferenceAuthor (Reference ref, INonViralName name) {
230
		String titleString = ref.getTitleCache();
231
		String re = "\\(.*\\)";
232
		Pattern pattern = Pattern.compile(re);
233
		Matcher matcher = pattern.matcher(titleString);
234
		if (matcher.find()){
235
			int start = matcher.start();
236
			String authorString = titleString.substring(0, start).trim();
237
			String restString = titleString.substring(start + 1 , matcher.end() - 1);
238
			TeamOrPersonBase team = getAuthorTeam(authorString, name);
239
			ref.setTitleCache(restString, true);
240
			return team;
241
		}else{
242
			logger.warn("Title does not match: " + titleString);
243
			return null;
244
		}
245

    
246
	}
247

    
248
	private TeamOrPersonBase getAuthorTeam(String authorString, INonViralName name) {
249
		//TODO atomize
250
//		TeamOrPersonBase nameTeam = CdmBase.deproxy(name.getCombinationAuthorship(), TeamOrPersonBase.class);
251
//		String nameTeamTitle = nameTeam == null ? "" : nameTeam.getNomenclaturalTitle();
252

    
253
//		if (nameTeam == null || ! authorTeamsMatch(authorString, nameTeamTitle)){
254
//			logger.warn("Author teams do not match: " + authorString + " <-> " + nameTeamTitle);
255
			TeamOrPersonBase result = parseSingleTeam(authorString);
256
			result.setTitleCache(authorString, true);
257
			return result;
258
//		}else{
259
//			nameTeam.setTitleCache(authorString, true);
260
//			return nameTeam;
261
//		}
262
	}
263

    
264
	/**
265
	 * @param refAuthorTeam
266
	 * @param nameTeam
267
	 * @return
268
	 */
269
	private boolean authorTeamsMatch(TeamOrPersonBase<?> refAuthorTeam, TeamOrPersonBase<?> nameTeam) {
270
		String nameTeamString = nameTeam.getNomenclaturalTitleCache();
271
		String refAuthorTeamString = refAuthorTeam.getTitleCache();
272
		if (nameTeamString.equalsIgnoreCase(refAuthorTeamString)){
273
			return true;
274
		}
275

    
276
		if (nameTeamString.endsWith(".")){
277
			nameTeamString = nameTeamString.substring(0, nameTeamString.length() - 1 );
278
			if (refAuthorTeamString.startsWith(nameTeamString)){
279
				return true;
280
			}else{
281
				return checkSingleAndIpniAuthor(nameTeam, refAuthorTeam);
282
			}
283
		}else{
284
			if (nameTeamString.endsWith(refAuthorTeamString) || refAuthorTeamString.endsWith(nameTeamString)){
285
				return true;
286
			}else{
287
				return checkSingleAndIpniAuthor(nameTeam, refAuthorTeam);
288
			}
289
		}
290
	}
291

    
292
	private boolean checkSingleAndIpniAuthor(TeamOrPersonBase nameTeam, TeamOrPersonBase refAuthorTeam) {
293
		if ( nameTeam.isInstanceOf(Team.class) && ((Team)nameTeam).getTeamMembers().size()> 1 ||
294
				refAuthorTeam.isInstanceOf(Team.class) && ((Team)refAuthorTeam).getTeamMembers().size()> 1){
295
			//class
296
			if (! (nameTeam.isInstanceOf(Team.class) && refAuthorTeam.isInstanceOf(Team.class) ) ){
297
				logger.warn("Only one author is a real team");
298
				return false;
299
			}
300
			Team realNameTeam = (Team)nameTeam;
301
			Team realRefAuthorTeam = (Team)refAuthorTeam;
302
			//size
303
			if (realNameTeam.getTeamMembers().size() != realRefAuthorTeam.getTeamMembers().size()){
304
				logger.warn("Teams do not have the same size");
305
				return false;
306
			}
307
			//empty teams
308
			if (realNameTeam.getTeamMembers().size() == 0){
309
				logger.warn("Teams are empty");
310
				return false;
311
			}
312
			//compare each team member
313
			for (int i = 0; i < realNameTeam.getTeamMembers().size(); i++){
314
				Person namePerson = realNameTeam.getTeamMembers().get(i);
315
				Person refPerson = realRefAuthorTeam.getTeamMembers().get(i);
316
				if ( authorTeamsMatch(refPerson, namePerson) == false){
317
					return false;
318
				}
319
			}
320
			return true;
321
		}
322
		boolean result = checkIpniAuthor(nameTeam.getNomenclaturalTitleCache(), refAuthorTeam);
323
		return result;
324
	}
325

    
326
	private boolean checkIpniAuthor(String nameTeamString, TeamOrPersonBase refAuthorTeam) {
327
		IpniService ipniService = new IpniService();
328
		List<Person> ipniAuthors = ipniService.getAuthors(nameTeamString, null, null, null, null, null);
329
		if (ipniAuthors != null){
330
			for (Person ipniAuthor : ipniAuthors){
331
				if (ipniAuthor.getFamilyName() != null && ipniAuthor.getFamilyName().equalsIgnoreCase(refAuthorTeam.getTitleCache())){
332
					return true;
333
				}
334
				logger.warn(ipniAuthor.getTitleCache() + " <-> " + refAuthorTeam.getTitleCache());
335
			}
336
		}else{
337
			logger.warn("IPNI not available");
338
		}
339
		return false;
340
	}
341

    
342
	@Override
343
	protected void handleTypeRef(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
344
		verifyNoChildren(elNom);
345
		String typeRef = elNom.getTextNormalize();
346
		typeRef = removeStartingTypeRefMinus(typeRef);
347
		typeRef = removeTypePrefix(typeRef);
348
		TypeDesignationBase<?> typeDesignation = SpecimenTypeDesignation.NewInstance();
349
		makeSpecimenTypeDesignation(new StringBuffer("Type"), typeRef, typeDesignation);
350
		for (TaxonName name : homotypicalGroup.getTypifiedNames()){
351
			name.addTypeDesignation(typeDesignation, true);
352
		}
353
	}
354

    
355
	private String removeTypePrefix(String typeRef) {
356
		typeRef = typeRef.trim().replace("Type: ", "").replace("Types: ", "").trim();
357
		return typeRef;
358
	}
359

    
360
	@Override
361
    protected void handleGenus(String value, INonViralName taxonName) {
362
		// do nothing
363
	}
364
}
(3-3/4)