Project

General

Profile

Download (13.1 KB) Statistics
| Branch: | Revision:
/**
* Copyright (C) 2009 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
9

    
10
package eu.etaxonomy.cdm.io.eflora.centralAfrica.ericaceae;
11

    
12
import java.util.List;
13
import java.util.regex.Matcher;
14
import java.util.regex.Pattern;
15

    
16
import org.apache.log4j.Logger;
17
import org.jdom.Element;
18
import org.springframework.stereotype.Component;
19

    
20
import eu.etaxonomy.cdm.common.CdmUtils;
21
import eu.etaxonomy.cdm.ext.ipni.IpniService;
22
import eu.etaxonomy.cdm.io.eflora.EfloraImportState;
23
import eu.etaxonomy.cdm.io.eflora.EfloraTaxonImport;
24
import eu.etaxonomy.cdm.model.agent.Person;
25
import eu.etaxonomy.cdm.model.agent.Team;
26
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
27
import eu.etaxonomy.cdm.model.common.CdmBase;
28
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
29
import eu.etaxonomy.cdm.model.common.TimePeriod;
30
import eu.etaxonomy.cdm.model.description.Feature;
31
import eu.etaxonomy.cdm.model.description.TaxonDescription;
32
import eu.etaxonomy.cdm.model.description.TextData;
33
import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
34
import eu.etaxonomy.cdm.model.name.NonViralName;
35
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
36
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
37
import eu.etaxonomy.cdm.model.name.TypeDesignationBase;
38
import eu.etaxonomy.cdm.model.reference.Reference;
39
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
40
import eu.etaxonomy.cdm.model.taxon.Taxon;
41
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
42

    
43

    
/**
 * @author a.mueller
 *
 */
48
@Component
49
public class CentralAfricaEricaceaeTaxonImport  extends EfloraTaxonImport  {
50
	// Class-wide log4j logger; the methods of this class use it to emit import warnings.
	private static final Logger logger = Logger.getLogger(CentralAfricaEricaceaeTaxonImport.class);
51

    
52

    
53

    
54

    
55
	/* (non-Javadoc)
56
	 * @see eu.etaxonomy.cdm.io.eflora.EfloraTaxonImport#handleNomenclaturalReference(eu.etaxonomy.cdm.model.name.NonViralName, java.lang.String)
57
	 */
58
	@Override
59
	protected TeamOrPersonBase handleNomenclaturalReference(NonViralName name, String value) {
60
		Reference nomRef = ReferenceFactory.newGeneric();
61
		nomRef.setTitleCache(value, true);
62
		parseNomStatus(nomRef, name);
63
		name.setNomenclaturalReference(nomRef);
64

    
65
		String microReference = parseReferenceYearAndDetail(nomRef);
66
		microReference = removeTrailing(microReference, ")");
67

    
68
		microReference = parseHomonym(microReference, name);
69
		name.setNomenclaturalMicroReference(microReference);
70

    
71
		TeamOrPersonBase  nameTeam = CdmBase.deproxy(name.getCombinationAuthorship(), TeamOrPersonBase.class);
72
		TeamOrPersonBase  refTeam = nomRef.getAuthorship();
73
		if (nameTeam == null ){
74
			logger.warn("Name has nom. ref. but no author team. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
75
		}else if (refTeam == null ){
76
			logger.warn("Name has nom. ref. but no nom.ref. author. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
77
		}else if (! authorTeamsMatch(refTeam, nameTeam)){
78
			logger.warn("Nom.Ref. author and comb. author do not match: " + nomRef.getTitleCache() + " <-> " + nameTeam.getNomenclaturalTitle());
79
		}else {
80
			nomRef.setAuthorship(nameTeam);
81
			nomRef.setTitle(CdmUtils.Nz(nomRef.getTitle()) + " - no title given yet -");
82
			nameTeam.setTitleCache(refTeam.getTitleCache(), true);
83
		}
84
		return nameTeam;
85
	}
86

    
87
	/**
88
	 * Extracts the date published part and returns micro reference
89
	 * @param ref
90
	 * @return
91
	 */
92
	protected String parseReferenceYearAndDetail(Reference ref){
93
		String detailResult = null;
94
		String titleToParse = ref.getTitleCache();
95
		titleToParse = removeReferenceBracket(titleToParse, ref);
96

    
97
		int detailStart = titleToParse.indexOf(":");
98
		if (detailStart >=  0){
99
			detailResult = titleToParse.substring(detailStart + 1);
100
			titleToParse = titleToParse.substring(0, titleToParse.length() -  detailResult.length() - 1).trim();
101
			detailResult = detailResult.trim();
102
		}
103

    
104
		String reYear = "\\s[1-2]{1}[0-9]{3}";
105
		String reYearPeriod = reYear;
106
//
107
//		//pattern for the whole string
108
		Pattern patReference = Pattern.compile( reYearPeriod );
109
		Matcher matcher = patReference.matcher(titleToParse);
110
		if (matcher.find()){
111
			int start = matcher.start();
112
			int end = matcher.end();
113
//
114
			String strPeriod = titleToParse.substring(start, end);
115
			TimePeriod datePublished = TimePeriodParser.parseString(strPeriod);
116
			ref.setDatePublished(datePublished);
117
			String author = titleToParse.substring(0, start).trim();
118
			author = parseInRefrence(ref, author);
119
			TeamOrPersonBase team = parseSingleTeam(author);
120
			ref.setAuthorship(team);
121
			ref.setProtectedTitleCache(false);
122
		}else{
123
			logger.warn("Could not parse reference: " +  titleToParse);
124
		}
125
		return detailResult;
126

    
127
	}
128

    
129
	private String parseInRefrence(Reference ref, String author) {
130
		int pos = author.indexOf(" in ");
131
		if (pos > -1){
132
			String inAuthorString = author.substring(pos + 4);
133
			String myAuthorString = author.substring(0, pos);
134
			Reference inReference = ReferenceFactory.newGeneric();
135
			TeamOrPersonBase inAuthor = parseSingleTeam(inAuthorString);
136
			inReference.setAuthorship(inAuthor);
137
			ref.setInReference(inReference);
138
			return myAuthorString;
139
		}else{
140
			return author;
141
		}
142

    
143
	}
144

    
145
	private String removeReferenceBracket(String refString, Reference ref) {
146
		String titleToParse = refString;
147
		String reBracket = "\\(.*\\).?";
148
		Pattern patBracket = Pattern.compile(reBracket);
149
		Matcher matcher = patBracket.matcher(titleToParse);
150

    
151
		if (matcher.matches()){
152
			int start = matcher.start() + 1;
153
			int end = matcher.end() -1 ;
154
			if (! titleToParse.endsWith("")){
155
				end = end - 1;
156
			}
157
			titleToParse = titleToParse.substring(start, end);
158

    
159
			ref.setTitleCache(titleToParse);
160
		}
161
		return titleToParse;
162
	}
163

    
164
	/**
165
	 * @param taxon
166
	 * @param name
167
	 * @param value
168
	 */
169
	@Override
170
	protected TeamOrPersonBase<?> handleNameUsage(Taxon taxon, NonViralName<?> name, String referenceTitle, TeamOrPersonBase lastTeam) {
171
		Reference ref = ReferenceFactory.newGeneric();
172

    
173
		ref.setTitleCache(referenceTitle, true);
174

    
175
		TeamOrPersonBase<?> team = getReferenceAuthor(ref, name);
176
		ref.setAuthorship(team);
177

    
178
		String[] multipleReferences = ref.getTitleCache().split("&");
179

    
180
		TaxonDescription description = getDescription(taxon);
181
		for (String singleReferenceString : multipleReferences){
182
			Reference singleRef = ReferenceFactory.newGeneric();
183
			singleRef.setTitleCache(singleReferenceString.trim(), true);
184
			singleRef.setAuthorship(team);
185

    
186
			String microReference = parseReferenceYearAndDetailForUsage(singleRef);
187

    
188
			singleRef.setTitle( CdmUtils.Nz(singleRef.getTitle()) + " - no title given yet -");
189

    
190
	//		parseReferenceType(ref);
191

    
192
			TextData textData = TextData.NewInstance(Feature.CITATION());
193
			textData.addSource(OriginalSourceType.PrimaryTaxonomicSource, null, null, singleRef, microReference, name, null);
194
			description.addElement(textData);
195
		}
196
		return team;
197
	}
198

    
199
	private String parseReferenceYearAndDetailForUsage(Reference ref) {
200
		String detailResult = null;
201
		String titleToParse = ref.getTitleCache().trim();
202

    
203
		int detailStart = titleToParse.indexOf(":");
204
		if (detailStart >=  0){
205
			detailResult = titleToParse.substring(detailStart + 1);
206
			titleToParse = titleToParse.substring(0, titleToParse.length() -  detailResult.length() - 1).trim();
207
			detailResult = detailResult.trim();
208
		}
209

    
210
		String reYear = "^[1-2]{1}[0-9]{3}[a-e]?$";
211
		String reYearPeriod = reYear;
212
//
213
//			//pattern for the whole string
214
		Pattern patReference = Pattern.compile( reYearPeriod );
215
		Matcher matcher = patReference.matcher(titleToParse);
216
		if (! matcher.find()){
217
			logger.warn("Could not parse year: " +  titleToParse);
218
		}else{
219
			if (Pattern.matches("^[1-2]{1}[0-9]{3}[a-e]$", titleToParse)){
220
				String title = titleToParse.substring(4,5);
221
				ref.setTitle(title);
222
				titleToParse = titleToParse.substring(0, 4);
223
			}
224
			ref.setProtectedTitleCache(false);
225
		}
226
		TimePeriod datePublished = TimePeriodParser.parseString(titleToParse);
227
		ref.setDatePublished(datePublished);
228
		return detailResult;
229

    
230
	}
231

    
232
	protected TeamOrPersonBase getReferenceAuthor (Reference ref, NonViralName name) {
233
		String titleString = ref.getTitleCache();
234
		String re = "\\(.*\\)";
235
		Pattern pattern = Pattern.compile(re);
236
		Matcher matcher = pattern.matcher(titleString);
237
		if (matcher.find()){
238
			int start = matcher.start();
239
			String authorString = titleString.substring(0, start).trim();
240
			String restString = titleString.substring(start + 1 , matcher.end() - 1);
241
			TeamOrPersonBase team = getAuthorTeam(authorString, name);
242
			ref.setTitleCache(restString, true);
243
			return team;
244
		}else{
245
			logger.warn("Title does not match: " + titleString);
246
			return null;
247
		}
248

    
249
	}
250

    
251
	private TeamOrPersonBase getAuthorTeam(String authorString, NonViralName name) {
252
		//TODO atomize
253
//		TeamOrPersonBase nameTeam = CdmBase.deproxy(name.getCombinationAuthorship(), TeamOrPersonBase.class);
254
//		String nameTeamTitle = nameTeam == null ? "" : nameTeam.getNomenclaturalTitle();
255

    
256
//		if (nameTeam == null || ! authorTeamsMatch(authorString, nameTeamTitle)){
257
//			logger.warn("Author teams do not match: " + authorString + " <-> " + nameTeamTitle);
258
			TeamOrPersonBase result = parseSingleTeam(authorString);
259
			result.setTitleCache(authorString, true);
260
			return result;
261
//		}else{
262
//			nameTeam.setTitleCache(authorString, true);
263
//			return nameTeam;
264
//		}
265
	}
266

    
267
	/**
268
	 * @param refAuthorTeam
269
	 * @param nameTeam
270
	 * @return
271
	 */
272
	private boolean authorTeamsMatch(TeamOrPersonBase refAuthorTeam, TeamOrPersonBase nameTeam) {
273
		String nameTeamString = nameTeam.getNomenclaturalTitle();
274
		String refAuthorTeamString = refAuthorTeam.getTitleCache();
275
		if (nameTeamString.equalsIgnoreCase(refAuthorTeamString)){
276
			return true;
277
		}
278

    
279
		if (nameTeamString.endsWith(".")){
280
			nameTeamString = nameTeamString.substring(0, nameTeamString.length() - 1 );
281
			if (refAuthorTeamString.startsWith(nameTeamString)){
282
				return true;
283
			}else{
284
				return checkSingleAndIpniAuthor(nameTeam, refAuthorTeam);
285
			}
286
		}else{
287
			if (nameTeamString.endsWith(refAuthorTeamString) || refAuthorTeamString.endsWith(nameTeamString)){
288
				return true;
289
			}else{
290
				return checkSingleAndIpniAuthor(nameTeam, refAuthorTeam);
291
			}
292
		}
293
	}
294

    
295
	private boolean checkSingleAndIpniAuthor(TeamOrPersonBase nameTeam, TeamOrPersonBase refAuthorTeam) {
296
		if ( nameTeam.isInstanceOf(Team.class) && ((Team)nameTeam).getTeamMembers().size()> 1 ||
297
				refAuthorTeam.isInstanceOf(Team.class) && ((Team)refAuthorTeam).getTeamMembers().size()> 1){
298
			//class
299
			if (! (nameTeam.isInstanceOf(Team.class) && refAuthorTeam.isInstanceOf(Team.class) ) ){
300
				logger.warn("Only one author is a real team");
301
				return false;
302
			}
303
			Team realNameTeam = (Team)nameTeam;
304
			Team realRefAuthorTeam = (Team)refAuthorTeam;
305
			//size
306
			if (realNameTeam.getTeamMembers().size() != realRefAuthorTeam.getTeamMembers().size()){
307
				logger.warn("Teams do not have the same size");
308
				return false;
309
			}
310
			//empty teams
311
			if (realNameTeam.getTeamMembers().size() == 0){
312
				logger.warn("Teams are empty");
313
				return false;
314
			}
315
			//compare each team member
316
			for (int i = 0; i < realNameTeam.getTeamMembers().size(); i++){
317
				Person namePerson = realNameTeam.getTeamMembers().get(i);
318
				Person refPerson = realRefAuthorTeam.getTeamMembers().get(i);
319
				if ( authorTeamsMatch(refPerson, namePerson) == false){
320
					return false;
321
				}
322
			}
323
			return true;
324
		}
325
		boolean result = checkIpniAuthor(nameTeam.getNomenclaturalTitle(), refAuthorTeam);
326
		return result;
327
	}
328

    
329
	private boolean checkIpniAuthor(String nameTeamString, TeamOrPersonBase refAuthorTeam) {
330
		IpniService ipniService = new IpniService();
331
		List<Person> ipniAuthors = ipniService.getAuthors(nameTeamString, null, null, null, null, null);
332
		if (ipniAuthors != null){
333
			for (Person ipniAuthor : ipniAuthors){
334
				if (ipniAuthor.getLastname() != null && ipniAuthor.getLastname().equalsIgnoreCase(refAuthorTeam.getTitleCache())){
335
					return true;
336
				}
337
				logger.warn(ipniAuthor.getTitleCache() + " <-> " + refAuthorTeam.getTitleCache());
338
			}
339
		}else{
340
			logger.warn("IPNI not available");
341
		}
342
		return false;
343
	}
344

    
345
	/**
346
	 * @param state
347
	 * @param elNom
348
	 * @param taxon
349
	 * @param homotypicalGroup
350
	 */
351
	@Override
352
	protected void handleTypeRef(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
353
		verifyNoChildren(elNom);
354
		String typeRef = elNom.getTextNormalize();
355
		typeRef = removeStartingTypeRefMinus(typeRef);
356
		typeRef = removeTypePrefix(typeRef);
357
		TypeDesignationBase typeDesignation = SpecimenTypeDesignation.NewInstance();
358
		makeSpecimenTypeDesignation(new StringBuffer("Type"), typeRef, typeDesignation);
359
		for (TaxonNameBase name : homotypicalGroup.getTypifiedNames()){
360
			name.addTypeDesignation(typeDesignation, true);
361
		}
362
	}
363

    
364
	private String removeTypePrefix(String typeRef) {
365
		typeRef = typeRef.trim().replace("Type: ", "").replace("Types: ", "").trim();
366
		return typeRef;
367
	}
368

    
369
	@Override
370
    protected void handleGenus(String value, TaxonNameBase taxonName) {
371
		// do nothing
372
	}
373

    
374

    
375

    
376
}
(3-3/4)