Project

General

Profile

Download (13.1 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2009 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.io.eflora.centralAfrica.ericaceae;
11

    
12
import java.util.List;
13
import java.util.regex.Matcher;
14
import java.util.regex.Pattern;
15

    
16
import org.apache.log4j.Logger;
17
import org.jdom.Element;
18
import org.springframework.stereotype.Component;
19

    
20
import eu.etaxonomy.cdm.common.CdmUtils;
21
import eu.etaxonomy.cdm.ext.ipni.IpniService;
22
import eu.etaxonomy.cdm.io.eflora.EfloraImportState;
23
import eu.etaxonomy.cdm.io.eflora.EfloraTaxonImport;
24
import eu.etaxonomy.cdm.model.agent.Person;
25
import eu.etaxonomy.cdm.model.agent.Team;
26
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
27
import eu.etaxonomy.cdm.model.common.CdmBase;
28
import eu.etaxonomy.cdm.model.common.TimePeriod;
29
import eu.etaxonomy.cdm.model.description.Feature;
30
import eu.etaxonomy.cdm.model.description.TaxonDescription;
31
import eu.etaxonomy.cdm.model.description.TextData;
32
import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
33
import eu.etaxonomy.cdm.model.name.INonViralName;
34
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
35
import eu.etaxonomy.cdm.model.name.TaxonName;
36
import eu.etaxonomy.cdm.model.name.TypeDesignationBase;
37
import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
38
import eu.etaxonomy.cdm.model.reference.Reference;
39
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
40
import eu.etaxonomy.cdm.model.taxon.Taxon;
41
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
42

    
43

    
44
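/**
 * Taxon import for the Central Africa Ericaceae eFlora data (see package name).
 * Overrides the nomenclatural reference, name usage, type reference and genus
 * handling of {@link EfloraTaxonImport}.
 *
 * @author a.mueller
 */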
48
@Component
49
public class CentralAfricaEricaceaeTaxonImport  extends EfloraTaxonImport  {
50
    private static final long serialVersionUID = 6442665916458420942L;
51
    private static final Logger logger = Logger.getLogger(CentralAfricaEricaceaeTaxonImport.class);
52

    
53

    
54
	@Override
55
	protected TeamOrPersonBase handleNomenclaturalReference(TaxonName name, String value) {
56
		Reference nomRef = ReferenceFactory.newGeneric();
57
		nomRef.setTitleCache(value, true);
58
		parseNomStatus(nomRef, name);
59
		name.setNomenclaturalReference(nomRef);
60

    
61
		String microReference = parseReferenceYearAndDetail(nomRef);
62
		microReference = removeTrailing(microReference, ")");
63

    
64
		microReference = parseHomonym(microReference, name);
65
		name.setNomenclaturalMicroReference(microReference);
66

    
67
		TeamOrPersonBase<?>  nameTeam = CdmBase.deproxy(name.getCombinationAuthorship(), TeamOrPersonBase.class);
68
		TeamOrPersonBase<?>  refTeam = nomRef.getAuthorship();
69
		if (nameTeam == null ){
70
			logger.warn("Name has nom. ref. but no author team. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
71
		}else if (refTeam == null ){
72
			logger.warn("Name has nom. ref. but no nom.ref. author. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
73
		}else if (! authorTeamsMatch(refTeam, nameTeam)){
74
			logger.warn("Nom.Ref. author and comb. author do not match: " + nomRef.getTitleCache() + " <-> " + nameTeam.getNomenclaturalTitle());
75
		}else {
76
			nomRef.setAuthorship(nameTeam);
77
			nomRef.setTitle(CdmUtils.Nz(nomRef.getTitle()) + " - no title given yet -");
78
			nameTeam.setTitleCache(refTeam.getTitleCache(), true);
79
		}
80
		return nameTeam;
81
	}
82

    
83
	/**
84
	 * Extracts the date published part and returns micro reference
85
	 * @param ref
86
	 * @return
87
	 */
88
	protected String parseReferenceYearAndDetail(Reference ref){
89
		String detailResult = null;
90
		String titleToParse = ref.getTitleCache();
91
		titleToParse = removeReferenceBracket(titleToParse, ref);
92

    
93
		int detailStart = titleToParse.indexOf(":");
94
		if (detailStart >=  0){
95
			detailResult = titleToParse.substring(detailStart + 1);
96
			titleToParse = titleToParse.substring(0, titleToParse.length() -  detailResult.length() - 1).trim();
97
			detailResult = detailResult.trim();
98
		}
99

    
100
		String reYear = "\\s[1-2]{1}[0-9]{3}";
101
		String reYearPeriod = reYear;
102
//
103
//		//pattern for the whole string
104
		Pattern patReference = Pattern.compile( reYearPeriod );
105
		Matcher matcher = patReference.matcher(titleToParse);
106
		if (matcher.find()){
107
			int start = matcher.start();
108
			int end = matcher.end();
109
//
110
			String strPeriod = titleToParse.substring(start, end);
111
			TimePeriod datePublished = TimePeriodParser.parseString(strPeriod);
112
			ref.setDatePublished(datePublished);
113
			String author = titleToParse.substring(0, start).trim();
114
			author = parseInRefrence(ref, author);
115
			TeamOrPersonBase team = parseSingleTeam(author);
116
			ref.setAuthorship(team);
117
			ref.setProtectedTitleCache(false);
118
		}else{
119
			logger.warn("Could not parse reference: " +  titleToParse);
120
		}
121
		return detailResult;
122

    
123
	}
124

    
125
	private String parseInRefrence(Reference ref, String author) {
126
		int pos = author.indexOf(" in ");
127
		if (pos > -1){
128
			String inAuthorString = author.substring(pos + 4);
129
			String myAuthorString = author.substring(0, pos);
130
			Reference inReference = ReferenceFactory.newGeneric();
131
			TeamOrPersonBase inAuthor = parseSingleTeam(inAuthorString);
132
			inReference.setAuthorship(inAuthor);
133
			ref.setInReference(inReference);
134
			return myAuthorString;
135
		}else{
136
			return author;
137
		}
138

    
139
	}
140

    
141
	private String removeReferenceBracket(String refString, Reference ref) {
142
		String titleToParse = refString;
143
		String reBracket = "\\(.*\\).?";
144
		Pattern patBracket = Pattern.compile(reBracket);
145
		Matcher matcher = patBracket.matcher(titleToParse);
146

    
147
		if (matcher.matches()){
148
			int start = matcher.start() + 1;
149
			int end = matcher.end() -1 ;
150
			if (! titleToParse.endsWith("")){
151
				end = end - 1;
152
			}
153
			titleToParse = titleToParse.substring(start, end);
154

    
155
			ref.setTitleCache(titleToParse);
156
		}
157
		return titleToParse;
158
	}
159

    
160
	/**
161
	 * @param taxon
162
	 * @param name
163
	 * @param value
164
	 */
165
	@Override
166
	protected TeamOrPersonBase<?> handleNameUsage(Taxon taxon, TaxonName name,
167
	        String referenceTitle, TeamOrPersonBase lastTeam) {
168

    
169
		Reference ref = ReferenceFactory.newGeneric();
170

    
171
		ref.setTitleCache(referenceTitle, true);
172

    
173
		TeamOrPersonBase<?> team = getReferenceAuthor(ref, name);
174
		ref.setAuthorship(team);
175

    
176
		String[] multipleReferences = ref.getTitleCache().split("&");
177

    
178
		TaxonDescription description = getDescription(taxon);
179
		for (String singleReferenceString : multipleReferences){
180
			Reference singleRef = ReferenceFactory.newGeneric();
181
			singleRef.setTitleCache(singleReferenceString.trim(), true);
182
			singleRef.setAuthorship(team);
183

    
184
			String microReference = parseReferenceYearAndDetailForUsage(singleRef);
185

    
186
			singleRef.setTitle( CdmUtils.Nz(singleRef.getTitle()) + " - no title given yet -");
187

    
188
	//		parseReferenceType(ref);
189

    
190
			TextData textData = TextData.NewInstance(Feature.CITATION());
191
			textData.addSource(OriginalSourceType.PrimaryTaxonomicSource, null, null, singleRef, microReference,
192
			        name, null);
193
			description.addElement(textData);
194
		}
195
		return team;
196
	}
197

    
198
	private String parseReferenceYearAndDetailForUsage(Reference ref) {
199
		String detailResult = null;
200
		String titleToParse = ref.getTitleCache().trim();
201

    
202
		int detailStart = titleToParse.indexOf(":");
203
		if (detailStart >=  0){
204
			detailResult = titleToParse.substring(detailStart + 1);
205
			titleToParse = titleToParse.substring(0, titleToParse.length() -  detailResult.length() - 1).trim();
206
			detailResult = detailResult.trim();
207
		}
208

    
209
		String reYear = "^[1-2]{1}[0-9]{3}[a-e]?$";
210
		String reYearPeriod = reYear;
211
//
212
//			//pattern for the whole string
213
		Pattern patReference = Pattern.compile( reYearPeriod );
214
		Matcher matcher = patReference.matcher(titleToParse);
215
		if (! matcher.find()){
216
			logger.warn("Could not parse year: " +  titleToParse);
217
		}else{
218
			if (Pattern.matches("^[1-2]{1}[0-9]{3}[a-e]$", titleToParse)){
219
				String title = titleToParse.substring(4,5);
220
				ref.setTitle(title);
221
				titleToParse = titleToParse.substring(0, 4);
222
			}
223
			ref.setProtectedTitleCache(false);
224
		}
225
		TimePeriod datePublished = TimePeriodParser.parseString(titleToParse);
226
		ref.setDatePublished(datePublished);
227
		return detailResult;
228

    
229
	}
230

    
231
	protected TeamOrPersonBase getReferenceAuthor (Reference ref, INonViralName name) {
232
		String titleString = ref.getTitleCache();
233
		String re = "\\(.*\\)";
234
		Pattern pattern = Pattern.compile(re);
235
		Matcher matcher = pattern.matcher(titleString);
236
		if (matcher.find()){
237
			int start = matcher.start();
238
			String authorString = titleString.substring(0, start).trim();
239
			String restString = titleString.substring(start + 1 , matcher.end() - 1);
240
			TeamOrPersonBase team = getAuthorTeam(authorString, name);
241
			ref.setTitleCache(restString, true);
242
			return team;
243
		}else{
244
			logger.warn("Title does not match: " + titleString);
245
			return null;
246
		}
247

    
248
	}
249

    
250
	private TeamOrPersonBase getAuthorTeam(String authorString, INonViralName name) {
251
		//TODO atomize
252
//		TeamOrPersonBase nameTeam = CdmBase.deproxy(name.getCombinationAuthorship(), TeamOrPersonBase.class);
253
//		String nameTeamTitle = nameTeam == null ? "" : nameTeam.getNomenclaturalTitle();
254

    
255
//		if (nameTeam == null || ! authorTeamsMatch(authorString, nameTeamTitle)){
256
//			logger.warn("Author teams do not match: " + authorString + " <-> " + nameTeamTitle);
257
			TeamOrPersonBase result = parseSingleTeam(authorString);
258
			result.setTitleCache(authorString, true);
259
			return result;
260
//		}else{
261
//			nameTeam.setTitleCache(authorString, true);
262
//			return nameTeam;
263
//		}
264
	}
265

    
266
	/**
267
	 * @param refAuthorTeam
268
	 * @param nameTeam
269
	 * @return
270
	 */
271
	private boolean authorTeamsMatch(TeamOrPersonBase refAuthorTeam, TeamOrPersonBase nameTeam) {
272
		String nameTeamString = nameTeam.getNomenclaturalTitle();
273
		String refAuthorTeamString = refAuthorTeam.getTitleCache();
274
		if (nameTeamString.equalsIgnoreCase(refAuthorTeamString)){
275
			return true;
276
		}
277

    
278
		if (nameTeamString.endsWith(".")){
279
			nameTeamString = nameTeamString.substring(0, nameTeamString.length() - 1 );
280
			if (refAuthorTeamString.startsWith(nameTeamString)){
281
				return true;
282
			}else{
283
				return checkSingleAndIpniAuthor(nameTeam, refAuthorTeam);
284
			}
285
		}else{
286
			if (nameTeamString.endsWith(refAuthorTeamString) || refAuthorTeamString.endsWith(nameTeamString)){
287
				return true;
288
			}else{
289
				return checkSingleAndIpniAuthor(nameTeam, refAuthorTeam);
290
			}
291
		}
292
	}
293

    
294
	private boolean checkSingleAndIpniAuthor(TeamOrPersonBase nameTeam, TeamOrPersonBase refAuthorTeam) {
295
		if ( nameTeam.isInstanceOf(Team.class) && ((Team)nameTeam).getTeamMembers().size()> 1 ||
296
				refAuthorTeam.isInstanceOf(Team.class) && ((Team)refAuthorTeam).getTeamMembers().size()> 1){
297
			//class
298
			if (! (nameTeam.isInstanceOf(Team.class) && refAuthorTeam.isInstanceOf(Team.class) ) ){
299
				logger.warn("Only one author is a real team");
300
				return false;
301
			}
302
			Team realNameTeam = (Team)nameTeam;
303
			Team realRefAuthorTeam = (Team)refAuthorTeam;
304
			//size
305
			if (realNameTeam.getTeamMembers().size() != realRefAuthorTeam.getTeamMembers().size()){
306
				logger.warn("Teams do not have the same size");
307
				return false;
308
			}
309
			//empty teams
310
			if (realNameTeam.getTeamMembers().size() == 0){
311
				logger.warn("Teams are empty");
312
				return false;
313
			}
314
			//compare each team member
315
			for (int i = 0; i < realNameTeam.getTeamMembers().size(); i++){
316
				Person namePerson = realNameTeam.getTeamMembers().get(i);
317
				Person refPerson = realRefAuthorTeam.getTeamMembers().get(i);
318
				if ( authorTeamsMatch(refPerson, namePerson) == false){
319
					return false;
320
				}
321
			}
322
			return true;
323
		}
324
		boolean result = checkIpniAuthor(nameTeam.getNomenclaturalTitle(), refAuthorTeam);
325
		return result;
326
	}
327

    
328
	private boolean checkIpniAuthor(String nameTeamString, TeamOrPersonBase refAuthorTeam) {
329
		IpniService ipniService = new IpniService();
330
		List<Person> ipniAuthors = ipniService.getAuthors(nameTeamString, null, null, null, null, null);
331
		if (ipniAuthors != null){
332
			for (Person ipniAuthor : ipniAuthors){
333
				if (ipniAuthor.getFamilyName() != null && ipniAuthor.getFamilyName().equalsIgnoreCase(refAuthorTeam.getTitleCache())){
334
					return true;
335
				}
336
				logger.warn(ipniAuthor.getTitleCache() + " <-> " + refAuthorTeam.getTitleCache());
337
			}
338
		}else{
339
			logger.warn("IPNI not available");
340
		}
341
		return false;
342
	}
343

    
344
	/**
345
	 * @param state
346
	 * @param elNom
347
	 * @param taxon
348
	 * @param homotypicalGroup
349
	 */
350
	@Override
351
	protected void handleTypeRef(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
352
		verifyNoChildren(elNom);
353
		String typeRef = elNom.getTextNormalize();
354
		typeRef = removeStartingTypeRefMinus(typeRef);
355
		typeRef = removeTypePrefix(typeRef);
356
		TypeDesignationBase typeDesignation = SpecimenTypeDesignation.NewInstance();
357
		makeSpecimenTypeDesignation(new StringBuffer("Type"), typeRef, typeDesignation);
358
		for (TaxonName name : homotypicalGroup.getTypifiedNames()){
359
			name.addTypeDesignation(typeDesignation, true);
360
		}
361
	}
362

    
363
	private String removeTypePrefix(String typeRef) {
364
		typeRef = typeRef.trim().replace("Type: ", "").replace("Types: ", "").trim();
365
		return typeRef;
366
	}
367

    
368
	@Override
369
    protected void handleGenus(String value, INonViralName taxonName) {
370
		// do nothing
371
	}
372

    
373

    
374

    
375
}
(3-3/4)