ref #6369 adapt existing occurrences of interface to removed generics in cdmlib-app
[cdmlib-apps.git] / cdm-eflora / src / main / java / eu / etaxonomy / cdm / io / eflora / centralAfrica / ericaceae / CentralAfricaEricaceaeTaxonImport.java
1 /**
2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.eflora.centralAfrica.ericaceae;
11
12 import java.util.List;
13 import java.util.regex.Matcher;
14 import java.util.regex.Pattern;
15
16 import org.apache.log4j.Logger;
17 import org.jdom.Element;
18 import org.springframework.stereotype.Component;
19
20 import eu.etaxonomy.cdm.common.CdmUtils;
21 import eu.etaxonomy.cdm.ext.ipni.IpniService;
22 import eu.etaxonomy.cdm.io.eflora.EfloraImportState;
23 import eu.etaxonomy.cdm.io.eflora.EfloraTaxonImport;
24 import eu.etaxonomy.cdm.model.agent.Person;
25 import eu.etaxonomy.cdm.model.agent.Team;
26 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
27 import eu.etaxonomy.cdm.model.common.CdmBase;
28 import eu.etaxonomy.cdm.model.common.OriginalSourceType;
29 import eu.etaxonomy.cdm.model.common.TimePeriod;
30 import eu.etaxonomy.cdm.model.description.Feature;
31 import eu.etaxonomy.cdm.model.description.TaxonDescription;
32 import eu.etaxonomy.cdm.model.description.TextData;
33 import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
34 import eu.etaxonomy.cdm.model.name.INonViralName;
35 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
36 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
37 import eu.etaxonomy.cdm.model.name.TypeDesignationBase;
38 import eu.etaxonomy.cdm.model.reference.Reference;
39 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
40 import eu.etaxonomy.cdm.model.taxon.Taxon;
41 import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
42
43
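/**
 * Taxon import for the Central Africa Ericaceae e-flora. Overrides the nomenclatural
 * reference, name usage, type reference and genus hooks of {@link EfloraTaxonImport}.
 *
 * @author a.mueller
 */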
48 @Component
49 public class CentralAfricaEricaceaeTaxonImport extends EfloraTaxonImport {
50 private static final long serialVersionUID = 6442665916458420942L;
51 private static final Logger logger = Logger.getLogger(CentralAfricaEricaceaeTaxonImport.class);
52
53
54 @Override
55 protected TeamOrPersonBase handleNomenclaturalReference(TaxonNameBase name, String value) {
56 Reference nomRef = ReferenceFactory.newGeneric();
57 nomRef.setTitleCache(value, true);
58 parseNomStatus(nomRef, name);
59 name.setNomenclaturalReference(nomRef);
60
61 String microReference = parseReferenceYearAndDetail(nomRef);
62 microReference = removeTrailing(microReference, ")");
63
64 microReference = parseHomonym(microReference, name);
65 name.setNomenclaturalMicroReference(microReference);
66
67 TeamOrPersonBase<?> nameTeam = CdmBase.deproxy(name.getCombinationAuthorship(), TeamOrPersonBase.class);
68 TeamOrPersonBase<?> refTeam = nomRef.getAuthorship();
69 if (nameTeam == null ){
70 logger.warn("Name has nom. ref. but no author team. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
71 }else if (refTeam == null ){
72 logger.warn("Name has nom. ref. but no nom.ref. author. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
73 }else if (! authorTeamsMatch(refTeam, nameTeam)){
74 logger.warn("Nom.Ref. author and comb. author do not match: " + nomRef.getTitleCache() + " <-> " + nameTeam.getNomenclaturalTitle());
75 }else {
76 nomRef.setAuthorship(nameTeam);
77 nomRef.setTitle(CdmUtils.Nz(nomRef.getTitle()) + " - no title given yet -");
78 nameTeam.setTitleCache(refTeam.getTitleCache(), true);
79 }
80 return nameTeam;
81 }
82
83 /**
84 * Extracts the date published part and returns micro reference
85 * @param ref
86 * @return
87 */
88 protected String parseReferenceYearAndDetail(Reference ref){
89 String detailResult = null;
90 String titleToParse = ref.getTitleCache();
91 titleToParse = removeReferenceBracket(titleToParse, ref);
92
93 int detailStart = titleToParse.indexOf(":");
94 if (detailStart >= 0){
95 detailResult = titleToParse.substring(detailStart + 1);
96 titleToParse = titleToParse.substring(0, titleToParse.length() - detailResult.length() - 1).trim();
97 detailResult = detailResult.trim();
98 }
99
100 String reYear = "\\s[1-2]{1}[0-9]{3}";
101 String reYearPeriod = reYear;
102 //
103 // //pattern for the whole string
104 Pattern patReference = Pattern.compile( reYearPeriod );
105 Matcher matcher = patReference.matcher(titleToParse);
106 if (matcher.find()){
107 int start = matcher.start();
108 int end = matcher.end();
109 //
110 String strPeriod = titleToParse.substring(start, end);
111 TimePeriod datePublished = TimePeriodParser.parseString(strPeriod);
112 ref.setDatePublished(datePublished);
113 String author = titleToParse.substring(0, start).trim();
114 author = parseInRefrence(ref, author);
115 TeamOrPersonBase team = parseSingleTeam(author);
116 ref.setAuthorship(team);
117 ref.setProtectedTitleCache(false);
118 }else{
119 logger.warn("Could not parse reference: " + titleToParse);
120 }
121 return detailResult;
122
123 }
124
125 private String parseInRefrence(Reference ref, String author) {
126 int pos = author.indexOf(" in ");
127 if (pos > -1){
128 String inAuthorString = author.substring(pos + 4);
129 String myAuthorString = author.substring(0, pos);
130 Reference inReference = ReferenceFactory.newGeneric();
131 TeamOrPersonBase inAuthor = parseSingleTeam(inAuthorString);
132 inReference.setAuthorship(inAuthor);
133 ref.setInReference(inReference);
134 return myAuthorString;
135 }else{
136 return author;
137 }
138
139 }
140
141 private String removeReferenceBracket(String refString, Reference ref) {
142 String titleToParse = refString;
143 String reBracket = "\\(.*\\).?";
144 Pattern patBracket = Pattern.compile(reBracket);
145 Matcher matcher = patBracket.matcher(titleToParse);
146
147 if (matcher.matches()){
148 int start = matcher.start() + 1;
149 int end = matcher.end() -1 ;
150 if (! titleToParse.endsWith("")){
151 end = end - 1;
152 }
153 titleToParse = titleToParse.substring(start, end);
154
155 ref.setTitleCache(titleToParse);
156 }
157 return titleToParse;
158 }
159
160 /**
161 * @param taxon
162 * @param name
163 * @param value
164 */
165 @Override
166 protected TeamOrPersonBase<?> handleNameUsage(Taxon taxon, INonViralName name,
167 String referenceTitle, TeamOrPersonBase lastTeam) {
168
169 Reference ref = ReferenceFactory.newGeneric();
170
171 ref.setTitleCache(referenceTitle, true);
172
173 TeamOrPersonBase<?> team = getReferenceAuthor(ref, name);
174 ref.setAuthorship(team);
175
176 String[] multipleReferences = ref.getTitleCache().split("&");
177
178 TaxonDescription description = getDescription(taxon);
179 for (String singleReferenceString : multipleReferences){
180 Reference singleRef = ReferenceFactory.newGeneric();
181 singleRef.setTitleCache(singleReferenceString.trim(), true);
182 singleRef.setAuthorship(team);
183
184 String microReference = parseReferenceYearAndDetailForUsage(singleRef);
185
186 singleRef.setTitle( CdmUtils.Nz(singleRef.getTitle()) + " - no title given yet -");
187
188 // parseReferenceType(ref);
189
190 TextData textData = TextData.NewInstance(Feature.CITATION());
191 textData.addSource(OriginalSourceType.PrimaryTaxonomicSource, null, null, singleRef, microReference, (TaxonNameBase) name, null);
192 description.addElement(textData);
193 }
194 return team;
195 }
196
197 private String parseReferenceYearAndDetailForUsage(Reference ref) {
198 String detailResult = null;
199 String titleToParse = ref.getTitleCache().trim();
200
201 int detailStart = titleToParse.indexOf(":");
202 if (detailStart >= 0){
203 detailResult = titleToParse.substring(detailStart + 1);
204 titleToParse = titleToParse.substring(0, titleToParse.length() - detailResult.length() - 1).trim();
205 detailResult = detailResult.trim();
206 }
207
208 String reYear = "^[1-2]{1}[0-9]{3}[a-e]?$";
209 String reYearPeriod = reYear;
210 //
211 // //pattern for the whole string
212 Pattern patReference = Pattern.compile( reYearPeriod );
213 Matcher matcher = patReference.matcher(titleToParse);
214 if (! matcher.find()){
215 logger.warn("Could not parse year: " + titleToParse);
216 }else{
217 if (Pattern.matches("^[1-2]{1}[0-9]{3}[a-e]$", titleToParse)){
218 String title = titleToParse.substring(4,5);
219 ref.setTitle(title);
220 titleToParse = titleToParse.substring(0, 4);
221 }
222 ref.setProtectedTitleCache(false);
223 }
224 TimePeriod datePublished = TimePeriodParser.parseString(titleToParse);
225 ref.setDatePublished(datePublished);
226 return detailResult;
227
228 }
229
230 protected TeamOrPersonBase getReferenceAuthor (Reference ref, INonViralName name) {
231 String titleString = ref.getTitleCache();
232 String re = "\\(.*\\)";
233 Pattern pattern = Pattern.compile(re);
234 Matcher matcher = pattern.matcher(titleString);
235 if (matcher.find()){
236 int start = matcher.start();
237 String authorString = titleString.substring(0, start).trim();
238 String restString = titleString.substring(start + 1 , matcher.end() - 1);
239 TeamOrPersonBase team = getAuthorTeam(authorString, name);
240 ref.setTitleCache(restString, true);
241 return team;
242 }else{
243 logger.warn("Title does not match: " + titleString);
244 return null;
245 }
246
247 }
248
249 private TeamOrPersonBase getAuthorTeam(String authorString, INonViralName name) {
250 //TODO atomize
251 // TeamOrPersonBase nameTeam = CdmBase.deproxy(name.getCombinationAuthorship(), TeamOrPersonBase.class);
252 // String nameTeamTitle = nameTeam == null ? "" : nameTeam.getNomenclaturalTitle();
253
254 // if (nameTeam == null || ! authorTeamsMatch(authorString, nameTeamTitle)){
255 // logger.warn("Author teams do not match: " + authorString + " <-> " + nameTeamTitle);
256 TeamOrPersonBase result = parseSingleTeam(authorString);
257 result.setTitleCache(authorString, true);
258 return result;
259 // }else{
260 // nameTeam.setTitleCache(authorString, true);
261 // return nameTeam;
262 // }
263 }
264
265 /**
266 * @param refAuthorTeam
267 * @param nameTeam
268 * @return
269 */
270 private boolean authorTeamsMatch(TeamOrPersonBase refAuthorTeam, TeamOrPersonBase nameTeam) {
271 String nameTeamString = nameTeam.getNomenclaturalTitle();
272 String refAuthorTeamString = refAuthorTeam.getTitleCache();
273 if (nameTeamString.equalsIgnoreCase(refAuthorTeamString)){
274 return true;
275 }
276
277 if (nameTeamString.endsWith(".")){
278 nameTeamString = nameTeamString.substring(0, nameTeamString.length() - 1 );
279 if (refAuthorTeamString.startsWith(nameTeamString)){
280 return true;
281 }else{
282 return checkSingleAndIpniAuthor(nameTeam, refAuthorTeam);
283 }
284 }else{
285 if (nameTeamString.endsWith(refAuthorTeamString) || refAuthorTeamString.endsWith(nameTeamString)){
286 return true;
287 }else{
288 return checkSingleAndIpniAuthor(nameTeam, refAuthorTeam);
289 }
290 }
291 }
292
293 private boolean checkSingleAndIpniAuthor(TeamOrPersonBase nameTeam, TeamOrPersonBase refAuthorTeam) {
294 if ( nameTeam.isInstanceOf(Team.class) && ((Team)nameTeam).getTeamMembers().size()> 1 ||
295 refAuthorTeam.isInstanceOf(Team.class) && ((Team)refAuthorTeam).getTeamMembers().size()> 1){
296 //class
297 if (! (nameTeam.isInstanceOf(Team.class) && refAuthorTeam.isInstanceOf(Team.class) ) ){
298 logger.warn("Only one author is a real team");
299 return false;
300 }
301 Team realNameTeam = (Team)nameTeam;
302 Team realRefAuthorTeam = (Team)refAuthorTeam;
303 //size
304 if (realNameTeam.getTeamMembers().size() != realRefAuthorTeam.getTeamMembers().size()){
305 logger.warn("Teams do not have the same size");
306 return false;
307 }
308 //empty teams
309 if (realNameTeam.getTeamMembers().size() == 0){
310 logger.warn("Teams are empty");
311 return false;
312 }
313 //compare each team member
314 for (int i = 0; i < realNameTeam.getTeamMembers().size(); i++){
315 Person namePerson = realNameTeam.getTeamMembers().get(i);
316 Person refPerson = realRefAuthorTeam.getTeamMembers().get(i);
317 if ( authorTeamsMatch(refPerson, namePerson) == false){
318 return false;
319 }
320 }
321 return true;
322 }
323 boolean result = checkIpniAuthor(nameTeam.getNomenclaturalTitle(), refAuthorTeam);
324 return result;
325 }
326
327 private boolean checkIpniAuthor(String nameTeamString, TeamOrPersonBase refAuthorTeam) {
328 IpniService ipniService = new IpniService();
329 List<Person> ipniAuthors = ipniService.getAuthors(nameTeamString, null, null, null, null, null);
330 if (ipniAuthors != null){
331 for (Person ipniAuthor : ipniAuthors){
332 if (ipniAuthor.getLastname() != null && ipniAuthor.getLastname().equalsIgnoreCase(refAuthorTeam.getTitleCache())){
333 return true;
334 }
335 logger.warn(ipniAuthor.getTitleCache() + " <-> " + refAuthorTeam.getTitleCache());
336 }
337 }else{
338 logger.warn("IPNI not available");
339 }
340 return false;
341 }
342
343 /**
344 * @param state
345 * @param elNom
346 * @param taxon
347 * @param homotypicalGroup
348 */
349 @Override
350 protected void handleTypeRef(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
351 verifyNoChildren(elNom);
352 String typeRef = elNom.getTextNormalize();
353 typeRef = removeStartingTypeRefMinus(typeRef);
354 typeRef = removeTypePrefix(typeRef);
355 TypeDesignationBase typeDesignation = SpecimenTypeDesignation.NewInstance();
356 makeSpecimenTypeDesignation(new StringBuffer("Type"), typeRef, typeDesignation);
357 for (TaxonNameBase name : homotypicalGroup.getTypifiedNames()){
358 name.addTypeDesignation(typeDesignation, true);
359 }
360 }
361
362 private String removeTypePrefix(String typeRef) {
363 typeRef = typeRef.trim().replace("Type: ", "").replace("Types: ", "").trim();
364 return typeRef;
365 }
366
367 @Override
368 protected void handleGenus(String value, INonViralName taxonName) {
369 // do nothing
370 }
371
372
373
374 }