e4ea69465e89640b5ea1a0faebd692d429d0eef7
[cdmlib-apps.git] / cdmlib-eflora / src / main / java / eu / etaxonomy / cdm / io / eflora / centralAfrica / ericaceae / CentralAfricaEricaceaeTaxonImport.java
1 /**
2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.eflora.centralAfrica.ericaceae;
11
12 import java.util.List;
13 import java.util.regex.Matcher;
14 import java.util.regex.Pattern;
15
16 import org.apache.log4j.Logger;
17 import org.jdom.Element;
18 import org.springframework.stereotype.Component;
19
20 import eu.etaxonomy.cdm.common.CdmUtils;
21 import eu.etaxonomy.cdm.ext.ipni.IpniService;
22 import eu.etaxonomy.cdm.io.eflora.EfloraImportState;
23 import eu.etaxonomy.cdm.io.eflora.EfloraTaxonImport;
24 import eu.etaxonomy.cdm.model.agent.Person;
25 import eu.etaxonomy.cdm.model.agent.Team;
26 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
27 import eu.etaxonomy.cdm.model.common.CdmBase;
28 import eu.etaxonomy.cdm.model.common.TimePeriod;
29 import eu.etaxonomy.cdm.model.description.Feature;
30 import eu.etaxonomy.cdm.model.description.TaxonDescription;
31 import eu.etaxonomy.cdm.model.description.TextData;
32 import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
33 import eu.etaxonomy.cdm.model.name.NonViralName;
34 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
35 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
36 import eu.etaxonomy.cdm.model.name.TypeDesignationBase;
37 import eu.etaxonomy.cdm.model.reference.Reference;
38 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
39 import eu.etaxonomy.cdm.model.taxon.Taxon;
40
41
42 /**
43 * @author a.mueller
44 *
45 */
46 @Component
47 public class CentralAfricaEricaceaeTaxonImport extends EfloraTaxonImport {
48 private static final Logger logger = Logger.getLogger(CentralAfricaEricaceaeTaxonImport.class);
49
50
51
52
53 /* (non-Javadoc)
54 * @see eu.etaxonomy.cdm.io.eflora.EfloraTaxonImport#handleNomenclaturalReference(eu.etaxonomy.cdm.model.name.NonViralName, java.lang.String)
55 */
56 @Override
57 protected TeamOrPersonBase handleNomenclaturalReference(NonViralName name, String value) {
58 Reference nomRef = ReferenceFactory.newGeneric();
59 nomRef.setTitleCache(value, true);
60 parseNomStatus(nomRef, name);
61 name.setNomenclaturalReference(nomRef);
62
63 String microReference = parseReferenceYearAndDetail(nomRef);
64 microReference = removeTrailing(microReference, ")");
65
66 microReference = parseHomonym(microReference, name);
67 name.setNomenclaturalMicroReference(microReference);
68
69 TeamOrPersonBase nameTeam = CdmBase.deproxy(name.getCombinationAuthorTeam(), TeamOrPersonBase.class);
70 TeamOrPersonBase refTeam = nomRef.getAuthorTeam();
71 if (nameTeam == null ){
72 logger.warn("Name has nom. ref. but no author team. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
73 }else if (refTeam == null ){
74 logger.warn("Name has nom. ref. but no nom.ref. author. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
75 }else if (! authorTeamsMatch(refTeam, nameTeam)){
76 logger.warn("Nom.Ref. author and comb. author do not match: " + nomRef.getTitleCache() + " <-> " + nameTeam.getNomenclaturalTitle());
77 }else {
78 nomRef.setAuthorTeam(nameTeam);
79 nomRef.setTitle(CdmUtils.Nz(nomRef.getTitle()) + " - no title given yet -");
80 nameTeam.setTitleCache(refTeam.getTitleCache(), true);
81 }
82 return nameTeam;
83 }
84
85 /**
86 * Extracts the date published part and returns micro reference
87 * @param ref
88 * @return
89 */
90 protected String parseReferenceYearAndDetail(Reference ref){
91 String detailResult = null;
92 String titleToParse = ref.getTitleCache();
93 titleToParse = removeReferenceBracket(titleToParse, ref);
94
95 int detailStart = titleToParse.indexOf(":");
96 if (detailStart >= 0){
97 detailResult = titleToParse.substring(detailStart + 1);
98 titleToParse = titleToParse.substring(0, titleToParse.length() - detailResult.length() - 1).trim();
99 detailResult = detailResult.trim();
100 }
101
102 String reYear = "\\s[1-2]{1}[0-9]{3}";
103 String reYearPeriod = reYear;
104 //
105 // //pattern for the whole string
106 Pattern patReference = Pattern.compile( reYearPeriod );
107 Matcher matcher = patReference.matcher(titleToParse);
108 if (matcher.find()){
109 int start = matcher.start();
110 int end = matcher.end();
111 //
112 String strPeriod = titleToParse.substring(start, end);
113 TimePeriod datePublished = TimePeriod.parseString(strPeriod);
114 ref.setDatePublished(datePublished);
115 String author = titleToParse.substring(0, start).trim();
116 author = parseInRefrence(ref, author);
117 TeamOrPersonBase team = parseSingleTeam(author);
118 ref.setAuthorTeam(team);
119 ref.setProtectedTitleCache(false);
120 }else{
121 logger.warn("Could not parse reference: " + titleToParse);
122 }
123 return detailResult;
124
125 }
126
127 private String parseInRefrence(Reference ref, String author) {
128 int pos = author.indexOf(" in ");
129 if (pos > -1){
130 String inAuthorString = author.substring(pos + 4);
131 String myAuthorString = author.substring(0, pos);
132 Reference inReference = ReferenceFactory.newGeneric();
133 TeamOrPersonBase inAuthor = parseSingleTeam(inAuthorString);
134 inReference.setAuthorTeam(inAuthor);
135 ref.setInReference(inReference);
136 return myAuthorString;
137 }else{
138 return author;
139 }
140
141 }
142
143 private String removeReferenceBracket(String refString, Reference ref) {
144 String titleToParse = refString;
145 String reBracket = "\\(.*\\).?";
146 Pattern patBracket = Pattern.compile(reBracket);
147 Matcher matcher = patBracket.matcher(titleToParse);
148
149 if (matcher.matches()){
150 int start = matcher.start() + 1;
151 int end = matcher.end() -1 ;
152 if (! titleToParse.endsWith("")){
153 end = end - 1;
154 }
155 titleToParse = titleToParse.substring(start, end);
156
157 ref.setTitleCache(titleToParse);
158 }
159 return titleToParse;
160 }
161
162 /**
163 * @param taxon
164 * @param name
165 * @param value
166 */
167 @Override
168 protected TeamOrPersonBase handleNameUsage(Taxon taxon, NonViralName name, String referenceTitle, TeamOrPersonBase lastTeam) {
169 Reference ref = ReferenceFactory.newGeneric();
170
171 ref.setTitleCache(referenceTitle, true);
172
173 TeamOrPersonBase team = getReferenceAuthor(ref, name);
174 ref.setAuthorTeam(team);
175
176 String[] multipleReferences = ref.getTitleCache().split("&");
177
178 TaxonDescription description = getDescription(taxon);
179 for (String singleReferenceString : multipleReferences){
180 Reference singleRef = ReferenceFactory.newGeneric();
181 singleRef.setTitleCache(singleReferenceString.trim(), true);
182 singleRef.setAuthorTeam(team);
183
184 String microReference = parseReferenceYearAndDetailForUsage(singleRef);
185
186 singleRef.setTitle( CdmUtils.Nz(singleRef.getTitle()) + " - no title given yet -");
187
188 // parseReferenceType(ref);
189
190 TextData textData = TextData.NewInstance(Feature.CITATION());
191 textData.addSource(null, null, singleRef, microReference, name, null);
192 description.addElement(textData);
193 }
194 return team;
195 }
196
197 private String parseReferenceYearAndDetailForUsage(Reference ref) {
198 String detailResult = null;
199 String titleToParse = ref.getTitleCache().trim();
200
201 int detailStart = titleToParse.indexOf(":");
202 if (detailStart >= 0){
203 detailResult = titleToParse.substring(detailStart + 1);
204 titleToParse = titleToParse.substring(0, titleToParse.length() - detailResult.length() - 1).trim();
205 detailResult = detailResult.trim();
206 }
207
208 String reYear = "^[1-2]{1}[0-9]{3}[a-e]?$";
209 String reYearPeriod = reYear;
210 //
211 // //pattern for the whole string
212 Pattern patReference = Pattern.compile( reYearPeriod );
213 Matcher matcher = patReference.matcher(titleToParse);
214 if (! matcher.find()){
215 logger.warn("Could not parse year: " + titleToParse);
216 }else{
217 if (Pattern.matches("^[1-2]{1}[0-9]{3}[a-e]$", titleToParse)){
218 String title = titleToParse.substring(4,5);
219 ref.setTitle(title);
220 titleToParse = titleToParse.substring(0, 4);
221 }
222 ref.setProtectedTitleCache(false);
223 }
224 TimePeriod datePublished = TimePeriod.parseString(titleToParse);
225 ref.setDatePublished(datePublished);
226 return detailResult;
227
228 }
229
230 protected TeamOrPersonBase getReferenceAuthor (Reference ref, NonViralName name) {
231 String titleString = ref.getTitleCache();
232 String re = "\\(.*\\)";
233 Pattern pattern = Pattern.compile(re);
234 Matcher matcher = pattern.matcher(titleString);
235 if (matcher.find()){
236 int start = matcher.start();
237 String authorString = titleString.substring(0, start).trim();
238 String restString = titleString.substring(start + 1 , matcher.end() - 1);
239 TeamOrPersonBase team = getAuthorTeam(authorString, name);
240 ref.setTitleCache(restString, true);
241 return team;
242 }else{
243 logger.warn("Title does not match: " + titleString);
244 return null;
245 }
246
247 }
248
249 private TeamOrPersonBase getAuthorTeam(String authorString, NonViralName name) {
250 //TODO atomize
251 // TeamOrPersonBase nameTeam = CdmBase.deproxy(name.getCombinationAuthorTeam(), TeamOrPersonBase.class);
252 // String nameTeamTitle = nameTeam == null ? "" : nameTeam.getNomenclaturalTitle();
253
254 // if (nameTeam == null || ! authorTeamsMatch(authorString, nameTeamTitle)){
255 // logger.warn("Author teams do not match: " + authorString + " <-> " + nameTeamTitle);
256 TeamOrPersonBase result = parseSingleTeam(authorString);
257 result.setTitleCache(authorString, true);
258 return result;
259 // }else{
260 // nameTeam.setTitleCache(authorString, true);
261 // return nameTeam;
262 // }
263 }
264
265 /**
266 * @param refAuthorTeam
267 * @param nameTeam
268 * @return
269 */
270 private boolean authorTeamsMatch(TeamOrPersonBase refAuthorTeam, TeamOrPersonBase nameTeam) {
271 String nameTeamString = nameTeam.getNomenclaturalTitle();
272 String refAuthorTeamString = refAuthorTeam.getTitleCache();
273 if (nameTeamString.equalsIgnoreCase(refAuthorTeamString)){
274 return true;
275 }
276
277 if (nameTeamString.endsWith(".")){
278 nameTeamString = nameTeamString.substring(0, nameTeamString.length() - 1 );
279 if (refAuthorTeamString.startsWith(nameTeamString)){
280 return true;
281 }else{
282 return checkSingleAndIpniAuthor(nameTeam, refAuthorTeam);
283 }
284 }else{
285 if (nameTeamString.endsWith(refAuthorTeamString) || refAuthorTeamString.endsWith(nameTeamString)){
286 return true;
287 }else{
288 return checkSingleAndIpniAuthor(nameTeam, refAuthorTeam);
289 }
290 }
291 }
292
293 private boolean checkSingleAndIpniAuthor(TeamOrPersonBase nameTeam, TeamOrPersonBase refAuthorTeam) {
294 if ( nameTeam.isInstanceOf(Team.class) && ((Team)nameTeam).getTeamMembers().size()> 1 ||
295 refAuthorTeam.isInstanceOf(Team.class) && ((Team)refAuthorTeam).getTeamMembers().size()> 1){
296 //class
297 if (! (nameTeam.isInstanceOf(Team.class) && refAuthorTeam.isInstanceOf(Team.class) ) ){
298 logger.warn("Only one author is a real team");
299 return false;
300 }
301 Team realNameTeam = (Team)nameTeam;
302 Team realRefAuthorTeam = (Team)refAuthorTeam;
303 //size
304 if (realNameTeam.getTeamMembers().size() != realRefAuthorTeam.getTeamMembers().size()){
305 logger.warn("Teams do not have the same size");
306 return false;
307 }
308 //empty teams
309 if (realNameTeam.getTeamMembers().size() == 0){
310 logger.warn("Teams are empty");
311 return false;
312 }
313 //compare each team member
314 for (int i = 0; i < realNameTeam.getTeamMembers().size(); i++){
315 Person namePerson = realNameTeam.getTeamMembers().get(i);
316 Person refPerson = realRefAuthorTeam.getTeamMembers().get(i);
317 if ( authorTeamsMatch(refPerson, namePerson) == false){
318 return false;
319 }
320 }
321 return true;
322 }
323 boolean result = checkIpniAuthor(nameTeam.getNomenclaturalTitle(), refAuthorTeam);
324 return result;
325 }
326
327 private boolean checkIpniAuthor(String nameTeamString, TeamOrPersonBase refAuthorTeam) {
328 IpniService ipniService = new IpniService();
329 List<Person> ipniAuthors = ipniService.getAuthors(nameTeamString, null, null, null, null, null);
330 if (ipniAuthors != null){
331 for (Person ipniAuthor : ipniAuthors){
332 if (ipniAuthor.getLastname() != null && ipniAuthor.getLastname().equalsIgnoreCase(refAuthorTeam.getTitleCache())){
333 return true;
334 }
335 logger.warn(ipniAuthor.getTitleCache() + " <-> " + refAuthorTeam.getTitleCache());
336 }
337 }else{
338 logger.warn("IPNI not available");
339 }
340 return false;
341 }
342
343 /**
344 * @param state
345 * @param elNom
346 * @param taxon
347 * @param homotypicalGroup
348 */
349 @Override
350 protected void handleTypeRef(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
351 verifyNoChildren(elNom);
352 String typeRef = elNom.getTextNormalize();
353 typeRef = removeStartingTypeRefMinus(typeRef);
354 typeRef = removeTypePrefix(typeRef);
355 TypeDesignationBase typeDesignation = SpecimenTypeDesignation.NewInstance();
356 makeSpecimenTypeDesignation(new StringBuffer("Type"), typeRef, typeDesignation);
357 for (TaxonNameBase name : homotypicalGroup.getTypifiedNames()){
358 name.addTypeDesignation(typeDesignation, true);
359 }
360 }
361
362 private String removeTypePrefix(String typeRef) {
363 typeRef = typeRef.trim().replace("Type: ", "").replace("Types: ", "").trim();
364 return typeRef;
365 }
366
367 protected void handleGenus(String value, TaxonNameBase taxonName) {
368 // do nothing
369 }
370
371
372
373 }