adapt pom version in cdmlib-app
[cdmlib-apps.git] / cdm-eflora / src / main / java / eu / etaxonomy / cdm / io / eflora / centralAfrica / ericaceae / CentralAfricaEricaceaeTaxonImport.java
1 /**
2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.eflora.centralAfrica.ericaceae;
11
12 import java.util.List;
13 import java.util.regex.Matcher;
14 import java.util.regex.Pattern;
15
16 import org.apache.log4j.Logger;
17 import org.jdom.Element;
18 import org.springframework.stereotype.Component;
19
20 import eu.etaxonomy.cdm.common.CdmUtils;
21 import eu.etaxonomy.cdm.ext.ipni.IpniService;
22 import eu.etaxonomy.cdm.io.eflora.EfloraImportState;
23 import eu.etaxonomy.cdm.io.eflora.EfloraTaxonImport;
24 import eu.etaxonomy.cdm.model.agent.Person;
25 import eu.etaxonomy.cdm.model.agent.Team;
26 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
27 import eu.etaxonomy.cdm.model.common.CdmBase;
28 import eu.etaxonomy.cdm.model.common.OriginalSourceType;
29 import eu.etaxonomy.cdm.model.common.TimePeriod;
30 import eu.etaxonomy.cdm.model.description.Feature;
31 import eu.etaxonomy.cdm.model.description.TaxonDescription;
32 import eu.etaxonomy.cdm.model.description.TextData;
33 import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
34 import eu.etaxonomy.cdm.model.name.NonViralName;
35 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
36 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
37 import eu.etaxonomy.cdm.model.name.TypeDesignationBase;
38 import eu.etaxonomy.cdm.model.reference.Reference;
39 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
40 import eu.etaxonomy.cdm.model.taxon.Taxon;
41 import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
42
43
44 /**
45 * @author a.mueller
46 *
47 */
48 @Component
49 public class CentralAfricaEricaceaeTaxonImport extends EfloraTaxonImport {
50 private static final Logger logger = Logger.getLogger(CentralAfricaEricaceaeTaxonImport.class);
51
52
53
54
55 /* (non-Javadoc)
56 * @see eu.etaxonomy.cdm.io.eflora.EfloraTaxonImport#handleNomenclaturalReference(eu.etaxonomy.cdm.model.name.NonViralName, java.lang.String)
57 */
58 @Override
59 protected TeamOrPersonBase handleNomenclaturalReference(NonViralName name, String value) {
60 Reference nomRef = ReferenceFactory.newGeneric();
61 nomRef.setTitleCache(value, true);
62 parseNomStatus(nomRef, name);
63 name.setNomenclaturalReference(nomRef);
64
65 String microReference = parseReferenceYearAndDetail(nomRef);
66 microReference = removeTrailing(microReference, ")");
67
68 microReference = parseHomonym(microReference, name);
69 name.setNomenclaturalMicroReference(microReference);
70
71 TeamOrPersonBase nameTeam = CdmBase.deproxy(name.getCombinationAuthorship(), TeamOrPersonBase.class);
72 TeamOrPersonBase refTeam = nomRef.getAuthorship();
73 if (nameTeam == null ){
74 logger.warn("Name has nom. ref. but no author team. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
75 }else if (refTeam == null ){
76 logger.warn("Name has nom. ref. but no nom.ref. author. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
77 }else if (! authorTeamsMatch(refTeam, nameTeam)){
78 logger.warn("Nom.Ref. author and comb. author do not match: " + nomRef.getTitleCache() + " <-> " + nameTeam.getNomenclaturalTitle());
79 }else {
80 nomRef.setAuthorship(nameTeam);
81 nomRef.setTitle(CdmUtils.Nz(nomRef.getTitle()) + " - no title given yet -");
82 nameTeam.setTitleCache(refTeam.getTitleCache(), true);
83 }
84 return nameTeam;
85 }
86
87 /**
88 * Extracts the date published part and returns micro reference
89 * @param ref
90 * @return
91 */
92 protected String parseReferenceYearAndDetail(Reference ref){
93 String detailResult = null;
94 String titleToParse = ref.getTitleCache();
95 titleToParse = removeReferenceBracket(titleToParse, ref);
96
97 int detailStart = titleToParse.indexOf(":");
98 if (detailStart >= 0){
99 detailResult = titleToParse.substring(detailStart + 1);
100 titleToParse = titleToParse.substring(0, titleToParse.length() - detailResult.length() - 1).trim();
101 detailResult = detailResult.trim();
102 }
103
104 String reYear = "\\s[1-2]{1}[0-9]{3}";
105 String reYearPeriod = reYear;
106 //
107 // //pattern for the whole string
108 Pattern patReference = Pattern.compile( reYearPeriod );
109 Matcher matcher = patReference.matcher(titleToParse);
110 if (matcher.find()){
111 int start = matcher.start();
112 int end = matcher.end();
113 //
114 String strPeriod = titleToParse.substring(start, end);
115 TimePeriod datePublished = TimePeriodParser.parseString(strPeriod);
116 ref.setDatePublished(datePublished);
117 String author = titleToParse.substring(0, start).trim();
118 author = parseInRefrence(ref, author);
119 TeamOrPersonBase team = parseSingleTeam(author);
120 ref.setAuthorship(team);
121 ref.setProtectedTitleCache(false);
122 }else{
123 logger.warn("Could not parse reference: " + titleToParse);
124 }
125 return detailResult;
126
127 }
128
129 private String parseInRefrence(Reference ref, String author) {
130 int pos = author.indexOf(" in ");
131 if (pos > -1){
132 String inAuthorString = author.substring(pos + 4);
133 String myAuthorString = author.substring(0, pos);
134 Reference inReference = ReferenceFactory.newGeneric();
135 TeamOrPersonBase inAuthor = parseSingleTeam(inAuthorString);
136 inReference.setAuthorship(inAuthor);
137 ref.setInReference(inReference);
138 return myAuthorString;
139 }else{
140 return author;
141 }
142
143 }
144
145 private String removeReferenceBracket(String refString, Reference ref) {
146 String titleToParse = refString;
147 String reBracket = "\\(.*\\).?";
148 Pattern patBracket = Pattern.compile(reBracket);
149 Matcher matcher = patBracket.matcher(titleToParse);
150
151 if (matcher.matches()){
152 int start = matcher.start() + 1;
153 int end = matcher.end() -1 ;
154 if (! titleToParse.endsWith("")){
155 end = end - 1;
156 }
157 titleToParse = titleToParse.substring(start, end);
158
159 ref.setTitleCache(titleToParse);
160 }
161 return titleToParse;
162 }
163
164 /**
165 * @param taxon
166 * @param name
167 * @param value
168 */
169 @Override
170 protected TeamOrPersonBase<?> handleNameUsage(Taxon taxon, NonViralName<?> name, String referenceTitle, TeamOrPersonBase lastTeam) {
171 Reference ref = ReferenceFactory.newGeneric();
172
173 ref.setTitleCache(referenceTitle, true);
174
175 TeamOrPersonBase<?> team = getReferenceAuthor(ref, name);
176 ref.setAuthorship(team);
177
178 String[] multipleReferences = ref.getTitleCache().split("&");
179
180 TaxonDescription description = getDescription(taxon);
181 for (String singleReferenceString : multipleReferences){
182 Reference singleRef = ReferenceFactory.newGeneric();
183 singleRef.setTitleCache(singleReferenceString.trim(), true);
184 singleRef.setAuthorship(team);
185
186 String microReference = parseReferenceYearAndDetailForUsage(singleRef);
187
188 singleRef.setTitle( CdmUtils.Nz(singleRef.getTitle()) + " - no title given yet -");
189
190 // parseReferenceType(ref);
191
192 TextData textData = TextData.NewInstance(Feature.CITATION());
193 textData.addSource(OriginalSourceType.PrimaryTaxonomicSource, null, null, singleRef, microReference, name, null);
194 description.addElement(textData);
195 }
196 return team;
197 }
198
199 private String parseReferenceYearAndDetailForUsage(Reference ref) {
200 String detailResult = null;
201 String titleToParse = ref.getTitleCache().trim();
202
203 int detailStart = titleToParse.indexOf(":");
204 if (detailStart >= 0){
205 detailResult = titleToParse.substring(detailStart + 1);
206 titleToParse = titleToParse.substring(0, titleToParse.length() - detailResult.length() - 1).trim();
207 detailResult = detailResult.trim();
208 }
209
210 String reYear = "^[1-2]{1}[0-9]{3}[a-e]?$";
211 String reYearPeriod = reYear;
212 //
213 // //pattern for the whole string
214 Pattern patReference = Pattern.compile( reYearPeriod );
215 Matcher matcher = patReference.matcher(titleToParse);
216 if (! matcher.find()){
217 logger.warn("Could not parse year: " + titleToParse);
218 }else{
219 if (Pattern.matches("^[1-2]{1}[0-9]{3}[a-e]$", titleToParse)){
220 String title = titleToParse.substring(4,5);
221 ref.setTitle(title);
222 titleToParse = titleToParse.substring(0, 4);
223 }
224 ref.setProtectedTitleCache(false);
225 }
226 TimePeriod datePublished = TimePeriodParser.parseString(titleToParse);
227 ref.setDatePublished(datePublished);
228 return detailResult;
229
230 }
231
232 protected TeamOrPersonBase getReferenceAuthor (Reference ref, NonViralName name) {
233 String titleString = ref.getTitleCache();
234 String re = "\\(.*\\)";
235 Pattern pattern = Pattern.compile(re);
236 Matcher matcher = pattern.matcher(titleString);
237 if (matcher.find()){
238 int start = matcher.start();
239 String authorString = titleString.substring(0, start).trim();
240 String restString = titleString.substring(start + 1 , matcher.end() - 1);
241 TeamOrPersonBase team = getAuthorTeam(authorString, name);
242 ref.setTitleCache(restString, true);
243 return team;
244 }else{
245 logger.warn("Title does not match: " + titleString);
246 return null;
247 }
248
249 }
250
251 private TeamOrPersonBase getAuthorTeam(String authorString, NonViralName name) {
252 //TODO atomize
253 // TeamOrPersonBase nameTeam = CdmBase.deproxy(name.getCombinationAuthorship(), TeamOrPersonBase.class);
254 // String nameTeamTitle = nameTeam == null ? "" : nameTeam.getNomenclaturalTitle();
255
256 // if (nameTeam == null || ! authorTeamsMatch(authorString, nameTeamTitle)){
257 // logger.warn("Author teams do not match: " + authorString + " <-> " + nameTeamTitle);
258 TeamOrPersonBase result = parseSingleTeam(authorString);
259 result.setTitleCache(authorString, true);
260 return result;
261 // }else{
262 // nameTeam.setTitleCache(authorString, true);
263 // return nameTeam;
264 // }
265 }
266
267 /**
268 * @param refAuthorTeam
269 * @param nameTeam
270 * @return
271 */
272 private boolean authorTeamsMatch(TeamOrPersonBase refAuthorTeam, TeamOrPersonBase nameTeam) {
273 String nameTeamString = nameTeam.getNomenclaturalTitle();
274 String refAuthorTeamString = refAuthorTeam.getTitleCache();
275 if (nameTeamString.equalsIgnoreCase(refAuthorTeamString)){
276 return true;
277 }
278
279 if (nameTeamString.endsWith(".")){
280 nameTeamString = nameTeamString.substring(0, nameTeamString.length() - 1 );
281 if (refAuthorTeamString.startsWith(nameTeamString)){
282 return true;
283 }else{
284 return checkSingleAndIpniAuthor(nameTeam, refAuthorTeam);
285 }
286 }else{
287 if (nameTeamString.endsWith(refAuthorTeamString) || refAuthorTeamString.endsWith(nameTeamString)){
288 return true;
289 }else{
290 return checkSingleAndIpniAuthor(nameTeam, refAuthorTeam);
291 }
292 }
293 }
294
295 private boolean checkSingleAndIpniAuthor(TeamOrPersonBase nameTeam, TeamOrPersonBase refAuthorTeam) {
296 if ( nameTeam.isInstanceOf(Team.class) && ((Team)nameTeam).getTeamMembers().size()> 1 ||
297 refAuthorTeam.isInstanceOf(Team.class) && ((Team)refAuthorTeam).getTeamMembers().size()> 1){
298 //class
299 if (! (nameTeam.isInstanceOf(Team.class) && refAuthorTeam.isInstanceOf(Team.class) ) ){
300 logger.warn("Only one author is a real team");
301 return false;
302 }
303 Team realNameTeam = (Team)nameTeam;
304 Team realRefAuthorTeam = (Team)refAuthorTeam;
305 //size
306 if (realNameTeam.getTeamMembers().size() != realRefAuthorTeam.getTeamMembers().size()){
307 logger.warn("Teams do not have the same size");
308 return false;
309 }
310 //empty teams
311 if (realNameTeam.getTeamMembers().size() == 0){
312 logger.warn("Teams are empty");
313 return false;
314 }
315 //compare each team member
316 for (int i = 0; i < realNameTeam.getTeamMembers().size(); i++){
317 Person namePerson = realNameTeam.getTeamMembers().get(i);
318 Person refPerson = realRefAuthorTeam.getTeamMembers().get(i);
319 if ( authorTeamsMatch(refPerson, namePerson) == false){
320 return false;
321 }
322 }
323 return true;
324 }
325 boolean result = checkIpniAuthor(nameTeam.getNomenclaturalTitle(), refAuthorTeam);
326 return result;
327 }
328
329 private boolean checkIpniAuthor(String nameTeamString, TeamOrPersonBase refAuthorTeam) {
330 IpniService ipniService = new IpniService();
331 List<Person> ipniAuthors = ipniService.getAuthors(nameTeamString, null, null, null, null, null);
332 if (ipniAuthors != null){
333 for (Person ipniAuthor : ipniAuthors){
334 if (ipniAuthor.getLastname() != null && ipniAuthor.getLastname().equalsIgnoreCase(refAuthorTeam.getTitleCache())){
335 return true;
336 }
337 logger.warn(ipniAuthor.getTitleCache() + " <-> " + refAuthorTeam.getTitleCache());
338 }
339 }else{
340 logger.warn("IPNI not available");
341 }
342 return false;
343 }
344
345 /**
346 * @param state
347 * @param elNom
348 * @param taxon
349 * @param homotypicalGroup
350 */
351 @Override
352 protected void handleTypeRef(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
353 verifyNoChildren(elNom);
354 String typeRef = elNom.getTextNormalize();
355 typeRef = removeStartingTypeRefMinus(typeRef);
356 typeRef = removeTypePrefix(typeRef);
357 TypeDesignationBase typeDesignation = SpecimenTypeDesignation.NewInstance();
358 makeSpecimenTypeDesignation(new StringBuffer("Type"), typeRef, typeDesignation);
359 for (TaxonNameBase name : homotypicalGroup.getTypifiedNames()){
360 name.addTypeDesignation(typeDesignation, true);
361 }
362 }
363
364 private String removeTypePrefix(String typeRef) {
365 typeRef = typeRef.trim().replace("Type: ", "").replace("Types: ", "").trim();
366 return typeRef;
367 }
368
369 @Override
370 protected void handleGenus(String value, TaxonNameBase taxonName) {
371 // do nothing
372 }
373
374
375
376 }