some import changes
[cdmlib.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / eflora / centralAfrica / ericaceae / CentralAfricaEricaceaeTaxonImport.java
1 /**
2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.eflora.centralAfrica.ericaceae;
11
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.regex.Matcher;
15 import java.util.regex.Pattern;
16
17 import org.apache.commons.lang.StringUtils;
18 import org.apache.log4j.Logger;
19 import org.jdom.Element;
20 import org.springframework.stereotype.Component;
21
22 import eu.etaxonomy.cdm.common.CdmUtils;
23 import eu.etaxonomy.cdm.ext.ipni.IpniService;
24 import eu.etaxonomy.cdm.io.eflora.EfloraImportState;
25 import eu.etaxonomy.cdm.io.eflora.EfloraTaxonImport;
26 import eu.etaxonomy.cdm.model.agent.INomenclaturalAuthor;
27 import eu.etaxonomy.cdm.model.agent.Person;
28 import eu.etaxonomy.cdm.model.agent.Team;
29 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
30 import eu.etaxonomy.cdm.model.common.CdmBase;
31 import eu.etaxonomy.cdm.model.common.TimePeriod;
32 import eu.etaxonomy.cdm.model.description.Feature;
33 import eu.etaxonomy.cdm.model.description.TaxonDescription;
34 import eu.etaxonomy.cdm.model.description.TextData;
35 import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
36 import eu.etaxonomy.cdm.model.name.NameTypeDesignation;
37 import eu.etaxonomy.cdm.model.name.NonViralName;
38 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
39 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
40 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
41 import eu.etaxonomy.cdm.model.name.TypeDesignationBase;
42 import eu.etaxonomy.cdm.model.reference.ReferenceBase;
43 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
44 import eu.etaxonomy.cdm.model.taxon.Taxon;
45
46
47 /**
48 * @author a.mueller
49 *
50 */
51 @Component
52 public class CentralAfricaEricaceaeTaxonImport extends EfloraTaxonImport {
53 private static final Logger logger = Logger.getLogger(CentralAfricaEricaceaeTaxonImport.class);
54
55
56
57
58 /* (non-Javadoc)
59 * @see eu.etaxonomy.cdm.io.eflora.EfloraTaxonImport#handleNomenclaturalReference(eu.etaxonomy.cdm.model.name.NonViralName, java.lang.String)
60 */
61 @Override
62 protected TeamOrPersonBase handleNomenclaturalReference(NonViralName name, String value) {
63 ReferenceBase nomRef = ReferenceFactory.newGeneric();
64 nomRef.setTitleCache(value, true);
65 parseNomStatus(nomRef, name);
66 name.setNomenclaturalReference(nomRef);
67
68 String microReference = parseReferenceYearAndDetail(nomRef);
69
70 microReference = parseHomonym(microReference, name);
71 name.setNomenclaturalMicroReference(microReference);
72
73 TeamOrPersonBase nameTeam = CdmBase.deproxy(name.getCombinationAuthorTeam(), TeamOrPersonBase.class);
74 if (nameTeam == null ){
75 logger.warn("Name has nom. ref. but no author team. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
76 }else if (nomRef.getAuthorTeam() == null ){
77 logger.warn("Name has nom. ref. but no nom.ref. author. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
78 }else if (! authorTeamsMatch(nomRef.getAuthorTeam(), nameTeam)){
79 logger.warn("Nom.Ref. author and comb. author do not match: " + nomRef.getTitleCache() + " <-> " + nameTeam.getNomenclaturalTitle());
80 }else {
81 nomRef.setAuthorTeam(nameTeam);
82 nameTeam.setTitleCache(nomRef.getAuthorTeam().getTitleCache(), true);
83 }
84 return nameTeam;
85 }
86
87 /**
88 * Extracts the date published part and returns micro reference
89 * @param ref
90 * @return
91 */
92 protected String parseReferenceYearAndDetail(ReferenceBase ref){
93 String detailResult = null;
94 String titleToParse = ref.getTitleCache();
95 titleToParse = removeReferenceBracket(titleToParse, ref);
96
97 int detailStart = titleToParse.indexOf(":");
98 if (detailStart >= 0){
99 detailResult = titleToParse.substring(detailStart + 1);
100 titleToParse = titleToParse.substring(0, titleToParse.length() - detailResult.length() - 1).trim();
101 detailResult = detailResult.trim();
102 }
103
104 String reYear = "\\s[1-2]{1}[0-9]{3}";
105 String reYearPeriod = reYear;
106 //
107 // //pattern for the whole string
108 Pattern patReference = Pattern.compile( reYearPeriod );
109 Matcher matcher = patReference.matcher(titleToParse);
110 if (matcher.find()){
111 int start = matcher.start();
112 int end = matcher.end();
113 //
114 String strPeriod = titleToParse.substring(start, end);
115 TimePeriod datePublished = TimePeriod.parseString(strPeriod);
116 ref.setDatePublished(datePublished);
117 String author = titleToParse.substring(0, start).trim();
118 Team team = Team.NewTitledInstance(author, author);
119 ref.setAuthorTeam(team);
120 ref.setProtectedTitleCache(false);
121 }else{
122 logger.warn("Could not parse reference: " + titleToParse);
123 }
124 return detailResult;
125
126 }
127
128 private String removeReferenceBracket(String refString, ReferenceBase ref) {
129 String titleToParse = refString;
130 String reBracket = "\\(.*\\).?";
131 Pattern patBracket = Pattern.compile(reBracket);
132 Matcher matcher = patBracket.matcher(titleToParse);
133
134 if (matcher.matches()){
135 int start = matcher.start() + 1;
136 int end = matcher.end() -1 ;
137 if (! titleToParse.endsWith("")){
138 end = end - 1;
139 }
140 titleToParse = titleToParse.substring(start, end);
141
142 ref.setTitleCache(titleToParse);
143 }
144 return titleToParse;
145 }
146
147 /**
148 * @param taxon
149 * @param name
150 * @param value
151 */
152 @Override
153 protected TeamOrPersonBase handleNameUsage(Taxon taxon, NonViralName name, String referenceTitle, TeamOrPersonBase lastTeam) {
154 ReferenceBase ref = ReferenceFactory.newGeneric();
155
156 ref.setTitleCache(referenceTitle, true);
157
158 TeamOrPersonBase team = getReferenceAuthor(ref, name);
159 ref.setAuthorTeam(team);
160
161 String[] multipleReferences = ref.getTitleCache().split("&");
162
163 TaxonDescription description = getDescription(taxon);
164 for (String singleReferenceString : multipleReferences){
165 ReferenceBase singleRef = ReferenceFactory.newGeneric();
166 singleRef.setTitleCache(singleReferenceString, true);
167 singleRef.setAuthorTeam(team);
168
169 String microReference = parseReferenceYearAndDetailForUsage(singleRef);
170
171 // parseReferenceType(ref);
172
173 TextData textData = TextData.NewInstance(Feature.CITATION());
174 textData.addSource(null, null, ref, microReference, name, null);
175 description.addElement(textData);
176 }
177 return team;
178 }
179
180 private String parseReferenceYearAndDetailForUsage(ReferenceBase ref) {
181 String detailResult = null;
182 String titleToParse = ref.getTitleCache().trim();
183
184 int detailStart = titleToParse.indexOf(":");
185 if (detailStart >= 0){
186 detailResult = titleToParse.substring(detailStart + 1);
187 titleToParse = titleToParse.substring(0, titleToParse.length() - detailResult.length() - 1).trim();
188 detailResult = detailResult.trim();
189 }
190
191 String reYear = "^[1-2]{1}[0-9]{3}[a-e]?$";
192 String reYearPeriod = reYear;
193 //
194 // //pattern for the whole string
195 Pattern patReference = Pattern.compile( reYearPeriod );
196 Matcher matcher = patReference.matcher(titleToParse);
197 if (! matcher.find()){
198 logger.warn("Could not parse year: " + titleToParse);
199 }
200 TimePeriod datePublished = TimePeriod.parseString(titleToParse);
201 ref.setDatePublished(datePublished);
202 return detailResult;
203
204 }
205
206 protected TeamOrPersonBase getReferenceAuthor (ReferenceBase ref, NonViralName name) {
207 String titleString = ref.getTitleCache();
208 String re = "\\(.*\\)";
209 Pattern pattern = Pattern.compile(re);
210 Matcher matcher = pattern.matcher(titleString);
211 if (matcher.find()){
212 int start = matcher.start();
213 String authorString = titleString.substring(0, start).trim();
214 String restString = titleString.substring(start + 1 , matcher.end() - 1);
215 TeamOrPersonBase team = getAuthorTeam(authorString, name);
216 ref.setTitleCache(restString, true);
217 return team;
218 }else{
219 logger.warn("Title does not match: " + titleString);
220 return null;
221 }
222
223 }
224
225 private TeamOrPersonBase getAuthorTeam(String authorString, NonViralName name) {
226 //TODO atomize
227 // TeamOrPersonBase nameTeam = CdmBase.deproxy(name.getCombinationAuthorTeam(), TeamOrPersonBase.class);
228 // String nameTeamTitle = nameTeam == null ? "" : nameTeam.getNomenclaturalTitle();
229
230 // if (nameTeam == null || ! authorTeamsMatch(authorString, nameTeamTitle)){
231 // logger.warn("Author teams do not match: " + authorString + " <-> " + nameTeamTitle);
232 Team result = Team.NewInstance();
233 result.setTitleCache(authorString, true);
234 return result;
235 // }else{
236 // nameTeam.setTitleCache(authorString, true);
237 // return nameTeam;
238 // }
239 }
240
241 /**
242 * @param refAuthorTeam
243 * @param nameTeam
244 * @return
245 */
246 private boolean authorTeamsMatch(TeamOrPersonBase refAuthorTeam, TeamOrPersonBase nameTeam) {
247 String nameTeamString = nameTeam.getNomenclaturalTitle();
248 String refAuthorTeamString = refAuthorTeam.getTitleCache();
249 if (nameTeamString.equalsIgnoreCase(refAuthorTeamString)){
250 return true;
251 }
252
253 if (nameTeamString.endsWith(".")){
254 nameTeamString = nameTeamString.substring(0, nameTeamString.length() - 1 );
255 if (refAuthorTeamString.startsWith(nameTeamString)){
256 return true;
257 }else{
258 return checkIpniAuthor(nameTeamString + ".", refAuthorTeam);
259 }
260 }else{
261 if (nameTeamString.endsWith(refAuthorTeamString) || refAuthorTeamString.endsWith(nameTeamString)){
262 return true;
263 }else{
264 return checkIpniAuthor(nameTeamString, refAuthorTeam);
265 }
266 }
267 }
268
269 private boolean checkIpniAuthor(String nameTeamString, TeamOrPersonBase refAuthorTeam) {
270 IpniService ipniService = new IpniService();
271 List<Person> ipniAuthors = ipniService.getAuthors(nameTeamString, null, null, null, null, null);
272 for (Person ipniAuthor : ipniAuthors){
273 if (ipniAuthor.getLastname() != null && ipniAuthor.getLastname().equalsIgnoreCase(refAuthorTeam.getTitleCache())){
274 return true;
275 }
276 System.out.println(ipniAuthor.getTitleCache() + " <-> " + refAuthorTeam.getTitleCache());
277 }
278 return false;
279 }
280
281 /**
282 * @param state
283 * @param elNom
284 * @param taxon
285 * @param homotypicalGroup
286 */
287 @Override
288 protected void handleTypeRef(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
289 verifyNoChildren(elNom);
290 String typeRef = elNom.getTextNormalize();
291 typeRef = removeStartingTypeRefMinus(typeRef);
292 typeRef = removeTypePrefix(typeRef);
293 TypeDesignationBase typeDesignation = SpecimenTypeDesignation.NewInstance();
294 makeSpecimenTypeDesignation(new StringBuffer("Type"), typeRef, typeDesignation);
295 for (TaxonNameBase name : homotypicalGroup.getTypifiedNames()){
296 name.addTypeDesignation(typeDesignation, true);
297 }
298 }
299
300 private String removeTypePrefix(String typeRef) {
301 typeRef = typeRef.trim().replace("Type: ", "").replace("Types: ", "").trim();
302 return typeRef;
303 }
304
305
306
307 }