2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.eflora
.centralAfrica
.ericaceae
;
12 import java
.util
.List
;
13 import java
.util
.regex
.Matcher
;
14 import java
.util
.regex
.Pattern
;
16 import org
.apache
.log4j
.Logger
;
17 import org
.jdom
.Element
;
18 import org
.springframework
.stereotype
.Component
;
20 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
21 import eu
.etaxonomy
.cdm
.ext
.ipni
.IpniService
;
22 import eu
.etaxonomy
.cdm
.io
.eflora
.EfloraImportState
;
23 import eu
.etaxonomy
.cdm
.io
.eflora
.EfloraTaxonImport
;
24 import eu
.etaxonomy
.cdm
.model
.agent
.Person
;
25 import eu
.etaxonomy
.cdm
.model
.agent
.Team
;
26 import eu
.etaxonomy
.cdm
.model
.agent
.TeamOrPersonBase
;
27 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
28 import eu
.etaxonomy
.cdm
.model
.common
.TimePeriod
;
29 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
30 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
31 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
32 import eu
.etaxonomy
.cdm
.model
.name
.HomotypicalGroup
;
33 import eu
.etaxonomy
.cdm
.model
.name
.NonViralName
;
34 import eu
.etaxonomy
.cdm
.model
.name
.SpecimenTypeDesignation
;
35 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
36 import eu
.etaxonomy
.cdm
.model
.name
.TypeDesignationBase
;
37 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
38 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
39 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
47 public class CentralAfricaEricaceaeTaxonImport
extends EfloraTaxonImport
{
48 private static final Logger logger
= Logger
.getLogger(CentralAfricaEricaceaeTaxonImport
.class);
54 * @see eu.etaxonomy.cdm.io.eflora.EfloraTaxonImport#handleNomenclaturalReference(eu.etaxonomy.cdm.model.name.NonViralName, java.lang.String)
57 protected TeamOrPersonBase
handleNomenclaturalReference(NonViralName name
, String value
) {
58 Reference nomRef
= ReferenceFactory
.newGeneric();
59 nomRef
.setTitleCache(value
, true);
60 parseNomStatus(nomRef
, name
);
61 name
.setNomenclaturalReference(nomRef
);
63 String microReference
= parseReferenceYearAndDetail(nomRef
);
64 microReference
= removeTrailing(microReference
, ")");
66 microReference
= parseHomonym(microReference
, name
);
67 name
.setNomenclaturalMicroReference(microReference
);
69 TeamOrPersonBase nameTeam
= CdmBase
.deproxy(name
.getCombinationAuthorTeam(), TeamOrPersonBase
.class);
70 TeamOrPersonBase refTeam
= nomRef
.getAuthorTeam();
71 if (nameTeam
== null ){
72 logger
.warn("Name has nom. ref. but no author team. Name: " + name
.getTitleCache() + ", Nom.Ref.: " + value
);
73 }else if (refTeam
== null ){
74 logger
.warn("Name has nom. ref. but no nom.ref. author. Name: " + name
.getTitleCache() + ", Nom.Ref.: " + value
);
75 }else if (! authorTeamsMatch(refTeam
, nameTeam
)){
76 logger
.warn("Nom.Ref. author and comb. author do not match: " + nomRef
.getTitleCache() + " <-> " + nameTeam
.getNomenclaturalTitle());
78 nomRef
.setAuthorTeam(nameTeam
);
79 nomRef
.setTitle(CdmUtils
.Nz(nomRef
.getTitle()) + " - no title given yet -");
80 nameTeam
.setTitleCache(refTeam
.getTitleCache(), true);
86 * Extracts the date published part and returns micro reference
90 protected String
parseReferenceYearAndDetail(Reference ref
){
91 String detailResult
= null;
92 String titleToParse
= ref
.getTitleCache();
93 titleToParse
= removeReferenceBracket(titleToParse
, ref
);
95 int detailStart
= titleToParse
.indexOf(":");
96 if (detailStart
>= 0){
97 detailResult
= titleToParse
.substring(detailStart
+ 1);
98 titleToParse
= titleToParse
.substring(0, titleToParse
.length() - detailResult
.length() - 1).trim();
99 detailResult
= detailResult
.trim();
102 String reYear
= "\\s[1-2]{1}[0-9]{3}";
103 String reYearPeriod
= reYear
;
105 // //pattern for the whole string
106 Pattern patReference
= Pattern
.compile( reYearPeriod
);
107 Matcher matcher
= patReference
.matcher(titleToParse
);
109 int start
= matcher
.start();
110 int end
= matcher
.end();
112 String strPeriod
= titleToParse
.substring(start
, end
);
113 TimePeriod datePublished
= TimePeriod
.parseString(strPeriod
);
114 ref
.setDatePublished(datePublished
);
115 String author
= titleToParse
.substring(0, start
).trim();
116 author
= parseInRefrence(ref
, author
);
117 TeamOrPersonBase team
= parseSingleTeam(author
);
118 ref
.setAuthorTeam(team
);
119 ref
.setProtectedTitleCache(false);
121 logger
.warn("Could not parse reference: " + titleToParse
);
127 private String
parseInRefrence(Reference ref
, String author
) {
128 int pos
= author
.indexOf(" in ");
130 String inAuthorString
= author
.substring(pos
+ 4);
131 String myAuthorString
= author
.substring(0, pos
);
132 Reference inReference
= ReferenceFactory
.newGeneric();
133 TeamOrPersonBase inAuthor
= parseSingleTeam(inAuthorString
);
134 inReference
.setAuthorTeam(inAuthor
);
135 ref
.setInReference(inReference
);
136 return myAuthorString
;
143 private String
removeReferenceBracket(String refString
, Reference ref
) {
144 String titleToParse
= refString
;
145 String reBracket
= "\\(.*\\).?";
146 Pattern patBracket
= Pattern
.compile(reBracket
);
147 Matcher matcher
= patBracket
.matcher(titleToParse
);
149 if (matcher
.matches()){
150 int start
= matcher
.start() + 1;
151 int end
= matcher
.end() -1 ;
152 if (! titleToParse
.endsWith("")){
155 titleToParse
= titleToParse
.substring(start
, end
);
157 ref
.setTitleCache(titleToParse
);
168 protected TeamOrPersonBase
handleNameUsage(Taxon taxon
, NonViralName name
, String referenceTitle
, TeamOrPersonBase lastTeam
) {
169 Reference ref
= ReferenceFactory
.newGeneric();
171 ref
.setTitleCache(referenceTitle
, true);
173 TeamOrPersonBase team
= getReferenceAuthor(ref
, name
);
174 ref
.setAuthorTeam(team
);
176 String
[] multipleReferences
= ref
.getTitleCache().split("&");
178 TaxonDescription description
= getDescription(taxon
);
179 for (String singleReferenceString
: multipleReferences
){
180 Reference singleRef
= ReferenceFactory
.newGeneric();
181 singleRef
.setTitleCache(singleReferenceString
.trim(), true);
182 singleRef
.setAuthorTeam(team
);
184 String microReference
= parseReferenceYearAndDetailForUsage(singleRef
);
186 singleRef
.setTitle( CdmUtils
.Nz(singleRef
.getTitle()) + " - no title given yet -");
188 // parseReferenceType(ref);
190 TextData textData
= TextData
.NewInstance(Feature
.CITATION());
191 textData
.addSource(null, null, singleRef
, microReference
, name
, null);
192 description
.addElement(textData
);
197 private String
parseReferenceYearAndDetailForUsage(Reference ref
) {
198 String detailResult
= null;
199 String titleToParse
= ref
.getTitleCache().trim();
201 int detailStart
= titleToParse
.indexOf(":");
202 if (detailStart
>= 0){
203 detailResult
= titleToParse
.substring(detailStart
+ 1);
204 titleToParse
= titleToParse
.substring(0, titleToParse
.length() - detailResult
.length() - 1).trim();
205 detailResult
= detailResult
.trim();
208 String reYear
= "^[1-2]{1}[0-9]{3}[a-e]?$";
209 String reYearPeriod
= reYear
;
211 // //pattern for the whole string
212 Pattern patReference
= Pattern
.compile( reYearPeriod
);
213 Matcher matcher
= patReference
.matcher(titleToParse
);
214 if (! matcher
.find()){
215 logger
.warn("Could not parse year: " + titleToParse
);
217 if (Pattern
.matches("^[1-2]{1}[0-9]{3}[a-e]$", titleToParse
)){
218 String title
= titleToParse
.substring(4,5);
220 titleToParse
= titleToParse
.substring(0, 4);
222 ref
.setProtectedTitleCache(false);
224 TimePeriod datePublished
= TimePeriod
.parseString(titleToParse
);
225 ref
.setDatePublished(datePublished
);
230 protected TeamOrPersonBase
getReferenceAuthor (Reference ref
, NonViralName name
) {
231 String titleString
= ref
.getTitleCache();
232 String re
= "\\(.*\\)";
233 Pattern pattern
= Pattern
.compile(re
);
234 Matcher matcher
= pattern
.matcher(titleString
);
236 int start
= matcher
.start();
237 String authorString
= titleString
.substring(0, start
).trim();
238 String restString
= titleString
.substring(start
+ 1 , matcher
.end() - 1);
239 TeamOrPersonBase team
= getAuthorTeam(authorString
, name
);
240 ref
.setTitleCache(restString
, true);
243 logger
.warn("Title does not match: " + titleString
);
249 private TeamOrPersonBase
getAuthorTeam(String authorString
, NonViralName name
) {
251 // TeamOrPersonBase nameTeam = CdmBase.deproxy(name.getCombinationAuthorTeam(), TeamOrPersonBase.class);
252 // String nameTeamTitle = nameTeam == null ? "" : nameTeam.getNomenclaturalTitle();
254 // if (nameTeam == null || ! authorTeamsMatch(authorString, nameTeamTitle)){
255 // logger.warn("Author teams do not match: " + authorString + " <-> " + nameTeamTitle);
256 TeamOrPersonBase result
= parseSingleTeam(authorString
);
257 result
.setTitleCache(authorString
, true);
260 // nameTeam.setTitleCache(authorString, true);
266 * @param refAuthorTeam
270 private boolean authorTeamsMatch(TeamOrPersonBase refAuthorTeam
, TeamOrPersonBase nameTeam
) {
271 String nameTeamString
= nameTeam
.getNomenclaturalTitle();
272 String refAuthorTeamString
= refAuthorTeam
.getTitleCache();
273 if (nameTeamString
.equalsIgnoreCase(refAuthorTeamString
)){
277 if (nameTeamString
.endsWith(".")){
278 nameTeamString
= nameTeamString
.substring(0, nameTeamString
.length() - 1 );
279 if (refAuthorTeamString
.startsWith(nameTeamString
)){
282 return checkSingleAndIpniAuthor(nameTeam
, refAuthorTeam
);
285 if (nameTeamString
.endsWith(refAuthorTeamString
) || refAuthorTeamString
.endsWith(nameTeamString
)){
288 return checkSingleAndIpniAuthor(nameTeam
, refAuthorTeam
);
293 private boolean checkSingleAndIpniAuthor(TeamOrPersonBase nameTeam
, TeamOrPersonBase refAuthorTeam
) {
294 if ( nameTeam
.isInstanceOf(Team
.class) && ((Team
)nameTeam
).getTeamMembers().size()> 1 ||
295 refAuthorTeam
.isInstanceOf(Team
.class) && ((Team
)refAuthorTeam
).getTeamMembers().size()> 1){
297 if (! (nameTeam
.isInstanceOf(Team
.class) && refAuthorTeam
.isInstanceOf(Team
.class) ) ){
298 logger
.warn("Only one author is a real team");
301 Team realNameTeam
= (Team
)nameTeam
;
302 Team realRefAuthorTeam
= (Team
)refAuthorTeam
;
304 if (realNameTeam
.getTeamMembers().size() != realRefAuthorTeam
.getTeamMembers().size()){
305 logger
.warn("Teams do not have the same size");
309 if (realNameTeam
.getTeamMembers().size() == 0){
310 logger
.warn("Teams are empty");
313 //compare each team member
314 for (int i
= 0; i
< realNameTeam
.getTeamMembers().size(); i
++){
315 Person namePerson
= realNameTeam
.getTeamMembers().get(i
);
316 Person refPerson
= realRefAuthorTeam
.getTeamMembers().get(i
);
317 if ( authorTeamsMatch(refPerson
, namePerson
) == false){
323 boolean result
= checkIpniAuthor(nameTeam
.getNomenclaturalTitle(), refAuthorTeam
);
327 private boolean checkIpniAuthor(String nameTeamString
, TeamOrPersonBase refAuthorTeam
) {
328 IpniService ipniService
= new IpniService();
329 List
<Person
> ipniAuthors
= ipniService
.getAuthors(nameTeamString
, null, null, null, null, null);
330 if (ipniAuthors
!= null){
331 for (Person ipniAuthor
: ipniAuthors
){
332 if (ipniAuthor
.getLastname() != null && ipniAuthor
.getLastname().equalsIgnoreCase(refAuthorTeam
.getTitleCache())){
335 logger
.warn(ipniAuthor
.getTitleCache() + " <-> " + refAuthorTeam
.getTitleCache());
338 logger
.warn("IPNI not available");
347 * @param homotypicalGroup
350 protected void handleTypeRef(EfloraImportState state
, Element elNom
, Taxon taxon
, HomotypicalGroup homotypicalGroup
) {
351 verifyNoChildren(elNom
);
352 String typeRef
= elNom
.getTextNormalize();
353 typeRef
= removeStartingTypeRefMinus(typeRef
);
354 typeRef
= removeTypePrefix(typeRef
);
355 TypeDesignationBase typeDesignation
= SpecimenTypeDesignation
.NewInstance();
356 makeSpecimenTypeDesignation(new StringBuffer("Type"), typeRef
, typeDesignation
);
357 for (TaxonNameBase name
: homotypicalGroup
.getTypifiedNames()){
358 name
.addTypeDesignation(typeDesignation
, true);
362 private String
removeTypePrefix(String typeRef
) {
363 typeRef
= typeRef
.trim().replace("Type: ", "").replace("Types: ", "").trim();
367 protected void handleGenus(String value
, TaxonNameBase taxonName
) {