2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.eflora
.centralAfrica
.ericaceae
;
12 import java
.util
.List
;
13 import java
.util
.regex
.Matcher
;
14 import java
.util
.regex
.Pattern
;
16 import org
.apache
.log4j
.Logger
;
17 import org
.jdom
.Element
;
18 import org
.springframework
.stereotype
.Component
;
20 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
21 import eu
.etaxonomy
.cdm
.ext
.ipni
.IpniService
;
22 import eu
.etaxonomy
.cdm
.io
.eflora
.EfloraImportState
;
23 import eu
.etaxonomy
.cdm
.io
.eflora
.EfloraTaxonImport
;
24 import eu
.etaxonomy
.cdm
.model
.agent
.Person
;
25 import eu
.etaxonomy
.cdm
.model
.agent
.Team
;
26 import eu
.etaxonomy
.cdm
.model
.agent
.TeamOrPersonBase
;
27 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
28 import eu
.etaxonomy
.cdm
.model
.common
.OriginalSourceType
;
29 import eu
.etaxonomy
.cdm
.model
.common
.TimePeriod
;
30 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
31 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
32 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
33 import eu
.etaxonomy
.cdm
.model
.name
.HomotypicalGroup
;
34 import eu
.etaxonomy
.cdm
.model
.name
.INonViralName
;
35 import eu
.etaxonomy
.cdm
.model
.name
.SpecimenTypeDesignation
;
36 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
37 import eu
.etaxonomy
.cdm
.model
.name
.TypeDesignationBase
;
38 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
39 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
40 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
41 import eu
.etaxonomy
.cdm
.strategy
.parser
.TimePeriodParser
;
49 public class CentralAfricaEricaceaeTaxonImport
extends EfloraTaxonImport
{
50 private static final long serialVersionUID
= 6442665916458420942L;
51 private static final Logger logger
= Logger
.getLogger(CentralAfricaEricaceaeTaxonImport
.class);
55 protected TeamOrPersonBase
handleNomenclaturalReference(TaxonNameBase name
, String value
) {
56 Reference nomRef
= ReferenceFactory
.newGeneric();
57 nomRef
.setTitleCache(value
, true);
58 parseNomStatus(nomRef
, name
);
59 name
.setNomenclaturalReference(nomRef
);
61 String microReference
= parseReferenceYearAndDetail(nomRef
);
62 microReference
= removeTrailing(microReference
, ")");
64 microReference
= parseHomonym(microReference
, name
);
65 name
.setNomenclaturalMicroReference(microReference
);
67 TeamOrPersonBase
<?
> nameTeam
= CdmBase
.deproxy(name
.getCombinationAuthorship(), TeamOrPersonBase
.class);
68 TeamOrPersonBase
<?
> refTeam
= nomRef
.getAuthorship();
69 if (nameTeam
== null ){
70 logger
.warn("Name has nom. ref. but no author team. Name: " + name
.getTitleCache() + ", Nom.Ref.: " + value
);
71 }else if (refTeam
== null ){
72 logger
.warn("Name has nom. ref. but no nom.ref. author. Name: " + name
.getTitleCache() + ", Nom.Ref.: " + value
);
73 }else if (! authorTeamsMatch(refTeam
, nameTeam
)){
74 logger
.warn("Nom.Ref. author and comb. author do not match: " + nomRef
.getTitleCache() + " <-> " + nameTeam
.getNomenclaturalTitle());
76 nomRef
.setAuthorship(nameTeam
);
77 nomRef
.setTitle(CdmUtils
.Nz(nomRef
.getTitle()) + " - no title given yet -");
78 nameTeam
.setTitleCache(refTeam
.getTitleCache(), true);
84 * Extracts the date published part and returns micro reference
88 protected String
parseReferenceYearAndDetail(Reference ref
){
89 String detailResult
= null;
90 String titleToParse
= ref
.getTitleCache();
91 titleToParse
= removeReferenceBracket(titleToParse
, ref
);
93 int detailStart
= titleToParse
.indexOf(":");
94 if (detailStart
>= 0){
95 detailResult
= titleToParse
.substring(detailStart
+ 1);
96 titleToParse
= titleToParse
.substring(0, titleToParse
.length() - detailResult
.length() - 1).trim();
97 detailResult
= detailResult
.trim();
100 String reYear
= "\\s[1-2]{1}[0-9]{3}";
101 String reYearPeriod
= reYear
;
103 // //pattern for the whole string
104 Pattern patReference
= Pattern
.compile( reYearPeriod
);
105 Matcher matcher
= patReference
.matcher(titleToParse
);
107 int start
= matcher
.start();
108 int end
= matcher
.end();
110 String strPeriod
= titleToParse
.substring(start
, end
);
111 TimePeriod datePublished
= TimePeriodParser
.parseString(strPeriod
);
112 ref
.setDatePublished(datePublished
);
113 String author
= titleToParse
.substring(0, start
).trim();
114 author
= parseInRefrence(ref
, author
);
115 TeamOrPersonBase team
= parseSingleTeam(author
);
116 ref
.setAuthorship(team
);
117 ref
.setProtectedTitleCache(false);
119 logger
.warn("Could not parse reference: " + titleToParse
);
125 private String
parseInRefrence(Reference ref
, String author
) {
126 int pos
= author
.indexOf(" in ");
128 String inAuthorString
= author
.substring(pos
+ 4);
129 String myAuthorString
= author
.substring(0, pos
);
130 Reference inReference
= ReferenceFactory
.newGeneric();
131 TeamOrPersonBase inAuthor
= parseSingleTeam(inAuthorString
);
132 inReference
.setAuthorship(inAuthor
);
133 ref
.setInReference(inReference
);
134 return myAuthorString
;
141 private String
removeReferenceBracket(String refString
, Reference ref
) {
142 String titleToParse
= refString
;
143 String reBracket
= "\\(.*\\).?";
144 Pattern patBracket
= Pattern
.compile(reBracket
);
145 Matcher matcher
= patBracket
.matcher(titleToParse
);
147 if (matcher
.matches()){
148 int start
= matcher
.start() + 1;
149 int end
= matcher
.end() -1 ;
150 if (! titleToParse
.endsWith("")){
153 titleToParse
= titleToParse
.substring(start
, end
);
155 ref
.setTitleCache(titleToParse
);
166 protected TeamOrPersonBase
<?
> handleNameUsage(Taxon taxon
, INonViralName name
,
167 String referenceTitle
, TeamOrPersonBase lastTeam
) {
169 Reference ref
= ReferenceFactory
.newGeneric();
171 ref
.setTitleCache(referenceTitle
, true);
173 TeamOrPersonBase
<?
> team
= getReferenceAuthor(ref
, name
);
174 ref
.setAuthorship(team
);
176 String
[] multipleReferences
= ref
.getTitleCache().split("&");
178 TaxonDescription description
= getDescription(taxon
);
179 for (String singleReferenceString
: multipleReferences
){
180 Reference singleRef
= ReferenceFactory
.newGeneric();
181 singleRef
.setTitleCache(singleReferenceString
.trim(), true);
182 singleRef
.setAuthorship(team
);
184 String microReference
= parseReferenceYearAndDetailForUsage(singleRef
);
186 singleRef
.setTitle( CdmUtils
.Nz(singleRef
.getTitle()) + " - no title given yet -");
188 // parseReferenceType(ref);
190 TextData textData
= TextData
.NewInstance(Feature
.CITATION());
191 textData
.addSource(OriginalSourceType
.PrimaryTaxonomicSource
, null, null, singleRef
, microReference
, (TaxonNameBase
) name
, null);
192 description
.addElement(textData
);
197 private String
parseReferenceYearAndDetailForUsage(Reference ref
) {
198 String detailResult
= null;
199 String titleToParse
= ref
.getTitleCache().trim();
201 int detailStart
= titleToParse
.indexOf(":");
202 if (detailStart
>= 0){
203 detailResult
= titleToParse
.substring(detailStart
+ 1);
204 titleToParse
= titleToParse
.substring(0, titleToParse
.length() - detailResult
.length() - 1).trim();
205 detailResult
= detailResult
.trim();
208 String reYear
= "^[1-2]{1}[0-9]{3}[a-e]?$";
209 String reYearPeriod
= reYear
;
211 // //pattern for the whole string
212 Pattern patReference
= Pattern
.compile( reYearPeriod
);
213 Matcher matcher
= patReference
.matcher(titleToParse
);
214 if (! matcher
.find()){
215 logger
.warn("Could not parse year: " + titleToParse
);
217 if (Pattern
.matches("^[1-2]{1}[0-9]{3}[a-e]$", titleToParse
)){
218 String title
= titleToParse
.substring(4,5);
220 titleToParse
= titleToParse
.substring(0, 4);
222 ref
.setProtectedTitleCache(false);
224 TimePeriod datePublished
= TimePeriodParser
.parseString(titleToParse
);
225 ref
.setDatePublished(datePublished
);
230 protected TeamOrPersonBase
getReferenceAuthor (Reference ref
, INonViralName name
) {
231 String titleString
= ref
.getTitleCache();
232 String re
= "\\(.*\\)";
233 Pattern pattern
= Pattern
.compile(re
);
234 Matcher matcher
= pattern
.matcher(titleString
);
236 int start
= matcher
.start();
237 String authorString
= titleString
.substring(0, start
).trim();
238 String restString
= titleString
.substring(start
+ 1 , matcher
.end() - 1);
239 TeamOrPersonBase team
= getAuthorTeam(authorString
, name
);
240 ref
.setTitleCache(restString
, true);
243 logger
.warn("Title does not match: " + titleString
);
249 private TeamOrPersonBase
getAuthorTeam(String authorString
, INonViralName name
) {
251 // TeamOrPersonBase nameTeam = CdmBase.deproxy(name.getCombinationAuthorship(), TeamOrPersonBase.class);
252 // String nameTeamTitle = nameTeam == null ? "" : nameTeam.getNomenclaturalTitle();
254 // if (nameTeam == null || ! authorTeamsMatch(authorString, nameTeamTitle)){
255 // logger.warn("Author teams do not match: " + authorString + " <-> " + nameTeamTitle);
256 TeamOrPersonBase result
= parseSingleTeam(authorString
);
257 result
.setTitleCache(authorString
, true);
260 // nameTeam.setTitleCache(authorString, true);
266 * @param refAuthorTeam
270 private boolean authorTeamsMatch(TeamOrPersonBase refAuthorTeam
, TeamOrPersonBase nameTeam
) {
271 String nameTeamString
= nameTeam
.getNomenclaturalTitle();
272 String refAuthorTeamString
= refAuthorTeam
.getTitleCache();
273 if (nameTeamString
.equalsIgnoreCase(refAuthorTeamString
)){
277 if (nameTeamString
.endsWith(".")){
278 nameTeamString
= nameTeamString
.substring(0, nameTeamString
.length() - 1 );
279 if (refAuthorTeamString
.startsWith(nameTeamString
)){
282 return checkSingleAndIpniAuthor(nameTeam
, refAuthorTeam
);
285 if (nameTeamString
.endsWith(refAuthorTeamString
) || refAuthorTeamString
.endsWith(nameTeamString
)){
288 return checkSingleAndIpniAuthor(nameTeam
, refAuthorTeam
);
293 private boolean checkSingleAndIpniAuthor(TeamOrPersonBase nameTeam
, TeamOrPersonBase refAuthorTeam
) {
294 if ( nameTeam
.isInstanceOf(Team
.class) && ((Team
)nameTeam
).getTeamMembers().size()> 1 ||
295 refAuthorTeam
.isInstanceOf(Team
.class) && ((Team
)refAuthorTeam
).getTeamMembers().size()> 1){
297 if (! (nameTeam
.isInstanceOf(Team
.class) && refAuthorTeam
.isInstanceOf(Team
.class) ) ){
298 logger
.warn("Only one author is a real team");
301 Team realNameTeam
= (Team
)nameTeam
;
302 Team realRefAuthorTeam
= (Team
)refAuthorTeam
;
304 if (realNameTeam
.getTeamMembers().size() != realRefAuthorTeam
.getTeamMembers().size()){
305 logger
.warn("Teams do not have the same size");
309 if (realNameTeam
.getTeamMembers().size() == 0){
310 logger
.warn("Teams are empty");
313 //compare each team member
314 for (int i
= 0; i
< realNameTeam
.getTeamMembers().size(); i
++){
315 Person namePerson
= realNameTeam
.getTeamMembers().get(i
);
316 Person refPerson
= realRefAuthorTeam
.getTeamMembers().get(i
);
317 if ( authorTeamsMatch(refPerson
, namePerson
) == false){
323 boolean result
= checkIpniAuthor(nameTeam
.getNomenclaturalTitle(), refAuthorTeam
);
327 private boolean checkIpniAuthor(String nameTeamString
, TeamOrPersonBase refAuthorTeam
) {
328 IpniService ipniService
= new IpniService();
329 List
<Person
> ipniAuthors
= ipniService
.getAuthors(nameTeamString
, null, null, null, null, null);
330 if (ipniAuthors
!= null){
331 for (Person ipniAuthor
: ipniAuthors
){
332 if (ipniAuthor
.getLastname() != null && ipniAuthor
.getLastname().equalsIgnoreCase(refAuthorTeam
.getTitleCache())){
335 logger
.warn(ipniAuthor
.getTitleCache() + " <-> " + refAuthorTeam
.getTitleCache());
338 logger
.warn("IPNI not available");
347 * @param homotypicalGroup
350 protected void handleTypeRef(EfloraImportState state
, Element elNom
, Taxon taxon
, HomotypicalGroup homotypicalGroup
) {
351 verifyNoChildren(elNom
);
352 String typeRef
= elNom
.getTextNormalize();
353 typeRef
= removeStartingTypeRefMinus(typeRef
);
354 typeRef
= removeTypePrefix(typeRef
);
355 TypeDesignationBase typeDesignation
= SpecimenTypeDesignation
.NewInstance();
356 makeSpecimenTypeDesignation(new StringBuffer("Type"), typeRef
, typeDesignation
);
357 for (TaxonNameBase name
: homotypicalGroup
.getTypifiedNames()){
358 name
.addTypeDesignation(typeDesignation
, true);
362 private String
removeTypePrefix(String typeRef
) {
363 typeRef
= typeRef
.trim().replace("Type: ", "").replace("Types: ", "").trim();
368 protected void handleGenus(String value
, INonViralName taxonName
) {