2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.eflora
.centralAfrica
.ericaceae
;
12 import java
.util
.List
;
13 import java
.util
.regex
.Matcher
;
14 import java
.util
.regex
.Pattern
;
16 import org
.apache
.log4j
.Logger
;
17 import org
.jdom
.Element
;
18 import org
.springframework
.stereotype
.Component
;
20 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
21 import eu
.etaxonomy
.cdm
.ext
.ipni
.IpniService
;
22 import eu
.etaxonomy
.cdm
.io
.eflora
.EfloraImportState
;
23 import eu
.etaxonomy
.cdm
.io
.eflora
.EfloraTaxonImport
;
24 import eu
.etaxonomy
.cdm
.model
.agent
.Person
;
25 import eu
.etaxonomy
.cdm
.model
.agent
.Team
;
26 import eu
.etaxonomy
.cdm
.model
.agent
.TeamOrPersonBase
;
27 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
28 import eu
.etaxonomy
.cdm
.model
.common
.OriginalSourceType
;
29 import eu
.etaxonomy
.cdm
.model
.common
.TimePeriod
;
30 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
31 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
32 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
33 import eu
.etaxonomy
.cdm
.model
.name
.HomotypicalGroup
;
34 import eu
.etaxonomy
.cdm
.model
.name
.NonViralName
;
35 import eu
.etaxonomy
.cdm
.model
.name
.SpecimenTypeDesignation
;
36 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
37 import eu
.etaxonomy
.cdm
.model
.name
.TypeDesignationBase
;
38 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
39 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
40 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
41 import eu
.etaxonomy
.cdm
.strategy
.parser
.TimePeriodParser
;
49 public class CentralAfricaEricaceaeTaxonImport
extends EfloraTaxonImport
{
50 private static final Logger logger
= Logger
.getLogger(CentralAfricaEricaceaeTaxonImport
.class);
56 * @see eu.etaxonomy.cdm.io.eflora.EfloraTaxonImport#handleNomenclaturalReference(eu.etaxonomy.cdm.model.name.NonViralName, java.lang.String)
59 protected TeamOrPersonBase
handleNomenclaturalReference(NonViralName name
, String value
) {
60 Reference nomRef
= ReferenceFactory
.newGeneric();
61 nomRef
.setTitleCache(value
, true);
62 parseNomStatus(nomRef
, name
);
63 name
.setNomenclaturalReference(nomRef
);
65 String microReference
= parseReferenceYearAndDetail(nomRef
);
66 microReference
= removeTrailing(microReference
, ")");
68 microReference
= parseHomonym(microReference
, name
);
69 name
.setNomenclaturalMicroReference(microReference
);
71 TeamOrPersonBase nameTeam
= CdmBase
.deproxy(name
.getCombinationAuthorTeam(), TeamOrPersonBase
.class);
72 TeamOrPersonBase refTeam
= nomRef
.getAuthorTeam();
73 if (nameTeam
== null ){
74 logger
.warn("Name has nom. ref. but no author team. Name: " + name
.getTitleCache() + ", Nom.Ref.: " + value
);
75 }else if (refTeam
== null ){
76 logger
.warn("Name has nom. ref. but no nom.ref. author. Name: " + name
.getTitleCache() + ", Nom.Ref.: " + value
);
77 }else if (! authorTeamsMatch(refTeam
, nameTeam
)){
78 logger
.warn("Nom.Ref. author and comb. author do not match: " + nomRef
.getTitleCache() + " <-> " + nameTeam
.getNomenclaturalTitle());
80 nomRef
.setAuthorTeam(nameTeam
);
81 nomRef
.setTitle(CdmUtils
.Nz(nomRef
.getTitle()) + " - no title given yet -");
82 nameTeam
.setTitleCache(refTeam
.getTitleCache(), true);
88 * Extracts the date published part and returns micro reference
92 protected String
parseReferenceYearAndDetail(Reference ref
){
93 String detailResult
= null;
94 String titleToParse
= ref
.getTitleCache();
95 titleToParse
= removeReferenceBracket(titleToParse
, ref
);
97 int detailStart
= titleToParse
.indexOf(":");
98 if (detailStart
>= 0){
99 detailResult
= titleToParse
.substring(detailStart
+ 1);
100 titleToParse
= titleToParse
.substring(0, titleToParse
.length() - detailResult
.length() - 1).trim();
101 detailResult
= detailResult
.trim();
104 String reYear
= "\\s[1-2]{1}[0-9]{3}";
105 String reYearPeriod
= reYear
;
107 // //pattern for the whole string
108 Pattern patReference
= Pattern
.compile( reYearPeriod
);
109 Matcher matcher
= patReference
.matcher(titleToParse
);
111 int start
= matcher
.start();
112 int end
= matcher
.end();
114 String strPeriod
= titleToParse
.substring(start
, end
);
115 TimePeriod datePublished
= TimePeriodParser
.parseString(strPeriod
);
116 ref
.setDatePublished(datePublished
);
117 String author
= titleToParse
.substring(0, start
).trim();
118 author
= parseInRefrence(ref
, author
);
119 TeamOrPersonBase team
= parseSingleTeam(author
);
120 ref
.setAuthorTeam(team
);
121 ref
.setProtectedTitleCache(false);
123 logger
.warn("Could not parse reference: " + titleToParse
);
129 private String
parseInRefrence(Reference ref
, String author
) {
130 int pos
= author
.indexOf(" in ");
132 String inAuthorString
= author
.substring(pos
+ 4);
133 String myAuthorString
= author
.substring(0, pos
);
134 Reference inReference
= ReferenceFactory
.newGeneric();
135 TeamOrPersonBase inAuthor
= parseSingleTeam(inAuthorString
);
136 inReference
.setAuthorTeam(inAuthor
);
137 ref
.setInReference(inReference
);
138 return myAuthorString
;
145 private String
removeReferenceBracket(String refString
, Reference ref
) {
146 String titleToParse
= refString
;
147 String reBracket
= "\\(.*\\).?";
148 Pattern patBracket
= Pattern
.compile(reBracket
);
149 Matcher matcher
= patBracket
.matcher(titleToParse
);
151 if (matcher
.matches()){
152 int start
= matcher
.start() + 1;
153 int end
= matcher
.end() -1 ;
154 if (! titleToParse
.endsWith("")){
157 titleToParse
= titleToParse
.substring(start
, end
);
159 ref
.setTitleCache(titleToParse
);
170 protected TeamOrPersonBase
<?
> handleNameUsage(Taxon taxon
, NonViralName
<?
> name
, String referenceTitle
, TeamOrPersonBase lastTeam
) {
171 Reference
<?
> ref
= ReferenceFactory
.newGeneric();
173 ref
.setTitleCache(referenceTitle
, true);
175 TeamOrPersonBase
<?
> team
= getReferenceAuthor(ref
, name
);
176 ref
.setAuthorTeam(team
);
178 String
[] multipleReferences
= ref
.getTitleCache().split("&");
180 TaxonDescription description
= getDescription(taxon
);
181 for (String singleReferenceString
: multipleReferences
){
182 Reference
<?
> singleRef
= ReferenceFactory
.newGeneric();
183 singleRef
.setTitleCache(singleReferenceString
.trim(), true);
184 singleRef
.setAuthorTeam(team
);
186 String microReference
= parseReferenceYearAndDetailForUsage(singleRef
);
188 singleRef
.setTitle( CdmUtils
.Nz(singleRef
.getTitle()) + " - no title given yet -");
190 // parseReferenceType(ref);
192 TextData textData
= TextData
.NewInstance(Feature
.CITATION());
193 textData
.addSource(OriginalSourceType
.PrimaryTaxonomicSource
, null, null, singleRef
, microReference
, name
, null);
194 description
.addElement(textData
);
// Parses the title cache of a usage reference that is expected to look like
// "<year>[a-e]" optionally followed by ": <detail>". The text behind the first ':'
// is kept (trimmed) in detailResult, the text in front of it is checked against
// the year pattern and parsed into the date published of the reference.
// NOTE(review): closing braces, else branches and the return statement are not
// visible in this view of the method - presumably detailResult is returned; confirm.
199 private String
parseReferenceYearAndDetailForUsage(Reference ref
) {
200 String detailResult
= null;
201 String titleToParse
= ref
.getTitleCache().trim();
// split "<year>: <detail>" at the first colon
203 int detailStart
= titleToParse
.indexOf(":");
204 if (detailStart
>= 0){
205 detailResult
= titleToParse
.substring(detailStart
+ 1);
// keep only the text in front of the colon
206 titleToParse
= titleToParse
.substring(0, titleToParse
.length() - detailResult
.length() - 1).trim();
207 detailResult
= detailResult
.trim();
// the whole remaining string must be a four-digit year starting with 1 or 2,
// optionally followed by one letter a-e
210 String reYear
= "^[1-2]{1}[0-9]{3}[a-e]?$";
211 String reYearPeriod
= reYear
;
213 // //pattern for the whole string
214 Pattern patReference
= Pattern
.compile( reYearPeriod
);
215 Matcher matcher
= patReference
.matcher(titleToParse
);
// warn if the remaining title is not a plain year
216 if (! matcher
.find()){
217 logger
.warn("Could not parse year: " + titleToParse
);
// year with a letter suffix: the letter is cut off, only the four digits remain
219 if (Pattern
.matches("^[1-2]{1}[0-9]{3}[a-e]$", titleToParse
)){
// NOTE(review): "title" holds the suffix letter; no use of it is visible here - confirm
220 String title
= titleToParse
.substring(4,5);
222 titleToParse
= titleToParse
.substring(0, 4);
// un-protect the title cache (presumably so that it gets regenerated - confirm)
224 ref
.setProtectedTitleCache(false);
// parse the remaining year string and store it as date published
226 TimePeriod datePublished
= TimePeriodParser
.parseString(titleToParse
);
227 ref
.setDatePublished(datePublished
);
232 protected TeamOrPersonBase
getReferenceAuthor (Reference ref
, NonViralName name
) {
233 String titleString
= ref
.getTitleCache();
234 String re
= "\\(.*\\)";
235 Pattern pattern
= Pattern
.compile(re
);
236 Matcher matcher
= pattern
.matcher(titleString
);
238 int start
= matcher
.start();
239 String authorString
= titleString
.substring(0, start
).trim();
240 String restString
= titleString
.substring(start
+ 1 , matcher
.end() - 1);
241 TeamOrPersonBase team
= getAuthorTeam(authorString
, name
);
242 ref
.setTitleCache(restString
, true);
245 logger
.warn("Title does not match: " + titleString
);
251 private TeamOrPersonBase
getAuthorTeam(String authorString
, NonViralName name
) {
253 // TeamOrPersonBase nameTeam = CdmBase.deproxy(name.getCombinationAuthorTeam(), TeamOrPersonBase.class);
254 // String nameTeamTitle = nameTeam == null ? "" : nameTeam.getNomenclaturalTitle();
256 // if (nameTeam == null || ! authorTeamsMatch(authorString, nameTeamTitle)){
257 // logger.warn("Author teams do not match: " + authorString + " <-> " + nameTeamTitle);
258 TeamOrPersonBase result
= parseSingleTeam(authorString
);
259 result
.setTitleCache(authorString
, true);
262 // nameTeam.setTitleCache(authorString, true);
268 * @param refAuthorTeam
272 private boolean authorTeamsMatch(TeamOrPersonBase refAuthorTeam
, TeamOrPersonBase nameTeam
) {
273 String nameTeamString
= nameTeam
.getNomenclaturalTitle();
274 String refAuthorTeamString
= refAuthorTeam
.getTitleCache();
275 if (nameTeamString
.equalsIgnoreCase(refAuthorTeamString
)){
279 if (nameTeamString
.endsWith(".")){
280 nameTeamString
= nameTeamString
.substring(0, nameTeamString
.length() - 1 );
281 if (refAuthorTeamString
.startsWith(nameTeamString
)){
284 return checkSingleAndIpniAuthor(nameTeam
, refAuthorTeam
);
287 if (nameTeamString
.endsWith(refAuthorTeamString
) || refAuthorTeamString
.endsWith(nameTeamString
)){
290 return checkSingleAndIpniAuthor(nameTeam
, refAuthorTeam
);
295 private boolean checkSingleAndIpniAuthor(TeamOrPersonBase nameTeam
, TeamOrPersonBase refAuthorTeam
) {
296 if ( nameTeam
.isInstanceOf(Team
.class) && ((Team
)nameTeam
).getTeamMembers().size()> 1 ||
297 refAuthorTeam
.isInstanceOf(Team
.class) && ((Team
)refAuthorTeam
).getTeamMembers().size()> 1){
299 if (! (nameTeam
.isInstanceOf(Team
.class) && refAuthorTeam
.isInstanceOf(Team
.class) ) ){
300 logger
.warn("Only one author is a real team");
303 Team realNameTeam
= (Team
)nameTeam
;
304 Team realRefAuthorTeam
= (Team
)refAuthorTeam
;
306 if (realNameTeam
.getTeamMembers().size() != realRefAuthorTeam
.getTeamMembers().size()){
307 logger
.warn("Teams do not have the same size");
311 if (realNameTeam
.getTeamMembers().size() == 0){
312 logger
.warn("Teams are empty");
315 //compare each team member
316 for (int i
= 0; i
< realNameTeam
.getTeamMembers().size(); i
++){
317 Person namePerson
= realNameTeam
.getTeamMembers().get(i
);
318 Person refPerson
= realRefAuthorTeam
.getTeamMembers().get(i
);
319 if ( authorTeamsMatch(refPerson
, namePerson
) == false){
325 boolean result
= checkIpniAuthor(nameTeam
.getNomenclaturalTitle(), refAuthorTeam
);
329 private boolean checkIpniAuthor(String nameTeamString
, TeamOrPersonBase refAuthorTeam
) {
330 IpniService ipniService
= new IpniService();
331 List
<Person
> ipniAuthors
= ipniService
.getAuthors(nameTeamString
, null, null, null, null, null);
332 if (ipniAuthors
!= null){
333 for (Person ipniAuthor
: ipniAuthors
){
334 if (ipniAuthor
.getLastname() != null && ipniAuthor
.getLastname().equalsIgnoreCase(refAuthorTeam
.getTitleCache())){
337 logger
.warn(ipniAuthor
.getTitleCache() + " <-> " + refAuthorTeam
.getTitleCache());
340 logger
.warn("IPNI not available");
349 * @param homotypicalGroup
352 protected void handleTypeRef(EfloraImportState state
, Element elNom
, Taxon taxon
, HomotypicalGroup homotypicalGroup
) {
353 verifyNoChildren(elNom
);
354 String typeRef
= elNom
.getTextNormalize();
355 typeRef
= removeStartingTypeRefMinus(typeRef
);
356 typeRef
= removeTypePrefix(typeRef
);
357 TypeDesignationBase typeDesignation
= SpecimenTypeDesignation
.NewInstance();
358 makeSpecimenTypeDesignation(new StringBuffer("Type"), typeRef
, typeDesignation
);
359 for (TaxonNameBase name
: homotypicalGroup
.getTypifiedNames()){
360 name
.addTypeDesignation(typeDesignation
, true);
364 private String
removeTypePrefix(String typeRef
) {
365 typeRef
= typeRef
.trim().replace("Type: ", "").replace("Types: ", "").trim();
369 protected void handleGenus(String value
, TaxonNameBase taxonName
) {