/**
 * Copyright (C) 2016 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
9 package eu
.etaxonomy
.cdm
.io
.greece
;
11 import java
.util
.Arrays
;
12 import java
.util
.List
;
15 import java
.util
.regex
.Matcher
;
16 import java
.util
.regex
.Pattern
;
18 import org
.apache
.log4j
.Logger
;
19 import org
.springframework
.stereotype
.Component
;
21 import eu
.etaxonomy
.cdm
.io
.mexico
.SimpleExcelTaxonImportState
;
22 import eu
.etaxonomy
.cdm
.model
.name
.INonViralName
;
23 import eu
.etaxonomy
.cdm
.model
.name
.NameRelationshipType
;
24 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
25 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatusType
;
26 import eu
.etaxonomy
.cdm
.model
.name
.TaxonName
;
27 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
28 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
29 import eu
.etaxonomy
.cdm
.model
.taxon
.SynonymType
;
30 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
31 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
32 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
33 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImpl
;
41 public class FloraHellenicaSynonymImport
<CONFIG
extends FloraHellenicaImportConfigurator
>
42 extends FloraHellenicaImportBase
<CONFIG
>{
44 private static final long serialVersionUID
= -3565782012921316901L;
45 private static final Logger logger
= Logger
.getLogger(FloraHellenicaSynonymImport
.class);
47 private static final String ACCEPTED_NAME
= "Accepted name";
48 private static final String SYNONYM
= "synonym";
49 private static final String UNIQUE_ID_OF_ACCEPTED_NAME
= "Unique ID of accepted name";
51 private static List
<String
> expectedKeys
= Arrays
.asList(new String
[]{
52 SYNONYM
, UNIQUE_ID_OF_ACCEPTED_NAME
, ACCEPTED_NAME
55 private NonViralNameParserImpl parser
= NonViralNameParserImpl
.NewInstance();
58 protected String
getWorksheetName(CONFIG config
) {
62 boolean isFirst
= true;
67 protected void firstPass(SimpleExcelTaxonImportState
<CONFIG
> state
) {
69 String line
= state
.getCurrentLine() + ": ";
70 Map
<String
, String
> record
= state
.getOriginalRecord();
72 Set
<String
> keys
= record
.keySet();
73 for (String key
: keys
) {
74 if (! expectedKeys
.contains(key
)){
75 logger
.warn(line
+ "Unexpected Key: " + key
);
79 System
.out
.println("Start synonyms");
83 String row
= "row" + state
.getCurrentLine();
84 TaxonBase
<?
> relatedTaxon
= makeSynonym(state
, line
, record
, row
);
85 if (relatedTaxon
!= null){
86 getTaxonService().saveOrUpdate(relatedTaxon
);
98 private TaxonBase
<?
> makeSynonym(SimpleExcelTaxonImportState
<CONFIG
> state
, String line
,
99 Map
<String
, String
> record
,
102 Taxon acceptedTaxon
= getAcceptedTaxon(record
, state
, UNIQUE_ID_OF_ACCEPTED_NAME
);
103 if (acceptedTaxon
== null){
104 logger
.warn(line
+ "Accepted not found: " + record
.get(UNIQUE_ID_OF_ACCEPTED_NAME
));
106 // acceptedTaxon = Taxon.NewInstance(null, null);
109 String synonymStr
= getValue(record
, SYNONYM
);
111 String
[] parsedSynStr
= parseAuct(synonymStr
, line
);
113 boolean isMisapplied
= parsedSynStr
[1] != null;
114 boolean hasNonAuthor
= parsedSynStr
[2] != null;
115 boolean hasStatus
= parsedSynStr
[3] != null;
116 boolean isNec
= hasNonAuthor
&& parsedSynStr
[2].contains(" nec ");
119 String misappliedNecAuthor
= null;
120 if (isMisapplied
&& hasNonAuthor
&& !isNec
){
121 parsedSynStr
[0] = parsedSynStr
[0] + " " + parsedSynStr
[2];
122 }else if (isMisapplied
&& hasNonAuthor
&& isNec
){
123 misappliedNecAuthor
= parsedSynStr
[2];
126 INonViralName nvn
= parser
.parseFullName(parsedSynStr
[0], NomenclaturalCode
.ICNAFP
, null);
127 if (nvn
.isProtectedTitleCache()){
128 logger
.warn(line
+ "Name could not be parsed: " + parsedSynStr
[0] + " (full:" + synonymStr
+ ")");
130 if (misappliedNecAuthor
!= null){
131 nvn
.setAuthorshipCache(misappliedNecAuthor
);
133 TaxonName name
= TaxonName
.castAndDeproxy(nvn
);
137 NomenclaturalStatusType status
= NomenclaturalStatusType
.getNomenclaturalStatusTypeByAbbreviation(parsedSynStr
[3], name
);
138 name
.addStatus(status
, null, null);
139 } catch (UnknownCdmTypeException e
) {
140 logger
.warn(line
+ "Nom. status not recognized: " + parsedSynStr
[3]);
143 name
= replaceNameAuthorsAndReferences(state
, name
, true);
148 Reference sec
= null;// getMisappliedRef(state, parsedSynStr[1]);
149 result
= Taxon
.NewInstance(name
, sec
);
150 result
.setAppendedPhrase(getMisappliedRef(state
, parsedSynStr
[1]));
151 acceptedTaxon
.addMisappliedName((Taxon
)result
, getSecReference(state
), null);
153 logger
.warn(line
+ "nec for misapplied names still needs to be checked: " + synonymStr
);
156 SynonymType synType
= null;
157 result
= acceptedTaxon
.addSynonymName(name
, getSecReference(state
), null, synType
);
159 handleSynonymNon(state
, name
, parsedSynStr
[2], line
);
162 result
.addImportSource(lineId
, getWorksheetName(state
.getConfig()), getSourceCitation(state
), null);
 * @param nonPart the part of the synonym string following ", non "; it is split on " nec "
175 private void handleSynonymNon(SimpleExcelTaxonImportState
<CONFIG
> state
,
176 TaxonName name
, String nonPart
, String line
) {
177 String
[] splits
= nonPart
.split(" nec ");
179 TaxonName lastHomonym
= null;
180 for (String split
: splits
){
181 split
= split
.trim();
182 // Saponaria illyrica Ard.
183 // Crepis nemausensis Gouan
184 // S. columnae Aurnier
185 // S. columnae Aurnier nec (Rchb. f.) H. Fleischm.
186 // T. glaucescens Rchb.
188 if (split
.matches("(Saponaria illyrica Ard.|Crepis nemausensis Gouan|S. columnae Aurnier|T. glaucescens Rchb.|Linaria stricta Guss.)"
190 if (split
.startsWith("S.")){
191 split
= split
.replace("S.", "Serapias");
192 }else if (split
.startsWith("T.")){
193 split
= split
.replace("T.", "Taraxacum");
195 nonName
= TaxonName
.castAndDeproxy(this.parser
.parseFullName(split
));
196 nonName
= replaceNameAuthorsAndReferences(state
, nonName
, true);
197 name
.addRelationshipFromName(nonName
, NameRelationshipType
.BLOCKING_NAME_FOR(), null, null);
199 String nameStr
= name
.getNameCache().replace(" hort.", "") + " " + split
;
200 nonName
= TaxonName
.castAndDeproxy(this.parser
.parseFullName(nameStr
));
201 nonName
= replaceNameAuthorsAndReferences(state
, nonName
, true);
202 name
.addRelationshipToName(nonName
, NameRelationshipType
.LATER_HOMONYM(), null, null);
203 if (lastHomonym
!= null){
204 nonName
.addRelationshipToName(lastHomonym
, NameRelationshipType
.LATER_HOMONYM(), null, null);
206 lastHomonym
= nonName
;
208 getNameService().saveOrUpdate(nonName
);
209 if (nonName
.isProtectedTitleCache()){
210 logger
.warn(line
+ "Non-Name could not be parsed: " + nonName
.getTitleCache());
213 //seems to work correctly
214 // if (splits.length>1){
215 // logger.warn(line + "nec synonyms maybe not yet correctly implemented: " + name.getTitleCache() + "; " + nonPart);
219 private Reference flGraecReference
;
220 private Reference balkanReference
;
222 flGraecReference
= ReferenceFactory
.newBook();
223 flGraecReference
.setTitle("fl. graec.");
224 balkanReference
= ReferenceFactory
.newBook();
225 balkanReference
.setTitle("balc.");
232 private String
getMisappliedRef(SimpleExcelTaxonImportState
<CONFIG
> state
, String refString
) {
233 // if ("fl. graec.".equals(refString)){
234 // return flGraecReference;
235 // }else if ("balc.".equals(refString)){
236 // return balkanReference;
237 if ("fl. graec.".equals(refString
)){
238 return "auct. fl. graec.";
239 }else if ("balc.".equals(refString
)){
240 return "auct. balc.";
242 logger
.warn("Auct. reference not recognized: " + refString
);
247 private String regExMisapplied
= "(.+) auct\\. (fl\\. graec\\.|balc\\.), non (.+)";
248 private Pattern patternMisapplied
= Pattern
.compile(regExMisapplied
);
250 private String regExNon
= "(.+), non (.+)";
251 private Pattern patternNon
= Pattern
.compile(regExNon
);
253 private String regExStatus
= "(.+),\\s+((?:nom.|comb.|orth.)\\s+(.+))";
254 private Pattern patternStat
= Pattern
.compile(regExStatus
);
259 private String
[] parseAuct(String synonymStr
, String line
) {
260 String
[] result
= new String
[4];
261 if (synonymStr
!= null){
262 result
[0] = synonymStr
;
263 Matcher matcher
= patternMisapplied
.matcher(synonymStr
);
264 if (matcher
.matches()){
265 result
[0] = matcher
.group(1);
266 result
[1] = matcher
.group(2);
267 if (! result
[1].equals("fl. graec.") && ! result
[1].equals("balc.")){
268 logger
.warn(line
+ "Misapplied sensu not recognized: " + result
[1]);
270 result
[2] = matcher
.group(3);
272 matcher
= patternNon
.matcher(synonymStr
);
273 if (matcher
.matches()){
274 result
[0] = matcher
.group(1);
275 result
[2] = matcher
.group(2);
277 matcher
= patternStat
.matcher(synonymStr
);
278 if (matcher
.matches()){
279 result
[0] = matcher
.group(1);
280 result
[3] = matcher
.group(2);