1
|
/**
|
2
|
* Copyright (C) 2016 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.io.greece;
|
10
|
|
11
|
import java.util.Arrays;
|
12
|
import java.util.HashMap;
|
13
|
import java.util.List;
|
14
|
import java.util.Set;
|
15
|
import java.util.regex.Matcher;
|
16
|
import java.util.regex.Pattern;
|
17
|
|
18
|
import org.apache.log4j.Logger;
|
19
|
import org.springframework.stereotype.Component;
|
20
|
import org.springframework.transaction.TransactionStatus;
|
21
|
|
22
|
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
|
23
|
import eu.etaxonomy.cdm.model.name.INonViralName;
|
24
|
import eu.etaxonomy.cdm.model.name.NameRelationshipType;
|
25
|
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
|
26
|
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
|
27
|
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
|
28
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
29
|
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
|
30
|
import eu.etaxonomy.cdm.model.taxon.SynonymType;
|
31
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
32
|
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
33
|
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
|
34
|
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
|
35
|
|
36
|
/**
|
37
|
* @author a.mueller
|
38
|
* @date 14.12.2016
|
39
|
*/
|
40
|
|
41
|
@Component
|
42
|
public class FloraHellenicaSynonymImport<CONFIG extends FloraHellenicaImportConfigurator>
|
43
|
extends FloraHellenicaImportBase<CONFIG>{
|
44
|
|
45
|
private static final long serialVersionUID = -3565782012921316901L;
|
46
|
private static final Logger logger = Logger.getLogger(FloraHellenicaSynonymImport.class);
|
47
|
|
48
|
private static final String ACCEPTED_NAME = "Accepted name";
|
49
|
private static final String SYNONYM = "synonym";
|
50
|
private static final String UNIQUE_ID_OF_ACCEPTED_NAME = "Unique ID of accepted name";
|
51
|
|
52
|
private static List<String> expectedKeys= Arrays.asList(new String[]{
|
53
|
SYNONYM, UNIQUE_ID_OF_ACCEPTED_NAME, ACCEPTED_NAME
|
54
|
});
|
55
|
|
56
|
private NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
|
57
|
|
58
|
@Override
|
59
|
protected String getWorksheetName() {
|
60
|
return "synonyms";
|
61
|
}
|
62
|
|
63
|
private boolean isFirst = true;
|
64
|
private TransactionStatus tx = null;
|
65
|
/**
|
66
|
* {@inheritDoc}
|
67
|
*/
|
68
|
@Override
|
69
|
protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
|
70
|
if (isFirst){
|
71
|
tx = this.startTransaction();
|
72
|
isFirst = false;
|
73
|
}
|
74
|
|
75
|
String line = state.getCurrentLine() + ": ";
|
76
|
HashMap<String, String> record = state.getOriginalRecord();
|
77
|
|
78
|
Set<String> keys = record.keySet();
|
79
|
for (String key: keys) {
|
80
|
if (! expectedKeys.contains(key)){
|
81
|
logger.warn(line + "Unexpected Key: " + key);
|
82
|
}
|
83
|
}
|
84
|
|
85
|
String row = "row" + state.getCurrentLine();
|
86
|
TaxonBase<?> relatedTaxon = makeSynonym(state, line, record, row);
|
87
|
getTaxonService().saveOrUpdate(relatedTaxon);
|
88
|
}
|
89
|
|
90
|
@Override
|
91
|
protected void secondPass(SimpleExcelTaxonImportState<CONFIG> state) {
|
92
|
if (tx != null){
|
93
|
this.commitTransaction(tx);
|
94
|
tx = null;
|
95
|
}
|
96
|
}
|
97
|
|
98
|
|
99
|
/**
|
100
|
* @param state
|
101
|
* @param line
|
102
|
* @param record
|
103
|
* @param noStr
|
104
|
* @return
|
105
|
*/
|
106
|
private TaxonBase<?> makeSynonym(SimpleExcelTaxonImportState<CONFIG> state, String line,
|
107
|
HashMap<String, String> record,
|
108
|
String lineId) {
|
109
|
|
110
|
Taxon acceptedTaxon = getAcceptedTaxon(record, state, UNIQUE_ID_OF_ACCEPTED_NAME);
|
111
|
if (acceptedTaxon == null){
|
112
|
logger.warn(line + "Accepted not found: " + record.get(UNIQUE_ID_OF_ACCEPTED_NAME));
|
113
|
return null;
|
114
|
// acceptedTaxon = Taxon.NewInstance(null, null);
|
115
|
}
|
116
|
|
117
|
String synonymStr = getValue(record, SYNONYM);
|
118
|
|
119
|
String[] parsedSynStr = parseAuct(synonymStr, line);
|
120
|
|
121
|
boolean isMisapplied = parsedSynStr[1] != null;
|
122
|
boolean hasNonAuthor = parsedSynStr[2] != null;
|
123
|
boolean hasStatus = parsedSynStr[3] != null;
|
124
|
boolean isNec = hasNonAuthor && parsedSynStr[2].contains(" nec ");
|
125
|
|
126
|
|
127
|
if (isMisapplied && hasNonAuthor && !isNec){
|
128
|
parsedSynStr[0] = parsedSynStr[0] + " " + parsedSynStr[2];
|
129
|
}
|
130
|
|
131
|
INonViralName nvn = parser.parseFullName(parsedSynStr[0], NomenclaturalCode.ICNAFP, null);
|
132
|
if (nvn.isProtectedTitleCache()){
|
133
|
logger.warn(line + "Name could not be parsed: " + synonymStr);
|
134
|
}
|
135
|
TaxonNameBase<?,?> name = TaxonNameBase.castAndDeproxy(nvn);
|
136
|
if (hasStatus){
|
137
|
try {
|
138
|
NomenclaturalStatusType status = NomenclaturalStatusType.getNomenclaturalStatusTypeByAbbreviation(parsedSynStr[3], name);
|
139
|
name.addStatus(status, null, null);
|
140
|
} catch (UnknownCdmTypeException e) {
|
141
|
logger.warn(line + "Nom. status not recognized: " + parsedSynStr[3]);
|
142
|
}
|
143
|
}
|
144
|
|
145
|
TaxonBase<?> result;
|
146
|
if (isMisapplied){
|
147
|
result = Taxon.NewInstance(name, getMisappliedRef(state, parsedSynStr[1]));
|
148
|
acceptedTaxon.addMisappliedName((Taxon)result, getSecReference(state), null);
|
149
|
if (isNec){
|
150
|
logger.warn(line + "nec not yet handled for misapplied names: " + synonymStr);
|
151
|
}
|
152
|
}else{
|
153
|
SynonymType synType = null;
|
154
|
result = acceptedTaxon.addSynonymName(name, getSecReference(state), null, synType);
|
155
|
if (hasNonAuthor){
|
156
|
handleSynonymNon(state, name, parsedSynStr[2], line);
|
157
|
}
|
158
|
}
|
159
|
result.addImportSource(lineId, getWorksheetName(), getSourceCitation(state), null);
|
160
|
|
161
|
return result;
|
162
|
|
163
|
}
|
164
|
|
165
|
|
166
|
|
167
|
/**
|
168
|
* @param state
|
169
|
* @param name
|
170
|
* @param parsedSynStr
|
171
|
*/
|
172
|
private void handleSynonymNon(SimpleExcelTaxonImportState<CONFIG> state,
|
173
|
TaxonNameBase<?, ?> name, String nonPart, String line) {
|
174
|
String[] splits = nonPart.split(" nec ");
|
175
|
for (String split : splits){
|
176
|
split = split.trim();
|
177
|
// Saponaria illyrica Ard.
|
178
|
// Crepis nemausensis Gouan
|
179
|
// S. columnae Aurnier
|
180
|
// S. columnae Aurnier nec (Rchb. f.) H. Fleischm.
|
181
|
// T. glaucescens Rchb.
|
182
|
TaxonNameBase<?,?> nonName;
|
183
|
if (split.matches("(Saponaria illyrica Ard.|Crepis nemausensis Gouan|S. columnae Aurnier|T. glaucescens Rchb.)"
|
184
|
+ "")){
|
185
|
if (split.startsWith("S.")){
|
186
|
split = split.replace("S.", "Serapias");
|
187
|
}else if (split.startsWith("T.")){
|
188
|
split = split.replace("T.", "Taraxacum");
|
189
|
}
|
190
|
nonName = TaxonNameBase.castAndDeproxy(this.parser.parseFullName(split));
|
191
|
name.addRelationshipFromName(nonName, NameRelationshipType.BLOCKING_NAME_FOR(), null);
|
192
|
}else{
|
193
|
String nameStr = name.getNameCache() + " " + split;
|
194
|
nonName = TaxonNameBase.castAndDeproxy(this.parser.parseFullName(nameStr));
|
195
|
name.addRelationshipToName(nonName, NameRelationshipType.LATER_HOMONYM(), null);
|
196
|
}
|
197
|
getNameService().saveOrUpdate(nonName);
|
198
|
if (nonName.isProtectedTitleCache()){
|
199
|
logger.warn(line + "Non-Name could not be parsed: " + nonName.getTitleCache());
|
200
|
}
|
201
|
}
|
202
|
if (splits.length>1){
|
203
|
logger.warn(line + "nec synonyms maybe not yet correctly implemented: " + name.getTitleCache() + "; " + nonPart);
|
204
|
}
|
205
|
}
|
206
|
|
207
|
private Reference flGraecReference;
|
208
|
private Reference balkanReference;
|
209
|
{
|
210
|
flGraecReference = ReferenceFactory.newBook();
|
211
|
flGraecReference.setTitle("fl. graec.");
|
212
|
balkanReference = ReferenceFactory.newBook();
|
213
|
balkanReference.setTitle("balc.");
|
214
|
}
|
215
|
/**
|
216
|
* @param state
|
217
|
* @param string
|
218
|
* @return
|
219
|
*/
|
220
|
private Reference getMisappliedRef(SimpleExcelTaxonImportState<CONFIG> state, String refString) {
|
221
|
if ("fl. graec.".equals(refString)){
|
222
|
return flGraecReference;
|
223
|
}else if ("balc.".equals(refString)){
|
224
|
return balkanReference;
|
225
|
}else{
|
226
|
logger.warn("Auct. reference not recognized: " + refString);
|
227
|
return null;
|
228
|
}
|
229
|
}
|
230
|
|
231
|
private String regExMisapplied = "(.+) auct\\. (fl\\. graec\\.|balc\\.), non (.+)";
|
232
|
private Pattern patternMisapplied = Pattern.compile(regExMisapplied);
|
233
|
|
234
|
private String regExNon = "(.+), non (.+)";
|
235
|
private Pattern patternNon = Pattern.compile(regExNon);
|
236
|
|
237
|
private String regExStatus = "(.+),\\s+((?:nom.|comb.|orth.)\\s+(.+))";
|
238
|
private Pattern patternStat = Pattern.compile(regExStatus);
|
239
|
|
240
|
/**
|
241
|
* @param synonymStr
|
242
|
*/
|
243
|
private String[] parseAuct(String synonymStr, String line) {
|
244
|
String[] result = new String[4];
|
245
|
if (synonymStr != null){
|
246
|
result[0] = synonymStr;
|
247
|
Matcher matcher = patternMisapplied.matcher(synonymStr);
|
248
|
if (matcher.matches()){
|
249
|
result[0] = matcher.group(1);
|
250
|
result[1] = matcher.group(2);
|
251
|
if (! result[1].equals("fl. graec.") && ! result[1].equals("balc.")){
|
252
|
logger.warn(line + "Misapplied sensu not recognized: " + result[1]);
|
253
|
}
|
254
|
result[2] = matcher.group(3);
|
255
|
}else{
|
256
|
matcher = patternNon.matcher(synonymStr);
|
257
|
if (matcher.matches()){
|
258
|
result[0] = matcher.group(1);
|
259
|
result[2] = matcher.group(2);
|
260
|
}else{
|
261
|
matcher = patternStat.matcher(synonymStr);
|
262
|
if (matcher.matches()){
|
263
|
result[0] = matcher.group(1);
|
264
|
result[3] = matcher.group(2);
|
265
|
}
|
266
|
}
|
267
|
}
|
268
|
}
|
269
|
return result;
|
270
|
}
|
271
|
|
272
|
}
|