/**
* Copyright (C) 2016 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
9
|
package eu.etaxonomy.cdm.io.greece;
|
10
|
|
11
|
import java.util.Arrays;
|
12
|
import java.util.HashMap;
|
13
|
import java.util.List;
|
14
|
import java.util.Set;
|
15
|
import java.util.regex.Matcher;
|
16
|
import java.util.regex.Pattern;
|
17
|
|
18
|
import org.apache.log4j.Logger;
|
19
|
import org.springframework.stereotype.Component;
|
20
|
|
21
|
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
|
22
|
import eu.etaxonomy.cdm.model.name.INonViralName;
|
23
|
import eu.etaxonomy.cdm.model.name.NameRelationshipType;
|
24
|
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
|
25
|
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
|
26
|
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
|
27
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
28
|
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
|
29
|
import eu.etaxonomy.cdm.model.taxon.SynonymType;
|
30
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
31
|
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
32
|
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
|
33
|
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
|
34
|
|
35
|
/**
|
36
|
* @author a.mueller
|
37
|
* @date 14.12.2016
|
38
|
*/
|
39
|
|
40
|
@Component
|
41
|
public class FloraHellenicaSynonymImport<CONFIG extends FloraHellenicaImportConfigurator>
|
42
|
extends FloraHellenicaImportBase<CONFIG>{
|
43
|
|
44
|
private static final long serialVersionUID = -3565782012921316901L;
|
45
|
private static final Logger logger = Logger.getLogger(FloraHellenicaSynonymImport.class);
|
46
|
|
47
|
private static final String ACCEPTED_NAME = "Accepted name";
|
48
|
private static final String SYNONYM = "synonym";
|
49
|
private static final String UNIQUE_ID_OF_ACCEPTED_NAME = "Unique ID of accepted name";
|
50
|
|
51
|
private static List<String> expectedKeys= Arrays.asList(new String[]{
|
52
|
SYNONYM, UNIQUE_ID_OF_ACCEPTED_NAME, ACCEPTED_NAME
|
53
|
});
|
54
|
|
55
|
private NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
|
56
|
|
57
|
@Override
|
58
|
protected String getWorksheetName() {
|
59
|
return "synonyms";
|
60
|
}
|
61
|
|
62
|
boolean isFirst = true;
|
63
|
/**
|
64
|
* {@inheritDoc}
|
65
|
*/
|
66
|
@Override
|
67
|
protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
|
68
|
|
69
|
String line = state.getCurrentLine() + ": ";
|
70
|
HashMap<String, String> record = state.getOriginalRecord();
|
71
|
|
72
|
Set<String> keys = record.keySet();
|
73
|
for (String key: keys) {
|
74
|
if (! expectedKeys.contains(key)){
|
75
|
logger.warn(line + "Unexpected Key: " + key);
|
76
|
}
|
77
|
}
|
78
|
if (isFirst){
|
79
|
System.out.println("Start synonyms");
|
80
|
isFirst = false;
|
81
|
}
|
82
|
|
83
|
String row = "row" + state.getCurrentLine();
|
84
|
TaxonBase<?> relatedTaxon = makeSynonym(state, line, record, row);
|
85
|
if (relatedTaxon != null){
|
86
|
getTaxonService().saveOrUpdate(relatedTaxon);
|
87
|
}
|
88
|
}
|
89
|
|
90
|
|
91
|
/**
|
92
|
* @param state
|
93
|
* @param line
|
94
|
* @param record
|
95
|
* @param noStr
|
96
|
* @return
|
97
|
*/
|
98
|
private TaxonBase<?> makeSynonym(SimpleExcelTaxonImportState<CONFIG> state, String line,
|
99
|
HashMap<String, String> record,
|
100
|
String lineId) {
|
101
|
|
102
|
Taxon acceptedTaxon = getAcceptedTaxon(record, state, UNIQUE_ID_OF_ACCEPTED_NAME);
|
103
|
if (acceptedTaxon == null){
|
104
|
logger.warn(line + "Accepted not found: " + record.get(UNIQUE_ID_OF_ACCEPTED_NAME));
|
105
|
return null;
|
106
|
// acceptedTaxon = Taxon.NewInstance(null, null);
|
107
|
}
|
108
|
|
109
|
String synonymStr = getValue(record, SYNONYM);
|
110
|
|
111
|
String[] parsedSynStr = parseAuct(synonymStr, line);
|
112
|
|
113
|
boolean isMisapplied = parsedSynStr[1] != null;
|
114
|
boolean hasNonAuthor = parsedSynStr[2] != null;
|
115
|
boolean hasStatus = parsedSynStr[3] != null;
|
116
|
boolean isNec = hasNonAuthor && parsedSynStr[2].contains(" nec ");
|
117
|
|
118
|
|
119
|
String misappliedNecAuthor = null;
|
120
|
if (isMisapplied && hasNonAuthor && !isNec){
|
121
|
parsedSynStr[0] = parsedSynStr[0] + " " + parsedSynStr[2];
|
122
|
}else if (isMisapplied && hasNonAuthor && isNec){
|
123
|
misappliedNecAuthor = parsedSynStr[2];
|
124
|
}
|
125
|
|
126
|
INonViralName nvn = parser.parseFullName(parsedSynStr[0], NomenclaturalCode.ICNAFP, null);
|
127
|
if (nvn.isProtectedTitleCache()){
|
128
|
logger.warn(line + "Name could not be parsed: " + parsedSynStr[0] + " (full:" + synonymStr + ")");
|
129
|
}
|
130
|
if (misappliedNecAuthor != null){
|
131
|
nvn.setAuthorshipCache(misappliedNecAuthor);
|
132
|
}
|
133
|
|
134
|
|
135
|
TaxonNameBase<?,?> name = TaxonNameBase.castAndDeproxy(nvn);
|
136
|
if (hasStatus){
|
137
|
try {
|
138
|
NomenclaturalStatusType status = NomenclaturalStatusType.getNomenclaturalStatusTypeByAbbreviation(parsedSynStr[3], name);
|
139
|
name.addStatus(status, null, null);
|
140
|
} catch (UnknownCdmTypeException e) {
|
141
|
logger.warn(line + "Nom. status not recognized: " + parsedSynStr[3]);
|
142
|
}
|
143
|
}
|
144
|
name = replaceNameAuthorsAndReferences(state, name);
|
145
|
|
146
|
|
147
|
TaxonBase<?> result;
|
148
|
if (isMisapplied){
|
149
|
result = Taxon.NewInstance(name, getMisappliedRef(state, parsedSynStr[1]));
|
150
|
acceptedTaxon.addMisappliedName((Taxon)result, getSecReference(state), null);
|
151
|
if (isNec){
|
152
|
logger.warn(line + "nec for misapplied names still needs to be checked: " + synonymStr);
|
153
|
}
|
154
|
}else{
|
155
|
SynonymType synType = null;
|
156
|
result = acceptedTaxon.addSynonymName(name, getSecReference(state), null, synType);
|
157
|
if (hasNonAuthor){
|
158
|
handleSynonymNon(state, name, parsedSynStr[2], line);
|
159
|
}
|
160
|
}
|
161
|
result.addImportSource(lineId, getWorksheetName(), getSourceCitation(state), null);
|
162
|
|
163
|
return result;
|
164
|
|
165
|
}
|
166
|
|
167
|
|
168
|
|
169
|
/**
|
170
|
* @param state
|
171
|
* @param name
|
172
|
* @param parsedSynStr
|
173
|
*/
|
174
|
private void handleSynonymNon(SimpleExcelTaxonImportState<CONFIG> state,
|
175
|
TaxonNameBase<?, ?> name, String nonPart, String line) {
|
176
|
String[] splits = nonPart.split(" nec ");
|
177
|
|
178
|
TaxonNameBase<?,?> lastHomonym = null;
|
179
|
for (String split : splits){
|
180
|
split = split.trim();
|
181
|
// Saponaria illyrica Ard.
|
182
|
// Crepis nemausensis Gouan
|
183
|
// S. columnae Aurnier
|
184
|
// S. columnae Aurnier nec (Rchb. f.) H. Fleischm.
|
185
|
// T. glaucescens Rchb.
|
186
|
TaxonNameBase<?,?> nonName;
|
187
|
if (split.matches("(Saponaria illyrica Ard.|Crepis nemausensis Gouan|S. columnae Aurnier|T. glaucescens Rchb.|Linaria stricta Guss.)"
|
188
|
+ "")){
|
189
|
if (split.startsWith("S.")){
|
190
|
split = split.replace("S.", "Serapias");
|
191
|
}else if (split.startsWith("T.")){
|
192
|
split = split.replace("T.", "Taraxacum");
|
193
|
}
|
194
|
nonName = TaxonNameBase.castAndDeproxy(this.parser.parseFullName(split));
|
195
|
nonName = replaceNameAuthorsAndReferences(state, nonName);
|
196
|
name.addRelationshipFromName(nonName, NameRelationshipType.BLOCKING_NAME_FOR(), null);
|
197
|
}else{
|
198
|
String nameStr = name.getNameCache().replace(" hort.", "") + " " + split;
|
199
|
nonName = TaxonNameBase.castAndDeproxy(this.parser.parseFullName(nameStr));
|
200
|
nonName = replaceNameAuthorsAndReferences(state, nonName);
|
201
|
name.addRelationshipToName(nonName, NameRelationshipType.LATER_HOMONYM(), null);
|
202
|
if (lastHomonym != null){
|
203
|
nonName.addRelationshipToName(lastHomonym, NameRelationshipType.LATER_HOMONYM(), null);
|
204
|
}
|
205
|
lastHomonym = nonName;
|
206
|
}
|
207
|
getNameService().saveOrUpdate(nonName);
|
208
|
if (nonName.isProtectedTitleCache()){
|
209
|
logger.warn(line + "Non-Name could not be parsed: " + nonName.getTitleCache());
|
210
|
}
|
211
|
}
|
212
|
//seems to work correctly
|
213
|
// if (splits.length>1){
|
214
|
// logger.warn(line + "nec synonyms maybe not yet correctly implemented: " + name.getTitleCache() + "; " + nonPart);
|
215
|
// }
|
216
|
}
|
217
|
|
218
|
private Reference flGraecReference;
|
219
|
private Reference balkanReference;
|
220
|
{
|
221
|
flGraecReference = ReferenceFactory.newBook();
|
222
|
flGraecReference.setTitle("fl. graec.");
|
223
|
balkanReference = ReferenceFactory.newBook();
|
224
|
balkanReference.setTitle("balc.");
|
225
|
}
|
226
|
/**
|
227
|
* @param state
|
228
|
* @param string
|
229
|
* @return
|
230
|
*/
|
231
|
private Reference getMisappliedRef(SimpleExcelTaxonImportState<CONFIG> state, String refString) {
|
232
|
if ("fl. graec.".equals(refString)){
|
233
|
return flGraecReference;
|
234
|
}else if ("balc.".equals(refString)){
|
235
|
return balkanReference;
|
236
|
}else{
|
237
|
logger.warn("Auct. reference not recognized: " + refString);
|
238
|
return null;
|
239
|
}
|
240
|
}
|
241
|
|
242
|
private String regExMisapplied = "(.+) auct\\. (fl\\. graec\\.|balc\\.), non (.+)";
|
243
|
private Pattern patternMisapplied = Pattern.compile(regExMisapplied);
|
244
|
|
245
|
private String regExNon = "(.+), non (.+)";
|
246
|
private Pattern patternNon = Pattern.compile(regExNon);
|
247
|
|
248
|
private String regExStatus = "(.+),\\s+((?:nom.|comb.|orth.)\\s+(.+))";
|
249
|
private Pattern patternStat = Pattern.compile(regExStatus);
|
250
|
|
251
|
/**
|
252
|
* @param synonymStr
|
253
|
*/
|
254
|
private String[] parseAuct(String synonymStr, String line) {
|
255
|
String[] result = new String[4];
|
256
|
if (synonymStr != null){
|
257
|
result[0] = synonymStr;
|
258
|
Matcher matcher = patternMisapplied.matcher(synonymStr);
|
259
|
if (matcher.matches()){
|
260
|
result[0] = matcher.group(1);
|
261
|
result[1] = matcher.group(2);
|
262
|
if (! result[1].equals("fl. graec.") && ! result[1].equals("balc.")){
|
263
|
logger.warn(line + "Misapplied sensu not recognized: " + result[1]);
|
264
|
}
|
265
|
result[2] = matcher.group(3);
|
266
|
}else{
|
267
|
matcher = patternNon.matcher(synonymStr);
|
268
|
if (matcher.matches()){
|
269
|
result[0] = matcher.group(1);
|
270
|
result[2] = matcher.group(2);
|
271
|
}else{
|
272
|
matcher = patternStat.matcher(synonymStr);
|
273
|
if (matcher.matches()){
|
274
|
result[0] = matcher.group(1);
|
275
|
result[3] = matcher.group(2);
|
276
|
}
|
277
|
}
|
278
|
}
|
279
|
}
|
280
|
return result;
|
281
|
}
|
282
|
|
283
|
}
|