Project

General

Profile

Download (10.6 KB) Statistics
| Branch: | Revision:
1
/**
 * Copyright (C) 2016 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
9
package eu.etaxonomy.cdm.io.greece;
10

    
11
import java.util.Arrays;
12
import java.util.HashMap;
13
import java.util.List;
14
import java.util.Set;
15
import java.util.regex.Matcher;
16
import java.util.regex.Pattern;
17

    
18
import org.apache.log4j.Logger;
19
import org.springframework.stereotype.Component;
20

    
21
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
22
import eu.etaxonomy.cdm.model.name.INonViralName;
23
import eu.etaxonomy.cdm.model.name.NameRelationshipType;
24
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
25
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
26
import eu.etaxonomy.cdm.model.name.TaxonName;
27
import eu.etaxonomy.cdm.model.reference.Reference;
28
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
29
import eu.etaxonomy.cdm.model.taxon.SynonymType;
30
import eu.etaxonomy.cdm.model.taxon.Taxon;
31
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
32
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
33
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
34

    
35
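/**
 * Import of the "synonyms" worksheet: creates synonyms and misapplied names
 * and attaches them to their accepted taxa.
 *
 * @author a.mueller
 * @since 14.12.2016
 */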
39

    
40
@Component
41
public class FloraHellenicaSynonymImport<CONFIG extends FloraHellenicaImportConfigurator>
42
            extends FloraHellenicaImportBase<CONFIG>{
43

    
44
    private static final long serialVersionUID = -3565782012921316901L;
45
    private static final Logger logger = Logger.getLogger(FloraHellenicaSynonymImport.class);
46

    
47
    private static final String ACCEPTED_NAME = "Accepted name";
48
    private static final String SYNONYM = "synonym";
49
    private static final String UNIQUE_ID_OF_ACCEPTED_NAME = "Unique ID of accepted name";
50

    
51
   private  static List<String> expectedKeys= Arrays.asList(new String[]{
52
            SYNONYM, UNIQUE_ID_OF_ACCEPTED_NAME, ACCEPTED_NAME
53
    });
54

    
55
    private NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
56

    
57
    @Override
58
    protected String getWorksheetName() {
59
        return "synonyms";
60
    }
61

    
62
    boolean isFirst = true;
63
    /**
64
     * {@inheritDoc}
65
     */
66
    @Override
67
    protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
68

    
69
        String line = state.getCurrentLine() + ": ";
70
        HashMap<String, String> record = state.getOriginalRecord();
71

    
72
        Set<String> keys = record.keySet();
73
        for (String key: keys) {
74
            if (! expectedKeys.contains(key)){
75
                logger.warn(line + "Unexpected Key: " + key);
76
            }
77
        }
78
        if (isFirst){
79
            System.out.println("Start synonyms");
80
            isFirst = false;
81
        }
82

    
83
        String row = "row" + state.getCurrentLine();
84
        TaxonBase<?> relatedTaxon = makeSynonym(state, line, record, row);
85
        if (relatedTaxon != null){
86
            getTaxonService().saveOrUpdate(relatedTaxon);
87
        }
88
    }
89

    
90

    
91
    /**
92
     * @param state
93
     * @param line
94
     * @param record
95
     * @param noStr
96
     * @return
97
     */
98
    private TaxonBase<?> makeSynonym(SimpleExcelTaxonImportState<CONFIG> state, String line,
99
            HashMap<String, String> record,
100
            String lineId) {
101

    
102
        Taxon acceptedTaxon = getAcceptedTaxon(record, state, UNIQUE_ID_OF_ACCEPTED_NAME);
103
        if (acceptedTaxon == null){
104
            logger.warn(line + "Accepted not found: " + record.get(UNIQUE_ID_OF_ACCEPTED_NAME));
105
            return null;
106
//            acceptedTaxon = Taxon.NewInstance(null, null);
107
        }
108

    
109
        String synonymStr = getValue(record, SYNONYM);
110

    
111
        String[] parsedSynStr = parseAuct(synonymStr, line);
112

    
113
        boolean isMisapplied = parsedSynStr[1] != null;
114
        boolean hasNonAuthor = parsedSynStr[2] != null;
115
        boolean hasStatus = parsedSynStr[3] != null;
116
        boolean isNec = hasNonAuthor && parsedSynStr[2].contains(" nec ");
117

    
118

    
119
        String misappliedNecAuthor = null;
120
        if (isMisapplied && hasNonAuthor && !isNec){
121
            parsedSynStr[0] = parsedSynStr[0] + " " + parsedSynStr[2];
122
        }else if (isMisapplied && hasNonAuthor && isNec){
123
            misappliedNecAuthor = parsedSynStr[2];
124
        }
125

    
126
        INonViralName nvn = parser.parseFullName(parsedSynStr[0], NomenclaturalCode.ICNAFP, null);
127
        if (nvn.isProtectedTitleCache()){
128
            logger.warn(line + "Name could not be parsed: " + parsedSynStr[0]  + "  (full:"  + synonymStr + ")");
129
        }
130
        if (misappliedNecAuthor != null){
131
            nvn.setAuthorshipCache(misappliedNecAuthor);
132
        }
133
        TaxonName name = TaxonName.castAndDeproxy(nvn);
134

    
135
        if (hasStatus){
136
            try {
137
                NomenclaturalStatusType status = NomenclaturalStatusType.getNomenclaturalStatusTypeByAbbreviation(parsedSynStr[3], name);
138
                name.addStatus(status, null, null);
139
            } catch (UnknownCdmTypeException e) {
140
                logger.warn(line + "Nom. status not recognized: " + parsedSynStr[3]);
141
            }
142
        }
143
        name = replaceNameAuthorsAndReferences(state, name);
144

    
145

    
146
        TaxonBase<?> result;
147
        if (isMisapplied){
148
            Reference sec = null;// getMisappliedRef(state, parsedSynStr[1]);
149
            result = Taxon.NewInstance(name, sec);
150
            result.setAppendedPhrase(getMisappliedRef(state, parsedSynStr[1]));
151
            acceptedTaxon.addMisappliedName((Taxon)result, getSecReference(state), null);
152
            if (isNec){
153
                logger.warn(line + "nec for misapplied names still needs to be checked: " + synonymStr);
154
            }
155
        }else{
156
            SynonymType synType = null;
157
            result = acceptedTaxon.addSynonymName(name, getSecReference(state), null, synType);
158
            if (hasNonAuthor){
159
                handleSynonymNon(state, name, parsedSynStr[2], line);
160
            }
161
        }
162
        result.addImportSource(lineId, getWorksheetName(), getSourceCitation(state), null);
163

    
164
        return result;
165

    
166
    }
167

    
168

    
169

    
170
    /**
171
     * @param state
172
     * @param name
173
     * @param parsedSynStr
174
     */
175
    private void handleSynonymNon(SimpleExcelTaxonImportState<CONFIG> state,
176
            TaxonName name, String nonPart, String line) {
177
        String[] splits = nonPart.split(" nec ");
178

    
179
        TaxonName lastHomonym = null;
180
        for (String split : splits){
181
            split = split.trim();
182
//            Saponaria illyrica Ard.
183
//            Crepis nemausensis Gouan
184
//            S. columnae Aurnier
185
//            S. columnae Aurnier nec (Rchb. f.) H. Fleischm.
186
//            T. glaucescens Rchb.
187
            TaxonName nonName;
188
            if (split.matches("(Saponaria illyrica Ard.|Crepis nemausensis Gouan|S. columnae Aurnier|T. glaucescens Rchb.|Linaria stricta Guss.)"
189
                    + "")){
190
                if (split.startsWith("S.")){
191
                    split = split.replace("S.", "Serapias");
192
                }else if (split.startsWith("T.")){
193
                    split = split.replace("T.", "Taraxacum");
194
                }
195
                nonName = TaxonName.castAndDeproxy(this.parser.parseFullName(split));
196
                nonName = replaceNameAuthorsAndReferences(state, nonName);
197
                name.addRelationshipFromName(nonName, NameRelationshipType.BLOCKING_NAME_FOR(), null);
198
            }else{
199
                String nameStr = name.getNameCache().replace(" hort.", "") + " " + split;
200
                nonName = TaxonName.castAndDeproxy(this.parser.parseFullName(nameStr));
201
                nonName = replaceNameAuthorsAndReferences(state, nonName);
202
                name.addRelationshipToName(nonName, NameRelationshipType.LATER_HOMONYM(), null);
203
                if (lastHomonym != null){
204
                    nonName.addRelationshipToName(lastHomonym, NameRelationshipType.LATER_HOMONYM(), null);
205
                }
206
                lastHomonym = nonName;
207
            }
208
            getNameService().saveOrUpdate(nonName);
209
            if (nonName.isProtectedTitleCache()){
210
                logger.warn(line + "Non-Name could not be parsed: " + nonName.getTitleCache());
211
            }
212
        }
213
        //seems to work correctly
214
//        if (splits.length>1){
215
//            logger.warn(line + "nec synonyms maybe not yet correctly implemented: " + name.getTitleCache() + "; " + nonPart);
216
//        }
217
    }
218

    
219
    // Book references titled after the two region abbreviations used for misapplied names.
    // No method of this class reads them.
    private Reference flGraecReference = ReferenceFactory.newBook();
    private Reference balkanReference = ReferenceFactory.newBook();
    {
        flGraecReference.setTitle("fl. graec.");
        balkanReference.setTitle("balc.");
    }
227
    /**
228
     * @param state
229
     * @param string
230
     * @return
231
     */
232
    private String getMisappliedRef(SimpleExcelTaxonImportState<CONFIG> state, String refString) {
233
//        if ("fl. graec.".equals(refString)){
234
//            return flGraecReference;
235
//        }else if ("balc.".equals(refString)){
236
//            return balkanReference;
237
        if ("fl. graec.".equals(refString)){
238
          return "auct. fl. graec.";
239
        }else if ("balc.".equals(refString)){
240
          return "auct. balc.";
241
        }else{
242
            logger.warn("Auct. reference not recognized: " + refString);
243
            return null;
244
        }
245
    }
246

    
247
    private String regExMisapplied = "(.+) auct\\. (fl\\. graec\\.|balc\\.), non (.+)";
248
    private Pattern patternMisapplied = Pattern.compile(regExMisapplied);
249

    
250
    private String regExNon = "(.+), non (.+)";
251
    private Pattern patternNon = Pattern.compile(regExNon);
252

    
253
    private String regExStatus = "(.+),\\s+((?:nom.|comb.|orth.)\\s+(.+))";
254
    private Pattern patternStat = Pattern.compile(regExStatus);
255

    
256
    /**
257
     * @param synonymStr
258
     */
259
    private String[] parseAuct(String synonymStr, String line) {
260
        String[] result = new String[4];
261
        if (synonymStr != null){
262
            result[0] = synonymStr;
263
            Matcher matcher = patternMisapplied.matcher(synonymStr);
264
            if (matcher.matches()){
265
                result[0] = matcher.group(1);
266
                result[1] = matcher.group(2);
267
                if (! result[1].equals("fl. graec.") && ! result[1].equals("balc.")){
268
                    logger.warn(line + "Misapplied sensu not recognized: " +  result[1]);
269
                }
270
                result[2] = matcher.group(3);
271
            }else{
272
                matcher = patternNon.matcher(synonymStr);
273
                if (matcher.matches()){
274
                    result[0] = matcher.group(1);
275
                    result[2] = matcher.group(2);
276
                }else{
277
                    matcher = patternStat.matcher(synonymStr);
278
                    if (matcher.matches()){
279
                        result[0] = matcher.group(1);
280
                        result[3] = matcher.group(2);
281
                    }
282
                }
283
            }
284
        }
285
        return result;
286
    }
287

    
288
}
(8-8/14)