Project

General

Profile

Download (9.8 KB) Statistics
| Branch: | Revision:
/**
* Copyright (C) 2016 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
9
package eu.etaxonomy.cdm.io.greece;
10

    
11
import java.util.Arrays;
12
import java.util.HashMap;
13
import java.util.List;
14
import java.util.Set;
15
import java.util.regex.Matcher;
16
import java.util.regex.Pattern;
17

    
18
import org.apache.log4j.Logger;
19
import org.springframework.stereotype.Component;
20
import org.springframework.transaction.TransactionStatus;
21

    
22
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
23
import eu.etaxonomy.cdm.model.name.INonViralName;
24
import eu.etaxonomy.cdm.model.name.NameRelationshipType;
25
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
26
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
27
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
28
import eu.etaxonomy.cdm.model.reference.Reference;
29
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
30
import eu.etaxonomy.cdm.model.taxon.SynonymType;
31
import eu.etaxonomy.cdm.model.taxon.Taxon;
32
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
33
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
34
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
35

    
36
/**
37
 * @author a.mueller
38
 * @date 14.12.2016
39
 */
40

    
41
@Component
42
public class FloraHellenicaSynonymImport<CONFIG extends FloraHellenicaImportConfigurator>
43
            extends FloraHellenicaImportBase<CONFIG>{
44

    
45
    private static final long serialVersionUID = -3565782012921316901L;
46
    private static final Logger logger = Logger.getLogger(FloraHellenicaSynonymImport.class);
47

    
48
    private static final String ACCEPTED_NAME = "Accepted name";
49
    private static final String SYNONYM = "synonym";
50
    private static final String UNIQUE_ID_OF_ACCEPTED_NAME = "Unique ID of accepted name";
51

    
52
   private  static List<String> expectedKeys= Arrays.asList(new String[]{
53
            SYNONYM, UNIQUE_ID_OF_ACCEPTED_NAME, ACCEPTED_NAME
54
    });
55

    
56
    private NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
57

    
58
    @Override
59
    protected String getWorksheetName() {
60
        return "synonyms";
61
    }
62

    
63
    private boolean isFirst = true;
64
    private TransactionStatus tx = null;
65
    /**
66
     * {@inheritDoc}
67
     */
68
    @Override
69
    protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
70
        if (isFirst){
71
            tx = this.startTransaction();
72
            isFirst = false;
73
        }
74

    
75
        String line = state.getCurrentLine() + ": ";
76
        HashMap<String, String> record = state.getOriginalRecord();
77

    
78
        Set<String> keys = record.keySet();
79
        for (String key: keys) {
80
            if (! expectedKeys.contains(key)){
81
                logger.warn(line + "Unexpected Key: " + key);
82
            }
83
        }
84

    
85
        String row = "row" + state.getCurrentLine();
86
        TaxonBase<?> relatedTaxon = makeSynonym(state, line, record, row);
87
        getTaxonService().saveOrUpdate(relatedTaxon);
88
    }
89

    
90
    @Override
91
    protected void secondPass(SimpleExcelTaxonImportState<CONFIG> state) {
92
        if (tx != null){
93
            this.commitTransaction(tx);
94
            tx = null;
95
        }
96
    }
97

    
98

    
99
    /**
100
     * @param state
101
     * @param line
102
     * @param record
103
     * @param noStr
104
     * @return
105
     */
106
    private TaxonBase<?> makeSynonym(SimpleExcelTaxonImportState<CONFIG> state, String line,
107
            HashMap<String, String> record,
108
            String lineId) {
109

    
110
        Taxon acceptedTaxon = getAcceptedTaxon(record, state, UNIQUE_ID_OF_ACCEPTED_NAME);
111
        if (acceptedTaxon == null){
112
            logger.warn(line + "Accepted not found: " + record.get(UNIQUE_ID_OF_ACCEPTED_NAME));
113
            return null;
114
//            acceptedTaxon = Taxon.NewInstance(null, null);
115
        }
116

    
117
        String synonymStr = getValue(record, SYNONYM);
118

    
119
        String[] parsedSynStr = parseAuct(synonymStr, line);
120

    
121
        boolean isMisapplied = parsedSynStr[1] != null;
122
        boolean hasNonAuthor = parsedSynStr[2] != null;
123
        boolean hasStatus = parsedSynStr[3] != null;
124
        boolean isNec = hasNonAuthor && parsedSynStr[2].contains(" nec ");
125

    
126

    
127
        if (isMisapplied && hasNonAuthor && !isNec){
128
            parsedSynStr[0] = parsedSynStr[0] + " " + parsedSynStr[2];
129
        }
130

    
131
        INonViralName nvn = parser.parseFullName(parsedSynStr[0], NomenclaturalCode.ICNAFP, null);
132
        if (nvn.isProtectedTitleCache()){
133
            logger.warn(line + "Name could not be parsed: " + synonymStr);
134
        }
135
        TaxonNameBase<?,?> name = TaxonNameBase.castAndDeproxy(nvn);
136
        if (hasStatus){
137
            try {
138
                NomenclaturalStatusType status = NomenclaturalStatusType.getNomenclaturalStatusTypeByAbbreviation(parsedSynStr[3], name);
139
                name.addStatus(status, null, null);
140
            } catch (UnknownCdmTypeException e) {
141
                logger.warn(line + "Nom. status not recognized: " + parsedSynStr[3]);
142
            }
143
        }
144

    
145
        TaxonBase<?> result;
146
        if (isMisapplied){
147
            result = Taxon.NewInstance(name, getMisappliedRef(state, parsedSynStr[1]));
148
            acceptedTaxon.addMisappliedName((Taxon)result, getSecReference(state), null);
149
            if (isNec){
150
                logger.warn(line + "nec not yet handled for misapplied names: " + synonymStr);
151
            }
152
        }else{
153
            SynonymType synType = null;
154
            result = acceptedTaxon.addSynonymName(name, getSecReference(state), null, synType);
155
            if (hasNonAuthor){
156
                handleSynonymNon(state, name, parsedSynStr[2], line);
157
            }
158
        }
159
        result.addImportSource(lineId, getWorksheetName(), getSourceCitation(state), null);
160

    
161
        return result;
162

    
163
    }
164

    
165

    
166

    
167
    /**
168
     * @param state
169
     * @param name
170
     * @param parsedSynStr
171
     */
172
    private void handleSynonymNon(SimpleExcelTaxonImportState<CONFIG> state,
173
            TaxonNameBase<?, ?> name, String nonPart, String line) {
174
        String[] splits = nonPart.split(" nec ");
175
        for (String split : splits){
176
            split = split.trim();
177
//            Saponaria illyrica Ard.
178
//            Crepis nemausensis Gouan
179
//            S. columnae Aurnier
180
//            S. columnae Aurnier nec (Rchb. f.) H. Fleischm.
181
//            T. glaucescens Rchb.
182
            TaxonNameBase<?,?> nonName;
183
            if (split.matches("(Saponaria illyrica Ard.|Crepis nemausensis Gouan|S. columnae Aurnier|T. glaucescens Rchb.)"
184
                    + "")){
185
                if (split.startsWith("S.")){
186
                    split = split.replace("S.", "Serapias");
187
                }else if (split.startsWith("T.")){
188
                    split = split.replace("T.", "Taraxacum");
189
                }
190
                nonName = TaxonNameBase.castAndDeproxy(this.parser.parseFullName(split));
191
                name.addRelationshipFromName(nonName, NameRelationshipType.BLOCKING_NAME_FOR(), null);
192
            }else{
193
                String nameStr = name.getNameCache() + " " + split;
194
                nonName = TaxonNameBase.castAndDeproxy(this.parser.parseFullName(nameStr));
195
                name.addRelationshipToName(nonName, NameRelationshipType.LATER_HOMONYM(), null);
196
            }
197
            getNameService().saveOrUpdate(nonName);
198
            if (nonName.isProtectedTitleCache()){
199
                logger.warn(line + "Non-Name could not be parsed: " + nonName.getTitleCache());
200
            }
201
        }
202
        if (splits.length>1){
203
            logger.warn(line + "nec synonyms maybe not yet correctly implemented: " + name.getTitleCache() + "; " + nonPart);
204
        }
205
    }
206

    
207
    private Reference flGraecReference;
208
    private Reference balkanReference;
209
    {
210
        flGraecReference = ReferenceFactory.newBook();
211
        flGraecReference.setTitle("fl. graec.");
212
        balkanReference = ReferenceFactory.newBook();
213
        balkanReference.setTitle("balc.");
214
    }
215
    /**
216
     * @param state
217
     * @param string
218
     * @return
219
     */
220
    private Reference getMisappliedRef(SimpleExcelTaxonImportState<CONFIG> state, String refString) {
221
        if ("fl. graec.".equals(refString)){
222
            return flGraecReference;
223
        }else if ("balc.".equals(refString)){
224
            return balkanReference;
225
        }else{
226
            logger.warn("Auct. reference not recognized: " + refString);
227
            return null;
228
        }
229
    }
230

    
231
    private String regExMisapplied = "(.+) auct\\. (fl\\. graec\\.|balc\\.), non (.+)";
232
    private Pattern patternMisapplied = Pattern.compile(regExMisapplied);
233

    
234
    private String regExNon = "(.+), non (.+)";
235
    private Pattern patternNon = Pattern.compile(regExNon);
236

    
237
    private String regExStatus = "(.+),\\s+((?:nom.|comb.|orth.)\\s+(.+))";
238
    private Pattern patternStat = Pattern.compile(regExStatus);
239

    
240
    /**
241
     * @param synonymStr
242
     */
243
    private String[] parseAuct(String synonymStr, String line) {
244
        String[] result = new String[4];
245
        if (synonymStr != null){
246
            result[0] = synonymStr;
247
            Matcher matcher = patternMisapplied.matcher(synonymStr);
248
            if (matcher.matches()){
249
                result[0] = matcher.group(1);
250
                result[1] = matcher.group(2);
251
                if (! result[1].equals("fl. graec.") && ! result[1].equals("balc.")){
252
                    logger.warn(line + "Misapplied sensu not recognized: " +  result[1]);
253
                }
254
                result[2] = matcher.group(3);
255
            }else{
256
                matcher = patternNon.matcher(synonymStr);
257
                if (matcher.matches()){
258
                    result[0] = matcher.group(1);
259
                    result[2] = matcher.group(2);
260
                }else{
261
                    matcher = patternStat.matcher(synonymStr);
262
                    if (matcher.matches()){
263
                        result[0] = matcher.group(1);
264
                        result[3] = matcher.group(2);
265
                    }
266
                }
267
            }
268
        }
269
        return result;
270
    }
271

    
272
}
(5-5/7)