Project

General

Profile

« Previous | Next » 

Revision f261b330

Added by Andreas Kohlbecker over 7 years ago

ref #6026 multiple improvements

  • better handling of fossil taxa
  • Author's spellings imported as misspelling name relationships
  • fix for publication year extraction (two-digit years taken from full dates)
  • "Author abbreviation not checked" as annotations

View differences:

app-import/src/main/java/eu/etaxonomy/cdm/io/iapt/IAPTExcelImport.java
17 17
import eu.etaxonomy.cdm.model.reference.Reference;
18 18
import eu.etaxonomy.cdm.model.taxon.*;
19 19
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
20
import org.apache.commons.lang.ArrayUtils;
20 21
import org.apache.commons.lang.StringEscapeUtils;
21 22
import org.apache.commons.lang.StringUtils;
22 23
import org.apache.log4j.Level;
......
66 67
    private static final Pattern nomRefTokenizeP = Pattern.compile("^(.*):\\s([^\\.:]+)\\.(.*)$");
67 68
    private static final Pattern nomRefPubYearExtractP = Pattern.compile("(.*?)(1[7,8,9][0-9]{2}).*$|^.*?[0-9]{1,2}([\\./])[0-1]?[0-9]\\3([0-9]{2})\\.$"); // 1700 - 1999
68 69

  
70
    private MarkerType markerTypeFossil = null;
71

  
69 72
    private Taxon makeTaxon(HashMap<String, String> record, SimpleExcelTaxonImportState<CONFIG> state,
70
                            TaxonNode higherTaxonNode, boolean isSynonym) {
73
                            TaxonNode higherTaxonNode, boolean isSynonym, boolean isFossil) {
71 74

  
72 75
        String line = state.getCurrentLine() + ": ";
73 76

  
......
75 78
        String nameStr = getValue(record, NAMESTRING, true);
76 79
        String authorStr = getValue(record, AUTHORSTRING, true);
77 80
        String nomRefStr = getValue(record, LITSTRING, true);
81
        String authorsSpelling = getValue(record, AUTHORSSPELLING, true);
78 82

  
79 83
        String nomRefTitle = null;
80 84
        String nomRefDetail = null;
......
94 98
                Matcher m2 = nomRefPubYearExtractP.matcher(nomRefPupDate);
95 99
                if(m2.matches()){
96 100
                    nomRefPupYear = m2.group(2);
101
                    if(nomRefPupYear == null){
102
                        nomRefPupYear = m2.group(4);
103
                    }
104
                    if(nomRefPupYear == null){
105
                        logger.error("nomRefPupYear in " + nomRefStr + " is  NULL" );
106
                    }
97 107
                    if(nomRefPupYear.length() == 2 ){
98 108
                        // it is an abbreviated year from the 19** years
99 109
                        nomRefPupYear = "19" + nomRefPupYear;
......
108 118
            }
109 119
        }
110 120

  
111

  
112 121
        BotanicalName taxonName;
122
        // cache field for the taxonName.titleCache
123
        String taxonNameTitleCache = null;
113 124
        Map<String, AnnotationType> nameAnnotations = new HashMap<>();
114 125

  
115
        if(titleCacheStr.endsWith(ANNOTATION_MARKER_STRING) && authorStr.endsWith(ANNOTATION_MARKER_STRING)){
126
        // TitleCache preprocessing
127
        if(titleCacheStr.endsWith(ANNOTATION_MARKER_STRING) || authorStr.endsWith(ANNOTATION_MARKER_STRING)){
116 128
            nameAnnotations.put("Author abbreviation not checked.", AnnotationType.EDITORIAL());
117 129
            titleCacheStr = titleCacheStr.replace(ANNOTATION_MARKER_STRING, "").trim();
118 130
            authorStr = authorStr.replace(ANNOTATION_MARKER_STRING, "").trim();
119 131
        }
120 132

  
133
        // parse the full taxon name
121 134
        if(!StringUtils.isEmpty(nomRefTitle)){
122 135
            String referenceSeparator = nomRefTitle.startsWith("in ") ? " " : ", ";
123 136
            String taxonFullNameStr = titleCacheStr + referenceSeparator + nomRefTitle;
......
127 140
            taxonName = (BotanicalName) nameParser.parseFullName(titleCacheStr, NomenclaturalCode.ICNAFP, null);
128 141
        }
129 142

  
143
        taxonNameTitleCache = taxonName.getTitleCache().trim();
130 144
        if (taxonName.isProtectedTitleCache()) {
131 145
            logger.warn(line + "Name could not be parsed: " + titleCacheStr);
132 146
        } else {
133 147

  
134 148
            boolean doRestoreTitleCacheStr = false;
135 149
            // Check titleCache
136
            String generatedTitleCache = taxonName.getTitleCache();
137
            if (!generatedTitleCache.trim().equals(titleCacheStr)) {
138
                logger.warn(line + "The generated titleCache differs from the imported string : " + generatedTitleCache + " <> " + titleCacheStr + " will restore original titleCacheStr");
150
            if (!taxonNameTitleCache.equals(titleCacheStr)) {
151
                logger.warn(line + "The generated titleCache differs from the imported string : " + taxonNameTitleCache + " <> " + titleCacheStr + " will restore original titleCacheStr");
139 152
                doRestoreTitleCacheStr = true;
140 153
            }
141 154
            // Check Name
142
            if (!taxonName.getNameCache().trim().equals(nameStr)) {
143
                logger.warn(line + "parsed nameCache differs from " + NAMESTRING + " : " + taxonName.getNameCache() + " <> " + nameStr);
155
            String nameCache = taxonName.getNameCache();
156
            if (!nameCache.trim().equals(nameStr)) {
157
                logger.warn(line + "parsed nameCache differs from " + NAMESTRING + " : " + nameCache + " <> " + nameStr);
144 158
            }
145 159

  
146 160
            //  Author
......
156 170

  
157 171
            // deduplicate
158 172
            replaceAuthorNamesAndNomRef(state, taxonName);
173
        }
174

  
175
        // Annotations
176
        if(!nameAnnotations.isEmpty()){
177
            for(String text : nameAnnotations.keySet()){
178
                taxonName.addAnnotation(Annotation.NewInstance(text, nameAnnotations.get(text), Language.DEFAULT()));
179
            }
180
            getNameService().save(taxonName);
181
        }
182

  
183
        // Namerelations
184
        if(!StringUtils.isEmpty(authorsSpelling)){
185
            authorsSpelling = authorsSpelling.replaceFirst("Author's spelling:", "").replaceAll("\"", "").trim();
159 186

  
160
            // Annotations
161
            if(!nameAnnotations.isEmpty()){
162
                for(String text : nameAnnotations.keySet()){
163
                    taxonName.addAnnotation(Annotation.NewInstance(text, nameAnnotations.get(text), Language.DEFAULT()));
187
            String[] authorSpellingTokens = StringUtils.split(authorsSpelling, " ");
188
            String[] nameStrTokens = StringUtils.split(nameStr, " ");
189

  
190
            ArrayUtils.reverse(authorSpellingTokens);
191
            ArrayUtils.reverse(nameStrTokens);
192

  
193
            for (int i = 0; i < nameStrTokens.length; i++){
194
                if(i < authorSpellingTokens.length){
195
                    nameStrTokens[i] = authorSpellingTokens[i];
164 196
                }
165
                getNameService().save(taxonName);
166 197
            }
198
            ArrayUtils.reverse(nameStrTokens);
199

  
200
            String misspelledNameStr = StringUtils.join (nameStrTokens, ' ');
201
            // build the fullnameString of the misspelled name
202
            misspelledNameStr = taxonNameTitleCache.replace(nameStr, misspelledNameStr);
203

  
204
            TaxonNameBase misspelledName = (BotanicalName) nameParser.parseReferencedName(misspelledNameStr, NomenclaturalCode.ICNAFP, null);
205
            misspelledName.addRelationshipToName(taxonName, NameRelationshipType.MISSPELLING(), null);
206
            getNameService().save(misspelledName);
167 207
        }
168 208

  
169 209
        Reference sec = state.getConfig().getSecReference();
170 210
        Taxon taxon = Taxon.NewInstance(taxonName, sec);
211

  
212
        // Markers
213
        if(isFossil){
214
            taxon.addMarker(Marker.NewInstance(markerTypeFossil(), true));
215
        }
216

  
171 217
        getTaxonService().save(taxon);
172 218
        if(higherTaxonNode != null){
173 219
            higherTaxonNode.addChildTaxon(taxon, null, null);
......
260 306
        }
261 307

  
262 308
        String reg_id = record.get(REGISTRATIONNO_PK);
309

  
263 310
        //higherTaxon
264
        TaxonNode higherTaxon = getHigherTaxon(record, (IAPTImportState)state);
311
        String higherTaxaString = record.get(HIGHERTAXON);
312
        boolean isFossil = false;
313
        if(higherTaxaString.startsWith("FOSSIL ")){
314
            higherTaxaString = higherTaxaString.replace("FOSSIL ", "");
315
            isFossil = true;
316
        }
317
        TaxonNode higherTaxon = getHigherTaxon(higherTaxaString, (IAPTImportState)state);
265 318

  
266 319
       //Taxon
267
        Taxon taxon = makeTaxon(record, state, higherTaxon, isSynonymOnly);
320
        Taxon taxon = makeTaxon(record, state, higherTaxon, isSynonymOnly, isFossil);
268 321
        if (taxon == null && ! isSynonymOnly){
269 322
            logger.warn(line + "taxon could not be created and is null");
270 323
            return;
......
281 334
		return;
282 335
    }
283 336

  
284
    private TaxonNode getHigherTaxon(HashMap<String, String> record, IAPTImportState state) {
285
        String higherTaxaString = record.get(HIGHERTAXON);
337
    private TaxonNode getHigherTaxon(String higherTaxaString, IAPTImportState state) {
338

  
286 339
        // higherTaxaString is like
287 340
        // - DICOTYLEDONES: LEGUMINOSAE: MIMOSOIDEAE
288 341
        // - FOSSIL DICOTYLEDONES: PROTEACEAE
......
376 429
        return ref;
377 430
    }
378 431

  
432
    private MarkerType markerTypeFossil(){
433
        if(this.markerTypeFossil == null){
434
            markerTypeFossil = MarkerType.NewInstance("isFossilTaxon", "isFossil", null);
435
            getTermService().save(this.markerTypeFossil);
436
        }
437
        return markerTypeFossil;
438
    }
439

  
379 440

  
380 441

  
381 442
}

Also available in: Unified diff