Project

General

Profile

« Previous | Next » 

Revision 565df21b

Added by Andreas Kohlbecker over 7 years ago

ref #6026 first basic import of types

View differences:

app-import/src/main/java/eu/etaxonomy/cdm/app/iapt/IAPTActivator.java
81 81
        config.setSourceReferenceTitle(sourceReferenceTitle);
82 82
        config.setSecReference(secRef);
83 83
        config.setProgressMonitor(DefaultProgressMonitor.NewInstance());
84
        // config.setBatchSize(100); // causes Error during managed flush [Don't change the reference to a collection with delete-orphan enabled : eu.etaxonomy.cdm.model.taxon.TaxonNode.annotations]
84 85

  
85 86
        CdmDefaultImport<IAPTImportConfigurator> myImport = new CdmDefaultImport<>();
86 87

  
......
121 122

  
122 123

  
123 124
    public static URI iapt() {
124
        File f = new File(System.getProperty("user.home") + "/data/Projekte/Algea Name Registry/registry/sources/IAPT/Registration_DB_from_BGBM17-cleaned-02.xls");
125
        File f = new File(System.getProperty("user.home") + "/data/Projekte/Algea Name Registry/registry/sources/IAPT/iapt-100.xls");
125 126
        return f.toURI();
126 127
    }
127 128

  
app-import/src/main/java/eu/etaxonomy/cdm/io/iapt/IAPTExcelImport.java
9 9

  
10 10
package eu.etaxonomy.cdm.io.iapt;
11 11

  
12
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
12 13
import eu.etaxonomy.cdm.common.CdmUtils;
13 14
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
14 15
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
15 16
import eu.etaxonomy.cdm.model.common.*;
16 17
import eu.etaxonomy.cdm.model.name.*;
18
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
19
import eu.etaxonomy.cdm.model.occurrence.FieldUnit;
20
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
17 21
import eu.etaxonomy.cdm.model.reference.Reference;
18 22
import eu.etaxonomy.cdm.model.taxon.*;
19 23
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
......
66 70

  
67 71
    private static final Pattern nomRefTokenizeP = Pattern.compile("^(.*):\\s([^\\.:]+)\\.(.*)$");
68 72
    private static final Pattern nomRefPubYearExtractP = Pattern.compile("(.*?)(1[7,8,9][0-9]{2}).*$|^.*?[0-9]{1,2}([\\./])[0-1]?[0-9]\\3([0-9]{2})\\.$"); // 1700 - 1999
73
    private static final Pattern typeSplitPattern =  Pattern.compile("^(?:\"*[Tt]ype: (?<type>.*?))(?:[Hh]olotype:(?<holotype>.*?))?(?:[Ii]sotype[^:]*:(?<isotype>.*))?$");
74
    /**
     * Names of the sections that can be extracted from a type string.
     * The constant names are identical to the named capturing groups
     * ({@code type}, {@code holotype}, {@code isotype}) of {@code typeSplitPattern},
     * so {@code name()} can be passed directly to {@code Matcher.group(String)}.
     */
    enum TypesName {
75
        type, holotype, isotype;
76

  
77
        /**
         * Maps this section name to a specimen type designation status.
         *
         * @return {@code HOLOTYPE()} for {@code holotype}, {@code ISOTYPE()} for
         *         {@code isotype}, and {@code null} for every other constant
         *         (currently only {@code type})
         */
        public SpecimenTypeDesignationStatus status(){
78
            switch (this) {
79
                case holotype:
80
                    return SpecimenTypeDesignationStatus.HOLOTYPE();
81
                case isotype:
82
                    return SpecimenTypeDesignationStatus.ISOTYPE();
83
                default:
84
                    // no status is defined for the generic "type" section
                    return null;
85
            }
86
        }
87
    }
69 88

  
70 89
    private MarkerType markerTypeFossil = null;
71 90
    private Rank rankUnrankedSupraGeneric = null;
......
73 92
    private AnnotationType annotationTypeCaveats = null;
74 93

  
75 94
    private Taxon makeTaxon(HashMap<String, String> record, SimpleExcelTaxonImportState<CONFIG> state,
76
                            TaxonNode higherTaxonNode, boolean isSynonym, boolean isFossil) {
95
                            TaxonNode higherTaxonNode, boolean isFossil) {
77 96

  
78 97
        String line = state.getCurrentLine() + ": ";
79 98

  
......
84 103
        String authorsSpelling = getValue(record, AUTHORSSPELLING, true);
85 104
        String notesTxt = getValue(record, NOTESTXT, true);
86 105
        String caveats = getValue(record, CAVEATS, true);
106
        String fullSynSubstStr = getValue(record, FULLSYNSUBST, true);
107
        String synSubstStr = getValue(record, SYNSUBSTSTR, true);
108
        String typeStr = getValue(record, TYPE, true);
87 109

  
88 110
        String nomRefTitle = null;
89 111
        String nomRefDetail = null;
......
125 147
            }
126 148
        }
127 149

  
128
        BotanicalName taxonName;
129
        // cache field for the taxonName.titleCache
150
        BotanicalName taxonName = makeBotanicalName(state, titleCacheStr, nameStr, authorStr, nomRefTitle);
151

  
152
        if(!StringUtils.isEmpty(notesTxt)){
153
            notesTxt = notesTxt.replace("Notes: ", "").trim();
154
            taxonName.addAnnotation(Annotation.NewInstance(notesTxt, AnnotationType.EDITORIAL(), Language.DEFAULT()));
155
        }
156
        if(!StringUtils.isEmpty(caveats)){
157
            caveats = caveats.replace("Caveats: ", "").trim();
158
            taxonName.addAnnotation(Annotation.NewInstance(caveats, annotationTypeCaveats(), Language.DEFAULT()));
159
        }
160
        //
161

  
162
        // Namerelations
163
        if(!StringUtils.isEmpty(authorsSpelling)){
164
            authorsSpelling = authorsSpelling.replaceFirst("Author's spelling:", "").replaceAll("\"", "").trim();
165

  
166
            String[] authorSpellingTokens = StringUtils.split(authorsSpelling, " ");
167
            String[] nameStrTokens = StringUtils.split(nameStr, " ");
168

  
169
            ArrayUtils.reverse(authorSpellingTokens);
170
            ArrayUtils.reverse(nameStrTokens);
171

  
172
            for (int i = 0; i < nameStrTokens.length; i++){
173
                if(i < authorSpellingTokens.length){
174
                    nameStrTokens[i] = authorSpellingTokens[i];
175
                }
176
            }
177
            ArrayUtils.reverse(nameStrTokens);
178

  
179
            String misspelledNameStr = StringUtils.join (nameStrTokens, ' ');
180
            // build the fullnameString of the misspelled name
181
            misspelledNameStr = taxonName.getTitleCache().replace(nameStr, misspelledNameStr);
182

  
183
            TaxonNameBase misspelledName = (BotanicalName) nameParser.parseReferencedName(misspelledNameStr, NomenclaturalCode.ICNAFP, null);
184
            misspelledName.addRelationshipToName(taxonName, NameRelationshipType.MISSPELLING(), null);
185
            getNameService().save(misspelledName);
186
        }
187

  
188
        // Replaced Synonyms
189
        if(!StringUtils.isEmpty(fullSynSubstStr)){
190
            fullSynSubstStr = fullSynSubstStr.replace("Syn. subst.: ", "");
191
            BotanicalName replacedSynonymName = makeBotanicalName(state, fullSynSubstStr, synSubstStr, null, null);
192
            replacedSynonymName.addReplacedSynonym(taxonName, null, null, null);
193
            getNameService().save(replacedSynonymName);
194
        }
195

  
196
        Reference sec = state.getConfig().getSecReference();
197
        Taxon taxon = Taxon.NewInstance(taxonName, sec);
198

  
199
        // Markers
200
        if(isFossil){
201
            taxon.addMarker(Marker.NewInstance(markerTypeFossil(), true));
202
        }
203

  
204
        // Types
205
        if(!StringUtils.isEmpty(typeStr)){
206
            Matcher m = typeSplitPattern.matcher(typeStr);
207

  
208
            if(m.matches()){
209
                String typeString = m.group(TypesName.type.name());
210
                boolean isFieldUnit = typeStr.matches(".*([°']|\\d+\\s?m\\s|\\d+\\s?km\\s).*"); // check for location or unit m, km
211

  
212
                if(isFieldUnit) {
213
                    // type as fieldUnit
214
                    FieldUnit fu = FieldUnit.NewInstance();
215
                    fu.setTitleCache(typeString, true);
216
                    getOccurrenceService().save(fu);
217

  
218
                    // all others ..
219
                    addSpecimenTypes(taxonName, fu, m.group(TypesName.holotype.name()), TypesName.holotype, false);
220
                    addSpecimenTypes(taxonName, fu, m.group(TypesName.isotype.name()), TypesName.isotype, true);
221
                } else {
222
                    TaxonNameBase typeName = nameParser.parseFullName(typeString);
223
                    taxonName.addNameTypeDesignation(typeName, null, null, null, NameTypeDesignationStatus.AUTOMATIC(), true, true, true, true);
224
                }
225
            }
226
            getNameService().save(taxonName);
227

  
228
        }
229

  
230
        getTaxonService().save(taxon);
231
        if(higherTaxonNode != null){
232
            higherTaxonNode.addChildTaxon(taxon, null, null);
233
            getTaxonNodeService().save(higherTaxonNode);
234
        }
235

  
236
        return taxon;
237

  
238
    }
239

  
240
    /**
     * Creates one specimen per entry found in {@code typeStr} and adds each of them
     * to {@code taxonName} as a specimen type designation.
     * <p>
     * This method does not call any service to persist the created objects itself.
     *
     * @param taxonName the name which receives the specimen type designations
     * @param fieldUnit the field unit handed to {@code DerivedUnitFacade.NewInstance}
     *                  for every specimen created here
     * @param typeStr   the raw text of one type section; nothing is done when
     *                  {@code StringUtils.isEmpty(typeStr)} is true
     * @param typeName  the kind of section; its {@link TypesName#status()} is used as the
     *                  designation status (which is {@code null} for {@code TypesName.type})
     * @param multiple  if {@code true}, {@code typeStr} is split at commas and every
     *                  non-empty token becomes its own specimen; if {@code false} the
     *                  whole string is used as a single entry
     */
    private void addSpecimenTypes(BotanicalName taxonName, FieldUnit fieldUnit, String typeStr, TypesName typeName, boolean multiple){
241
        if(StringUtils.isEmpty(typeStr)){
242
            return;
243
        }
244
        // strip surrounding whitespace and one trailing dot
        typeStr = typeStr.trim().replaceAll("\\.$", "");
245

  
246
        List<String> typeData = new ArrayList<>();
247
        if(multiple){
248
            // comma separated list; at most one whitespace character on each side of the comma is consumed by the split
            String[] tokens = typeStr.split("\\s?,\\s?");
249
            for (String t : tokens) {
250
                if(!t.isEmpty()){
251
                    typeData.add(t.trim());
252
                }
253
            }
254
        } else {
255
            typeData.add(typeStr.trim());
256
        }
257

  
258
        for(String type : typeData){
259
            DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.OtherSpecimen, fieldUnit);
260
            // the raw text becomes the title cache; NOTE(review): the second argument presumably protects the cache from being regenerated - confirm
            facade.setTitleCache(type, true);
261
            DerivedUnit specimen = facade.innerDerivedUnit();
262
            // NOTE(review): meaning of the trailing arguments (null, null, null, false, true) is not visible here - check the addSpecimenTypeDesignation signature
            taxonName.addSpecimenTypeDesignation(specimen, typeName.status(), null, null, null, false, true);
263
       }
264
    }
265

  
266
    private BotanicalName makeBotanicalName(SimpleExcelTaxonImportState<CONFIG> state, String titleCacheStr, String nameStr, String authorStr, String nomRefTitle) {
267

  
268
        BotanicalName taxonName;// cache field for the taxonName.titleCache
130 269
        String taxonNameTitleCache = null;
131 270
        Map<String, AnnotationType> nameAnnotations = new HashMap<>();
132 271

  
272
        String line = state.getCurrentLine() + ": ";
273

  
133 274
        // TitleCache preprocessing
134 275
        if(titleCacheStr.endsWith(ANNOTATION_MARKER_STRING) || (authorStr != null && authorStr.endsWith(ANNOTATION_MARKER_STRING))){
135 276
            nameAnnotations.put("Author abbreviation not checked.", AnnotationType.EDITORIAL());
......
166 307
                titleCacheCompareStr = titleCacheCompareStr.replaceAll("^X ", "× ");
167 308
                nameCompareStr = nameCompareStr.replace("^X ", "× ");
168 309
            }
169
            if(authorStr.contains(" et ")){
310
            if(authorStr != null && authorStr.contains(" et ")){
170 311
                titleCacheCompareStr = titleCacheCompareStr.replaceAll(" et ", " & ");
171 312
            }
172 313
            if (!taxonNameTitleCache.equals(titleCacheCompareStr)) {
......
199 340
            }
200 341
            getNameService().save(taxonName);
201 342
        }
202
        if(!StringUtils.isEmpty(notesTxt)){
203
            notesTxt = notesTxt.replace("Notes: ", "").trim();
204
            taxonName.addAnnotation(Annotation.NewInstance(notesTxt, AnnotationType.EDITORIAL(), Language.DEFAULT()));
205
        }
206
        if(!StringUtils.isEmpty(caveats)){
207
            caveats = caveats.replace("Caveats: ", "").trim();
208
            taxonName.addAnnotation(Annotation.NewInstance(caveats, annotationTypeCaveats(), Language.DEFAULT()));
209
        }
210
        //
211

  
212
        // Namerelations
213
        if(!StringUtils.isEmpty(authorsSpelling)){
214
            authorsSpelling = authorsSpelling.replaceFirst("Author's spelling:", "").replaceAll("\"", "").trim();
215

  
216
            String[] authorSpellingTokens = StringUtils.split(authorsSpelling, " ");
217
            String[] nameStrTokens = StringUtils.split(nameStr, " ");
218

  
219
            ArrayUtils.reverse(authorSpellingTokens);
220
            ArrayUtils.reverse(nameStrTokens);
221

  
222
            for (int i = 0; i < nameStrTokens.length; i++){
223
                if(i < authorSpellingTokens.length){
224
                    nameStrTokens[i] = authorSpellingTokens[i];
225
                }
226
            }
227
            ArrayUtils.reverse(nameStrTokens);
228

  
229
            String misspelledNameStr = StringUtils.join (nameStrTokens, ' ');
230
            // build the fullnameString of the misspelled name
231
            misspelledNameStr = taxonNameTitleCache.replace(nameStr, misspelledNameStr);
232

  
233
            TaxonNameBase misspelledName = (BotanicalName) nameParser.parseReferencedName(misspelledNameStr, NomenclaturalCode.ICNAFP, null);
234
            misspelledName.addRelationshipToName(taxonName, NameRelationshipType.MISSPELLING(), null);
235
            getNameService().save(misspelledName);
236
        }
237

  
238
        Reference sec = state.getConfig().getSecReference();
239
        Taxon taxon = Taxon.NewInstance(taxonName, sec);
240

  
241
        // Markers
242
        if(isFossil){
243
            taxon.addMarker(Marker.NewInstance(markerTypeFossil(), true));
244
        }
245

  
246
        getTaxonService().save(taxon);
247
        if(higherTaxonNode != null){
248
            higherTaxonNode.addChildTaxon(taxon, null, null);
249
            getTaxonNodeService().save(higherTaxonNode);
250
        }
251

  
252
        return taxon;
253

  
343
        return taxonName;
254 344
    }
255 345

  
256 346
    /**
......
320 410
	@Override
321 411
    protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
322 412

  
323
	    boolean isSynonymOnly = false;
324

  
325 413
        String lineNumber = state.getCurrentLine() + ": ";
326 414
        logger.setLevel(Level.DEBUG);
327 415
        HashMap<String, String> record = state.getOriginalRecord();
......
346 434
        TaxonNode higherTaxon = getHigherTaxon(higherTaxaString, (IAPTImportState)state);
347 435

  
348 436
       //Taxon
349
        Taxon taxon = makeTaxon(record, state, higherTaxon, isSynonymOnly, isFossil);
350
        if (taxon == null && ! isSynonymOnly){
437
        Taxon taxon = makeTaxon(record, state, higherTaxon, isFossil);
438
        if (taxon == null){
351 439
            logger.warn(lineNumber + "taxon could not be created and is null");
352 440
            return;
353 441
        }
354 442
        ((IAPTImportState)state).setCurrentTaxon(taxon);
355 443

  
356
        //Syn.
357
        //makeSynonyms(record, state, !isSynonymOnly);
358

  
359 444

  
360 445
		return;
361 446
    }
362 447

  
363 448
    private TaxonNode getHigherTaxon(String higherTaxaString, IAPTImportState state) {
364

  
365
        // higherTaxaString is like
366
        // - DICOTYLEDONES: LEGUMINOSAE: MIMOSOIDEAE
367
        // - FOSSIL DICOTYLEDONES: PROTEACEAE
368
        // - [fungi]
369
        // - [no group assigned]
370
        if(higherTaxaString.equals("[no group assigned]")){
371
            return null;
372
        }
373 449
        String[] higherTaxaNames = higherTaxaString.toLowerCase().replaceAll("[\\[\\]]", "").split(":");
374 450
        TaxonNode higherTaxonNode = null;
375 451

  
......
496 572
    }
497 573

  
498 574

  
499

  
500 575
}

Also available in: Unified diff