Project

General

Profile

« Previous | Next » 

Revision 18083589

Added by Andreas Müller over 6 years ago

ref #6606: latest changes to the Bogota specimen import (specimens with no CDM taxon ID)

View differences:

app-import/src/main/java/eu/etaxonomy/cdm/app/bogota/BogotaSpecimenActivator.java
20 20
import eu.etaxonomy.cdm.io.common.CdmDefaultImport;
21 21
import eu.etaxonomy.cdm.io.common.IImportConfigurator.CHECK;
22 22
import eu.etaxonomy.cdm.io.common.ImportResult;
23
import eu.etaxonomy.cdm.model.agent.Team;
23 24
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
25
import eu.etaxonomy.cdm.model.reference.IDatabase;
24 26
import eu.etaxonomy.cdm.model.reference.Reference;
25 27
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
28
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
26 29

  
27 30
/**
28 31
 * Activator for import of Bogota Checklist
......
38 41
//  static final ICdmDataSource cdmDestination = CdmDestinations.cdm_test_local_bogota();
39 42
    static final ICdmDataSource cdmDestination = CdmDestinations.cdm_bogota_production();
40 43

  
41
//    int minRow = 6;
42
//    int maxRow = 15; //minRow + 11999;
44
    int minRow = 1;
45
    int maxRow = 1000000; //minRow + 11999;
46
    boolean onlyNonCdmTaxa = true;
43 47

  
44
    int minRow = 180;
45
    int maxRow = 191; //minRow + 11999;
46 48

  
47 49
    boolean dedupRefs = false;
48 50
    boolean dedupAuthors = false;
......
63 65
        config.setMinLineNumber(minRow);
64 66
        config.setMaxLineNumber(maxRow);
65 67
        config.setDeduplicateReferences(dedupRefs);
68
        config.setSecReference(getSecReference());
66 69
        config.setDeduplicateAuthors(dedupAuthors);
70
        config.setOnlyNonCdmTaxa(onlyNonCdmTaxa);
67 71

  
68 72
        config.setSource(source);
69 73
        String fileName = source.toString();
......
98 102
        return result;
99 103
    }
100 104

  
105
    private Reference getSecReference() {
106

  
107
        IDatabase result = ReferenceFactory.newDatabase();
108
//        result.setTitleCache("Herbario. 2017. Identificaciones de muestras de herbario en el banco de datos del Jardín Botánico Nacional José Celestino Mutis. Bogotá [exportados 18-sep-2017]", true);
109
        result.setTitle("Identificaciones de muestras de herbario en el banco de datos del Jardín Botánico Nacional José Celestino Mutis.");
110
        result.setPlacePublished("Bogotá");
111
        result.setDatePublished(TimePeriodParser.parseString("2017"));
112
        result.getDatePublished().setFreeText("2017 [exportados 18-sep-2017]");
113

  
114
        Team team = Team.NewTitledInstance("Herbario", null);
115
        result.setAuthorship(team);
116

  
117
        result.setUuid(UUID.fromString("2bbc08ba-20d2-46cf-bf57-88b90a717733"));
118
        return (Reference)result;
119
    }
120

  
101 121

  
102 122
    /**
103 123
     * @param args
app-import/src/main/java/eu/etaxonomy/cdm/io/bogota/BogotaSpecimenImport.java
9 9
*/
10 10
package eu.etaxonomy.cdm.io.bogota;
11 11

  
12
import java.util.Arrays;
12 13
import java.util.HashMap;
14
import java.util.List;
13 15
import java.util.Map;
14 16
import java.util.UUID;
15 17

  
......
39 41
import eu.etaxonomy.cdm.model.location.NamedAreaType;
40 42
import eu.etaxonomy.cdm.model.location.Point;
41 43
import eu.etaxonomy.cdm.model.location.ReferenceSystem;
44
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
45
import eu.etaxonomy.cdm.model.name.Rank;
42 46
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
43 47
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
44 48
import eu.etaxonomy.cdm.model.name.TaxonName;
49
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
45 50
import eu.etaxonomy.cdm.model.occurrence.Collection;
46 51
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
47 52
import eu.etaxonomy.cdm.model.occurrence.DeterminationEvent;
......
50 55
import eu.etaxonomy.cdm.model.taxon.Synonym;
51 56
import eu.etaxonomy.cdm.model.taxon.Taxon;
52 57
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
58
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
53 59
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
54 60
import eu.etaxonomy.cdm.strategy.parser.DeterminationModifierParser;
61
import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
62
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
55 63
import eu.etaxonomy.cdm.strategy.parser.SpecimenTypeParser;
56 64
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
57 65

  
......
111 119

  
112 120
//    @SuppressWarnings("unchecked")
113 121
    private ImportDeduplicationHelper<SimpleExcelSpecimenImportState<?>> deduplicationHelper;
114
//           = (ImportDeduplicationHelper<SimpleExcelSpecimenImportState<?>>)ImportDeduplicationHelper.NewStandaloneInstance();
115 122

  
123
    private final Map<String, TaxonNode> taxonNodeMap = new HashMap<>();
124
    private Reference secRef;
116 125

  
117 126
    @Override
118 127
    protected String getWorksheetName() {
......
136 145
        try {
137 146

  
138 147
            //species
139
            TaxonBase<?> taxonBase = getOrCreateTaxon(state, line, record, voucherId);
140

  
148
            TaxonBase<?> taxonBase = getTaxonByCdmId(state, line, record, voucherId);
141 149
            if (taxonBase != null){
142
                Taxon taxon = getTaxon(taxonBase);
150
                handleRecordForTaxon(state, voucherId, line, taxonBase);
151
            }else if (record.get(COL_TAXON_UUID)!= null){
152
                //  do nothing
153
            }else{
154
                taxonBase = getOrCreateNewTaxon(state, record, line);
155
                handleRecordForTaxon(state, voucherId, line, taxonBase);
156
            }
143 157

  
144
                TaxonDescription taxonDescription = getTaxonDescription(state, line, taxon);
158
        } catch (Exception e) {
159
            state.getResult().addError("An unexpected exception appeared in record", e, null, line);
160
            e.printStackTrace();
161
        }
145 162

  
146
                DerivedUnit specimen = makeSpecimen(state, line, record, voucherId, taxonBase);
163
    }
147 164

  
148
                IndividualsAssociation indAssoc = IndividualsAssociation.NewInstance(specimen);
149
                indAssoc.addImportSource(voucherId, COL_VOUCHER_ID, getSourceCitation(state), null);
150
                taxonDescription.addElement(indAssoc);
165
    /**
166
     * @param state
167
     * @param record
168
     * @param line
169
     * @return
170
     */
171
    private Taxon getOrCreateNewTaxon(SimpleExcelSpecimenImportState<CONFIG> state,
172
            HashMap<String, String> record, String line) {
173
        String familyStr = record.get(COL_FAMILY);
174
        String genusStr = record.get(COL_GENUS);
175
        initTaxonMap(state);
176
        TaxonName speciesName = makeSpeciesName(state, line);
177
        String titleCache = speciesName.getTitleCache();
178
        TaxonNode existingSpeciesNode = taxonNodeMap.get(titleCache);
179
        if (existingSpeciesNode != null){
180
            return existingSpeciesNode.getTaxon();
181
        }else{
182
            Reference sec = getSecReference(state);
183
            Taxon newTaxon = Taxon.NewInstance(speciesName, sec);
184
            newTaxon.addSource(makeOriginalSource(state));
185
            TaxonNode existingGenusNode = taxonNodeMap.get(genusStr);
186
            if (existingGenusNode == null){
187
                TaxonName genusName = TaxonNameFactory.NewBotanicalInstance(Rank.GENUS());
188
                genusName.setGenusOrUninomial(genusStr);
189
                Taxon newGenus = Taxon.NewInstance(genusName, sec);
190
                newGenus.addSource(makeOriginalSource(state));
191
                TaxonNode existingFamilyNode = taxonNodeMap.get(familyStr);
192
                if (existingFamilyNode == null){
193
                    TaxonName familyName = TaxonNameFactory.NewBotanicalInstance(Rank.FAMILY());
194
                    familyName.setGenusOrUninomial(familyStr);
195
                    Taxon newFamily = Taxon.NewInstance(familyName, sec);
196
                    newFamily.addSource(makeOriginalSource(state));
197
                    TaxonNode plantaeNode = taxonNodeMap.get("Plantae");
198
                    existingFamilyNode = plantaeNode.addChildTaxon(newFamily, null, null);
199
                    save(existingFamilyNode);
200
                }
201
                existingGenusNode = existingFamilyNode.addChildTaxon(newGenus, null, null);
202
                save(existingGenusNode);
203
            }
204
            existingSpeciesNode = existingGenusNode.addChildTaxon(newTaxon, null, null);
205
            save(existingSpeciesNode);
206
            return newTaxon;
207
        }
208

  
209
    }
151 210

  
211
    /**
212
     * @param node the taxon node to save and to register in the taxon node map
213
     */
214
    private void save(TaxonNode node) {
215
        getTaxonNodeService().saveOrUpdate(node);
216
        taxonNodeMap.put(node.getTaxon().getName().getTitleCache(), node);
217

  
218
    }
219

  
220
    /**
221
     * @param state
222
     * @return
223
     */
224
    private Reference getSecReference(SimpleExcelSpecimenImportState<CONFIG> state) {
225
        if (this.secRef == null){
226
            Reference sec = state.getConfig().getSecReference();
227
            this.secRef = getReferenceService().find(sec.getUuid());
228
            if (this.secRef == null){
229
                this.secRef = sec;
230
                getReferenceService().save(sec);
152 231
            }
153
        } catch (Exception e) {
154
            state.getResult().addError("An unexpected exception appeared in record", e, null, line);
155
            e.printStackTrace();
156 232
        }
157 233

  
234

  
235
        return this.secRef;
236
    }
237

  
238
    /**
239
     * @param state
240
     * @param record
241
     * @param line
242
     * @return
243
     */
244
    private TaxonName makeSpeciesName(SimpleExcelSpecimenImportState<CONFIG> state, String line) {
245
        HashMap<String, String> record = state.getOriginalRecord();
246
        String genus = record.get(COL_GENUS);
247
        String species = record.get(COL_SPECIFIC_EPI);
248
        String basionymAuthorStr = record.get(COL_BASIONYM_AUTHOR);
249
        String authorStr = record.get(COL_AUTHOR);
250
        INonViralNameParser<?> parser = NonViralNameParserImpl.NewInstance();
251
        String fullName = genus + " " +  species +
252
                (basionymAuthorStr == null ? "" : " ("+basionymAuthorStr+")")
253
                + " " + authorStr;
254
        TaxonName newName = (TaxonName)parser.parseFullName(fullName , NomenclaturalCode.ICNAFP, Rank.SPECIES());
255
        String titleCache = newName.getTitleCache();
256
        if (newName.isProtectedTitleCache()){
257
            state.getResult().addWarning("Name not parsable: " +  fullName);
258
        }
259
        if (taxonNodeMap.get(titleCache)== null){
260
            getDeduplicationHelper(state).replaceAuthorNamesAndNomRef(state, newName);
261
            newName.addSource(makeOriginalSource(state));
262
        }
263

  
264
        return newName;
265
    }
266

  
267
    /**
268
     * @param state
269
     *
270
     */
271
    private void initTaxonMap(SimpleExcelSpecimenImportState<CONFIG> state) {
272
        if (taxonNodeMap.isEmpty()){
273
            List<String> propertyPaths = Arrays.asList(new String[]{"taxon.name"});
274
            List<TaxonNode> list = getTaxonNodeService().list(null, null, null, null, propertyPaths);
275
            for (TaxonNode node : list){
276
                if (node.getTaxon()!= null){
277
                    String strName = node.getTaxon().getName().getTitleCache();
278
                    TaxonNode existingNode = taxonNodeMap.get(strName);
279
                    if (existingNode != null){
280
                        state.getResult().addWarning("Taxon name exists more than once while initializing taxon map: " + strName, "initTaxonMap");
281
                    }else{
282
                        taxonNodeMap.put(strName, node);
283
                    }
284
                }
285
            }
286
        }
287
    }
288

  
289
    /**
290
     * @param state
291
     * @param record
292
     * @param voucherId
293
     * @param line
294
     * @param taxonBase
295
     * @param taxon
296
     */
297
    protected void handleRecordForTaxon(SimpleExcelSpecimenImportState<CONFIG> state,
298
            String voucherId, String line, TaxonBase<?> taxonBase) {
299

  
300
        HashMap<String, String> record = state.getOriginalRecord();
301
        Taxon taxon = getTaxon(taxonBase);
302

  
303
        TaxonDescription taxonDescription = getTaxonDescription(state, line, taxon);
304

  
305
        DerivedUnit specimen = makeSpecimen(state, line, record, voucherId, taxonBase);
306

  
307
        IndividualsAssociation indAssoc = IndividualsAssociation.NewInstance(specimen);
308
        indAssoc.addImportSource(voucherId, COL_VOUCHER_ID, getSourceCitation(state), null);
309
        taxonDescription.addElement(indAssoc);
158 310
    }
159 311

  
160 312

  
......
682 834
     * @param noStr
683 835
     * @return
684 836
     */
685
    private TaxonBase<?> getOrCreateTaxon(SimpleExcelSpecimenImportState<CONFIG> state, String line,
837
    private TaxonBase<?> getTaxonByCdmId(SimpleExcelSpecimenImportState<CONFIG> state, String line,
686 838
            HashMap<String, String> record, String noStr) {
687 839

  
688 840
        String strUuidTaxon = record.get(COL_TAXON_UUID);
689
        if (strUuidTaxon != null){
841
        if (strUuidTaxon != null && ! state.getConfig().isOnlyNonCdmTaxa()){
690 842
            UUID uuidTaxon;
691 843
            try {
692 844
                uuidTaxon = UUID.fromString(strUuidTaxon);
......
702 854
            }
703 855
            return result;
704 856
        }else{
705
            TaxonName taxonName = null;
706
            Reference sec = null;
707
            Taxon result = Taxon.NewInstance(taxonName, sec);
708
            result.addSource(makeOriginalSource(state));
709
            //TODO export uuid
710

  
711
//            state.getResult().addInfo("Taxon");
712
            //TODO
713 857
            return null;
714 858
        }
715 859
    }
app-import/src/main/java/eu/etaxonomy/cdm/io/bogota/BogotaSpecimenImportConfigurator.java
14 14
import eu.etaxonomy.cdm.database.ICdmDataSource;
15 15
import eu.etaxonomy.cdm.io.common.ImportStateBase;
16 16
import eu.etaxonomy.cdm.io.excel.common.ExcelImportConfiguratorBase;
17
import eu.etaxonomy.cdm.model.reference.Reference;
17 18

  
18 19
/**
19 20
 * @author a.mueller
......
27 28
    private int minLineNumber = 0;
28 29
    private int maxLineNumber = 1000000;
29 30

  
31
    private Reference secReference;
32
    private boolean onlyNonCdmTaxa;
33

  
30 34
    /**
31 35
     * @param source
32 36
     * @param cdmDestination
......
77 81
        this.maxLineNumber = maxLineNumber;
78 82
    }
79 83

  
84
    public Reference getSecReference() {
85
        return secReference;
86
    }
87

  
88
    public void setSecReference(Reference secReference) {
89
        this.secReference = secReference;
90
    }
91

  
92
    public boolean isOnlyNonCdmTaxa() {
93
        return onlyNonCdmTaxa;
94
    }
95

  
96
    public void setOnlyNonCdmTaxa(boolean onlyNonCdmTaxa) {
97
        this.onlyNonCdmTaxa = onlyNonCdmTaxa;
98
    }
80 99

  
81 100
}
app-import/src/main/java/eu/etaxonomy/cdm/io/mexico/SimpleExcelTaxonImportState.java
18 18
import eu.etaxonomy.cdm.io.excel.common.ExcelRowBase;
19 19
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
20 20
import eu.etaxonomy.cdm.model.name.INonViralName;
21
import eu.etaxonomy.cdm.model.name.TaxonName;
21 22
import eu.etaxonomy.cdm.model.reference.Reference;
22 23
import eu.etaxonomy.cdm.model.taxon.Taxon;
23 24

  
......
32 33
    @SuppressWarnings("unused")
33 34
    private static final Logger logger = Logger.getLogger(SimpleExcelTaxonImportState.class);
34 35

  
35
    private Map<String, Reference> refMap = new HashMap<>();
36
    private final Map<String, Reference> refMap = new HashMap<>();
36 37

  
37
    private Map<String, TeamOrPersonBase<?>> agentMap = new HashMap<>();
38
    private final Map<String, TeamOrPersonBase<?>> agentMap = new HashMap<>();
38 39

  
39 40
    private final Map<String, Taxon> higherTaxonTaxonMap = new HashMap<>();
40 41

  
41 42
    //using titleCache
42
    private Map<String, INonViralName> nameMap = new HashMap<>();
43
    private Map<String, TaxonName> nameMap = new HashMap<>();
43 44

  
44 45
    private final Map<String, Taxon> taxonMap = new HashMap<>();
45 46

  
......
82 83
    }
83 84

  
84 85
    //names
85
    public void putName(String titleCache, INonViralName name){
86
    public void putName(String titleCache, TaxonName name){
86 87
        nameMap.put(titleCache, name);
87 88
    }
88 89
    public INonViralName getName(String titleCache){

Also available in: Unified diff