Project

General

Profile

« Previous | Next » 

Revision 7681e63f

Added by Andreas Müller about 6 years ago

ref #5499, ref #7308 improve Edaphobase import

  • official remarks
  • name additions
  • handle "et al." authors and replace " et " with " & "
  • remove deleted taxa
  • some fixes

View differences:

app-import/src/main/java/eu/etaxonomy/cdm/io/edaphobase/EdaphobaseAuthorImport.java
98 98
            while(rs.next()){
99 99
                List<Person> singlePersons = new ArrayList<>();
100 100
                String authorStr = rs.getString("tax_author_name");
101
                authorStr = authorStr.replace(" et ", " & ");
102

  
103
                boolean isEtAl = false;
104
                if (authorStr.endsWith(" & al.")){
105
                    isEtAl = true;
106
                    authorStr = authorStr.substring(0, authorStr.length()-6).trim();
107
                }
101 108

  
102 109
                String[] splits = authorStr.split("\\s*&\\s*");
103 110
                for (String split : splits){
......
105 112
                    for (String commaSplit : commaSplits){
106 113
                        Person person = personMap.get(commaSplit);
107 114
                        if (person == null){
108
                            person = Person.NewTitledInstance(commaSplit);
115
                            person = Person.NewInstance();
116
                            person.setNomenclaturalTitle(commaSplit);
109 117
                            personMap.put(commaSplit, person);
110 118
                        }
111 119
                        singlePersons.add(person);
112 120
                    }
113 121
                }
114
                if (singlePersons.size() > 1){
122
                if (singlePersons.size() > 1 || singlePersons.size() ==  1 && isEtAl){
115 123
                    Team team = Team.NewInstance();
124
                    team.setHasMoreMembers(isEtAl);
116 125
                    for (Person person: singlePersons){
117 126
                        team.addTeamMember(person);
118 127
                    }
app-import/src/main/java/eu/etaxonomy/cdm/io/edaphobase/EdaphobaseClassificationImport.java
22 22
import eu.etaxonomy.cdm.io.common.IPartitionedIO;
23 23
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
24 24
import eu.etaxonomy.cdm.model.common.CdmBase;
25
import eu.etaxonomy.cdm.model.name.TaxonName;
26 25
import eu.etaxonomy.cdm.model.reference.Reference;
27 26
import eu.etaxonomy.cdm.model.taxon.Classification;
28 27
import eu.etaxonomy.cdm.model.taxon.Synonym;
......
81 80
        ResultSet rs = partitioner.getResultSet();
82 81
        Map<String, Classification> map = partitioner.getObjectMap(CLASSIFICATION_NAMESPACE);
83 82
        Classification classification = map.get(state.getConfig().getClassificationUuid().toString());
84
        Reference sourceReference = state.getTransactionalSourceReference();
85 83

  
86 84
        Set<TaxonBase> taxaToSave = new HashSet<>();
87 85
        try {
88 86
            while (rs.next()){
89
                int id = rs.getInt("taxon_id");
90
                 //parentTaxonFk
91
                boolean isValid = rs.getBoolean("valid");
92
//                boolean idDeleted = rs.getBoolean("deleted");
93
//                String treeIndex = rs.getString("path_to_root");
94
//                Integer rankFk = rs.getInt("tax_rank_fk");
95
//                String officialRemark = rs.getString("official_remark");
96
//                boolean isGroup = rs.getBoolean("taxonomic_group");
97
                Integer parentTaxonFk = nullSafeInt(rs, "parent_taxon_fk");
98

  
99
                if (parentTaxonFk != null){
100
                    TaxonBase<?> parent = state.getRelatedObject(TAXON_NAMESPACE, parentTaxonFk.toString(), TaxonBase.class);
101
                    if (parent == null){
102
                        logger.warn("Parent taxon " + parentTaxonFk + " not found for taxon " + id );
103
                    }else{
87
                handleSingleRecord(state, rs, classification, taxaToSave);
88
            }
89
        } catch (SQLException e) {
90
            e.printStackTrace();
91
        }
92

  
93
        getTaxonService().saveOrUpdate(taxaToSave);
94
        return true;
95
    }
96

  
97
/**
98
     * @param state
99
     * @param rs
100
 * @param taxaToSave
101
 * @param classification
102
 * @throws SQLException
103
     */
104
    private void handleSingleRecord(EdaphobaseImportState state, ResultSet rs, Classification classification, Set<TaxonBase> taxaToSave) throws SQLException {
105
        Reference sourceReference = state.getTransactionalSourceReference();
106

  
107
        int id = rs.getInt("taxon_id");
108
        boolean isDeleted = rs.getBoolean("deleted");
109
        if (isDeleted){
110
            logger.warn("Deleted not handled according to mail Stephan 2018-03-07. ID: " + id );
111
            return;
112
        }
104 113

  
105
                        TaxonName parentName = parent.getName();
106

  
107
                        TaxonBase<?> child = state.getRelatedObject(TAXON_NAMESPACE, String.valueOf(id), TaxonBase.class);
108
//                        TaxonName childName = child.getName();
109

  
110
//                        handleMissingNameParts(CdmBase.deproxy(childName, TaxonName.class), CdmBase.deproxy(parentName, NonViralName.class));
111

  
112
                        if (isValid){
113
                            if (parent.isInstanceOf(Synonym.class)){
114
                                logger.warn("Parent taxon (" + parentTaxonFk + " is not valid for valid child " + id + ")");
115
                            }else{
116
                                Taxon accParent = CdmBase.deproxy(parent, Taxon.class);
117
                                classification.addParentChild(accParent, (Taxon)child, sourceReference, null);
118
                                taxaToSave.add(accParent);
119
                            }
120
                        }else{
121
//                            Synonym synonym = CdmBase.deproxy(child, Synonym.class);
122
//                            if (synonym == null){
123
//                                logger.warn("Synonym " + id + " not found for taxon ");
124
//                            }
125
//                            if(parent.isInstanceOf(Synonym.class)){
126
//                                String message = "Taxon ("+parentTaxonFk+") is not accepted but synonym. Can't add synonym ("+id+")";
127
//                                logger.warn(message);
128
//                            }else{
129
//                                Taxon accepted = CdmBase.deproxy(parent, Taxon.class);
130
////                                accepted.addSynonym(synonym, SynonymType.SYNONYM_OF());
131
//                                taxaToSave.add(accepted);
132
//                            }
114
         //parentTaxonFk
115
        boolean isValid = rs.getBoolean("valid");
116
//        boolean idDeleted = rs.getBoolean("deleted");
117
//        String treeIndex = rs.getString("path_to_root");
118
//        Integer rankFk = rs.getInt("tax_rank_fk");
119
//        String officialRemark = rs.getString("official_remark");
120
//        boolean isGroup = rs.getBoolean("taxonomic_group");
121
        Integer parentTaxonFk = nullSafeInt(rs, "parent_taxon_fk");
122

  
123
        if (parentTaxonFk != null){
124
            TaxonBase<?> parent = state.getRelatedObject(TAXON_NAMESPACE, parentTaxonFk.toString(), TaxonBase.class);
125
            if (parent == null){
126
                logger.warn("Parent taxon " + parentTaxonFk + " not found for taxon " + id );
127
            }else{
128

  
129
                TaxonBase<?> child = state.getRelatedObject(TAXON_NAMESPACE, String.valueOf(id), TaxonBase.class);
130

  
131
                if (isValid){
132
                    if (parent.isInstanceOf(Synonym.class)){
133
                        logger.warn("Parent taxon (" + parentTaxonFk + " is not valid for valid child " + id + ")");
134
                    }else{
135
                        Taxon accParent = CdmBase.deproxy(parent, Taxon.class);
136
                        if (child == null){
137
                            logger.warn("Child not found. ID= " + id);
133 138
                        }
139
                        classification.addParentChild(accParent, (Taxon)child, sourceReference, null);
140
                        taxaToSave.add(accParent);
134 141
                    }
142
                }else{
143
//                    Synonym synonym = CdmBase.deproxy(child, Synonym.class);
144
//                    if (synonym == null){
145
//                        logger.warn("Synonym " + id + " not found for taxon ");
146
//                    }
147
//                    if(parent.isInstanceOf(Synonym.class)){
148
//                        String message = "Taxon ("+parentTaxonFk+") is not accepted but synonym. Can't add synonym ("+id+")";
149
//                        logger.warn(message);
150
//                    }else{
151
//                        Taxon accepted = CdmBase.deproxy(parent, Taxon.class);
152
////                        accepted.addSynonym(synonym, SynonymType.SYNONYM_OF());
153
//                        taxaToSave.add(accepted);
154
//                    }
135 155
                }
156
            }
157
        }
136 158

  
137
//              //id
138
//              String nameSpace = "tax_taxon";
139
//              ImportHelper.setOriginalSource(taxonBase, state.getTransactionalSourceReference(), id, nameSpace);
140
//              ImportHelper.setOriginalSource(name, state.getTransactionalSourceReference(), id, nameSpace);
159
//      //id
160
//      String nameSpace = "tax_taxon";
161
//      ImportHelper.setOriginalSource(taxonBase, state.getTransactionalSourceReference(), id, nameSpace);
162
//      ImportHelper.setOriginalSource(name, state.getTransactionalSourceReference(), id, nameSpace);
141 163

  
142 164

  
143
            }
144
        } catch (SQLException e) {
145
            e.printStackTrace();
146
        }
147 165

  
148
        getTaxonService().saveOrUpdate(taxaToSave);
149
        return true;
150 166
    }
151 167

  
152 168
//    /**
app-import/src/main/java/eu/etaxonomy/cdm/io/edaphobase/EdaphobaseTaxonImport.java
25 25
import eu.etaxonomy.cdm.io.common.ImportHelper;
26 26
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
27 27
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
28
import eu.etaxonomy.cdm.model.agent.Person;
29
import eu.etaxonomy.cdm.model.agent.Team;
28 30
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
31
import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
32
import eu.etaxonomy.cdm.model.common.Annotation;
33
import eu.etaxonomy.cdm.model.common.AnnotationType;
29 34
import eu.etaxonomy.cdm.model.common.CdmBase;
30 35
import eu.etaxonomy.cdm.model.common.Language;
31 36
import eu.etaxonomy.cdm.model.common.Marker;
......
33 38
import eu.etaxonomy.cdm.model.common.OrderedTermVocabulary;
34 39
import eu.etaxonomy.cdm.model.common.Representation;
35 40
import eu.etaxonomy.cdm.model.name.IZoologicalName;
41
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
36 42
import eu.etaxonomy.cdm.model.name.Rank;
37 43
import eu.etaxonomy.cdm.model.name.RankClass;
38 44
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
......
41 47
import eu.etaxonomy.cdm.model.taxon.Synonym;
42 48
import eu.etaxonomy.cdm.model.taxon.Taxon;
43 49
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
50
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImplRegExBase;
44 51

  
45 52
/**
46 53
 * @author a.mueller
......
117 124
    }
118 125

  
119 126
    @Override
120
    public boolean doPartition(ResultSetPartitioner partitioner, EdaphobaseImportState state) {
127
    public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, EdaphobaseImportState state) {
121 128
        ResultSet rs = partitioner.getResultSet();
129
        @SuppressWarnings("rawtypes")
122 130
        Set<TaxonBase> taxaToSave = new HashSet<>();
123 131
        try {
124 132
            while (rs.next()){
......
151 159
        //rankFk
152 160
        Integer nomRefId = nullSafeInt(rs, "tax_document");
153 161
        boolean isValid = rs.getBoolean("valid");
154
        boolean idDeleted = rs.getBoolean("deleted");
162
        boolean isDeleted = rs.getBoolean("deleted");
155 163
        String displayString = rs.getString("display_string");
156 164
        Integer version = nullSafeInt(rs, "versionfield");
157 165
        String pages = rs.getString("pages");
......
168 176
        String grandParentNameStr = rs.getString("grandParentName");
169 177
        String grandGrandParentNameStr = rs.getString("grandGrandParentName");
170 178

  
179

  
180
        if (isDeleted){
181
            logger.warn("Deleted not handled according to mail Stephan 2018-03-07. ID: " + id );
182
            return;
183
        }
184
        boolean nameAdditionUsed =  isBlank(nameAddition);
185
        if (!nameAdditionUsed){
186
            nameAddition = nameAddition.trim();
187
        }
188

  
171 189
        isValid = checkValid(state, id, isValid);
172 190

  
191
        //for debug only
192
        if (id.equals(97600) || id.equals(97601)){
193
            logger.debug("now");
194
        }
195

  
173 196
        TaxonBase<?> taxonBase;
174 197

  
175 198
        rankStr= extractEnglish(rankStr);
......
181 204
        Rank rank = makeRank(state, rankStr);
182 205
        checkRankMarker(state, rank);
183 206
        IZoologicalName name = TaxonNameFactory.NewZoologicalInstance(rank);
184
        setNamePart(nameStr, rank, name);
185
        Rank parentRank = makeRank(state, parentRankStr);
186
        setNamePart(parentNameStr, parentRank, name);
187
        Rank parentParentRank = makeRank(state, grandParentRankStr);
188
        setNamePart(grandParentNameStr, parentParentRank, name);
189
        Rank grandParentParentRank = makeRank(state, grandGrandParentRankStr);
190
        setNamePart(grandGrandParentNameStr, grandParentParentRank, name);
191
        if (grandParentParentRank != null && grandParentParentRank.isLower(Rank.GENUS()) || isBlank(name.getGenusOrUninomial()) ){
192
            logger.warn("Grand-Grandparent rank is lower than genus for " +
193
                    name.getTitleCache() + " (edapho-id: " + id + "; cdm-id: " + name.getId() + ")");
207
        if (rank == null){
208
            name.setNameCache(nameStr, true);
209
        }else{
210
            setNamePart(nameStr, rank, name);
211
            Rank parentRank = makeRank(state, parentRankStr);
212
            setNamePart(parentNameStr, parentRank, name);
213
            Rank parentParentRank = makeRank(state, grandParentRankStr);
214
            setNamePart(grandParentNameStr, parentParentRank, name);
215
            Rank grandParentParentRank = makeRank(state, grandGrandParentRankStr);
216
            setNamePart(grandGrandParentNameStr, grandParentParentRank, name);
217
            if (grandParentParentRank != null && grandParentParentRank.isLower(Rank.GENUS()) || isBlank(name.getGenusOrUninomial()) && !name.isProtectedNameCache()){
218
                logger.warn("Grand-Grandparent rank is lower than genus for " +
219
                        name.getTitleCache() + " (edapho-id: " + id + "; cdm-id: " + name.getId() + ")");
220
            }
194 221
        }
195 222

  
196 223
        //Authors
197
        if (StringUtils.isNotBlank(authorName)){
224
        if (isNotBlank(authorName)){
225
            authorName = authorName.replace(" et ", " & ");
198 226
            TeamOrPersonBase<?> author = state.getRelatedObject(AUTHOR_NAMESPACE, authorName, TeamOrPersonBase.class);
199 227
            if (author == null){
200 228
                logger.warn("Author not found in state: "  + authorName);
......
209 237
            }
210 238
        }
211 239

  
240
        String capitalWord = NonViralNameParserImplRegExBase.capitalWord;
241
        String autNam = "(" + capitalWord + "( in "+capitalWord+")?|Schuurmans Stekhoven|Winiszewska-Ślipińska|Fürst von Lieven|de Coninck|de Man|de Ley|de Grisse|"
242
                + "van der Linde|Pschorn-Walcher|van der Berg|J. Goddey)";
243
        if (isNotBlank(nameAddition) && nameAddition.matches("(\\[|\\()?nomen.*")){
244
            if ("(nomen oblitum)".equals(nameAddition) ){
245
                name.addStatus(NomenclaturalStatusType.ZOO_OBLITUM(), null, null);
246
            }else if ("nomen dubium".equals(nameAddition) || "[nomen dubium]".equals(nameAddition)){
247
                name.addStatus(NomenclaturalStatusType.DOUBTFUL(), null, null);
248
            }else if ("nomen nudum".equals(nameAddition)){
249
                name.addStatus(NomenclaturalStatusType.NUDUM(), null, null);
250
            }else if (nameAddition.matches("nomen nudum \\["+autNam+"\\, 19\\d{2}]")){
251
                name.addStatus(NomenclaturalStatusType.NUDUM(), null, null);
252
                Person nomNudAuthor = parseNomenNudumAuthor(state, name, nameAddition);
253
                if (name.getCombinationAuthorship()!= null || name.getBasionymAuthorship() != null){
254
                    logger.warn("Author already exists for nomen nudum name with author. ID: " + id);
255
                }
256
                name.setCombinationAuthorship(nomNudAuthor);
257
            }else{
258
                logger.warn("'nomen xxx' name addition not recognized: " + nameAddition + ". ID: " + id);
259
            }
260
            nameAdditionUsed = true;
261
        }
262
        if (isNotBlank(nameAddition) && nameAddition.matches(autNam + "((, "+autNam+")? & " + autNam + ")?" +    ", \\d{4}")){
263
            nameAddition = nameAddition.replace(" et ", " & ");
264
            int pos = nameAddition.length()-6;
265
            String authorStr = nameAddition.substring(0, pos);
266
            Integer naYear = Integer.valueOf(nameAddition.substring(pos +  2));
267
            if (name.getPublicationYear() != null){
268
                logger.warn("Publication year already exists. ID=" +  id);
269
            }
270
            name.setPublicationYear(naYear);
271
            TeamOrPersonBase<?> author = getNameAdditionAuthor(authorStr);
272
            if (name.getCombinationAuthorship() != null){
273
                logger.warn("Combination author already exists. ID=" +  id);
274
            }
275
            name.setCombinationAuthorship(author);
276
            nameAdditionUsed = true;
277
        }
278
        if (isNotBlank(nameAddition) && nameAddition.matches("(nec|non) " + capitalWord +  ", \\d{4}")){
279
            String str = nameAddition.substring(4);
280
            String[] split = str.split(",");
281
            IZoologicalName homonym = (IZoologicalName)name.clone();
282
            homonym.setCombinationAuthorship(null);
283
            homonym.setBasionymAuthorship(null);
284
            homonym.setPublicationYear(null);
285
            homonym.setOriginalPublicationYear(null);
286
            TeamOrPersonBase<?> author = getNameAdditionAuthor(split[0]);
287
            homonym.setCombinationAuthorship(author);
288
            homonym.setPublicationYear(Integer.valueOf(split[1].trim()));
289
            nameAdditionUsed = true;
290
        }
291

  
212 292
        //nomRef
213 293
        if (nomRefId != null){
214 294
            Reference nomRef = state.getRelatedObject(REFERENCE_NAMESPACE, String.valueOf(nomRefId), Reference.class);
......
232 312
        handleTaxonomicGroupMarker(state, taxonBase, isGroup);
233 313
        taxaToSave.add(taxonBase);
234 314

  
315
        //sensu, auct.
316
        if (isNotBlank(nameAddition) && (nameAddition.startsWith("sensu ") || "auct.".equals(nameAddition))){
317
            nameAddition = nameAddition.replace(" et ", " & ");
318
            taxonBase.setSec(null);
319
            taxonBase.setAppendedPhrase(nameAddition);
320
            //TODO
321
            nameAdditionUsed = true;
322
        }
323

  
235 324
        //remarks
236
        doNotes(taxonBase, remark);
325
        doNotes(taxonBase, remark, AnnotationType.TECHNICAL());
326
        doNotes(taxonBase, officialRemark, AnnotationType.EDITORIAL());
237 327

  
238 328
        //id
239 329
        ImportHelper.setOriginalSource(taxonBase, state.getTransactionalSourceReference(), id, TAXON_NAMESPACE);
240 330
        ImportHelper.setOriginalSource(name, state.getTransactionalSourceReference(), id, TAXON_NAMESPACE);
241 331
        handleExampleIdentifiers(taxonBase, id);
332

  
333
        if (!nameAdditionUsed){
334
            logger.warn("name_addition not recognized: " +  nameAddition + ". ID="+id);
335
            name.setAppendedPhrase(nameAddition);
336
        }
337
        String orig = displayString.replace("nomen nudum [Hirschmann, 1951]", "Hirschmann, 1951")
338
                .replace("  ", " ");
339
        String nameTitleCache = name.getTitleCache().replace("species group", "group");
340
        String taxonTitleCache = taxonBase.getTitleCache().replace("species group", "group");
341
        if (!orig.equals(nameTitleCache) && !orig.equals(name.getFullTitleCache()) && !orig.equals(taxonTitleCache)){
342
            String titleCache = taxonBase.getAppendedPhrase() != null ? taxonBase.getTitleCache() : name.getTitleCache();
343
            logger.warn("Displaystring differs from titleCache. ID=" + id + ".\n   " + displayString + "\n   " + titleCache);
344
        }
242 345
    }
243 346

  
244 347

  
348
    /**
349
     * @param authorStr
350
     * @return
351
     */
352
    private TeamOrPersonBase<?> getNameAdditionAuthor(String authorStr) {
353
        TeamOrPersonBase<?> result;
354
        String[] splits = authorStr.split("(, | & )");
355
        if (splits.length == 1){
356
            Person person = Person.NewInstance();
357
            person.setNomenclaturalTitle(splits[0]);
358
            result = person;
359
        }else{
360
            Team team = Team.NewInstance();
361
            for (String split: splits){
362
                Person person = Person.NewInstance();
363
                person.setNomenclaturalTitle(split);
364
                team.addTeamMember(person);
365
            }
366
            result = team;
367
        }
368
        //TODO deduplicate
369
        return result;
370
    }
371

  
372
    /**
373
     * @param state
374
     * @param nameAddition
375
     * @return
376
     */
377
    private Person parseNomenNudumAuthor(EdaphobaseImportState state, IZoologicalName name, String nameAddition) {
378
        nameAddition = nameAddition.replace("nomen nudum [", "").replace("tz, 195]", "tz, 1952]")
379
                .replace("]", "");
380
        String[] split = nameAddition.split(", ");
381
        Integer year = Integer.valueOf(split[1]);
382
        name.setPublicationYear(year);
383
        //TODO deduplicate
384
        Person author = Person.NewInstance();
385
        author.setNomenclaturalTitle(split[0].trim());
386
        return author;
387
    }
388

  
245 389
    /**
246 390
     * @param state
247 391
     * @param id
......
447 591
        return rank;
448 592
    }
449 593

  
594
    protected void doNotes(AnnotatableEntity annotatableEntity, String notes, AnnotationType type) {
595
        if (StringUtils.isNotBlank(notes) && annotatableEntity != null ){
596
            String notesString = String.valueOf(notes);
597
            if (notesString.length() > 65530 ){
598
                notesString = notesString.substring(0, 65530) + "...";
599
                logger.warn("Notes string is longer than 65530 and was truncated: " + annotatableEntity);
600
            }
601
            Annotation notesAnnotation = Annotation.NewInstance(notesString, Language.UNDETERMINED());
602
            //notesAnnotation.setAnnotationType(AnnotationType.EDITORIAL());
603
            //notes.setCommentator(bmiConfig.getCommentator());
604
            annotatableEntity.addAnnotation(notesAnnotation);
605
        }
606
    }
607

  
450 608
    @Override
451 609
    protected boolean doCheck(EdaphobaseImportState state) {
452 610
        return true;

Also available in: Unified diff