Project

General

Profile

« Previous | Next » 

Revision 59ef8f07

Added by Andreas Müller about 8 years ago

Latest changes to Edaphobase import

View differences:

app-import/src/main/java/eu/etaxonomy/cdm/io/edaphobase/EdaphobaseReferenceImport.java
13 13
import java.sql.SQLException;
14 14
import java.util.HashMap;
15 15
import java.util.HashSet;
16
import java.util.List;
17 16
import java.util.Map;
18 17
import java.util.Set;
19
import java.util.UUID;
20 18

  
19
import org.apache.commons.lang3.StringUtils;
21 20
import org.apache.log4j.Logger;
22
import org.codehaus.plexus.util.StringUtils;
23 21
import org.springframework.stereotype.Component;
24 22

  
23
import eu.etaxonomy.cdm.common.DOI;
25 24
import eu.etaxonomy.cdm.io.common.IPartitionedIO;
26 25
import eu.etaxonomy.cdm.io.common.ImportHelper;
27 26
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
28
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
29
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
30 27
import eu.etaxonomy.cdm.model.common.CdmBase;
31
import eu.etaxonomy.cdm.model.name.Rank;
32
import eu.etaxonomy.cdm.model.name.ZoologicalName;
28
import eu.etaxonomy.cdm.model.common.TimePeriod;
33 29
import eu.etaxonomy.cdm.model.reference.Reference;
34
import eu.etaxonomy.cdm.model.taxon.Synonym;
35
import eu.etaxonomy.cdm.model.taxon.Taxon;
36
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
30
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
37 31

  
38 32
/**
39 33
 * @author a.mueller
......
42 36
 */
43 37
@Component
44 38
public class EdaphobaseReferenceImport extends EdaphobaseImportBase {
45
    private static final long serialVersionUID = -9138378836474086070L;
46
    private static final Logger logger = Logger.getLogger(EdaphobaseReferenceImport.class);
39
    private static final long serialVersionUID = 6895687693249076160L;
47 40

  
48
    private static final String tableName = "tax_taxon";
41
    @SuppressWarnings("unused")
42
    private static final Logger logger = Logger.getLogger(EdaphobaseReferenceImport.class);
49 43

  
50
    private static final String pluralString = "taxa";
44
    private static final String tableName = "lit_document";
51 45

  
52
    private static final Object AUTHOR_NAMESPACE = "tax_author_name";
46
    private static final String pluralString = "documents";
47
//
48
//    private static final Object AUTHOR_NAMESPACE = "tax_author_name";
53 49

  
54 50
    /**
55 51
     * @param tableName
......
61 57

  
62 58
    @Override
63 59
    protected String getIdQuery(EdaphobaseImportState state) {
64
        return "SELECT DISTINCT taxon_id FROM tax_taxon t "
65
                + " ORDER BY taxon_id";
60
        return    " SELECT DISTINCT document_id "
61
                + " FROM lit_document ld INNER JOIN tax_taxon t ON t.tax_document = ld.document_id "
62
                + " UNION "
63
                + " SELECT DISTINCT pd.document_id "
64
                + " FROM lit_document ld INNER JOIN tax_taxon t ON t.tax_document = ld.document_id "
65
                + " INNER JOIN lit_document pd ON pd.document_id = ld.parent_document_fk_document_id "
66
                + " ORDER BY document_id ";
66 67
    }
67 68

  
68 69
    @Override
69 70
    protected String getRecordQuery(EdaphobaseImportConfigurator config) {
70
        String result = " SELECT DISTINCT t.*, r.value as rankStr, pr.value as parentRankStr, ppr.value as grandParentRankStr, "
71
                    + " pt.name as parentName, ppt.name as grandParentName "
72
                + " FROM tax_taxon t "
73
                    + " LEFT JOIN tax_taxon pt ON t.parent_taxon_fk = pt.taxon_id "
74
                    + " LEFT JOIN tax_taxon ppt ON pt.parent_taxon_fk = ppt.taxon_id"
75
                    + " LEFT OUTER JOIN tax_rank_en r ON r.element_id = t.tax_rank_fk "
76
                    + " LEFT OUTER JOIN tax_rank_en pr ON pr.element_id = pt.tax_rank_fk "
77
                    + " LEFT OUTER JOIN tax_rank_en ppr ON pr.element_id = ppt.tax_rank_fk "
78
                + " WHERE t.taxon_id IN (@IDSET)";
71
        String result = " SELECT * "
72
                + " FROM lit_document ld "
73
                + " WHERE ld.document_id IN (@IDSET)";
79 74
        result = result.replace("@IDSET", IPartitionedIO.ID_LIST_TOKEN);
80 75
        return result;
81 76
    }
82 77

  
83
    @Override
84
    protected void doInvoke(EdaphobaseImportState state) {
85
        super.doInvoke(state);
86
    }
87

  
88

  
89 78
    @Override
90 79
    public boolean doPartition(ResultSetPartitioner partitioner, EdaphobaseImportState state) {
91 80
        ResultSet rs = partitioner.getResultSet();
92
        Set<TaxonBase> taxaToSave = new HashSet<>();
81
        Set<Reference> referencesToSave = new HashSet<>();
93 82
        try {
94 83
            while (rs.next()){
95
//  //              "JPASampleBook"
96
//  //              "JPAJournal"
97
//    //            "JPASample"
98
//                "JPAThesis"
99
//      //          "JPALitOther"
100
//    //            "JPACollection"
101
//    //            "JPADocument"
102
//   //             "JPABibliography"
103
//   //             "JPAProject"
104
//   //             "JPARawData"
105
//                "JPAArticle"
106
//                "JPABook"
107
//                "JPAChapter"
108
//   //             "JPACollectionObject"
109
//   //                "JPACollectionContainer"
110

  
111

  
112

  
113
                Integer id = nullSafeInt(rs, "taxon_id");
114
                Integer year = nullSafeInt(rs, "tax_year");
115
                boolean isBrackets = rs.getBoolean("tax_brackets");
116
                String remark = rs.getString("remark");
117
                String nameStr = rs.getString("name");
118
                String authorName = rs.getString("tax_author_name");
119
                //parentTaxonFk
120
                //rankFk
121
                //document
122
                boolean isValid = rs.getBoolean("valid");
123
                boolean idDeleted = rs.getBoolean("deleted");
124
                String displayString = rs.getString("display_string");
125
                Integer version = nullSafeInt(rs, "versionfield");
126
                String pages = rs.getString("pages");
127
                String treeIndex = rs.getString("path_to_root");
128
//                Integer rankFk = nullSafeInt(rs, "tax_rank_fk");
129
                String nameAddition = rs.getString("name_addition");
130
                String officialRemark = rs.getString("official_remark");
131
                boolean isGroup = rs.getBoolean("taxonomic_group");
132
                String rankStr = rs.getString("rankStr");
133
                String parentRankStr = rs.getString("parentRankStr");
134
                String grandParentRankStr = rs.getString("grandParentRankStr");
135
                String parentNameStr = rs.getString("parentName");
136
                String grandParentNameStr = rs.getString("grandParentName");
137

  
138

  
139
                TaxonBase<?> taxonBase;
140
                Reference<?> sec = null; //TODO
141

  
142
                //Name etc.
143
                Rank rank = makeRank(state, rankStr);
144
                ZoologicalName name = ZoologicalName.NewInstance(rank);
145
                setNamePart(nameStr, rank, name);
146
                Rank parentRank = makeRank(state, parentRankStr);
147
                setNamePart(parentNameStr, parentRank, name);
148
                Rank parentParentRank = makeRank(state, grandParentRankStr);
149
                setNamePart(grandParentNameStr, parentParentRank, name);
150

  
151
                //Authors
152
                if (StringUtils.isNotBlank(authorName)){
153
                    TeamOrPersonBase<?> author = state.getRelatedObject(AUTHOR_NAMESPACE, authorName, TeamOrPersonBase.class);
154
                    if (author == null){
155
                        logger.warn("Author not found in state: "  + authorName);
156
                    }else{
157
                        if (isBrackets){
158
                            name.setBasionymAuthorship(author);
159
                            name.setOriginalPublicationYear(year);
160
                        }else{
161
                            name.setCombinationAuthorship(author);
162
                            name.setPublicationYear(year);
163
                        }
164
                    }
165
                }
166 84

  
167

  
168
                if (isValid){
169
                    taxonBase = Taxon.NewInstance(name, sec);
170
                }else{
171
                    taxonBase = Synonym.NewInstance(name, sec);
172
                }
173
                taxaToSave.add(taxonBase);
174

  
175
                //remarks
176
                doNotes(taxonBase, remark);
177

  
178
                //id
179
                ImportHelper.setOriginalSource(taxonBase, state.getTransactionalSourceReference(), id, TAXON_NAMESPACE);
180
                ImportHelper.setOriginalSource(name, state.getTransactionalSourceReference(), id, TAXON_NAMESPACE);
85
                handleSingleReference(state, rs, referencesToSave);
181 86

  
182 87
            }
183 88
        } catch (SQLException e) {
......
185 90
            e.printStackTrace();
186 91
        }
187 92

  
188
        getTaxonService().saveOrUpdate(taxaToSave);
93
        getReferenceService().saveOrUpdate(referencesToSave);
94

  
189 95
        return true;
190 96
    }
191 97

  
192

  
193
    @Override
194
    public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs,
195
            EdaphobaseImportState state) {
196
        Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
197
        Map<String, TeamOrPersonBase<?>> authorMap = new HashMap<>();
198
        Set<String> authorSet = new HashSet<>();
199
        try {
200
            while (rs.next()){
201
                String authorStr = rs.getString("tax_author_name");
202
                authorSet.add(authorStr);
203
            }
204
        } catch (SQLException e) {
205
            e.printStackTrace();
98
    /**
99
     * @param state
100
     * @param rs
101
     * @param referencesToSave
102
     * @throws SQLException
103
     */
104
    private void handleSingleReference(EdaphobaseImportState state, ResultSet rs, Set<Reference> referencesToSave) throws SQLException {
105
        Integer id = nullSafeInt(rs, "document_id");
106
        String dtype = rs.getString("dtype");
107
        String issue = rs.getString("issue");
108
        String orderer = rs.getString("orderer");
109
        String place = rs.getString("place");
110
        Integer pageFrom = nullSafeInt(rs, "page_from");
111
        Integer pageTo = nullSafeInt(rs, "page_to");
112
        String subtitle = rs.getString("subtitle");
113
        Integer year = nullSafeInt(rs, "year");
114
        String isbn = rs.getString("isbn");
115
        //refers_to_literature
116
        //refers_to_collection
117
        //refers_to_observation
118
        String remark = rs.getString("remark");
119
        String volume = rs.getString("volume");
120
        //abbreviation (no record)
121
        String title = rs.getString("title");
122
        String issn = rs.getString("issn");
123
        //circulation //2 records
124
        String keywords = rs.getString("keywords");
125
        String abstractt = rs.getString("abstract");
126
        String parallel_title = rs.getString("parallel_title");
127
        //language_fk_language_id
128
        //document_type_fk_document_type_id
129
        //editor_fk_person_id
130
        Integer editorFk = nullSafeInt(rs, "editor_fk_person_id");
131

  
132
//        Integer parentFk = nullSafeInt(rs, "parent_document_fk_document_id");
133
        //publisher_fk_publisher_id
134
        //deleted
135
        //chapter_no
136
        //versionfield
137
        String doi = rs.getString("doi");
138
        String displayString = rs.getString("display_string");
139
        //aquisistion_date, aquisition_type, adoption_date, ex_colletion, barcode_prefix, barcode_org_prefix
140
        //barcode_type, collection_status, barcode, typus_form,
141

  
142
        //taxon_for_scope, taxon_is_scope
143
        //language_fk, document_type_backup
144

  
145
        Integer documentType = nullSafeInt(rs, "document_type");
146
        //normalized_title, normalized_abk_official_remark
147

  
148
        Reference<?> ref = makeReferenceType(documentType, dtype);
149
        ref.setTitle(title);
150
        ref.setPlacePublished(place);
151
        ref.setIssn(issn);
152
        ref.setIsbn(isbn);
153
        if (pageFrom != null || pageTo != null){ //pages are stored as "from-to"; either bound may be missing
154
            String pageStr = pageFrom == null ? "" : String.valueOf(pageFrom);
155
            pageStr = pageTo == null ? pageStr : pageStr + "-" + pageTo; //append the end page, keeping the start page
156
            ref.setPages(pageStr);
206 157
        }
207

  
208
        //Authors
209
        Set<UUID> uuidSet = new HashSet<>();
210
        for (String authorStr : authorSet){
211
            UUID uuid = state.getAuthorUuid(authorStr);
212
            uuidSet.add(uuid);
158
        if (year != null){
159
            ref.setDatePublished(TimePeriod.NewInstance(year));
213 160
        }
214
        List<TeamOrPersonBase<?>> authors = (List)getAgentService().find(uuidSet);
215
        Map<UUID, TeamOrPersonBase<?>> authorUuidMap = new HashMap<>();
216
        for (TeamOrPersonBase<?> author : authors){
217
            authorUuidMap.put(author.getUuid(), author);
161
        ref.setVolume(volume);
162
        ref.setReferenceAbstract(abstractt);
163
        if (StringUtils.isNotBlank(doi)){
164
            try {
165
                String doiStr = doi;
166
                if (doiStr.startsWith("dx.doi.org/")){
167
                    doiStr = doiStr.substring(11);
168
                }
169
                ref.setDoi(DOI.fromString(doiStr));
170
            } catch (IllegalArgumentException e) {
171
                logger.warn("DOI could not be parsed: " + doi);
172
            }
218 173
        }
174
        ref.setEdition(issue);
219 175

  
220
        for (String authorStr : authorSet){
221
            UUID uuid = state.getAuthorUuid(authorStr);
222
            TeamOrPersonBase<?> author = authorUuidMap.get(uuid);
223
            authorMap.put(authorStr, author);
224
        }
225
        result.put(AUTHOR_NAMESPACE, authorMap);
176
        //id
177
        ImportHelper.setOriginalSource(ref, state.getTransactionalSourceReference(), id, REFERENCE_NAMESPACE);
226 178

  
227
        return result;
179
        referencesToSave.add(ref);
228 180
    }
229 181

  
230
    private void setNamePart(String nameStr, Rank rank, ZoologicalName name) {
231
        if (rank != null){
232
            if (rank.isSupraGeneric() || rank.isGenus()){
233
                if (StringUtils.isBlank(name.getGenusOrUninomial())){
234
                    name.setGenusOrUninomial(nameStr);
235
                }
236
            }else if (rank.isInfraGeneric()){
237
                if (StringUtils.isBlank(name.getInfraGenericEpithet())){
238
                    name.setInfraGenericEpithet(nameStr);
239
                }
240
            }else if (rank.isSpeciesAggregate() || rank.isSpecies()){
241
                if (StringUtils.isBlank(name.getSpecificEpithet())){
242
                    name.setSpecificEpithet(nameStr);
243
                }
244
            }else if (rank.isInfraSpecific()){
245
                if (StringUtils.isBlank(name.getInfraSpecificEpithet())){
246
                    name.setInfraSpecificEpithet(nameStr);
247
                }
248
            }
182

  
183
    /** Creates a new reference of the type matching the given document type id; unsupported or missing ids raise a RuntimeException.
184
     * @param documentType the document type id (read from column document_type), may be <code>null</code>; dtype is only used in the error message
185
     * @return the newly created reference as returned by {@link ReferenceFactory}
186
     */
187
    private Reference<?> makeReferenceType(Integer documentType, String dtype) {
188
        int type = documentType == null ? -1 : documentType; //null-safe unboxing; -1 matches none of the supported ids
189
        if (type == 11914){
190
            return ReferenceFactory.newArticle();
191
        } else if (type == 11916){
192
            return ReferenceFactory.newBook();
193
        } else if (type == 11915){
194
            return ReferenceFactory.newPrintSeries();
195
        } else if (type == 11913){
196
            return ReferenceFactory.newJournal();
197
        } else if (type == 11917){
198
            return ReferenceFactory.newBookSection();
199
        } else if (type == 11912 || type == 11919 || type == 11924 ){
200
            return ReferenceFactory.newGeneric();
201
        } else {
202
            throw new RuntimeException("DocumentType not yet supported: " + documentType + ", " + dtype);
249 203
        }
250 204
    }
251 205

  
252
    private Rank makeRank(EdaphobaseImportState state, String rankStr) {
253
        Rank rank = null;
254
        try {
255
            rank = state.getTransformer().getRankByKey(rankStr);
256
        } catch (UndefinedTransformerMethodException e) {
257
            e.printStackTrace();
258
        }
259
        return rank;
206
    @Override
207
    public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs,
208
            EdaphobaseImportState state) {
209
        Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
210
//        Map<String, TeamOrPersonBase<?>> authorMap = new HashMap<>();
211
//        Set<String> authorSet = new HashSet<>();
212
//        try {
213
//            while (rs.next()){
214
//                String authorStr = rs.getString("tax_author_name");
215
//                authorSet.add(authorStr);
216
//            }
217
//        } catch (SQLException e) {
218
//            e.printStackTrace();
219
//        }
220
//
221
//        //Authors
222
//        Set<UUID> uuidSet = new HashSet<>();
223
//        for (String authorStr : authorSet){
224
//            UUID uuid = state.getAuthorUuid(authorStr);
225
//            uuidSet.add(uuid);
226
//        }
227
//        List<TeamOrPersonBase<?>> authors = (List)getAgentService().find(uuidSet);
228
//        Map<UUID, TeamOrPersonBase<?>> authorUuidMap = new HashMap<>();
229
//        for (TeamOrPersonBase<?> author : authors){
230
//            authorUuidMap.put(author.getUuid(), author);
231
//        }
232
//
233
//        for (String authorStr : authorSet){
234
//            UUID uuid = state.getAuthorUuid(authorStr);
235
//            TeamOrPersonBase<?> author = authorUuidMap.get(uuid);
236
//            authorMap.put(authorStr, author);
237
//        }
238
//        result.put(AUTHOR_NAMESPACE, authorMap);
239

  
240
        return result;
260 241
    }
261 242

  
243

  
244

  
262 245
    @Override
263 246
    protected boolean doCheck(EdaphobaseImportState state) {
264 247
        return false;
......
266 249

  
267 250
    @Override
268 251
    protected boolean isIgnore(EdaphobaseImportState state) {
269
        return ! state.getConfig().isDoTaxa();
252
        return ! state.getConfig().isDoReferences();
270 253
    }
271 254

  
272 255
}

Also available in: Unified diff