Revision 59ef8f07
Added by Andreas Müller about 8 years ago
app-import/src/main/java/eu/etaxonomy/cdm/io/edaphobase/EdaphobaseReferenceImport.java | ||
---|---|---|
13 | 13 |
import java.sql.SQLException; |
14 | 14 |
import java.util.HashMap; |
15 | 15 |
import java.util.HashSet; |
16 |
import java.util.List; |
|
17 | 16 |
import java.util.Map; |
18 | 17 |
import java.util.Set; |
19 |
import java.util.UUID; |
|
20 | 18 |
|
19 |
import org.apache.commons.lang3.StringUtils; |
|
21 | 20 |
import org.apache.log4j.Logger; |
22 |
import org.codehaus.plexus.util.StringUtils; |
|
23 | 21 |
import org.springframework.stereotype.Component; |
24 | 22 |
|
23 |
import eu.etaxonomy.cdm.common.DOI; |
|
25 | 24 |
import eu.etaxonomy.cdm.io.common.IPartitionedIO; |
26 | 25 |
import eu.etaxonomy.cdm.io.common.ImportHelper; |
27 | 26 |
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner; |
28 |
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException; |
|
29 |
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase; |
|
30 | 27 |
import eu.etaxonomy.cdm.model.common.CdmBase; |
31 |
import eu.etaxonomy.cdm.model.name.Rank; |
|
32 |
import eu.etaxonomy.cdm.model.name.ZoologicalName; |
|
28 |
import eu.etaxonomy.cdm.model.common.TimePeriod; |
|
33 | 29 |
import eu.etaxonomy.cdm.model.reference.Reference; |
34 |
import eu.etaxonomy.cdm.model.taxon.Synonym; |
|
35 |
import eu.etaxonomy.cdm.model.taxon.Taxon; |
|
36 |
import eu.etaxonomy.cdm.model.taxon.TaxonBase; |
|
30 |
import eu.etaxonomy.cdm.model.reference.ReferenceFactory; |
|
37 | 31 |
|
38 | 32 |
/** |
39 | 33 |
* @author a.mueller |
... | ... | |
42 | 36 |
*/ |
43 | 37 |
@Component |
44 | 38 |
public class EdaphobaseReferenceImport extends EdaphobaseImportBase { |
45 |
private static final long serialVersionUID = -9138378836474086070L; |
|
46 |
private static final Logger logger = Logger.getLogger(EdaphobaseReferenceImport.class); |
|
39 |
private static final long serialVersionUID = 6895687693249076160L; |
|
47 | 40 |
|
48 |
private static final String tableName = "tax_taxon"; |
|
41 |
@SuppressWarnings("unused") |
|
42 |
private static final Logger logger = Logger.getLogger(EdaphobaseReferenceImport.class); |
|
49 | 43 |
|
50 |
private static final String pluralString = "taxa";
|
|
44 |
private static final String tableName = "lit_document";
|
|
51 | 45 |
|
52 |
private static final Object AUTHOR_NAMESPACE = "tax_author_name"; |
|
46 |
private static final String pluralString = "documents"; |
|
47 |
// |
|
48 |
// private static final Object AUTHOR_NAMESPACE = "tax_author_name"; |
|
53 | 49 |
|
54 | 50 |
/** |
55 | 51 |
* @param tableName |
... | ... | |
61 | 57 |
|
62 | 58 |
@Override |
63 | 59 |
protected String getIdQuery(EdaphobaseImportState state) { |
64 |
return "SELECT DISTINCT taxon_id FROM tax_taxon t " |
|
65 |
+ " ORDER BY taxon_id"; |
|
60 |
return " SELECT DISTINCT document_id " |
|
61 |
+ " FROM lit_document ld INNER JOIN tax_taxon t ON t.tax_document = ld.document_id " |
|
62 |
+ " UNION " |
|
63 |
+ " SELECT DISTINCT pd.document_id " |
|
64 |
+ " FROM lit_document ld INNER JOIN tax_taxon t ON t.tax_document = ld.document_id " |
|
65 |
+ " INNER JOIN lit_document pd ON pd.document_id = ld.parent_document_fk_document_id " |
|
66 |
+ " ORDER BY document_id "; |
|
66 | 67 |
} |
67 | 68 |
|
68 | 69 |
@Override |
69 | 70 |
protected String getRecordQuery(EdaphobaseImportConfigurator config) { |
70 |
String result = " SELECT DISTINCT t.*, r.value as rankStr, pr.value as parentRankStr, ppr.value as grandParentRankStr, " |
|
71 |
+ " pt.name as parentName, ppt.name as grandParentName " |
|
72 |
+ " FROM tax_taxon t " |
|
73 |
+ " LEFT JOIN tax_taxon pt ON t.parent_taxon_fk = pt.taxon_id " |
|
74 |
+ " LEFT JOIN tax_taxon ppt ON pt.parent_taxon_fk = ppt.taxon_id" |
|
75 |
+ " LEFT OUTER JOIN tax_rank_en r ON r.element_id = t.tax_rank_fk " |
|
76 |
+ " LEFT OUTER JOIN tax_rank_en pr ON pr.element_id = pt.tax_rank_fk " |
|
77 |
+ " LEFT OUTER JOIN tax_rank_en ppr ON pr.element_id = ppt.tax_rank_fk " |
|
78 |
+ " WHERE t.taxon_id IN (@IDSET)"; |
|
71 |
String result = " SELECT * " |
|
72 |
+ " FROM lit_document ld " |
|
73 |
+ " WHERE ld.document_id IN (@IDSET)"; |
|
79 | 74 |
result = result.replace("@IDSET", IPartitionedIO.ID_LIST_TOKEN); |
80 | 75 |
return result; |
81 | 76 |
} |
82 | 77 |
|
83 |
@Override |
|
84 |
protected void doInvoke(EdaphobaseImportState state) { |
|
85 |
super.doInvoke(state); |
|
86 |
} |
|
87 |
|
|
88 |
|
|
89 | 78 |
@Override |
90 | 79 |
public boolean doPartition(ResultSetPartitioner partitioner, EdaphobaseImportState state) { |
91 | 80 |
ResultSet rs = partitioner.getResultSet(); |
92 |
Set<TaxonBase> taxaToSave = new HashSet<>();
|
|
81 |
Set<Reference> referencesToSave = new HashSet<>();
|
|
93 | 82 |
try { |
94 | 83 |
while (rs.next()){ |
95 |
// // "JPASampleBook" |
|
96 |
// // "JPAJournal" |
|
97 |
// // "JPASample" |
|
98 |
// "JPAThesis" |
|
99 |
// // "JPALitOther" |
|
100 |
// // "JPACollection" |
|
101 |
// // "JPADocument" |
|
102 |
// // "JPABibliography" |
|
103 |
// // "JPAProject" |
|
104 |
// // "JPARawData" |
|
105 |
// "JPAArticle" |
|
106 |
// "JPABook" |
|
107 |
// "JPAChapter" |
|
108 |
// // "JPACollectionObject" |
|
109 |
// // "JPACollectionContainer" |
|
110 |
|
|
111 |
|
|
112 |
|
|
113 |
Integer id = nullSafeInt(rs, "taxon_id"); |
|
114 |
Integer year = nullSafeInt(rs, "tax_year"); |
|
115 |
boolean isBrackets = rs.getBoolean("tax_brackets"); |
|
116 |
String remark = rs.getString("remark"); |
|
117 |
String nameStr = rs.getString("name"); |
|
118 |
String authorName = rs.getString("tax_author_name"); |
|
119 |
//parentTaxonFk |
|
120 |
//rankFk |
|
121 |
//document |
|
122 |
boolean isValid = rs.getBoolean("valid"); |
|
123 |
boolean idDeleted = rs.getBoolean("deleted"); |
|
124 |
String displayString = rs.getString("display_string"); |
|
125 |
Integer version = nullSafeInt(rs, "versionfield"); |
|
126 |
String pages = rs.getString("pages"); |
|
127 |
String treeIndex = rs.getString("path_to_root"); |
|
128 |
// Integer rankFk = nullSafeInt(rs, "tax_rank_fk"); |
|
129 |
String nameAddition = rs.getString("name_addition"); |
|
130 |
String officialRemark = rs.getString("official_remark"); |
|
131 |
boolean isGroup = rs.getBoolean("taxonomic_group"); |
|
132 |
String rankStr = rs.getString("rankStr"); |
|
133 |
String parentRankStr = rs.getString("parentRankStr"); |
|
134 |
String grandParentRankStr = rs.getString("grandParentRankStr"); |
|
135 |
String parentNameStr = rs.getString("parentName"); |
|
136 |
String grandParentNameStr = rs.getString("grandParentName"); |
|
137 |
|
|
138 |
|
|
139 |
TaxonBase<?> taxonBase; |
|
140 |
Reference<?> sec = null; //TODO |
|
141 |
|
|
142 |
//Name etc. |
|
143 |
Rank rank = makeRank(state, rankStr); |
|
144 |
ZoologicalName name = ZoologicalName.NewInstance(rank); |
|
145 |
setNamePart(nameStr, rank, name); |
|
146 |
Rank parentRank = makeRank(state, parentRankStr); |
|
147 |
setNamePart(parentNameStr, parentRank, name); |
|
148 |
Rank parentParentRank = makeRank(state, grandParentRankStr); |
|
149 |
setNamePart(grandParentNameStr, parentParentRank, name); |
|
150 |
|
|
151 |
//Authors |
|
152 |
if (StringUtils.isNotBlank(authorName)){ |
|
153 |
TeamOrPersonBase<?> author = state.getRelatedObject(AUTHOR_NAMESPACE, authorName, TeamOrPersonBase.class); |
|
154 |
if (author == null){ |
|
155 |
logger.warn("Author not found in state: " + authorName); |
|
156 |
}else{ |
|
157 |
if (isBrackets){ |
|
158 |
name.setBasionymAuthorship(author); |
|
159 |
name.setOriginalPublicationYear(year); |
|
160 |
}else{ |
|
161 |
name.setCombinationAuthorship(author); |
|
162 |
name.setPublicationYear(year); |
|
163 |
} |
|
164 |
} |
|
165 |
} |
|
166 | 84 |
|
167 |
|
|
168 |
if (isValid){ |
|
169 |
taxonBase = Taxon.NewInstance(name, sec); |
|
170 |
}else{ |
|
171 |
taxonBase = Synonym.NewInstance(name, sec); |
|
172 |
} |
|
173 |
taxaToSave.add(taxonBase); |
|
174 |
|
|
175 |
//remarks |
|
176 |
doNotes(taxonBase, remark); |
|
177 |
|
|
178 |
//id |
|
179 |
ImportHelper.setOriginalSource(taxonBase, state.getTransactionalSourceReference(), id, TAXON_NAMESPACE); |
|
180 |
ImportHelper.setOriginalSource(name, state.getTransactionalSourceReference(), id, TAXON_NAMESPACE); |
|
85 |
handleSingleReference(state, rs, referencesToSave); |
|
181 | 86 |
|
182 | 87 |
} |
183 | 88 |
} catch (SQLException e) { |
... | ... | |
185 | 90 |
e.printStackTrace(); |
186 | 91 |
} |
187 | 92 |
|
188 |
getTaxonService().saveOrUpdate(taxaToSave); |
|
93 |
getReferenceService().saveOrUpdate(referencesToSave); |
|
94 |
|
|
189 | 95 |
return true; |
190 | 96 |
} |
191 | 97 |
|
192 |
|
|
193 |
@Override |
|
194 |
public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, |
|
195 |
EdaphobaseImportState state) { |
|
196 |
Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>(); |
|
197 |
Map<String, TeamOrPersonBase<?>> authorMap = new HashMap<>(); |
|
198 |
Set<String> authorSet = new HashSet<>(); |
|
199 |
try { |
|
200 |
while (rs.next()){ |
|
201 |
String authorStr = rs.getString("tax_author_name"); |
|
202 |
authorSet.add(authorStr); |
|
203 |
} |
|
204 |
} catch (SQLException e) { |
|
205 |
e.printStackTrace(); |
|
98 |
/** |
|
99 |
* @param state |
|
100 |
* @param rs |
|
101 |
* @param referencesToSave |
|
102 |
* @throws SQLException |
|
103 |
*/ |
|
104 |
private void handleSingleReference(EdaphobaseImportState state, ResultSet rs, Set<Reference> referencesToSave) throws SQLException { |
|
105 |
Integer id = nullSafeInt(rs, "document_id"); |
|
106 |
String dtype = rs.getString("dtype"); |
|
107 |
String issue = rs.getString("issue"); |
|
108 |
String orderer = rs.getString("orderer"); |
|
109 |
String place = rs.getString("place"); |
|
110 |
Integer pageFrom = nullSafeInt(rs, "page_from"); |
|
111 |
Integer pageTo = nullSafeInt(rs, "page_to"); |
|
112 |
String subtitle = rs.getString("subtitle"); |
|
113 |
Integer year = nullSafeInt(rs, "year"); |
|
114 |
String isbn = rs.getString("isbn"); |
|
115 |
//refers_to_literature |
|
116 |
//refers_to_collection |
|
117 |
//refers_to_observation |
|
118 |
String remark = rs.getString("remark"); |
|
119 |
String volume = rs.getString("volume"); |
|
120 |
//abbreviation (no record) |
|
121 |
String title = rs.getString("title"); |
|
122 |
String issn = rs.getString("issn"); |
|
123 |
//circulation //2 records |
|
124 |
String keywords = rs.getString("keywords"); |
|
125 |
String abstractt = rs.getString("abstract"); |
|
126 |
String parallel_title = rs.getString("parallel_title"); |
|
127 |
//language_fk_language_id |
|
128 |
//document_type_fk_document_type_id |
|
129 |
//editor_fk_person_id |
|
130 |
Integer editorFk = nullSafeInt(rs, "editor_fk_person_id"); |
|
131 |
|
|
132 |
// Integer parentFk = nullSafeInt(rs, "parent_document_fk_document_id"); |
|
133 |
//publisher_fk_publisher_id |
|
134 |
//deleted |
|
135 |
//chapter_no |
|
136 |
//versionfield |
|
137 |
String doi = rs.getString("doi"); |
|
138 |
String displayString = rs.getString("display_string"); |
|
139 |
//aquisistion_date, aquisition_type, adoption_date, ex_colletion, barcode_prefix, barcode_org_prefix |
|
140 |
//barcode_type, collection_status, barcode, typus_form, |
|
141 |
|
|
142 |
//taxon_for_scope, taxon_is_scope |
|
143 |
//language_fk, document_type_backup |
|
144 |
|
|
145 |
Integer documentType = nullSafeInt(rs, "document_type"); |
|
146 |
//normalized_title, normalized_abk_official_remark |
|
147 |
|
|
148 |
Reference<?> ref = makeReferenceType(documentType, dtype); |
|
149 |
ref.setTitle(title); |
|
150 |
ref.setPlacePublished(place); |
|
151 |
ref.setIssn(issn); |
|
152 |
ref.setIsbn(isbn); |
|
153 |
if (pageFrom != null || pageTo != null){ |
|
154 |
String pageStr = pageFrom == null ? "" : String.valueOf(pageFrom); |
|
155 |
pageStr = pageTo == null ? pageStr : "-" + pageTo; |
|
156 |
ref.setPages(pageStr); |
|
206 | 157 |
} |
207 |
|
|
208 |
//Authors |
|
209 |
Set<UUID> uuidSet = new HashSet<>(); |
|
210 |
for (String authorStr : authorSet){ |
|
211 |
UUID uuid = state.getAuthorUuid(authorStr); |
|
212 |
uuidSet.add(uuid); |
|
158 |
if (year != null){ |
|
159 |
ref.setDatePublished(TimePeriod.NewInstance(year)); |
|
213 | 160 |
} |
214 |
List<TeamOrPersonBase<?>> authors = (List)getAgentService().find(uuidSet); |
|
215 |
Map<UUID, TeamOrPersonBase<?>> authorUuidMap = new HashMap<>(); |
|
216 |
for (TeamOrPersonBase<?> author : authors){ |
|
217 |
authorUuidMap.put(author.getUuid(), author); |
|
161 |
ref.setVolume(volume); |
|
162 |
ref.setReferenceAbstract(abstractt); |
|
163 |
if (StringUtils.isNotBlank(doi)){ |
|
164 |
try { |
|
165 |
String doiStr = doi; |
|
166 |
if (doiStr.startsWith("dx.doi.org/")){ |
|
167 |
doiStr = doiStr.substring(11); |
|
168 |
} |
|
169 |
ref.setDoi(DOI.fromString(doiStr)); |
|
170 |
} catch (IllegalArgumentException e) { |
|
171 |
logger.warn("DOI could not be parsed: " + doi); |
|
172 |
} |
|
218 | 173 |
} |
174 |
ref.setEdition(issue); |
|
219 | 175 |
|
220 |
for (String authorStr : authorSet){ |
|
221 |
UUID uuid = state.getAuthorUuid(authorStr); |
|
222 |
TeamOrPersonBase<?> author = authorUuidMap.get(uuid); |
|
223 |
authorMap.put(authorStr, author); |
|
224 |
} |
|
225 |
result.put(AUTHOR_NAMESPACE, authorMap); |
|
176 |
//id |
|
177 |
ImportHelper.setOriginalSource(ref, state.getTransactionalSourceReference(), id, REFERENCE_NAMESPACE); |
|
226 | 178 |
|
227 |
return result;
|
|
179 |
referencesToSave.add(ref);
|
|
228 | 180 |
} |
229 | 181 |
|
230 |
private void setNamePart(String nameStr, Rank rank, ZoologicalName name) { |
|
231 |
if (rank != null){ |
|
232 |
if (rank.isSupraGeneric() || rank.isGenus()){ |
|
233 |
if (StringUtils.isBlank(name.getGenusOrUninomial())){ |
|
234 |
name.setGenusOrUninomial(nameStr); |
|
235 |
} |
|
236 |
}else if (rank.isInfraGeneric()){ |
|
237 |
if (StringUtils.isBlank(name.getInfraGenericEpithet())){ |
|
238 |
name.setInfraGenericEpithet(nameStr); |
|
239 |
} |
|
240 |
}else if (rank.isSpeciesAggregate() || rank.isSpecies()){ |
|
241 |
if (StringUtils.isBlank(name.getSpecificEpithet())){ |
|
242 |
name.setSpecificEpithet(nameStr); |
|
243 |
} |
|
244 |
}else if (rank.isInfraSpecific()){ |
|
245 |
if (StringUtils.isBlank(name.getInfraSpecificEpithet())){ |
|
246 |
name.setInfraSpecificEpithet(nameStr); |
|
247 |
} |
|
248 |
} |
|
182 |
|
|
183 |
/** |
|
184 |
* @param documentType |
|
185 |
* @return |
|
186 |
*/ |
|
187 |
private Reference<?> makeReferenceType(Integer documentType, String dtype) { |
|
188 |
if (documentType == 11914){ |
|
189 |
return ReferenceFactory.newArticle(); |
|
190 |
} else if (documentType == 11916){ |
|
191 |
return ReferenceFactory.newBook(); |
|
192 |
} else if (documentType == 11915){ |
|
193 |
return ReferenceFactory.newPrintSeries(); |
|
194 |
} else if (documentType == 11913){ |
|
195 |
return ReferenceFactory.newJournal(); |
|
196 |
} else if (documentType == 11917){ |
|
197 |
return ReferenceFactory.newBookSection(); |
|
198 |
} else if (documentType == 11912 || documentType == 11919 || documentType == 11924 ){ |
|
199 |
Reference<?> ref = ReferenceFactory.newGeneric(); |
|
200 |
return ref; |
|
201 |
} else { |
|
202 |
throw new RuntimeException("DocumentType not yet supported: " + documentType + ", " + dtype); |
|
249 | 203 |
} |
250 | 204 |
} |
251 | 205 |
|
252 |
private Rank makeRank(EdaphobaseImportState state, String rankStr) { |
|
253 |
Rank rank = null; |
|
254 |
try { |
|
255 |
rank = state.getTransformer().getRankByKey(rankStr); |
|
256 |
} catch (UndefinedTransformerMethodException e) { |
|
257 |
e.printStackTrace(); |
|
258 |
} |
|
259 |
return rank; |
|
206 |
@Override |
|
207 |
public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, |
|
208 |
EdaphobaseImportState state) { |
|
209 |
Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>(); |
|
210 |
// Map<String, TeamOrPersonBase<?>> authorMap = new HashMap<>(); |
|
211 |
// Set<String> authorSet = new HashSet<>(); |
|
212 |
// try { |
|
213 |
// while (rs.next()){ |
|
214 |
// String authorStr = rs.getString("tax_author_name"); |
|
215 |
// authorSet.add(authorStr); |
|
216 |
// } |
|
217 |
// } catch (SQLException e) { |
|
218 |
// e.printStackTrace(); |
|
219 |
// } |
|
220 |
// |
|
221 |
// //Authors |
|
222 |
// Set<UUID> uuidSet = new HashSet<>(); |
|
223 |
// for (String authorStr : authorSet){ |
|
224 |
// UUID uuid = state.getAuthorUuid(authorStr); |
|
225 |
// uuidSet.add(uuid); |
|
226 |
// } |
|
227 |
// List<TeamOrPersonBase<?>> authors = (List)getAgentService().find(uuidSet); |
|
228 |
// Map<UUID, TeamOrPersonBase<?>> authorUuidMap = new HashMap<>(); |
|
229 |
// for (TeamOrPersonBase<?> author : authors){ |
|
230 |
// authorUuidMap.put(author.getUuid(), author); |
|
231 |
// } |
|
232 |
// |
|
233 |
// for (String authorStr : authorSet){ |
|
234 |
// UUID uuid = state.getAuthorUuid(authorStr); |
|
235 |
// TeamOrPersonBase<?> author = authorUuidMap.get(uuid); |
|
236 |
// authorMap.put(authorStr, author); |
|
237 |
// } |
|
238 |
// result.put(AUTHOR_NAMESPACE, authorMap); |
|
239 |
|
|
240 |
return result; |
|
260 | 241 |
} |
261 | 242 |
|
243 |
|
|
244 |
|
|
262 | 245 |
@Override |
263 | 246 |
protected boolean doCheck(EdaphobaseImportState state) { |
264 | 247 |
return false; |
... | ... | |
266 | 249 |
|
267 | 250 |
@Override |
268 | 251 |
protected boolean isIgnore(EdaphobaseImportState state) { |
269 |
return ! state.getConfig().isDoTaxa();
|
|
252 |
return ! state.getConfig().isDoReferences();
|
|
270 | 253 |
} |
271 | 254 |
|
272 | 255 |
} |
Also available in: Unified diff
Latest changes to Edaphobase import