ref #6517: make edaphobase taxon IDs stable for example data
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / edaphobase / EdaphobaseTaxonImport.java
1 /**
2 * Copyright (C) 2015 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.io.edaphobase;
10
11 import java.sql.ResultSet;
12 import java.sql.SQLException;
13 import java.util.HashMap;
14 import java.util.HashSet;
15 import java.util.List;
16 import java.util.Map;
17 import java.util.Set;
18 import java.util.UUID;
19
20 import org.apache.log4j.Logger;
21 import org.codehaus.plexus.util.StringUtils;
22 import org.springframework.stereotype.Component;
23
24 import eu.etaxonomy.cdm.io.common.IPartitionedIO;
25 import eu.etaxonomy.cdm.io.common.ImportHelper;
26 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
27 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
28 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
29 import eu.etaxonomy.cdm.model.common.CdmBase;
30 import eu.etaxonomy.cdm.model.common.Language;
31 import eu.etaxonomy.cdm.model.common.Marker;
32 import eu.etaxonomy.cdm.model.common.MarkerType;
33 import eu.etaxonomy.cdm.model.common.Representation;
34 import eu.etaxonomy.cdm.model.name.IZoologicalName;
35 import eu.etaxonomy.cdm.model.name.Rank;
36 import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
37 import eu.etaxonomy.cdm.model.reference.Reference;
38 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
39 import eu.etaxonomy.cdm.model.taxon.Synonym;
40 import eu.etaxonomy.cdm.model.taxon.Taxon;
41 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
42
43 /**
44 * @author a.mueller
45 * @date 18.12.2015
46 *
47 */
48 @Component
49 public class EdaphobaseTaxonImport extends EdaphobaseImportBase {
50 private static final long serialVersionUID = -9138378836474086070L;
51 private static final Logger logger = Logger.getLogger(EdaphobaseTaxonImport.class);
52
53 private static final String tableName = "tax_taxon";
54
55 private static final String pluralString = "taxa";
56
57 private static final Object AUTHOR_NAMESPACE = "tax_author_name";
58
59 /**
60 * @param tableName
61 * @param pluralString
62 */
63 public EdaphobaseTaxonImport() {
64 super(tableName, pluralString);
65 }
66
67 @Override
68 protected String getIdQuery(EdaphobaseImportState state) {
69 return "SELECT DISTINCT taxon_id FROM tax_taxon t "
70 + " ORDER BY taxon_id";
71 }
72
73 @Override
74 protected String getRecordQuery(EdaphobaseImportConfigurator config) {
75 String result = " SELECT DISTINCT t.*, r.value as rankStr, pr.value as parentRankStr, ppr.value as grandParentRankStr, "
76 + " pt.name as parentName, ppt.name as grandParentName "
77 + " FROM tax_taxon t "
78 + " LEFT JOIN tax_taxon pt ON t.parent_taxon_fk = pt.taxon_id "
79 + " LEFT JOIN tax_taxon ppt ON pt.parent_taxon_fk = ppt.taxon_id"
80 + " LEFT OUTER JOIN tax_rank_en r ON r.element_id = t.tax_rank_fk "
81 + " LEFT OUTER JOIN tax_rank_en pr ON pr.element_id = pt.tax_rank_fk "
82 + " LEFT OUTER JOIN tax_rank_en ppr ON ppr.element_id = ppt.tax_rank_fk "
83 + " WHERE t.taxon_id IN (@IDSET)";
84 result = result.replace("@IDSET", IPartitionedIO.ID_LIST_TOKEN);
85 return result;
86 }
87
88 @Override
89 protected void doInvoke(EdaphobaseImportState state) {
90 super.doInvoke(state);
91 }
92
93
94 @Override
95 public boolean doPartition(ResultSetPartitioner partitioner, EdaphobaseImportState state) {
96 ResultSet rs = partitioner.getResultSet();
97 Set<TaxonBase> taxaToSave = new HashSet<>();
98 try {
99 while (rs.next()){
100 makeSingleTaxon(state, rs, taxaToSave);
101 }
102 } catch (SQLException | UndefinedTransformerMethodException e) {
103 e.printStackTrace();
104 }
105
106 getTaxonService().saveOrUpdate(taxaToSave);
107 return true;
108 }
109
110 /**
111 * @param state
112 * @param rs
113 * @param taxaToSave
114 * @throws SQLException
115 * @throws UndefinedTransformerMethodException
116 */
117 private void makeSingleTaxon(EdaphobaseImportState state, ResultSet rs, Set<TaxonBase> taxaToSave)
118 throws SQLException, UndefinedTransformerMethodException {
119 Integer id = nullSafeInt(rs, "taxon_id");
120 Integer year = nullSafeInt(rs, "tax_year");
121 boolean isBrackets = rs.getBoolean("tax_brackets");
122 String remark = rs.getString("remark");
123 String nameStr = rs.getString("name");
124 String authorName = rs.getString("tax_author_name");
125 //parentTaxonFk
126 //rankFk
127 Integer nomRefId = nullSafeInt(rs, "tax_document");
128 boolean isValid = rs.getBoolean("valid");
129 boolean idDeleted = rs.getBoolean("deleted");
130 String displayString = rs.getString("display_string");
131 Integer version = nullSafeInt(rs, "versionfield");
132 String pages = rs.getString("pages");
133 String treeIndex = rs.getString("path_to_root");
134 // Integer rankFk = nullSafeInt(rs, "tax_rank_fk");
135 String nameAddition = rs.getString("name_addition");
136 String officialRemark = rs.getString("official_remark");
137 boolean isGroup = rs.getBoolean("taxonomic_group");
138 String rankStr = rs.getString("rankStr");
139 String parentRankStr = rs.getString("parentRankStr");
140 String grandParentRankStr = rs.getString("grandParentRankStr");
141 String parentNameStr = rs.getString("parentName");
142 String grandParentNameStr = rs.getString("grandParentName");
143
144 TaxonBase<?> taxonBase;
145
146 //Name etc.
147 Rank rank = makeRank(state, rankStr);
148 checkRankMarker(state, rank);
149 IZoologicalName name = TaxonNameFactory.NewZoologicalInstance(rank);
150 setNamePart(nameStr, rank, name);
151 Rank parentRank = makeRank(state, parentRankStr);
152 setNamePart(parentNameStr, parentRank, name);
153 Rank parentParentRank = makeRank(state, grandParentRankStr);
154 setNamePart(grandParentNameStr, parentParentRank, name);
155 if (parentParentRank != null && parentParentRank.isLower(Rank.GENUS()) || isBlank(name.getGenusOrUninomial()) ){
156 logger.warn("Grandparent rank is lower than genus for " + name.getTitleCache() + " (edapho-id: " + id + "; cdm-id: " + name.getId());
157 }
158
159 //Authors
160 if (StringUtils.isNotBlank(authorName)){
161 TeamOrPersonBase<?> author = state.getRelatedObject(AUTHOR_NAMESPACE, authorName, TeamOrPersonBase.class);
162 if (author == null){
163 logger.warn("Author not found in state: " + authorName);
164 }else{
165 if (isBrackets){
166 name.setBasionymAuthorship(author);
167 name.setOriginalPublicationYear(year);
168 }else{
169 name.setCombinationAuthorship(author);
170 name.setPublicationYear(year);
171 }
172 }
173 }
174
175 //nomRef
176 if (nomRefId != null){
177 Reference nomRef = state.getRelatedObject(REFERENCE_NAMESPACE, String.valueOf(nomRefId), Reference.class);
178 if (nomRef == null){
179 logger.warn("Reference " + nomRefId + " could not be found");
180 }
181 name.setNomenclaturalReference(nomRef);
182 }
183 name.setNomenclaturalMicroReference(StringUtils.isBlank(pages)? null : pages);
184
185 //taxon
186 Reference secRef = state.getRelatedObject(REFERENCE_NAMESPACE, state.getConfig().getSecUuid().toString(), Reference.class);
187 if (secRef == null){
188 secRef = makeSecRef(state);
189 }
190 if (isValid){
191 taxonBase = Taxon.NewInstance(name, secRef);
192 }else{
193 taxonBase = Synonym.NewInstance(name, secRef);
194 }
195 handleTaxonomicGroupMarker(state, taxonBase, isGroup);
196 taxaToSave.add(taxonBase);
197
198 //remarks
199 doNotes(taxonBase, remark);
200
201 //id
202 ImportHelper.setOriginalSource(taxonBase, state.getTransactionalSourceReference(), id, TAXON_NAMESPACE);
203 ImportHelper.setOriginalSource(name, state.getTransactionalSourceReference(), id, TAXON_NAMESPACE);
204 handleExampleIdentifiers(taxonBase, id);
205 }
206
207
208 static Map<Integer,UUID> idMap = new HashMap<>();
209 static{
210 idMap.put(86594, UUID.fromString("715c2370-45a4-450c-99f7-e196758979ca")); //Aporrectodea caliginosa
211 idMap.put(86593, UUID.fromString("230f1a69-5dcd-4829-a01c-17490a2fdf34")); //Aporrectodea
212 idMap.put(86684, UUID.fromString("0982dc0e-1a79-45a0-8abc-8166625b94b8")); //Achaeta
213 idMap.put(104328, UUID.fromString("15f0b5f8-44e4-4ae1-8b40-f36f0a049b27")); //Chamaedrilus
214 idMap.put(97537, UUID.fromString("899c62e3-a116-4c5b-b22a-c76e761cc32e")); //Araeolaimoides caecus
215 }
216
217 /**
218 * @param taxonBase
219 * @param id
220 */
221 private void handleExampleIdentifiers(TaxonBase<?> taxonBase, Integer id) {
222 if (idMap.get(id) != null){
223 taxonBase.setUuid(idMap.get(id));
224 }
225 }
226
227 /**
228 * @param state
229 * @param rank
230 * @throws UndefinedTransformerMethodException
231 */
232 private void checkRankMarker(EdaphobaseImportState state, Rank rank) throws UndefinedTransformerMethodException {
233
234 if (rank != null){
235 Set<Marker> markers = rank.getMarkers();
236 if ( markers.size() == 0){ //we assume that no markers exist. at least not for markers of unused ranks
237 UUID edaphoRankMarkerTypeUuid = state.getTransformer().getMarkerTypeUuid("EdaphoRankMarker");
238 MarkerType marker = getMarkerType(state, edaphoRankMarkerTypeUuid, "Edaphobase rank", "Rank used in Edaphobase", "EdaRk" );
239 Representation rep = Representation.NewInstance("Rang, verwendet in Edaphobase", "Edaphobase Rang", "EdaRg", Language.GERMAN());
240 marker.addRepresentation(rep);
241 rank.addMarker(Marker.NewInstance(marker, true));
242 getTermService().saveOrUpdate(rank);
243 }
244 }else{
245 logger.warn("Rank is null and marker can not be set");
246 }
247 }
248
249 /**
250 * @param state
251 * @param isGroup
252 * @param taxonBase
253 */
254 private void handleTaxonomicGroupMarker(EdaphobaseImportState state, TaxonBase<?> taxonBase, boolean isGroup) {
255 if (! isGroup){
256 return;
257 }else{
258 try {
259 MarkerType markerType = getMarkerType(state, state.getTransformer().getMarkerTypeUuid("TaxGrossgruppe"), "Tax. Gruppe", "Taxonomische Grossgruppe", "TGG", null, Language.GERMAN());
260 if (taxonBase.isInstanceOf(Synonym.class)){
261 logger.warn("Syonym is marked as 'taxonomische Grossgruppe'");
262 }
263 taxonBase.addMarker(Marker.NewInstance(markerType, true));
264 } catch (UndefinedTransformerMethodException e) {
265 }
266 }
267 }
268
269 /**
270 * @param state
271 * @return
272 */
273 private Reference makeSecRef(EdaphobaseImportState state) {
274 Reference ref = ReferenceFactory.newDatabase();
275 ref.setTitle(state.getConfig().getEdaphobaseSecundumTitle());
276 ref.setUuid(state.getConfig().getSecUuid());
277 state.addRelatedObject(REFERENCE_NAMESPACE, ref.getUuid().toString(), ref);
278 getReferenceService().save(ref);
279 return ref;
280 }
281
282 @Override
283 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs,
284 EdaphobaseImportState state) {
285 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
286 Map<String, TeamOrPersonBase<?>> authorMap = new HashMap<>();
287 Set<String> authorSet = new HashSet<>();
288 Set<String> referenceIdSet = new HashSet<String>();
289
290 try {
291 while (rs.next()){
292 String authorStr = rs.getString("tax_author_name");
293 authorSet.add(authorStr);
294 handleForeignKey(rs, referenceIdSet, "tax_document");
295 }
296 } catch (SQLException e) {
297 e.printStackTrace();
298 }
299
300 //Authors
301 Set<UUID> uuidSet = new HashSet<>();
302 for (String authorStr : authorSet){
303 UUID uuid = state.getAuthorUuid(authorStr);
304 uuidSet.add(uuid);
305 }
306 List<TeamOrPersonBase<?>> authors = (List)getAgentService().find(uuidSet);
307 Map<UUID, TeamOrPersonBase<?>> authorUuidMap = new HashMap<>();
308 for (TeamOrPersonBase<?> author : authors){
309 authorUuidMap.put(author.getUuid(), author);
310 }
311
312 for (String authorStr : authorSet){
313 UUID uuid = state.getAuthorUuid(authorStr);
314 TeamOrPersonBase<?> author = authorUuidMap.get(uuid);
315 authorMap.put(authorStr, author);
316 }
317 result.put(AUTHOR_NAMESPACE, authorMap);
318
319 //reference map
320 String nameSpace = REFERENCE_NAMESPACE;
321 Class<?> cdmClass = Reference.class;
322 Set<String> idSet = referenceIdSet;
323 Map<String, Reference> referenceMap = (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
324 result.put(nameSpace, referenceMap);
325
326 //secundum
327 UUID secUuid = state.getConfig().getSecUuid();
328 Reference secRef = getReferenceService().find(secUuid);
329 referenceMap.put(secUuid.toString(), secRef);
330
331 return result;
332 }
333
334 private void setNamePart(String nameStr, Rank rank, IZoologicalName name) {
335 if (rank != null){
336 if (rank.isSupraGeneric() || rank.isGenus()){
337 if (StringUtils.isBlank(name.getGenusOrUninomial())){
338 name.setGenusOrUninomial(nameStr);
339 }
340 }else if (rank.isInfraGeneric()){
341 if (StringUtils.isBlank(name.getInfraGenericEpithet())){
342 name.setInfraGenericEpithet(nameStr);
343 }
344 }else if (rank.isSpeciesAggregate() || rank.isSpecies()){
345 if (StringUtils.isBlank(name.getSpecificEpithet())){
346 name.setSpecificEpithet(nameStr);
347 }
348 }else if (rank.isInfraSpecific()){
349 if (StringUtils.isBlank(name.getInfraSpecificEpithet())){
350 name.setInfraSpecificEpithet(nameStr);
351 }
352 }
353 }
354 }
355
356 private Rank makeRank(EdaphobaseImportState state, String rankStr) {
357 Rank rank = null;
358 try {
359 rank = state.getTransformer().getRankByKey(rankStr);
360 } catch (UndefinedTransformerMethodException e) {
361 e.printStackTrace();
362 }
363 return rank;
364 }
365
366 @Override
367 protected boolean doCheck(EdaphobaseImportState state) {
368 return false;
369 }
370
371 @Override
372 protected boolean isIgnore(EdaphobaseImportState state) {
373 return ! state.getConfig().isDoTaxa();
374 }
375
376 }