Revision c4591f7a
Added by Andreas Müller over 5 years ago
app-import/src/main/java/eu/etaxonomy/cdm/io/berlinModel/in/BerlinModelOccurrenceSourceImport.java | ||
---|---|---|
11 | 11 |
|
12 | 12 |
import java.sql.ResultSet; |
13 | 13 |
import java.sql.SQLException; |
14 |
import java.util.ArrayList; |
|
15 | 14 |
import java.util.HashMap; |
16 | 15 |
import java.util.HashSet; |
17 |
import java.util.List; |
|
18 | 16 |
import java.util.Map; |
19 | 17 |
import java.util.Set; |
20 | 18 |
|
... | ... | |
31 | 29 |
import eu.etaxonomy.cdm.model.description.DescriptionElementBase; |
32 | 30 |
import eu.etaxonomy.cdm.model.description.DescriptionElementSource; |
33 | 31 |
import eu.etaxonomy.cdm.model.description.Distribution; |
34 |
import eu.etaxonomy.cdm.model.name.INonViralName;
|
|
32 |
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
|
35 | 33 |
import eu.etaxonomy.cdm.model.name.TaxonName; |
36 | 34 |
import eu.etaxonomy.cdm.model.reference.Reference; |
35 |
import eu.etaxonomy.cdm.model.taxon.Taxon; |
|
37 | 36 |
|
38 | 37 |
|
39 | 38 |
/** |
... | ... | |
53 | 52 |
|
54 | 53 |
|
55 | 54 |
private Map<String, Integer> sourceNumberRefIdMap; |
56 |
private Set<String> unfoundReferences = new HashSet<>(); |
|
55 |
private Map<String, Set<Integer>> nameCache2NameIdMap; |
|
56 |
private Set<String> notFoundReferences = new HashSet<>(); |
|
57 | 57 |
|
58 | 58 |
|
59 | 59 |
public BerlinModelOccurrenceSourceImport(){ |
... | ... | |
83 | 83 |
|
84 | 84 |
@Override |
85 | 85 |
protected void doInvoke(BerlinModelImportState state) { |
86 |
unfoundReferences = new HashSet<>();
|
|
86 |
notFoundReferences = new HashSet<>();
|
|
87 | 87 |
|
88 | 88 |
try { |
89 | 89 |
sourceNumberRefIdMap = makeSourceNumberReferenceIdMap(state); |
90 |
nameCache2NameIdMap = makeNameCache2NameIdMap(state); |
|
90 | 91 |
} catch (SQLException e) { |
91 | 92 |
e.printStackTrace(); |
92 | 93 |
throw new RuntimeException(e); |
93 | 94 |
} |
94 | 95 |
super.doInvoke(state); |
95 | 96 |
sourceNumberRefIdMap = null; |
96 |
if (unfoundReferences.size()>0){
|
|
97 |
String unfound = "'" + CdmUtils.concat("','", unfoundReferences.toArray(new String[]{})) + "'";
|
|
97 |
if (notFoundReferences.size()>0){
|
|
98 |
String unfound = "'" + CdmUtils.concat("','", notFoundReferences.toArray(new String[]{})) + "'";
|
|
98 | 99 |
logger.warn("Not found references: " + unfound); |
99 | 100 |
} |
100 | 101 |
return; |
... | ... | |
134 | 135 |
DescriptionElementSource originalSource = DescriptionElementSource.NewInstance(OriginalSourceType.PrimaryTaxonomicSource); |
135 | 136 |
originalSource.setCitation(ref); |
136 | 137 |
TaxonName taxonName; |
137 |
taxonName = TaxonName.castAndDeproxy(getName(state, oldName, oldNameFk)); |
|
138 |
taxonName = TaxonName.castAndDeproxy(getName(state, oldName, oldNameFk, occurrenceSourceId, distribution));
|
|
138 | 139 |
if (taxonName != null){ |
139 |
originalSource.setNameUsedInSource(taxonName); |
|
140 |
if(isNotBlank(oldName) && !oldName.equals(taxonName.getNameCache())){ |
|
141 |
originalSource.setOriginalNameString(oldName); |
|
142 |
} |
|
143 |
originalSource.setNameUsedInSource(taxonName); |
|
140 | 144 |
}else if(isNotBlank(oldName)){ |
141 | 145 |
originalSource.setOriginalNameString(oldName); |
142 | 146 |
} |
143 | 147 |
distribution.addSource(originalSource); |
144 | 148 |
}else{ |
145 | 149 |
logger.warn("reference for sourceNumber "+sourceNumber+" could not be found. OccurrenceSourceId: " + occurrenceSourceId ); |
146 |
unfoundReferences.add(sourceNumber);
|
|
150 |
notFoundReferences.add(sourceNumber);
|
|
147 | 151 |
} |
148 | 152 |
}else{ |
149 | 153 |
logger.warn("distribution ("+occurrenceFk+") for occurrence source (" + occurrenceSourceId + ") could not be found." ); |
... | ... | |
170 | 174 |
|
171 | 175 |
try{ |
172 | 176 |
Set<String> occurrenceIdSet = new HashSet<>(); |
173 |
Set<String> referenceIdSet = new HashSet<>(); |
|
174 | 177 |
Set<String> nameIdSet = new HashSet<>(); |
175 | 178 |
Set<String> sourceNumberSet = new HashSet<>(); |
179 |
Set<String> oldNamesSet = new HashSet<>(); |
|
176 | 180 |
while (rs.next()){ |
177 | 181 |
handleForeignKey(rs, occurrenceIdSet, "occurrenceFk"); |
178 | 182 |
handleForeignKey(rs, nameIdSet, "oldNameFk"); |
179 | 183 |
sourceNumberSet.add(CdmUtils.NzTrim(rs.getString("SourceNumber"))); |
184 |
oldNamesSet.add(CdmUtils.NzTrim(rs.getString("oldName"))); |
|
180 | 185 |
} |
181 | 186 |
|
182 | 187 |
sourceNumberSet.remove(""); |
183 |
referenceIdSet = handleSourceNumber(sourceNumberSet); |
|
184 |
|
|
188 |
Set<String> referenceIdSet = handleSourceNumber(sourceNumberSet); |
|
189 |
oldNamesSet.remove(""); |
|
190 |
Set<String> oldNameIdSet = handleOldNames(oldNamesSet); |
|
191 |
nameIdSet.addAll(oldNameIdSet); |
|
185 | 192 |
|
186 | 193 |
//occurrence map |
187 | 194 |
nameSpace = BerlinModelOccurrenceImport.NAMESPACE; |
... | ... | |
224 | 231 |
return referenceIdSet; |
225 | 232 |
} |
226 | 233 |
|
234 |
private Set<String> handleOldNames(Set<String> oldNamesSet) { |
|
235 |
Set<String> oldNameIdSet = new HashSet<>(); |
|
236 |
|
|
237 |
for(String oldName : oldNamesSet){ |
|
238 |
if (isNotBlank(oldName)){ |
|
239 |
Set<Integer> nameIds = nameCache2NameIdMap.get(oldName); |
|
240 |
for (Integer nameId : nameIds){ |
|
241 |
oldNameIdSet.add(String.valueOf(nameId)); |
|
242 |
} |
|
243 |
} |
|
244 |
} |
|
245 |
return oldNameIdSet; |
|
246 |
} |
|
247 |
|
|
227 | 248 |
|
228 | 249 |
|
229 | 250 |
/** |
... | ... | |
233 | 254 |
* @return |
234 | 255 |
*/ |
235 | 256 |
boolean isFirstTimeNoNameByService = true; |
236 |
private INonViralName getName(BerlinModelImportState state, String oldName, Integer oldNameFk) {
|
|
257 |
private TaxonName getName(BerlinModelImportState state, String oldName, Integer oldNameFk, Integer occSourceId, Distribution distribution) {
|
|
237 | 258 |
TaxonName taxonName = (TaxonName)state.getRelatedObject(BerlinModelTaxonNameImport.NAMESPACE, String.valueOf(oldNameFk)); |
238 |
if (taxonName == null && oldName != null){ |
|
239 |
if (isFirstTimeNoNameByService){ |
|
240 |
logger.warn("oldName not checked against names in BerlinModel. Just take it as a string"); |
|
241 |
isFirstTimeNoNameByService = false; |
|
242 |
} |
|
243 |
List<INonViralName> names = new ArrayList<>(); |
|
244 |
// names = getNameService().getNamesByNameCache(oldName); |
|
245 |
if (names.isEmpty()){ |
|
246 |
return null; |
|
247 |
}else { |
|
248 |
if (names.size()> 1){ |
|
249 |
logger.info("There is more than one name matching oldName: " + oldName + "."); |
|
250 |
} |
|
251 |
return names.get(0); |
|
252 |
//taxonName = nameParser.parseSimpleName(oldName); |
|
253 |
} |
|
259 |
if (oldNameFk != null && taxonName == null){ |
|
260 |
logger.warn("OldNameFk "+oldNameFk+" exists but taxonName not found for occSource: " + occSourceId); |
|
261 |
} |
|
262 |
if (isNotBlank(oldName)){ |
|
263 |
if (taxonName == null){ |
|
264 |
if (isFirstTimeNoNameByService){ |
|
265 |
logger.warn("oldName not checked against names in BerlinModel. Just take it as a string"); |
|
266 |
isFirstTimeNoNameByService = false; |
|
267 |
} |
|
268 |
Set<TaxonName> names = getOldNames(state, oldName); |
|
269 |
if (names.isEmpty()){ |
|
270 |
logger.warn("No name found for freetext oldName '"+oldName+"'; occSourceId: " + occSourceId); |
|
271 |
//taxonName = nameParser.parseSimpleName(oldName); |
|
272 |
return null; |
|
273 |
}else { |
|
274 |
if (names.size()> 1){ |
|
275 |
TaxonName synName = getFirstSynonymName(state, names, distribution, occSourceId); |
|
276 |
if (synName == null){ |
|
277 |
logger.warn("There is more than one matching oldName for '"+oldName+"' but none of them is a synonym of the accepted taxon. Take arbitrary one. OccSourceId: " + occSourceId); |
|
278 |
return names.iterator().next(); |
|
279 |
}else{ |
|
280 |
return synName; |
|
281 |
} |
|
282 |
}else{ |
|
283 |
return names.iterator().next(); |
|
284 |
} |
|
285 |
} |
|
286 |
}else if (!oldName.equals(taxonName.getNameCache())){ |
|
287 |
logger.warn("Old name freetext and linked name nameCache are not equal: " + oldName + "/" + taxonName.getNameCache() + "; occSourceId: " + occSourceId); |
|
288 |
return taxonName; |
|
289 |
}else{ |
|
290 |
return taxonName; |
|
291 |
} |
|
292 |
}else{ |
|
293 |
return taxonName; |
|
254 | 294 |
} |
255 |
return taxonName; |
|
256 | 295 |
} |
257 | 296 |
|
258 | 297 |
/** |
298 |
* @param state |
|
299 |
* @param names |
|
300 |
* @param taxon |
|
301 |
* @return |
|
302 |
*/ |
|
303 |
private TaxonName getFirstSynonymName(BerlinModelImportState state, Set<TaxonName> names, Distribution distribution, Integer occSourceId) { |
|
304 |
Taxon taxon = CdmBase.deproxy(distribution.getInDescription(), TaxonDescription.class).getTaxon(); |
|
305 |
Set<TaxonName> synonyms = taxon.getSynonymNames(); |
|
306 |
TaxonName result = null; |
|
307 |
for (TaxonName name : names){ |
|
308 |
if (synonyms.contains(name)){ |
|
309 |
if (result != null){ |
|
310 |
logger.warn("There is more than 1 matching synonym for " + name.getNameCache() + "; occSourceId: " + occSourceId); |
|
311 |
} |
|
312 |
result = name; |
|
313 |
} |
|
314 |
} |
|
315 |
return result; |
|
316 |
} |
|
317 |
|
|
318 |
/** |
|
319 |
* @param state |
|
320 |
* @param oldName |
|
321 |
* @return |
|
322 |
*/ |
|
323 |
private Set<TaxonName> getOldNames(BerlinModelImportState state, String oldName) { |
|
324 |
Set<Integer> nameIds = nameCache2NameIdMap.get(oldName); |
|
325 |
Set<TaxonName> names = new HashSet<>(nameIds.size()); |
|
326 |
for (Integer id : nameIds){ |
|
327 |
TaxonName name = (TaxonName)state.getRelatedObject(BerlinModelTaxonNameImport.NAMESPACE, String.valueOf(id)); |
|
328 |
names.add(name); |
|
329 |
} |
|
330 |
return names; |
|
331 |
} |
|
332 |
|
|
333 |
/** |
|
259 | 334 |
* Creates a map which maps source numbers on references |
260 | 335 |
* @param state |
261 | 336 |
* @return |
... | ... | |
284 | 359 |
return result; |
285 | 360 |
} |
286 | 361 |
|
362 |
/** |
|
363 |
* Creates a map which maps nameCaches to nameIDs numbers on references |
|
364 |
* @param state |
|
365 |
* @return |
|
366 |
* @throws SQLException |
|
367 |
*/ |
|
368 |
private Map<String, Set<Integer>> makeNameCache2NameIdMap(BerlinModelImportState state) throws SQLException { |
|
369 |
Map<String, Set<Integer>> result = new HashMap<>(); |
|
370 |
|
|
371 |
Source source = state.getConfig().getSource(); |
|
372 |
String strQuery = " SELECT NameId, nameCache " + |
|
373 |
" FROM Name " + |
|
374 |
" WHERE (nameCache IS NOT NULL) AND (nameCache NOT LIKE '') "; |
|
375 |
|
|
376 |
ResultSet rs = source.getResultSet(strQuery) ; |
|
377 |
while (rs.next()){ |
|
378 |
int nameId = rs.getInt("NameId"); |
|
379 |
String nameCache = rs.getString("nameCache"); |
|
380 |
if (isNotBlank(nameCache)){ |
|
381 |
nameCache = nameCache.trim(); |
|
382 |
Set<Integer> set = result.get(nameCache); |
|
383 |
if (set == null){ |
|
384 |
set = new HashSet<>(); |
|
385 |
result.put(nameCache, set); |
|
386 |
} |
|
387 |
set.add(nameId); |
|
388 |
} |
|
389 |
} |
|
390 |
return result; |
|
391 |
} |
|
392 |
|
|
393 |
|
|
394 |
|
|
287 | 395 |
@Override |
288 | 396 |
protected boolean doCheck(BerlinModelImportState state){ |
289 | 397 |
IOValidator<BerlinModelImportState> validator = new BerlinModelOccurrenceSourceImportValidator(); |
Also available in: Unified diff
ref #7798 import freetext nameInSource for occurrences best way and log all problems