Revision b6f9eb47
Added by Andreas Müller almost 4 years ago
eu.etaxonomy.taxeditor.store/src/main/java/eu/etaxonomy/taxeditor/parser/ParseHandler.java | ||
---|---|---|
8 | 8 |
*/ |
9 | 9 |
package eu.etaxonomy.taxeditor.parser; |
10 | 10 |
|
11 |
import java.util.ArrayList; |
|
12 |
import java.util.List; |
|
13 |
|
|
14 |
import org.apache.log4j.Logger; |
|
15 |
|
|
11 | 16 |
import eu.etaxonomy.cdm.api.service.INameService; |
12 | 17 |
import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper; |
18 |
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase; |
|
13 | 19 |
import eu.etaxonomy.cdm.model.name.INonViralName; |
20 |
import eu.etaxonomy.cdm.model.name.ITaxonNameBase; |
|
14 | 21 |
import eu.etaxonomy.cdm.model.name.Rank; |
15 | 22 |
import eu.etaxonomy.cdm.model.name.TaxonName; |
16 | 23 |
import eu.etaxonomy.cdm.model.name.TaxonNameFactory; |
24 |
import eu.etaxonomy.cdm.model.reference.INomenclaturalReference; |
|
25 |
import eu.etaxonomy.cdm.model.reference.Reference; |
|
26 |
import eu.etaxonomy.cdm.strategy.match.MatchException; |
|
27 |
import eu.etaxonomy.cdm.strategy.match.MatchStrategyConfigurator.MatchStrategy; |
|
17 | 28 |
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl; |
29 |
import eu.etaxonomy.taxeditor.model.MessagingUtils; |
|
18 | 30 |
import eu.etaxonomy.taxeditor.preference.PreferencesUtil; |
19 | 31 |
import eu.etaxonomy.taxeditor.store.CdmStore; |
20 | 32 |
|
... | ... | |
26 | 38 |
*/ |
27 | 39 |
public class ParseHandler{ |
28 | 40 |
|
41 |
public static final Logger logger = Logger.getLogger(ParseHandler.class); |
|
42 |
|
|
43 |
|
|
29 | 44 |
private static NonViralNameParserImpl nonViralNameParser = NonViralNameParserImpl.NewInstance(); |
30 | 45 |
|
31 | 46 |
/** |
... | ... | |
69 | 84 |
public static INonViralName parseReferencedName(String unparsedNameString, Rank rank){ |
70 | 85 |
INonViralName name = nonViralNameParser.parseReferencedName(unparsedNameString, |
71 | 86 |
PreferencesUtil.getPreferredNomenclaturalCode(), rank); |
72 |
// if (name.hasProblem()) { |
|
73 |
// name.setFullTitleCache(unparsedNameString); |
|
74 |
// } |
|
75 | 87 |
return name; |
76 | 88 |
} |
77 | 89 |
|
... | ... | |
98 | 110 |
* @return The parsed NonViralName object |
99 | 111 |
*/ |
100 | 112 |
public INonViralName parse(String unparsedNameString){ |
101 |
|
|
102 | 113 |
nonViralNameParser.parseReferencedName(name, unparsedNameString, |
103 | 114 |
name.getRank(), true); |
104 |
|
|
105 |
// if (name.hasProblem()) { |
|
106 |
// name.setFullTitleCache(unparsedNameString); |
|
107 |
// } |
|
108 |
|
|
109 | 115 |
return name; |
110 | 116 |
} |
111 | 117 |
|
118 |
public INonViralName parseAndResolveDuplicates(String unparsedNameString){ |
|
119 |
// Instant time = java.time.Instant.now(); |
|
120 |
// logger.warn("Start resolve duplicate"); |
|
121 |
INonViralName result = parseAndResolveDuplicatesNew(unparsedNameString); |
|
122 |
// INonViralName result = parseAndResolveDuplicatesOld(unparsedNameString); |
|
123 |
// logger.warn("End resolve duplicate: "+ time.until(Instant.now(), ChronoUnit.MILLIS)); |
|
124 |
return result; |
|
125 |
} |
|
126 |
|
|
127 |
|
|
112 | 128 |
/** |
113 | 129 |
* Parses the string that was entered into the text widget and returns a |
114 | 130 |
* NonViralName object that resulted from the parsing process. |
... | ... | |
120 | 136 |
* |
121 | 137 |
* @return The parsed NonViralName object |
122 | 138 |
*/ |
123 |
public INonViralName parseAndResolveDuplicates(String unparsedNameString){ |
|
139 |
public INonViralName parseAndResolveDuplicatesNew(String unparsedNameString){
|
|
124 | 140 |
|
125 | 141 |
TaxonName parsedName = (TaxonName)CdmStore.getService(INameService.class).parseName(name, unparsedNameString, null, true, true).getCdmEntity(); |
126 | 142 |
return parsedName; |
127 | 143 |
} |
128 | 144 |
|
145 |
//********************************** OLD can be removed once parseAndResolveDuplicatesNew is stable ************************************/ |
|
146 |
private class MatchMatrix { |
|
147 |
List<TaxonName> duplicateNames = new ArrayList<>(); |
|
148 |
|
|
149 |
List<INomenclaturalReference> duplicateReferences = new ArrayList<>(); |
|
150 |
List<INomenclaturalReference> duplicateInReferences = new ArrayList<>(); |
|
151 |
|
|
152 |
List<TeamOrPersonBase> duplicateCombinationAuthorships = new ArrayList<>(); |
|
153 |
List<TeamOrPersonBase> duplicateExCombinationAuthorships = new ArrayList<>(); |
|
154 |
List<TeamOrPersonBase> duplicateBasionymAuthorships = new ArrayList<>(); |
|
155 |
List<TeamOrPersonBase> duplicateExBasionymAuthorships = new ArrayList<>(); |
|
156 |
} |
|
157 |
|
|
158 |
/** |
|
159 |
* Creates an empty <code>TaxonNameBase</code> instance with the nomenclatural code |
|
160 |
* currently set in preferences. |
|
161 |
* |
|
162 |
* @return a {@link eu.etaxonomy.cdm.model.name.TaxonNameBase} object. |
|
163 |
*/ |
|
164 |
public static TaxonName createEmptyNameOld(){ |
|
165 |
return (TaxonName)nonViralNameParser.getNonViralNameInstance("", PreferencesUtil.getPreferredNomenclaturalCode()); |
|
166 |
} |
|
167 |
|
|
168 |
/** |
|
169 |
* Parses the string that was entered into the text widget and returns a |
|
170 |
* NonViralName object that resulted from the parsing process. |
|
171 |
* |
|
172 |
* The atomized fields (scientific name, author and reference) will be matched |
|
173 |
* against the database to find possible duplicates. If duplicates were found |
|
174 |
* the respective parts of the NonViralName will be replaced with the found |
|
175 |
* objects. |
|
176 |
* |
|
177 |
* @return The parsed NonViralName object |
|
178 |
*/ |
|
179 |
public INonViralName parseAndResolveDuplicatesOld(String unparsedNameString){ |
|
180 |
|
|
181 |
INonViralName parsedName = parse(unparsedNameString); |
|
182 |
|
|
183 |
MatchMatrix matchMatrix = findMatches(parsedName); |
|
184 |
|
|
185 |
resolveDuplicates(parsedName, matchMatrix); |
|
186 |
|
|
187 |
return parsedName; |
|
188 |
} |
|
189 |
|
|
190 |
|
|
191 |
|
|
192 |
|
|
193 |
/** |
|
194 |
* @param name The name to resolve duplicates for. |
|
195 |
*/ |
|
196 |
private void resolveDuplicates(INonViralName name, MatchMatrix matchMatrix) { |
|
197 |
resolveDuplicateNames(name, matchMatrix); |
|
198 |
|
|
199 |
resolveAllDuplicateAuthors(name, matchMatrix); |
|
200 |
|
|
201 |
resolveDuplicateReferences(name, matchMatrix); |
|
202 |
|
|
203 |
// if(matchMatrix.duplicateInReferences != null) { |
|
204 |
// resolveDuplicateInReferences(name, matchMatrix); |
|
205 |
// } |
|
206 |
} |
|
207 |
|
|
208 |
|
|
209 |
/** |
|
210 |
* @param name The name to resolve duplicates for. |
|
211 |
*/ |
|
212 |
private void resolveDuplicateNames(INonViralName name, MatchMatrix matchMatrix) { |
|
213 |
|
|
214 |
if (matchMatrix.duplicateNames.size() == 1){ |
|
215 |
name = matchMatrix.duplicateNames.iterator().next(); |
|
216 |
}else if(matchMatrix.duplicateNames.size() > 1){ |
|
217 |
// FIXME TODO resolve multiple duplications. Use first match for a start |
|
218 |
name = matchMatrix.duplicateNames.iterator().next(); |
|
219 |
} |
|
220 |
} |
|
221 |
|
|
222 |
/** |
|
223 |
* @param name The name to resolve duplicates for. |
|
224 |
*/ |
|
225 |
private void resolveDuplicateReferences(INonViralName name, MatchMatrix matchMatrix) { |
|
226 |
if(matchMatrix.duplicateReferences.size() == 1){ |
|
227 |
// exactly one match. We assume that the user wants this reference |
|
228 |
INomenclaturalReference duplicate = matchMatrix.duplicateReferences.iterator().next(); |
|
229 |
name.setNomenclaturalReference(duplicate); |
|
230 |
}else if(matchMatrix.duplicateReferences.size() > 1){ |
|
231 |
// FIXME TODO resolve multiple duplications. Use first match for a start |
|
232 |
INomenclaturalReference duplicate = matchMatrix.duplicateReferences.iterator().next(); |
|
233 |
name.setNomenclaturalReference(duplicate); |
|
234 |
} |
|
235 |
//if reference is new but the in reference is already in db |
|
236 |
if (matchMatrix.duplicateReferences.size() == 0 && (name.getNomenclaturalReference() != null && name.getNomenclaturalReference().getInReference() != null) && matchMatrix.duplicateInReferences.size() > 0){ |
|
237 |
resolveDuplicateInReferences(name, matchMatrix); |
|
238 |
} |
|
239 |
} |
|
240 |
|
|
241 |
/** |
|
242 |
* @param name The name to resolve duplicates for. |
|
243 |
*/ |
|
244 |
private void resolveDuplicateInReferences(INonViralName name, MatchMatrix matchMatrix) { |
|
245 |
Reference reference = HibernateProxyHelper.deproxy(name.getNomenclaturalReference()); |
|
246 |
|
|
247 |
if(matchMatrix.duplicateInReferences.size() > 0){ |
|
248 |
Reference inReference = (Reference) matchMatrix.duplicateInReferences.iterator().next(); |
|
249 |
reference.setInReference(inReference); |
|
250 |
MessagingUtils.warn(this.getClass(), reference.generateTitle()); |
|
251 |
// FIXME TODO resolve multiple duplications. We use first match for a start |
|
252 |
MessagingUtils.warn(this.getClass(), reference.getTitleCache()); |
|
253 |
} |
|
254 |
} |
|
255 |
|
|
256 |
|
|
257 |
/** |
|
258 |
* @param name The name to resolve duplicates for. |
|
259 |
*/ |
|
260 |
private void resolveAllDuplicateAuthors(INonViralName name, MatchMatrix matchMatrix) { |
|
261 |
|
|
262 |
if(matchMatrix.duplicateCombinationAuthorships.size() > 0){ |
|
263 |
name.setCombinationAuthorship(matchMatrix.duplicateCombinationAuthorships.iterator().next()); |
|
264 |
Reference reference = name.getNomenclaturalReference(); |
|
265 |
if(reference != null){ |
|
266 |
reference.setAuthorship(matchMatrix.duplicateCombinationAuthorships.iterator().next()); |
|
267 |
} |
|
268 |
// FIXME TODO resolve multiple duplications. We use first match for a start. |
|
269 |
} |
|
270 |
|
|
271 |
if(matchMatrix.duplicateExCombinationAuthorships.size() > 0){ |
|
272 |
name.setExCombinationAuthorship(matchMatrix.duplicateExCombinationAuthorships.iterator().next()); |
|
273 |
// FIXME TODO resolve multiple duplications. We use first match for a start. |
|
274 |
} |
|
275 |
|
|
276 |
if(matchMatrix.duplicateBasionymAuthorships.size() > 0){ |
|
277 |
name.setBasionymAuthorship(matchMatrix.duplicateBasionymAuthorships.iterator().next()); |
|
278 |
// FIXME TODO resolve multiple duplications. We use first match for a start. |
|
279 |
} |
|
280 |
|
|
281 |
if(matchMatrix.duplicateExBasionymAuthorships.size() > 0){ |
|
282 |
name.setExBasionymAuthorship(matchMatrix.duplicateExBasionymAuthorships.iterator().next()); |
|
283 |
// FIXME TODO resolve multiple duplications. We use first match for a start. |
|
284 |
} |
|
285 |
} |
|
286 |
|
|
287 |
/** |
|
288 |
* Splits a NonViralName into its parts and calls methods to find matches for these |
|
289 |
* parts in the database. |
|
290 |
* |
|
291 |
* @param name The NonViralName to find matches for. |
|
292 |
*/ |
|
293 |
private MatchMatrix findMatches(INonViralName name){ |
|
294 |
|
|
295 |
MatchMatrix matchMatrix = new MatchMatrix(); |
|
296 |
|
|
297 |
matchMatrix.duplicateNames = findMatchingLatinNames(name); |
|
298 |
|
|
299 |
matchMatrix.duplicateCombinationAuthorships = findMatchingAuthors(name.getCombinationAuthorship()); |
|
300 |
matchMatrix.duplicateExCombinationAuthorships = findMatchingAuthors(name.getExCombinationAuthorship()); |
|
301 |
matchMatrix.duplicateBasionymAuthorships = findMatchingAuthors(name.getBasionymAuthorship()); |
|
302 |
matchMatrix.duplicateExBasionymAuthorships = findMatchingAuthors(name.getExBasionymAuthorship()); |
|
303 |
|
|
304 |
INomenclaturalReference nomenclaturalReference = name.getNomenclaturalReference(); |
|
305 |
|
|
306 |
// check if the reference has an inreference and also check if the inReference already exists |
|
307 |
if(nomenclaturalReference != null){ |
|
308 |
Reference inReference = ((Reference)nomenclaturalReference).getInReference(); |
|
309 |
if(inReference != null){ |
|
310 |
matchMatrix.duplicateInReferences = findMatchingNomenclaturalReference(inReference); |
|
311 |
} |
|
312 |
} |
|
313 |
|
|
314 |
matchMatrix.duplicateReferences = findMatchingNomenclaturalReference(nomenclaturalReference); |
|
315 |
|
|
316 |
return matchMatrix; |
|
317 |
} |
|
318 |
|
|
319 |
/** |
|
320 |
* @param nomenclaturalReference The NomenclaturalReference to find matches for. |
|
321 |
* @return A <code>List</code> of possibly matching NomenclaturalReference's. |
|
322 |
*/ |
|
323 |
private List<INomenclaturalReference> findMatchingNomenclaturalReference(INomenclaturalReference nomenclaturalReference) { |
|
324 |
if(nomenclaturalReference == null) { |
|
325 |
return new ArrayList<INomenclaturalReference>(); |
|
326 |
} |
|
327 |
try{ |
|
328 |
|
|
329 |
return CdmStore.getCommonService().findMatching(nomenclaturalReference, MatchStrategy.Reference); |
|
330 |
|
|
331 |
}catch (MatchException e) { |
|
332 |
MessagingUtils.error(this.getClass(), "Error finding matching references", e); |
|
333 |
} |
|
334 |
return null; |
|
335 |
} |
|
336 |
|
|
337 |
/** |
|
338 |
* @param authorTeam The TeamOrPersonBase to find matches for. |
|
339 |
* @return A <code>List</code> of possibly matching TeamOrPersonBase's. |
|
340 |
*/ |
|
341 |
private List<TeamOrPersonBase> findMatchingAuthors(TeamOrPersonBase authorTeam) { |
|
342 |
if(authorTeam == null){ |
|
343 |
return new ArrayList<>(); |
|
344 |
} |
|
345 |
try{ |
|
346 |
return CdmStore.getCommonService().findMatching(authorTeam, MatchStrategy.TeamOrPerson); |
|
347 |
|
|
348 |
}catch (MatchException e) { |
|
349 |
MessagingUtils.error(this.getClass(), "Error finding matching authors", e); |
|
350 |
} |
|
351 |
return null; |
|
352 |
} |
|
353 |
|
|
354 |
/** |
|
355 |
* @param taxonName The TaxonNameBase to find matches for. |
|
356 |
* @return A <code>List</code> of possibly matching TaxonNameBase's. |
|
357 |
*/ |
|
358 |
private List<TaxonName> findMatchingLatinNames(ITaxonNameBase taxonName) { |
|
359 |
try { |
|
360 |
return CdmStore.getCommonService().findMatching(TaxonName.castAndDeproxy(taxonName), MatchStrategy.NonViralName); |
|
361 |
} catch (MatchException e) { |
|
362 |
MessagingUtils.error(this.getClass(), "Error finding matching names", e); |
|
363 |
} |
|
364 |
return null; |
|
365 |
} |
|
366 |
|
|
129 | 367 |
} |
Also available in: Unified diff
fix #9078 finally switch to new deduplication handling (but keep old code until fully reviewed)