Revision 1a63486c
Added by Andreas Müller over 5 years ago
app-import/src/main/java/eu/etaxonomy/cdm/io/berlinModel/in/BerlinModelTaxonNameImport.java | ||
---|---|---|
11 | 11 |
|
12 | 12 |
import java.sql.ResultSet; |
13 | 13 |
import java.sql.SQLException; |
14 |
import java.util.ArrayList; |
|
15 |
import java.util.Collections; |
|
14 | 16 |
import java.util.HashMap; |
15 | 17 |
import java.util.HashSet; |
18 |
import java.util.List; |
|
16 | 19 |
import java.util.Map; |
20 |
import java.util.Objects; |
|
17 | 21 |
import java.util.Set; |
18 | 22 |
import java.util.UUID; |
19 | 23 |
|
... | ... | |
24 | 28 |
import eu.etaxonomy.cdm.database.update.DatabaseTypeNotSupportedException; |
25 | 29 |
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer; |
26 | 30 |
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelTaxonNameImportValidator; |
27 |
import eu.etaxonomy.cdm.io.common.IImportConfigurator; |
|
28 | 31 |
import eu.etaxonomy.cdm.io.common.IOValidator; |
29 | 32 |
import eu.etaxonomy.cdm.io.common.ImportHelper; |
30 | 33 |
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner; |
... | ... | |
37 | 40 |
import eu.etaxonomy.cdm.model.common.ExtensionType; |
38 | 41 |
import eu.etaxonomy.cdm.model.common.Language; |
39 | 42 |
import eu.etaxonomy.cdm.model.common.Representation; |
43 |
import eu.etaxonomy.cdm.model.common.VerbatimTimePeriod; |
|
40 | 44 |
import eu.etaxonomy.cdm.model.name.IBotanicalName; |
41 | 45 |
import eu.etaxonomy.cdm.model.name.ICultivarPlantName; |
42 |
import eu.etaxonomy.cdm.model.name.INonViralName; |
|
43 | 46 |
import eu.etaxonomy.cdm.model.name.IZoologicalName; |
44 | 47 |
import eu.etaxonomy.cdm.model.name.Rank; |
45 | 48 |
import eu.etaxonomy.cdm.model.name.TaxonName; |
46 | 49 |
import eu.etaxonomy.cdm.model.name.TaxonNameFactory; |
50 |
import eu.etaxonomy.cdm.model.reference.IJournal; |
|
47 | 51 |
import eu.etaxonomy.cdm.model.reference.INomenclaturalReference; |
48 | 52 |
import eu.etaxonomy.cdm.model.reference.Reference; |
49 | 53 |
import eu.etaxonomy.cdm.model.reference.ReferenceFactory; |
54 |
import eu.etaxonomy.cdm.model.reference.ReferenceType; |
|
50 | 55 |
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException; |
56 |
import eu.etaxonomy.cdm.strategy.match.FieldMatcher; |
|
57 |
import eu.etaxonomy.cdm.strategy.match.IMatchStrategy; |
|
58 |
import eu.etaxonomy.cdm.strategy.match.IMatchStrategyEqual; |
|
59 |
import eu.etaxonomy.cdm.strategy.match.MatchException; |
|
60 |
import eu.etaxonomy.cdm.strategy.match.MatchMode; |
|
61 |
import eu.etaxonomy.cdm.strategy.match.MatchResult; |
|
62 |
import eu.etaxonomy.cdm.strategy.match.MatchStrategyFactory; |
|
51 | 63 |
import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser; |
52 | 64 |
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl; |
65 |
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser; |
|
53 | 66 |
|
54 | 67 |
/** |
55 | 68 |
* @author a.mueller |
... | ... | |
65 | 78 |
|
66 | 79 |
public static final String NAMESPACE = "TaxonName"; |
67 | 80 |
|
81 |
public static final String NAMESPACE_PRELIM = "RefDetail_Preliminary"; |
|
82 |
|
|
68 | 83 |
public static final UUID SOURCE_ACC_UUID = UUID.fromString("c3959b4f-d876-4b7a-a739-9260f4cafd1c"); |
69 | 84 |
|
70 | 85 |
private static int modCount = 5000; |
... | ... | |
79 | 94 |
|
80 | 95 |
@Override |
81 | 96 |
protected String getIdQuery(BerlinModelImportState state) { |
82 |
if (state.getConfig().getNameIdTable()==null ){
|
|
97 |
if (state.getConfig().getNameIdTable() == null ){
|
|
83 | 98 |
return super.getIdQuery(state); |
84 | 99 |
}else{ |
85 |
return "SELECT nameId FROM " + state.getConfig().getNameIdTable() + ""; |
|
100 |
return "SELECT nameId FROM " + state.getConfig().getNameIdTable() |
|
101 |
// + " WHERE nameId = 146109 " |
|
102 |
; |
|
86 | 103 |
} |
87 | 104 |
} |
88 | 105 |
|
... | ... | |
106 | 123 |
" RefDetail.FullRefCache, RefDetail.FullNomRefCache, RefDetail.PreliminaryFlag AS RefDetailPrelim, RefDetail.Details, " + |
107 | 124 |
" RefDetail.SecondarySources, Rank.RankAbbrev, Rank.Rank " + |
108 | 125 |
facultativCols + |
109 |
" FROM Name LEFT OUTER JOIN RefDetail ON Name.NomRefDetailFk = RefDetail.RefDetailId AND " +
|
|
110 |
" Name.NomRefFk = RefDetail.RefFk " +
|
|
126 |
" FROM Name LEFT OUTER JOIN RefDetail ON Name.NomRefDetailFk = RefDetail.RefDetailId " + |
|
127 |
" AND Name.NomRefFk = RefDetail.RefFk " +
|
|
111 | 128 |
" LEFT OUTER JOIN Rank ON Name.RankFk = Rank.rankID " + |
112 |
" WHERE name.nameId IN ("+ID_LIST_TOKEN+") "; |
|
113 |
//strQuery += " AND RefDetail.PreliminaryFlag = 1 ";
|
|
129 |
" WHERE name.nameId IN ("+ID_LIST_TOKEN+") ";
|
|
130 |
// strRecordQuery += " AND RefDetail.PreliminaryFlag = 1 ";
|
|
114 | 131 |
//strQuery += " AND Name.Created_When > '03.03.2004' "; |
115 | 132 |
return strRecordQuery + ""; |
116 | 133 |
} |
117 | 134 |
|
118 | 135 |
|
136 |
private class ReferenceMapping{ |
|
137 |
public Map<String, ReferenceWrapper> titleMapping = new HashMap<>(); |
|
138 |
public Map<String, ReferenceWrapper> abbrevMapping = new HashMap<>(); |
|
139 |
|
|
140 |
private class ReferenceWrapper { |
|
141 |
Set<ReferenceCandidate> candidates = new HashSet<>(); |
|
142 |
|
|
143 |
public Set<ReferenceCandidate> getCandidates() { |
|
144 |
return candidates; |
|
145 |
} |
|
146 |
public void add(Reference ref, String detail) { |
|
147 |
candidates.add(new ReferenceCandidate(ref, detail)); |
|
148 |
} |
|
149 |
} |
|
150 |
private void unload(){ |
|
151 |
titleMapping.clear(); |
|
152 |
abbrevMapping.clear(); |
|
153 |
} |
|
154 |
|
|
155 |
public void addCandidate(Reference ref, String detail) { |
|
156 |
String hash = refHash(ref); |
|
157 |
ReferenceWrapper wrap = abbrevMapping.get(hash); |
|
158 |
if (wrap == null){ |
|
159 |
wrap = new ReferenceWrapper(); |
|
160 |
abbrevMapping.put(hash, wrap); |
|
161 |
} |
|
162 |
wrap.add(ref, detail); |
|
163 |
} |
|
164 |
|
|
165 |
|
|
166 |
/** |
|
167 |
* @param nomRef |
|
168 |
* @return |
|
169 |
*/ |
|
170 |
public Set<ReferenceCandidate> getCandidates(Reference exemplar) { |
|
171 |
String hash = refHash(exemplar); |
|
172 |
ReferenceMapping.ReferenceWrapper wrap = abbrevMapping.get(hash); |
|
173 |
if (wrap == null){ |
|
174 |
return new HashSet<>(); |
|
175 |
}else{ |
|
176 |
return wrap.getCandidates(); |
|
177 |
} |
|
178 |
} |
|
179 |
|
|
180 |
@Override |
|
181 |
public String toString(){ |
|
182 |
return "ReferenceMapping"; |
|
183 |
} |
|
184 |
} |
|
185 |
|
|
186 |
private ReferenceMapping refMapping = new ReferenceMapping(); |
|
187 |
|
|
188 |
private void loadReferenceMap(BerlinModelImportState state){ |
|
189 |
List<Reference> list = getReferenceService().list(null, null, null, null, null); |
|
190 |
for (Reference ref : list){ |
|
191 |
refMapping.addCandidate(ref, null); |
|
192 |
} |
|
193 |
|
|
194 |
// try { |
|
195 |
// |
|
196 |
// String query = "SELECT * FROM Reference "; |
|
197 |
// |
|
198 |
// ResultSet rs = state.getConfig().getDestination().executeQuery(query); |
|
199 |
// while (rs.next()){ |
|
200 |
// String title = rs.getString("title"); |
|
201 |
// String abbrevTitle = rs.getString("abbrevTitle"); |
|
202 |
// int id = rs.getInt("id"); |
|
203 |
// UUID uuid = UUID.fromString(rs.getString("uuid")); |
|
204 |
// String titleCache = rs.getString("titleCache"); |
|
205 |
// String abbrevTitleCache = rs.getString("abbrevTitleCache"); |
|
206 |
// String typeStr = rs.getString("refType"); |
|
207 |
// ReferenceType type = ReferenceType.valueOf(typeStr); |
|
208 |
// |
|
209 |
// ReferenceMapping.ReferenceWrapper wrapping = refMapping.new ReferenceWrapper(title, id, uuid, titleCache, type) ; |
|
210 |
// refMapping.titleMapping.put(title, wrapping); |
|
211 |
// wrapping = refMapping.new ReferenceWrapper(abbrevTitle, id, uuid, abbrevTitleCache, type) ; |
|
212 |
// |
|
213 |
// } |
|
214 |
// } catch (SQLException e) { |
|
215 |
// e.printStackTrace(); |
|
216 |
// } |
|
217 |
} |
|
218 |
|
|
219 |
private void unloadReferenceMap(){ |
|
220 |
refMapping.unload(); |
|
221 |
refMapping = null; |
|
222 |
} |
|
223 |
|
|
119 | 224 |
|
120 | 225 |
@Override |
121 | 226 |
protected void doInvoke(BerlinModelImportState state) { |
122 |
//update rank labels if necessary |
|
227 |
loadReferenceMap(state); |
|
228 |
|
|
229 |
//update rank labels if necessary |
|
123 | 230 |
String strAbbrev = state.getConfig().getInfrGenericRankAbbrev(); |
124 | 231 |
Rank rank = Rank.INFRAGENERICTAXON(); |
125 | 232 |
testRankAbbrev(strAbbrev, rank); |
... | ... | |
129 | 236 |
testRankAbbrev(strAbbrev, rank); |
130 | 237 |
|
131 | 238 |
super.doInvoke(state); |
239 |
unloadReferenceMap(); |
|
240 |
printMatchResults(); |
|
132 | 241 |
} |
133 | 242 |
|
134 | 243 |
private void testRankAbbrev(String strAbbrev, Rank rank) { |
... | ... | |
243 | 352 |
cdmAttrName = "nomenclaturalMicroReference"; |
244 | 353 |
success &= ImportHelper.addStringValue(rs, taxonName, dbAttrName, cdmAttrName, BLANK_TO_NULL); |
245 | 354 |
|
355 |
//authorTeams |
|
356 |
if (teamMap != null ){ |
|
357 |
taxonName.setCombinationAuthorship(getAuthorTeam(teamMap, authorFk, nameId, config)); |
|
358 |
taxonName.setExCombinationAuthorship(getAuthorTeam(teamMap, exAuthorFk, nameId, config)); |
|
359 |
taxonName.setBasionymAuthorship(getAuthorTeam(teamMap, basAuthorFk, nameId, config)); |
|
360 |
taxonName.setExBasionymAuthorship(getAuthorTeam(teamMap, exBasAuthorFk, nameId, config)); |
|
361 |
}else{ |
|
362 |
logger.warn("TeamMap is null"); |
|
363 |
success = false; |
|
364 |
} |
|
365 |
|
|
246 | 366 |
//nomRef |
247 |
success &= makeNomenclaturalReference(config, taxonName, nameId, rs, partitioner);
|
|
367 |
success &= makeNomenclaturalReference(state, taxonName, nameId, rs, partitioner);
|
|
248 | 368 |
|
249 | 369 |
//Source_Acc |
250 | 370 |
boolean colExists = true; |
... | ... | |
267 | 387 |
success &= doIdCreatedUpdatedNotes(state, taxonName, rs, nameId, NAMESPACE, excludeUpdated, excludeNotes); |
268 | 388 |
handleNameNotes(state, taxonName, rs, nameId); |
269 | 389 |
|
270 |
//NonViralName |
|
271 |
if (taxonName.isNonViral()){ |
|
272 |
INonViralName nonViralName = taxonName; |
|
273 |
|
|
274 |
//authorTeams |
|
275 |
if (teamMap != null ){ |
|
276 |
nonViralName.setCombinationAuthorship(getAuthorTeam(teamMap, authorFk, nameId, config)); |
|
277 |
nonViralName.setExCombinationAuthorship(getAuthorTeam(teamMap, exAuthorFk, nameId, config)); |
|
278 |
nonViralName.setBasionymAuthorship(getAuthorTeam(teamMap, basAuthorFk, nameId, config)); |
|
279 |
nonViralName.setExBasionymAuthorship(getAuthorTeam(teamMap, exBasAuthorFk, nameId, config)); |
|
280 |
}else{ |
|
281 |
logger.warn("TeamMap is null"); |
|
282 |
success = false; |
|
283 |
} |
|
284 |
}//nonviralName |
|
285 |
|
|
286 |
|
|
287 |
|
|
288 | 390 |
//zoologicalName |
289 | 391 |
if (taxonName.isZoological()){ |
290 | 392 |
IZoologicalName zooName = taxonName; |
... | ... | |
324 | 426 |
|
325 | 427 |
// logger.info( i + " names handled"); |
326 | 428 |
getNameService().save(namesToSave); |
429 |
// printMatchResults(); |
|
327 | 430 |
return success; |
328 | 431 |
} |
329 | 432 |
|
330 | 433 |
|
331 | 434 |
/** |
435 |
* |
|
436 |
*/ |
|
437 |
private void printMatchResults() { |
|
438 |
for (MatchType type : MatchType.values()){ |
|
439 |
List<String> list = matchResults.get(type); |
|
440 |
list = list == null? new ArrayList<>(): list; |
|
441 |
Collections.sort(list); |
|
442 |
System.out.println("\n" + type.toString() + " " + list.size()); |
|
443 |
System.out.println("============================="); |
|
444 |
for (String result : list){ |
|
445 |
System.out.println(result); |
|
446 |
} |
|
447 |
} |
|
448 |
|
|
449 |
} |
|
450 |
|
|
451 |
|
|
452 |
/** |
|
332 | 453 |
* @param state |
333 | 454 |
* @param taxonName |
334 | 455 |
* @param rs |
... | ... | |
436 | 557 |
Set<String> teamIdSet = new HashSet<>(); |
437 | 558 |
Set<String> referenceIdSet = new HashSet<>(); |
438 | 559 |
Set<String> refDetailIdSet = new HashSet<>(); |
560 |
Set<Integer> prelimRefDetailCandidateIdSet = new HashSet<>(); |
|
439 | 561 |
while (rs.next()){ |
440 | 562 |
handleForeignKey(rs, teamIdSet, "AuthorTeamFk"); |
441 | 563 |
handleForeignKey(rs, teamIdSet, "ExAuthorTeamFk"); |
... | ... | |
443 | 565 |
handleForeignKey(rs, teamIdSet, "ExBasAuthorTeamFk"); |
444 | 566 |
handleForeignKey(rs, referenceIdSet, "nomRefFk"); |
445 | 567 |
handleForeignKey(rs, refDetailIdSet, "nomRefDetailFk"); |
568 |
prelimRefDetailCandidateIdSet.addAll(getPreliminaryIdCandidates(state, rs)); |
|
446 | 569 |
} |
447 | 570 |
|
448 | 571 |
//team map |
... | ... | |
469 | 592 |
Map<String, Reference> refDetailMap= (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace); |
470 | 593 |
result.put(nameSpace, refDetailMap); |
471 | 594 |
|
595 |
//prelim map |
|
596 |
nameSpace = NAMESPACE_PRELIM; |
|
597 |
cdmClass = Reference.class; |
|
598 |
List<Reference> list = getReferenceService().findById(prelimRefDetailCandidateIdSet); |
|
599 |
Map<String, Reference> prelimMap = new HashMap<>(); |
|
600 |
for (Reference ref : list){ |
|
601 |
prelimMap.put(String.valueOf(ref.getId()), ref); |
|
602 |
} |
|
603 |
result.put(nameSpace, prelimMap); |
|
604 |
|
|
472 | 605 |
} catch (SQLException e) { |
473 | 606 |
throw new RuntimeException(e); |
474 | 607 |
} |
... | ... | |
540 | 673 |
} |
541 | 674 |
|
542 | 675 |
|
543 |
private boolean makeNomenclaturalReference(BerlinModelImportConfigurator config, TaxonName taxonName,
|
|
676 |
private boolean makeNomenclaturalReference(BerlinModelImportState state, TaxonName taxonName,
|
|
544 | 677 |
int nameId, ResultSet rs, @SuppressWarnings("rawtypes") ResultSetPartitioner partitioner) throws SQLException{ |
678 |
BerlinModelImportConfigurator config = state.getConfig(); |
|
545 | 679 |
|
546 | 680 |
@SuppressWarnings("unchecked") |
547 | 681 |
Map<String, Reference> refMap = partitioner.getObjectMap(BerlinModelReferenceImport.REFERENCE_NAMESPACE); |
... | ... | |
563 | 697 |
getReferenceFromMaps(refDetailMap, refMap, nomRefDetailFk, nomRefFk); |
564 | 698 |
|
565 | 699 |
if(config.isDoPreliminaryRefDetailsWithNames() && refDetailPrelim){ |
566 |
makePrelimRefDetailRef(config, rs, taxonName, nameId); |
|
567 |
} |
|
568 |
|
|
569 |
//setNomRef |
|
570 |
if (nomReference == null ){ |
|
571 |
//TODO |
|
572 |
if (! config.isIgnoreNull()){ |
|
573 |
logger.warn("Nomenclatural reference (nomRefFk = " + nomRefFk + ") for TaxonName (nameId = " + nameId + ")"+ |
|
574 |
" was not found in reference store. Nomenclatural reference not set!!"); |
|
575 |
} |
|
700 |
makePrelimRefDetailRef(state, rs, partitioner, taxonName, nameId); |
|
576 | 701 |
}else{ |
577 |
if (! INomenclaturalReference.class.isAssignableFrom(nomReference.getClass())){ |
|
578 |
logger.warn("Nomenclatural reference (nomRefFk = " + nomRefFk + ") for TaxonName (nameId = " + nameId + ")"+ |
|
579 |
" is not assignable from INomenclaturalReference. (Class = " + nomReference.getClass()+ ")"); |
|
580 |
} |
|
581 |
nomReference.setNomenclaturallyRelevant(true); |
|
582 |
taxonName.setNomenclaturalReference(nomReference); |
|
702 |
|
|
703 |
//setNomRef |
|
704 |
if (nomReference == null ){ |
|
705 |
//TODO |
|
706 |
if (! config.isIgnoreNull()){ |
|
707 |
logger.warn("Nomenclatural reference (nomRefFk = " + nomRefFk + ") for TaxonName (nameId = " + nameId + ")"+ |
|
708 |
" was not found in reference store. Nomenclatural reference not set!!"); |
|
709 |
} |
|
710 |
}else{ |
|
711 |
if (! INomenclaturalReference.class.isAssignableFrom(nomReference.getClass())){ |
|
712 |
logger.warn("Nomenclatural reference (nomRefFk = " + nomRefFk + ") for TaxonName (nameId = " + nameId + ")"+ |
|
713 |
" is not assignable from INomenclaturalReference. (Class = " + nomReference.getClass()+ ")"); |
|
714 |
} |
|
715 |
nomReference.setNomenclaturallyRelevant(true); |
|
716 |
taxonName.setNomenclaturalReference(nomReference); |
|
717 |
} |
|
583 | 718 |
} |
584 | 719 |
} |
585 | 720 |
} |
... | ... | |
589 | 724 |
|
590 | 725 |
private INonViralNameParser<?> parser = NonViralNameParserImpl.NewInstance(); |
591 | 726 |
|
727 |
|
|
728 |
private class ReferenceCandidate{ |
|
729 |
Reference ref; |
|
730 |
String detail; |
|
731 |
private ReferenceCandidate(Reference ref, String detail) { |
|
732 |
this.ref = ref; |
|
733 |
this.detail = detail; |
|
734 |
} |
|
735 |
public Integer getId() { |
|
736 |
return ref.getId(); |
|
737 |
} |
|
738 |
@Override |
|
739 |
public String toString(){ |
|
740 |
return ref.toString() + ": " + detail; |
|
741 |
} |
|
742 |
} |
|
743 |
|
|
744 |
private class FinalCandidate{ |
|
745 |
private FinalCandidate(ReferenceCandidate candidate, ReferenceCandidate exemplar, MatchResult matchResult) { |
|
746 |
this.candidate = candidate; |
|
747 |
this.exemplar = exemplar; |
|
748 |
this.matchResult = matchResult; |
|
749 |
} |
|
750 |
ReferenceCandidate candidate; |
|
751 |
ReferenceCandidate exemplar; |
|
752 |
MatchResult matchResult; |
|
753 |
|
|
754 |
@Override |
|
755 |
public String toString(){ |
|
756 |
return candidate.toString() + " <-> " + exemplar.toString() + "\n " + matchResult.toString()+"\n"; |
|
757 |
} |
|
758 |
} |
|
759 |
|
|
592 | 760 |
/** |
593 | 761 |
* @param config |
594 | 762 |
* @param rs |
763 |
* @param partitioner |
|
595 | 764 |
* @param taxonName |
596 | 765 |
* @param nameId |
597 | 766 |
* @throws SQLException |
598 | 767 |
*/ |
599 |
private void makePrelimRefDetailRef(IImportConfigurator config, ResultSet rs, TaxonName taxonName, int nameId) throws SQLException { |
|
768 |
private void makePrelimRefDetailRef(BerlinModelImportState state, ResultSet rs, @SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, |
|
769 |
TaxonName taxonName, int nameId) throws SQLException { |
|
770 |
|
|
771 |
int refDetailId = rs.getInt("RefDetailId"); |
|
772 |
@SuppressWarnings("unchecked") |
|
773 |
Map<String, Reference> refMap = partitioner.getObjectMap(NAMESPACE_PRELIM); |
|
774 |
|
|
775 |
String nameTitleCache = taxonName.getTitleCache(); |
|
776 |
|
|
600 | 777 |
String fullNomRefCache = rs.getString("FullNomRefCache"); |
778 |
String detail = rs.getString("Details"); |
|
779 |
|
|
601 | 780 |
if (fullNomRefCache == null){ |
602 | 781 |
logger.warn("fullNomRefCache is null for preliminary refDetail. NameId: " + nameId); |
603 | 782 |
return; |
604 |
}else if (fullNomRefCache.trim().startsWith(": ")){ |
|
605 |
logger.warn("fullNomRefCache starts with for preliminary refDetail. NameId: " + nameId); |
|
783 |
} |
|
784 |
|
|
785 |
fullNomRefCache = fullNomRefCache.trim(); |
|
786 |
if (fullNomRefCache.startsWith(": ")){ |
|
787 |
logger.warn("fullNomRefCache starts with ':' for preliminary refDetail. NameId: " + nameId); |
|
606 | 788 |
return; |
607 |
}else if (fullNomRefCache.trim().startsWith("in ")){ |
|
608 |
String fullStr = taxonName.getTitleCache()+ " " + fullNomRefCache; |
|
609 |
INonViralName newName = parser.parseReferencedName(fullStr, config.getNomenclaturalCode(), taxonName.getRank()); |
|
610 |
if (newName.isProtectedFullTitleCache()){ |
|
611 |
Reference nomRef = ReferenceFactory.newGeneric(); |
|
612 |
nomRef.setAbbrevTitleCache(fullNomRefCache, true); |
|
613 |
taxonName.setNomenclaturalReference(nomRef); |
|
614 |
//check detail |
|
789 |
}else if (fullNomRefCache.matches("[12][7890][0-9][0-9](-(1774|1832))?") && isBlank(detail)){ |
|
790 |
handlePrelimYearOnly(state, rs, taxonName, nameId, refMap, fullNomRefCache, detail, refDetailId); |
|
791 |
}else{ |
|
792 |
Reference genericCandidate = ReferenceFactory.newGeneric(); |
|
793 |
genericCandidate.setAbbrevTitleCache(fullNomRefCache, true); |
|
794 |
Set<FinalCandidate> finalCandidates = new HashSet<>(); |
|
795 |
Set<FinalCandidate> finalInRefCandidates = new HashSet<>(); |
|
796 |
Set<Reference> parsedReferences = new HashSet<>(); |
|
797 |
|
|
798 |
makeFinalCandidates(state, rs, taxonName, refMap, |
|
799 |
nameTitleCache, finalCandidates, |
|
800 |
finalInRefCandidates, parsedReferences); |
|
801 |
|
|
802 |
evaluateFinalCandidates(state, rs, taxonName, detail, genericCandidate, parsedReferences, |
|
803 |
finalCandidates, fullNomRefCache); |
|
804 |
} |
|
805 |
} |
|
806 |
|
|
807 |
|
|
808 |
/** |
|
809 |
* @param state |
|
810 |
* @param rs |
|
811 |
* @param taxonName |
|
812 |
* @param nameId |
|
813 |
* @param refMap |
|
814 |
* @param fullNomRefCache |
|
815 |
* @param detail |
|
816 |
* @throws SQLException |
|
817 |
*/ |
|
818 |
private void handlePrelimYearOnly(BerlinModelImportState state, ResultSet rs, TaxonName taxonName, int nameId, |
|
819 |
Map<String, Reference> refMap, String fullNomRefCache, String detail, int refDetailId) throws SQLException { |
|
820 |
TeamOrPersonBase<?> combAuthor = taxonName.getCombinationAuthorship(); |
|
821 |
Set<Integer> candidateIds = getPreliminaryIdCandidates(state, rs); |
|
822 |
|
|
823 |
boolean candidateMatches = false; |
|
824 |
for (Integer candidateId : candidateIds){ |
|
825 |
Reference dedupCandidate = CdmBase.deproxy(refMap.get(String.valueOf(candidateId))); |
|
826 |
System.out.println("dedupCandidate: " + dedupCandidate.getAbbrevTitleCache()); |
|
827 |
TeamOrPersonBase<?> dedupAuthor = dedupCandidate.getAuthorship(); |
|
828 |
if (dedupAuthor != null && combAuthor != null){ |
|
829 |
if (Objects.equals(dedupAuthor, combAuthor)){ |
|
830 |
taxonName.setNomenclaturalReference(dedupCandidate); |
|
831 |
candidateMatches = true; |
|
832 |
}else if (Objects.equals(dedupAuthor.getNomenclaturalTitle(), combAuthor.getNomenclaturalTitle())){ |
|
833 |
logger.warn("Year nomAuthor equal in nomTitle but not same: " + dedupAuthor.getNomenclaturalTitle() + "; " + fullNomRefCache + "; nameId " + nameId); |
|
834 |
taxonName.setNomenclaturalReference(dedupCandidate); |
|
835 |
candidateMatches = true; |
|
836 |
} |
|
837 |
}else if (dedupCandidate.getAuthorship() == null && combAuthor != null){ |
|
838 |
logger.warn("Year dedupCand and name have no author: " + fullNomRefCache + "; nameId " + nameId); |
|
839 |
taxonName.setNomenclaturalReference(dedupCandidate); |
|
840 |
candidateMatches = true; |
|
841 |
} |
|
842 |
} |
|
843 |
if (!candidateMatches){ |
|
844 |
Reference yearRef = ReferenceFactory.newGeneric(); |
|
845 |
VerbatimTimePeriod timePeriod = TimePeriodParser.parseStringVerbatim(fullNomRefCache); |
|
846 |
yearRef.setDatePublished(timePeriod); |
|
847 |
yearRef.setAuthorship(combAuthor); |
|
848 |
taxonName.setNomenclaturalReference(yearRef); |
|
849 |
yearRef.addImportSource(String.valueOf(refDetailId), NAMESPACE_PRELIM, state.getTransactionalSourceReference(), null); |
|
850 |
refMapping.addCandidate(yearRef, detail); |
|
851 |
//TODO |
|
852 |
// refMap.put(key, yearRef); |
|
853 |
} |
|
854 |
} |
|
855 |
|
|
856 |
|
|
857 |
private enum MatchType{ |
|
858 |
UNPARSED, |
|
859 |
NO_MATCH_SINGLE_PARSE_ARTICLE_WITH_COMMA, |
|
860 |
NO_MATCH_SINGLE_PARSE_ARTICLE_NO_COMMA, |
|
861 |
NO_MATCH_SINGLE_PARSE_BOOKSECTION, |
|
862 |
NO_MATCH_SINGLE_PARSE_BOOK, |
|
863 |
NO_MATCH_SINGLE_PARSE_GENERIC, |
|
864 |
NO_MATCH_SINGLE_PARSE_OTHER, |
|
865 |
NO_MATCH_MULTI_PARSE, |
|
866 |
NO_MATCH_WITH_CANDIDATE, |
|
867 |
SINGLE_FULL_MATCH, |
|
868 |
SINGLE_INREF_MATCH, |
|
869 |
MULTI_SINGLE_PERSISTENT, |
|
870 |
MULTI_MULTI_PERSISTENT_NO_EXACT, |
|
871 |
MULTI_MULTI_PERSISTENT_MULTI_EXACT, |
|
872 |
MULTI_MULTI_PERSISTENT_SINGLE_EXACT, |
|
873 |
MULTI_NO_PERSISTENT_MULTI_EXACT, |
|
874 |
MULTI_NO_PERSISTENT_SINGLE_EXACT, |
|
875 |
MULTI_NO_PERSISTENT_NO_EXACT, |
|
876 |
} |
|
877 |
|
|
878 |
private Map<MatchType, List<String>> matchResults = new HashMap<>(); |
|
879 |
|
|
880 |
/** |
|
881 |
* @param taxonName |
|
882 |
* @param detail |
|
883 |
* @param genericCandidate |
|
884 |
* @param finalCandidates |
|
885 |
* @param fullNomRefCache |
|
886 |
* @param exemplars |
|
887 |
* @throws SQLException |
|
888 |
*/ |
|
889 |
private void evaluateFinalCandidates(BerlinModelImportState state, ResultSet rs, |
|
890 |
TaxonName taxonName, String detail, Reference genericCandidate, Set<Reference> parsedCandidates, |
|
891 |
Set<FinalCandidate> finalCandidates, String fullNomRefCache) throws SQLException { |
|
892 |
|
|
893 |
int refDetailId = rs.getInt("RefDetailId"); |
|
894 |
Set<FinalCandidate> matchingCandidates = getSuccess(finalCandidates); |
|
895 |
if (matchingCandidates.isEmpty()){ |
|
896 |
taxonName.setNomenclaturalReference(genericCandidate); |
|
897 |
genericCandidate.addImportSource(String.valueOf(refDetailId), BerlinModelRefDetailImport.REFDETAIL_NAMESPACE, |
|
898 |
state.getTransactionalSourceReference(), null); |
|
899 |
//TODO should we set this? |
|
900 |
taxonName.setNomenclaturalMicroReference(detail); |
|
901 |
if (finalCandidates.isEmpty()){ |
|
902 |
if (taxonName.getCombinationAuthorship()==null){ |
|
903 |
System.out.println("nom. ref. not parsed because author is null: " + taxonName.getTitleCache()); |
|
904 |
}else{ |
|
905 |
System.out.println("Final Candidates empty but author exists - should not happen: " + taxonName.getTitleCache()); |
|
906 |
} |
|
907 |
handleNoMatch(state, taxonName, detail, genericCandidate, finalCandidates, fullNomRefCache, parsedCandidates); |
|
908 |
// printResult(MatchType.NO_MATCH, unparsedAndName(fullNomRefCache, taxonName)); |
|
909 |
}else if (hasOnlyUnparsedExemplars(finalCandidates)){ |
|
910 |
printResult(MatchType.UNPARSED, unparsedAndName(fullNomRefCache, taxonName)); |
|
911 |
}else if (hasNoCandidateExemplars(finalCandidates)){ |
|
912 |
//but we can define the ref type here |
|
913 |
handleNoMatch(state, taxonName, detail, genericCandidate, finalCandidates, fullNomRefCache, parsedCandidates); |
|
914 |
// printResult(MatchType.NO_MATCH, unparsedAndName(fullNomRefCache, taxonName)); |
|
915 |
}else{ |
|
916 |
String message = resultMessage(fullNomRefCache, finalCandidates, taxonName); |
|
917 |
printResult(MatchType.NO_MATCH_WITH_CANDIDATE, message); |
|
918 |
} |
|
919 |
}else if (matchingCandidates.size() == 1){ |
|
920 |
ReferenceCandidate single = matchingCandidates.iterator().next().candidate; |
|
921 |
addAuthorAndDetail(taxonName, single); |
|
922 |
if (single.ref.isPersited()){ |
|
923 |
printResult(MatchType.SINGLE_FULL_MATCH, unparsedAndName(fullNomRefCache, taxonName)); |
|
615 | 924 |
}else{ |
616 |
Reference nomRef = newName.getNomenclaturalReference(); |
|
617 |
taxonName.setNomenclaturalReference(nomRef); |
|
618 |
String detail = newName.getNomenclaturalMicroReference(); |
|
619 |
String oldDetail = taxonName.getNomenclaturalMicroReference(); |
|
620 |
if (isBlank(detail)){ |
|
621 |
if (isNotBlank(oldDetail)){ |
|
622 |
logger.warn("Detail could not be parsed but seems to exist. NameId: " + nameId); |
|
925 |
single.ref.addImportSource(String.valueOf(refDetailId), BerlinModelRefDetailImport.REFDETAIL_NAMESPACE, |
|
926 |
state.getTransactionalSourceReference(), null); |
|
927 |
printResult(MatchType.SINGLE_INREF_MATCH, unparsedAndName(fullNomRefCache, taxonName)); |
|
928 |
} |
|
929 |
}else{ |
|
930 |
FinalCandidate finCand = findBestMatchingFinalCandidate(taxonName, matchingCandidates, fullNomRefCache); |
|
931 |
addAuthorAndDetail(taxonName, finCand.candidate); |
|
932 |
} |
|
933 |
} |
|
934 |
|
|
935 |
|
|
936 |
/** |
|
937 |
* @param state |
|
938 |
* @param taxonName |
|
939 |
* @param detail |
|
940 |
* @param genericCandidate |
|
941 |
* @param finalCandidates |
|
942 |
* @param fullNomRefCache |
|
943 |
* @param parsedCandidates |
|
944 |
*/ |
|
945 |
private void handleNoMatch(BerlinModelImportState state, TaxonName taxonName, String detail, |
|
946 |
Reference genericCandidate, Set<FinalCandidate> finalCandidates, String fullNomRefCache, |
|
947 |
Set<Reference> parsedCandidatesAsRef) { |
|
948 |
Set<FinalCandidate> parsedCandidates = getParsedExemplars(finalCandidates); |
|
949 |
// parsedCandidatesAsRef = removeGenericFromParsedReferencesAsRef(); |
|
950 |
// if (parsedCandidates.size() != parsedCandidatesAsRef.size()){ |
|
951 |
// System.out.println("Parsed Candidates differ in size. Should not happen"); |
|
952 |
// } |
|
953 |
if (parsedCandidates.isEmpty()){ |
|
954 |
System.out.println("Parsed Candidates empty. Should not happen"); |
|
955 |
}else if (parsedCandidates.size() == 1){ |
|
956 |
|
|
957 |
ReferenceCandidate refCand = parsedCandidates.iterator().next().exemplar; |
|
958 |
addAuthorAndDetail(taxonName, refCand); |
|
959 |
if (refCand.ref.getType() == ReferenceType.Article){ |
|
960 |
if(refCand.ref.getInReference().getAbbrevTitle().contains(",")){ |
|
961 |
printResult(MatchType.NO_MATCH_SINGLE_PARSE_ARTICLE_WITH_COMMA, unparsedAndName(fullNomRefCache, taxonName)); |
|
962 |
}else{ |
|
963 |
printResult(MatchType.NO_MATCH_SINGLE_PARSE_ARTICLE_NO_COMMA, unparsedAndName(fullNomRefCache, taxonName)); |
|
964 |
} |
|
965 |
}else if (refCand.ref.getType() == ReferenceType.BookSection){ |
|
966 |
printResult(MatchType.NO_MATCH_SINGLE_PARSE_BOOKSECTION, unparsedAndName(fullNomRefCache, taxonName)); |
|
967 |
}else if (refCand.ref.getType() == ReferenceType.Book){ |
|
968 |
printResult(MatchType.NO_MATCH_SINGLE_PARSE_BOOK, unparsedAndName(fullNomRefCache, taxonName)); |
|
969 |
}else { |
|
970 |
printResult(MatchType.NO_MATCH_SINGLE_PARSE_OTHER, unparsedAndName(fullNomRefCache, taxonName)); |
|
971 |
} |
|
972 |
}else{ |
|
973 |
ReferenceCandidate generCandidate = createGenericReference(parsedCandidates, detail); |
|
974 |
addAuthorAndDetail(taxonName, generCandidate); |
|
975 |
if (generCandidate.ref.getType() == ReferenceType.Article){ |
|
976 |
if(generCandidate.ref.getInReference().getAbbrevTitle().contains(",")){ |
|
977 |
printResult(MatchType.NO_MATCH_SINGLE_PARSE_ARTICLE_WITH_COMMA, unparsedAndName(fullNomRefCache, taxonName)); |
|
978 |
}else{ |
|
979 |
printResult(MatchType.NO_MATCH_SINGLE_PARSE_ARTICLE_NO_COMMA, unparsedAndName(fullNomRefCache, taxonName)); |
|
980 |
} |
|
981 |
}else if (generCandidate.ref.getType() == ReferenceType.BookSection){ |
|
982 |
printResult(MatchType.NO_MATCH_SINGLE_PARSE_BOOKSECTION, unparsedAndName(fullNomRefCache, taxonName)); |
|
983 |
}else if (generCandidate.ref.getType() == ReferenceType.Book){ |
|
984 |
printResult(MatchType.NO_MATCH_SINGLE_PARSE_BOOK, unparsedAndName(fullNomRefCache, taxonName)); |
|
985 |
}else if (generCandidate.ref.getType() == ReferenceType.Generic){ |
|
986 |
printResult(MatchType.NO_MATCH_SINGLE_PARSE_GENERIC, unparsedAndName(fullNomRefCache, taxonName)); |
|
987 |
}else { |
|
988 |
printResult(MatchType.NO_MATCH_MULTI_PARSE, unparsedAndName(fullNomRefCache, taxonName)); |
|
989 |
} |
|
990 |
} |
|
991 |
|
|
992 |
// System.out.println(fullNomRefCache); |
|
993 |
} |
|
994 |
|
|
995 |
private static final Reference NO_REMAINING_SINGLE = ReferenceFactory.newGeneric(); |
|
996 |
/** |
|
997 |
* @param parsedCandidates |
|
998 |
* @return |
|
999 |
*/ |
|
1000 |
private ReferenceCandidate createGenericReference(Set<FinalCandidate> parsedCandidates, String detail) { |
|
1001 |
Reference refGen = ReferenceFactory.newGeneric(); |
|
1002 |
String title = null; |
|
1003 |
VerbatimTimePeriod datePublished = null; |
|
1004 |
String volume = null; |
|
1005 |
String series = null; |
|
1006 |
String edition = null; |
|
1007 |
TeamOrPersonBase<?> author = null; |
|
1008 |
|
|
1009 |
Reference journalCandidate = null; |
|
1010 |
Reference remainingSingle = null; |
|
1011 |
for (FinalCandidate parsedCand : parsedCandidates){ |
|
1012 |
|
|
1013 |
Reference ref = parsedCand.exemplar.ref; |
|
1014 |
if (ref.getType().isArticle()){ |
|
1015 |
journalCandidate = ref; |
|
1016 |
} |
|
1017 |
if (!ref.getType().isPublication()){ |
|
1018 |
if (ref.getInReference().getAbbrevTitle().matches("((ser|ed)\\..*|(Beih|App|Suppl|Praef|Bot|S\u00E9r\\. Bot|Prodr|Alt|Ap|Nachtr)\\.|Apend|Texte|Atlas)")){ |
|
1019 |
continue; |
|
1020 |
} |
|
1021 |
} |
|
1022 |
if (ref.getType().isArticle()){ |
|
1023 |
if (ref.getVolume() == null || ref.getInReference().getAbbrevTitle().endsWith(", ed.")){ |
|
1024 |
continue; |
|
1025 |
} |
|
1026 |
} |
|
1027 |
|
|
1028 |
//title |
|
1029 |
if (ref.getType().isPublication()){ |
|
1030 |
title = verify(title, ref.getAbbrevTitle()); |
|
1031 |
}else{ |
|
1032 |
title = verify(title, ref.getInReference().getAbbrevTitle()); |
|
1033 |
} |
|
1034 |
//volume |
|
1035 |
if (ref.getType().isVolumeReference()){ |
|
1036 |
volume = verify(volume, ref.getVolume()); |
|
1037 |
}else{ |
|
1038 |
volume = verify(volume, ref.getInReference().getVolume()); |
|
1039 |
} |
|
1040 |
//edition |
|
1041 |
if (ref.getType().isVolumeReference()){ |
|
1042 |
edition = verify(edition, ref.getEdition()); |
|
1043 |
}else{ |
|
1044 |
edition = verify(edition, ref.getInReference().getEdition()); |
|
1045 |
} |
|
1046 |
//series |
|
1047 |
if (ref.getType().isVolumeReference()){ |
|
1048 |
series = verify(series, ref.getSeriesPart()); |
|
1049 |
}else{ |
|
1050 |
series = verify(series, ref.getInReference().getSeriesPart()); |
|
1051 |
} |
|
1052 |
//datePublished |
|
1053 |
datePublished = verify(datePublished, ref.getDatePublished()); |
|
1054 |
//datePublished |
|
1055 |
author = verify(author, ref.getAuthorship()); |
|
1056 |
|
|
1057 |
remainingSingle = remainingSingle == null? ref : NO_REMAINING_SINGLE; |
|
1058 |
} |
|
1059 |
|
|
1060 |
if (remainingSingle == null){ |
|
1061 |
System.out.println("No remaing ref. This should not happen."); |
|
1062 |
}else if (remainingSingle != NO_REMAINING_SINGLE){ |
|
1063 |
refGen = remainingSingle; |
|
1064 |
}else if (IJournal.guessIsJournalName(title) && journalCandidate != null){ |
|
1065 |
refGen = journalCandidate; |
|
1066 |
}else{ |
|
1067 |
refGen.setAbbrevTitle(title); |
|
1068 |
refGen.setVolume(volume); |
|
1069 |
refGen.setEdition(edition); |
|
1070 |
refGen.setSeriesPart(series); |
|
1071 |
refGen.setDatePublished(datePublished); |
|
1072 |
refGen.setAuthorship(author); |
|
1073 |
} |
|
1074 |
|
|
1075 |
ReferenceCandidate cand = new ReferenceCandidate(refGen, detail); |
|
1076 |
return cand; |
|
1077 |
} |
|
1078 |
|
|
1079 |
|
|
1080 |
/** |
|
1081 |
* @param existing |
|
1082 |
* @param newText |
|
1083 |
* @return |
|
1084 |
*/ |
|
1085 |
private <T extends Object> T verify(T existing, T newText) { |
|
1086 |
if (existing == null){ |
|
1087 |
return newText; |
|
1088 |
}else if (existing.equals(newText)){ |
|
1089 |
return existing; |
|
1090 |
}else if (newText == null){ |
|
1091 |
logger.warn("Text not verified, missing, before: " + existing); |
|
1092 |
return existing; |
|
1093 |
}else{ |
|
1094 |
logger.warn("Text not verified, differs: " + existing + "<->" +newText); |
|
1095 |
return existing; |
|
1096 |
} |
|
1097 |
} |
|
1098 |
|
|
1099 |
|
|
1100 |
/** |
|
1101 |
* @param finalCandidates |
|
1102 |
* @return |
|
1103 |
*/ |
|
1104 |
private boolean hasNoCandidateExemplars(Set<FinalCandidate> finalCandidates) { |
|
1105 |
for (FinalCandidate finalCandidate : finalCandidates){ |
|
1106 |
if (finalCandidate.matchResult != UNPARSED_EXEMPLAR && finalCandidate.matchResult != PARSED_NO_CANDIDATE ){ |
|
1107 |
return false; |
|
1108 |
} |
|
1109 |
} |
|
1110 |
return true; |
|
1111 |
} |
|
1112 |
|
|
1113 |
private Set<FinalCandidate> getParsedExemplars(Set<FinalCandidate> finalCandidates) { |
|
1114 |
Set<FinalCandidate> parsedCandidates = new HashSet<>(); |
|
1115 |
for (FinalCandidate finalCandidate : finalCandidates){ |
|
1116 |
if (finalCandidate.matchResult != UNPARSED_EXEMPLAR){ |
|
1117 |
parsedCandidates.add(finalCandidate); |
|
1118 |
} |
|
1119 |
} |
|
1120 |
return parsedCandidates; |
|
1121 |
} |
|
1122 |
|
|
1123 |
/** |
|
1124 |
* @param finalCandidates |
|
1125 |
* @return |
|
1126 |
*/ |
|
1127 |
private boolean hasOnlyUnparsedExemplars(Set<FinalCandidate> finalCandidates) { |
|
1128 |
for (FinalCandidate finalCandidate : finalCandidates){ |
|
1129 |
if (finalCandidate.matchResult != UNPARSED_EXEMPLAR){ |
|
1130 |
return false; |
|
1131 |
} |
|
1132 |
} |
|
1133 |
return true; |
|
1134 |
} |
|
1135 |
|
|
1136 |
|
|
1137 |
/** |
|
1138 |
* @param taxonName |
|
1139 |
* @param single |
|
1140 |
*/ |
|
1141 |
private void addAuthorAndDetail(TaxonName taxonName, ReferenceCandidate refCand) { |
|
1142 |
if (!CdmUtils.nullSafeEqual(refCand.ref.getAuthorship(), taxonName.getCombinationAuthorship())){ |
|
1143 |
TeamOrPersonBase<?> refAut = refCand.ref.getAuthorship(); |
|
1144 |
TeamOrPersonBase<?> nameAut = taxonName.getCombinationAuthorship(); |
|
1145 |
try { |
|
1146 |
MatchResult match = MatchStrategyFactory.NewParsedTeamOrPersonInstance().invoke(refAut, nameAut, true); |
|
1147 |
if (match.isFailed()){ |
|
1148 |
System.out.println("not same author \n"+ match); |
|
1149 |
}else{ |
|
1150 |
taxonName.setCombinationAuthorship(refAut); |
|
1151 |
} |
|
1152 |
} catch (MatchException e) { |
|
1153 |
// TODO Auto-generated catch block |
|
1154 |
e.printStackTrace(); |
|
1155 |
} |
|
1156 |
} |
|
1157 |
//FIXME deduplicate |
|
1158 |
taxonName.setNomenclaturalReference(refCand.ref); |
|
1159 |
taxonName.setNomenclaturalMicroReference(refCand.detail); |
|
1160 |
} |
|
1161 |
|
|
1162 |
|
|
1163 |
|
|
1164 |
// |
|
1165 |
// /** |
|
1166 |
// * @param finalCandidates |
|
1167 |
// * @return |
|
1168 |
// */ |
|
1169 |
// private int getSuccessCount(Set<FinalCandidate> finalCandidates) { |
|
1170 |
// int i = 0; |
|
1171 |
// for (FinalCandidate candidate : finalCandidates){ |
|
1172 |
// if (candidate.matchResult.isSuccessful()){ |
|
1173 |
// i++; |
|
1174 |
// } |
|
1175 |
// } |
|
1176 |
// return i; |
|
1177 |
// } |
|
1178 |
|
|
1179 |
/** |
|
1180 |
* @param fullNomRefCache |
|
1181 |
* @param taxonName |
|
1182 |
* @return |
|
1183 |
*/ |
|
1184 |
private String unparsedAndName(String fullNomRefCache, TaxonName taxonName) { |
|
1185 |
return fullNomRefCache +" | " + taxonName.getFullTitleCache(); |
|
1186 |
} |
|
1187 |
|
|
1188 |
|
|
1189 |
/** |
|
1190 |
* @param noMatch |
|
1191 |
* @param fullTitleCache |
|
1192 |
*/ |
|
1193 |
private void printResult(MatchType type, String text) { |
|
1194 |
List<String> list = matchResults.get(type); |
|
1195 |
if (list == null){ |
|
1196 |
list = new ArrayList<>(); |
|
1197 |
matchResults.put(type, list); |
|
1198 |
} |
|
1199 |
list.add(text); |
|
1200 |
|
|
1201 |
} |
|
1202 |
|
|
1203 |
|
|
1204 |
/** |
|
1205 |
* @param fullNomRefCache |
|
1206 |
* @param finalCandidates |
|
1207 |
* @param taxonName |
|
1208 |
* @return |
|
1209 |
*/ |
|
1210 |
private String resultMessage(String fullNomRefCache, Set<FinalCandidate> finalCandidates, TaxonName taxonName) { |
|
1211 |
String result = unparsedAndName(fullNomRefCache, taxonName)+"\n"; |
|
1212 |
result += finalCandidates.size() +": " + matchResultMessage(finalCandidates); |
|
1213 |
return result; |
|
1214 |
} |
|
1215 |
|
|
1216 |
|
|
1217 |
/** |
|
1218 |
* @param finalCandidates |
|
1219 |
* @param result |
|
1220 |
* @return |
|
1221 |
*/ |
|
1222 |
private String matchResultMessage(Set<FinalCandidate> finalCandidates) { |
|
1223 |
String result = "\n "; |
|
1224 |
for (FinalCandidate finalCand : finalCandidates){ |
|
1225 |
result += finalCand.matchResult.toString()+"\n "; |
|
1226 |
} |
|
1227 |
return result; |
|
1228 |
} |
|
1229 |
|
|
1230 |
|
|
1231 |
private Set<FinalCandidate> getSuccess(Set<FinalCandidate> finalCandidates) { |
|
1232 |
Set<FinalCandidate> result = new HashSet<>(); |
|
1233 |
for (FinalCandidate candidate : finalCandidates){ |
|
1234 |
if (candidate.matchResult.isSuccessful()){ |
|
1235 |
result.add(candidate); |
|
1236 |
} |
|
1237 |
} |
|
1238 |
return result; |
|
1239 |
} |
|
1240 |
|
|
1241 |
|
|
1242 |
/** |
|
1243 |
* @param taxonName |
|
1244 |
* @param finalCandidates |
|
1245 |
* @param exemplars |
|
1246 |
* @return |
|
1247 |
*/ |
|
1248 |
private FinalCandidate findBestMatchingFinalCandidate(TaxonName taxonName, |
|
1249 |
Set<FinalCandidate> finalCandidates, String fullNomRefCache) { |
|
1250 |
try { |
|
1251 |
Set<FinalCandidate> persistentMatches = findPersistentMatch(taxonName, finalCandidates); |
|
1252 |
if (persistentMatches.size() >= 1){ |
|
1253 |
if (persistentMatches.size()>1){ |
|
1254 |
Set<FinalCandidate> exactMatches = findExactMatch(taxonName, finalCandidates); |
|
1255 |
Set<FinalCandidate> successCandidatesExacts = getSuccess(exactMatches); |
|
1256 |
if (successCandidatesExacts.size() >= 1){ |
|
1257 |
FinalCandidate result = successCandidatesExacts.iterator().next(); |
|
1258 |
addAuthorAndDetail(taxonName, result.candidate); |
|
1259 |
// String message = resultMessage(fullNomRefCache, exactMatches, taxonName); |
|
1260 |
if (successCandidatesExacts.size()>1){ |
|
1261 |
printResult(MatchType.MULTI_MULTI_PERSISTENT_MULTI_EXACT, unparsedAndName(fullNomRefCache, taxonName)); |
|
1262 |
}else{ |
|
1263 |
printResult(MatchType.MULTI_MULTI_PERSISTENT_SINGLE_EXACT, unparsedAndName(fullNomRefCache, taxonName)); |
|
1264 |
} |
|
1265 |
return result; |
|
1266 |
}else{ |
|
1267 |
String message = resultMessage(fullNomRefCache, successCandidatesExacts, taxonName); |
|
1268 |
printResult(MatchType.MULTI_MULTI_PERSISTENT_NO_EXACT, message); |
|
1269 |
FinalCandidate result = persistentMatches.iterator().next(); |
|
1270 |
addAuthorAndDetail(taxonName, result.candidate); |
|
1271 |
return result; |
|
623 | 1272 |
} |
624 | 1273 |
}else{ |
625 |
if (isNotBlank(oldDetail) && !detail.equals(oldDetail)){ |
|
626 |
logger.warn("Details differ: " + detail + " <-> " + oldDetail + ". NameId: " + nameId); |
|
1274 |
FinalCandidate result = persistentMatches.iterator().next(); |
|
1275 |
addAuthorAndDetail(taxonName, result.candidate); |
|
1276 |
printResult(MatchType.MULTI_SINGLE_PERSISTENT, taxonName.getFullTitleCache()); |
|
1277 |
return result; |
|
1278 |
} |
|
1279 |
} |
|
1280 |
Set<FinalCandidate> exactMatches = findExactMatch(taxonName, finalCandidates); |
|
1281 |
Set<FinalCandidate> successCandidatesExacts = getSuccess(exactMatches); |
|
1282 |
if (successCandidatesExacts.size() >= 1){ |
|
1283 |
FinalCandidate result = successCandidatesExacts.iterator().next(); |
|
1284 |
addAuthorAndDetail(taxonName, result.candidate); |
|
1285 |
String message = resultMessage(fullNomRefCache, exactMatches, taxonName); |
|
1286 |
if (successCandidatesExacts.size()>1){ |
|
1287 |
printResult(MatchType.MULTI_NO_PERSISTENT_MULTI_EXACT, message); |
|
1288 |
// System.out.println("More then 1 exact match: " + taxonName.getFullTitleCache() + ": " + exactMatches.iterator().next().exemplar.ref.getAbbrevTitleCache()); |
|
1289 |
}else{ |
|
1290 |
printResult(MatchType.MULTI_NO_PERSISTENT_SINGLE_EXACT, message); |
|
1291 |
} |
|
1292 |
return result; |
|
1293 |
}else{ |
|
1294 |
FinalCandidate result = finalCandidates.iterator().next(); |
|
1295 |
addAuthorAndDetail(taxonName, result.candidate); |
|
1296 |
String message = resultMessage(fullNomRefCache, exactMatches, taxonName); |
|
1297 |
printResult(MatchType.MULTI_NO_PERSISTENT_NO_EXACT, message); |
|
1298 |
return result; |
|
1299 |
} |
|
1300 |
} catch (MatchException e) { |
|
1301 |
e.printStackTrace(); |
|
1302 |
return finalCandidates.iterator().next(); |
|
1303 |
} |
|
1304 |
} |
|
1305 |
|
|
1306 |
|
|
1307 |
|
|
1308 |
/** |
|
1309 |
* @param taxonName |
|
1310 |
* @param finalCandidates |
|
1311 |
* @return |
|
1312 |
*/ |
|
1313 |
private String getMultiMultiPersistentMessage(TaxonName taxonName, Set<FinalCandidate> finalCandidates) { |
|
1314 |
String result = finalCandidates.size() + ":" + taxonName.getFullTitleCache(); |
|
1315 |
result += matchResultMessage(finalCandidates); |
|
1316 |
return result; |
|
1317 |
} |
|
1318 |
private String getMultiNoPersistentMultiExactMessage(TaxonName taxonName, Set<FinalCandidate> finalCandidates) { |
|
1319 |
String result = finalCandidates.size() + ":" + taxonName.getFullTitleCache(); |
|
1320 |
result += matchResultMessage(finalCandidates); |
|
1321 |
return result; |
|
1322 |
} |
|
1323 |
|
|
1324 |
/** |
|
1325 |
* @param taxonName |
|
1326 |
* @param finalCandidates |
|
1327 |
* @param exemplars |
|
1328 |
* @return |
|
1329 |
* @throws MatchException |
|
1330 |
*/ |
|
1331 |
private Set<FinalCandidate> findExactMatch(TaxonName taxonName, Set<FinalCandidate> finalCandidates) throws MatchException { |
|
1332 |
IMatchStrategyEqual exactMatcher = getExactMatcher(); |
|
1333 |
Set<FinalCandidate> result = new HashSet<>(); |
|
1334 |
for (FinalCandidate cand : finalCandidates){ |
|
1335 |
Reference exemplarRef = cand.exemplar.ref; |
|
1336 |
if (cand.candidate.ref.getType().equals(exemplarRef.getType())){ |
|
1337 |
MatchResult match = exactMatcher.invoke(cand.candidate.ref, exemplarRef, true); |
|
1338 |
result.add(new FinalCandidate(cand.candidate, cand.exemplar, match)); |
|
1339 |
if (match.isFailed()){ |
|
1340 |
String oldTitle = exemplarRef.getTitle(); |
|
1341 |
exemplarRef.setTitle(exemplarRef.getAbbrevTitle()); |
|
1342 |
match = exactMatcher.invoke(cand.candidate.ref, exemplarRef, true); |
|
1343 |
if (match.isSuccessful()){ |
|
1344 |
result.add(new FinalCandidate(cand.candidate, cand.exemplar, match)); |
|
627 | 1345 |
} |
628 |
taxonName.setNomenclaturalMicroReference(detail);
|
|
1346 |
exemplarRef.setTitle(oldTitle);
|
|
629 | 1347 |
} |
1348 |
}else{ |
|
1349 |
MatchResult match = MatchResult.NewNoTypeInstance(cand.candidate.ref.getType(), exemplarRef.getType()); |
|
1350 |
FinalCandidate finCand = new FinalCandidate(cand.candidate, cand.exemplar, match); |
|
1351 |
result.add(finCand); |
|
630 | 1352 |
} |
631 |
}else{ |
|
632 |
String fullStrComma = taxonName.getTitleCache()+ ", " + fullNomRefCache; |
|
633 |
String fullStrIn = taxonName.getTitleCache()+ " in " + fullNomRefCache; |
|
634 |
INonViralName newNameComma = parser.parseReferencedName(fullStrComma, config.getNomenclaturalCode(), taxonName.getRank()); |
|
635 |
INonViralName newNameIn = parser.parseReferencedName(fullStrIn, config.getNomenclaturalCode(), taxonName.getRank()); |
|
636 |
|
|
637 |
INonViralName newName; |
|
638 |
boolean commaProtected = newNameComma.isProtectedFullTitleCache() || (newNameComma.getNomenclaturalReference() != null |
|
639 |
&& newNameComma.getNomenclaturalReference().isProtectedTitleCache()); |
|
640 |
boolean inProtected = newNameIn.isProtectedFullTitleCache() || (newNameIn.getNomenclaturalReference() != null |
|
641 |
&& newNameIn.getNomenclaturalReference().isProtectedTitleCache()); |
|
642 |
if (commaProtected && !inProtected){ |
|
643 |
newName = newNameIn; |
|
644 |
}else if (!commaProtected && inProtected){ |
|
645 |
newName = newNameComma; |
|
646 |
}else if (commaProtected && inProtected){ |
|
647 |
logger.warn("Can't parse preliminary refDetail: " + fullNomRefCache + " for name " + taxonName.getTitleCache() + "; nameId: " + nameId ); |
|
648 |
newName = newNameComma; |
|
1353 |
} |
|
1354 |
return result; |
|
1355 |
} |
|
1356 |
|
|
1357 |
/** |
|
1358 |
* @return |
|
1359 |
*/ |
|
1360 |
private IMatchStrategyEqual getExactMatcher() { |
|
1361 |
IMatchStrategyEqual result = MatchStrategyFactory.NewDefaultInstance(Reference.class); |
|
1362 |
FieldMatcher inRefMatcher = result.getMatching().getFieldMatcher("inReference"); |
|
1363 |
try { |
|
1364 |
inRefMatcher.getMatchStrategy().setMatchMode("title", MatchMode.EQUAL); |
|
1365 |
return result; |
|
1366 |
} catch (MatchException e) { |
|
1367 |
throw new RuntimeException("Problems creating exact matcher.", e); |
|
1368 |
}//must not be EXACT_REQUIRED |
|
1369 |
} |
|
1370 |
|
|
1371 |
|
|
1372 |
private Set<FinalCandidate> findPersistentMatch(TaxonName taxonName, Set<FinalCandidate> finalCandidates) throws MatchException { |
|
1373 |
Set<FinalCandidate> result = new HashSet<>(); |
|
1374 |
for (FinalCandidate cand : finalCandidates){ |
|
1375 |
if (cand.candidate.ref.isPersited()){ |
|
1376 |
result.add(cand); |
|
1377 |
} |
|
1378 |
} |
|
1379 |
|
|
1380 |
return result; |
|
1381 |
} |
|
1382 |
|
|
1383 |
|
|
1384 |
/**
 * Marker result for final candidates whose exemplar reference has a protected
 * (abbreviated) title cache. Compared by identity.
 */
public static final MatchResult UNPARSED_EXEMPLAR = new MatchResult();

/**
 * Marker result for final candidates whose exemplar has no protected cache but
 * for which no candidate id was found. Compared by identity.
 */
public static final MatchResult PARSED_NO_CANDIDATE = new MatchResult();

//Static initializer: the markers are class-level state and must be set up exactly
//once, not again for every instance of the import class.
static {
    UNPARSED_EXEMPLAR.addNullMatching(null, null);
    PARSED_NO_CANDIDATE.addNullMatching(null, null);
}
|
1403 |
private void makeFinalCandidates(BerlinModelImportState state, ResultSet rs, TaxonName taxonName, |
|
1404 |
Map<String, Reference> refMap, String nameTitleCache, |
|
1405 |
Set<FinalCandidate> finalCandidates, |
|
1406 |
Set<FinalCandidate> finalInRefCandidates, Set<Reference> parsedReferences |
|
1407 |
) throws SQLException { |
|
1408 |
|
|
1409 |
Set<Integer> candidateIds = getPreliminaryIdCandidates(state, rs); |
|
1410 |
Set<TaxonName> nameCandidates = parseExemplars(state, rs, taxonName); |
|
1411 |
|
|
1412 |
Set<ReferenceCandidate> exemplars = new HashSet<>(); |
|
1413 |
for(TaxonName nameCandidate: nameCandidates){ |
|
1414 |
if(nameCandidate.getNomenclaturalReference()!= null){ |
|
1415 |
exemplars.add(new ReferenceCandidate(nameCandidate.getNomenclaturalReference(), nameCandidate.getNomenclaturalMicroReference())); |
|
1416 |
parsedReferences.add(nameCandidate.getNomenclaturalReference()); |
|
1417 |
} |
|
1418 |
} |
|
1419 |
|
|
1420 |
for(ReferenceCandidate exemplar: exemplars){ |
|
1421 |
if (exemplar.ref.isProtectedAbbrevTitleCache() || exemplar.ref.isProtectedTitleCache()){ |
|
1422 |
FinalCandidate parsedNoCandidateExemplarCandidate = new FinalCandidate(null, exemplar, UNPARSED_EXEMPLAR); |
|
1423 |
finalCandidates.add(parsedNoCandidateExemplarCandidate); |
|
1424 |
}else if (candidateIds.isEmpty()){ |
|
1425 |
FinalCandidate unparsedExemplarCandidate = new FinalCandidate(null, exemplar, PARSED_NO_CANDIDATE); |
|
1426 |
finalCandidates.add(unparsedExemplarCandidate); |
|
649 | 1427 |
}else{ |
650 |
logger.warn("Can't decide ref type for preliminary refDetail: " + fullNomRefCache + " for name " + taxonName.getTitleCache() + "; nameId: " + nameId ); |
|
651 |
newName = newNameComma; |
|
1428 |
for (Integer candidateId : candidateIds){ |
|
1429 |
if (candidateId == null){ |
|
1430 |
logger.warn("CandidateId not found: " + candidateId); |
|
1431 |
continue; |
|
1432 |
} |
|
1433 |
Reference dedupCandidate = CdmBase.deproxy(refMap.get(String.valueOf(candidateId))); |
|
1434 |
|
|
1435 |
//ref |
|
1436 |
FinalCandidate cand = matchSingle(finalCandidates, dedupCandidate, exemplar, nameTitleCache); |
|
1437 |
//inRef |
|
1438 |
if (cand.matchResult.isFailed() && exemplar.ref.getInReference() != null ){ |
|
1439 |
FinalCandidate candInRef = matchSingle(finalInRefCandidates, dedupCandidate, new ReferenceCandidate(exemplar.ref.getInReference(), null), nameTitleCache); |
|
1440 |
if(candInRef.matchResult.isSuccessful()){ |
|
1441 |
Reference clone = (Reference)exemplar.ref.clone(); |
|
1442 |
clone.setInReference(dedupCandidate); |
|
1443 |
FinalCandidate inRefCand = new FinalCandidate(new ReferenceCandidate(clone, exemplar.detail), |
|
1444 |
exemplar, candInRef.matchResult); |
|
1445 |
finalCandidates.add(inRefCand); |
|
1446 |
} |
|
1447 |
} |
|
1448 |
} |
|
652 | 1449 |
} |
1450 |
} |
|
1451 |
|
|
1452 |
return; |
|
1453 |
} |
|
1454 |
|
|
653 | 1455 |
|
1456 |
/** |
|
1457 |
* @param finalCandidates |
|
1458 |
* @param refCandidate |
|
1459 |
* @param exemplar |
|
1460 |
* @param fullNomRefCache |
|
1461 |
* @param nameTitleCache |
|
1462 |
*/ |
|
1463 |
protected FinalCandidate matchSingle(Set<FinalCandidate> finalCandidates, Reference dedupCandidate, |
|
1464 |
ReferenceCandidate exemplar, String nameTitleCache) { |
|
1465 |
|
|
1466 |
try { |
|
1467 |
MatchResult match = null; |
|
1468 |
FinalCandidate finalCand; |
|
1469 |
IMatchStrategy matchStrategy = getReferenceMatchStrategy(); |
|
1470 |
Reference refExemplar = exemplar.ref; |
|
1471 |
if(refExemplar.getType().equals(dedupCandidate.getType())){ |
|
1472 |
TeamOrPersonBase<?> exemplarAuthor = refExemplar.getAuthorship(); |
|
1473 |
TeamOrPersonBase<?> candidateAuthor = CdmBase.deproxy(dedupCandidate.getAuthorship()); |
|
1474 |
String cache = refExemplar.getTitleCache(); |
|
1475 |
String ccache = dedupCandidate.getTitleCache(); |
|
1476 |
String abbrevCache = refExemplar.getAbbrevTitleCache(); |
|
1477 |
String cabbrevCache = dedupCandidate.getAbbrevTitleCache(); |
|
1478 |
if (exemplarAuthor != null && candidateAuthor != null){ |
|
1479 |
exemplarAuthor.getTitleCache(); |
|
1480 |
String exemplarAuthorStr = exemplarAuthor.getNomenclaturalTitle(); |
|
1481 |
// System.out.println(exemplarAuthor.getTitleCache()); |
|
1482 |
String candidateAuthorStr = candidateAuthor.getNomenclaturalTitle(); |
|
1483 |
// System.out.println(candidateAuthor.getTitleCache()); |
|
1484 |
if (!exemplarAuthorStr.equals(candidateAuthorStr)){ |
|
1485 |
match = MatchResult.NewInstance(":authorship", MatchMode.EQUAL, exemplarAuthorStr, candidateAuthorStr); |
|
1486 |
} |
|
1487 |
} |
|
1488 |
|
|
1489 |
if (match == null){ |
|
1490 |
match = matchStrategy.invoke(dedupCandidate, refExemplar, true); |
|
1491 |
} |
|
654 | 1492 |
|
655 |
if (newName.isProtectedFullTitleCache()){ |
|
656 |
Reference nomRef = ReferenceFactory.newGeneric(); |
|
657 |
nomRef.setAbbrevTitleCache(fullNomRefCache, true); |
|
658 |
taxonName.setNomenclaturalReference(nomRef); |
|
659 |
//check detail |
|
1493 |
//TODO detail match |
|
1494 |
//TODO formatter match |
|
1495 |
if (true){ |
|
1496 |
// return true; |
|
1497 |
}else if (refExemplar.getInReference() != null && dedupCandidate.getInReference() != null){ |
|
1498 |
// boolean matchInRef = matchStrategy.invoke(dedupCandidate.getInReference(), refExemplar.getInReference()); |
|
1499 |
// if(matchInRef){ |
|
1500 |
// Reference clone = (Reference)refExemplar.clone(); |
|
1501 |
// clone.setInReference(dedupCandidate.getInReference()); |
|
1502 |
// finalCandidates.add(new ReferenceCandidate(clone, exemplar.detail)); |
|
1503 |
// } |
|
1504 |
} |
|
660 | 1505 |
}else{ |
661 |
Reference nomRef = newName.getNomenclaturalReference(); |
|
662 |
taxonName.setNomenclaturalReference(nomRef); |
|
663 |
String detail = newName.getNomenclaturalMicroReference(); |
|
664 |
String oldDetail = taxonName.getNomenclaturalMicroReference(); |
|
665 |
if (isBlank(detail)){ |
|
666 |
if (isNotBlank(oldDetail)){ |
|
667 |
logger.warn("Detail could not be parsed but seems to exist. NameId: " + nameId); |
|
1506 |
match = MatchResult.NewNoTypeInstance(refExemplar.getType(), dedupCandidate.getType()); |
|
1507 |
} |
|
1508 |
finalCand = new FinalCandidate(new ReferenceCandidate(dedupCandidate, exemplar.detail), exemplar, match); |
|
1509 |
finalCandidates.add(finalCand); |
|
1510 |
|
|
1511 |
return finalCand; |
|
1512 |
} catch (MatchException e) { |
|
1513 |
e.printStackTrace(); |
|
1514 |
throw new RuntimeException(); |
|
1515 |
} |
|
1516 |
} |
|
1517 |
|
|
1518 |
|
|
1519 |
private IMatchStrategy referenceMatchStrategy; |
|
1520 |
|
|
1521 |
/** |
|
1522 |
* @return |
|
1523 |
* @throws MatchException |
|
1524 |
*/ |
|
1525 |
protected IMatchStrategy getReferenceMatchStrategy() throws MatchException { |
|
1526 |
|
|
1527 |
if (referenceMatchStrategy == null){ |
|
1528 |
referenceMatchStrategy = MatchStrategyFactory.NewParsedReferenceInstance(); |
|
1529 |
} |
|
1530 |
// if (referenceMatchStrategy == null){ |
|
1531 |
// referenceMatchStrategy = DefaultMatchStrategy.NewInstance(Reference.class); |
|
1532 |
// |
|
1533 |
// referenceMatchStrategy.setMatchMode("title", MatchMode.EQUAL_OR_SECOND_NULL); |
|
1534 |
// referenceMatchStrategy.setMatchMode("placePublished", MatchMode.EQUAL_OR_SECOND_NULL); |
|
1535 |
// @SuppressWarnings("rawtypes") |
|
1536 |
// SubClassMatchStrategy<TeamOrPersonBase> refAuthorMatchStrategy = SubClassMatchStrategy |
|
1537 |
// .NewInstance(TeamOrPersonBase.class, Person.class, Team.class); |
|
1538 |
// refAuthorMatchStrategy.setMatchMode(Person.class, "familyName", MatchMode.EQUAL_OR_SECOND_NULL); |
|
1539 |
// refAuthorMatchStrategy.setMatchMode(Person.class, "givenName", MatchMode.EQUAL_OR_SECOND_NULL); |
|
1540 |
// refAuthorMatchStrategy.setMatchMode(Person.class, "initials", MatchMode.EQUAL_OR_SECOND_NULL); |
|
1541 |
// referenceMatchStrategy.setMatchMode("authorship", MatchMode.MATCH, refAuthorMatchStrategy); |
|
1542 |
// |
|
1543 |
// //for testing only |
|
1544 |
//// referenceMatchStrategy = null; |
|
1545 |
//// FieldMatcher autMatcher = referenceMatchStrategy.getMatching().getFieldMatcher("authorship"); |
|
1546 |
// } |
|
1547 |
return referenceMatchStrategy; |
|
1548 |
} |
|
1549 |
|
|
1550 |
private Set<Integer> getPreliminaryIdCandidates(BerlinModelImportState state, ResultSet rs) throws SQLException{ |
|
1551 |
|
|
1552 |
Set<Integer> result = new HashSet<>(); |
|
1553 |
boolean refDetailPrelim = rs.getBoolean("RefDetailPrelim"); |
|
1554 |
if(state.getConfig().isDoPreliminaryRefDetailsWithNames() && refDetailPrelim){ |
|
1555 |
|
|
1556 |
Set<TaxonName> names = parseExemplars(state, rs, null); |
|
1557 |
for (TaxonName name : names){ |
|
1558 |
Reference exemplar = name.getNomenclaturalReference(); |
|
1559 |
if (exemplar != null){ |
|
1560 |
Set<ReferenceCandidate> persistendCandidates = refMapping.getCandidates(exemplar); |
|
1561 |
if (exemplar.getInReference()!= null){ |
|
1562 |
persistendCandidates.addAll(refMapping.getCandidates(exemplar.getInReference())); |
|
668 | 1563 |
} |
669 |
}else{ |
|
670 |
if (isNotBlank(oldDetail) && !detail.equals(oldDetail)){ |
|
671 |
logger.warn("Details differ: " + detail + " <-> " + oldDetail + ". NameId: " + nameId); |
|
1564 |
for (ReferenceCandidate persistendCandidate : persistendCandidates){ |
|
1565 |
result.add(persistendCandidate.getId()); |
|
672 | 1566 |
} |
673 |
taxonName.setNomenclaturalMicroReference(detail); |
|
674 | 1567 |
} |
675 | 1568 |
} |
676 | 1569 |
} |
1570 |
return result; |
|
1571 |
} |
|
1572 |
|
|
1573 |
private Set<TaxonName> parseExemplars(BerlinModelImportState state, ResultSet rs, TaxonName taxonName) throws SQLException{ |
|
1574 |
BerlinModelImportConfigurator config = state.getConfig(); |
|
1575 |
|
|
1576 |
Set<TaxonName> result = new HashSet<>(); |
|
1577 |
|
|
1578 |
String fullNomRefCache = rs.getString("FullNomRefCache"); |
|
1579 |
String detail = rs.getString("Details"); |
|
1580 |
|
|
1581 |
|
|
1582 |
if (fullNomRefCache == null){ |
|
1583 |
// logger.warn("fullNomRefCache is null for preliminary refDetail. NameId: " + nameId); |
|
1584 |
return result; |
|
1585 |
}else if (fullNomRefCache.trim().startsWith(": ")){ |
|
1586 |
// logger.warn("fullNomRefCache starts with for preliminary refDetail. NameId: " + nameId); |
|
1587 |
return result; |
|
1588 |
}else{ |
|
1589 |
TaxonName testName = taxonName == null ? getTestName() : (TaxonName)taxonName.clone(); |
|
1590 |
|
|
1591 |
Set<String> fullStrCandidates; |
|
1592 |
if (fullNomRefCache.trim().startsWith("in ")){ |
|
1593 |
//RefDetails with "in" references |
|
1594 |
fullStrCandidates = makePrelimRefDetailInRef(state, testName, fullNomRefCache, detail); |
|
1595 |
}else if (fullNomRefCache.trim().startsWith(", ")){ |
|
1596 |
//RefDetails with ", " reference |
|
1597 |
fullStrCandidates = makePrelimRefDetailBook(state, testName, fullNomRefCache, detail); |
|
1598 |
}else{ |
|
1599 |
//ordinary protected ref details |
|
1600 |
fullStrCandidates = makePrelimRefDetailNotInRef(state, testName, fullNomRefCache, detail); |
|
1601 |
} |
|
1602 |
|
|
1603 |
for (String parseStr : fullStrCandidates){ |
|
1604 |
TaxonName newName = (TaxonName)parser.parseReferencedName(parseStr, config.getNomenclaturalCode(), testName.getRank()); |
|
1605 |
Reference newNomRef = newName.getNomenclaturalReference(); |
|
1606 |
if (taxonName != null && newNomRef != null && !newNomRef.isProtectedAbbrevTitleCache()&& !newNomRef.isProtectedTitleCache()){ |
|
1607 |
newNomRef.setAuthorship(taxonName.getCombinationAuthorship()); |
|
1608 |
} |
|
1609 |
result.add(newName); |
|
1610 |
// Reference exemplar = newName.getNomenclaturalReference(); |
|
1611 |
} |
|
1612 |
return result; |
|
1613 |
} |
|
1614 |
} |
|
1615 |
|
|
1616 |
|
|
1617 |
/** |
|
1618 |
* @return |
|
1619 |
*/ |
|
1620 |
protected TaxonName getTestName() { |
|
1621 |
TaxonName testName = TaxonNameFactory.NewBotanicalInstance(Rank.SPECIES(), null); |
|
1622 |
testName.setGenusOrUninomial("Abies"); |
|
1623 |
testName.setSpecificEpithet("alba"); |
|
1624 |
testName.setAuthorshipCache("Mill."); |
|
1625 |
return testName; |
|
1626 |
} |
|
1627 |
|
|
1628 |
|
|
1629 |
|
|
1630 |
/** |
|
1631 |
* @param config |
|
1632 |
* @param refMap |
|
1633 |
* @param taxonName |
|
1634 |
* @param nameId |
|
1635 |
* @param fullNomRefCache |
|
1636 |
* @return |
|
1637 |
*/ |
|
1638 |
protected Set<String> makePrelimRefDetailNotInRef(BerlinModelImportState state, TaxonName taxonName, |
|
1639 |
String fullNomRefCache, String detail) { |
|
1640 |
Set<String> result = new HashSet<>(); |
|
1641 |
String fullStrComma = taxonName.getTitleCache()+ ", " + fullNomRefCache; |
|
1642 |
result.add(fullStrComma); |
|
1643 |
String fullStrIn = taxonName.getTitleCache()+ " in " + fullNomRefCache; |
|
1644 |
result.add(fullStrIn); |
|
1645 |
return result; |
|
1646 |
|
|
1647 |
// |
|
1648 |
// INonViralName newNameComma = parser.parseReferencedName(fullStrComma, config.getNomenclaturalCode(), taxonName.getRank()); |
|
1649 |
// INonViralName newNameIn = parser.parseReferencedName(fullStrIn, config.getNomenclaturalCode(), taxonName.getRank()); |
|
1650 |
// |
|
1651 |
// INonViralName newName; |
|
1652 |
// boolean commaProtected = newNameComma.isProtectedFullTitleCache() || (newNameComma.getNomenclaturalReference() != null |
|
1653 |
// && newNameComma.getNomenclaturalReference().isProtectedTitleCache()); |
|
1654 |
// boolean inProtected = newNameIn.isProtectedFullTitleCache() || (newNameIn.getNomenclaturalReference() != null |
|
1655 |
// && newNameIn.getNomenclaturalReference().isProtectedTitleCache()); |
|
1656 |
// if (commaProtected && !inProtected){ |
|
1657 |
// newName = newNameIn; |
|
1658 |
// }else if (!commaProtected && inProtected){ |
|
1659 |
// newName = newNameComma; |
|
1660 |
// }else if (commaProtected && inProtected){ |
|
1661 |
// logger.warn("Can't parse preliminary refDetail: " + fullNomRefCache + " for name " + taxonName.getTitleCache() + "; nameId: " + nameId ); |
|
1662 |
// newName = newNameComma; |
|
1663 |
// }else{ |
|
1664 |
// logger.warn("Can't decide ref type for preliminary refDetail: " + fullNomRefCache + " for name " + taxonName.getTitleCache() + "; nameId: " + nameId ); |
|
1665 |
// newName = newNameComma; |
|
1666 |
// } |
|
1667 |
// |
|
1668 |
// |
|
1669 |
// if (newName.isProtectedFullTitleCache()){ |
|
1670 |
// Reference nomRef = ReferenceFactory.newGeneric(); |
|
1671 |
// nomRef.setAbbrevTitleCache(fullNomRefCache, true); |
|
1672 |
// taxonName.setNomenclaturalReference(nomRef); |
|
1673 |
// //check detail |
|
1674 |
// }else{ |
|
1675 |
// Reference nomRef = newName.getNomenclaturalReference(); |
|
1676 |
// taxonName.setNomenclaturalReference(nomRef); |
|
1677 |
// String detail = newName.getNomenclaturalMicroReference(); |
|
1678 |
// String oldDetail = taxonName.getNomenclaturalMicroReference(); |
|
1679 |
// if (isBlank(detail)){ |
|
1680 |
// if (isNotBlank(oldDetail)){ |
|
1681 |
// logger.warn("Detail could not be parsed but seems to exist. NameId: " + nameId); |
|
1682 |
// } |
|
1683 |
// }else{ |
|
1684 |
// if (isNotBlank(oldDetail) && !detail.equals(oldDetail)){ |
|
1685 |
// logger.warn("Details differ: " + detail + " <-> " + oldDetail + ". NameId: " + nameId); |
|
1686 |
// } |
|
1687 |
// taxonName.setNomenclaturalMicroReference(detail); |
|
1688 |
// } |
|
1689 |
// } |
|
1690 |
} |
|
1691 |
|
|
1692 |
|
|
1693 |
/** |
|
1694 |
* @param config |
|
1695 |
* @param refMap |
|
1696 |
* @param taxonName |
|
1697 |
* @param nameId |
|
1698 |
* @param fullNomRefCache |
|
1699 |
* @param detail |
|
1700 |
* @return |
|
1701 |
*/ |
|
1702 |
protected Set<String> makePrelimRefDetailInRef(BerlinModelImportState state, |
|
1703 |
TaxonName taxonName, |
|
1704 |
String fullNomRefCache, String detail) { |
|
1705 |
|
|
1706 |
Set<String> result = new HashSet<>(); |
|
1707 |
String parseStr = taxonName.getTitleCache()+ " " + fullNomRefCache; |
|
1708 |
result.add(parseStr); |
|
1709 |
return result; |
|
1710 |
|
|
1711 |
|
|
1712 |
// String detail = newName.getNomenclaturalMicroReference(); |
|
1713 |
// String oldDetail = taxonName.getNomenclaturalMicroReference(); |
|
1714 |
// if (isBlank(detail)){ |
|
1715 |
// if (isNotBlank(oldDetail)){ |
|
1716 |
// logger.warn("Detail could not be parsed but seems to exist. NameId: " + nameId); |
|
1717 |
// } |
|
1718 |
// }else{ |
|
1719 |
// if (isNotBlank(oldDetail) && !detail.equals(oldDetail)){ |
|
1720 |
// logger.warn("Details differ: " + detail + " <-> " + oldDetail + ". NameId: " + nameId); |
|
1721 |
// } |
|
1722 |
// taxonName.setNomenclaturalMicroReference(detail); |
|
1723 |
// } |
|
1724 |
} |
|
1725 |
|
|
1726 |
protected Set<String> makePrelimRefDetailBook(BerlinModelImportState state, |
|
1727 |
TaxonName taxonName, |
|
1728 |
String fullNomRefCache, String detail) { |
|
1729 |
|
|
1730 |
Set<String> result = new HashSet<>(); |
|
1731 |
String parseStr = taxonName.getTitleCache()+ fullNomRefCache; |
|
1732 |
result.add(parseStr); |
|
1733 |
return result; |
|
1734 |
} |
|
1735 |
|
|
1736 |
|
|
1737 |
/** |
|
1738 |
* Creates the hash string for finding preliminary RefDetail duplicates |
|
1739 |
* @param nomRef |
|
1740 |
*/ |
|
1741 |
private String refHash(Reference nomRef) { |
|
1742 |
// TeamOrPersonBase<?> author = nomRef.getAuthorship(); |
|
1743 |
// String authorStr = author == null? "" : author.getNomenclaturalTitle(); |
|
1744 |
|
|
1745 |
String title = nomRef.getAbbrevTitle(); |
|
1746 |
if (title == null){ |
|
1747 |
title = nomRef.getTitle(); |
|
1748 |
if (title == null && nomRef.getInReference() != null){ |
|
1749 |
title = nomRef.getInReference().getAbbrevTitle(); |
|
1750 |
if (title == null){ |
|
1751 |
title = nomRef.getInReference().getTitle(); |
|
1752 |
} |
|
1753 |
} |
|
1754 |
if (title == null){ |
|
1755 |
title = nomRef.getAbbrevTitleCache(); |
|
1756 |
} |
|
1757 |
if (title == null){ |
|
1758 |
title = nomRef.getTitleCache(); |
|
1759 |
} |
|
1760 |
} |
|
1761 |
String vol = nomRef.getVolume(); |
|
1762 |
if (vol == null && nomRef.getInReference() != null){ |
|
1763 |
vol = nomRef.getInReference().getVolume(); |
|
1764 |
} |
|
1765 |
String date = nomRef.getDatePublishedString(); |
|
1766 |
if (date == null && nomRef.getInReference() != null){ |
|
1767 |
date = nomRef.getInReference().getDatePublishedString(); |
|
1768 |
} |
|
1769 |
ReferenceType type = nomRef.getType(); |
|
1770 |
|
|
1771 |
String result = CdmUtils.concat("@", title, vol, date, type.getKey()); |
|
1772 |
return result; |
|
677 | 1773 |
} |
678 | 1774 |
|
679 | 1775 |
|
Also available in: Unified diff
E+M latest changes to name import (mostly for unparsed refdetail handling)