Project

General

Profile

« Previous | Next » 

Revision 1a63486c

Added by Andreas Müller over 5 years ago

E+M latest changes to name import (mostly for unparsed refdetail handling)

View differences:

app-import/src/main/java/eu/etaxonomy/cdm/io/berlinModel/in/BerlinModelTaxonNameImport.java
11 11

  
12 12
import java.sql.ResultSet;
13 13
import java.sql.SQLException;
14
import java.util.ArrayList;
15
import java.util.Collections;
14 16
import java.util.HashMap;
15 17
import java.util.HashSet;
18
import java.util.List;
16 19
import java.util.Map;
20
import java.util.Objects;
17 21
import java.util.Set;
18 22
import java.util.UUID;
19 23

  
......
24 28
import eu.etaxonomy.cdm.database.update.DatabaseTypeNotSupportedException;
25 29
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
26 30
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelTaxonNameImportValidator;
27
import eu.etaxonomy.cdm.io.common.IImportConfigurator;
28 31
import eu.etaxonomy.cdm.io.common.IOValidator;
29 32
import eu.etaxonomy.cdm.io.common.ImportHelper;
30 33
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
......
37 40
import eu.etaxonomy.cdm.model.common.ExtensionType;
38 41
import eu.etaxonomy.cdm.model.common.Language;
39 42
import eu.etaxonomy.cdm.model.common.Representation;
43
import eu.etaxonomy.cdm.model.common.VerbatimTimePeriod;
40 44
import eu.etaxonomy.cdm.model.name.IBotanicalName;
41 45
import eu.etaxonomy.cdm.model.name.ICultivarPlantName;
42
import eu.etaxonomy.cdm.model.name.INonViralName;
43 46
import eu.etaxonomy.cdm.model.name.IZoologicalName;
44 47
import eu.etaxonomy.cdm.model.name.Rank;
45 48
import eu.etaxonomy.cdm.model.name.TaxonName;
46 49
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
50
import eu.etaxonomy.cdm.model.reference.IJournal;
47 51
import eu.etaxonomy.cdm.model.reference.INomenclaturalReference;
48 52
import eu.etaxonomy.cdm.model.reference.Reference;
49 53
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
54
import eu.etaxonomy.cdm.model.reference.ReferenceType;
50 55
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
56
import eu.etaxonomy.cdm.strategy.match.FieldMatcher;
57
import eu.etaxonomy.cdm.strategy.match.IMatchStrategy;
58
import eu.etaxonomy.cdm.strategy.match.IMatchStrategyEqual;
59
import eu.etaxonomy.cdm.strategy.match.MatchException;
60
import eu.etaxonomy.cdm.strategy.match.MatchMode;
61
import eu.etaxonomy.cdm.strategy.match.MatchResult;
62
import eu.etaxonomy.cdm.strategy.match.MatchStrategyFactory;
51 63
import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
52 64
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
65
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
53 66

  
54 67
/**
55 68
 * @author a.mueller
......
65 78

  
66 79
	public static final String NAMESPACE = "TaxonName";
67 80

  
81
	   public static final String NAMESPACE_PRELIM = "RefDetail_Preliminary";
82

  
68 83
	public static final UUID SOURCE_ACC_UUID = UUID.fromString("c3959b4f-d876-4b7a-a739-9260f4cafd1c");
69 84

  
70 85
	private static int modCount = 5000;
......
79 94

  
80 95
	@Override
81 96
	protected String getIdQuery(BerlinModelImportState state) {
82
		if (state.getConfig().getNameIdTable()==null ){
97
		if (state.getConfig().getNameIdTable() == null ){
83 98
			return super.getIdQuery(state);
84 99
		}else{
85
			return "SELECT nameId FROM " + state.getConfig().getNameIdTable() + "";
100
			return "SELECT nameId FROM " + state.getConfig().getNameIdTable()
101
	//         + " WHERE nameId = 146109 "
102
			        ;
86 103
		}
87 104
	}
88 105

  
......
106 123
                      		" RefDetail.FullRefCache, RefDetail.FullNomRefCache, RefDetail.PreliminaryFlag AS RefDetailPrelim, RefDetail.Details, " +
107 124
                      		" RefDetail.SecondarySources, Rank.RankAbbrev, Rank.Rank " +
108 125
                      		facultativCols +
109
                    " FROM Name LEFT OUTER JOIN RefDetail ON Name.NomRefDetailFk = RefDetail.RefDetailId AND  " +
110
                    	" Name.NomRefFk = RefDetail.RefFk " +
126
                    " FROM Name LEFT OUTER JOIN RefDetail ON Name.NomRefDetailFk = RefDetail.RefDetailId " +
127
                    	                                   " AND Name.NomRefFk = RefDetail.RefFk " +
111 128
                    	" LEFT OUTER JOIN Rank ON Name.RankFk = Rank.rankID " +
112
                " WHERE name.nameId IN ("+ID_LIST_TOKEN+") ";
113
					//strQuery += " AND RefDetail.PreliminaryFlag = 1 ";
129
                    " WHERE name.nameId IN ("+ID_LIST_TOKEN+") ";
130
	//	    strRecordQuery += " AND RefDetail.PreliminaryFlag = 1 ";
114 131
					//strQuery += " AND Name.Created_When > '03.03.2004' ";
115 132
		return strRecordQuery +  "";
116 133
	}
117 134

  
118 135

  
136
	private class ReferenceMapping{
137
	    public Map<String, ReferenceWrapper> titleMapping = new HashMap<>();
138
	    public Map<String, ReferenceWrapper> abbrevMapping = new HashMap<>();
139

  
140
	    private class ReferenceWrapper {
141
	        Set<ReferenceCandidate> candidates = new HashSet<>();
142

  
143
            public Set<ReferenceCandidate> getCandidates() {
144
                return candidates;
145
            }
146
            public void add(Reference ref, String detail) {
147
                candidates.add(new ReferenceCandidate(ref, detail));
148
            }
149
        }
150
	    private void unload(){
151
	        titleMapping.clear();
152
	        abbrevMapping.clear();
153
	    }
154

  
155
	    public void addCandidate(Reference ref, String detail) {
156
	        String hash = refHash(ref);
157
            ReferenceWrapper wrap = abbrevMapping.get(hash);
158
            if (wrap == null){
159
                wrap = new ReferenceWrapper();
160
                abbrevMapping.put(hash, wrap);
161
            }
162
            wrap.add(ref, detail);
163
	    }
164

  
165

  
166
        /**
167
         * @param nomRef
168
         * @return
169
         */
170
        public Set<ReferenceCandidate> getCandidates(Reference exemplar) {
171
            String hash = refHash(exemplar);
172
            ReferenceMapping.ReferenceWrapper wrap = abbrevMapping.get(hash);
173
            if (wrap == null){
174
                return new HashSet<>();
175
            }else{
176
                return wrap.getCandidates();
177
            }
178
        }
179

  
180
        @Override
181
        public String toString(){
182
            return "ReferenceMapping";
183
        }
184
	}
185

  
186
	private ReferenceMapping refMapping = new ReferenceMapping();
187

  
188
	private void loadReferenceMap(BerlinModelImportState state){
189
	    List<Reference> list = getReferenceService().list(null, null, null, null, null);
190
	    for (Reference ref : list){
191
	        refMapping.addCandidate(ref, null);
192
	    }
193

  
194
//	    try {
195
//
196
//            String query = "SELECT * FROM Reference ";
197
//
198
//            ResultSet rs = state.getConfig().getDestination().executeQuery(query);
199
//            while (rs.next()){
200
//                String title = rs.getString("title");
201
//                String abbrevTitle = rs.getString("abbrevTitle");
202
//                int id = rs.getInt("id");
203
//                UUID uuid = UUID.fromString(rs.getString("uuid"));
204
//                String titleCache = rs.getString("titleCache");
205
//                String abbrevTitleCache = rs.getString("abbrevTitleCache");
206
//                String typeStr = rs.getString("refType");
207
//                ReferenceType type = ReferenceType.valueOf(typeStr);
208
//
209
//                ReferenceMapping.ReferenceWrapper wrapping = refMapping.new ReferenceWrapper(title, id, uuid, titleCache, type) ;
210
//                refMapping.titleMapping.put(title, wrapping);
211
//                wrapping = refMapping.new ReferenceWrapper(abbrevTitle, id, uuid, abbrevTitleCache, type) ;
212
//
213
//            }
214
//        } catch (SQLException e) {
215
//            e.printStackTrace();
216
//        }
217
	}
218

  
219
    private void unloadReferenceMap(){
220
	    refMapping.unload();
221
	    refMapping = null;
222
	}
223

  
119 224

  
120 225
	@Override
121 226
	protected void doInvoke(BerlinModelImportState state) {
122
		//update rank labels if necessary
227
	    loadReferenceMap(state);
228

  
229
	    //update rank labels if necessary
123 230
		String strAbbrev = state.getConfig().getInfrGenericRankAbbrev();
124 231
		Rank rank = Rank.INFRAGENERICTAXON();
125 232
		testRankAbbrev(strAbbrev, rank);
......
129 236
		testRankAbbrev(strAbbrev, rank);
130 237

  
131 238
		super.doInvoke(state);
239
		unloadReferenceMap();
240
		printMatchResults();
132 241
	}
133 242

  
134 243
	private void testRankAbbrev(String strAbbrev, Rank rank) {
......
243 352
					cdmAttrName = "nomenclaturalMicroReference";
244 353
					success &= ImportHelper.addStringValue(rs, taxonName, dbAttrName, cdmAttrName, BLANK_TO_NULL);
245 354

  
355
                    //authorTeams
356
                    if (teamMap != null ){
357
                        taxonName.setCombinationAuthorship(getAuthorTeam(teamMap, authorFk, nameId, config));
358
                        taxonName.setExCombinationAuthorship(getAuthorTeam(teamMap, exAuthorFk, nameId, config));
359
                        taxonName.setBasionymAuthorship(getAuthorTeam(teamMap, basAuthorFk, nameId, config));
360
                        taxonName.setExBasionymAuthorship(getAuthorTeam(teamMap, exBasAuthorFk, nameId, config));
361
                    }else{
362
                        logger.warn("TeamMap is null");
363
                        success = false;
364
                    }
365

  
246 366
					//nomRef
247
					success &= makeNomenclaturalReference(config, taxonName, nameId, rs, partitioner);
367
					success &= makeNomenclaturalReference(state, taxonName, nameId, rs, partitioner);
248 368

  
249 369
					//Source_Acc
250 370
					boolean colExists = true;
......
267 387
					success &= doIdCreatedUpdatedNotes(state, taxonName, rs, nameId, NAMESPACE, excludeUpdated, excludeNotes);
268 388
					handleNameNotes(state, taxonName, rs, nameId);
269 389

  
270
					//NonViralName
271
					if (taxonName.isNonViral()){
272
						INonViralName nonViralName = taxonName;
273

  
274
						//authorTeams
275
						if (teamMap != null ){
276
							nonViralName.setCombinationAuthorship(getAuthorTeam(teamMap, authorFk, nameId, config));
277
							nonViralName.setExCombinationAuthorship(getAuthorTeam(teamMap, exAuthorFk, nameId, config));
278
							nonViralName.setBasionymAuthorship(getAuthorTeam(teamMap, basAuthorFk, nameId, config));
279
							nonViralName.setExBasionymAuthorship(getAuthorTeam(teamMap, exBasAuthorFk, nameId, config));
280
						}else{
281
							logger.warn("TeamMap is null");
282
							success = false;
283
						}
284
					}//nonviralName
285

  
286

  
287

  
288 390
					//zoologicalName
289 391
					if (taxonName.isZoological()){
290 392
						IZoologicalName zooName = taxonName;
......
324 426

  
325 427
//		logger.info( i + " names handled");
326 428
		getNameService().save(namesToSave);
429
//		printMatchResults();
327 430
		return success;
328 431
	}
329 432

  
330 433

  
331 434
	/**
435
     *
436
     */
437
    private void printMatchResults() {
438
        for (MatchType type : MatchType.values()){
439
            List<String> list = matchResults.get(type);
440
            list = list == null? new ArrayList<>(): list;
441
            Collections.sort(list);
442
            System.out.println("\n" + type.toString() + " " + list.size());
443
            System.out.println("=============================");
444
            for (String result : list){
445
                System.out.println(result);
446
            }
447
        }
448

  
449
    }
450

  
451

  
452
    /**
332 453
     * @param state
333 454
     * @param taxonName
334 455
     * @param rs
......
436 557
			Set<String> teamIdSet = new HashSet<>();
437 558
			Set<String> referenceIdSet = new HashSet<>();
438 559
			Set<String> refDetailIdSet = new HashSet<>();
560
			Set<Integer> prelimRefDetailCandidateIdSet = new HashSet<>();
439 561
			while (rs.next()){
440 562
				handleForeignKey(rs, teamIdSet, "AuthorTeamFk");
441 563
				handleForeignKey(rs, teamIdSet, "ExAuthorTeamFk");
......
443 565
				handleForeignKey(rs, teamIdSet, "ExBasAuthorTeamFk");
444 566
				handleForeignKey(rs, referenceIdSet, "nomRefFk");
445 567
				handleForeignKey(rs, refDetailIdSet, "nomRefDetailFk");
568
				prelimRefDetailCandidateIdSet.addAll(getPreliminaryIdCandidates(state, rs));
446 569
			}
447 570

  
448 571
			//team map
......
469 592
            Map<String, Reference> refDetailMap= (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
470 593
			result.put(nameSpace, refDetailMap);
471 594

  
595
	         //prelim map
596
            nameSpace = NAMESPACE_PRELIM;
597
            cdmClass = Reference.class;
598
            List<Reference> list = getReferenceService().findById(prelimRefDetailCandidateIdSet);
599
            Map<String, Reference> prelimMap = new HashMap<>();
600
            for (Reference ref : list){
601
                prelimMap.put(String.valueOf(ref.getId()), ref);
602
            }
603
            result.put(nameSpace, prelimMap);
604

  
472 605
		} catch (SQLException e) {
473 606
			throw new RuntimeException(e);
474 607
		}
......
540 673
	}
541 674

  
542 675

  
543
	private boolean makeNomenclaturalReference(BerlinModelImportConfigurator config, TaxonName taxonName,
676
	private boolean makeNomenclaturalReference(BerlinModelImportState state, TaxonName taxonName,
544 677
					int nameId, ResultSet rs, @SuppressWarnings("rawtypes") ResultSetPartitioner partitioner) throws SQLException{
678
	    BerlinModelImportConfigurator config = state.getConfig();
545 679

  
546 680
	    @SuppressWarnings("unchecked")
547 681
        Map<String, Reference> refMap = partitioner.getObjectMap(BerlinModelReferenceImport.REFERENCE_NAMESPACE);
......
563 697
					getReferenceFromMaps(refDetailMap, refMap, nomRefDetailFk, nomRefFk);
564 698

  
565 699
				if(config.isDoPreliminaryRefDetailsWithNames() && refDetailPrelim){
566
				    makePrelimRefDetailRef(config, rs, taxonName, nameId);
567
				}
568

  
569
				//setNomRef
570
				if (nomReference == null ){
571
					//TODO
572
					if (! config.isIgnoreNull()){
573
						logger.warn("Nomenclatural reference (nomRefFk = " + nomRefFk + ") for TaxonName (nameId = " + nameId + ")"+
574
							" was not found in reference store. Nomenclatural reference not set!!");
575
					}
700
				    makePrelimRefDetailRef(state, rs, partitioner, taxonName, nameId);
576 701
				}else{
577
					if (! INomenclaturalReference.class.isAssignableFrom(nomReference.getClass())){
578
						logger.warn("Nomenclatural reference (nomRefFk = " + nomRefFk + ") for TaxonName (nameId = " + nameId + ")"+
579
								" is not assignable from INomenclaturalReference. (Class = " + nomReference.getClass()+ ")");
580
					}
581
					nomReference.setNomenclaturallyRelevant(true);
582
					taxonName.setNomenclaturalReference(nomReference);
702

  
703
    				//setNomRef
704
    				if (nomReference == null ){
705
    					//TODO
706
    					if (! config.isIgnoreNull()){
707
    						logger.warn("Nomenclatural reference (nomRefFk = " + nomRefFk + ") for TaxonName (nameId = " + nameId + ")"+
708
    							" was not found in reference store. Nomenclatural reference not set!!");
709
    					}
710
    				}else{
711
    					if (! INomenclaturalReference.class.isAssignableFrom(nomReference.getClass())){
712
    						logger.warn("Nomenclatural reference (nomRefFk = " + nomRefFk + ") for TaxonName (nameId = " + nameId + ")"+
713
    								" is not assignable from INomenclaturalReference. (Class = " + nomReference.getClass()+ ")");
714
    					}
715
    					nomReference.setNomenclaturallyRelevant(true);
716
    					taxonName.setNomenclaturalReference(nomReference);
717
    				}
583 718
				}
584 719
			}
585 720
		}
......
589 724

  
590 725
	private INonViralNameParser<?> parser = NonViralNameParserImpl.NewInstance();
591 726

  
727

  
728
    private class ReferenceCandidate{
729
        Reference ref;
730
        String detail;
731
        private ReferenceCandidate(Reference ref, String detail) {
732
            this.ref = ref;
733
            this.detail = detail;
734
        }
735
        public Integer getId() {
736
            return ref.getId();
737
        }
738
        @Override
739
        public String toString(){
740
            return ref.toString() + ": " + detail;
741
        }
742
    }
743

  
744
    private class FinalCandidate{
745
        private FinalCandidate(ReferenceCandidate candidate, ReferenceCandidate exemplar, MatchResult matchResult) {
746
            this.candidate = candidate;
747
            this.exemplar = exemplar;
748
            this.matchResult = matchResult;
749
        }
750
        ReferenceCandidate candidate;
751
        ReferenceCandidate exemplar;
752
        MatchResult matchResult;
753

  
754
        @Override
755
        public String toString(){
756
            return candidate.toString() + " <-> " + exemplar.toString() + "\n   " + matchResult.toString()+"\n";
757
        }
758
    }
759

  
592 760
	/**
593 761
     * @param config
594 762
     * @param rs
763
	 * @param partitioner
595 764
     * @param taxonName
596 765
     * @param nameId
597 766
	 * @throws SQLException
598 767
     */
599
    private void makePrelimRefDetailRef(IImportConfigurator config, ResultSet rs, TaxonName taxonName, int nameId) throws SQLException {
768
    private void makePrelimRefDetailRef(BerlinModelImportState state, ResultSet rs, @SuppressWarnings("rawtypes") ResultSetPartitioner partitioner,
769
            TaxonName taxonName, int nameId) throws SQLException {
770

  
771
        int refDetailId = rs.getInt("RefDetailId");
772
        @SuppressWarnings("unchecked")
773
        Map<String, Reference> refMap = partitioner.getObjectMap(NAMESPACE_PRELIM);
774

  
775
        String nameTitleCache = taxonName.getTitleCache();
776

  
600 777
        String fullNomRefCache = rs.getString("FullNomRefCache");
778
        String detail = rs.getString("Details");
779

  
601 780
        if (fullNomRefCache == null){
602 781
            logger.warn("fullNomRefCache is null for preliminary refDetail. NameId: " + nameId);
603 782
            return;
604
        }else if (fullNomRefCache.trim().startsWith(": ")){
605
            logger.warn("fullNomRefCache starts with for preliminary refDetail. NameId: " + nameId);
783
        }
784

  
785
        fullNomRefCache = fullNomRefCache.trim();
786
        if (fullNomRefCache.startsWith(": ")){
787
            logger.warn("fullNomRefCache starts with ':' for preliminary refDetail. NameId: " + nameId);
606 788
            return;
607
        }else if (fullNomRefCache.trim().startsWith("in ")){
608
            String fullStr = taxonName.getTitleCache()+ " " + fullNomRefCache;
609
            INonViralName newName = parser.parseReferencedName(fullStr, config.getNomenclaturalCode(), taxonName.getRank());
610
            if (newName.isProtectedFullTitleCache()){
611
                Reference nomRef = ReferenceFactory.newGeneric();
612
                nomRef.setAbbrevTitleCache(fullNomRefCache, true);
613
                taxonName.setNomenclaturalReference(nomRef);
614
                //check detail
789
        }else if (fullNomRefCache.matches("[12][7890][0-9][0-9](-(1774|1832))?") && isBlank(detail)){
790
            handlePrelimYearOnly(state, rs, taxonName, nameId, refMap, fullNomRefCache, detail, refDetailId);
791
        }else{
792
            Reference genericCandidate = ReferenceFactory.newGeneric();
793
            genericCandidate.setAbbrevTitleCache(fullNomRefCache, true);
794
            Set<FinalCandidate> finalCandidates = new HashSet<>();
795
            Set<FinalCandidate> finalInRefCandidates = new HashSet<>();
796
            Set<Reference> parsedReferences = new HashSet<>();
797

  
798
            makeFinalCandidates(state, rs, taxonName, refMap,
799
                    nameTitleCache, finalCandidates,
800
                    finalInRefCandidates, parsedReferences);
801

  
802
            evaluateFinalCandidates(state, rs, taxonName, detail, genericCandidate, parsedReferences,
803
                    finalCandidates, fullNomRefCache);
804
        }
805
    }
806

  
807

  
808
    /**
809
     * @param state
810
     * @param rs
811
     * @param taxonName
812
     * @param nameId
813
     * @param refMap
814
     * @param fullNomRefCache
815
     * @param detail
816
     * @throws SQLException
817
     */
818
    private void handlePrelimYearOnly(BerlinModelImportState state, ResultSet rs, TaxonName taxonName, int nameId,
819
            Map<String, Reference> refMap, String fullNomRefCache, String detail, int refDetailId) throws SQLException {
820
        TeamOrPersonBase<?> combAuthor = taxonName.getCombinationAuthorship();
821
        Set<Integer> candidateIds = getPreliminaryIdCandidates(state, rs);
822

  
823
        boolean candidateMatches = false;
824
        for (Integer candidateId : candidateIds){
825
            Reference dedupCandidate = CdmBase.deproxy(refMap.get(String.valueOf(candidateId)));
826
            System.out.println("dedupCandidate: " + dedupCandidate.getAbbrevTitleCache());
827
            TeamOrPersonBase<?> dedupAuthor = dedupCandidate.getAuthorship();
828
            if (dedupAuthor != null && combAuthor != null){
829
                if (Objects.equals(dedupAuthor, combAuthor)){
830
                    taxonName.setNomenclaturalReference(dedupCandidate);
831
                    candidateMatches = true;
832
                }else if (Objects.equals(dedupAuthor.getNomenclaturalTitle(), combAuthor.getNomenclaturalTitle())){
833
                    logger.warn("Year nomAuthor equal in nomTitle but not same: " + dedupAuthor.getNomenclaturalTitle() + "; " + fullNomRefCache + "; nameId " + nameId);
834
                    taxonName.setNomenclaturalReference(dedupCandidate);
835
                    candidateMatches = true;
836
                }
837
            }else if (dedupCandidate.getAuthorship() == null && combAuthor != null){
838
                logger.warn("Year dedupCand and name have no author: " + fullNomRefCache + "; nameId " + nameId);
839
                taxonName.setNomenclaturalReference(dedupCandidate);
840
                candidateMatches = true;
841
            }
842
        }
843
        if (!candidateMatches){
844
            Reference yearRef = ReferenceFactory.newGeneric();
845
            VerbatimTimePeriod timePeriod = TimePeriodParser.parseStringVerbatim(fullNomRefCache);
846
            yearRef.setDatePublished(timePeriod);
847
            yearRef.setAuthorship(combAuthor);
848
            taxonName.setNomenclaturalReference(yearRef);
849
            yearRef.addImportSource(String.valueOf(refDetailId), NAMESPACE_PRELIM, state.getTransactionalSourceReference(), null);
850
            refMapping.addCandidate(yearRef, detail);
851
            //TODO
852
//                refMap.put(key, yearRef);
853
        }
854
    }
855

  
856

  
857
    private enum MatchType{
858
        UNPARSED,
859
        NO_MATCH_SINGLE_PARSE_ARTICLE_WITH_COMMA,
860
        NO_MATCH_SINGLE_PARSE_ARTICLE_NO_COMMA,
861
        NO_MATCH_SINGLE_PARSE_BOOKSECTION,
862
        NO_MATCH_SINGLE_PARSE_BOOK,
863
        NO_MATCH_SINGLE_PARSE_GENERIC,
864
        NO_MATCH_SINGLE_PARSE_OTHER,
865
        NO_MATCH_MULTI_PARSE,
866
        NO_MATCH_WITH_CANDIDATE,
867
        SINGLE_FULL_MATCH,
868
        SINGLE_INREF_MATCH,
869
        MULTI_SINGLE_PERSISTENT,
870
        MULTI_MULTI_PERSISTENT_NO_EXACT,
871
        MULTI_MULTI_PERSISTENT_MULTI_EXACT,
872
        MULTI_MULTI_PERSISTENT_SINGLE_EXACT,
873
        MULTI_NO_PERSISTENT_MULTI_EXACT,
874
        MULTI_NO_PERSISTENT_SINGLE_EXACT,
875
        MULTI_NO_PERSISTENT_NO_EXACT,
876
    }
877

  
878
    private Map<MatchType, List<String>> matchResults = new HashMap<>();
879

  
880
    /**
881
     * @param taxonName
882
     * @param detail
883
     * @param genericCandidate
884
     * @param finalCandidates
885
     * @param fullNomRefCache
886
     * @param exemplars
887
     * @throws SQLException
888
     */
889
    private void evaluateFinalCandidates(BerlinModelImportState state, ResultSet rs,
890
            TaxonName taxonName, String detail, Reference genericCandidate, Set<Reference> parsedCandidates,
891
            Set<FinalCandidate> finalCandidates, String fullNomRefCache) throws SQLException {
892

  
893
        int refDetailId = rs.getInt("RefDetailId");
894
        Set<FinalCandidate> matchingCandidates = getSuccess(finalCandidates);
895
        if (matchingCandidates.isEmpty()){
896
            taxonName.setNomenclaturalReference(genericCandidate);
897
            genericCandidate.addImportSource(String.valueOf(refDetailId), BerlinModelRefDetailImport.REFDETAIL_NAMESPACE,
898
                    state.getTransactionalSourceReference(), null);
899
            //TODO should we set this?
900
            taxonName.setNomenclaturalMicroReference(detail);
901
            if (finalCandidates.isEmpty()){
902
                if (taxonName.getCombinationAuthorship()==null){
903
                    System.out.println("nom. ref. not parsed because author is null: " + taxonName.getTitleCache());
904
                }else{
905
                    System.out.println("Final Candidates empty but author exists - should not happen: " + taxonName.getTitleCache());
906
                }
907
                handleNoMatch(state, taxonName, detail, genericCandidate, finalCandidates, fullNomRefCache, parsedCandidates);
908
//                printResult(MatchType.NO_MATCH, unparsedAndName(fullNomRefCache, taxonName));
909
            }else if (hasOnlyUnparsedExemplars(finalCandidates)){
910
                printResult(MatchType.UNPARSED, unparsedAndName(fullNomRefCache, taxonName));
911
            }else if (hasNoCandidateExemplars(finalCandidates)){
912
                //but we can define the ref type here
913
                handleNoMatch(state, taxonName, detail, genericCandidate, finalCandidates, fullNomRefCache, parsedCandidates);
914
//                printResult(MatchType.NO_MATCH, unparsedAndName(fullNomRefCache, taxonName));
915
            }else{
916
                String message = resultMessage(fullNomRefCache, finalCandidates, taxonName);
917
                printResult(MatchType.NO_MATCH_WITH_CANDIDATE, message);
918
            }
919
        }else if (matchingCandidates.size() == 1){
920
            ReferenceCandidate single = matchingCandidates.iterator().next().candidate;
921
            addAuthorAndDetail(taxonName, single);
922
            if (single.ref.isPersited()){
923
                printResult(MatchType.SINGLE_FULL_MATCH, unparsedAndName(fullNomRefCache, taxonName));
615 924
            }else{
616
                Reference nomRef = newName.getNomenclaturalReference();
617
                taxonName.setNomenclaturalReference(nomRef);
618
                String detail = newName.getNomenclaturalMicroReference();
619
                String oldDetail = taxonName.getNomenclaturalMicroReference();
620
                if (isBlank(detail)){
621
                    if (isNotBlank(oldDetail)){
622
                        logger.warn("Detail could not be parsed but seems to exist. NameId: " + nameId);
925
                single.ref.addImportSource(String.valueOf(refDetailId), BerlinModelRefDetailImport.REFDETAIL_NAMESPACE,
926
                        state.getTransactionalSourceReference(), null);
927
                printResult(MatchType.SINGLE_INREF_MATCH,  unparsedAndName(fullNomRefCache, taxonName));
928
            }
929
        }else{
930
            FinalCandidate finCand = findBestMatchingFinalCandidate(taxonName, matchingCandidates, fullNomRefCache);
931
            addAuthorAndDetail(taxonName, finCand.candidate);
932
        }
933
    }
934

  
935

  
936
    /**
937
     * @param state
938
     * @param taxonName
939
     * @param detail
940
     * @param genericCandidate
941
     * @param finalCandidates
942
     * @param fullNomRefCache
943
     * @param parsedCandidates
944
     */
945
    private void handleNoMatch(BerlinModelImportState state, TaxonName taxonName, String detail,
946
            Reference genericCandidate, Set<FinalCandidate> finalCandidates, String fullNomRefCache,
947
            Set<Reference> parsedCandidatesAsRef) {
948
        Set<FinalCandidate> parsedCandidates = getParsedExemplars(finalCandidates);
949
//        parsedCandidatesAsRef = removeGenericFromParsedReferencesAsRef();
950
//        if (parsedCandidates.size() != parsedCandidatesAsRef.size()){
951
//            System.out.println("Parsed Candidates differ in size. Should not happen");
952
//        }
953
        if (parsedCandidates.isEmpty()){
954
            System.out.println("Parsed Candidates empty. Should not happen");
955
        }else if (parsedCandidates.size() == 1){
956

  
957
            ReferenceCandidate refCand = parsedCandidates.iterator().next().exemplar;
958
            addAuthorAndDetail(taxonName, refCand);
959
            if (refCand.ref.getType() == ReferenceType.Article){
960
                if(refCand.ref.getInReference().getAbbrevTitle().contains(",")){
961
                    printResult(MatchType.NO_MATCH_SINGLE_PARSE_ARTICLE_WITH_COMMA, unparsedAndName(fullNomRefCache, taxonName));
962
                }else{
963
                    printResult(MatchType.NO_MATCH_SINGLE_PARSE_ARTICLE_NO_COMMA, unparsedAndName(fullNomRefCache, taxonName));
964
                }
965
            }else if (refCand.ref.getType() == ReferenceType.BookSection){
966
                printResult(MatchType.NO_MATCH_SINGLE_PARSE_BOOKSECTION, unparsedAndName(fullNomRefCache, taxonName));
967
            }else if (refCand.ref.getType() == ReferenceType.Book){
968
                printResult(MatchType.NO_MATCH_SINGLE_PARSE_BOOK, unparsedAndName(fullNomRefCache, taxonName));
969
            }else {
970
                printResult(MatchType.NO_MATCH_SINGLE_PARSE_OTHER, unparsedAndName(fullNomRefCache, taxonName));
971
            }
972
        }else{
973
            ReferenceCandidate generCandidate = createGenericReference(parsedCandidates, detail);
974
            addAuthorAndDetail(taxonName, generCandidate);
975
            if (generCandidate.ref.getType() == ReferenceType.Article){
976
                if(generCandidate.ref.getInReference().getAbbrevTitle().contains(",")){
977
                    printResult(MatchType.NO_MATCH_SINGLE_PARSE_ARTICLE_WITH_COMMA, unparsedAndName(fullNomRefCache, taxonName));
978
                }else{
979
                    printResult(MatchType.NO_MATCH_SINGLE_PARSE_ARTICLE_NO_COMMA, unparsedAndName(fullNomRefCache, taxonName));
980
                }
981
            }else if (generCandidate.ref.getType() == ReferenceType.BookSection){
982
                printResult(MatchType.NO_MATCH_SINGLE_PARSE_BOOKSECTION, unparsedAndName(fullNomRefCache, taxonName));
983
            }else if (generCandidate.ref.getType() == ReferenceType.Book){
984
                printResult(MatchType.NO_MATCH_SINGLE_PARSE_BOOK, unparsedAndName(fullNomRefCache, taxonName));
985
            }else if (generCandidate.ref.getType() == ReferenceType.Generic){
986
                printResult(MatchType.NO_MATCH_SINGLE_PARSE_GENERIC, unparsedAndName(fullNomRefCache, taxonName));
987
            }else {
988
                printResult(MatchType.NO_MATCH_MULTI_PARSE, unparsedAndName(fullNomRefCache, taxonName));
989
            }
990
        }
991

  
992
//        System.out.println(fullNomRefCache);
993
    }
994

  
995
    private static final Reference NO_REMAINING_SINGLE = ReferenceFactory.newGeneric();
996
    /**
997
     * @param parsedCandidates
998
     * @return
999
     */
1000
    private ReferenceCandidate createGenericReference(Set<FinalCandidate> parsedCandidates, String detail) {
1001
        Reference refGen = ReferenceFactory.newGeneric();
1002
        String title = null;
1003
        VerbatimTimePeriod datePublished = null;
1004
        String volume = null;
1005
        String series = null;
1006
        String edition = null;
1007
        TeamOrPersonBase<?> author = null;
1008

  
1009
        Reference journalCandidate = null;
1010
        Reference remainingSingle = null;
1011
        for (FinalCandidate parsedCand : parsedCandidates){
1012

  
1013
            Reference ref = parsedCand.exemplar.ref;
1014
            if (ref.getType().isArticle()){
1015
                journalCandidate = ref;
1016
            }
1017
            if (!ref.getType().isPublication()){
1018
                if (ref.getInReference().getAbbrevTitle().matches("((ser|ed)\\..*|(Beih|App|Suppl|Praef|Bot|S\u00E9r\\. Bot|Prodr|Alt|Ap|Nachtr)\\.|Apend|Texte|Atlas)")){
1019
                    continue;
1020
                }
1021
            }
1022
            if (ref.getType().isArticle()){
1023
                if (ref.getVolume() == null || ref.getInReference().getAbbrevTitle().endsWith(", ed.")){
1024
                    continue;
1025
                }
1026
            }
1027

  
1028
            //title
1029
            if (ref.getType().isPublication()){
1030
                title = verify(title, ref.getAbbrevTitle());
1031
            }else{
1032
                title = verify(title, ref.getInReference().getAbbrevTitle());
1033
            }
1034
            //volume
1035
            if (ref.getType().isVolumeReference()){
1036
                volume = verify(volume, ref.getVolume());
1037
            }else{
1038
                volume = verify(volume, ref.getInReference().getVolume());
1039
            }
1040
            //edition
1041
            if (ref.getType().isVolumeReference()){
1042
                edition = verify(edition, ref.getEdition());
1043
            }else{
1044
                edition = verify(edition, ref.getInReference().getEdition());
1045
            }
1046
            //series
1047
            if (ref.getType().isVolumeReference()){
1048
                series = verify(series, ref.getSeriesPart());
1049
            }else{
1050
                series = verify(series, ref.getInReference().getSeriesPart());
1051
            }
1052
            //datePublished
1053
            datePublished = verify(datePublished, ref.getDatePublished());
1054
            //datePublished
1055
            author = verify(author, ref.getAuthorship());
1056

  
1057
            remainingSingle = remainingSingle == null? ref : NO_REMAINING_SINGLE;
1058
        }
1059

  
1060
        if (remainingSingle == null){
1061
            System.out.println("No remaing ref. This should not happen.");
1062
        }else if (remainingSingle != NO_REMAINING_SINGLE){
1063
            refGen = remainingSingle;
1064
        }else if (IJournal.guessIsJournalName(title) && journalCandidate != null){
1065
            refGen = journalCandidate;
1066
        }else{
1067
            refGen.setAbbrevTitle(title);
1068
            refGen.setVolume(volume);
1069
            refGen.setEdition(edition);
1070
            refGen.setSeriesPart(series);
1071
            refGen.setDatePublished(datePublished);
1072
            refGen.setAuthorship(author);
1073
        }
1074

  
1075
        ReferenceCandidate cand = new ReferenceCandidate(refGen, detail);
1076
        return cand;
1077
    }
1078

  
1079

  
1080
    /**
1081
     * @param existing
1082
     * @param newText
1083
     * @return
1084
     */
1085
    private <T extends Object> T verify(T existing, T newText) {
1086
        if (existing == null){
1087
            return newText;
1088
        }else if (existing.equals(newText)){
1089
            return existing;
1090
        }else if (newText == null){
1091
            logger.warn("Text not verified, missing, before: " + existing);
1092
            return existing;
1093
        }else{
1094
            logger.warn("Text not verified, differs: " +  existing + "<->" +newText);
1095
            return existing;
1096
        }
1097
    }
1098

  
1099

  
1100
    /**
1101
     * @param finalCandidates
1102
     * @return
1103
     */
1104
    private boolean hasNoCandidateExemplars(Set<FinalCandidate> finalCandidates) {
1105
        for (FinalCandidate finalCandidate : finalCandidates){
1106
            if (finalCandidate.matchResult != UNPARSED_EXEMPLAR && finalCandidate.matchResult != PARSED_NO_CANDIDATE ){
1107
                return false;
1108
            }
1109
        }
1110
        return true;
1111
    }
1112

  
1113
    private Set<FinalCandidate> getParsedExemplars(Set<FinalCandidate> finalCandidates) {
1114
        Set<FinalCandidate> parsedCandidates = new HashSet<>();
1115
        for (FinalCandidate finalCandidate : finalCandidates){
1116
            if (finalCandidate.matchResult != UNPARSED_EXEMPLAR){
1117
                parsedCandidates.add(finalCandidate);
1118
            }
1119
        }
1120
        return parsedCandidates;
1121
    }
1122

  
1123
    /**
1124
     * @param finalCandidates
1125
     * @return
1126
     */
1127
    private boolean hasOnlyUnparsedExemplars(Set<FinalCandidate> finalCandidates) {
1128
        for (FinalCandidate finalCandidate : finalCandidates){
1129
            if (finalCandidate.matchResult != UNPARSED_EXEMPLAR){
1130
                return false;
1131
            }
1132
        }
1133
        return true;
1134
    }
1135

  
1136

  
1137
    /**
1138
     * @param taxonName
1139
     * @param single
1140
     */
1141
    private void addAuthorAndDetail(TaxonName taxonName, ReferenceCandidate refCand) {
1142
        if (!CdmUtils.nullSafeEqual(refCand.ref.getAuthorship(), taxonName.getCombinationAuthorship())){
1143
            TeamOrPersonBase<?> refAut = refCand.ref.getAuthorship();
1144
            TeamOrPersonBase<?> nameAut = taxonName.getCombinationAuthorship();
1145
            try {
1146
                MatchResult match = MatchStrategyFactory.NewParsedTeamOrPersonInstance().invoke(refAut, nameAut, true);
1147
                if (match.isFailed()){
1148
                    System.out.println("not same author \n"+ match);
1149
                }else{
1150
                    taxonName.setCombinationAuthorship(refAut);
1151
                }
1152
            } catch (MatchException e) {
1153
                // TODO Auto-generated catch block
1154
                e.printStackTrace();
1155
            }
1156
        }
1157
        //FIXME deduplicate
1158
        taxonName.setNomenclaturalReference(refCand.ref);
1159
        taxonName.setNomenclaturalMicroReference(refCand.detail);
1160
    }
1161

  
1162

  
1163

  
1164
//
1165
//    /**
1166
//     * @param finalCandidates
1167
//     * @return
1168
//     */
1169
//    private int getSuccessCount(Set<FinalCandidate> finalCandidates) {
1170
//        int i = 0;
1171
//        for (FinalCandidate candidate : finalCandidates){
1172
//            if (candidate.matchResult.isSuccessful()){
1173
//                i++;
1174
//            }
1175
//        }
1176
//        return i;
1177
//    }
1178

  
1179
    /**
1180
     * @param fullNomRefCache
1181
     * @param taxonName
1182
     * @return
1183
     */
1184
    private String unparsedAndName(String fullNomRefCache, TaxonName taxonName) {
1185
        return fullNomRefCache +" | " + taxonName.getFullTitleCache();
1186
    }
1187

  
1188

  
1189
    /**
1190
     * @param noMatch
1191
     * @param fullTitleCache
1192
     */
1193
    private void printResult(MatchType type, String text) {
1194
        List<String> list = matchResults.get(type);
1195
        if (list == null){
1196
            list = new ArrayList<>();
1197
            matchResults.put(type, list);
1198
        }
1199
        list.add(text);
1200

  
1201
    }
1202

  
1203

  
1204
    /**
1205
     * @param fullNomRefCache
1206
     * @param finalCandidates
1207
     * @param taxonName
1208
     * @return
1209
     */
1210
    private String resultMessage(String fullNomRefCache, Set<FinalCandidate> finalCandidates, TaxonName taxonName) {
1211
        String result = unparsedAndName(fullNomRefCache, taxonName)+"\n";
1212
        result += finalCandidates.size() +": " + matchResultMessage(finalCandidates);
1213
        return result;
1214
    }
1215

  
1216

  
1217
    /**
1218
     * @param finalCandidates
1219
     * @param result
1220
     * @return
1221
     */
1222
    private String matchResultMessage(Set<FinalCandidate> finalCandidates) {
1223
        String result = "\n     ";
1224
        for (FinalCandidate finalCand : finalCandidates){
1225
            result += finalCand.matchResult.toString()+"\n     ";
1226
        }
1227
        return result;
1228
    }
1229

  
1230

  
1231
    private Set<FinalCandidate> getSuccess(Set<FinalCandidate> finalCandidates) {
1232
        Set<FinalCandidate> result = new HashSet<>();
1233
        for (FinalCandidate candidate : finalCandidates){
1234
            if (candidate.matchResult.isSuccessful()){
1235
               result.add(candidate);
1236
            }
1237
        }
1238
        return result;
1239
    }
1240

  
1241

  
1242
    /**
1243
     * @param taxonName
1244
     * @param finalCandidates
1245
     * @param exemplars
1246
     * @return
1247
     */
1248
    private FinalCandidate findBestMatchingFinalCandidate(TaxonName taxonName,
1249
            Set<FinalCandidate> finalCandidates, String fullNomRefCache) {
1250
        try {
1251
            Set<FinalCandidate> persistentMatches = findPersistentMatch(taxonName, finalCandidates);
1252
            if (persistentMatches.size() >= 1){
1253
                if (persistentMatches.size()>1){
1254
                    Set<FinalCandidate> exactMatches = findExactMatch(taxonName, finalCandidates);
1255
                    Set<FinalCandidate> successCandidatesExacts = getSuccess(exactMatches);
1256
                    if (successCandidatesExacts.size() >= 1){
1257
                        FinalCandidate result = successCandidatesExacts.iterator().next();
1258
                        addAuthorAndDetail(taxonName, result.candidate);
1259
//                        String message = resultMessage(fullNomRefCache, exactMatches, taxonName);
1260
                        if (successCandidatesExacts.size()>1){
1261
                            printResult(MatchType.MULTI_MULTI_PERSISTENT_MULTI_EXACT, unparsedAndName(fullNomRefCache, taxonName));
1262
                        }else{
1263
                            printResult(MatchType.MULTI_MULTI_PERSISTENT_SINGLE_EXACT, unparsedAndName(fullNomRefCache, taxonName));
1264
                        }
1265
                        return result;
1266
                    }else{
1267
                        String message = resultMessage(fullNomRefCache, successCandidatesExacts, taxonName);
1268
                        printResult(MatchType.MULTI_MULTI_PERSISTENT_NO_EXACT, message);
1269
                        FinalCandidate result = persistentMatches.iterator().next();
1270
                        addAuthorAndDetail(taxonName, result.candidate);
1271
                        return result;
623 1272
                    }
624 1273
                }else{
625
                    if (isNotBlank(oldDetail) && !detail.equals(oldDetail)){
626
                        logger.warn("Details differ: " +  detail + " <-> " + oldDetail + ". NameId: " + nameId);
1274
                    FinalCandidate result = persistentMatches.iterator().next();
1275
                    addAuthorAndDetail(taxonName, result.candidate);
1276
                    printResult(MatchType.MULTI_SINGLE_PERSISTENT, taxonName.getFullTitleCache());
1277
                    return result;
1278
                }
1279
            }
1280
            Set<FinalCandidate> exactMatches = findExactMatch(taxonName, finalCandidates);
1281
            Set<FinalCandidate> successCandidatesExacts = getSuccess(exactMatches);
1282
            if (successCandidatesExacts.size() >= 1){
1283
                FinalCandidate result = successCandidatesExacts.iterator().next();
1284
                addAuthorAndDetail(taxonName, result.candidate);
1285
                String message = resultMessage(fullNomRefCache, exactMatches, taxonName);
1286
                if (successCandidatesExacts.size()>1){
1287
                    printResult(MatchType.MULTI_NO_PERSISTENT_MULTI_EXACT, message);
1288
//                    System.out.println("More then 1 exact match: " + taxonName.getFullTitleCache() + ": " + exactMatches.iterator().next().exemplar.ref.getAbbrevTitleCache());
1289
                }else{
1290
                    printResult(MatchType.MULTI_NO_PERSISTENT_SINGLE_EXACT, message);
1291
                }
1292
                return result;
1293
            }else{
1294
                FinalCandidate result = finalCandidates.iterator().next();
1295
                addAuthorAndDetail(taxonName, result.candidate);
1296
                String message = resultMessage(fullNomRefCache, exactMatches, taxonName);
1297
                printResult(MatchType.MULTI_NO_PERSISTENT_NO_EXACT, message);
1298
                return result;
1299
            }
1300
        } catch (MatchException e) {
1301
            e.printStackTrace();
1302
            return finalCandidates.iterator().next();
1303
        }
1304
    }
1305

  
1306

  
1307

  
1308
    /**
1309
     * @param taxonName
1310
     * @param finalCandidates
1311
     * @return
1312
     */
1313
    private String getMultiMultiPersistentMessage(TaxonName taxonName, Set<FinalCandidate> finalCandidates) {
1314
        String result = finalCandidates.size() + ":" + taxonName.getFullTitleCache();
1315
        result += matchResultMessage(finalCandidates);
1316
        return result;
1317
    }
1318
    private String getMultiNoPersistentMultiExactMessage(TaxonName taxonName, Set<FinalCandidate> finalCandidates) {
1319
        String result = finalCandidates.size() + ":" + taxonName.getFullTitleCache();
1320
        result += matchResultMessage(finalCandidates);
1321
        return result;
1322
    }
1323

  
1324
    /**
1325
     * @param taxonName
1326
     * @param finalCandidates
1327
     * @param exemplars
1328
     * @return
1329
     * @throws MatchException
1330
     */
1331
    private Set<FinalCandidate> findExactMatch(TaxonName taxonName, Set<FinalCandidate> finalCandidates) throws MatchException {
1332
        IMatchStrategyEqual exactMatcher = getExactMatcher();
1333
        Set<FinalCandidate> result = new HashSet<>();
1334
        for (FinalCandidate cand : finalCandidates){
1335
            Reference exemplarRef = cand.exemplar.ref;
1336
            if (cand.candidate.ref.getType().equals(exemplarRef.getType())){
1337
                MatchResult match = exactMatcher.invoke(cand.candidate.ref, exemplarRef, true);
1338
                result.add(new FinalCandidate(cand.candidate, cand.exemplar, match));
1339
                if (match.isFailed()){
1340
                    String oldTitle = exemplarRef.getTitle();
1341
                    exemplarRef.setTitle(exemplarRef.getAbbrevTitle());
1342
                    match = exactMatcher.invoke(cand.candidate.ref, exemplarRef, true);
1343
                    if (match.isSuccessful()){
1344
                        result.add(new FinalCandidate(cand.candidate, cand.exemplar, match));
627 1345
                    }
628
                    taxonName.setNomenclaturalMicroReference(detail);
1346
                    exemplarRef.setTitle(oldTitle);
629 1347
                }
1348
            }else{
1349
                MatchResult match = MatchResult.NewNoTypeInstance(cand.candidate.ref.getType(), exemplarRef.getType());
1350
                FinalCandidate finCand = new FinalCandidate(cand.candidate, cand.exemplar, match);
1351
                result.add(finCand);
630 1352
            }
631
        }else{
632
            String fullStrComma = taxonName.getTitleCache()+ ", " + fullNomRefCache;
633
            String fullStrIn = taxonName.getTitleCache()+ " in " + fullNomRefCache;
634
            INonViralName newNameComma = parser.parseReferencedName(fullStrComma, config.getNomenclaturalCode(), taxonName.getRank());
635
            INonViralName newNameIn = parser.parseReferencedName(fullStrIn, config.getNomenclaturalCode(), taxonName.getRank());
636

  
637
            INonViralName newName;
638
            boolean commaProtected = newNameComma.isProtectedFullTitleCache() || (newNameComma.getNomenclaturalReference() != null
639
                    && newNameComma.getNomenclaturalReference().isProtectedTitleCache());
640
            boolean inProtected = newNameIn.isProtectedFullTitleCache() || (newNameIn.getNomenclaturalReference() != null
641
                    && newNameIn.getNomenclaturalReference().isProtectedTitleCache());
642
            if (commaProtected && !inProtected){
643
                newName = newNameIn;
644
            }else if (!commaProtected && inProtected){
645
                newName = newNameComma;
646
            }else if (commaProtected && inProtected){
647
                logger.warn("Can't parse preliminary refDetail: " +  fullNomRefCache + " for name " + taxonName.getTitleCache() + "; nameId: " + nameId );
648
                newName = newNameComma;
1353
        }
1354
        return result;
1355
    }
1356

  
1357
    /**
1358
     * @return
1359
     */
1360
    private IMatchStrategyEqual getExactMatcher() {
1361
        IMatchStrategyEqual result = MatchStrategyFactory.NewDefaultInstance(Reference.class);
1362
        FieldMatcher inRefMatcher = result.getMatching().getFieldMatcher("inReference");
1363
        try {
1364
            inRefMatcher.getMatchStrategy().setMatchMode("title", MatchMode.EQUAL);
1365
            return result;
1366
        } catch (MatchException e) {
1367
            throw new RuntimeException("Problems creating exact matcher.", e);
1368
        }//must not be EXACT_REQUIRED
1369
    }
1370

  
1371

  
1372
    private Set<FinalCandidate> findPersistentMatch(TaxonName taxonName, Set<FinalCandidate> finalCandidates) throws MatchException {
1373
        Set<FinalCandidate> result = new HashSet<>();
1374
        for (FinalCandidate cand : finalCandidates){
1375
            if (cand.candidate.ref.isPersited()){
1376
                result.add(cand);
1377
            }
1378
        }
1379

  
1380
        return result;
1381
    }
1382

  
1383

  
1384
    /**
     * Placeholder result used by {@link #makeFinalCandidates} for exemplars whose
     * reference has a protected title cache or protected abbreviated title cache.
     * Only compared by identity in this class.
     */
    public static final MatchResult UNPARSED_EXEMPLAR = new MatchResult();

    /**
     * Placeholder result used by {@link #makeFinalCandidates} for exemplars for which
     * no candidate ids were found. Only compared by identity in this class.
     */
    public static final MatchResult PARSED_NO_CANDIDATE = new MatchResult();

    //static initializer: the shared constants are set up once per class, not once per
    //instance. NOTE(review): the null matching presumably marks both results as
    //failed - confirm against MatchResult.
    static {
        UNPARSED_EXEMPLAR.addNullMatching(null, null);
        PARSED_NO_CANDIDATE.addNullMatching(null, null);
    }
1403
    private void makeFinalCandidates(BerlinModelImportState state, ResultSet rs, TaxonName taxonName,
1404
            Map<String, Reference> refMap, String nameTitleCache,
1405
            Set<FinalCandidate> finalCandidates,
1406
            Set<FinalCandidate> finalInRefCandidates, Set<Reference> parsedReferences
1407
            ) throws SQLException {
1408

  
1409
        Set<Integer> candidateIds = getPreliminaryIdCandidates(state, rs);
1410
        Set<TaxonName> nameCandidates = parseExemplars(state, rs, taxonName);
1411

  
1412
        Set<ReferenceCandidate> exemplars = new HashSet<>();
1413
        for(TaxonName nameCandidate: nameCandidates){
1414
            if(nameCandidate.getNomenclaturalReference()!= null){
1415
                exemplars.add(new ReferenceCandidate(nameCandidate.getNomenclaturalReference(), nameCandidate.getNomenclaturalMicroReference()));
1416
                parsedReferences.add(nameCandidate.getNomenclaturalReference());
1417
            }
1418
        }
1419

  
1420
        for(ReferenceCandidate exemplar: exemplars){
1421
            if (exemplar.ref.isProtectedAbbrevTitleCache() || exemplar.ref.isProtectedTitleCache()){
1422
                FinalCandidate parsedNoCandidateExemplarCandidate = new FinalCandidate(null, exemplar, UNPARSED_EXEMPLAR);
1423
                finalCandidates.add(parsedNoCandidateExemplarCandidate);
1424
            }else if (candidateIds.isEmpty()){
1425
                FinalCandidate unparsedExemplarCandidate = new FinalCandidate(null, exemplar, PARSED_NO_CANDIDATE);
1426
                finalCandidates.add(unparsedExemplarCandidate);
649 1427
            }else{
650
                logger.warn("Can't decide ref type for preliminary refDetail: " +  fullNomRefCache + " for name " + taxonName.getTitleCache() + "; nameId: " + nameId );
651
                newName = newNameComma;
1428
                for (Integer candidateId : candidateIds){
1429
                    if (candidateId == null){
1430
                        logger.warn("CandidateId not found: " + candidateId);
1431
                        continue;
1432
                    }
1433
                    Reference dedupCandidate = CdmBase.deproxy(refMap.get(String.valueOf(candidateId)));
1434

  
1435
                    //ref
1436
                    FinalCandidate cand = matchSingle(finalCandidates, dedupCandidate, exemplar, nameTitleCache);
1437
                    //inRef
1438
                    if (cand.matchResult.isFailed() && exemplar.ref.getInReference() != null ){
1439
                        FinalCandidate candInRef = matchSingle(finalInRefCandidates, dedupCandidate, new ReferenceCandidate(exemplar.ref.getInReference(), null), nameTitleCache);
1440
                        if(candInRef.matchResult.isSuccessful()){
1441
                            Reference clone = (Reference)exemplar.ref.clone();
1442
                            clone.setInReference(dedupCandidate);
1443
                            FinalCandidate inRefCand = new FinalCandidate(new ReferenceCandidate(clone, exemplar.detail),
1444
                                    exemplar, candInRef.matchResult);
1445
                            finalCandidates.add(inRefCand);
1446
                        }
1447
                    }
1448
                }
652 1449
            }
1450
        }
1451

  
1452
        return;
1453
    }
1454

  
653 1455

  
1456
    /**
1457
     * @param finalCandidates
1458
     * @param refCandidate
1459
     * @param exemplar
1460
     * @param fullNomRefCache
1461
     * @param nameTitleCache
1462
     */
1463
    protected FinalCandidate matchSingle(Set<FinalCandidate> finalCandidates, Reference dedupCandidate,
1464
            ReferenceCandidate exemplar, String nameTitleCache) {
1465

  
1466
        try {
1467
            MatchResult match = null;
1468
            FinalCandidate finalCand;
1469
            IMatchStrategy matchStrategy = getReferenceMatchStrategy();
1470
            Reference refExemplar = exemplar.ref;
1471
            if(refExemplar.getType().equals(dedupCandidate.getType())){
1472
                TeamOrPersonBase<?> exemplarAuthor = refExemplar.getAuthorship();
1473
                TeamOrPersonBase<?> candidateAuthor = CdmBase.deproxy(dedupCandidate.getAuthorship());
1474
                String cache = refExemplar.getTitleCache();
1475
                String ccache = dedupCandidate.getTitleCache();
1476
                String abbrevCache = refExemplar.getAbbrevTitleCache();
1477
                String cabbrevCache = dedupCandidate.getAbbrevTitleCache();
1478
                if (exemplarAuthor != null && candidateAuthor != null){
1479
                    exemplarAuthor.getTitleCache();
1480
                    String exemplarAuthorStr = exemplarAuthor.getNomenclaturalTitle();
1481
//                    System.out.println(exemplarAuthor.getTitleCache());
1482
                    String candidateAuthorStr = candidateAuthor.getNomenclaturalTitle();
1483
//                    System.out.println(candidateAuthor.getTitleCache());
1484
                    if (!exemplarAuthorStr.equals(candidateAuthorStr)){
1485
                        match = MatchResult.NewInstance(":authorship", MatchMode.EQUAL, exemplarAuthorStr, candidateAuthorStr);
1486
                    }
1487
                }
1488

  
1489
                if (match == null){
1490
                    match = matchStrategy.invoke(dedupCandidate, refExemplar, true);
1491
                }
654 1492

  
655
            if (newName.isProtectedFullTitleCache()){
656
                Reference nomRef = ReferenceFactory.newGeneric();
657
                nomRef.setAbbrevTitleCache(fullNomRefCache, true);
658
                taxonName.setNomenclaturalReference(nomRef);
659
                //check detail
1493
                //TODO detail match
1494
                //TODO formatter match
1495
                if (true){
1496
//                    return true;
1497
                }else if (refExemplar.getInReference() != null && dedupCandidate.getInReference() != null){
1498
//                    boolean matchInRef = matchStrategy.invoke(dedupCandidate.getInReference(), refExemplar.getInReference());
1499
//                    if(matchInRef){
1500
//                        Reference clone = (Reference)refExemplar.clone();
1501
//                        clone.setInReference(dedupCandidate.getInReference());
1502
//                        finalCandidates.add(new ReferenceCandidate(clone, exemplar.detail));
1503
//                    }
1504
                }
660 1505
            }else{
661
                Reference nomRef = newName.getNomenclaturalReference();
662
                taxonName.setNomenclaturalReference(nomRef);
663
                String detail = newName.getNomenclaturalMicroReference();
664
                String oldDetail = taxonName.getNomenclaturalMicroReference();
665
                if (isBlank(detail)){
666
                    if (isNotBlank(oldDetail)){
667
                        logger.warn("Detail could not be parsed but seems to exist. NameId: " + nameId);
1506
                match = MatchResult.NewNoTypeInstance(refExemplar.getType(), dedupCandidate.getType());
1507
            }
1508
            finalCand = new FinalCandidate(new ReferenceCandidate(dedupCandidate, exemplar.detail), exemplar, match);
1509
            finalCandidates.add(finalCand);
1510

  
1511
            return finalCand;
1512
        } catch (MatchException e) {
1513
            e.printStackTrace();
1514
            throw new RuntimeException();
1515
        }
1516
    }
1517

  
1518

  
1519
    private IMatchStrategy referenceMatchStrategy;
1520

  
1521
    /**
1522
     * @return
1523
     * @throws MatchException
1524
     */
1525
    protected IMatchStrategy getReferenceMatchStrategy() throws MatchException {
1526

  
1527
        if (referenceMatchStrategy == null){
1528
            referenceMatchStrategy = MatchStrategyFactory.NewParsedReferenceInstance();
1529
        }
1530
//        if (referenceMatchStrategy == null){
1531
//            referenceMatchStrategy = DefaultMatchStrategy.NewInstance(Reference.class);
1532
//
1533
//            referenceMatchStrategy.setMatchMode("title", MatchMode.EQUAL_OR_SECOND_NULL);
1534
//            referenceMatchStrategy.setMatchMode("placePublished", MatchMode.EQUAL_OR_SECOND_NULL);
1535
//            @SuppressWarnings("rawtypes")
1536
//            SubClassMatchStrategy<TeamOrPersonBase> refAuthorMatchStrategy = SubClassMatchStrategy
1537
//                    .NewInstance(TeamOrPersonBase.class, Person.class, Team.class);
1538
//            refAuthorMatchStrategy.setMatchMode(Person.class, "familyName", MatchMode.EQUAL_OR_SECOND_NULL);
1539
//            refAuthorMatchStrategy.setMatchMode(Person.class, "givenName", MatchMode.EQUAL_OR_SECOND_NULL);
1540
//            refAuthorMatchStrategy.setMatchMode(Person.class, "initials", MatchMode.EQUAL_OR_SECOND_NULL);
1541
//            referenceMatchStrategy.setMatchMode("authorship", MatchMode.MATCH, refAuthorMatchStrategy);
1542
//
1543
//            //for testing only
1544
////            referenceMatchStrategy = null;
1545
////            FieldMatcher autMatcher = referenceMatchStrategy.getMatching().getFieldMatcher("authorship");
1546
//        }
1547
        return referenceMatchStrategy;
1548
    }
1549

  
1550
    private Set<Integer> getPreliminaryIdCandidates(BerlinModelImportState state, ResultSet rs) throws SQLException{
1551

  
1552
        Set<Integer> result = new HashSet<>();
1553
        boolean refDetailPrelim = rs.getBoolean("RefDetailPrelim");
1554
        if(state.getConfig().isDoPreliminaryRefDetailsWithNames() && refDetailPrelim){
1555

  
1556
            Set<TaxonName> names = parseExemplars(state, rs, null);
1557
            for (TaxonName name : names){
1558
                Reference exemplar = name.getNomenclaturalReference();
1559
                if (exemplar != null){
1560
                    Set<ReferenceCandidate> persistendCandidates = refMapping.getCandidates(exemplar);
1561
                    if (exemplar.getInReference()!= null){
1562
                        persistendCandidates.addAll(refMapping.getCandidates(exemplar.getInReference()));
668 1563
                    }
669
                }else{
670
                    if (isNotBlank(oldDetail) && !detail.equals(oldDetail)){
671
                        logger.warn("Details differ: " +  detail + " <-> " + oldDetail + ". NameId: " + nameId);
1564
                    for (ReferenceCandidate persistendCandidate : persistendCandidates){
1565
                        result.add(persistendCandidate.getId());
672 1566
                    }
673
                    taxonName.setNomenclaturalMicroReference(detail);
674 1567
                }
675 1568
            }
676 1569
        }
1570
        return result;
1571
    }
1572

  
1573
    private Set<TaxonName> parseExemplars(BerlinModelImportState state, ResultSet rs, TaxonName taxonName) throws SQLException{
1574
        BerlinModelImportConfigurator config = state.getConfig();
1575

  
1576
        Set<TaxonName> result = new HashSet<>();
1577

  
1578
        String fullNomRefCache = rs.getString("FullNomRefCache");
1579
        String detail = rs.getString("Details");
1580

  
1581

  
1582
        if (fullNomRefCache == null){
1583
//            logger.warn("fullNomRefCache is null for preliminary refDetail. NameId: " + nameId);
1584
            return result;
1585
        }else if (fullNomRefCache.trim().startsWith(": ")){
1586
//            logger.warn("fullNomRefCache starts with for preliminary refDetail. NameId: " + nameId);
1587
            return result;
1588
        }else{
1589
            TaxonName testName = taxonName == null ? getTestName() : (TaxonName)taxonName.clone();
1590

  
1591
            Set<String> fullStrCandidates;
1592
            if (fullNomRefCache.trim().startsWith("in ")){
1593
                //RefDetails with "in" references
1594
                fullStrCandidates = makePrelimRefDetailInRef(state, testName, fullNomRefCache, detail);
1595
            }else if (fullNomRefCache.trim().startsWith(", ")){
1596
                //RefDetails with ", " reference
1597
                fullStrCandidates = makePrelimRefDetailBook(state, testName, fullNomRefCache, detail);
1598
            }else{
1599
                //ordinary protected ref details
1600
                fullStrCandidates = makePrelimRefDetailNotInRef(state, testName, fullNomRefCache, detail);
1601
            }
1602

  
1603
            for (String parseStr : fullStrCandidates){
1604
                TaxonName newName = (TaxonName)parser.parseReferencedName(parseStr, config.getNomenclaturalCode(), testName.getRank());
1605
                Reference newNomRef = newName.getNomenclaturalReference();
1606
                if (taxonName != null && newNomRef != null && !newNomRef.isProtectedAbbrevTitleCache()&& !newNomRef.isProtectedTitleCache()){
1607
                    newNomRef.setAuthorship(taxonName.getCombinationAuthorship());
1608
                }
1609
                result.add(newName);
1610
//                Reference exemplar = newName.getNomenclaturalReference();
1611
            }
1612
            return result;
1613
        }
1614
    }
1615

  
1616

  
1617
    /**
1618
     * @return
1619
     */
1620
    protected TaxonName getTestName() {
1621
        TaxonName testName = TaxonNameFactory.NewBotanicalInstance(Rank.SPECIES(), null);
1622
        testName.setGenusOrUninomial("Abies");
1623
        testName.setSpecificEpithet("alba");
1624
        testName.setAuthorshipCache("Mill.");
1625
        return testName;
1626
    }
1627

  
1628

  
1629

  
1630
    /**
1631
     * @param config
1632
     * @param refMap
1633
     * @param taxonName
1634
     * @param nameId
1635
     * @param fullNomRefCache
1636
     * @return
1637
     */
1638
    protected Set<String> makePrelimRefDetailNotInRef(BerlinModelImportState state, TaxonName taxonName,
1639
            String fullNomRefCache, String detail) {
1640
        Set<String> result = new HashSet<>();
1641
        String fullStrComma = taxonName.getTitleCache()+ ", " + fullNomRefCache;
1642
        result.add(fullStrComma);
1643
        String fullStrIn = taxonName.getTitleCache()+ " in " + fullNomRefCache;
1644
        result.add(fullStrIn);
1645
        return result;
1646

  
1647
//
1648
//        INonViralName newNameComma = parser.parseReferencedName(fullStrComma, config.getNomenclaturalCode(), taxonName.getRank());
1649
//        INonViralName newNameIn = parser.parseReferencedName(fullStrIn, config.getNomenclaturalCode(), taxonName.getRank());
1650
//
1651
//        INonViralName newName;
1652
//        boolean commaProtected = newNameComma.isProtectedFullTitleCache() || (newNameComma.getNomenclaturalReference() != null
1653
//                && newNameComma.getNomenclaturalReference().isProtectedTitleCache());
1654
//        boolean inProtected = newNameIn.isProtectedFullTitleCache() || (newNameIn.getNomenclaturalReference() != null
1655
//                && newNameIn.getNomenclaturalReference().isProtectedTitleCache());
1656
//        if (commaProtected && !inProtected){
1657
//            newName = newNameIn;
1658
//        }else if (!commaProtected && inProtected){
1659
//            newName = newNameComma;
1660
//        }else if (commaProtected && inProtected){
1661
//            logger.warn("Can't parse preliminary refDetail: " +  fullNomRefCache + " for name " + taxonName.getTitleCache() + "; nameId: " + nameId );
1662
//            newName = newNameComma;
1663
//        }else{
1664
//            logger.warn("Can't decide ref type for preliminary refDetail: " +  fullNomRefCache + " for name " + taxonName.getTitleCache() + "; nameId: " + nameId );
1665
//            newName = newNameComma;
1666
//        }
1667
//
1668
//
1669
//        if (newName.isProtectedFullTitleCache()){
1670
//            Reference nomRef = ReferenceFactory.newGeneric();
1671
//            nomRef.setAbbrevTitleCache(fullNomRefCache, true);
1672
//            taxonName.setNomenclaturalReference(nomRef);
1673
//            //check detail
1674
//        }else{
1675
//            Reference nomRef = newName.getNomenclaturalReference();
1676
//            taxonName.setNomenclaturalReference(nomRef);
1677
//            String detail = newName.getNomenclaturalMicroReference();
1678
//            String oldDetail = taxonName.getNomenclaturalMicroReference();
1679
//            if (isBlank(detail)){
1680
//                if (isNotBlank(oldDetail)){
1681
//                    logger.warn("Detail could not be parsed but seems to exist. NameId: " + nameId);
1682
//                }
1683
//            }else{
1684
//                if (isNotBlank(oldDetail) && !detail.equals(oldDetail)){
1685
//                    logger.warn("Details differ: " +  detail + " <-> " + oldDetail + ". NameId: " + nameId);
1686
//                }
1687
//                taxonName.setNomenclaturalMicroReference(detail);
1688
//            }
1689
//        }
1690
    }
1691

  
1692

  
1693
    /**
1694
     * @param config
1695
     * @param refMap
1696
     * @param taxonName
1697
     * @param nameId
1698
     * @param fullNomRefCache
1699
     * @param detail
1700
     * @return
1701
     */
1702
    protected Set<String> makePrelimRefDetailInRef(BerlinModelImportState state,
1703
            TaxonName taxonName,
1704
            String fullNomRefCache, String detail) {
1705

  
1706
        Set<String> result = new HashSet<>();
1707
        String parseStr = taxonName.getTitleCache()+ " " + fullNomRefCache;
1708
        result.add(parseStr);
1709
        return result;
1710

  
1711

  
1712
//            String detail = newName.getNomenclaturalMicroReference();
1713
//            String oldDetail = taxonName.getNomenclaturalMicroReference();
1714
//            if (isBlank(detail)){
1715
//                if (isNotBlank(oldDetail)){
1716
//                    logger.warn("Detail could not be parsed but seems to exist. NameId: " + nameId);
1717
//                }
1718
//            }else{
1719
//                if (isNotBlank(oldDetail) && !detail.equals(oldDetail)){
1720
//                    logger.warn("Details differ: " +  detail + " <-> " + oldDetail + ". NameId: " + nameId);
1721
//                }
1722
//                taxonName.setNomenclaturalMicroReference(detail);
1723
//            }
1724
    }
1725

  
1726
    protected Set<String> makePrelimRefDetailBook(BerlinModelImportState state,
1727
            TaxonName taxonName,
1728
            String fullNomRefCache, String detail) {
1729

  
1730
        Set<String> result = new HashSet<>();
1731
        String parseStr = taxonName.getTitleCache()+ fullNomRefCache;
1732
        result.add(parseStr);
1733
        return result;
1734
    }
1735

  
1736

  
1737
    /**
1738
     * Creates the hash string for finding preliminary RefDetail duplicates
1739
     * @param nomRef
1740
     */
1741
    private String refHash(Reference nomRef) {
1742
//        TeamOrPersonBase<?> author = nomRef.getAuthorship();
1743
//        String authorStr = author == null? "" : author.getNomenclaturalTitle();
1744

  
1745
        String title = nomRef.getAbbrevTitle();
1746
        if (title == null){
1747
            title = nomRef.getTitle();
1748
            if (title == null && nomRef.getInReference() != null){
1749
                title = nomRef.getInReference().getAbbrevTitle();
1750
                if (title == null){
1751
                    title = nomRef.getInReference().getTitle();
1752
                }
1753
            }
1754
            if (title == null){
1755
                title = nomRef.getAbbrevTitleCache();
1756
            }
1757
            if (title == null){
1758
                title = nomRef.getTitleCache();
1759
            }
1760
        }
1761
        String vol = nomRef.getVolume();
1762
        if (vol == null && nomRef.getInReference() != null){
1763
            vol = nomRef.getInReference().getVolume();
1764
        }
1765
        String date = nomRef.getDatePublishedString();
1766
        if (date == null && nomRef.getInReference() != null){
1767
            date = nomRef.getInReference().getDatePublishedString();
1768
        }
1769
        ReferenceType type = nomRef.getType();
1770

  
1771
        String result = CdmUtils.concat("@", title, vol, date, type.getKey());
1772
        return result;
677 1773
    }
678 1774

  
679 1775

  

Also available in: Unified diff