Project

General

Profile

Revision c12d2903

ID c12d290324cefa7db2017782614b698015273cc4
Parent 15d48661
Child c0411992

Added by Andreas Müller 10 months ago

ref #1444, ref #7976 better logging for unhandled unacceptReason

View differences:

cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/erms/ErmsImportState.java
39 39

  
40 40
	private Set<Integer> acceptedTaxaKeys;
41 41

  
42
	private String unacceptReason;
43

  
42 44
	@Override
43 45
	public void initialize(ErmsImportConfigurator config) {
44 46
//		super(config);
......
110 112
	public void setAcceptedTaxaKeys(Set<Integer> acceptedTaxaKeys) {
111 113
		this.acceptedTaxaKeys = acceptedTaxaKeys;
112 114
	}
115

  
116
	public String getUnhandledUnacceptReason() {
117
        return this.unacceptReason;
118
    }
119
    public void setUnhandledUnacceptReason(String unacceptReason) {
120
        this.unacceptReason = unacceptReason;
121
    }
113 122
}
cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/erms/ErmsTaxonImport.java
11 11

  
12 12
import java.sql.ResultSet;
13 13
import java.sql.SQLException;
14
import java.util.ArrayList;
14 15
import java.util.HashMap;
15 16
import java.util.HashSet;
17
import java.util.List;
16 18
import java.util.Map;
17 19
import java.util.Set;
18 20
import java.util.UUID;
19 21

  
20 22
import org.apache.commons.lang3.StringUtils;
21 23
import org.apache.log4j.Logger;
24
import org.hsqldb.lib.StringComparator;
22 25
import org.springframework.stereotype.Component;
23 26

  
24 27
import eu.etaxonomy.cdm.common.CdmUtils;
......
75 78
	private static final String dbTableName = "tu";
76 79
	private static final Class<?> cdmTargetClass = TaxonBase.class;
77 80

  
81
	private static Map<String, Integer> unacceptReasons = new HashMap<>();
82

  
78 83
	public ErmsTaxonImport(){
79 84
		super(pluralString, dbTableName, cdmTargetClass);
80 85
	}
......
174 179

  
175 180
		//first path
176 181
		super.doInvoke(state);
182
		if(true){
183
		    logUnacceptReasons();
184
		}
177 185
		return;
178 186
	}
179 187

  
......
359 367
    private static TaxonBase<?> appendedPhraseForMisapplications(ResultSet rs, ErmsImportState state) throws SQLException{
360 368
        TaxonBase<?> taxon = (TaxonBase<?>)state.getRelatedObject(DbImportStateBase.CURRENT_OBJECT_NAMESPACE, DbImportStateBase.CURRENT_OBJECT_ID);
361 369
        TaxonName taxonName = taxon.getName();
362
         String unacceptreason = rs.getString("tu_unacceptreason");
363
         RelationshipTermBase<?>[] rels = state.getTransformer().getSynonymRelationTypesByKey(unacceptreason, state);
364
         if (rels[1]!= null && rels[1].equals(TaxonRelationshipType.MISAPPLIED_NAME_FOR())){
365
             taxon.setAppendedPhrase(taxonName.getAuthorshipCache());
366
             taxon.setSec(null);
367
             taxonName.setAuthorshipCache(null, taxonName.isProtectedAuthorshipCache());
368
             //TODO maybe some further authorship handling is needed if authors get parsed, but not very likely for MAN authorship
369
         }
370
         return taxon;
371
     }
370
        String unacceptreason = rs.getString("tu_unacceptreason");
371
        RelationshipTermBase<?>[] rels = state.getTransformer().getSynonymRelationTypesByKey(unacceptreason, state);
372
        if (rels[1]!= null && rels[1].equals(TaxonRelationshipType.MISAPPLIED_NAME_FOR())){
373
            taxon.setAppendedPhrase(taxonName.getAuthorshipCache());
374
            taxon.setSec(null);
375
            taxonName.setAuthorshipCache(null, taxonName.isProtectedAuthorshipCache());
376
            //TODO maybe some further authorship handling is needed if authors get parsed, but not very likely for MAN authorship
377
        }
378
        if(state.getUnhandledUnacceptReason() != null){
379
            //handling it here is a workaround, as the real place where it is handled is DbImportSynonymMapper, which is called from ErmsTaxonRelationImport, but where it is difficult to aggregate logging data
380
            addUnacceptReason(state.getUnhandledUnacceptReason());
381
        }
382
        return taxon;
383
    }
384

  
385
    private static void addUnacceptReason(String unhandledUnacceptReason) {
386
        unhandledUnacceptReason = unhandledUnacceptReason.toLowerCase();
387
        if (!unacceptReasons.keySet().contains(unhandledUnacceptReason)){
388
            unacceptReasons.put(unhandledUnacceptReason, 1);
389
        }else{
390
            unacceptReasons.put(unhandledUnacceptReason, unacceptReasons.get(unhandledUnacceptReason)+1);
391
        }
392
    }
372 393

  
373 394
    @SuppressWarnings("unused")  //used by MethodMapper
374 395
    private static TaxonBase<?> testTitleCache(ResultSet rs, ErmsImportState state) throws SQLException{
......
498 519
		}
499 520
	}
500 521

  
522
    private void logUnacceptReasons() {
523
        String logStr = "\n Unhandled unaccept reasons:\n===================";
524

  
525
        while (!unacceptReasons.isEmpty()) {
526
            int n = 0;
527
            List<String> mostUsedStrings = new ArrayList<>();
528
            for (Map.Entry<String, Integer> entry : unacceptReasons.entrySet()) {
529
                if (entry.getValue() > n) {
530
                    mostUsedStrings = new ArrayList<>();
531
                    mostUsedStrings.add(entry.getKey());
532
                    n = entry.getValue();
533
                } else if (entry.getValue() == n) {
534
                    mostUsedStrings.add(entry.getKey());
535
                } else {
536
                    //neglect
537
                }
538
            }
539
            mostUsedStrings.sort(new StringComparator());
540
            logStr += "\n   " + String.valueOf(n);
541
            for (String str : mostUsedStrings) {
542
                logStr += "\n   "+ str;
543
                unacceptReasons.remove(str);
544
            }
545
        }
546
        logger.warn(logStr);
547

  
548
    }
549

  
501 550
	@Override
502 551
	protected boolean doCheck(ErmsImportState state){
503 552
		IOValidator<ErmsImportState> validator = new ErmsTaxonImportValidator();
cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/erms/ErmsTransformer.java
695 695
        TaxonRelationshipType taxonRelType = getSynTaxonRelType(state);
696 696
        NameRelationshipType nameType = null;
697 697
//        HybridRelationshipType hybridType = null;
698
        ((ErmsImportState)state).setUnhandledUnacceptReason(null);  //see below for unhandledAcceptReason handling
698 699

  
699 700
        //according to SQL script erms300_Match_Relation&Status.sql
700 701

  
......
740 741

  
741 742
            if(handled == false && SynonymType.SYNONYM_OF().equals(synType) &&
742 743
                    getSynTaxonRelType(state).equals(taxonRelType) && nameType == null){
743
                logger.warn("Unaccept reason not yet handled: " + unacceptreason);
744
                ((ErmsImportState)state).setUnhandledUnacceptReason(unacceptreason);
745
                logger.debug("Unaccept reason not yet handled: " + unacceptreason);
744 746
            }
745 747
        }
746 748
//      update Match_RelStat set RelTaxon      =  1 where tu_unacceptreason like '%bas[iy][no]%ny%'

Also available in: Unified diff

Add picture from clipboard (Maximum size: 40 MB)