Project

General

Profile

« Previous | Next » 

Revision 3d6c7bfc

Added by Andreas Müller over 4 years ago

ref #1444, ref #8508, ref #8509 fix ERMS taxon status handling

View differences:

cdm-pesi/src/main/java/eu/etaxonomy/cdm/app/pesi/validate/PesiErmsValidator.java
419 419
        int taxonStatusFk = destRS.getInt("TaxonStatusFk");
420 420
        String parentTaxonId = destRS.getString("parentTaxonFk");
421 421
        int rankFk = destRS.getInt("RankFk");
422
        if (taxonStatusFk == 2 || taxonStatusFk == 4 || taxonStatusFk == 7|| rankFk <= 10){  //synonym; pro parte syn; kingdom and higher
422
        if (taxonStatusFk == 2 || taxonStatusFk == 4 || rankFk <= 10){  //synonym; pro parte syn; kingdom and higher
423 423
            result = isNull(childIndexAttr, destRS, id);
424 424
        }else{
425 425
            String childIndex = destRS.getString(childIndexAttr);
......
850 850
            return 5;
851 851
        }else if ("p".equals(sourceType)){
852 852
            return 11;
853
        }else if ("i".equals(sourceType)){
854
            return 12;
855 853
        }
856 854
        return null;
857 855
    }
cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/erms/ErmsTaxonImport.java
72 72
    private static final long serialVersionUID = -7111568277264140051L;
73 73
    private static final Logger logger = Logger.getLogger(ErmsTaxonImport.class);
74 74

  
75
	private DbImportMapping<ErmsImportState, ErmsImportConfigurator> mapping;
76

  
77 75
	private static final String pluralString = "taxa";
78 76
	private static final String dbTableName = "tu";
79 77
	private static final Class<?> cdmTargetClass = TaxonBase.class;
80 78

  
81 79
	private static Map<String, Integer> unacceptReasons = new HashMap<>();
82 80

  
81
	private DbImportMapping<ErmsImportState, ErmsImportConfigurator> mapping;
82

  
83 83
	public ErmsTaxonImport(){
84 84
		super(pluralString, dbTableName, cdmTargetClass);
85 85
	}
......
240 240
	@Override
241 241
	public TaxonBase<?> createObject(ResultSet rs, ErmsImportState state) throws SQLException {
242 242
		int statusId = rs.getInt("status_id");
243
//		Object accTaxonId = rs.getObject("tu_accfinal");
244 243
		Integer meId = rs.getInt("id");
244
		Integer accFinal = nullSafeInt(rs, "tu_accfinal");
245 245

  
246 246
        TaxonName taxonName = getTaxonName(rs, state);
247 247
		fillTaxonName(taxonName, rs, state, meId);
......
256 256
			Taxon taxon = Taxon.NewInstance(taxonName, citation);
257 257
			if (statusId != 1){
258 258
				logger.info("Taxon created as taxon but has status <> 1 ("+statusId+"): " + meId);
259
				handleNotAcceptedTaxon(taxon, statusId, state, rs);
259
				boolean idsDiffer = accFinal != null && !meId.equals(accFinal);
260
				handleNotAcceptedTaxonStatus(taxon, statusId, idsDiffer, accFinal == null, state, rs);
260 261
			}
261 262
			result = taxon;
262 263
		}else{
263 264
			result = Synonym.NewInstance(taxonName, citation);
265
			//real synonyms (id <> tu_accfinal) are always handled as "synonym" or "pro parte synonym"
266
//			handleNotAcceptedTaxonStatus(result, statusId, state, rs);
264 267
		}
265 268

  
266 269
		handleNameStatus(result.getName(), rs, state);
......
429 432
        return result.trim();
430 433
    }
431 434

  
432
    private void handleNotAcceptedTaxon(Taxon taxon, int statusId, ErmsImportState state, ResultSet rs) throws SQLException {
433
		ExtensionType notAccExtensionType = getExtensionType(state, ErmsTransformer.uuidErmsTaxonStatus, "ERMS taxon status", "ERMS taxon status", "status", null);
434
		String statusName = rs.getString("status_name");
435

  
436
		if (statusId > 1){
437
			taxon.addExtension(statusName, notAccExtensionType);
438
		}
435
    private void handleNotAcceptedTaxonStatus(Taxon taxon, int statusId, boolean idsDiffer, boolean accIdNull, ErmsImportState state, ResultSet rs) throws SQLException {
436
		ExtensionType pesiStatusType = getExtensionType(state, ErmsTransformer.uuidPesiTaxonStatus, "PESI taxon status", "PESI taxon status", "status", null);
437

  
438
		if(idsDiffer){
439
		    //if the ids differ the taxon should always be an ordinary synonym; some synonyms need to be imported to CDM as Taxon because they have factual data attached, and these use a concept relationship as their synonym relationship
440
		    addPesiStatus(taxon, PesiTransformer.T_STATUS_SYNONYM, pesiStatusType);
441
		}else if(statusId == 1){
442
            //nothing to do, not expected to happen
443
		}else if (statusId > 1 && statusId < 6 || statusId == 7){ //unaccepted, nomen nudum, alternate representation, temporary name       they have sometimes no tu_accfinal or are handled incorrect
444
		    //TODO discuss alternate representations: at the very end of the PESI export, unaccepted taxa with the relationship "is alternative name for" are set to status "accepted". Need to check if this is true for the PESI taxa too (do they have such a relationship?)
445
		    //Note: the SQL script also checked that tu_unacceptreason is NOT LIKE '%syno%'; this is not always correct, and the few real synonyms should rather be fixed by data cleaning
446
		    addPesiStatus(taxon, PesiTransformer.T_STATUS_UNACCEPTED, pesiStatusType);
447
        }else if (statusId == 6 || statusId == 8 || statusId == 10){
448
            taxon.setDoubtful(true);  //nomen dubium, taxon inquirendum, uncertain
449
        }else if (statusId == 9){
450
            addPesiStatus(taxon, PesiTransformer.T_STATUS_UNACCEPTED, pesiStatusType);         //interim unpublished, we should better not yet publish, but will be probably accepted in future
451
        }else{
452
            logger.error("Unhandled statusId "+ statusId);
453
        }
439 454
	}
440 455

  
441
	private void handleException(Integer parentRank, TaxonName taxonName, String displayName, Integer meId) {
456
    private void addPesiStatus(Taxon taxon, int status, ExtensionType pesiStatusType) {
457
        taxon.addExtension(String.valueOf(status), pesiStatusType);
458

  
459
    }
460

  
461
    private void handleException(Integer parentRank, TaxonName taxonName, String displayName, Integer meId) {
442 462
		logger.warn("Parent of infra specific taxon is of higher rank ("+parentRank+") than species. Used nameCache: " + displayName +  "; id=" + meId) ;
443 463
		taxonName.setNameCache(displayName);
444 464
	}
cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/erms/ErmsTransformer.java
249 249
    public static final UUID uuidBiology = UUID.fromString("af5c6832-74f3-4b87-bac9-6fdfc68ffada");
250 250

  
251 251
	//extension type uuids
252
	public static final UUID uuidErmsTaxonStatus = UUID.fromString("859eee7f-5240-48a0-8edc-7af63557fa6e");
252
	public static final UUID uuidPesiTaxonStatus = UUID.fromString("859eee7f-5240-48a0-8edc-7af63557fa6e");
253 253
	public static final UUID uuidExtGazetteer = UUID.fromString("dcfa124a-1028-49cd-aea5-fdf9bd396c1a");
254 254
	public static final UUID uuidExtImis = UUID.fromString("ee2ac2ca-b60c-4e6f-9cad-720fcdb0a6ae");
255 255
	public static final UUID uuidExtFossilStatus = UUID.fromString("ec3dffbe-a0c8-4d76-845f-5fc166a33d5b");
cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/out/PesiTransformer.java
1898 1898
	public static Integer taxonBase2statusFk (TaxonBase<?> taxonBase){
1899 1899
		if (taxonBase == null){
1900 1900
			return null;
1901
		}
1902
		if (taxonBase.isInstanceOf(Taxon.class)){
1903
			Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class);
1904
			Set<TaxonRelationship> rels = taxon.getRelationsFromThisTaxon();
1905
			Set<TaxonNode> nodes = taxon.getTaxonNodes();
1906
			if (!rels.isEmpty() && !nodes.isEmpty()){
1907
			    logger.warn("Taxon has relations and parent. This is not expected in E+M, but maybe possible in ERMS. Check if taxon status is correct.");
1908
			}else if (rels.isEmpty() && nodes.isEmpty()){
1909
                logger.warn("Taxon has neither relations nor parent. This is not expected. Check if taxon status is correct.");
1910
            }
1911
			if (!rels.isEmpty()){
1912
			    //we expect all rels to have same type, maybe not true
1913
			    UUID relTypeUuid = rels.iterator().next().getType().getUuid();
1914
			    //E+M
1915
			    if (TaxonRelationshipType.proParteUuids().contains(relTypeUuid)){
1916
	                return T_STATUS_PRO_PARTE_SYN;
1917
	            }else if (TaxonRelationshipType.partialUuids().contains(relTypeUuid)){
1918
	                return T_STATUS_PARTIAL_SYN;
1919
	            }else if (TaxonRelationshipType.misappliedNameUuids().contains(relTypeUuid)){
1920
	                return T_STATUS_SYNONYM;  //no explicit MAN status exists in PESI
1921
	            }
1922
			    //ERMS
1923
	            else if (TaxonRelationshipType.pseudoTaxonUuids().contains(relTypeUuid)){
1924
	                return T_STATUS_SYNONYM;
1925
	            }
1926
			}
1927
			if (!nodes.isEmpty()){
1928
			    TaxonNode parentNode = nodes.iterator().next().getParent();
1929
			    if (parentNode.getTaxon() != null && !parentNode.getTaxon().isPublish()){
1930
			        if (parentNode.getTaxon().getUuid().equals(uuidTaxonValuelessEuroMed) ){
1931
			            return T_STATUS_NOT_ACCEPTED_VALUELESS;
1932
			        }
1933
			    }else{
1934
			        return T_STATUS_ACCEPTED;
1935
			    }
1936
	        }
1937
			logger.error("Taxon status could not be defined. This should not happen: " + taxonBase.getTitleCache() );
1938
			return T_STATUS_UNRESOLVED;
1939
		}else if (taxonBase.isInstanceOf(Synonym.class)){
1940
			Synonym synonym = CdmBase.deproxy(taxonBase, Synonym.class);
1941
			if (taxonBase2statusFk(synonym.getAcceptedTaxon())== T_STATUS_NOT_ACCEPTED_VALUELESS ){
1942
			    return T_STATUS_NOT_ACCEPTED_VALUELESS;
1943
			}else{
1944
			    return T_STATUS_SYNONYM;
1945
			}
1901
		}else if(!taxonBase.getExtensions(ErmsTransformer.uuidPesiTaxonStatus).isEmpty()){
1902
		    return taxonStatusByErmsStatus(taxonBase.getExtensions(ErmsTransformer.uuidPesiTaxonStatus));
1946 1903
		}else{
1947
			logger.warn("Unresolved taxon status.");
1948
			return T_STATUS_UNRESOLVED;
1904
		    if (taxonBase.isInstanceOf(Synonym.class)){
1905
    		    Synonym synonym = CdmBase.deproxy(taxonBase, Synonym.class);
1906
    		    if (taxonBase2statusFk(synonym.getAcceptedTaxon())== T_STATUS_NOT_ACCEPTED_VALUELESS ){
1907
    		        return T_STATUS_NOT_ACCEPTED_VALUELESS;
1908
    		    }else{
1909
    		        return T_STATUS_SYNONYM;
1910
    		    }
1911
		    }else if (taxonBase.isInstanceOf(Taxon.class)){
1912
    			Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class);
1913
    			Set<TaxonRelationship> rels = taxon.getRelationsFromThisTaxon();
1914
    			Set<TaxonNode> nodes = taxon.getTaxonNodes();
1915
    			if (!rels.isEmpty() && !nodes.isEmpty()){
1916
    			    logger.warn("Taxon has relations and parent. This is not expected in E+M, but maybe possible in ERMS. Check if taxon status is correct.");
1917
    			}else if (rels.isEmpty() && nodes.isEmpty()){
1918
                    logger.warn("Taxon has neither relations nor parent. This is not expected. Check if taxon status is correct.");
1919
                }
1920
    			if (!rels.isEmpty()){
1921
    			    //we expect all rels to have same type, maybe not true
1922
    			    UUID relTypeUuid = rels.iterator().next().getType().getUuid();
1923
    			    //E+M
1924
    			    if (TaxonRelationshipType.proParteUuids().contains(relTypeUuid)){
1925
    	                return T_STATUS_PRO_PARTE_SYN;
1926
    	            }else if (TaxonRelationshipType.partialUuids().contains(relTypeUuid)){
1927
    	                return T_STATUS_PARTIAL_SYN;
1928
    	            }else if (TaxonRelationshipType.misappliedNameUuids().contains(relTypeUuid)){
1929
    	                return T_STATUS_SYNONYM;  //no explicit MAN status exists in PESI
1930
    	            }
1931
    			    //ERMS
1932
    	            else if (TaxonRelationshipType.pseudoTaxonUuids().contains(relTypeUuid)){
1933
    	                return T_STATUS_SYNONYM;
1934
    	            }
1935
    			}
1936
    			if (!nodes.isEmpty()){
1937
    			    TaxonNode parentNode = nodes.iterator().next().getParent();
1938
    			    if (parentNode.getTaxon() != null && !parentNode.getTaxon().isPublish()){
1939
    			        if (parentNode.getTaxon().getUuid().equals(uuidTaxonValuelessEuroMed) ){
1940
    			            return T_STATUS_NOT_ACCEPTED_VALUELESS;
1941
    			        }
1942
    			    }else{
1943
    			        if (taxon.isDoubtful()){
1944
    			            return T_STATUS_UNRESOLVED;
1945
    			        }else{
1946
    			            return T_STATUS_ACCEPTED;
1947
    			        }
1948
    			    }
1949
    	        }
1950
    			logger.error("Taxon status could not be defined. This should not happen: " + taxonBase.getTitleCache() );
1951
    			return T_STATUS_UNRESOLVED;
1952
    		}else{
1953
    		    logger.warn("Unresolved taxon status, neither taxon nor synonym, this should not happen");
1954
    			return T_STATUS_UNRESOLVED;
1955
    		}
1956
    		//TODO
1957
    //		public static int T_STATUS_ORPHANED = 6;  //never used in SQL import
1949 1958
		}
1950
		//TODO
1951
//		public static int T_STATUS_UNRESOLVED = 5;
1952
//		public static int T_STATUS_ORPHANED = 6;
1953 1959
	}
1954 1960

  
1955
//	/**
1956
//	 *
1957
//	 * @param taxonBase
1958
//	 * @return
1959
//	 */
1960
//	public static String taxonBase2statusCache (TaxonBase<?> taxonBase){
1961
//		if (taxonBase == null){return null;}
1962
//		if (taxonBase.isInstanceOf(Taxon.class)){
1963
//			Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class);
1964
//			if (taxon.getTaxonNodes().size() == 0){
1965
//				return T_STATUS_STR_NOT_ACCEPTED;
1966
//			}else{
1967
//				return T_STATUS_STR_ACCEPTED;
1968
//			}
1969
//		}else if (taxonBase.isInstanceOf(Synonym.class)){
1970
//			return T_STATUS_STR_SYNONYM;
1971
//		}else{
1972
//			logger.warn("Unknown ");
1973
//			return T_STATUS_STR_UNRESOLVED;
1974
//		}
1975
//		//TODO
1976
//		public static int T_STATUS_STR_PARTIAL_SYN = 3;
1977
//		public static int T_STATUS_STR_PRO_PARTE_SYN = 4;
1978
//		public static int T_STATUS_STR_UNRESOLVED = 5;
1979
//		public static int T_STATUS_STR_ORPHANED = 6;
1980
//	}
1961
    private static Integer taxonStatusByErmsStatus(Set<String> extensions) {
1962
        //Note: status extensions should only be used during ERMS import
1963
        //      if the status can not be derived from ordinary CDM status handling (Taxon, Synonym, ProParte, doubtful, ...)
1964
        // see ErmsTaxonImport.handleNotAcceptedTaxonStatus
1965
        if (extensions.size()>1){
1966
            logger.warn("More than 1 ERMS status available. This should not happen.");
1967
        }
1968
        String statusStr = extensions.iterator().next();
1969
        Integer status = Integer.valueOf(statusStr);
1970
        return status;
1971
    }
1981 1972

  
1982
	/**
1973
    /**
1983 1974
	 * Returns the {@link SourceCategory SourceCategory} representation of the given {@link ReferenceType ReferenceType} in PESI.
1984 1975
	 * @param reference The {@link Reference Reference}.
1985 1976
	 * @return The {@link SourceCategory SourceCategory} representation in PESI.

Also available in: Unified diff