Project

General

Profile

Download (29 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.io.pesi.erms;
11

    
12
import java.sql.ResultSet;
13
import java.sql.SQLException;
14
import java.util.ArrayList;
15
import java.util.HashMap;
16
import java.util.HashSet;
17
import java.util.List;
18
import java.util.Map;
19
import java.util.Set;
20
import java.util.UUID;
21

    
22
import org.apache.commons.lang3.StringUtils;
23
import org.apache.log4j.Logger;
24
import org.springframework.stereotype.Component;
25

    
26
import eu.etaxonomy.cdm.common.CdmUtils;
27
import eu.etaxonomy.cdm.common.StringComparator;
28
import eu.etaxonomy.cdm.io.common.DbImportStateBase;
29
import eu.etaxonomy.cdm.io.common.IOValidator;
30
import eu.etaxonomy.cdm.io.common.mapping.DbIgnoreMapper;
31
import eu.etaxonomy.cdm.io.common.mapping.DbImportAnnotationMapper;
32
import eu.etaxonomy.cdm.io.common.mapping.DbImportExtensionMapper;
33
import eu.etaxonomy.cdm.io.common.mapping.DbImportLsidMapper;
34
import eu.etaxonomy.cdm.io.common.mapping.DbImportMapping;
35
import eu.etaxonomy.cdm.io.common.mapping.DbImportMarkerMapper;
36
import eu.etaxonomy.cdm.io.common.mapping.DbImportMethodMapper;
37
import eu.etaxonomy.cdm.io.common.mapping.DbImportObjectCreationMapper;
38
import eu.etaxonomy.cdm.io.common.mapping.DbImportStringMapper;
39
import eu.etaxonomy.cdm.io.common.mapping.IMappingImport;
40
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
41
import eu.etaxonomy.cdm.io.common.mapping.out.DbLastActionMapper;
42
import eu.etaxonomy.cdm.io.pesi.erms.validation.ErmsTaxonImportValidator;
43
import eu.etaxonomy.cdm.io.pesi.out.PesiTaxonExport;
44
import eu.etaxonomy.cdm.io.pesi.out.PesiTransformer;
45
import eu.etaxonomy.cdm.model.common.AnnotationType;
46
import eu.etaxonomy.cdm.model.common.CdmBase;
47
import eu.etaxonomy.cdm.model.common.ExtensionType;
48
import eu.etaxonomy.cdm.model.common.Language;
49
import eu.etaxonomy.cdm.model.common.MarkerType;
50
import eu.etaxonomy.cdm.model.common.RelationshipTermBase;
51
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
52
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
53
import eu.etaxonomy.cdm.model.name.Rank;
54
import eu.etaxonomy.cdm.model.name.TaxonName;
55
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
56
import eu.etaxonomy.cdm.model.reference.Reference;
57
import eu.etaxonomy.cdm.model.taxon.Synonym;
58
import eu.etaxonomy.cdm.model.taxon.Taxon;
59
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
60
import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType;
61
import eu.etaxonomy.cdm.strategy.cache.name.TaxonNameDefaultCacheStrategy;
62

    
63
/**
64
 * @author a.mueller
65
 * @since 20.02.2010
66
 */
67
@Component
68
public class ErmsTaxonImport
69
        extends ErmsImportBase<TaxonBase<?>>
70
        implements IMappingImport<TaxonBase<?>, ErmsImportState>{
71

    
72
    private static final long serialVersionUID = -7111568277264140051L;
73
    private static final Logger logger = Logger.getLogger(ErmsTaxonImport.class);
74

    
75
	private static final String pluralString = "taxa";
76
	private static final String dbTableName = "tu";
77
	private static final Class<?> cdmTargetClass = TaxonBase.class;
78

    
79
	private static Map<String, Integer> unacceptReasons = new HashMap<>();
80

    
81
	private DbImportMapping<ErmsImportState, ErmsImportConfigurator> mapping;
82

    
83
	public ErmsTaxonImport(){
84
		super(pluralString, dbTableName, cdmTargetClass);
85
	}
86

    
87
	@Override
88
	protected String getIdQuery() {
89
		String strQuery = " SELECT id FROM tu " ;  //WHERE id NOT IN (147415) for now we exclude Monera as it has no children and is unclear what classification it has. In ERMS it is alternative accepted name (in https://en.wikipedia.org/wiki/Monera it might be a super taxon to bacteria).
90
		return strQuery;
91
	}
92

    
93
	@Override
94
    protected DbImportMapping<ErmsImportState, ErmsImportConfigurator> getMapping() {
95
		if (mapping == null){
96
			mapping = new DbImportMapping<>();
97

    
98
			mapping.addMapper(DbImportObjectCreationMapper.NewInstance(this, "id", TAXON_NAMESPACE)); //id + tu_status
99
			mapping.addMapper(DbImportLsidMapper.NewInstance("GUID", "lsid"));
100

    
101
			UUID tsnUuid = ErmsTransformer.uuidExtTsn;
102
			ExtensionType tsnExtType = getExtensionType(tsnUuid, "TSN", "TSN", "TSN");
103
			mapping.addMapper(DbImportExtensionMapper.NewInstance("tsn", tsnExtType));
104
//			mapping.addMapper(DbImportStringMapper.NewInstance("tu_name", "(NonViralName)name.nameCache"));
105

    
106
			ExtensionType displayNameExtType = getExtensionType(ErmsTransformer.uuidExtDisplayName, "display name", "display name", "display name");
107
			mapping.addMapper(DbImportExtensionMapper.NewInstance("tu_displayname", displayNameExtType));
108
            //Ignore fuzzyName
109
            //  ExtensionType fuzzyNameExtType = getExtensionType(ErmsTransformer.uuidExtFuzzyName, "fuzzy name", "fuzzy name", "fuzzy name");
110
            //  mapping.addMapper(DbImportExtensionMapper.NewInstance("tu_fuzzyname", fuzzyNameExtType));
111
			mapping.addMapper(DbImportStringMapper.NewInstance("tu_authority", "name.authorshipCache"));
112

    
113
			ExtensionType fossilStatusExtType = getExtensionType(ErmsTransformer.uuidExtFossilStatus, "fossil status", "fossil status", "fos. stat.");
114
			mapping.addMapper(DbImportExtensionMapper.NewInstance("fossil_name", fossilStatusExtType));
115

    
116
			ExtensionType unacceptExtType = getExtensionType(ErmsTransformer.uuidExtUnacceptReason, "unaccept reason", "unaccept reason", "reason");
117
			mapping.addMapper(DbImportExtensionMapper.NewInstance("tu_unacceptreason", unacceptExtType));
118

    
119
			ExtensionType qualityStatusExtType = getExtensionType(ErmsTransformer.uuidExtQualityStatus, "quality status", "quality status", "quality status");
120
			mapping.addMapper(DbImportExtensionMapper.NewInstance("qualitystatus_name", qualityStatusExtType)); //checked by Tax Editor ERMS1.1, Added by db management team (2x), checked by Tax Editor
121

    
122
			ExtensionType cacheCitationExtType = getExtensionType(PesiTransformer.uuidExtCacheCitation, "cache_citation", "quality status", "cache_citation");
123
            mapping.addMapper(DbImportExtensionMapper.NewInstance("cache_citation", cacheCitationExtType));
124

    
125
            //flags
126
			mapping.addMapper(DbImportMarkerMapper.NewInstance("tu_marine", ErmsTransformer.uuidMarkerMarine, "marine", "marine", "marine", null));
127
			mapping.addMapper(DbImportMarkerMapper.NewInstance("tu_brackish", ErmsTransformer.uuidMarkerBrackish, "brackish", "brackish", "brackish", null));
128
			mapping.addMapper(DbImportMarkerMapper.NewInstance("tu_fresh", ErmsTransformer.uuidMarkerFreshwater, "freshwater", "fresh", "fresh", null));
129
			mapping.addMapper(DbImportMarkerMapper.NewInstance("tu_terrestrial", ErmsTransformer.uuidMarkerTerrestrial, "terrestrial", "terrestrial", "terrestrial", null));
130

    
131
			//last action, species expert
132
			ExtensionType speciesExpertNameExtType = getExtensionType(PesiTransformer.uuidExtSpeciesExpertName, "species expert name", "species expert name", "species expert name");
133
            mapping.addMapper(DbImportExtensionMapper.NewInstance("ExpertName", speciesExpertNameExtType)); //according to sql script ExpertName maps to SpeciesExpertName in ERMS
134
            AnnotationType lastActionDateType = getAnnotationType(DbLastActionMapper.uuidAnnotationTypeLastActionDate, "Last action date", "Last action date", null);
135
			mapping.addMapper(DbImportAnnotationMapper.NewInstance("lastActionDate", lastActionDateType));
136
            AnnotationType lastActionType = getAnnotationType(DbLastActionMapper.uuidAnnotationTypeLastAction, "Last action", "Last action", null);
137
            MarkerType hasNoLastActionMarkerType = getMarkerType(DbLastActionMapper.uuidMarkerTypeHasNoLastAction, "has no last action", "No last action information available", "no last action");
138
            mapping.addMapper(DbImportAnnotationMapper.NewInstance("lastAction", lastActionType, hasNoLastActionMarkerType));
139

    
140
            //MAN authorshipCache => appendedPhrase
141
            mapping.addMapper(DbImportMethodMapper.NewDefaultInstance(this, "appendedPhraseForMisapplications", ErmsImportState.class));
142

    
143
            //titleCache compare
144
            mapping.addMapper(DbImportMethodMapper.NewDefaultInstance(this, "testTitleCache", ErmsImportState.class));
145

    
146
			//ignore
147
            mapping.addMapper(DbIgnoreMapper.NewInstance("tu_sp", "included in rank/object creation, only needed for defining kingdom"));
148
			mapping.addMapper(DbIgnoreMapper.NewInstance("tu_fossil", "tu_fossil implemented as foreign key"));
149

    
150
		}
151
		return mapping;
152
	}
153

    
154
	@Override
155
	protected String getRecordQuery(ErmsImportConfigurator config) {
156
		String strSelect = " SELECT tu.*, parent1.tu_name AS parent1name, parent2.tu_name AS parent2name, parent3.tu_name AS parent3name, parent4.tu_name AS parent4name, " +
157
		            " parent1.tu_rank AS parent1rank, parent2.tu_rank AS parent2rank, parent3.tu_rank AS parent3rank, " +
158
		            " status.status_id as status_id, status.status_name, fossil.fossil_name, qualitystatus.qualitystatus_name," +
159
		            " s.sessiondate lastActionDate, a.action_name lastAction, s.ExpertName ";
160
		String strFrom = " FROM tu  LEFT OUTER JOIN  tu AS parent1 ON parent1.id = tu.tu_parent " +
161
				" LEFT OUTER JOIN   tu AS parent2  ON parent2.id = parent1.tu_parent " +
162
				" LEFT OUTER JOIN tu AS parent3 ON parent2.tu_parent = parent3.id " +
163
				" LEFT OUTER JOIN tu AS parent4 ON parent3.tu_parent = parent4.id " +
164
                " LEFT OUTER JOIN status ON tu.tu_status = status.status_id " +
165
				" LEFT OUTER JOIN fossil ON tu.tu_fossil = fossil.fossil_id " +
166
				" LEFT OUTER JOIN qualitystatus ON tu.tu_qualitystatus = qualitystatus.id " +
167
				" LEFT OUTER JOIN tu_sessions ts ON ts.tu_id = tu.id " +
168
                " LEFT OUTER JOIN [sessions] s ON s.id = ts.session_id " +
169
                " LEFT OUTER JOIN actions a ON a.id = ts.action_id ";
170
		String strWhere = " WHERE ( tu.id IN (" + ID_LIST_TOKEN + ") )";
171
		String strOrderBy = " ORDER BY tu.id, s.sessiondate DESC, a.id DESC ";
172
		String strRecordQuery = strSelect + strFrom + strWhere + strOrderBy;
173
		return strRecordQuery;
174
	}
175

    
176
	@Override
177
	protected void doInvoke(ErmsImportState state) {
178
		state.setAcceptedTaxaKeys(getAcceptedTaxaKeys(state));
179

    
180
		//first path
181
		super.doInvoke(state);
182
		if(true){
183
		    logUnacceptReasons();
184
		}
185
		return;
186
	}
187

    
188
    Integer lastTaxonId = null;
189
    @Override
190
    protected boolean ignoreRecord(ResultSet rs) throws SQLException {
191
        Integer id = rs.getInt("id");
192
        boolean result = id.equals(lastTaxonId);
193
        lastTaxonId = id;
194
        return result;
195
    }
196

    
197
	private Set<Integer> getAcceptedTaxaKeys(ErmsImportState state) {
198
		Set<Integer> result = new HashSet<>();
199
		String idCol = " id ";
200
		String tuFk = "tu_id";
201
		String vernacularsTable = "vernaculars";
202
		String distributionTable = "dr";
203
		String notesTable = "notes";
204
		String sql =
205
                "          SELECT id FROM tu WHERE tu_accfinal is NULL" //id of taxa not having accepted taxon
206
                + " UNION  SELECT DISTINCT tu_accfinal FROM tu "  //fk to accepted taxon (either the accepted taxon or the taxon itself, if accepted)
207
                + " UNION  SELECT id FROM tu WHERE trim(tu.tu_unacceptreason) like 'misidentification' OR trim(tu.tu_unacceptreason) like 'misidentifications' OR "
208
                            + " tu.tu_unacceptreason like 'misapplied %%name' OR "
209
                            + " tu.tu_unacceptreason like '%%misapplication%%' OR "
210
                            + " tu.tu_unacceptreason like 'incorrect identification%%'" //Misapplications, see ErmsTransformer.getSynonymRelationTypesByKey
211
                + " UNION  SELECT syn.id FROM tu syn INNER JOIN tu acc ON syn.tu_accfinal = acc.id WHERE syn.id = acc.tu_parent AND acc.id <> syn.id "  //see also ErmsTaxonRelationImport.isAccepted, there are some autonyms being the accepted taxon of there own parents
212
                + " UNION  SELECT DISTINCT %s FROM %s " //vernaculars
213
                + " UNION  SELECT DISTINCT %s FROM %s "  //distributions
214
                + " UNION  SELECT DISTINCT %s FROM %s ";  //notes
215
		sql = String.format(sql,
216
		        tuFk, vernacularsTable,
217
				tuFk, distributionTable,
218
				tuFk, notesTable);
219
		ResultSet rs = state.getConfig().getSource().getResultSet(sql);
220
		try {
221
			while (rs.next()){
222
				Integer id;
223
				id = rs.getInt(idCol.trim());
224
				result.add(id);
225
			}
226
			return result;
227
		} catch (SQLException e) {
228
			e.printStackTrace();
229
			throw new RuntimeException(e);
230
		}
231
	}
232

    
233
	@Override
234
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, ErmsImportState state) {
235
		//currently no referencing objects needed
236
	    Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
237
		return result;
238
	}
239

    
240
	@Override
241
	public TaxonBase<?> createObject(ResultSet rs, ErmsImportState state) throws SQLException {
242
		int statusId = rs.getInt("status_id");
243
		Integer meId = rs.getInt("id");
244
		Integer accFinal = nullSafeInt(rs, "tu_accfinal");
245

    
246
        TaxonName taxonName = getTaxonName(rs, state);
247
		fillTaxonName(taxonName, rs, state, meId);
248

    
249
		//add original source for taxon name (taxon original source is added in mapper)
250
		Reference citation = state.getTransactionalSourceReference();
251
		addOriginalSource(rs, taxonName, "id", NAME_NAMESPACE, citation);
252

    
253
		TaxonBase<?> result;
254
		//handle accepted<-> synonym, we create more accepted taxa as we need them within the tree or to attache factual data
255
		if (state.getAcceptedTaxaKeys().contains(meId)){
256
			Taxon taxon = Taxon.NewInstance(taxonName, citation);
257
			if (statusId != 1){
258
				logger.info("Taxon created as taxon but has status <> 1 ("+statusId+"): " + meId);
259
				boolean idsDiffer = accFinal != null && !meId.equals(accFinal);
260
				handleNotAcceptedTaxonStatus(taxon, statusId, idsDiffer, accFinal == null, state, rs);
261
			}
262
			result = taxon;
263
		}else{
264
			result = Synonym.NewInstance(taxonName, citation);
265
			//real synonyms (id <> tu_accfinal) are always handled as "synonym" or "pro parte synonym"
266
//			handleNotAcceptedTaxonStatus(result, statusId, state, rs);
267
		}
268

    
269
		handleNameStatus(result.getName(), rs, state);
270
		return result;
271
	}
272

    
273
    private void handleNameStatus(TaxonName name, ResultSet rs, ErmsImportState state) throws SQLException {
274
        NomenclaturalStatusType nomStatus = null;
275
        int tuStatus = rs.getInt("tu_status");
276
        //the order is bottom up from SQL script as their values are overridden from top to bottom
277
        if (tuStatus == 8){
278
            //species inquirenda
279
            nomStatus = getNomenclaturalStatusType(state, ErmsTransformer.uuidNomStatusSpeciesInquirenda, "species inquirenda", "species inquirenda", null, Language.LATIN(), null);
280
        }else if (tuStatus == 7){
281
            //temporary name
282
            nomStatus = getNomenclaturalStatusType(state, PesiTransformer.uuidNomStatusTemporaryName, "temporary name", "temporary name", null, Language.ENGLISH(), null);
283
        }else if (tuStatus == 6){
284
            //nomen dubium
285
            nomStatus = NomenclaturalStatusType.DOUBTFUL();
286
        }else if (tuStatus == 5){
287
            //"alternate representation"
288
            nomStatus = getNomenclaturalStatusType(state, ErmsTransformer.uuidNomStatusAlternateRepresentation, "alternate representation", "alternate representation", null, Language.ENGLISH(), null);
289
        }else if (tuStatus == 3){
290
            //nomen nudum
291
            nomStatus = NomenclaturalStatusType.NUDUM();
292
        }
293
        if (nomStatus == null){
294
            //IN SQL Script it is set first by unacceptreason and then overriden if above tu_status exists
295
            String unacceptReason = rs.getString("tu_unacceptreason");
296
            try {
297
                nomStatus = state.getTransformer().getNomenclaturalStatusByKey(unacceptReason);
298
            } catch (UndefinedTransformerMethodException e) {logger.warn("Unhandled method");
299
            }
300
        }
301
        if (nomStatus != null){
302
            name.addStatus(nomStatus, null, null);
303
        }
304
    }
305

    
306
    private TaxonName fillTaxonName(TaxonName taxonName, ResultSet rs, ErmsImportState state, Integer meId) throws SQLException {
307
        String tuName = rs.getString("tu_name");
308
		String displayName = rs.getString("tu_displayname").trim();
309

    
310
		String parent1Name = rs.getString("parent1name");
311
		Integer parent1Rank = rs.getInt("parent1rank");
312

    
313
		String parent2Name = rs.getString("parent2name");
314
		Integer parent2Rank = rs.getInt("parent2rank");
315

    
316
		String parent3Name = rs.getString("parent3name");
317
		Integer parent3Rank = rs.getInt("parent3rank");
318

    
319
	    String parent4Name = rs.getString("parent4name");
320

    
321
		//set epithets
322
		if (taxonName.isGenus() || taxonName.isSupraGeneric()){
323
			taxonName.setGenusOrUninomial(tuName);
324
		}else if (taxonName.isInfraGeneric()){
325
			taxonName.setInfraGenericEpithet(tuName);
326
			taxonName.setGenusOrUninomial(parent1Name);
327
		}else if (taxonName.isSpecies()){
328
			taxonName.setSpecificEpithet(tuName);
329
			getGenusAndInfraGenus(parent1Name, parent2Name, parent1Rank, taxonName);
330
		}else if (taxonName.isInfraSpecific()){
331
			if (parent1Rank < 220){
332
				handleException(parent1Rank, taxonName, displayName, meId);
333
			}
334
			taxonName.setInfraSpecificEpithet(tuName);
335
			if (parent1Rank > 220){  //parent is still infraspecific
336
			    taxonName.setSpecificEpithet(parent2Name);
337
			    getGenusAndInfraGenus(parent3Name, parent4Name, parent3Rank, taxonName);
338
			}else{
339
			    //default
340
			    taxonName.setSpecificEpithet(parent1Name);
341
			    getGenusAndInfraGenus(parent2Name, parent3Name, parent2Rank, taxonName);
342
			}
343
		}else if (taxonName.getRank()== null){
344
			if ("Biota".equalsIgnoreCase(tuName)){
345
				Rank rank = Rank.DOMAIN();  //should be Superdomain
346
				taxonName.setRank(rank);
347
				taxonName.setGenusOrUninomial(tuName);
348
			}else{
349
				String warning = "TaxonName has no rank. Use namecache.";
350
				logger.warn(warning);
351
				taxonName.setNameCache(tuName);
352
			}
353
		}
354

    
355
		//e.g. Leucon [Platyhelminthes] ornatus
356
		if (containsBrackets(displayName)){
357
			taxonName.setNameCache(displayName);
358
			logger.warn("Set name cache: " +  displayName + "; id =" + meId);
359
		}
360
        if (!taxonName.getNameCache().equals(displayName) && !isErroneousSubgenus(taxonName, displayName)){
361
            int pos = CdmUtils.diffIndex(taxonName.getNameCache(), displayName);
362
            logger.warn("Computed name cache differs at "+pos+".\n Computed   : " + taxonName.getNameCache()+"\n DisplayName: " +displayName);
363
            taxonName.setNameCache(displayName, true);
364
        }
365
		taxonName.getTitleCache();
366
        return taxonName;
367
    }
368

    
369
    private static boolean isErroneousSubgenus(TaxonName taxonName, String displayName) {
370
        //this is an error in ERMS formatting in v2019 for ICNafp names, that hopefully soon will be corrected
371
        return (Rank.SPECIES().equals(taxonName.getRank()) && displayName.contains(" subg. "));
372
    }
373

    
374
    @SuppressWarnings("unused")  //used by MethodMapper
375
    private static TaxonBase<?> appendedPhraseForMisapplications(ResultSet rs, ErmsImportState state) throws SQLException{
376
        TaxonBase<?> taxon = (TaxonBase<?>)state.getRelatedObject(DbImportStateBase.CURRENT_OBJECT_NAMESPACE, DbImportStateBase.CURRENT_OBJECT_ID);
377
        TaxonName taxonName = taxon.getName();
378
        String unacceptreason = rs.getString("tu_unacceptreason");
379
        RelationshipTermBase<?>[] rels = state.getTransformer().getSynonymRelationTypesByKey(unacceptreason, state);
380
        if (rels[1]!= null && rels[1].equals(TaxonRelationshipType.MISAPPLIED_NAME_FOR())){
381
            taxon.setAppendedPhrase(taxonName.getAuthorshipCache());
382
            taxon.setSec(null);
383
            taxonName.setAuthorshipCache(null, taxonName.isProtectedAuthorshipCache());
384
            //TODO maybe some further authorship handling is needed if authors get parsed, but not very likely for MAN authorship
385
        }
386
        if(state.getUnhandledUnacceptReason() != null){
387
            //to handle it hear is a workaround, as the real place where it is handled is DbImportSynonymMapper which is called ErmsTaxonRelationImport but where it is diffcult to aggregate logging data
388
            addUnacceptReason(state.getUnhandledUnacceptReason());
389
        }
390
        return taxon;
391
    }
392

    
393
    private static void addUnacceptReason(String unhandledUnacceptReason) {
394
        unhandledUnacceptReason = unhandledUnacceptReason.toLowerCase();
395
        if (!unacceptReasons.keySet().contains(unhandledUnacceptReason)){
396
            unacceptReasons.put(unhandledUnacceptReason, 1);
397
        }else{
398
            unacceptReasons.put(unhandledUnacceptReason, unacceptReasons.get(unhandledUnacceptReason)+1);
399
        }
400
    }
401

    
402
    @SuppressWarnings("unused")  //used by MethodMapper
403
    private static TaxonBase<?> testTitleCache(ResultSet rs, ErmsImportState state) throws SQLException{
404
        TaxonBase<?> taxon = (TaxonBase<?>)state.getRelatedObject(DbImportStateBase.CURRENT_OBJECT_NAMESPACE, DbImportStateBase.CURRENT_OBJECT_ID);
405
        TaxonName taxonName = taxon.getName();
406
        String displayName = rs.getString("tu_displayname");
407
        displayName = displayName == null ? null : displayName.trim();
408
        String titleCache = taxonName.resetTitleCache(); //calling titleCache should always be kept to have a computed titleCache in the CDM DB.
409
        titleCache = CdmUtils.concat(" ", titleCache, taxon.getAppendedPhrase());
410
        String expectedTitleCache = getExpectedTitleCache(rs);
411
        //TODO check titleCache, but beware of autonyms
412
        if (!titleCache.equals(expectedTitleCache) && !isErroneousSubgenus(taxonName, displayName)){
413
            int pos = CdmUtils.diffIndex(titleCache, expectedTitleCache);
414
            logger.warn("Computed title cache differs at "+pos+".\n Computed             : " + titleCache + "\n DisplayName+Authority: " + expectedTitleCache);
415
            taxonName.setNameCache(displayName, true);
416
        }
417
        return taxon;
418
    }
419

    
420
    //see also PesiErmsValidation.srcFullName()
421
    private static String getExpectedTitleCache(ResultSet srcRs) throws SQLException {
422
        String result;
423
        String epi = srcRs.getString("tu_name");
424
        epi = " a" + epi;
425
        String display = srcRs.getString("tu_displayname");
426
        String sp = srcRs.getString("tu_sp");
427
        if (display.indexOf(epi) != display.lastIndexOf(epi) && !sp.startsWith("#2#")){ //homonym, animal
428
            result = srcRs.getString("tu_displayname").replaceFirst(epi+" ", CdmUtils.concat(" ", " "+epi, srcRs.getString("tu_authority")))+" ";
429
        }else{
430
            result = CdmUtils.concat(" ", srcRs.getString("tu_displayname"), srcRs.getString("tu_authority"));
431
        }
432
        return result.trim();
433
    }
434

    
435
    private void handleNotAcceptedTaxonStatus(Taxon taxon, int statusId, boolean idsDiffer, boolean accIdNull, ErmsImportState state, ResultSet rs) throws SQLException {
436
		ExtensionType pesiStatusType = getExtensionType(state, ErmsTransformer.uuidPesiTaxonStatus, "PESI taxon status", "PESI taxon status", "status", null);
437

    
438
		if(idsDiffer){
439
		    //if ids differ the taxon should always be an ordinary synonym, some synonyms need to be imported to CDM as Taxon because they have factual data attached, they use a concept relationship as synonym relationship
440
		    addPesiStatus(taxon, PesiTransformer.T_STATUS_SYNONYM, pesiStatusType);
441
		}else if(statusId == 1){
442
            //nothing to do, not expected to happen
443
		}else if (statusId > 1 && statusId < 6 || statusId == 7){ //unaccepted, nomen nudum, alternate representation, temporary name       they have sometimes no tu_accfinal or are handled incorrect
444
		    //TODO discuss alternate representations, at the very end of the PESI export unaccepted taxa with relationship "is alternative name for" are set to status "accepted". Need to check if this is true for the PESI taxa too (do they have such a relationship?)
445
		    //Note: in SQL script, also the tu_unacceptreason was checked to be NOT LIKE '%syno%', this is not always correct and the few real synonyms should better data cleaned
446
		    addPesiStatus(taxon, PesiTransformer.T_STATUS_UNACCEPTED, pesiStatusType);
447
        }else if (statusId == 6 || statusId == 8 || statusId == 10){
448
            taxon.setDoubtful(true);  //nomen dubium, taxon inquirendum, uncertain
449
        }else if (statusId == 9){
450
            addPesiStatus(taxon, PesiTransformer.T_STATUS_UNACCEPTED, pesiStatusType);         //interim unpublished, we should better not yet publish, but will be probably accepted in future
451
        }else{
452
            logger.error("Unhandled statusId "+ statusId);
453
        }
454
	}
455

    
456
    private void addPesiStatus(Taxon taxon, int status, ExtensionType pesiStatusType) {
457
        taxon.addExtension(String.valueOf(status), pesiStatusType);
458

    
459
    }
460

    
461
    private void handleException(Integer parentRank, TaxonName taxonName, String displayName, Integer meId) {
462
		logger.warn("Parent of infra specific taxon is of higher rank ("+parentRank+") than species. Used nameCache: " + displayName +  "; id=" + meId) ;
463
		taxonName.setNameCache(displayName);
464
	}
465

    
466
	private boolean containsBrackets(String displayName) {
467
		int index = displayName.indexOf("[");
468
		return (index > -1);
469
	}
470

    
471
	private void getGenusAndInfraGenus(String parentName, String grandParentName, Integer parent1Rank, TaxonName taxonName) {
472
		if (parent1Rank <220 && parent1Rank > 180){
473
			//parent is infrageneric
474
			taxonName.setInfraGenericEpithet(parentName);
475
			taxonName.setGenusOrUninomial(grandParentName);
476
		}else{
477
			taxonName.setGenusOrUninomial(parentName);
478
		}
479
	}
480

    
481
	/**
482
	 * Returns an empty Taxon Name instance according to the given rank and kingdom.
483
	 */
484
	private TaxonName getTaxonName(ResultSet rs, ErmsImportState state) throws SQLException {
485
	    TaxonName result;
486
		int kingdomId = parseKingdomId(rs);
487
		Integer intRank = rs.getInt("tu_rank");
488

    
489
		NomenclaturalCode nc = ErmsTransformer.kingdomId2NomCode(kingdomId);
490
		Rank rank = null;
491
		rank = state.getRank(intRank, kingdomId);
492

    
493
		if (rank == null){
494
			logger.warn("Rank is null. KingdomId: " + kingdomId + ", rankId: " +  intRank);
495
		}
496
		if (nc != null){
497
			result = nc.getNewTaxonNameInstance(rank);
498
		}else{
499
			result = TaxonNameFactory.NewNonViralInstance(rank);
500
		}
501
		//cache strategy
502
		if (result.isZoological()){
503
		    TaxonNameDefaultCacheStrategy cacheStrategy = PesiTaxonExport.zooNameStrategy;
504
			result.setCacheStrategy(cacheStrategy);
505
		}
506

    
507
		return result;
508
	}
509

    
510
	/**
511
	 * Returns the kingdom id by extracting it from the second character in the <code>tu_sp</code>
512
	 * attribute. If the attribute can not be parsed to a valid id <code>null</code>
513
	 * is returned. If the attribute is <code>null</code> the id of the record is returned.
514
	 * @param rs
515
	 * @return
516
	 * @throws SQLException
517
	 */
518
	private int parseKingdomId(ResultSet rs) throws SQLException {
519
		String treeString = rs.getString("tu_sp");
520
		if (treeString != null){
521
		    if (StringUtils.isNotBlank(treeString) && treeString.length() > 1){
522
				String strKingdom = treeString.substring(1,2);
523

    
524
				if (! treeString.substring(0, 1).equals("#") && ! treeString.substring(2, 3).equals("#") ){
525
					String message = "Tree string " + treeString + " has no recognized format";
526
                    logger.warn(message);
527
                    throw new RuntimeException(message);
528
				}else{
529
					try {
530
						return Integer.valueOf(strKingdom);
531
					} catch (NumberFormatException e) {
532
					    String message = "Kingdom string " + strKingdom + "could not be recognized as a valid number";
533
						logger.warn(message);
534
						throw new RuntimeException(message);
535
					}
536
				}
537
			}else{
538
                String message = "Tree string for kingdom recognition is to short: " + treeString;
539
                logger.warn(message);
540
                throw new RuntimeException(message);
541
			}
542
		}else{
543
			int tu_id = rs.getInt("id");
544
			return tu_id;
545
		}
546
	}
547

    
548
    private void logUnacceptReasons() {
549
        String logStr = "\n Unhandled unaccept reasons:\n===================";
550

    
551
        while (!unacceptReasons.isEmpty()) {
552
            int n = 0;
553
            List<String> mostUsedStrings = new ArrayList<>();
554
            for (Map.Entry<String, Integer> entry : unacceptReasons.entrySet()) {
555
                if (entry.getValue() > n) {
556
                    mostUsedStrings = new ArrayList<>();
557
                    mostUsedStrings.add(entry.getKey());
558
                    n = entry.getValue();
559
                } else if (entry.getValue() == n) {
560
                    mostUsedStrings.add(entry.getKey());
561
                } else {
562
                    //neglect
563
                }
564
            }
565
            mostUsedStrings.sort(new StringComparator());
566
            logStr += "\n   " + String.valueOf(n);
567
            for (String str : mostUsedStrings) {
568
                logStr += "\n   "+ str;
569
                unacceptReasons.remove(str);
570
            }
571
        }
572
        logger.warn(logStr);
573

    
574
    }
575

    
576
	@Override
577
	protected boolean doCheck(ErmsImportState state){
578
		IOValidator<ErmsImportState> validator = new ErmsTaxonImportValidator();
579
		return validator.validate(state);
580
	}
581

    
582
	@Override
583
    protected boolean isIgnore(ErmsImportState state){
584
		return ! state.getConfig().isDoTaxa();
585
	}
586
}
(13-13/17)