Project

General

Profile

Download (27.1 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.io.pesi.erms;
11

    
12
import java.sql.ResultSet;
13
import java.sql.SQLException;
14
import java.util.ArrayList;
15
import java.util.HashMap;
16
import java.util.HashSet;
17
import java.util.List;
18
import java.util.Map;
19
import java.util.Set;
20
import java.util.UUID;
21

    
22
import org.apache.commons.lang3.StringUtils;
23
import org.apache.log4j.Logger;
24
import org.hsqldb.lib.StringComparator;
25
import org.springframework.stereotype.Component;
26

    
27
import eu.etaxonomy.cdm.common.CdmUtils;
28
import eu.etaxonomy.cdm.io.common.DbImportStateBase;
29
import eu.etaxonomy.cdm.io.common.IOValidator;
30
import eu.etaxonomy.cdm.io.common.mapping.DbIgnoreMapper;
31
import eu.etaxonomy.cdm.io.common.mapping.DbImportAnnotationMapper;
32
import eu.etaxonomy.cdm.io.common.mapping.DbImportExtensionMapper;
33
import eu.etaxonomy.cdm.io.common.mapping.DbImportLsidMapper;
34
import eu.etaxonomy.cdm.io.common.mapping.DbImportMapping;
35
import eu.etaxonomy.cdm.io.common.mapping.DbImportMarkerMapper;
36
import eu.etaxonomy.cdm.io.common.mapping.DbImportMethodMapper;
37
import eu.etaxonomy.cdm.io.common.mapping.DbImportObjectCreationMapper;
38
import eu.etaxonomy.cdm.io.common.mapping.DbImportStringMapper;
39
import eu.etaxonomy.cdm.io.common.mapping.IMappingImport;
40
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
41
import eu.etaxonomy.cdm.io.common.mapping.out.DbLastActionMapper;
42
import eu.etaxonomy.cdm.io.pesi.erms.validation.ErmsTaxonImportValidator;
43
import eu.etaxonomy.cdm.io.pesi.out.PesiTaxonExport;
44
import eu.etaxonomy.cdm.io.pesi.out.PesiTransformer;
45
import eu.etaxonomy.cdm.model.common.AnnotationType;
46
import eu.etaxonomy.cdm.model.common.CdmBase;
47
import eu.etaxonomy.cdm.model.common.ExtensionType;
48
import eu.etaxonomy.cdm.model.common.Language;
49
import eu.etaxonomy.cdm.model.common.MarkerType;
50
import eu.etaxonomy.cdm.model.common.RelationshipTermBase;
51
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
52
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
53
import eu.etaxonomy.cdm.model.name.Rank;
54
import eu.etaxonomy.cdm.model.name.TaxonName;
55
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
56
import eu.etaxonomy.cdm.model.reference.Reference;
57
import eu.etaxonomy.cdm.model.taxon.Synonym;
58
import eu.etaxonomy.cdm.model.taxon.Taxon;
59
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
60
import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType;
61
import eu.etaxonomy.cdm.strategy.cache.name.TaxonNameDefaultCacheStrategy;
62

    
63
/**
64
 * @author a.mueller
65
 * @since 20.02.2010
66
 */
67
@Component
68
public class ErmsTaxonImport
69
        extends ErmsImportBase<TaxonBase<?>>
70
        implements IMappingImport<TaxonBase<?>, ErmsImportState>{
71

    
72
    private static final long serialVersionUID = -7111568277264140051L;
73
    private static final Logger logger = Logger.getLogger(ErmsTaxonImport.class);
74

    
75
	private DbImportMapping<ErmsImportState, ErmsImportConfigurator> mapping;
76

    
77
	private static final String pluralString = "taxa";
78
	private static final String dbTableName = "tu";
79
	private static final Class<?> cdmTargetClass = TaxonBase.class;
80

    
81
	private static Map<String, Integer> unacceptReasons = new HashMap<>();
82

    
83
	/**
	 * Creates the taxon import, handing the plural label, the source table name and
	 * the CDM target class of this import to the base class.
	 */
	public ErmsTaxonImport() {
		super(pluralString, dbTableName, cdmTargetClass);
	}
86

    
87
	@Override
88
	protected String getIdQuery() {
89
		String strQuery = " SELECT id FROM tu " ;  //WHERE id NOT IN (147415) for now we exclude Monera as it has no children and is unclear what classification it has. In ERMS it is alternative accepted name (in https://en.wikipedia.org/wiki/Monera it might be a super taxon to bacteria).
90
		return strQuery;
91
	}
92

    
93
	@Override
94
    protected DbImportMapping<ErmsImportState, ErmsImportConfigurator> getMapping() {
95
		if (mapping == null){
96
			mapping = new DbImportMapping<>();
97

    
98
			mapping.addMapper(DbImportObjectCreationMapper.NewInstance(this, "id", TAXON_NAMESPACE)); //id + tu_status
99
			mapping.addMapper(DbImportLsidMapper.NewInstance("GUID", "lsid"));
100

    
101
			UUID tsnUuid = ErmsTransformer.uuidExtTsn;
102
			ExtensionType tsnExtType = getExtensionType(tsnUuid, "TSN", "TSN", "TSN");
103
			mapping.addMapper(DbImportExtensionMapper.NewInstance("tsn", tsnExtType));
104
//			mapping.addMapper(DbImportStringMapper.NewInstance("tu_name", "(NonViralName)name.nameCache"));
105

    
106
			ExtensionType displayNameExtType = getExtensionType(ErmsTransformer.uuidExtDisplayName, "display name", "display name", "display name");
107
			mapping.addMapper(DbImportExtensionMapper.NewInstance("tu_displayname", displayNameExtType));
108
            //Ignore fuzzyName
109
            //  ExtensionType fuzzyNameExtType = getExtensionType(ErmsTransformer.uuidExtFuzzyName, "fuzzy name", "fuzzy name", "fuzzy name");
110
            //  mapping.addMapper(DbImportExtensionMapper.NewInstance("tu_fuzzyname", fuzzyNameExtType));
111
			mapping.addMapper(DbImportStringMapper.NewInstance("tu_authority", "name.authorshipCache"));
112

    
113
			ExtensionType fossilStatusExtType = getExtensionType(ErmsTransformer.uuidExtFossilStatus, "fossil status", "fossil status", "fos. stat.");
114
			mapping.addMapper(DbImportExtensionMapper.NewInstance("fossil_name", fossilStatusExtType));
115

    
116
			ExtensionType unacceptExtType = getExtensionType(ErmsTransformer.uuidExtUnacceptReason, "unaccept reason", "unaccept reason", "reason");
117
			mapping.addMapper(DbImportExtensionMapper.NewInstance("tu_unacceptreason", unacceptExtType));
118

    
119
			ExtensionType qualityStatusExtType = getExtensionType(ErmsTransformer.uuidExtQualityStatus, "quality status", "quality status", "quality status");
120
			mapping.addMapper(DbImportExtensionMapper.NewInstance("qualitystatus_name", qualityStatusExtType)); //checked by Tax Editor ERMS1.1, Added by db management team (2x), checked by Tax Editor
121

    
122
			ExtensionType cacheCitationExtType = getExtensionType(PesiTransformer.uuidExtCacheCitation, "cache_citation", "quality status", "cache_citation");
123
            mapping.addMapper(DbImportExtensionMapper.NewInstance("cache_citation", cacheCitationExtType));
124

    
125
            //flags
126
			mapping.addMapper(DbImportMarkerMapper.NewInstance("tu_marine", ErmsTransformer.uuidMarkerMarine, "marine", "marine", "marine", null));
127
			mapping.addMapper(DbImportMarkerMapper.NewInstance("tu_brackish", ErmsTransformer.uuidMarkerBrackish, "brackish", "brackish", "brackish", null));
128
			mapping.addMapper(DbImportMarkerMapper.NewInstance("tu_fresh", ErmsTransformer.uuidMarkerFreshwater, "freshwater", "fresh", "fresh", null));
129
			mapping.addMapper(DbImportMarkerMapper.NewInstance("tu_terrestrial", ErmsTransformer.uuidMarkerTerrestrial, "terrestrial", "terrestrial", "terrestrial", null));
130

    
131
			//last action, species expert
132
			ExtensionType speciesExpertNameExtType = getExtensionType(PesiTransformer.uuidExtSpeciesExpertName, "species expert name", "species expert name", "species expert name");
133
            mapping.addMapper(DbImportExtensionMapper.NewInstance("ExpertName", speciesExpertNameExtType)); //according to sql script ExpertName maps to SpeciesExpertName in ERMS
134
            AnnotationType lastActionDateType = getAnnotationType(DbLastActionMapper.uuidAnnotationTypeLastActionDate, "Last action date", "Last action date", null);
135
			mapping.addMapper(DbImportAnnotationMapper.NewInstance("lastActionDate", lastActionDateType));
136
            AnnotationType lastActionType = getAnnotationType(DbLastActionMapper.uuidAnnotationTypeLastAction, "Last action", "Last action", null);
137
            MarkerType hasNoLastActionMarkerType = getMarkerType(DbLastActionMapper.uuidMarkerTypeHasNoLastAction, "has no last action", "No last action information available", "no last action");
138
            mapping.addMapper(DbImportAnnotationMapper.NewInstance("lastAction", lastActionType, hasNoLastActionMarkerType));
139

    
140
            //MAN authorshipCache => appendedPhrase
141
            mapping.addMapper(DbImportMethodMapper.NewDefaultInstance(this, "appendedPhraseForMisapplications", ErmsImportState.class));
142

    
143
            //titleCache compare
144
            mapping.addMapper(DbImportMethodMapper.NewDefaultInstance(this, "testTitleCache", ErmsImportState.class));
145

    
146
			//ignore
147
            mapping.addMapper(DbIgnoreMapper.NewInstance("tu_sp", "included in rank/object creation, only needed for defining kingdom"));
148
			mapping.addMapper(DbIgnoreMapper.NewInstance("tu_fossil", "tu_fossil implemented as foreign key"));
149

    
150
		}
151
		return mapping;
152
	}
153

    
154
	@Override
155
	protected String getRecordQuery(ErmsImportConfigurator config) {
156
		String strSelect = " SELECT tu.*, parent1.tu_name AS parent1name, parent2.tu_name AS parent2name, parent3.tu_name AS parent3name, parent4.tu_name AS parent4name, " +
157
		            " parent1.tu_rank AS parent1rank, parent2.tu_rank AS parent2rank, parent3.tu_rank AS parent3rank, " +
158
		            " status.status_id as status_id, status.status_name, fossil.fossil_name, qualitystatus.qualitystatus_name," +
159
		            " s.sessiondate lastActionDate, a.action_name lastAction, s.ExpertName ";
160
		String strFrom = " FROM tu  LEFT OUTER JOIN  tu AS parent1 ON parent1.id = tu.tu_parent " +
161
				" LEFT OUTER JOIN   tu AS parent2  ON parent2.id = parent1.tu_parent " +
162
				" LEFT OUTER JOIN tu AS parent3 ON parent2.tu_parent = parent3.id " +
163
				" LEFT OUTER JOIN tu AS parent4 ON parent3.tu_parent = parent4.id " +
164
                " LEFT OUTER JOIN status ON tu.tu_status = status.status_id " +
165
				" LEFT OUTER JOIN fossil ON tu.tu_fossil = fossil.fossil_id " +
166
				" LEFT OUTER JOIN qualitystatus ON tu.tu_qualitystatus = qualitystatus.id " +
167
				" LEFT OUTER JOIN tu_sessions ts ON ts.tu_id = tu.id " +
168
                " LEFT OUTER JOIN [sessions] s ON s.id = ts.session_id " +
169
                " LEFT OUTER JOIN actions a ON a.id = ts.action_id ";
170
		String strWhere = " WHERE ( tu.id IN (" + ID_LIST_TOKEN + ") )";
171
		String strOrderBy = " ORDER BY tu.id, s.sessiondate DESC, a.id DESC ";
172
		String strRecordQuery = strSelect + strFrom + strWhere + strOrderBy;
173
		return strRecordQuery;
174
	}
175

    
176
	@Override
177
	protected void doInvoke(ErmsImportState state) {
178
		state.setAcceptedTaxaKeys(getAcceptedTaxaKeys(state));
179

    
180
		//first path
181
		super.doInvoke(state);
182
		if(true){
183
		    logUnacceptReasons();
184
		}
185
		return;
186
	}
187

    
188
    Integer lastTaxonId = null;
189
    @Override
190
    protected boolean ignoreRecord(ResultSet rs) throws SQLException {
191
        Integer id = rs.getInt("id");
192
        boolean result = id.equals(lastTaxonId);
193
        lastTaxonId = id;
194
        return result;
195
    }
196

    
197
	private Set<Integer> getAcceptedTaxaKeys(ErmsImportState state) {
198
		Set<Integer> result = new HashSet<>();
199
		String idCol = " id ";
200
		String tuFk = "tu_id";
201
		String vernacularsTable = "vernaculars";
202
		String distributionTable = "dr";
203
		String notesTable = "notes";
204
		String sql =
205
                "          SELECT id FROM tu WHERE tu_accfinal is NULL" //id of taxa not having accepted taxon
206
                + " UNION  SELECT DISTINCT tu_accfinal FROM tu "  //fk to accepted taxon (either the accepted taxon or the taxon itself, if accepted)
207
                + " UNION  SELECT id FROM tu WHERE trim(tu.tu_unacceptreason) like 'misidentification' OR trim(tu.tu_unacceptreason) like 'misidentifications' OR "
208
                            + " tu.tu_unacceptreason like 'misapplied %%name' OR "
209
                            + " tu.tu_unacceptreason like '%%misapplication%%' OR "
210
                            + " tu.tu_unacceptreason like 'incorrect identification%%'" //Misapplications, see ErmsTransformer.getSynonymRelationTypesByKey
211
                + " UNION  SELECT syn.id FROM tu syn INNER JOIN tu acc ON syn.tu_accfinal = acc.id WHERE syn.id = acc.tu_parent AND acc.id <> syn.id "  //see also ErmsTaxonRelationImport.isAccepted, there are some autonyms being the accepted taxon of there own parents
212
                + " UNION  SELECT DISTINCT %s FROM %s " //vernaculars
213
                + " UNION  SELECT DISTINCT %s FROM %s "  //distributions
214
                + " UNION  SELECT DISTINCT %s FROM %s ";  //notes
215
		sql = String.format(sql,
216
		        tuFk, vernacularsTable,
217
				tuFk, distributionTable,
218
				tuFk, notesTable);
219
		ResultSet rs = state.getConfig().getSource().getResultSet(sql);
220
		try {
221
			while (rs.next()){
222
				Integer id;
223
				id = rs.getInt(idCol.trim());
224
				result.add(id);
225
			}
226
			return result;
227
		} catch (SQLException e) {
228
			e.printStackTrace();
229
			throw new RuntimeException(e);
230
		}
231
	}
232

    
233
	@Override
234
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, ErmsImportState state) {
235
		//currently no referencing objects needed
236
	    Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
237
		return result;
238
	}
239

    
240
	@Override
241
	public TaxonBase<?> createObject(ResultSet rs, ErmsImportState state) throws SQLException {
242
		int statusId = rs.getInt("status_id");
243
//		Object accTaxonId = rs.getObject("tu_accfinal");
244
		Integer meId = rs.getInt("id");
245

    
246
        TaxonName taxonName = getTaxonName(rs, state);
247
		fillTaxonName(taxonName, rs, state, meId);
248

    
249
		//add original source for taxon name (taxon original source is added in mapper)
250
		Reference citation = state.getTransactionalSourceReference();
251
		addOriginalSource(rs, taxonName, "id", NAME_NAMESPACE, citation);
252

    
253
		TaxonBase<?> result;
254
		//handle accepted<-> synonym, we create more accepted taxa as we need them within the tree or to attache factual data
255
		if (state.getAcceptedTaxaKeys().contains(meId)){
256
			Taxon taxon = Taxon.NewInstance(taxonName, citation);
257
			if (statusId != 1){
258
				logger.info("Taxon created as taxon but has status <> 1 ("+statusId+"): " + meId);
259
				handleNotAcceptedTaxon(taxon, statusId, state, rs);
260
			}
261
			result = taxon;
262
		}else{
263
			result = Synonym.NewInstance(taxonName, citation);
264
		}
265

    
266
		handleNameStatus(result.getName(), rs, state);
267
		return result;
268
	}
269

    
270
    private void handleNameStatus(TaxonName name, ResultSet rs, ErmsImportState state) throws SQLException {
271
        NomenclaturalStatusType nomStatus = null;
272
        int tuStatus = rs.getInt("tu_status");
273
        //the order is bottom up from SQL script as their values are overridden from top to bottom
274
        if (tuStatus == 8){
275
            //species inquirenda
276
            nomStatus = getNomenclaturalStatusType(state, ErmsTransformer.uuidNomStatusSpeciesInquirenda, "species inquirenda", "species inquirenda", null, Language.LATIN(), null);
277
        }else if (tuStatus == 7){
278
            //temporary name
279
            nomStatus = getNomenclaturalStatusType(state, PesiTransformer.uuidNomStatusTemporaryName, "temporary name", "temporary name", null, Language.ENGLISH(), null);
280
        }else if (tuStatus == 6){
281
            //nomen dubium
282
            nomStatus = NomenclaturalStatusType.DOUBTFUL();
283
        }else if (tuStatus == 5){
284
            //"alternate representation"
285
            nomStatus = getNomenclaturalStatusType(state, ErmsTransformer.uuidNomStatusAlternateRepresentation, "alternate representation", "alternate representation", null, Language.ENGLISH(), null);
286
        }else if (tuStatus == 3){
287
            //nomen nudum
288
            nomStatus = NomenclaturalStatusType.NUDUM();
289
        }
290
        if (nomStatus == null){
291
            //IN SQL Script it is set first by unacceptreason and then overriden if above tu_status exists
292
            String unacceptReason = rs.getString("tu_unacceptreason");
293
            try {
294
                nomStatus = state.getTransformer().getNomenclaturalStatusByKey(unacceptReason);
295
            } catch (UndefinedTransformerMethodException e) {logger.warn("Unhandled method");
296
            }
297
        }
298
        if (nomStatus != null){
299
            name.addStatus(nomStatus, null, null);
300
        }
301
    }
302

    
303
    private TaxonName fillTaxonName(TaxonName taxonName, ResultSet rs, ErmsImportState state, Integer meId) throws SQLException {
304
        String tuName = rs.getString("tu_name");
305
		String displayName = rs.getString("tu_displayname").trim();
306

    
307
		String parent1Name = rs.getString("parent1name");
308
		Integer parent1Rank = rs.getInt("parent1rank");
309

    
310
		String parent2Name = rs.getString("parent2name");
311
		Integer parent2Rank = rs.getInt("parent2rank");
312

    
313
		String parent3Name = rs.getString("parent3name");
314
		Integer parent3Rank = rs.getInt("parent3rank");
315

    
316
	    String parent4Name = rs.getString("parent4name");
317

    
318
		//set epithets
319
		if (taxonName.isGenus() || taxonName.isSupraGeneric()){
320
			taxonName.setGenusOrUninomial(tuName);
321
		}else if (taxonName.isInfraGeneric()){
322
			taxonName.setInfraGenericEpithet(tuName);
323
			taxonName.setGenusOrUninomial(parent1Name);
324
		}else if (taxonName.isSpecies()){
325
			taxonName.setSpecificEpithet(tuName);
326
			getGenusAndInfraGenus(parent1Name, parent2Name, parent1Rank, taxonName);
327
		}else if (taxonName.isInfraSpecific()){
328
			if (parent1Rank < 220){
329
				handleException(parent1Rank, taxonName, displayName, meId);
330
			}
331
			taxonName.setInfraSpecificEpithet(tuName);
332
			if (parent1Rank > 220){  //parent is still infraspecific
333
			    taxonName.setSpecificEpithet(parent2Name);
334
			    getGenusAndInfraGenus(parent3Name, parent4Name, parent3Rank, taxonName);
335
			}else{
336
			    //default
337
			    taxonName.setSpecificEpithet(parent1Name);
338
			    getGenusAndInfraGenus(parent2Name, parent3Name, parent2Rank, taxonName);
339
			}
340
		}else if (taxonName.getRank()== null){
341
			if ("Biota".equalsIgnoreCase(tuName)){
342
				Rank rank = Rank.DOMAIN();  //should be Superdomain
343
				taxonName.setRank(rank);
344
				taxonName.setGenusOrUninomial(tuName);
345
			}else{
346
				String warning = "TaxonName has no rank. Use namecache.";
347
				logger.warn(warning);
348
				taxonName.setNameCache(tuName);
349
			}
350
		}
351

    
352
		//e.g. Leucon [Platyhelminthes] ornatus
353
		if (containsBrackets(displayName)){
354
			taxonName.setNameCache(displayName);
355
			logger.warn("Set name cache: " +  displayName + "; id =" + meId);
356
		}
357
        if (!taxonName.getNameCache().equals(displayName) && !isErroneousSubgenus(taxonName, displayName)){
358
            int pos = CdmUtils.diffIndex(taxonName.getNameCache(), displayName);
359
            logger.warn("Computed name cache differs at "+pos+".\n Computed   : " + taxonName.getNameCache()+"\n DisplayName: " +displayName);
360
            taxonName.setNameCache(displayName, true);
361
        }
362
		taxonName.getTitleCache();
363
        return taxonName;
364
    }
365

    
366
    private static boolean isErroneousSubgenus(TaxonName taxonName, String displayName) {
367
        //this is an error in ERMS formatting in v2019 for ICNafp names, that hopefully soon will be corrected
368
        return (Rank.SPECIES().equals(taxonName.getRank()) && displayName.contains(" subg. "));
369
    }
370

    
371
    @SuppressWarnings("unused")  //used by MethodMapper
372
    private static TaxonBase<?> appendedPhraseForMisapplications(ResultSet rs, ErmsImportState state) throws SQLException{
373
        TaxonBase<?> taxon = (TaxonBase<?>)state.getRelatedObject(DbImportStateBase.CURRENT_OBJECT_NAMESPACE, DbImportStateBase.CURRENT_OBJECT_ID);
374
        TaxonName taxonName = taxon.getName();
375
        String unacceptreason = rs.getString("tu_unacceptreason");
376
        RelationshipTermBase<?>[] rels = state.getTransformer().getSynonymRelationTypesByKey(unacceptreason, state);
377
        if (rels[1]!= null && rels[1].equals(TaxonRelationshipType.MISAPPLIED_NAME_FOR())){
378
            taxon.setAppendedPhrase(taxonName.getAuthorshipCache());
379
            taxon.setSec(null);
380
            taxonName.setAuthorshipCache(null, taxonName.isProtectedAuthorshipCache());
381
            //TODO maybe some further authorship handling is needed if authors get parsed, but not very likely for MAN authorship
382
        }
383
        if(state.getUnhandledUnacceptReason() != null){
384
            //to handle it hear is a workaround, as the real place where it is handled is DbImportSynonymMapper which is called ErmsTaxonRelationImport but where it is diffcult to aggregate logging data
385
            addUnacceptReason(state.getUnhandledUnacceptReason());
386
        }
387
        return taxon;
388
    }
389

    
390
    private static void addUnacceptReason(String unhandledUnacceptReason) {
391
        unhandledUnacceptReason = unhandledUnacceptReason.toLowerCase();
392
        if (!unacceptReasons.keySet().contains(unhandledUnacceptReason)){
393
            unacceptReasons.put(unhandledUnacceptReason, 1);
394
        }else{
395
            unacceptReasons.put(unhandledUnacceptReason, unacceptReasons.get(unhandledUnacceptReason)+1);
396
        }
397
    }
398

    
399
    @SuppressWarnings("unused")  //used by MethodMapper
400
    private static TaxonBase<?> testTitleCache(ResultSet rs, ErmsImportState state) throws SQLException{
401
        TaxonBase<?> taxon = (TaxonBase<?>)state.getRelatedObject(DbImportStateBase.CURRENT_OBJECT_NAMESPACE, DbImportStateBase.CURRENT_OBJECT_ID);
402
        TaxonName taxonName = taxon.getName();
403
        String displayName = rs.getString("tu_displayname");
404
        displayName = displayName == null ? null : displayName.trim();
405
        String titleCache = taxonName.resetTitleCache(); //calling titleCache should always be kept to have a computed titleCache in the CDM DB.
406
        titleCache = CdmUtils.concat(" ", titleCache, taxon.getAppendedPhrase());
407
        String expectedTitleCache = getExpectedTitleCache(rs);
408
        //TODO check titleCache, but beware of autonyms
409
        if (!titleCache.equals(expectedTitleCache) && !isErroneousSubgenus(taxonName, displayName)){
410
            int pos = CdmUtils.diffIndex(titleCache, expectedTitleCache);
411
            logger.warn("Computed title cache differs at "+pos+".\n Computed             : " + titleCache + "\n DisplayName+Authority: " + expectedTitleCache);
412
            taxonName.setNameCache(displayName, true);
413
        }
414
        return taxon;
415
    }
416

    
417
    //see also PesiErmsValidation.srcFullName()
418
    private static String getExpectedTitleCache(ResultSet srcRs) throws SQLException {
419
        String result;
420
        String epi = srcRs.getString("tu_name");
421
        epi = " a" + epi;
422
        String display = srcRs.getString("tu_displayname");
423
        String sp = srcRs.getString("tu_sp");
424
        if (display.indexOf(epi) != display.lastIndexOf(epi) && !sp.startsWith("#2#")){ //homonym, animal
425
            result = srcRs.getString("tu_displayname").replaceFirst(epi+" ", CdmUtils.concat(" ", " "+epi, srcRs.getString("tu_authority")))+" ";
426
        }else{
427
            result = CdmUtils.concat(" ", srcRs.getString("tu_displayname"), srcRs.getString("tu_authority"));
428
        }
429
        return result.trim();
430
    }
431

    
432
    private void handleNotAcceptedTaxon(Taxon taxon, int statusId, ErmsImportState state, ResultSet rs) throws SQLException {
433
		ExtensionType notAccExtensionType = getExtensionType(state, ErmsTransformer.uuidErmsTaxonStatus, "ERMS taxon status", "ERMS taxon status", "status", null);
434
		String statusName = rs.getString("status_name");
435

    
436
		if (statusId > 1){
437
			taxon.addExtension(statusName, notAccExtensionType);
438
		}
439
	}
440

    
441
	private void handleException(Integer parentRank, TaxonName taxonName, String displayName, Integer meId) {
442
		logger.warn("Parent of infra specific taxon is of higher rank ("+parentRank+") than species. Used nameCache: " + displayName +  "; id=" + meId) ;
443
		taxonName.setNameCache(displayName);
444
	}
445

    
446
	private boolean containsBrackets(String displayName) {
447
		int index = displayName.indexOf("[");
448
		return (index > -1);
449
	}
450

    
451
	private void getGenusAndInfraGenus(String parentName, String grandParentName, Integer parent1Rank, TaxonName taxonName) {
452
		if (parent1Rank <220 && parent1Rank > 180){
453
			//parent is infrageneric
454
			taxonName.setInfraGenericEpithet(parentName);
455
			taxonName.setGenusOrUninomial(grandParentName);
456
		}else{
457
			taxonName.setGenusOrUninomial(parentName);
458
		}
459
	}
460

    
461
	/**
462
	 * Returns an empty Taxon Name instance according to the given rank and kingdom.
463
	 */
464
	private TaxonName getTaxonName(ResultSet rs, ErmsImportState state) throws SQLException {
465
	    TaxonName result;
466
		int kingdomId = parseKingdomId(rs);
467
		Integer intRank = rs.getInt("tu_rank");
468

    
469
		NomenclaturalCode nc = ErmsTransformer.kingdomId2NomCode(kingdomId);
470
		Rank rank = null;
471
		rank = state.getRank(intRank, kingdomId);
472

    
473
		if (rank == null){
474
			logger.warn("Rank is null. KingdomId: " + kingdomId + ", rankId: " +  intRank);
475
		}
476
		if (nc != null){
477
			result = nc.getNewTaxonNameInstance(rank);
478
		}else{
479
			result = TaxonNameFactory.NewNonViralInstance(rank);
480
		}
481
		//cache strategy
482
		if (result.isZoological()){
483
		    TaxonNameDefaultCacheStrategy cacheStrategy = PesiTaxonExport.zooNameStrategy;
484
			result.setCacheStrategy(cacheStrategy);
485
		}
486

    
487
		return result;
488
	}
489

    
490
	/**
491
	 * Returns the kingdom id by extracting it from the second character in the <code>tu_sp</code>
492
	 * attribute. If the attribute can not be parsed to a valid id <code>null</code>
493
	 * is returned. If the attribute is <code>null</code> the id of the record is returned.
494
	 * @param rs
495
	 * @return
496
	 * @throws SQLException
497
	 */
498
	private int parseKingdomId(ResultSet rs) throws SQLException {
499
		String treeString = rs.getString("tu_sp");
500
		if (treeString != null){
501
		    if (StringUtils.isNotBlank(treeString) && treeString.length() > 1){
502
				String strKingdom = treeString.substring(1,2);
503

    
504
				if (! treeString.substring(0, 1).equals("#") && ! treeString.substring(2, 3).equals("#") ){
505
					String message = "Tree string " + treeString + " has no recognized format";
506
                    logger.warn(message);
507
                    throw new RuntimeException(message);
508
				}else{
509
					try {
510
						return Integer.valueOf(strKingdom);
511
					} catch (NumberFormatException e) {
512
					    String message = "Kingdom string " + strKingdom + "could not be recognized as a valid number";
513
						logger.warn(message);
514
						throw new RuntimeException(message);
515
					}
516
				}
517
			}else{
518
                String message = "Tree string for kingdom recognition is to short: " + treeString;
519
                logger.warn(message);
520
                throw new RuntimeException(message);
521
			}
522
		}else{
523
			int tu_id = rs.getInt("id");
524
			return tu_id;
525
		}
526
	}
527

    
528
    private void logUnacceptReasons() {
529
        String logStr = "\n Unhandled unaccept reasons:\n===================";
530

    
531
        while (!unacceptReasons.isEmpty()) {
532
            int n = 0;
533
            List<String> mostUsedStrings = new ArrayList<>();
534
            for (Map.Entry<String, Integer> entry : unacceptReasons.entrySet()) {
535
                if (entry.getValue() > n) {
536
                    mostUsedStrings = new ArrayList<>();
537
                    mostUsedStrings.add(entry.getKey());
538
                    n = entry.getValue();
539
                } else if (entry.getValue() == n) {
540
                    mostUsedStrings.add(entry.getKey());
541
                } else {
542
                    //neglect
543
                }
544
            }
545
            mostUsedStrings.sort(new StringComparator());
546
            logStr += "\n   " + String.valueOf(n);
547
            for (String str : mostUsedStrings) {
548
                logStr += "\n   "+ str;
549
                unacceptReasons.remove(str);
550
            }
551
        }
552
        logger.warn(logStr);
553

    
554
    }
555

    
556
	@Override
557
	protected boolean doCheck(ErmsImportState state){
558
		IOValidator<ErmsImportState> validator = new ErmsTaxonImportValidator();
559
		return validator.validate(state);
560
	}
561

    
562
	@Override
563
    protected boolean isIgnore(ErmsImportState state){
564
		return ! state.getConfig().isDoTaxa();
565
	}
566
}
(13-13/17)