Project

General

Profile

« Previous | Next » 

Revision 6ae21a57

Added by Andreas Müller over 4 years ago

ref #1444, ref #8508, ref #8509 fix link handling and add ecology export to ERMS pipeline

View differences:

cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/erms/ErmsLinkImport.java
20 20
import org.springframework.stereotype.Component;
21 21

  
22 22
import eu.etaxonomy.cdm.io.common.IOValidator;
23
import eu.etaxonomy.cdm.io.common.mapping.DbIgnoreMapper;
23 24
import eu.etaxonomy.cdm.io.common.mapping.DbImportExtensionCreationMapper;
24 25
import eu.etaxonomy.cdm.io.common.mapping.DbImportMapping;
25
import eu.etaxonomy.cdm.io.common.mapping.DbNotYetImplementedMapper;
26 26
import eu.etaxonomy.cdm.io.pesi.erms.validation.ErmsLinkImportValidator;
27 27
import eu.etaxonomy.cdm.model.common.CdmBase;
28 28
import eu.etaxonomy.cdm.model.common.Extension;
......
37 37
public class ErmsLinkImport  extends ErmsImportBase<TaxonBase> {
38 38

  
39 39
    private static final long serialVersionUID = 1270264097223862441L;
40

  
41 40
    @SuppressWarnings("unused")
42
	private static final Logger logger = Logger.getLogger(ErmsLinkImport.class);
41
    private static final Logger logger = Logger.getLogger(ErmsLinkImport.class);
42

  
43
    public static final String TOKEN_URL = "@URL: ";
44
    public static final String TOKEN_LINKTEXT = " ,@Text: ";
43 45

  
44 46
	private DbImportMapping<ErmsImportState,ErmsImportConfigurator> mapping;
45 47

  
......
54 56
	@Override
55 57
	protected String getRecordQuery(ErmsImportConfigurator config) {
56 58
		String strRecordQuery =
57
			" SELECT * " +
58
			" FROM links " +
59
			" SELECT l.* " +
60
			        ",'%s' + link_url + '%s' + ISNULL(link_text, '')   valueAll" + //+ ' ,@Note: ' + ISNULL(CAST(note as nvarchar(max)), '')
61
			" FROM links l " +
59 62
			" WHERE ( links.id IN (" + ID_LIST_TOKEN + ") )";
63
		strRecordQuery = String.format(strRecordQuery, TOKEN_URL, TOKEN_LINKTEXT);
60 64
		return strRecordQuery;
61 65
	}
62 66

  
63 67
	@Override
64
    protected DbImportMapping<ErmsImportState,ErmsImportConfigurator> getMapping() {
68
    protected DbImportMapping<ErmsImportState, ErmsImportConfigurator> getMapping() {
65 69
		if (mapping == null){
66
			mapping = new DbImportMapping<ErmsImportState,ErmsImportConfigurator>();
67
			ExtensionType extensionType = ExtensionType.URL();
68
			//TODO do we need to add to TaxonNameBase too?
69
			mapping.addMapper(DbImportExtensionCreationMapper.NewInstance("tu_id", ErmsTaxonImport.TAXON_NAMESPACE, "link_url", "id", extensionType));
70
			//not yet implemented
71
			mapping.addMapper(DbNotYetImplementedMapper.NewInstance("link_text"));  //maybe implement as a second extension ?? but this is ambigous!
72
			mapping.addMapper(DbNotYetImplementedMapper.NewInstance("link_fn"));
73
			mapping.addMapper(DbNotYetImplementedMapper.NewInstance("note"));
70
			mapping = new DbImportMapping<>();
71
			ExtensionType extensionType = getExtensionType(ErmsTransformer.uuidExtErmsLink, "ERMS link", "ERMS link", null);
72
			mapping.addMapper(DbImportExtensionCreationMapper.NewInstance("tu_id", ErmsImportBase.TAXON_NAMESPACE, "valueAll", "id", extensionType));
73

  
74
			//handled in creation mapper
75
			mapping.addMapper(DbIgnoreMapper.NewInstance("link_text", "handled in creation mapper"));
76
			mapping.addMapper(DbIgnoreMapper.NewInstance("note", "A note field does not yet exist in PESI.Note"));//not used in SQL script but why not put to PESI.Note.note field?
77

  
78
			//Ignore
79
			mapping.addMapper(DbIgnoreMapper.NewInstance("link_fn", "Seems to be an internal VLIZ file name. Not used in SQL script "));
80
			mapping.addMapper(DbIgnoreMapper.NewInstance("link_thumbnail", "Some data (>1000) but not used in SQL script."));
81
			mapping.addMapper(DbIgnoreMapper.NewInstance("link_qualitystatus_id", "Not used in SQL script."));
82
			mapping.addMapper(DbIgnoreMapper.NewInstance("link_order", "Not used in SQL script. Until 2019 only 'null' and '0' existed."));
74 83
		}
75 84
		return mapping;
76 85
	}
......
80 89
		String nameSpace;
81 90
		Class<?> cdmClass;
82 91
		Set<String> idSet;
83
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
92
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
84 93

  
85 94
		try{
86
			Set<String> taxonIdSet = new HashSet<String>();
87
			Set<String> languageIdSet = new HashSet<String>();
95
			Set<String> taxonIdSet = new HashSet<>();
88 96
			while (rs.next()){
89 97
				handleForeignKey(rs, taxonIdSet, "tu_id");
90 98
			}
cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/erms/ErmsTransformer.java
249 249
	public static final UUID uuidExtUnacceptReason = UUID.fromString("3883fb79-374d-4120-964b-9666307e3567");
250 250
	public static final UUID uuidExtQualityStatus = UUID.fromString("4de84c6e-41bd-4a0e-894d-77e9ec3103d2");
251 251
	public static final UUID uuidExtAuthor = UUID.fromString("85387300-281f-47bc-8499-7008075dc8e0");
252
	public static final UUID uuidExtErmsLink = UUID.fromString("b2d6ee54-1363-4641-9658-75a1843b84ff");
252 253

  
253 254
	//AnnotationType
254 255
	public static final UUID uuidAnnSpeciesExpertName = UUID.fromString("4d8abf02-3d92-4c65-b30b-0393a1f4818b");
cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/out/PesiEcologyAndLinkExport.java
1
/**
2
* Copyright (C) 2009 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.pesi.out;
10

  
11
import java.sql.SQLException;
12
import java.util.Arrays;
13
import java.util.List;
14
import java.util.Set;
15
import java.util.UUID;
16

  
17
import org.apache.log4j.Logger;
18
import org.springframework.stereotype.Component;
19
import org.springframework.transaction.TransactionStatus;
20

  
21
import eu.etaxonomy.cdm.common.CdmUtils;
22
import eu.etaxonomy.cdm.io.common.mapping.out.DbExportIgnoreMapper;
23
import eu.etaxonomy.cdm.io.common.mapping.out.DbFixedIntegerMapper;
24
import eu.etaxonomy.cdm.io.common.mapping.out.DbFixedStringMapper;
25
import eu.etaxonomy.cdm.io.common.mapping.out.IdIncMapper;
26
import eu.etaxonomy.cdm.io.common.mapping.out.MethodMapper;
27
import eu.etaxonomy.cdm.io.pesi.erms.ErmsLinkImport;
28
import eu.etaxonomy.cdm.io.pesi.erms.ErmsTransformer;
29
import eu.etaxonomy.cdm.model.common.CdmBase;
30
import eu.etaxonomy.cdm.model.common.Extension;
31
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
32
import eu.etaxonomy.cdm.profiler.ProfilerController;
33
/**
34
 * The export class for PESI ecology notes (marine, brackish, fresh, terrestrial) coming from the ERMS tu table and for link notes created from ERMS link extensions.<p>
35
 * @author a.mueller
36
 * @since 28.09.2019
37
 */
38
@Component
39
public class PesiEcologyAndLinkExport extends PesiExportBase {
40

  
41
    private static final long serialVersionUID = -2567615286288369111L;
42
    private static final Logger logger = Logger.getLogger(PesiEcologyAndLinkExport.class);
43

  
44
	private static final Class<? extends CdmBase> standardMethodParameter = TaxonBase.class;
45

  
46
	private static int modCount = 1000;
47
	private static final String dbTableName = "Note";
48
	private static final String pluralString = "ecology or link notes";
49
	private static final String parentPluralString = "Taxa";
50

  
51
	public PesiEcologyAndLinkExport() {
52
		super();
53
	}
54

  
55
	int countNotes = 0;
56
	int countUrls = 0;
57
	String currentValue;
58

  
59
	@Override
60
	public Class<? extends CdmBase> getStandardMethodParameter() {
61
		return standardMethodParameter;
62
	}
63

  
64
	@Override
65
	protected void doInvoke(PesiExportState state) {
66
		try {
67
			logger.info("*** Started making " + pluralString + " ...");
68

  
69
			// Stores whether this invoke was successful or not.
70
			boolean success = true;
71

  
72
			// Get specific mappings: (CDM) TaxonBase.marker -> (PESI) Note (ecology)
73
			PesiExportMapping mapping = getMapping();
74
			mapping.initialize(state);
75

  
76
			PesiExportMapping urlMapping = getUrlMapping();
77
			mapping.initialize(state);
78

  
79
			//All
80
			success &= doPhase01(state, mapping, urlMapping);
81

  
82
			logger.info("*** Finished Making " + pluralString + " ..." + getSuccessString(success));
83

  
84
			if (!success){
85
				state.getResult().addError("An unknown problem occurred");
86
			}
87
			return;
88
		} catch (SQLException e) {
89
			e.printStackTrace();
90
			logger.error(e.getMessage());
91
			state.getResult().addException(e, e.getMessage());
92
		}
93
	}
94

  
95
	//PHASE 01: All
96
	private boolean doPhase01(PesiExportState state, PesiExportMapping mapping, PesiExportMapping urlMapping) throws SQLException {
97

  
98
//	    logger.info("PHASE 1 (ecology...");
99
		int count = 0;
100
		int pastCount = 0;
101
		boolean success = true;
102
		int limit = state.getConfig().getLimitSave();
103

  
104
		List<TaxonBase> taxonList = null;
105

  
106
		TransactionStatus txStatus = startTransaction(true);
107

  
108
		if (logger.isDebugEnabled()){
109
		    logger.info("Started new transaction. Fetching some " + parentPluralString + " (max: " + limit + ") ...");
110
		    logger.debug("Start snapshot, before starting loop");
111
		    ProfilerController.memorySnapshot();
112
		}
113

  
114
		List<String> propPath = Arrays.asList(new String[]{"markers.*"});
115
		int partitionCount = 0;
116
		while ((taxonList = getNextTaxonPartition(TaxonBase.class, limit, partitionCount++, propPath )) != null   ) {
117

  
118
			if (logger.isDebugEnabled()) {
119
                logger.info("Fetched " + taxonList.size() + " " + parentPluralString + ". Exporting...");
120
            }
121

  
122
			for (TaxonBase<?> taxon : taxonList) {
123
				doCount(count++, modCount, pluralString);
124
				state.setCurrentTaxon(taxon);
125
				if (!taxon.getMarkers().isEmpty()){
126
					success &= handleSingleEcologyTaxon(taxon, mapping);
127
				}
128
				if (!taxon.getExtensions().isEmpty()){
129
                    success &= handleSingleLinkTaxon(taxon, urlMapping);
130
                }
131
			}
132
			taxonList = null;
133
			state.setCurrentTaxon(null);
134

  
135
			// Commit transaction
136
			commitTransaction(txStatus);
137
			logger.info("Exported " + (count - pastCount) + " " + parentPluralString + ". Total taxa: " + count + ". Ecology notes: " + countNotes + ". Link notes: " + countUrls);
138
			pastCount = count;
139
			if (logger.isDebugEnabled()) {
140
                ProfilerController.memorySnapshot();
141
            }
142
			// Start transaction
143
			txStatus = startTransaction(true);
144
			if(logger.isDebugEnabled()) {
145
                logger.info("Started new transaction. Fetching some " + pluralString + " (max: " + limit + ") for description import ...");
146
            }
147
		}
148

  
149

  
150
		// Commit transaction
151
		commitTransaction(txStatus);
152
		logger.debug("Committed transaction.");
153
		return success;
154
	}
155

  
156
	private boolean handleSingleEcologyTaxon(TaxonBase<?> taxon, PesiExportMapping mapping) {
157

  
158
	    boolean success = true;
159
	    String ecologyStr = getEcologyString(taxon);
160
	    if (isNotBlank(ecologyStr)){
161
	        success &= mapping.invoke(taxon);
162
	        countNotes++;
163
	    }
164
		return success;
165
	}
166

  
167
    private static String getEcologyString(TaxonBase<?> taxon) {
168
        String ecologyStr = null;
169
	    ecologyStr = CdmUtils.concat(", ", ecologyStr, createEcologyStr(taxon, "marine", ErmsTransformer.uuidMarkerMarine));
170
	    ecologyStr = CdmUtils.concat(", ", ecologyStr, createEcologyStr(taxon, "brackish", ErmsTransformer.uuidMarkerBrackish));
171
	    ecologyStr = CdmUtils.concat(", ", ecologyStr, createEcologyStr(taxon, "fresh", ErmsTransformer.uuidMarkerFreshwater));
172
	    ecologyStr = CdmUtils.concat(", ", ecologyStr, createEcologyStr(taxon, "terrestrial", ErmsTransformer.uuidMarkerTerrestrial));
173
        return ecologyStr;
174
    }
175

  
176
    private static String createEcologyStr(TaxonBase<?> taxon, String strEcology, UUID markerUuid) {
177
        Boolean value = taxon.markerValue(markerUuid);
178
        if (value == null){
179
            return null;
180
        }else if (value == true){
181
            return strEcology;
182
        }else{
183
            return "not " + strEcology;
184
        }
185
    }
186

  
187
    private boolean handleSingleLinkTaxon(TaxonBase<?> taxon, PesiExportMapping mapping) {
188

  
189
        boolean success = true;
190
        Set<Extension> urlExtensions = taxon.getFilteredExtensions(ErmsTransformer.uuidExtErmsLink);
191
        for (Extension extension : urlExtensions){
192
            mapping.invoke(extension);
193
            countUrls++;
194
        }
195
        return success;
196
    }
197

  
198
	protected boolean doDelete(PesiExportState state) {
199
	    //Note table is already filled by Description import
200
	    //=> we do not empty any table here
201
		return true;
202
	}
203

  
204
    @SuppressWarnings("unused")  //used by mapper
205
    private static Integer getTaxonFk(TaxonBase<?> taxonBase, PesiExportState state) {
206
        return state.getDbId(taxonBase);
207
    }
208

  
209
    @SuppressWarnings("unused")  //used by mapper
210
    private static Integer getCurrentTaxonFk(Extension extension, PesiExportState state) {
211
        return state.getDbId(state.getCurrentTaxon());
212
    }
213

  
214
    @SuppressWarnings("unused")  //used by mapper
215
    private static String getNote_1(TaxonBase<?> taxon) {
216
        return getEcologyString(taxon);
217
    }
218

  
219
    @SuppressWarnings("unused")  //used by mapper
220
    private static String getUrlNote_1(Extension extension, PesiExportState state) {
221
        String value = extension.getValue();
222
        if (value == null){
223
            return null;
224
        }else{
225
            //TODO use regex grouping
226
            String result = value.split(ErmsLinkImport.TOKEN_LINKTEXT)[0];
227
            result = result.trim();
228
            return result;
229
        }
230
    }
231

  
232
    @SuppressWarnings("unused")  //used by mapper
233
    private static String getUrlNote_2(Extension extension, PesiExportState state) {
234
        String value = extension.getValue();
235
        if (value == null){
236
            return null;
237
        }else{
238
            //TODO use regex grouping
239
            String result = value.split(ErmsLinkImport.TOKEN_LINKTEXT)[1];
240
            result = result.trim();
241
            return result;
242
        }
243
    }
244

  
245

  
246
    @SuppressWarnings("unused")  //used by mapper
247
    private static Integer getNoteCategoryFk(Extension extension, PesiExportState state) {
248
        String linktext = getUrlNote_2(extension, state);
249
        int catFk = categoryByLinkText(linktext);
250
        return catFk;
251
    }
252

  
253
    private static int categoryByLinkText(String linktext) {
254
        if (linktext == null){
255
            return PesiTransformer.NoteCategory_undefined_link;
256
        }else if (linktext.matches("(to fishbase|marine life inf).*")){
257
            return PesiTransformer.NoteCategory_Link_to_general_information;
258
        }else if (linktext.matches(".*(clemam|nemys|algaebase|fishbase).*")){
259
            return PesiTransformer.NoteCategory_Link_to_taxonomy;
260
        }else{
261
            return PesiTransformer.NoteCategory_undefined_link;
262
        }
263
    }
264

  
265
    @SuppressWarnings("unused")  //used by mapper
266
    private static String getNoteCategoryCache(Extension extension, PesiExportState state) {
267
        int catFk = getNoteCategoryFk(extension, state);
268
        String result = categoryByLinkText(catFk);
269
        return result;
270
    }
271

  
272
//******************************* MAPPINGS ********************************************
273

  
274
    /**
275
     * @param catFk
276
     * @return
277
     */
278
    private static String categoryByLinkText(int catFk) {
279
        if(catFk == 22){
280
            return "";
281
        }else if (catFk == 23){
282
            return "";
283
        }else if (catFk == 24){
284
            return "";
285
        }else{
286
            logger.warn("Link category fk not yet supported: " + catFk);
287
            return null;
288
        }
289
    }
290

  
291
    private PesiExportMapping getMapping() {
292
        PesiExportMapping mapping = new PesiExportMapping(dbTableName);
293

  
294
        mapping.addMapper(idIncMapper);
295
        mapping.addMapper(MethodMapper.NewInstance("Note_1", this, standardMethodParameter));
296

  
297
        mapping.addMapper(DbFixedIntegerMapper.NewInstance(PesiTransformer.NoteCategory_ecology, "NoteCategoryFk"));
298
        mapping.addMapper(DbFixedStringMapper.NewInstance("ecology", "NoteCategoryCache"));
299
        mapping.addMapper(DbFixedIntegerMapper.NewInstance(12, "LanguageFk"));
300
        mapping.addMapper(DbFixedStringMapper.NewInstance("English", "LanguageCache"));
301

  
302
        mapping.addMapper(MethodMapper.NewInstance("TaxonFk", this, standardMethodParameter, PesiExportState.class));
303

  
304
        mapping.addMapper(DbExportIgnoreMapper.NewInstance("Note_2", "Note_2 not used for ecology fact"));
305
        mapping.addMapper(DbExportIgnoreMapper.NewInstance("Region", "Region not used for ecology fact"));
306
        mapping.addMapper(DbExportIgnoreMapper.NewInstance("SpeciesExpertGUID", "SpeciesExpertGUID not used for ecology fact"));
307
        mapping.addMapper(DbExportIgnoreMapper.NewInstance("SpeciesExpertName", "SpeciesExpertName not used for ecology fact"));
308
        mapping.addMapper(DbExportIgnoreMapper.NewInstance("LastAction", "LastAction not used for ecology fact"));
309
        mapping.addMapper(DbExportIgnoreMapper.NewInstance("LastActionDate", "LastActionDate not used for ecology fact"));
310

  
311
        return mapping;
312
    }
313
    private IdIncMapper idIncMapper = IdIncMapper.NewComputedInstance("NoteId");
314
    private PesiExportMapping getUrlMapping() {
315
        PesiExportMapping mapping = new PesiExportMapping(dbTableName);
316

  
317
        mapping.addMapper(idIncMapper);
318
        mapping.addMapper(MethodMapper.NewInstance("Note_1", this.getClass(), "getUrlNote_1", Extension.class));
319
        mapping.addMapper(MethodMapper.NewInstance("Note_2", this.getClass(), "getUrlNote_2", Extension.class));
320
        mapping.addMapper(MethodMapper.NewInstance("NoteCategoryFk", this.getClass(), Extension.class));
321
        mapping.addMapper(MethodMapper.NewInstance("NoteCategoryCache", this.getClass(), Extension.class));
322
        mapping.addMapper(MethodMapper.NewInstance("TaxonFk", this.getClass(), "getCurrentTaxonFk", Extension.class, PesiExportState.class));
323

  
324
        mapping.addMapper(DbExportIgnoreMapper.NewInstance("LanguageFk", "LanguageFk not used for link fact"));
325
        mapping.addMapper(DbExportIgnoreMapper.NewInstance("LanguageCache", "LanguageCache not used for link fact"));
326
        mapping.addMapper(DbExportIgnoreMapper.NewInstance("Region", "Region not used for link fact"));
327
        mapping.addMapper(DbExportIgnoreMapper.NewInstance("SpeciesExpertGUID", "SpeciesExpertGUID not used for link fact"));
328
        mapping.addMapper(DbExportIgnoreMapper.NewInstance("SpeciesExpertName", "SpeciesExpertName not used for link fact"));
329
        mapping.addMapper(DbExportIgnoreMapper.NewInstance("LastAction", "LastAction not used for link fact"));
330
        mapping.addMapper(DbExportIgnoreMapper.NewInstance("LastActionDate", "LastActionDate not used for link fact"));
331

  
332
        return mapping;
333
    }
334

  
335
    @Override
336
    protected boolean doCheck(PesiExportState state) {
337
        boolean result = true;
338
        return result;
339
    }
340

  
341
    @Override
342
    protected boolean isIgnore(PesiExportState state) {
343
        return ! state.getConfig().isDoEcology();
344
    }
345
}
cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/out/PesiExportConfigurator.java
46 46
	private boolean doInferredSynonyms = true;
47 47
	private boolean doPureNames = true;
48 48
	private boolean doDescription = true;
49
	private boolean doEcology = true;
49 50

  
50 51
	private int nameIdStart = 10000000;
51 52

  
......
62 63
				PesiRelTaxonExport.class, // RelTaxonId's could be deleted from state hashmap
63 64
				PesiAdditionalSourceExport.class,
64 65
				PesiDescriptionExport.class,
66
				PesiEcologyAndLinkExport.class,
65 67
				PesiFinalUpdateExport.class
66 68
		};
67 69
	}
......
209 211
		this.doParentAndBiota = doParentAndBiota;
210 212
	}
211 213

  
214
    public boolean isDoEcology() {
215
        return doEcology;
216
    }
217
    public void setDoEcology(boolean doEcology) {
218
        this.doEcology = doEcology;
219
    }
220

  
212 221
}

Also available in: Unified diff