1
|
/**
|
2
|
* Copyright (C) 2007 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.io.dwca.out;
|
10
|
|
11
|
import java.io.ByteArrayOutputStream;
|
12
|
import java.io.File;
|
13
|
import java.io.FileNotFoundException;
|
14
|
import java.io.FileOutputStream;
|
15
|
import java.io.IOException;
|
16
|
import java.io.OutputStream;
|
17
|
import java.io.OutputStreamWriter;
|
18
|
import java.io.PrintWriter;
|
19
|
import java.io.UnsupportedEncodingException;
|
20
|
import java.util.ArrayList;
|
21
|
import java.util.HashSet;
|
22
|
import java.util.List;
|
23
|
import java.util.Set;
|
24
|
import java.util.UUID;
|
25
|
|
26
|
import javax.xml.stream.XMLOutputFactory;
|
27
|
import javax.xml.stream.XMLStreamException;
|
28
|
import javax.xml.stream.XMLStreamWriter;
|
29
|
|
30
|
import org.apache.commons.lang.StringUtils;
|
31
|
import org.apache.log4j.Logger;
|
32
|
import org.springframework.beans.factory.annotation.Autowired;
|
33
|
|
34
|
import eu.etaxonomy.cdm.api.service.IClassificationService;
|
35
|
import eu.etaxonomy.cdm.api.service.ITaxonNodeService;
|
36
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
37
|
import eu.etaxonomy.cdm.io.common.CdmExportBase;
|
38
|
import eu.etaxonomy.cdm.io.common.ICdmExport;
|
39
|
import eu.etaxonomy.cdm.io.common.mapping.out.IExportTransformer;
|
40
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
41
|
import eu.etaxonomy.cdm.model.common.IOriginalSource;
|
42
|
import eu.etaxonomy.cdm.model.common.ISourceable;
|
43
|
import eu.etaxonomy.cdm.model.description.DescriptionElementSource;
|
44
|
import eu.etaxonomy.cdm.model.location.Country;
|
45
|
import eu.etaxonomy.cdm.model.location.NamedArea;
|
46
|
import eu.etaxonomy.cdm.model.taxon.Classification;
|
47
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
48
|
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
49
|
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
|
50
|
import eu.etaxonomy.cdm.persistence.dto.TaxonNodeDto;
|
51
|
|
52
|
/**
|
53
|
* @author a.mueller
|
54
|
* @date 18.04.2011
|
55
|
*
|
56
|
*/
|
57
|
public abstract class DwcaExportBase
|
58
|
extends CdmExportBase<DwcaTaxExportConfigurator, DwcaTaxExportState, IExportTransformer>
|
59
|
implements ICdmExport<DwcaTaxExportConfigurator, DwcaTaxExportState>{
|
60
|
|
61
|
private static final long serialVersionUID = -3214410418410044139L;
|
62
|
|
63
|
private static final Logger logger = Logger.getLogger(DwcaExportBase.class);
|
64
|
|
65
|
protected static final boolean IS_CORE = true;
|
66
|
|
67
|
@Autowired
|
68
|
private IClassificationService classificationService;
|
69
|
|
70
|
@Autowired
|
71
|
private ITaxonNodeService taxonNodeService;
|
72
|
|
73
|
|
74
|
@Override
|
75
|
public int countSteps(DwcaTaxExportState state) {
|
76
|
//FIXME count without initialization
|
77
|
List<TaxonNode> allNodes = allNodes(state);
|
78
|
return allNodes.size();
|
79
|
}
|
80
|
|
81
|
/**
|
82
|
* Returns the list of {@link TaxonNode taxon nodes} that correspond to the
|
83
|
* given filter criteria (e.g. subtreeUUids). If no filter is given
|
84
|
* all taxon nodes of all classifications are returned. If the list has been
|
85
|
* computed before it is taken from the state cache. Nodes that do not have
|
86
|
* a taxon attached are not returned. Instead a warning is given that the node is
|
87
|
* ommitted (empty taxon nodes should not but do exist in CDM databases).
|
88
|
* <BR>
|
89
|
* Preliminary implementation. Better implement API method for this.
|
90
|
*/
|
91
|
//TODO unify with similar methods for other exports
|
92
|
protected List<TaxonNode> allNodes(DwcaTaxExportState state) {
|
93
|
|
94
|
Set<UUID> subtreeUuidSet = state.getConfig().getSubtreeUuids();
|
95
|
if (subtreeUuidSet == null){
|
96
|
subtreeUuidSet = new HashSet<>();
|
97
|
}
|
98
|
//handle empty list as no filter defined
|
99
|
if (subtreeUuidSet.isEmpty()){
|
100
|
List<Classification> classificationList = getClassificationService().list(Classification.class, null, 0, null, null);
|
101
|
for (Classification classification : classificationList){
|
102
|
subtreeUuidSet.add(classification.getRootNode().getUuid());
|
103
|
}
|
104
|
}
|
105
|
|
106
|
//TODO memory critical to store ALL node
|
107
|
if (state.getAllNodes().isEmpty()){
|
108
|
makeAllNodes(state, subtreeUuidSet);
|
109
|
}
|
110
|
List<TaxonNode> allNodes = state.getAllNodes();
|
111
|
return allNodes;
|
112
|
}
|
113
|
|
114
|
private void makeAllNodes(DwcaTaxExportState state, Set<UUID> subtreeSet) {
|
115
|
|
116
|
try {
|
117
|
boolean doSynonyms = false;
|
118
|
boolean recursive = true;
|
119
|
Set<UUID> uuidSet = new HashSet<>();
|
120
|
|
121
|
for (UUID subtreeUuid : subtreeSet){
|
122
|
UUID tnUuuid = taxonNodeUuid(subtreeUuid);
|
123
|
uuidSet.add(tnUuuid);
|
124
|
List<TaxonNodeDto> records = getTaxonNodeService().pageChildNodesDTOs(tnUuuid,
|
125
|
recursive, doSynonyms, null, null, null).getRecords();
|
126
|
for (TaxonNodeDto dto : records){
|
127
|
uuidSet.add(dto.getUuid());
|
128
|
}
|
129
|
}
|
130
|
List<TaxonNode> allNodes = getTaxonNodeService().find(uuidSet);
|
131
|
|
132
|
List<TaxonNode> result = new ArrayList<>();
|
133
|
for (TaxonNode node : allNodes){
|
134
|
if(node.getParent()== null){ //root (or invalid) node
|
135
|
continue;
|
136
|
}
|
137
|
node = CdmBase.deproxy(node);
|
138
|
Taxon taxon = CdmBase.deproxy(node.getTaxon());
|
139
|
if (taxon == null){
|
140
|
String message = "There is a taxon node without taxon. id=" + node.getId();
|
141
|
state.getResult().addWarning(message);
|
142
|
continue;
|
143
|
}
|
144
|
result.add(node);
|
145
|
}
|
146
|
state.setAllNodes(result);
|
147
|
} catch (Exception e) {
|
148
|
String message = "Unexpected exception when trying to compute all taxon nodes";
|
149
|
state.getResult().addException(e, message);
|
150
|
}
|
151
|
}
|
152
|
|
153
|
|
154
|
/**
|
155
|
* @param subtreeUuid
|
156
|
* @return
|
157
|
*/
|
158
|
private UUID taxonNodeUuid(UUID subtreeUuid) {
|
159
|
TaxonNode node = taxonNodeService.find(subtreeUuid);
|
160
|
if (node == null){
|
161
|
Classification classification = classificationService.find(subtreeUuid);
|
162
|
if (classification != null){
|
163
|
node = classification.getRootNode();
|
164
|
}else{
|
165
|
throw new IllegalArgumentException("Subtree identifier does not exist: " + subtreeUuid);
|
166
|
}
|
167
|
}
|
168
|
return node.getUuid();
|
169
|
}
|
170
|
|
171
|
/**
|
172
|
* Creates the locationId, locality, countryCode triple
|
173
|
* @param record
|
174
|
* @param area
|
175
|
*/
|
176
|
protected void handleArea(IDwcaAreaRecord record, NamedArea area, TaxonBase<?> taxon, boolean required) {
|
177
|
if (area != null){
|
178
|
record.setLocationId(area);
|
179
|
record.setLocality(area.getLabel());
|
180
|
if (area.isInstanceOf(Country.class)){
|
181
|
Country country = CdmBase.deproxy(area, Country.class);
|
182
|
record.setCountryCode(country.getIso3166_A2());
|
183
|
}
|
184
|
}else{
|
185
|
if (required){
|
186
|
String message = "Description requires area but area does not exist for taxon " + getTaxonLogString(taxon);
|
187
|
logger.warn(message);
|
188
|
}
|
189
|
}
|
190
|
}
|
191
|
|
192
|
|
193
|
protected String getTaxonLogString(TaxonBase<?> taxon) {
|
194
|
return taxon.getTitleCache() + "(" + taxon.getId() + ")";
|
195
|
}
|
196
|
|
197
|
|
198
|
protected String getSources(ISourceable<?> sourceable, DwcaTaxExportConfigurator config) {
|
199
|
String result = "";
|
200
|
for (IOriginalSource<?> source: sourceable.getSources()){
|
201
|
if (StringUtils.isBlank(source.getIdInSource())){//idInSource indicates that this source is only data provenance, may be changed in future
|
202
|
if (source.getCitation() != null){
|
203
|
String ref = source.getCitation().getTitleCache();
|
204
|
result = CdmUtils.concat(config.getSetSeparator(), result, ref);
|
205
|
}
|
206
|
}
|
207
|
}
|
208
|
return result;
|
209
|
}
|
210
|
|
211
|
protected String getSources3(ISourceable<?> sourceable, DwcaTaxExportConfigurator config) {
|
212
|
String result = "";
|
213
|
for (IOriginalSource<?> source: sourceable.getSources()){
|
214
|
if (source.getCitation() != null){
|
215
|
String ref = source.getCitation().getTitleCache();
|
216
|
result = CdmUtils.concat(config.getSetSeparator(), result, ref);
|
217
|
}
|
218
|
}
|
219
|
return result;
|
220
|
}
|
221
|
|
222
|
protected String getSources2(Set<DescriptionElementSource> set, DwcaTaxExportConfigurator config) {
|
223
|
String result = "";
|
224
|
for(DescriptionElementSource source: set){
|
225
|
if (StringUtils.isBlank(source.getIdInSource())){//idInSource indicates that this source is only data provenance, may be changed in future
|
226
|
if (source.getCitation() != null){
|
227
|
String ref = source.getCitation().getTitleCache();
|
228
|
result = CdmUtils.concat(config.getSetSeparator(), result, ref);
|
229
|
}
|
230
|
}
|
231
|
}
|
232
|
return result;
|
233
|
}
|
234
|
|
235
|
|
236
|
/**
|
237
|
* @param config
|
238
|
* @return
|
239
|
* @throws IOException
|
240
|
* @throws FileNotFoundException
|
241
|
*/
|
242
|
protected FileOutputStream createFileOutputStream(DwcaTaxExportConfigurator config, String thisFileName) throws IOException, FileNotFoundException {
|
243
|
String filePath = config.getDestinationNameString();
|
244
|
String fileName = filePath + File.separatorChar + thisFileName;
|
245
|
File f = new File(fileName);
|
246
|
if (!f.exists()){
|
247
|
f.createNewFile();
|
248
|
}
|
249
|
FileOutputStream fos = new FileOutputStream(f);
|
250
|
return fos;
|
251
|
}
|
252
|
|
253
|
|
254
|
protected XMLStreamWriter createXmlStreamWriter(DwcaTaxExportState state, DwcaTaxOutputFile table)
|
255
|
throws IOException, FileNotFoundException, XMLStreamException {
|
256
|
|
257
|
XMLOutputFactory factory = XMLOutputFactory.newInstance();
|
258
|
OutputStream os;
|
259
|
boolean useZip = state.isZip();
|
260
|
if (useZip){
|
261
|
os = state.getZipStream(table.getTableName());
|
262
|
}else if(state.getConfig().getDestination() != null){
|
263
|
os = createFileOutputStream(state.getConfig(), table.getTableName());
|
264
|
}else{
|
265
|
os = new ByteArrayOutputStream();
|
266
|
state.getProcessor().put(table, (ByteArrayOutputStream)os);
|
267
|
}
|
268
|
XMLStreamWriter writer = factory.createXMLStreamWriter(os);
|
269
|
return writer;
|
270
|
}
|
271
|
|
272
|
|
273
|
/**
|
274
|
* @param writer2
|
275
|
* @param coreTaxFileName
|
276
|
* @param config
|
277
|
* @return
|
278
|
* @throws IOException
|
279
|
* @throws FileNotFoundException
|
280
|
* @throws UnsupportedEncodingException
|
281
|
*/
|
282
|
protected PrintWriter createPrintWriter(DwcaTaxExportState state, DwcaTaxOutputFile file)
|
283
|
throws IOException, FileNotFoundException, UnsupportedEncodingException {
|
284
|
|
285
|
if (state.getWriter(file) == null){
|
286
|
|
287
|
boolean useZip = state.isZip();
|
288
|
OutputStream os;
|
289
|
if (useZip){
|
290
|
os = state.getZipStream(file.getTableName());
|
291
|
}else if(state.getConfig().getDestination() != null){
|
292
|
os = createFileOutputStream(state.getConfig(), file.getTableName());
|
293
|
}else{
|
294
|
os = new ByteArrayOutputStream();
|
295
|
state.getProcessor().put(file, (ByteArrayOutputStream)os);
|
296
|
}
|
297
|
PrintWriter writer = new PrintWriter(new OutputStreamWriter(os, "UTF8"), true);
|
298
|
state.putWriter(file, writer);
|
299
|
}
|
300
|
return state.getWriter(file);
|
301
|
}
|
302
|
|
303
|
|
304
|
/**
|
305
|
* flushes the writer for the according file if exists.
|
306
|
*/
|
307
|
protected void flushWriter(DwcaTaxExportState state, DwcaTaxOutputFile file) {
|
308
|
if (state.getWriter(file) != null){
|
309
|
state.getWriter(file).flush();
|
310
|
}
|
311
|
}
|
312
|
|
313
|
|
314
|
/**
|
315
|
* Closes the writer
|
316
|
* @param file
|
317
|
* @param state
|
318
|
*/
|
319
|
protected void closeWriter(DwcaTaxOutputFile file, DwcaTaxExportState state) {
|
320
|
PrintWriter writer = state.getWriter(file);
|
321
|
if (writer != null && state.isZip() == false){
|
322
|
writer.close();
|
323
|
}
|
324
|
}
|
325
|
|
326
|
|
327
|
|
328
|
/**
|
329
|
* Closes the writer.
|
330
|
* Note: XMLStreamWriter does not close the underlying stream.
|
331
|
* @param writer
|
332
|
* @param state
|
333
|
*/
|
334
|
protected void closeWriter(XMLStreamWriter writer, DwcaTaxExportState state) {
|
335
|
if (writer != null && state.isZip() == false){
|
336
|
try {
|
337
|
writer.close();
|
338
|
} catch (XMLStreamException e) {
|
339
|
throw new RuntimeException(e);
|
340
|
}
|
341
|
}
|
342
|
}
|
343
|
|
344
|
}
|