1
|
/**
|
2
|
* Copyright (C) 2007 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.io.dwca.out;
|
10
|
|
11
|
import java.io.File;
|
12
|
import java.io.FileNotFoundException;
|
13
|
import java.io.FileOutputStream;
|
14
|
import java.io.IOException;
|
15
|
import java.io.OutputStream;
|
16
|
import java.io.OutputStreamWriter;
|
17
|
import java.io.PrintWriter;
|
18
|
import java.io.UnsupportedEncodingException;
|
19
|
import java.util.ArrayList;
|
20
|
import java.util.HashSet;
|
21
|
import java.util.List;
|
22
|
import java.util.Set;
|
23
|
import java.util.UUID;
|
24
|
|
25
|
import javax.xml.stream.XMLOutputFactory;
|
26
|
import javax.xml.stream.XMLStreamException;
|
27
|
import javax.xml.stream.XMLStreamWriter;
|
28
|
|
29
|
import org.apache.commons.lang.StringUtils;
|
30
|
import org.apache.log4j.Logger;
|
31
|
|
32
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
33
|
import eu.etaxonomy.cdm.io.common.CdmExportBase;
|
34
|
import eu.etaxonomy.cdm.io.common.ICdmExport;
|
35
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
36
|
import eu.etaxonomy.cdm.model.common.IOriginalSource;
|
37
|
import eu.etaxonomy.cdm.model.common.ISourceable;
|
38
|
import eu.etaxonomy.cdm.model.location.NamedArea;
|
39
|
import eu.etaxonomy.cdm.model.location.WaterbodyOrCountry;
|
40
|
import eu.etaxonomy.cdm.model.taxon.Classification;
|
41
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
42
|
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
43
|
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
|
44
|
|
45
|
/**
|
46
|
* @author a.mueller
|
47
|
* @date 18.04.2011
|
48
|
*
|
49
|
*/
|
50
|
public abstract class DwcaExportBase extends CdmExportBase<DwcaTaxExportConfigurator, DwcaTaxExportState> implements ICdmExport<DwcaTaxExportConfigurator, DwcaTaxExportState>{
|
51
|
private static final Logger logger = Logger.getLogger(DwcaExportBase.class);
|
52
|
|
53
|
protected static final boolean IS_CORE = true;
|
54
|
|
55
|
|
56
|
protected Set<Integer> existingRecordIds = new HashSet<Integer>();
|
57
|
protected Set<UUID> existingRecordUuids = new HashSet<UUID>();
|
58
|
|
59
|
|
60
|
|
61
|
/* (non-Javadoc)
|
62
|
* @see eu.etaxonomy.cdm.io.common.CdmIoBase#countSteps()
|
63
|
*/
|
64
|
@Override
|
65
|
public int countSteps() {
|
66
|
List<TaxonNode> allNodes = getClassificationService().getAllNodes();
|
67
|
return allNodes.size();
|
68
|
}
|
69
|
|
70
|
|
71
|
|
72
|
/**
|
73
|
* Returns the list of taxon nodes that are part in one of the given classifications
|
74
|
* and do have a taxon attached (empty taxon nodes should not but do exist in CDM databases).
|
75
|
* Preliminary implementation. Better implement API method for this.
|
76
|
* @return
|
77
|
*/
|
78
|
protected List<TaxonNode> getAllNodes(Set<Classification> classificationList) {
|
79
|
List<TaxonNode> allNodes = getClassificationService().getAllNodes();
|
80
|
List<TaxonNode> result = new ArrayList<TaxonNode>();
|
81
|
for (TaxonNode node : allNodes){
|
82
|
if (node.getClassification() == null ){
|
83
|
continue;
|
84
|
}else if (classificationList != null && classificationList.contains(node.getClassification())){
|
85
|
continue;
|
86
|
}
|
87
|
Taxon taxon = CdmBase.deproxy(node.getTaxon(), Taxon.class);
|
88
|
if (taxon == null){
|
89
|
String message = "There is a taxon node without taxon: " + node.getId();
|
90
|
logger.warn(message);
|
91
|
continue;
|
92
|
}
|
93
|
result.add(node);
|
94
|
}
|
95
|
return result;
|
96
|
}
|
97
|
|
98
|
|
99
|
/**
|
100
|
* Creates the locationId, locality, countryCode triple
|
101
|
* @param record
|
102
|
* @param area
|
103
|
*/
|
104
|
protected void handleArea(IDwcaAreaRecord record, NamedArea area, TaxonBase<?> taxon, boolean required) {
|
105
|
if (area != null){
|
106
|
record.setLocationId(area);
|
107
|
record.setLocality(area.getLabel());
|
108
|
if (area.isInstanceOf(WaterbodyOrCountry.class)){
|
109
|
WaterbodyOrCountry country = CdmBase.deproxy(area, WaterbodyOrCountry.class);
|
110
|
record.setCountryCode(country.getIso3166_A2());
|
111
|
}
|
112
|
}else{
|
113
|
if (required){
|
114
|
String message = "Description requires area but area does not exist for taxon " + getTaxonLogString(taxon);
|
115
|
logger.warn(message);
|
116
|
}
|
117
|
}
|
118
|
}
|
119
|
|
120
|
|
121
|
protected String getTaxonLogString(TaxonBase<?> taxon) {
|
122
|
return taxon.getTitleCache() + "(" + taxon.getId() + ")";
|
123
|
}
|
124
|
|
125
|
|
126
|
/**
|
127
|
* @param el
|
128
|
* @return
|
129
|
*/
|
130
|
protected boolean recordExists(CdmBase el) {
|
131
|
return existingRecordIds.contains(el.getId());
|
132
|
}
|
133
|
|
134
|
|
135
|
/**
|
136
|
* @param sec
|
137
|
*/
|
138
|
protected void addExistingRecord(CdmBase cdmBase) {
|
139
|
existingRecordIds.add(cdmBase.getId());
|
140
|
}
|
141
|
|
142
|
/**
|
143
|
* @param el
|
144
|
* @return
|
145
|
*/
|
146
|
protected boolean recordExistsUuid(CdmBase el) {
|
147
|
return existingRecordUuids.contains(el.getUuid());
|
148
|
}
|
149
|
|
150
|
/**
|
151
|
* @param sec
|
152
|
*/
|
153
|
protected void addExistingRecordUuid(CdmBase cdmBase) {
|
154
|
existingRecordUuids.add(cdmBase.getUuid());
|
155
|
}
|
156
|
|
157
|
|
158
|
protected String getSources(ISourceable<?> sourceable, DwcaTaxExportConfigurator config) {
|
159
|
String result = "";
|
160
|
for (IOriginalSource source: sourceable.getSources()){
|
161
|
if (StringUtils.isBlank(source.getIdInSource())){//idInSource indicates that this source is only data provenance, may be changed in future
|
162
|
result = CdmUtils.concat(config.getSetSeparator(), result, source.getCitation().getTitleCache());
|
163
|
}
|
164
|
}
|
165
|
return result;
|
166
|
}
|
167
|
|
168
|
|
169
|
/**
|
170
|
* @param config
|
171
|
* @return
|
172
|
* @throws IOException
|
173
|
* @throws FileNotFoundException
|
174
|
*/
|
175
|
protected FileOutputStream createFileOutputStream(DwcaTaxExportConfigurator config, String thisFileName) throws IOException, FileNotFoundException {
|
176
|
String filePath = config.getDestinationNameString();
|
177
|
String fileName = filePath + File.separatorChar + thisFileName;
|
178
|
File f = new File(fileName);
|
179
|
if (!f.exists()){
|
180
|
f.createNewFile();
|
181
|
}
|
182
|
FileOutputStream fos = new FileOutputStream(f);
|
183
|
return fos;
|
184
|
}
|
185
|
|
186
|
|
187
|
/**
|
188
|
* @param config
|
189
|
* @param factory
|
190
|
* @return
|
191
|
* @throws IOException
|
192
|
* @throws FileNotFoundException
|
193
|
* @throws XMLStreamException
|
194
|
*/
|
195
|
protected XMLStreamWriter createXmlStreamWriter(DwcaTaxExportState state, String fileName)
|
196
|
throws IOException, FileNotFoundException, XMLStreamException {
|
197
|
XMLOutputFactory factory = XMLOutputFactory.newInstance();
|
198
|
OutputStream os;
|
199
|
boolean useZip = state.isZip();
|
200
|
if (useZip){
|
201
|
os = state.getZipStream(fileName);
|
202
|
}else{
|
203
|
os = createFileOutputStream(state.getConfig(), fileName);
|
204
|
}
|
205
|
XMLStreamWriter writer = factory.createXMLStreamWriter(os);
|
206
|
return writer;
|
207
|
}
|
208
|
|
209
|
|
210
|
/**
|
211
|
* @param coreTaxFileName
|
212
|
* @param config
|
213
|
* @return
|
214
|
* @throws IOException
|
215
|
* @throws FileNotFoundException
|
216
|
* @throws UnsupportedEncodingException
|
217
|
*/
|
218
|
protected PrintWriter createPrintWriter(final String fileName, DwcaTaxExportState state)
|
219
|
throws IOException, FileNotFoundException, UnsupportedEncodingException {
|
220
|
|
221
|
OutputStream os;
|
222
|
boolean useZip = state.isZip();
|
223
|
if (useZip){
|
224
|
os = state.getZipStream(fileName);
|
225
|
}else{
|
226
|
os = createFileOutputStream(state.getConfig(), fileName);
|
227
|
}
|
228
|
PrintWriter writer = new PrintWriter(new OutputStreamWriter(os, "UTF8"), true);
|
229
|
|
230
|
return writer;
|
231
|
}
|
232
|
|
233
|
|
234
|
/**
|
235
|
* Closes the writer
|
236
|
* @param writer
|
237
|
* @param state
|
238
|
*/
|
239
|
protected void closeWriter(PrintWriter writer, DwcaTaxExportState state) {
|
240
|
if (writer != null && state.isZip() == false){
|
241
|
writer.close();
|
242
|
}
|
243
|
}
|
244
|
|
245
|
|
246
|
|
247
|
/**
|
248
|
* Closes the writer.
|
249
|
* Note: XMLStreamWriter does not close the underlying stream.
|
250
|
* @param writer
|
251
|
* @param state
|
252
|
*/
|
253
|
protected void closeWriter(XMLStreamWriter writer, DwcaTaxExportState state) {
|
254
|
if (writer != null && state.isZip() == false){
|
255
|
try {
|
256
|
writer.close();
|
257
|
} catch (XMLStreamException e) {
|
258
|
throw new RuntimeException(e);
|
259
|
}
|
260
|
}
|
261
|
}
|
262
|
|
263
|
}
|