Project

General

Profile

Download (8.13 KB) Statistics
| Branch: | Revision:
1
/**
* Copyright (C) 2007 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
9
package eu.etaxonomy.cdm.app.wp6.diptera;
10

    
11
import java.io.File;
12
import java.io.FileInputStream;
13
import java.io.InputStream;
14
import java.io.InputStreamReader;
15
import java.util.ArrayList;
16
import java.util.HashMap;
17
import java.util.List;
18
import java.util.Map;
19

    
20
import org.apache.commons.lang.StringUtils;
21
import org.apache.log4j.Logger;
22
import org.springframework.transaction.TransactionStatus;
23

    
24
import au.com.bytecode.opencsv.CSVReader;
25
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
26
import eu.etaxonomy.cdm.app.common.CdmDestinations;
27
import eu.etaxonomy.cdm.common.CdmUtils;
28
import eu.etaxonomy.cdm.database.DbSchemaValidation;
29
import eu.etaxonomy.cdm.database.ICdmDataSource;
30
import eu.etaxonomy.cdm.io.common.ImportResult;
31
import eu.etaxonomy.cdm.model.agent.Institution;
32
import eu.etaxonomy.cdm.model.occurrence.Collection;
33
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
34
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
35

    
36
/**
 * Creates collections from the acronyms file and assigns them to the specimens
 * of a CDM datasource.
 *
 * @author a.mueller
 * @date 07.04.2010
 */
41
public class DipteraCollectionImport {
42
	private static final Logger logger = Logger.getLogger(DipteraCollectionImport.class);
43

    
44
	public static final File acronymsFile = new File("src/main/resources/collections/Acronyms.tab");
45
	//datasource for use from local main()
46
	static final ICdmDataSource cdmDestination = CdmDestinations.localH2();
47

    
48

    
49
	public ImportResult invoke(ICdmDataSource dataSource) {
50
	    ImportResult result = new ImportResult();
51
		CdmApplicationController cdmApp = CdmApplicationController.NewInstance(dataSource, DbSchemaValidation.VALIDATE);
52

    
53
		//create collections
54
		TransactionStatus tx = cdmApp.startTransaction();
55
		Map<String, Collection> colletionMap = createCollections(cdmApp);
56

    
57
		//add collections to specimen
58
		addCollectionsToSpecimen(cdmApp, colletionMap);
59
		cdmApp.commitTransaction(tx);
60

    
61
		return result;
62

    
63
	}
64

    
65

    
66
	/**
67
	 * @param cdmApp
68
	 * @param colletionMap
69
	 */
70
	private void addCollectionsToSpecimen(CdmApplicationController cdmApp, Map<String, Collection> colletionMap) {
71
		List<DerivedUnit> specimens = cdmApp.getOccurrenceService().list(DerivedUnit.class, null, null, null, null);
72
		for (SpecimenOrObservationBase<?> specOrObservBase : specimens){
73
			if (specOrObservBase.getRecordBasis().isPreservedSpecimen()){
74
				handleSingleSpecimen((DerivedUnit)specOrObservBase, colletionMap);
75
			}else{
76
				logger.warn("There are specimenOrObservationBase objects which are not of class Specimen. This is probably an error.");
77
			}
78
		}
79
		List<SpecimenOrObservationBase> specimenList = new ArrayList<SpecimenOrObservationBase>(specimens);
80
		cdmApp.getOccurrenceService().save(specimenList);
81
	}
82

    
83

    
84
	/**
85
	 * @param specimen
86
	 * @param colletionMap
87
	 */
88
	private void handleSingleSpecimen(DerivedUnit specimen, Map<String, Collection> collectionMap) {
89
		String titleCache = specimen.getTitleCache();
90
		String collectionCode = getCollectionCode(titleCache);
91
		if (StringUtils.isBlank(collectionCode)){
92
			logger.warn("Collection code is empty for: " + titleCache);
93
		}else{
94
			Collection collection = collectionMap.get(collectionCode);
95
			if (collection != null){
96
				specimen.setCollection(collection);
97
			}else{
98
				logger.warn("Collection not found for code: " +  collectionCode + "; titleCache: " +  titleCache);
99
			}
100
		}
101
	}
102

    
103

    
104
	/**
105
	 * @param titleCache
106
	 * @return
107
	 */
108
	private String getCollectionCode(String titleCache) {
109
		String result = titleCache.trim();
110
		result = replaceBracket(result);
111
		result = replaceLastFullStop(result);
112
		result = replaceLastQuestionMark(result);
113
		result = parseLastUpperCase(result);
114
		return result;
115
	}
116

    
117

    
118
	/**
119
	 * @param result
120
	 * @return
121
	 */
122
	private String parseLastUpperCase(String string) {
123
		String result = "";
124
		String tmpString = string;
125
		int pos = tmpString.lastIndexOf(" ");
126
		if (pos>-1){
127
			tmpString = tmpString.substring(pos+1);
128
		}
129
		while (tmpString.length() > 0){
130
			int len = tmpString.length();
131
			char lastChar = tmpString.charAt(len-1);
132
			if (Character.isUpperCase( lastChar)){
133
				result = lastChar + result;
134
			}else{
135
				if (result.length() > 0){
136
					logger.warn("Collection code is not space separated: " + string);
137
				}
138
				break;
139
			}
140
			//remove last character
141
			tmpString = tmpString.substring(0, tmpString.length()-1);
142
		}
143
		return result;
144
	}
145

    
146

    
147

    
148
	/**
149
	 * @param result
150
	 * @return
151
	 */
152
	private String replaceLastQuestionMark(String string) {
153
		if (string.endsWith("?")){
154
			string = string.substring(0,string.length()-1).trim();
155
		}
156
		return string;
157
	}
158

    
159
	/**
160
	 * @param result
161
	 * @return
162
	 */
163
	private String replaceLastFullStop(String string) {
164
		if (string.endsWith(".")){
165
			string = string.substring(0,string.length()-1).trim();
166
		}
167
		return string;
168
	}
169

    
170

    
171
	/**
172
	 * @param result
173
	 * @return
174
	 */
175
	private String replaceBracket(String string) {
176
		if (string.endsWith("]")){
177
			int pos  = string.indexOf("[");
178
			if (pos >0){
179
				string = string.substring(0, pos).trim();
180
			}else{
181
				logger.warn("Closing bracket has no opening bracket in: " + string);
182
			}
183
		}
184
		return string;
185
	}
186

    
187

    
188
	/**
189
	 * @param cdmApp
190
	 */
191
	private Map<String, Collection> createCollections(CdmApplicationController cdmApp) {
192
		Map<String, Collection> collectionMap = new HashMap<String, Collection>();
193
		List<String[]> lines = getLines();
194
		for (String[] line:lines){
195
			Collection collection = makeLine(line);
196
			collectionMap.put(collection.getCode(), collection);
197
		}
198
		cdmApp.getCollectionService().save(collectionMap.values());
199
//			for (Collection collection: collectionMap.values()){
200
//				System.out.println(collection.getTitleCache());
201
//			}
202
		return collectionMap;
203
	}
204

    
205

    
206
	private Collection makeLine(String[] line) {
207
		String code = line[0];
208
		String instituteName = line[1];
209
		String lowerInstitutionName = line[2];
210
		String higherInstitutionName = line[3];
211
		String location = line[4];
212
		String country = line[5];
213
		//create objects
214
		Collection collection = Collection.NewInstance();
215
		collection.setCode(code);
216
		Institution institution = Institution.NewInstance();
217
		institution.setCode(code);
218

    
219
		institution.setName(instituteName);
220

    
221
		if (StringUtils.isNotBlank(lowerInstitutionName)){
222
			Institution lowerInstitution = Institution.NewInstance();
223
			lowerInstitution.setName(lowerInstitutionName);
224
			lowerInstitution.setIsPartOf(institution);
225
		}
226

    
227
		if (StringUtils.isNotBlank(higherInstitutionName)){
228
			Institution higherInstitution = Institution.NewInstance();
229
			higherInstitution.setName(higherInstitutionName);
230
			institution.setIsPartOf(higherInstitution);
231
		}
232

    
233
		collection.setInstitute(institution);
234
		String locationAndCountry = CdmUtils.concat("/", location, country);
235
		collection.setTownOrLocation(locationAndCountry);
236

    
237
		String titleCache = CdmUtils.concat(", ", new String[]{instituteName, lowerInstitutionName, higherInstitutionName, location, country});
238
		collection.setTitleCache(titleCache, true);
239

    
240
		return collection;
241
	}
242

    
243

    
244

    
245

    
246
	private List<String[]> getLines() {
247
		List<String[]> result = new ArrayList<String[]>();
248

    
249
		try {
250
			InputStream inStream = new FileInputStream(acronymsFile);
251
			InputStreamReader inputStreamReader = new InputStreamReader(inStream, "UTF8");
252
			CSVReader reader = new CSVReader(inputStreamReader, '\t');
253
			String [] nextLine = reader.readNext();
254

    
255

    
256
			while ((nextLine = reader.readNext()) != null) {
257
				if (nextLine.length == 0){
258
					continue;
259
				}
260
				result.add(nextLine);
261
			}
262
			return result;
263
		} catch (Exception e) {
264
			logger.error(e + " " + e.getCause() + " " + e.getMessage());
265
			for(StackTraceElement ste : e.getStackTrace()) {
266
				logger.error(ste);
267
			}
268
			throw new RuntimeException(e);
269
		}
270
	}
271

    
272

    
273

    
274

    
275

    
276
	/**
277
	 * @param args
278
	 */
279
	public static void main(String[] args) {
280
		try {
281
			DipteraCollectionImport collectionImport = new DipteraCollectionImport();
282
			collectionImport.invoke(cdmDestination);
283
//			String titleCache = "Peru. Mouth of Rio Pachitea. ST 2R SMT. [fig. of male abdomen]";
284
//			String collectionCode = collectionImport.getCollectionCode(titleCache);
285
//			System.out.println(collectionCode);
286
		} catch (Exception e) {
287
			e.printStackTrace();
288
			System.exit(-1);
289
		}
290
	}
291

    
292
}
(2-2/4)