Project

General

Profile

Download (8.2 KB) Statistics
| Branch: | Revision:
1
// $Id$
2
/**
3
* Copyright (C) 2007 EDIT
4
* European Distributed Institute of Taxonomy 
5
* http://www.e-taxonomy.eu
6
* 
7
* The contents of this file are subject to the Mozilla Public License Version 1.1
8
* See LICENSE.TXT at the top of this package for the full license terms.
9
*/
10
package eu.etaxonomy.cdm.app.wp6.diptera;
11

    
12
import java.io.File;
13
import java.io.FileInputStream;
14
import java.io.InputStream;
15
import java.io.InputStreamReader;
16
import java.util.ArrayList;
17
import java.util.HashMap;
18
import java.util.List;
19
import java.util.Map;
20

    
21
import org.apache.log4j.Logger;
22
import org.springframework.transaction.TransactionStatus;
23

    
24
import au.com.bytecode.opencsv.CSVReader;
25
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
26
import eu.etaxonomy.cdm.app.common.CdmDestinations;
27
import eu.etaxonomy.cdm.common.CdmUtils;
28
import eu.etaxonomy.cdm.database.DbSchemaValidation;
29
import eu.etaxonomy.cdm.database.ICdmDataSource;
30
import eu.etaxonomy.cdm.model.agent.Institution;
31
import eu.etaxonomy.cdm.model.occurrence.Collection;
32
import eu.etaxonomy.cdm.model.occurrence.Specimen;
33
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
34

    
35
/**
36
 * @author a.mueller
37
 * @date 07.04.2010
38
 *
39
 */
40
public class DipteraCollectionImport {
41
	/** Log4j logger used by all methods of this import. */
	private static final Logger logger = Logger.getLogger(DipteraCollectionImport.class);
42

    
43
	/** Tab-separated file the collections are created from; the path is relative, so it is resolved against the working directory. */
	public static final File acronymsFile = new File("src/main/resources/collections/Acronyms.tab");
44
	//datasource for use from local main()
45
	/** Destination database that {@link #main(String[])} passes to {@link #invoke(ICdmDataSource)}. */
	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_edit_diptera_a();
46
	
47

    
48
	public boolean invoke(ICdmDataSource dataSource) {
49
		CdmApplicationController cdmApp = CdmApplicationController.NewInstance(dataSource, DbSchemaValidation.VALIDATE);
50
			
51
		//create collections
52
		TransactionStatus tx = cdmApp.startTransaction();
53
		Map<String, Collection> colletionMap = createCollections(cdmApp);
54
		
55
		//add collections to specimen
56
		addCollectionsToSpecimen(cdmApp, colletionMap);
57
		cdmApp.commitTransaction(tx);
58
		
59
		return true;
60
		
61
	}
62

    
63

    
64
	/**
65
	 * @param cdmApp
66
	 * @param colletionMap 
67
	 */
68
	private void addCollectionsToSpecimen(CdmApplicationController cdmApp, Map<String, Collection> colletionMap) {
69
		List<SpecimenOrObservationBase> specimens = (cdmApp.getOccurrenceService().list(Specimen.class, null, null, null, null));
70
		for (SpecimenOrObservationBase specOrObservBase : specimens){
71
			if (specOrObservBase instanceof Specimen){
72
				handleSingleSpecimen((Specimen)specOrObservBase, colletionMap);
73
			}else{
74
				logger.warn("There are specimenOrObservationBase objects which are not of class Specimen. This is probably an error.");
75
			}
76
		}
77
		cdmApp.getOccurrenceService().save(specimens);
78
	}
79

    
80

    
81
	/**
82
	 * @param specimen 
83
	 * @param colletionMap
84
	 */
85
	private void handleSingleSpecimen(Specimen specimen, Map<String, Collection> collectionMap) {
86
		String titleCache = specimen.getTitleCache();
87
		String collectionCode = getCollectionCode(titleCache);
88
		if (CdmUtils.isEmpty(collectionCode)){
89
			logger.warn("Collection code is empty for: " + titleCache);
90
		}else{
91
			Collection collection = collectionMap.get(collectionCode);
92
			if (collection != null){
93
				specimen.setCollection(collection);
94
			}else{
95
				logger.warn("Collection not found for code: " +  collectionCode + "; titleCache: " +  titleCache);
96
			}
97
		}
98
	}
99

    
100

    
101
	/**
102
	 * @param titleCache
103
	 * @return
104
	 */
105
	private String getCollectionCode(String titleCache) {
106
		String result = titleCache.trim();
107
		result = replaceBracket(result);
108
		result = replaceLastFullStop(result);
109
		result = replaceLastQuestionMark(result);
110
		result = parseLastUpperCase(result);
111
		return result;
112
	}
113

    
114

    
115
	/**
116
	 * @param result
117
	 * @return
118
	 */
119
	private String parseLastUpperCase(String string) {
120
		String result = "";
121
		String tmpString = string;
122
		int pos = tmpString.lastIndexOf(" ");
123
		if (pos>-1){
124
			tmpString = tmpString.substring(pos+1);
125
		}
126
		while (tmpString.length() > 0){
127
			int len = tmpString.length();
128
			char lastChar = tmpString.charAt(len-1);
129
			if (Character.isUpperCase( lastChar)){
130
				result = lastChar + result;
131
			}else{
132
				if (result.length() > 0){
133
					logger.warn("Collection code is not space separated: " + string);
134
				}
135
				break;
136
			}
137
			//remove last character
138
			tmpString = tmpString.substring(0, tmpString.length()-1);
139
		}
140
		return result;
141
	}
142

    
143

    
144

    
145
	/**
146
	 * @param result
147
	 * @return
148
	 */
149
	private String replaceLastQuestionMark(String string) {
150
		if (string.endsWith("?")){
151
			string = string.substring(0,string.length()-1).trim();
152
		}
153
		return string;
154
	}
155
	
156
	/**
157
	 * @param result
158
	 * @return
159
	 */
160
	private String replaceLastFullStop(String string) {
161
		if (string.endsWith(".")){
162
			string = string.substring(0,string.length()-1).trim();
163
		}
164
		return string;
165
	}
166

    
167

    
168
	/**
169
	 * @param result
170
	 * @return
171
	 */
172
	private String replaceBracket(String string) {
173
		if (string.endsWith("]")){
174
			int pos  = string.indexOf("[");
175
			if (pos >0){
176
				string = string.substring(0, pos).trim();
177
			}else{
178
				logger.warn("Closing bracket has no opening bracket in: " + string);
179
			}
180
		}
181
		return string;
182
	}
183

    
184

    
185
	/**
186
	 * @param cdmApp
187
	 */
188
	private Map<String, Collection> createCollections(CdmApplicationController cdmApp) {
189
		Map<String, Collection> collectionMap = new HashMap<String, Collection>(); 
190
		List<String[]> lines = getLines();
191
		for (String[] line:lines){
192
			Collection collection = makeLine(line);
193
			collectionMap.put(collection.getCode(), collection);
194
		}
195
		cdmApp.getCollectionService().save(collectionMap.values());
196
//			for (Collection collection: collectionMap.values()){
197
//				System.out.println(collection.getTitleCache());
198
//			}
199
		return collectionMap;
200
	}
201
	
202

    
203
	private Collection makeLine(String[] line) {
204
		String code = line[0];
205
		String instituteName = line[1];
206
		String lowerInstitutionName = line[2];
207
		String higherInstitutionName = line[3];
208
		String location = line[4];
209
		String country = line[5];
210
		//create objects
211
		Collection collection = Collection.NewInstance();
212
		collection.setCode(code);
213
		Institution institution = Institution.NewInstance();
214
		institution.setCode(code);
215
		
216
		institution.setName(instituteName);
217
		
218
		if (CdmUtils.isNotEmpty(lowerInstitutionName)){
219
			Institution lowerInstitution = Institution.NewInstance();
220
			lowerInstitution.setName(lowerInstitutionName);
221
			lowerInstitution.setIsPartOf(institution);
222
		}
223
		
224
		if (CdmUtils.isNotEmpty(higherInstitutionName)){
225
			Institution higherInstitution = Institution.NewInstance();
226
			higherInstitution.setName(higherInstitutionName);
227
			institution.setIsPartOf(higherInstitution);
228
		}
229
		
230
		collection.setInstitute(institution);
231
		String locationAndCountry = CdmUtils.concat("/", location, country);
232
		collection.setTownOrLocation(locationAndCountry);
233
		
234
		String titleCache = CdmUtils.concat(", ", new String[]{instituteName, lowerInstitutionName, higherInstitutionName, location, country});
235
		collection.setTitleCache(titleCache, true);
236
		
237
		return collection;
238
	}
239

    
240
	
241
	
242
	
243
	private List<String[]> getLines() {
244
		List<String[]> result = new ArrayList<String[]>();
245
		
246
		try {
247
			InputStream inStream = new FileInputStream(acronymsFile);
248
			InputStreamReader inputStreamReader = new InputStreamReader(inStream, "UTF8");
249
			CSVReader reader = new CSVReader(inputStreamReader, '\t');
250
			String [] nextLine = reader.readNext();
251
			
252
			
253
			while ((nextLine = reader.readNext()) != null) {
254
				if (nextLine.length == 0){
255
					continue;
256
				}
257
				result.add(nextLine);
258
			}
259
			return result;
260
		} catch (Exception e) {
261
			logger.error(e + " " + e.getCause() + " " + e.getMessage());
262
			for(StackTraceElement ste : e.getStackTrace()) {
263
				logger.error(ste);
264
			}
265
			throw new RuntimeException(e);
266
		}
267
	}
268

    
269

    
270

    
271

    
272

    
273
	/**
274
	 * @param args
275
	 */
276
	public static void main(String[] args) {
277
		try {
278
			DipteraCollectionImport collectionImport = new DipteraCollectionImport();
279
			collectionImport.invoke(cdmDestination);
280
//			String titleCache = "Peru. Mouth of Rio Pachitea. ST 2R SMT. [fig. of male abdomen]";
281
//			String collectionCode = collectionImport.getCollectionCode(titleCache);
282
//			System.out.println(collectionCode);
283
		} catch (Exception e) {
284
			e.printStackTrace();
285
			System.exit(-1);
286
		}
287
	}
288

    
289
}
(2-2/4)