Project

General

Profile

Download (8.04 KB) Statistics
| Branch: | Revision:
1
// $Id$
2
/**
3
* Copyright (C) 2007 EDIT
4
* European Distributed Institute of Taxonomy
5
* http://www.e-taxonomy.eu
6
*
7
* The contents of this file are subject to the Mozilla Public License Version 1.1
8
* See LICENSE.TXT at the top of this package for the full license terms.
9
*/
10
package eu.etaxonomy.cdm.app.wp6.diptera;
11

    
12
import java.io.File;
13
import java.io.FileInputStream;
14
import java.io.InputStream;
15
import java.io.InputStreamReader;
16
import java.util.ArrayList;
17
import java.util.HashMap;
18
import java.util.List;
19
import java.util.Map;
20

    
21
import org.apache.commons.lang.StringUtils;
22
import org.apache.log4j.Logger;
23
import org.springframework.transaction.TransactionStatus;
24

    
25
import au.com.bytecode.opencsv.CSVReader;
26
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
27
import eu.etaxonomy.cdm.app.common.CdmDestinations;
28
import eu.etaxonomy.cdm.common.CdmUtils;
29
import eu.etaxonomy.cdm.database.DbSchemaValidation;
30
import eu.etaxonomy.cdm.database.ICdmDataSource;
31
import eu.etaxonomy.cdm.model.agent.Institution;
32
import eu.etaxonomy.cdm.model.occurrence.Collection;
33
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
34
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
35

    
36
/**
37
 * @author a.mueller
38
 * @date 07.04.2010
39
 *
40
 */
41
public class DipteraCollectionImport {
42
	private static final Logger logger = Logger.getLogger(DipteraCollectionImport.class);
43

    
44
	public static final File acronymsFile = new File("src/main/resources/collections/Acronyms.tab");
45
	//datasource for use from local main()
46
	static final ICdmDataSource cdmDestination = CdmDestinations.localH2();
47

    
48

    
49
	public boolean invoke(ICdmDataSource dataSource) {
50
		CdmApplicationController cdmApp = CdmApplicationController.NewInstance(dataSource, DbSchemaValidation.VALIDATE);
51

    
52
		//create collections
53
		TransactionStatus tx = cdmApp.startTransaction();
54
		Map<String, Collection> colletionMap = createCollections(cdmApp);
55

    
56
		//add collections to specimen
57
		addCollectionsToSpecimen(cdmApp, colletionMap);
58
		cdmApp.commitTransaction(tx);
59

    
60
		return true;
61

    
62
	}
63

    
64

    
65
	/**
66
	 * @param cdmApp
67
	 * @param colletionMap
68
	 */
69
	private void addCollectionsToSpecimen(CdmApplicationController cdmApp, Map<String, Collection> colletionMap) {
70
		List<DerivedUnit> specimens = cdmApp.getOccurrenceService().list(DerivedUnit.class, null, null, null, null);
71
		for (SpecimenOrObservationBase<?> specOrObservBase : specimens){
72
			if (specOrObservBase.getRecordBasis().isPreservedSpecimen()){
73
				handleSingleSpecimen((DerivedUnit)specOrObservBase, colletionMap);
74
			}else{
75
				logger.warn("There are specimenOrObservationBase objects which are not of class Specimen. This is probably an error.");
76
			}
77
		}
78
		List<SpecimenOrObservationBase> specimenList = new ArrayList<SpecimenOrObservationBase>(specimens);
79
		cdmApp.getOccurrenceService().save(specimenList);
80
	}
81

    
82

    
83
	/**
84
	 * @param specimen
85
	 * @param colletionMap
86
	 */
87
	private void handleSingleSpecimen(DerivedUnit specimen, Map<String, Collection> collectionMap) {
88
		String titleCache = specimen.getTitleCache();
89
		String collectionCode = getCollectionCode(titleCache);
90
		if (StringUtils.isBlank(collectionCode)){
91
			logger.warn("Collection code is empty for: " + titleCache);
92
		}else{
93
			Collection collection = collectionMap.get(collectionCode);
94
			if (collection != null){
95
				specimen.setCollection(collection);
96
			}else{
97
				logger.warn("Collection not found for code: " +  collectionCode + "; titleCache: " +  titleCache);
98
			}
99
		}
100
	}
101

    
102

    
103
	/**
104
	 * @param titleCache
105
	 * @return
106
	 */
107
	private String getCollectionCode(String titleCache) {
108
		String result = titleCache.trim();
109
		result = replaceBracket(result);
110
		result = replaceLastFullStop(result);
111
		result = replaceLastQuestionMark(result);
112
		result = parseLastUpperCase(result);
113
		return result;
114
	}
115

    
116

    
117
	/**
118
	 * @param result
119
	 * @return
120
	 */
121
	private String parseLastUpperCase(String string) {
122
		String result = "";
123
		String tmpString = string;
124
		int pos = tmpString.lastIndexOf(" ");
125
		if (pos>-1){
126
			tmpString = tmpString.substring(pos+1);
127
		}
128
		while (tmpString.length() > 0){
129
			int len = tmpString.length();
130
			char lastChar = tmpString.charAt(len-1);
131
			if (Character.isUpperCase( lastChar)){
132
				result = lastChar + result;
133
			}else{
134
				if (result.length() > 0){
135
					logger.warn("Collection code is not space separated: " + string);
136
				}
137
				break;
138
			}
139
			//remove last character
140
			tmpString = tmpString.substring(0, tmpString.length()-1);
141
		}
142
		return result;
143
	}
144

    
145

    
146

    
147
	/**
148
	 * @param result
149
	 * @return
150
	 */
151
	private String replaceLastQuestionMark(String string) {
152
		if (string.endsWith("?")){
153
			string = string.substring(0,string.length()-1).trim();
154
		}
155
		return string;
156
	}
157

    
158
	/**
159
	 * @param result
160
	 * @return
161
	 */
162
	private String replaceLastFullStop(String string) {
163
		if (string.endsWith(".")){
164
			string = string.substring(0,string.length()-1).trim();
165
		}
166
		return string;
167
	}
168

    
169

    
170
	/**
171
	 * @param result
172
	 * @return
173
	 */
174
	private String replaceBracket(String string) {
175
		if (string.endsWith("]")){
176
			int pos  = string.indexOf("[");
177
			if (pos >0){
178
				string = string.substring(0, pos).trim();
179
			}else{
180
				logger.warn("Closing bracket has no opening bracket in: " + string);
181
			}
182
		}
183
		return string;
184
	}
185

    
186

    
187
	/**
188
	 * @param cdmApp
189
	 */
190
	private Map<String, Collection> createCollections(CdmApplicationController cdmApp) {
191
		Map<String, Collection> collectionMap = new HashMap<String, Collection>();
192
		List<String[]> lines = getLines();
193
		for (String[] line:lines){
194
			Collection collection = makeLine(line);
195
			collectionMap.put(collection.getCode(), collection);
196
		}
197
		cdmApp.getCollectionService().save(collectionMap.values());
198
//			for (Collection collection: collectionMap.values()){
199
//				System.out.println(collection.getTitleCache());
200
//			}
201
		return collectionMap;
202
	}
203

    
204

    
205
	private Collection makeLine(String[] line) {
206
		String code = line[0];
207
		String instituteName = line[1];
208
		String lowerInstitutionName = line[2];
209
		String higherInstitutionName = line[3];
210
		String location = line[4];
211
		String country = line[5];
212
		//create objects
213
		Collection collection = Collection.NewInstance();
214
		collection.setCode(code);
215
		Institution institution = Institution.NewInstance();
216
		institution.setCode(code);
217

    
218
		institution.setName(instituteName);
219

    
220
		if (StringUtils.isNotBlank(lowerInstitutionName)){
221
			Institution lowerInstitution = Institution.NewInstance();
222
			lowerInstitution.setName(lowerInstitutionName);
223
			lowerInstitution.setIsPartOf(institution);
224
		}
225

    
226
		if (StringUtils.isNotBlank(higherInstitutionName)){
227
			Institution higherInstitution = Institution.NewInstance();
228
			higherInstitution.setName(higherInstitutionName);
229
			institution.setIsPartOf(higherInstitution);
230
		}
231

    
232
		collection.setInstitute(institution);
233
		String locationAndCountry = CdmUtils.concat("/", location, country);
234
		collection.setTownOrLocation(locationAndCountry);
235

    
236
		String titleCache = CdmUtils.concat(", ", new String[]{instituteName, lowerInstitutionName, higherInstitutionName, location, country});
237
		collection.setTitleCache(titleCache, true);
238

    
239
		return collection;
240
	}
241

    
242

    
243

    
244

    
245
	private List<String[]> getLines() {
246
		List<String[]> result = new ArrayList<String[]>();
247

    
248
		try {
249
			InputStream inStream = new FileInputStream(acronymsFile);
250
			InputStreamReader inputStreamReader = new InputStreamReader(inStream, "UTF8");
251
			CSVReader reader = new CSVReader(inputStreamReader, '\t');
252
			String [] nextLine = reader.readNext();
253

    
254

    
255
			while ((nextLine = reader.readNext()) != null) {
256
				if (nextLine.length == 0){
257
					continue;
258
				}
259
				result.add(nextLine);
260
			}
261
			return result;
262
		} catch (Exception e) {
263
			logger.error(e + " " + e.getCause() + " " + e.getMessage());
264
			for(StackTraceElement ste : e.getStackTrace()) {
265
				logger.error(ste);
266
			}
267
			throw new RuntimeException(e);
268
		}
269
	}
270

    
271

    
272

    
273

    
274

    
275
	/**
276
	 * @param args
277
	 */
278
	public static void main(String[] args) {
279
		try {
280
			DipteraCollectionImport collectionImport = new DipteraCollectionImport();
281
			collectionImport.invoke(cdmDestination);
282
//			String titleCache = "Peru. Mouth of Rio Pachitea. ST 2R SMT. [fig. of male abdomen]";
283
//			String collectionCode = collectionImport.getCollectionCode(titleCache);
284
//			System.out.println(collectionCode);
285
		} catch (Exception e) {
286
			e.printStackTrace();
287
			System.exit(-1);
288
		}
289
	}
290

    
291
}
(2-2/4)