Project

General

Profile

Download (8.14 KB) Statistics
| Branch: | Revision:
1
// $Id$
2
/**
3
* Copyright (C) 2007 EDIT
4
* European Distributed Institute of Taxonomy
5
* http://www.e-taxonomy.eu
6
*
7
* The contents of this file are subject to the Mozilla Public License Version 1.1
8
* See LICENSE.TXT at the top of this package for the full license terms.
9
*/
10
package eu.etaxonomy.cdm.app.wp6.diptera;
11

    
12
import java.io.File;
13
import java.io.FileInputStream;
14
import java.io.InputStream;
15
import java.io.InputStreamReader;
16
import java.util.ArrayList;
17
import java.util.HashMap;
18
import java.util.List;
19
import java.util.Map;
20

    
21
import org.apache.commons.lang.StringUtils;
22
import org.apache.log4j.Logger;
23
import org.springframework.transaction.TransactionStatus;
24

    
25
import au.com.bytecode.opencsv.CSVReader;
26
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
27
import eu.etaxonomy.cdm.app.common.CdmDestinations;
28
import eu.etaxonomy.cdm.common.CdmUtils;
29
import eu.etaxonomy.cdm.database.DbSchemaValidation;
30
import eu.etaxonomy.cdm.database.ICdmDataSource;
31
import eu.etaxonomy.cdm.io.common.ImportResult;
32
import eu.etaxonomy.cdm.model.agent.Institution;
33
import eu.etaxonomy.cdm.model.occurrence.Collection;
34
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
35
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
36

    
37
/**
38
 * @author a.mueller
39
 * @date 07.04.2010
40
 *
41
 */
42
public class DipteraCollectionImport {
43
	private static final Logger logger = Logger.getLogger(DipteraCollectionImport.class);
44

    
45
	public static final File acronymsFile = new File("src/main/resources/collections/Acronyms.tab");
46
	//datasource for use from local main()
47
	static final ICdmDataSource cdmDestination = CdmDestinations.localH2();
48

    
49

    
50
	public ImportResult invoke(ICdmDataSource dataSource) {
51
	    ImportResult result = new ImportResult();
52
		CdmApplicationController cdmApp = CdmApplicationController.NewInstance(dataSource, DbSchemaValidation.VALIDATE);
53

    
54
		//create collections
55
		TransactionStatus tx = cdmApp.startTransaction();
56
		Map<String, Collection> colletionMap = createCollections(cdmApp);
57

    
58
		//add collections to specimen
59
		addCollectionsToSpecimen(cdmApp, colletionMap);
60
		cdmApp.commitTransaction(tx);
61

    
62
		return result;
63

    
64
	}
65

    
66

    
67
	/**
68
	 * @param cdmApp
69
	 * @param colletionMap
70
	 */
71
	private void addCollectionsToSpecimen(CdmApplicationController cdmApp, Map<String, Collection> colletionMap) {
72
		List<DerivedUnit> specimens = cdmApp.getOccurrenceService().list(DerivedUnit.class, null, null, null, null);
73
		for (SpecimenOrObservationBase<?> specOrObservBase : specimens){
74
			if (specOrObservBase.getRecordBasis().isPreservedSpecimen()){
75
				handleSingleSpecimen((DerivedUnit)specOrObservBase, colletionMap);
76
			}else{
77
				logger.warn("There are specimenOrObservationBase objects which are not of class Specimen. This is probably an error.");
78
			}
79
		}
80
		List<SpecimenOrObservationBase> specimenList = new ArrayList<SpecimenOrObservationBase>(specimens);
81
		cdmApp.getOccurrenceService().save(specimenList);
82
	}
83

    
84

    
85
	/**
86
	 * @param specimen
87
	 * @param colletionMap
88
	 */
89
	private void handleSingleSpecimen(DerivedUnit specimen, Map<String, Collection> collectionMap) {
90
		String titleCache = specimen.getTitleCache();
91
		String collectionCode = getCollectionCode(titleCache);
92
		if (StringUtils.isBlank(collectionCode)){
93
			logger.warn("Collection code is empty for: " + titleCache);
94
		}else{
95
			Collection collection = collectionMap.get(collectionCode);
96
			if (collection != null){
97
				specimen.setCollection(collection);
98
			}else{
99
				logger.warn("Collection not found for code: " +  collectionCode + "; titleCache: " +  titleCache);
100
			}
101
		}
102
	}
103

    
104

    
105
	/**
106
	 * @param titleCache
107
	 * @return
108
	 */
109
	private String getCollectionCode(String titleCache) {
110
		String result = titleCache.trim();
111
		result = replaceBracket(result);
112
		result = replaceLastFullStop(result);
113
		result = replaceLastQuestionMark(result);
114
		result = parseLastUpperCase(result);
115
		return result;
116
	}
117

    
118

    
119
	/**
120
	 * @param result
121
	 * @return
122
	 */
123
	private String parseLastUpperCase(String string) {
124
		String result = "";
125
		String tmpString = string;
126
		int pos = tmpString.lastIndexOf(" ");
127
		if (pos>-1){
128
			tmpString = tmpString.substring(pos+1);
129
		}
130
		while (tmpString.length() > 0){
131
			int len = tmpString.length();
132
			char lastChar = tmpString.charAt(len-1);
133
			if (Character.isUpperCase( lastChar)){
134
				result = lastChar + result;
135
			}else{
136
				if (result.length() > 0){
137
					logger.warn("Collection code is not space separated: " + string);
138
				}
139
				break;
140
			}
141
			//remove last character
142
			tmpString = tmpString.substring(0, tmpString.length()-1);
143
		}
144
		return result;
145
	}
146

    
147

    
148

    
149
	/**
150
	 * @param result
151
	 * @return
152
	 */
153
	private String replaceLastQuestionMark(String string) {
154
		if (string.endsWith("?")){
155
			string = string.substring(0,string.length()-1).trim();
156
		}
157
		return string;
158
	}
159

    
160
	/**
161
	 * @param result
162
	 * @return
163
	 */
164
	private String replaceLastFullStop(String string) {
165
		if (string.endsWith(".")){
166
			string = string.substring(0,string.length()-1).trim();
167
		}
168
		return string;
169
	}
170

    
171

    
172
	/**
173
	 * @param result
174
	 * @return
175
	 */
176
	private String replaceBracket(String string) {
177
		if (string.endsWith("]")){
178
			int pos  = string.indexOf("[");
179
			if (pos >0){
180
				string = string.substring(0, pos).trim();
181
			}else{
182
				logger.warn("Closing bracket has no opening bracket in: " + string);
183
			}
184
		}
185
		return string;
186
	}
187

    
188

    
189
	/**
190
	 * @param cdmApp
191
	 */
192
	private Map<String, Collection> createCollections(CdmApplicationController cdmApp) {
193
		Map<String, Collection> collectionMap = new HashMap<String, Collection>();
194
		List<String[]> lines = getLines();
195
		for (String[] line:lines){
196
			Collection collection = makeLine(line);
197
			collectionMap.put(collection.getCode(), collection);
198
		}
199
		cdmApp.getCollectionService().save(collectionMap.values());
200
//			for (Collection collection: collectionMap.values()){
201
//				System.out.println(collection.getTitleCache());
202
//			}
203
		return collectionMap;
204
	}
205

    
206

    
207
	private Collection makeLine(String[] line) {
208
		String code = line[0];
209
		String instituteName = line[1];
210
		String lowerInstitutionName = line[2];
211
		String higherInstitutionName = line[3];
212
		String location = line[4];
213
		String country = line[5];
214
		//create objects
215
		Collection collection = Collection.NewInstance();
216
		collection.setCode(code);
217
		Institution institution = Institution.NewInstance();
218
		institution.setCode(code);
219

    
220
		institution.setName(instituteName);
221

    
222
		if (StringUtils.isNotBlank(lowerInstitutionName)){
223
			Institution lowerInstitution = Institution.NewInstance();
224
			lowerInstitution.setName(lowerInstitutionName);
225
			lowerInstitution.setIsPartOf(institution);
226
		}
227

    
228
		if (StringUtils.isNotBlank(higherInstitutionName)){
229
			Institution higherInstitution = Institution.NewInstance();
230
			higherInstitution.setName(higherInstitutionName);
231
			institution.setIsPartOf(higherInstitution);
232
		}
233

    
234
		collection.setInstitute(institution);
235
		String locationAndCountry = CdmUtils.concat("/", location, country);
236
		collection.setTownOrLocation(locationAndCountry);
237

    
238
		String titleCache = CdmUtils.concat(", ", new String[]{instituteName, lowerInstitutionName, higherInstitutionName, location, country});
239
		collection.setTitleCache(titleCache, true);
240

    
241
		return collection;
242
	}
243

    
244

    
245

    
246

    
247
	private List<String[]> getLines() {
248
		List<String[]> result = new ArrayList<String[]>();
249

    
250
		try {
251
			InputStream inStream = new FileInputStream(acronymsFile);
252
			InputStreamReader inputStreamReader = new InputStreamReader(inStream, "UTF8");
253
			CSVReader reader = new CSVReader(inputStreamReader, '\t');
254
			String [] nextLine = reader.readNext();
255

    
256

    
257
			while ((nextLine = reader.readNext()) != null) {
258
				if (nextLine.length == 0){
259
					continue;
260
				}
261
				result.add(nextLine);
262
			}
263
			return result;
264
		} catch (Exception e) {
265
			logger.error(e + " " + e.getCause() + " " + e.getMessage());
266
			for(StackTraceElement ste : e.getStackTrace()) {
267
				logger.error(ste);
268
			}
269
			throw new RuntimeException(e);
270
		}
271
	}
272

    
273

    
274

    
275

    
276

    
277
	/**
278
	 * @param args
279
	 */
280
	public static void main(String[] args) {
281
		try {
282
			DipteraCollectionImport collectionImport = new DipteraCollectionImport();
283
			collectionImport.invoke(cdmDestination);
284
//			String titleCache = "Peru. Mouth of Rio Pachitea. ST 2R SMT. [fig. of male abdomen]";
285
//			String collectionCode = collectionImport.getCollectionCode(titleCache);
286
//			System.out.println(collectionCode);
287
		} catch (Exception e) {
288
			e.printStackTrace();
289
			System.exit(-1);
290
		}
291
	}
292

    
293
}
(2-2/4)