last updates for Common Name import
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / app / wp6 / diptera / DipteraCollectionImport.java
1 // $Id$
2 /**
3 * Copyright (C) 2007 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10 package eu.etaxonomy.cdm.app.wp6.diptera;
11
12 import java.io.File;
13 import java.io.FileInputStream;
14 import java.io.InputStream;
15 import java.io.InputStreamReader;
16 import java.util.ArrayList;
17 import java.util.HashMap;
18 import java.util.List;
19 import java.util.Map;
20
21 import org.apache.log4j.Logger;
22 import org.springframework.transaction.TransactionStatus;
23
24 import au.com.bytecode.opencsv.CSVReader;
25 import eu.etaxonomy.cdm.api.application.CdmApplicationController;
26 import eu.etaxonomy.cdm.app.common.CdmDestinations;
27 import eu.etaxonomy.cdm.common.CdmUtils;
28 import eu.etaxonomy.cdm.database.DbSchemaValidation;
29 import eu.etaxonomy.cdm.database.ICdmDataSource;
30 import eu.etaxonomy.cdm.model.agent.Institution;
31 import eu.etaxonomy.cdm.model.occurrence.Collection;
32 import eu.etaxonomy.cdm.model.occurrence.Specimen;
33 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
34
35 /**
36 * @author a.mueller
37 * @date 07.04.2010
38 *
39 */
40 public class DipteraCollectionImport {
41 private static final Logger logger = Logger.getLogger(DipteraCollectionImport.class);
42
43 public static final File acronymsFile = new File("src/main/resources/collections/Acronyms.tab");
44 //datasource for use from local main()
45 static final ICdmDataSource cdmDestination = CdmDestinations.cdm_edit_diptera_a();
46
47
48 public boolean invoke(ICdmDataSource dataSource) {
49 CdmApplicationController cdmApp = CdmApplicationController.NewInstance(dataSource, DbSchemaValidation.VALIDATE);
50
51 //create collections
52 TransactionStatus tx = cdmApp.startTransaction();
53 Map<String, Collection> colletionMap = createCollections(cdmApp);
54
55 //add collections to specimen
56 addCollectionsToSpecimen(cdmApp, colletionMap);
57 cdmApp.commitTransaction(tx);
58
59 return true;
60
61 }
62
63
64 /**
65 * @param cdmApp
66 * @param colletionMap
67 */
68 private void addCollectionsToSpecimen(CdmApplicationController cdmApp, Map<String, Collection> colletionMap) {
69 List<SpecimenOrObservationBase> specimens = (cdmApp.getOccurrenceService().list(Specimen.class, null, null, null, null));
70 for (SpecimenOrObservationBase specOrObservBase : specimens){
71 if (specOrObservBase instanceof Specimen){
72 handleSingleSpecimen((Specimen)specOrObservBase, colletionMap);
73 }else{
74 logger.warn("There are specimenOrObservationBase objects which are not of class Specimen. This is probably an error.");
75 }
76 }
77 cdmApp.getOccurrenceService().save(specimens);
78 }
79
80
81 /**
82 * @param specimen
83 * @param colletionMap
84 */
85 private void handleSingleSpecimen(Specimen specimen, Map<String, Collection> collectionMap) {
86 String titleCache = specimen.getTitleCache();
87 String collectionCode = getCollectionCode(titleCache);
88 if (CdmUtils.isEmpty(collectionCode)){
89 logger.warn("Collection code is empty for: " + titleCache);
90 }else{
91 Collection collection = collectionMap.get(collectionCode);
92 if (collection != null){
93 specimen.setCollection(collection);
94 }else{
95 logger.warn("Collection not found for code: " + collectionCode + "; titleCache: " + titleCache);
96 }
97 }
98 }
99
100
101 /**
102 * @param titleCache
103 * @return
104 */
105 private String getCollectionCode(String titleCache) {
106 String result = titleCache.trim();
107 result = replaceBracket(result);
108 result = replaceLastFullStop(result);
109 result = replaceLastQuestionMark(result);
110 result = parseLastUpperCase(result);
111 return result;
112 }
113
114
115 /**
116 * @param result
117 * @return
118 */
119 private String parseLastUpperCase(String string) {
120 String result = "";
121 String tmpString = string;
122 int pos = tmpString.lastIndexOf(" ");
123 if (pos>-1){
124 tmpString = tmpString.substring(pos+1);
125 }
126 while (tmpString.length() > 0){
127 int len = tmpString.length();
128 char lastChar = tmpString.charAt(len-1);
129 if (Character.isUpperCase( lastChar)){
130 result = lastChar + result;
131 }else{
132 if (result.length() > 0){
133 logger.warn("Collection code is not space separated: " + string);
134 }
135 break;
136 }
137 //remove last character
138 tmpString = tmpString.substring(0, tmpString.length()-1);
139 }
140 return result;
141 }
142
143
144
145 /**
146 * @param result
147 * @return
148 */
149 private String replaceLastQuestionMark(String string) {
150 if (string.endsWith("?")){
151 string = string.substring(0,string.length()-1).trim();
152 }
153 return string;
154 }
155
156 /**
157 * @param result
158 * @return
159 */
160 private String replaceLastFullStop(String string) {
161 if (string.endsWith(".")){
162 string = string.substring(0,string.length()-1).trim();
163 }
164 return string;
165 }
166
167
168 /**
169 * @param result
170 * @return
171 */
172 private String replaceBracket(String string) {
173 if (string.endsWith("]")){
174 int pos = string.indexOf("[");
175 if (pos >0){
176 string = string.substring(0, pos).trim();
177 }else{
178 logger.warn("Closing bracket has no opening bracket in: " + string);
179 }
180 }
181 return string;
182 }
183
184
185 /**
186 * @param cdmApp
187 */
188 private Map<String, Collection> createCollections(CdmApplicationController cdmApp) {
189 Map<String, Collection> collectionMap = new HashMap<String, Collection>();
190 List<String[]> lines = getLines();
191 for (String[] line:lines){
192 Collection collection = makeLine(line);
193 collectionMap.put(collection.getCode(), collection);
194 }
195 cdmApp.getCollectionService().save(collectionMap.values());
196 // for (Collection collection: collectionMap.values()){
197 // System.out.println(collection.getTitleCache());
198 // }
199 return collectionMap;
200 }
201
202
203 private Collection makeLine(String[] line) {
204 String code = line[0];
205 String instituteName = line[1];
206 String lowerInstitutionName = line[2];
207 String higherInstitutionName = line[3];
208 String location = line[4];
209 String country = line[5];
210 //create objects
211 Collection collection = Collection.NewInstance();
212 collection.setCode(code);
213 Institution institution = Institution.NewInstance();
214 institution.setCode(code);
215
216 institution.setName(instituteName);
217
218 if (CdmUtils.isNotEmpty(lowerInstitutionName)){
219 Institution lowerInstitution = Institution.NewInstance();
220 lowerInstitution.setName(lowerInstitutionName);
221 lowerInstitution.setIsPartOf(institution);
222 }
223
224 if (CdmUtils.isNotEmpty(higherInstitutionName)){
225 Institution higherInstitution = Institution.NewInstance();
226 higherInstitution.setName(higherInstitutionName);
227 institution.setIsPartOf(higherInstitution);
228 }
229
230 collection.setInstitute(institution);
231 String locationAndCountry = CdmUtils.concat("/", location, country);
232 collection.setTownOrLocation(locationAndCountry);
233
234 String titleCache = CdmUtils.concat(", ", new String[]{instituteName, lowerInstitutionName, higherInstitutionName, location, country});
235 collection.setTitleCache(titleCache, true);
236
237 return collection;
238 }
239
240
241
242
243 private List<String[]> getLines() {
244 List<String[]> result = new ArrayList<String[]>();
245
246 try {
247 InputStream inStream = new FileInputStream(acronymsFile);
248 InputStreamReader inputStreamReader = new InputStreamReader(inStream, "UTF8");
249 CSVReader reader = new CSVReader(inputStreamReader, '\t');
250 String [] nextLine = reader.readNext();
251
252
253 while ((nextLine = reader.readNext()) != null) {
254 if (nextLine.length == 0){
255 continue;
256 }
257 result.add(nextLine);
258 }
259 return result;
260 } catch (Exception e) {
261 logger.error(e + " " + e.getCause() + " " + e.getMessage());
262 for(StackTraceElement ste : e.getStackTrace()) {
263 logger.error(ste);
264 }
265 throw new RuntimeException(e);
266 }
267 }
268
269
270
271
272
273 /**
274 * @param args
275 */
276 public static void main(String[] args) {
277 try {
278 DipteraCollectionImport collectionImport = new DipteraCollectionImport();
279 collectionImport.invoke(cdmDestination);
280 // String titleCache = "Peru. Mouth of Rio Pachitea. ST 2R SMT. [fig. of male abdomen]";
281 // String collectionCode = collectionImport.getCollectionCode(titleCache);
282 // System.out.println(collectionCode);
283 } catch (Exception e) {
284 e.printStackTrace();
285 System.exit(-1);
286 }
287 }
288
289 }