8e039085a9f10a6bedc3088f1d1d027161fe3ef4
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / app / wp6 / diptera / DipteraCollectionImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.app.wp6.diptera;
10
11 import java.io.File;
12 import java.io.FileInputStream;
13 import java.io.InputStream;
14 import java.io.InputStreamReader;
15 import java.util.ArrayList;
16 import java.util.HashMap;
17 import java.util.List;
18 import java.util.Map;
19
20 import org.apache.commons.lang.StringUtils;
21 import org.apache.log4j.Logger;
22 import org.springframework.transaction.TransactionStatus;
23
24 import au.com.bytecode.opencsv.CSVReader;
25 import eu.etaxonomy.cdm.api.application.CdmApplicationController;
26 import eu.etaxonomy.cdm.app.common.CdmDestinations;
27 import eu.etaxonomy.cdm.common.CdmUtils;
28 import eu.etaxonomy.cdm.database.DbSchemaValidation;
29 import eu.etaxonomy.cdm.database.ICdmDataSource;
30 import eu.etaxonomy.cdm.io.common.ImportResult;
31 import eu.etaxonomy.cdm.model.agent.Institution;
32 import eu.etaxonomy.cdm.model.occurrence.Collection;
33 import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
34 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
35
36 /**
37 * @author a.mueller
38 * @date 07.04.2010
39 *
40 */
41 public class DipteraCollectionImport {
42 private static final Logger logger = Logger.getLogger(DipteraCollectionImport.class);
43
44 public static final File acronymsFile = new File("src/main/resources/collections/Acronyms.tab");
45 //datasource for use from local main()
46 static final ICdmDataSource cdmDestination = CdmDestinations.localH2();
47
48
49 public ImportResult invoke(ICdmDataSource dataSource) {
50 ImportResult result = new ImportResult();
51 CdmApplicationController cdmApp = CdmApplicationController.NewInstance(dataSource, DbSchemaValidation.VALIDATE);
52
53 //create collections
54 TransactionStatus tx = cdmApp.startTransaction();
55 Map<String, Collection> colletionMap = createCollections(cdmApp);
56
57 //add collections to specimen
58 addCollectionsToSpecimen(cdmApp, colletionMap);
59 cdmApp.commitTransaction(tx);
60
61 return result;
62
63 }
64
65
66 /**
67 * @param cdmApp
68 * @param colletionMap
69 */
70 private void addCollectionsToSpecimen(CdmApplicationController cdmApp, Map<String, Collection> colletionMap) {
71 List<DerivedUnit> specimens = cdmApp.getOccurrenceService().list(DerivedUnit.class, null, null, null, null);
72 for (SpecimenOrObservationBase<?> specOrObservBase : specimens){
73 if (specOrObservBase.getRecordBasis().isPreservedSpecimen()){
74 handleSingleSpecimen((DerivedUnit)specOrObservBase, colletionMap);
75 }else{
76 logger.warn("There are specimenOrObservationBase objects which are not of class Specimen. This is probably an error.");
77 }
78 }
79 List<SpecimenOrObservationBase> specimenList = new ArrayList<SpecimenOrObservationBase>(specimens);
80 cdmApp.getOccurrenceService().save(specimenList);
81 }
82
83
84 /**
85 * @param specimen
86 * @param colletionMap
87 */
88 private void handleSingleSpecimen(DerivedUnit specimen, Map<String, Collection> collectionMap) {
89 String titleCache = specimen.getTitleCache();
90 String collectionCode = getCollectionCode(titleCache);
91 if (StringUtils.isBlank(collectionCode)){
92 logger.warn("Collection code is empty for: " + titleCache);
93 }else{
94 Collection collection = collectionMap.get(collectionCode);
95 if (collection != null){
96 specimen.setCollection(collection);
97 }else{
98 logger.warn("Collection not found for code: " + collectionCode + "; titleCache: " + titleCache);
99 }
100 }
101 }
102
103
104 /**
105 * @param titleCache
106 * @return
107 */
108 private String getCollectionCode(String titleCache) {
109 String result = titleCache.trim();
110 result = replaceBracket(result);
111 result = replaceLastFullStop(result);
112 result = replaceLastQuestionMark(result);
113 result = parseLastUpperCase(result);
114 return result;
115 }
116
117
118 /**
119 * @param result
120 * @return
121 */
122 private String parseLastUpperCase(String string) {
123 String result = "";
124 String tmpString = string;
125 int pos = tmpString.lastIndexOf(" ");
126 if (pos>-1){
127 tmpString = tmpString.substring(pos+1);
128 }
129 while (tmpString.length() > 0){
130 int len = tmpString.length();
131 char lastChar = tmpString.charAt(len-1);
132 if (Character.isUpperCase( lastChar)){
133 result = lastChar + result;
134 }else{
135 if (result.length() > 0){
136 logger.warn("Collection code is not space separated: " + string);
137 }
138 break;
139 }
140 //remove last character
141 tmpString = tmpString.substring(0, tmpString.length()-1);
142 }
143 return result;
144 }
145
146
147
148 /**
149 * @param result
150 * @return
151 */
152 private String replaceLastQuestionMark(String string) {
153 if (string.endsWith("?")){
154 string = string.substring(0,string.length()-1).trim();
155 }
156 return string;
157 }
158
159 /**
160 * @param result
161 * @return
162 */
163 private String replaceLastFullStop(String string) {
164 if (string.endsWith(".")){
165 string = string.substring(0,string.length()-1).trim();
166 }
167 return string;
168 }
169
170
171 /**
172 * @param result
173 * @return
174 */
175 private String replaceBracket(String string) {
176 if (string.endsWith("]")){
177 int pos = string.indexOf("[");
178 if (pos >0){
179 string = string.substring(0, pos).trim();
180 }else{
181 logger.warn("Closing bracket has no opening bracket in: " + string);
182 }
183 }
184 return string;
185 }
186
187
188 /**
189 * @param cdmApp
190 */
191 private Map<String, Collection> createCollections(CdmApplicationController cdmApp) {
192 Map<String, Collection> collectionMap = new HashMap<String, Collection>();
193 List<String[]> lines = getLines();
194 for (String[] line:lines){
195 Collection collection = makeLine(line);
196 collectionMap.put(collection.getCode(), collection);
197 }
198 cdmApp.getCollectionService().save(collectionMap.values());
199 // for (Collection collection: collectionMap.values()){
200 // System.out.println(collection.getTitleCache());
201 // }
202 return collectionMap;
203 }
204
205
206 private Collection makeLine(String[] line) {
207 String code = line[0];
208 String instituteName = line[1];
209 String lowerInstitutionName = line[2];
210 String higherInstitutionName = line[3];
211 String location = line[4];
212 String country = line[5];
213 //create objects
214 Collection collection = Collection.NewInstance();
215 collection.setCode(code);
216 Institution institution = Institution.NewInstance();
217 institution.setCode(code);
218
219 institution.setName(instituteName);
220
221 if (StringUtils.isNotBlank(lowerInstitutionName)){
222 Institution lowerInstitution = Institution.NewInstance();
223 lowerInstitution.setName(lowerInstitutionName);
224 lowerInstitution.setIsPartOf(institution);
225 }
226
227 if (StringUtils.isNotBlank(higherInstitutionName)){
228 Institution higherInstitution = Institution.NewInstance();
229 higherInstitution.setName(higherInstitutionName);
230 institution.setIsPartOf(higherInstitution);
231 }
232
233 collection.setInstitute(institution);
234 String locationAndCountry = CdmUtils.concat("/", location, country);
235 collection.setTownOrLocation(locationAndCountry);
236
237 String titleCache = CdmUtils.concat(", ", new String[]{instituteName, lowerInstitutionName, higherInstitutionName, location, country});
238 collection.setTitleCache(titleCache, true);
239
240 return collection;
241 }
242
243
244
245
246 private List<String[]> getLines() {
247 List<String[]> result = new ArrayList<String[]>();
248
249 try {
250 InputStream inStream = new FileInputStream(acronymsFile);
251 InputStreamReader inputStreamReader = new InputStreamReader(inStream, "UTF8");
252 CSVReader reader = new CSVReader(inputStreamReader, '\t');
253 String [] nextLine = reader.readNext();
254
255
256 while ((nextLine = reader.readNext()) != null) {
257 if (nextLine.length == 0){
258 continue;
259 }
260 result.add(nextLine);
261 }
262 return result;
263 } catch (Exception e) {
264 logger.error(e + " " + e.getCause() + " " + e.getMessage());
265 for(StackTraceElement ste : e.getStackTrace()) {
266 logger.error(ste);
267 }
268 throw new RuntimeException(e);
269 }
270 }
271
272
273
274
275
276 /**
277 * @param args
278 */
279 public static void main(String[] args) {
280 try {
281 DipteraCollectionImport collectionImport = new DipteraCollectionImport();
282 collectionImport.invoke(cdmDestination);
283 // String titleCache = "Peru. Mouth of Rio Pachitea. ST 2R SMT. [fig. of male abdomen]";
284 // String collectionCode = collectionImport.getCollectionCode(titleCache);
285 // System.out.println(collectionCode);
286 } catch (Exception e) {
287 e.printStackTrace();
288 System.exit(-1);
289 }
290 }
291
292 }