remove dipteraH2
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / app / wp6 / diptera / DipteraCollectionImport.java
1 // $Id$
2 /**
3 * Copyright (C) 2007 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10 package eu.etaxonomy.cdm.app.wp6.diptera;
11
12 import java.io.File;
13 import java.io.FileInputStream;
14 import java.io.InputStream;
15 import java.io.InputStreamReader;
16 import java.util.ArrayList;
17 import java.util.HashMap;
18 import java.util.List;
19 import java.util.Map;
20
21 import org.apache.commons.lang.StringUtils;
22 import org.apache.log4j.Logger;
23 import org.springframework.transaction.TransactionStatus;
24
25 import au.com.bytecode.opencsv.CSVReader;
26 import eu.etaxonomy.cdm.api.application.CdmApplicationController;
27 import eu.etaxonomy.cdm.app.common.CdmDestinations;
28 import eu.etaxonomy.cdm.common.CdmUtils;
29 import eu.etaxonomy.cdm.database.DbSchemaValidation;
30 import eu.etaxonomy.cdm.database.ICdmDataSource;
31 import eu.etaxonomy.cdm.model.agent.Institution;
32 import eu.etaxonomy.cdm.model.occurrence.Collection;
33 import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
34 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
35
36 /**
37 * @author a.mueller
38 * @date 07.04.2010
39 *
40 */
41 public class DipteraCollectionImport {
42 private static final Logger logger = Logger.getLogger(DipteraCollectionImport.class);
43
44 public static final File acronymsFile = new File("src/main/resources/collections/Acronyms.tab");
45 //datasource for use from local main()
46 static final ICdmDataSource cdmDestination = CdmDestinations.cdm_local_dipera();
47
48
49 public boolean invoke(ICdmDataSource dataSource) {
50 CdmApplicationController cdmApp = CdmApplicationController.NewInstance(dataSource, DbSchemaValidation.VALIDATE);
51
52 //create collections
53 TransactionStatus tx = cdmApp.startTransaction();
54 Map<String, Collection> colletionMap = createCollections(cdmApp);
55
56 //add collections to specimen
57 addCollectionsToSpecimen(cdmApp, colletionMap);
58 cdmApp.commitTransaction(tx);
59
60 return true;
61
62 }
63
64
65 /**
66 * @param cdmApp
67 * @param colletionMap
68 */
69 private void addCollectionsToSpecimen(CdmApplicationController cdmApp, Map<String, Collection> colletionMap) {
70 List<DerivedUnit> specimens = cdmApp.getOccurrenceService().list(DerivedUnit.class, null, null, null, null);
71 for (SpecimenOrObservationBase<?> specOrObservBase : specimens){
72 if (specOrObservBase.getRecordBasis().isPreservedSpecimen()){
73 handleSingleSpecimen((DerivedUnit)specOrObservBase, colletionMap);
74 }else{
75 logger.warn("There are specimenOrObservationBase objects which are not of class Specimen. This is probably an error.");
76 }
77 }
78 List<SpecimenOrObservationBase> specimenList = new ArrayList<SpecimenOrObservationBase>(specimens);
79 cdmApp.getOccurrenceService().save(specimenList);
80 }
81
82
83 /**
84 * @param specimen
85 * @param colletionMap
86 */
87 private void handleSingleSpecimen(DerivedUnit specimen, Map<String, Collection> collectionMap) {
88 String titleCache = specimen.getTitleCache();
89 String collectionCode = getCollectionCode(titleCache);
90 if (StringUtils.isBlank(collectionCode)){
91 logger.warn("Collection code is empty for: " + titleCache);
92 }else{
93 Collection collection = collectionMap.get(collectionCode);
94 if (collection != null){
95 specimen.setCollection(collection);
96 }else{
97 logger.warn("Collection not found for code: " + collectionCode + "; titleCache: " + titleCache);
98 }
99 }
100 }
101
102
103 /**
104 * @param titleCache
105 * @return
106 */
107 private String getCollectionCode(String titleCache) {
108 String result = titleCache.trim();
109 result = replaceBracket(result);
110 result = replaceLastFullStop(result);
111 result = replaceLastQuestionMark(result);
112 result = parseLastUpperCase(result);
113 return result;
114 }
115
116
117 /**
118 * @param result
119 * @return
120 */
121 private String parseLastUpperCase(String string) {
122 String result = "";
123 String tmpString = string;
124 int pos = tmpString.lastIndexOf(" ");
125 if (pos>-1){
126 tmpString = tmpString.substring(pos+1);
127 }
128 while (tmpString.length() > 0){
129 int len = tmpString.length();
130 char lastChar = tmpString.charAt(len-1);
131 if (Character.isUpperCase( lastChar)){
132 result = lastChar + result;
133 }else{
134 if (result.length() > 0){
135 logger.warn("Collection code is not space separated: " + string);
136 }
137 break;
138 }
139 //remove last character
140 tmpString = tmpString.substring(0, tmpString.length()-1);
141 }
142 return result;
143 }
144
145
146
147 /**
148 * @param result
149 * @return
150 */
151 private String replaceLastQuestionMark(String string) {
152 if (string.endsWith("?")){
153 string = string.substring(0,string.length()-1).trim();
154 }
155 return string;
156 }
157
158 /**
159 * @param result
160 * @return
161 */
162 private String replaceLastFullStop(String string) {
163 if (string.endsWith(".")){
164 string = string.substring(0,string.length()-1).trim();
165 }
166 return string;
167 }
168
169
170 /**
171 * @param result
172 * @return
173 */
174 private String replaceBracket(String string) {
175 if (string.endsWith("]")){
176 int pos = string.indexOf("[");
177 if (pos >0){
178 string = string.substring(0, pos).trim();
179 }else{
180 logger.warn("Closing bracket has no opening bracket in: " + string);
181 }
182 }
183 return string;
184 }
185
186
187 /**
188 * @param cdmApp
189 */
190 private Map<String, Collection> createCollections(CdmApplicationController cdmApp) {
191 Map<String, Collection> collectionMap = new HashMap<String, Collection>();
192 List<String[]> lines = getLines();
193 for (String[] line:lines){
194 Collection collection = makeLine(line);
195 collectionMap.put(collection.getCode(), collection);
196 }
197 cdmApp.getCollectionService().save(collectionMap.values());
198 // for (Collection collection: collectionMap.values()){
199 // System.out.println(collection.getTitleCache());
200 // }
201 return collectionMap;
202 }
203
204
205 private Collection makeLine(String[] line) {
206 String code = line[0];
207 String instituteName = line[1];
208 String lowerInstitutionName = line[2];
209 String higherInstitutionName = line[3];
210 String location = line[4];
211 String country = line[5];
212 //create objects
213 Collection collection = Collection.NewInstance();
214 collection.setCode(code);
215 Institution institution = Institution.NewInstance();
216 institution.setCode(code);
217
218 institution.setName(instituteName);
219
220 if (StringUtils.isNotBlank(lowerInstitutionName)){
221 Institution lowerInstitution = Institution.NewInstance();
222 lowerInstitution.setName(lowerInstitutionName);
223 lowerInstitution.setIsPartOf(institution);
224 }
225
226 if (StringUtils.isNotBlank(higherInstitutionName)){
227 Institution higherInstitution = Institution.NewInstance();
228 higherInstitution.setName(higherInstitutionName);
229 institution.setIsPartOf(higherInstitution);
230 }
231
232 collection.setInstitute(institution);
233 String locationAndCountry = CdmUtils.concat("/", location, country);
234 collection.setTownOrLocation(locationAndCountry);
235
236 String titleCache = CdmUtils.concat(", ", new String[]{instituteName, lowerInstitutionName, higherInstitutionName, location, country});
237 collection.setTitleCache(titleCache, true);
238
239 return collection;
240 }
241
242
243
244
245 private List<String[]> getLines() {
246 List<String[]> result = new ArrayList<String[]>();
247
248 try {
249 InputStream inStream = new FileInputStream(acronymsFile);
250 InputStreamReader inputStreamReader = new InputStreamReader(inStream, "UTF8");
251 CSVReader reader = new CSVReader(inputStreamReader, '\t');
252 String [] nextLine = reader.readNext();
253
254
255 while ((nextLine = reader.readNext()) != null) {
256 if (nextLine.length == 0){
257 continue;
258 }
259 result.add(nextLine);
260 }
261 return result;
262 } catch (Exception e) {
263 logger.error(e + " " + e.getCause() + " " + e.getMessage());
264 for(StackTraceElement ste : e.getStackTrace()) {
265 logger.error(ste);
266 }
267 throw new RuntimeException(e);
268 }
269 }
270
271
272
273
274
275 /**
276 * @param args
277 */
278 public static void main(String[] args) {
279 try {
280 DipteraCollectionImport collectionImport = new DipteraCollectionImport();
281 collectionImport.invoke(cdmDestination);
282 // String titleCache = "Peru. Mouth of Rio Pachitea. ST 2R SMT. [fig. of male abdomen]";
283 // String collectionCode = collectionImport.getCollectionCode(titleCache);
284 // System.out.println(collectionCode);
285 } catch (Exception e) {
286 e.printStackTrace();
287 System.exit(-1);
288 }
289 }
290
291 }