3 * Copyright (C) 2007 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.app
.wp6
.diptera
;
13 import java
.io
.FileInputStream
;
14 import java
.io
.InputStream
;
15 import java
.io
.InputStreamReader
;
16 import java
.util
.ArrayList
;
17 import java
.util
.HashMap
;
18 import java
.util
.List
;
21 import org
.apache
.log4j
.Logger
;
22 import org
.springframework
.transaction
.TransactionStatus
;
24 import au
.com
.bytecode
.opencsv
.CSVReader
;
25 import eu
.etaxonomy
.cdm
.api
.application
.CdmApplicationController
;
26 import eu
.etaxonomy
.cdm
.app
.common
.CdmDestinations
;
27 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
28 import eu
.etaxonomy
.cdm
.database
.DbSchemaValidation
;
29 import eu
.etaxonomy
.cdm
.database
.ICdmDataSource
;
30 import eu
.etaxonomy
.cdm
.model
.agent
.Institution
;
31 import eu
.etaxonomy
.cdm
.model
.occurrence
.Collection
;
32 import eu
.etaxonomy
.cdm
.model
.occurrence
.Specimen
;
33 import eu
.etaxonomy
.cdm
.model
.occurrence
.SpecimenOrObservationBase
;
40 public class DipteraCollectionImport
{
41 private static final Logger logger
= Logger
.getLogger(DipteraCollectionImport
.class);
43 public static final File acronymsFile
= new File("src/main/resources/collections/Acronyms.tab");
44 //datasource for use from local main()
45 static final ICdmDataSource cdmDestination
= CdmDestinations
.cdm_edit_diptera_a();
/**
 * Runs the collection import against the given datasource: creates the
 * collections from the acronyms file and then assigns them to the specimens,
 * both inside a single transaction.
 *
 * NOTE(review): the return statement and the closing brace of this method are
 * not visible in this excerpt, so the meaning of the boolean result cannot be
 * documented from here.
 *
 * @param dataSource the datasource the application controller is opened on
 */
48 public boolean invoke(ICdmDataSource dataSource
) {
// open the application controller; the schema is only validated (DbSchemaValidation.VALIDATE)
49 CdmApplicationController cdmApp
= CdmApplicationController
.NewInstance(dataSource
, DbSchemaValidation
.VALIDATE
);
// everything below runs between startTransaction() and commitTransaction(tx)
52 TransactionStatus tx
= cdmApp
.startTransaction();
// map from collection code to the newly created collection
53 Map
<String
, Collection
> colletionMap
= createCollections(cdmApp
);
55 //add collections to specimen
56 addCollectionsToSpecimen(cdmApp
, colletionMap
);
// NOTE(review): no rollback is visible for the case that one of the steps above throws
57 cdmApp
.commitTransaction(tx
);
68 private void addCollectionsToSpecimen(CdmApplicationController cdmApp
, Map
<String
, Collection
> colletionMap
) {
69 List
<SpecimenOrObservationBase
> specimens
= (cdmApp
.getOccurrenceService().list(Specimen
.class, null, null, null, null));
70 for (SpecimenOrObservationBase specOrObservBase
: specimens
){
71 if (specOrObservBase
instanceof Specimen
){
72 handleSingleSpecimen((Specimen
)specOrObservBase
, colletionMap
);
74 logger
.warn("There are specimenOrObservationBase objects which are not of class Specimen. This is probably an error.");
77 cdmApp
.getOccurrenceService().save(specimens
);
85 private void handleSingleSpecimen(Specimen specimen
, Map
<String
, Collection
> collectionMap
) {
86 String titleCache
= specimen
.getTitleCache();
87 String collectionCode
= getCollectionCode(titleCache
);
88 if (CdmUtils
.isEmpty(collectionCode
)){
89 logger
.warn("Collection code is empty for: " + titleCache
);
91 Collection collection
= collectionMap
.get(collectionCode
);
92 if (collection
!= null){
93 specimen
.setCollection(collection
);
95 logger
.warn("Collection not found for code: " + collectionCode
+ "; titleCache: " + titleCache
);
105 private String
getCollectionCode(String titleCache
) {
106 String result
= titleCache
.trim();
107 result
= replaceBracket(result
);
108 result
= replaceLastFullStop(result
);
109 result
= replaceLastQuestionMark(result
);
110 result
= parseLastUpperCase(result
);
/**
 * Scans the last space-separated token of the given string from right to
 * left and prepends every upper-case character it meets to the result.
 *
 * NOTE(review): the declaration of result, the branch structure around the
 * warning, the loop exit and the return statement are not visible in this
 * excerpt; confirm them against the full file before relying on this text.
 *
 * @param string the text whose trailing token is inspected
 */
119 private String
parseLastUpperCase(String string
) {
121 String tmpString
= string
;
// index of the last blank, or -1 when the string contains none
122 int pos
= tmpString
.lastIndexOf(" ");
// keep only the text after the last blank (pos+1 is 0 when there is no blank)
124 tmpString
= tmpString
.substring(pos
+1);
// consume the token one character at a time, starting with its last character
126 while (tmpString
.length() > 0){
127 int len
= tmpString
.length();
128 char lastChar
= tmpString
.charAt(len
-1);
129 if (Character
.isUpperCase( lastChar
)){
// prepend so that the collected characters keep their original order
130 result
= lastChar
+ result
;
// NOTE(review): presumably the else-branch of the upper-case test; the warning
// is logged when upper-case characters were already collected
132 if (result
.length() > 0){
133 logger
.warn("Collection code is not space separated: " + string
);
137 //remove last character
138 tmpString
= tmpString
.substring(0, tmpString
.length()-1);
149 private String
replaceLastQuestionMark(String string
) {
150 if (string
.endsWith("?")){
151 string
= string
.substring(0,string
.length()-1).trim();
160 private String
replaceLastFullStop(String string
) {
161 if (string
.endsWith(".")){
162 string
= string
.substring(0,string
.length()-1).trim();
172 private String
replaceBracket(String string
) {
173 if (string
.endsWith("]")){
174 int pos
= string
.indexOf("[");
176 string
= string
.substring(0, pos
).trim();
178 logger
.warn("Closing bracket has no opening bracket in: " + string
);
188 private Map
<String
, Collection
> createCollections(CdmApplicationController cdmApp
) {
189 Map
<String
, Collection
> collectionMap
= new HashMap
<String
, Collection
>();
190 List
<String
[]> lines
= getLines();
191 for (String
[] line
:lines
){
192 Collection collection
= makeLine(line
);
193 collectionMap
.put(collection
.getCode(), collection
);
195 cdmApp
.getCollectionService().save(collectionMap
.values());
196 // for (Collection collection: collectionMap.values()){
197 // System.out.println(collection.getTitleCache());
199 return collectionMap
;
/**
 * Builds a Collection with its Institution from one row of the acronyms
 * file. The row is read positionally: 0 = code, 1 = institute name,
 * 2 = lower institution name, 3 = higher institution name, 4 = location,
 * 5 = country. A row with fewer than six columns makes the array accesses
 * below throw an ArrayIndexOutOfBoundsException.
 *
 * NOTE(review): the return statement and the closing braces are not visible
 * in this excerpt.
 *
 * @param line the columns of one row
 */
203 private Collection
makeLine(String
[] line
) {
204 String code
= line
[0];
205 String instituteName
= line
[1];
206 String lowerInstitutionName
= line
[2];
207 String higherInstitutionName
= line
[3];
208 String location
= line
[4];
209 String country
= line
[5];
// collection and institution both receive the same code
211 Collection collection
= Collection
.NewInstance();
212 collection
.setCode(code
);
213 Institution institution
= Institution
.NewInstance();
214 institution
.setCode(code
);
216 institution
.setName(instituteName
);
// optional sub-institution: it is made part of the main institution
// NOTE(review): lowerInstitution is not referenced again in the visible lines; confirm it gets persisted
218 if (CdmUtils
.isNotEmpty(lowerInstitutionName
)){
219 Institution lowerInstitution
= Institution
.NewInstance();
220 lowerInstitution
.setName(lowerInstitutionName
);
221 lowerInstitution
.setIsPartOf(institution
);
// optional parent institution: the main institution is made part of it
224 if (CdmUtils
.isNotEmpty(higherInstitutionName
)){
225 Institution higherInstitution
= Institution
.NewInstance();
226 higherInstitution
.setName(higherInstitutionName
);
227 institution
.setIsPartOf(higherInstitution
);
230 collection
.setInstitute(institution
);
// location and country are joined with "/" and stored as town or location
231 String locationAndCountry
= CdmUtils
.concat("/", location
, country
);
232 collection
.setTownOrLocation(locationAndCountry
);
// the title cache is assembled from the name parts and set with the flag true
// NOTE(review): the flag presumably protects the title cache from regeneration; verify against setTitleCache
234 String titleCache
= CdmUtils
.concat(", ", new String
[]{instituteName
, lowerInstitutionName
, higherInstitutionName
, location
, country
});
235 collection
.setTitleCache(titleCache
, true);
243 private List
<String
[]> getLines() {
244 List
<String
[]> result
= new ArrayList
<String
[]>();
247 InputStream inStream
= new FileInputStream(acronymsFile
);
248 InputStreamReader inputStreamReader
= new InputStreamReader(inStream
, "UTF8");
249 CSVReader reader
= new CSVReader(inputStreamReader
, '\t');
250 String
[] nextLine
= reader
.readNext();
253 while ((nextLine
= reader
.readNext()) != null) {
254 if (nextLine
.length
== 0){
257 result
.add(nextLine
);
260 } catch (Exception e
) {
261 logger
.error(e
+ " " + e
.getCause() + " " + e
.getMessage());
262 for(StackTraceElement ste
: e
.getStackTrace()) {
265 throw new RuntimeException(e
);
276 public static void main(String
[] args
) {
278 DipteraCollectionImport collectionImport
= new DipteraCollectionImport();
279 collectionImport
.invoke(cdmDestination
);
280 // String titleCache = "Peru. Mouth of Rio Pachitea. ST 2R SMT. [fig. of male abdomen]";
281 // String collectionCode = collectionImport.getCollectionCode(titleCache);
282 // System.out.println(collectionCode);
283 } catch (Exception e
) {