/**
 * Copyright (C) 2007 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
10 package eu
.etaxonomy
.cdm
.app
.wp6
.diptera
;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.springframework.transaction.TransactionStatus;

import au.com.bytecode.opencsv.CSVReader;
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
import eu.etaxonomy.cdm.app.common.CdmDestinations;
import eu.etaxonomy.cdm.common.CdmUtils;
import eu.etaxonomy.cdm.database.DbSchemaValidation;
import eu.etaxonomy.cdm.database.ICdmDataSource;
import eu.etaxonomy.cdm.model.agent.Institution;
import eu.etaxonomy.cdm.model.occurrence.Collection;
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
41 public class DipteraCollectionImport
{
42 private static final Logger logger
= Logger
.getLogger(DipteraCollectionImport
.class);
44 public static final File acronymsFile
= new File("src/main/resources/collections/Acronyms.tab");
45 //datasource for use from local main()
46 static final ICdmDataSource cdmDestination
= CdmDestinations
.cdm_local_dipera();
49 public boolean invoke(ICdmDataSource dataSource
) {
50 CdmApplicationController cdmApp
= CdmApplicationController
.NewInstance(dataSource
, DbSchemaValidation
.VALIDATE
);
53 TransactionStatus tx
= cdmApp
.startTransaction();
54 Map
<String
, Collection
> colletionMap
= createCollections(cdmApp
);
56 //add collections to specimen
57 addCollectionsToSpecimen(cdmApp
, colletionMap
);
58 cdmApp
.commitTransaction(tx
);
69 private void addCollectionsToSpecimen(CdmApplicationController cdmApp
, Map
<String
, Collection
> colletionMap
) {
70 List
<DerivedUnit
> specimens
= cdmApp
.getOccurrenceService().list(DerivedUnit
.class, null, null, null, null);
71 for (SpecimenOrObservationBase
<?
> specOrObservBase
: specimens
){
72 if (specOrObservBase
.getRecordBasis().isPreservedSpecimen()){
73 handleSingleSpecimen((DerivedUnit
)specOrObservBase
, colletionMap
);
75 logger
.warn("There are specimenOrObservationBase objects which are not of class Specimen. This is probably an error.");
78 List
<SpecimenOrObservationBase
> specimenList
= new ArrayList
<SpecimenOrObservationBase
>(specimens
);
79 cdmApp
.getOccurrenceService().save(specimenList
);
87 private void handleSingleSpecimen(DerivedUnit specimen
, Map
<String
, Collection
> collectionMap
) {
88 String titleCache
= specimen
.getTitleCache();
89 String collectionCode
= getCollectionCode(titleCache
);
90 if (StringUtils
.isBlank(collectionCode
)){
91 logger
.warn("Collection code is empty for: " + titleCache
);
93 Collection collection
= collectionMap
.get(collectionCode
);
94 if (collection
!= null){
95 specimen
.setCollection(collection
);
97 logger
.warn("Collection not found for code: " + collectionCode
+ "; titleCache: " + titleCache
);
107 private String
getCollectionCode(String titleCache
) {
108 String result
= titleCache
.trim();
109 result
= replaceBracket(result
);
110 result
= replaceLastFullStop(result
);
111 result
= replaceLastQuestionMark(result
);
112 result
= parseLastUpperCase(result
);
121 private String
parseLastUpperCase(String string
) {
123 String tmpString
= string
;
124 int pos
= tmpString
.lastIndexOf(" ");
126 tmpString
= tmpString
.substring(pos
+1);
128 while (tmpString
.length() > 0){
129 int len
= tmpString
.length();
130 char lastChar
= tmpString
.charAt(len
-1);
131 if (Character
.isUpperCase( lastChar
)){
132 result
= lastChar
+ result
;
134 if (result
.length() > 0){
135 logger
.warn("Collection code is not space separated: " + string
);
139 //remove last character
140 tmpString
= tmpString
.substring(0, tmpString
.length()-1);
151 private String
replaceLastQuestionMark(String string
) {
152 if (string
.endsWith("?")){
153 string
= string
.substring(0,string
.length()-1).trim();
162 private String
replaceLastFullStop(String string
) {
163 if (string
.endsWith(".")){
164 string
= string
.substring(0,string
.length()-1).trim();
174 private String
replaceBracket(String string
) {
175 if (string
.endsWith("]")){
176 int pos
= string
.indexOf("[");
178 string
= string
.substring(0, pos
).trim();
180 logger
.warn("Closing bracket has no opening bracket in: " + string
);
190 private Map
<String
, Collection
> createCollections(CdmApplicationController cdmApp
) {
191 Map
<String
, Collection
> collectionMap
= new HashMap
<String
, Collection
>();
192 List
<String
[]> lines
= getLines();
193 for (String
[] line
:lines
){
194 Collection collection
= makeLine(line
);
195 collectionMap
.put(collection
.getCode(), collection
);
197 cdmApp
.getCollectionService().save(collectionMap
.values());
198 // for (Collection collection: collectionMap.values()){
199 // System.out.println(collection.getTitleCache());
201 return collectionMap
;
205 private Collection
makeLine(String
[] line
) {
206 String code
= line
[0];
207 String instituteName
= line
[1];
208 String lowerInstitutionName
= line
[2];
209 String higherInstitutionName
= line
[3];
210 String location
= line
[4];
211 String country
= line
[5];
213 Collection collection
= Collection
.NewInstance();
214 collection
.setCode(code
);
215 Institution institution
= Institution
.NewInstance();
216 institution
.setCode(code
);
218 institution
.setName(instituteName
);
220 if (StringUtils
.isNotBlank(lowerInstitutionName
)){
221 Institution lowerInstitution
= Institution
.NewInstance();
222 lowerInstitution
.setName(lowerInstitutionName
);
223 lowerInstitution
.setIsPartOf(institution
);
226 if (StringUtils
.isNotBlank(higherInstitutionName
)){
227 Institution higherInstitution
= Institution
.NewInstance();
228 higherInstitution
.setName(higherInstitutionName
);
229 institution
.setIsPartOf(higherInstitution
);
232 collection
.setInstitute(institution
);
233 String locationAndCountry
= CdmUtils
.concat("/", location
, country
);
234 collection
.setTownOrLocation(locationAndCountry
);
236 String titleCache
= CdmUtils
.concat(", ", new String
[]{instituteName
, lowerInstitutionName
, higherInstitutionName
, location
, country
});
237 collection
.setTitleCache(titleCache
, true);
245 private List
<String
[]> getLines() {
246 List
<String
[]> result
= new ArrayList
<String
[]>();
249 InputStream inStream
= new FileInputStream(acronymsFile
);
250 InputStreamReader inputStreamReader
= new InputStreamReader(inStream
, "UTF8");
251 CSVReader reader
= new CSVReader(inputStreamReader
, '\t');
252 String
[] nextLine
= reader
.readNext();
255 while ((nextLine
= reader
.readNext()) != null) {
256 if (nextLine
.length
== 0){
259 result
.add(nextLine
);
262 } catch (Exception e
) {
263 logger
.error(e
+ " " + e
.getCause() + " " + e
.getMessage());
264 for(StackTraceElement ste
: e
.getStackTrace()) {
267 throw new RuntimeException(e
);
278 public static void main(String
[] args
) {
280 DipteraCollectionImport collectionImport
= new DipteraCollectionImport();
281 collectionImport
.invoke(cdmDestination
);
282 // String titleCache = "Peru. Mouth of Rio Pachitea. ST 2R SMT. [fig. of male abdomen]";
283 // String collectionCode = collectionImport.getCollectionCode(titleCache);
284 // System.out.println(collectionCode);
285 } catch (Exception e
) {