2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
9 package eu
.etaxonomy
.cdm
.app
.wp6
.diptera
;
12 import java
.io
.FileInputStream
;
13 import java
.io
.InputStream
;
14 import java
.io
.InputStreamReader
;
15 import java
.util
.ArrayList
;
16 import java
.util
.HashMap
;
17 import java
.util
.List
;
20 import org
.apache
.commons
.lang
.StringUtils
;
21 import org
.apache
.log4j
.Logger
;
22 import org
.springframework
.transaction
.TransactionStatus
;
24 import au
.com
.bytecode
.opencsv
.CSVReader
;
25 import eu
.etaxonomy
.cdm
.api
.application
.CdmApplicationController
;
26 import eu
.etaxonomy
.cdm
.app
.common
.CdmDestinations
;
27 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
28 import eu
.etaxonomy
.cdm
.database
.DbSchemaValidation
;
29 import eu
.etaxonomy
.cdm
.database
.ICdmDataSource
;
30 import eu
.etaxonomy
.cdm
.io
.common
.ImportResult
;
31 import eu
.etaxonomy
.cdm
.model
.agent
.Institution
;
32 import eu
.etaxonomy
.cdm
.model
.occurrence
.Collection
;
33 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivedUnit
;
34 import eu
.etaxonomy
.cdm
.model
.occurrence
.SpecimenOrObservationBase
;
41 public class DipteraCollectionImport
{
// Log4j logger shared by all methods of this class.
42 private static final Logger logger
= Logger
.getLogger(DipteraCollectionImport
.class);
// Acronyms table; getLines() opens this file and parses it as tab-separated text.
44 public static final File acronymsFile
= new File("src/main/resources/collections/Acronyms.tab");
45 //datasource for use from local main()
// (main() passes this constant to invoke())
46 static final ICdmDataSource cdmDestination
= CdmDestinations
.localH2();
/**
 * Runs the collection import against the given data source.
 * <p>
 * A {@link CdmApplicationController} is created for the data source with
 * {@code DbSchemaValidation.VALIDATE}. Between {@code startTransaction()} and
 * {@code commitTransaction(tx)} the collections are created and then assigned
 * to the specimens.
 *
 * @param dataSource the data source the application controller is created for
 * @return NOTE(review): presumably the {@link ImportResult} instantiated at the
 *         start of the method; the return statement is not visible here - confirm
 */
49 public ImportResult
invoke(ICdmDataSource dataSource
) {
50 ImportResult result
= new ImportResult();
51 CdmApplicationController cdmApp
= CdmApplicationController
.NewInstance(dataSource
, DbSchemaValidation
.VALIDATE
);
54 TransactionStatus tx
= cdmApp
.startTransaction();
// map of collection code -> Collection; createCollections() also saves the collections
55 Map
<String
, Collection
> colletionMap
= createCollections(cdmApp
);
57 //add collections to specimen
58 addCollectionsToSpecimen(cdmApp
, colletionMap
);
59 cdmApp
.commitTransaction(tx
);
/**
 * Assigns collections to the derived units returned by the occurrence service.
 * <p>
 * The units are fetched with {@code list(DerivedUnit.class, null, null, null, null)}.
 * Each unit whose record basis is a preserved specimen is passed to
 * {@link #handleSingleSpecimen(DerivedUnit, Map)}. Afterwards all fetched units
 * are copied into a new list and handed to the occurrence service's {@code save}.
 * <p>
 * NOTE(review): the warning about objects "not of class Specimen" is presumably
 * logged for units that fail the preserved-specimen check - confirm the branch.
 *
 * @param cdmApp       application controller giving access to the occurrence service
 * @param colletionMap collections keyed by collection code
 */
70 private void addCollectionsToSpecimen(CdmApplicationController cdmApp
, Map
<String
, Collection
> colletionMap
) {
71 List
<DerivedUnit
> specimens
= cdmApp
.getOccurrenceService().list(DerivedUnit
.class, null, null, null, null);
72 for (SpecimenOrObservationBase
<?
> specOrObservBase
: specimens
){
73 if (specOrObservBase
.getRecordBasis().isPreservedSpecimen()){
74 handleSingleSpecimen((DerivedUnit
)specOrObservBase
, colletionMap
);
76 logger
.warn("There are specimenOrObservationBase objects which are not of class Specimen. This is probably an error.");
// copy into a list typed with the base class before passing it to save()
79 List
<SpecimenOrObservationBase
> specimenList
= new ArrayList
<SpecimenOrObservationBase
>(specimens
);
80 cdmApp
.getOccurrenceService().save(specimenList
);
/**
 * Sets the collection of a single specimen based on its title cache.
 * <p>
 * The collection code is derived from the title cache via
 * {@link #getCollectionCode(String)}. A warning is logged when the code is blank.
 * The code is looked up in {@code collectionMap}; if a collection is found it is
 * set on the specimen, and a "Collection not found" warning is logged as well.
 * NOTE(review): that warning is presumably the alternative branch for a missing
 * collection - confirm the exact branch structure.
 *
 * @param specimen      the derived unit to update
 * @param collectionMap collections keyed by collection code
 */
88 private void handleSingleSpecimen(DerivedUnit specimen
, Map
<String
, Collection
> collectionMap
) {
89 String titleCache
= specimen
.getTitleCache();
90 String collectionCode
= getCollectionCode(titleCache
);
91 if (StringUtils
.isBlank(collectionCode
)){
92 logger
.warn("Collection code is empty for: " + titleCache
);
94 Collection collection
= collectionMap
.get(collectionCode
);
95 if (collection
!= null){
96 specimen
.setCollection(collection
);
98 logger
.warn("Collection not found for code: " + collectionCode
+ "; titleCache: " + titleCache
);
/**
 * Derives a collection code from a specimen title cache.
 * <p>
 * The title cache is trimmed and then passed, in this order, through
 * {@link #replaceBracket(String)}, {@link #replaceLastFullStop(String)},
 * {@link #replaceLastQuestionMark(String)} and {@link #parseLastUpperCase(String)}.
 *
 * @param titleCache the title cache to parse; {@code trim()} is called on it
 *                   directly, so it must not be {@code null}
 * @return NOTE(review): presumably the value produced by the last step; the
 *         return statement is not visible here - confirm
 */
108 private String
getCollectionCode(String titleCache
) {
109 String result
= titleCache
.trim();
110 result
= replaceBracket(result
);
111 result
= replaceLastFullStop(result
);
112 result
= replaceLastQuestionMark(result
);
113 result
= parseLastUpperCase(result
);
/**
 * Collects upper-case characters from the end of the given string.
 * <p>
 * Only the part after the last space is examined. The loop looks at the last
 * character of the remaining text, prepends it to {@code result} when it is
 * upper case, and then removes that character from the remaining text.
 * <p>
 * NOTE(review): the declaration of {@code result}, the branch around the
 * "not space separated" warning and the loop exit are not visible here. The
 * warning is presumably logged when a non-upper-case character is reached after
 * at least one upper-case character was collected - confirm.
 *
 * @param string the text to scan
 * @return NOTE(review): presumably {@code result}; the return statement is not
 *         visible here - confirm
 */
122 private String
parseLastUpperCase(String string
) {
124 String tmpString
= string
;
125 int pos
= tmpString
.lastIndexOf(" ");
// keep only the text following the last space
127 tmpString
= tmpString
.substring(pos
+1);
129 while (tmpString
.length() > 0){
130 int len
= tmpString
.length();
131 char lastChar
= tmpString
.charAt(len
-1);
132 if (Character
.isUpperCase( lastChar
)){
// prepend, because the text is scanned from its end towards its start
133 result
= lastChar
+ result
;
135 if (result
.length() > 0){
136 logger
.warn("Collection code is not space separated: " + string
);
140 //remove last character
141 tmpString
= tmpString
.substring(0, tmpString
.length()-1);
/**
 * Removes a single trailing question mark.
 * <p>
 * If the string ends with "?", the last character is cut off and the remainder
 * is trimmed.
 *
 * @param string the text to clean
 * @return NOTE(review): presumably the (possibly shortened) string; the return
 *         statement is not visible here - confirm
 */
152 private String
replaceLastQuestionMark(String string
) {
153 if (string
.endsWith("?")){
154 string
= string
.substring(0,string
.length()-1).trim();
/**
 * Removes a single trailing full stop.
 * <p>
 * If the string ends with ".", the last character is cut off and the remainder
 * is trimmed.
 *
 * @param string the text to clean
 * @return NOTE(review): presumably the (possibly shortened) string; the return
 *         statement is not visible here - confirm
 */
163 private String
replaceLastFullStop(String string
) {
164 if (string
.endsWith(".")){
165 string
= string
.substring(0,string
.length()-1).trim();
/**
 * Removes a trailing bracketed part from the string.
 * <p>
 * Only strings ending with "]" are handled. The position of the first "[" is
 * looked up and the string is cut off before it and trimmed.
 * <p>
 * NOTE(review): the check on {@code pos} is not visible here. The warning is
 * presumably logged when no "[" is found - confirm.
 *
 * @param string the text to clean
 * @return NOTE(review): presumably the (possibly shortened) string; the return
 *         statement is not visible here - confirm
 */
175 private String
replaceBracket(String string
) {
176 if (string
.endsWith("]")){
// indexOf returns the FIRST opening bracket, so everything from there on is dropped
177 int pos
= string
.indexOf("[");
179 string
= string
.substring(0, pos
).trim();
181 logger
.warn("Closing bracket has no opening bracket in: " + string
);
/**
 * Creates the collections from the acronyms file and saves them.
 * <p>
 * Every line returned by {@link #getLines()} is converted by
 * {@link #makeLine(String[])} and stored in a map under the collection's code.
 * A later line with the same code replaces the earlier entry. All map values
 * are then passed to the collection service's {@code save}.
 *
 * @param cdmApp application controller giving access to the collection service
 * @return the collections keyed by collection code
 */
191 private Map
<String
, Collection
> createCollections(CdmApplicationController cdmApp
) {
192 Map
<String
, Collection
> collectionMap
= new HashMap
<String
, Collection
>();
193 List
<String
[]> lines
= getLines();
194 for (String
[] line
:lines
){
195 Collection collection
= makeLine(line
);
196 collectionMap
.put(collection
.getCode(), collection
);
198 cdmApp
.getCollectionService().save(collectionMap
.values());
199 // for (Collection collection: collectionMap.values()){
200 // System.out.println(collection.getTitleCache());
202 return collectionMap
;
/**
 * Builds a {@link Collection} with its {@link Institution} from one line of the
 * acronyms file.
 * <p>
 * Columns read from the line: 0 = code, 1 = institute name, 2 = lower
 * institution name, 3 = higher institution name, 4 = location, 5 = country.
 * The line must therefore have at least six elements.
 * <p>
 * The code is set on both the collection and the institution. A non-blank lower
 * institution name creates an institution that is part of the main institution.
 * A non-blank higher institution name creates an institution that the main
 * institution is part of.
 *
 * @param line the column values of one line
 * @return NOTE(review): presumably the created collection; the return statement
 *         is not visible here - confirm
 */
206 private Collection
makeLine(String
[] line
) {
207 String code
= line
[0];
208 String instituteName
= line
[1];
209 String lowerInstitutionName
= line
[2];
210 String higherInstitutionName
= line
[3];
211 String location
= line
[4];
212 String country
= line
[5];
214 Collection collection
= Collection
.NewInstance();
215 collection
.setCode(code
);
216 Institution institution
= Institution
.NewInstance();
217 institution
.setCode(code
);
219 institution
.setName(instituteName
);
// optional sub-institution: lowerInstitution is part of institution
221 if (StringUtils
.isNotBlank(lowerInstitutionName
)){
222 Institution lowerInstitution
= Institution
.NewInstance();
223 lowerInstitution
.setName(lowerInstitutionName
);
224 lowerInstitution
.setIsPartOf(institution
);
// optional parent institution: institution is part of higherInstitution
227 if (StringUtils
.isNotBlank(higherInstitutionName
)){
228 Institution higherInstitution
= Institution
.NewInstance();
229 higherInstitution
.setName(higherInstitutionName
);
230 institution
.setIsPartOf(higherInstitution
);
233 collection
.setInstitute(institution
);
// location and country are combined with "/" as separator
234 String locationAndCountry
= CdmUtils
.concat("/", location
, country
);
235 collection
.setTownOrLocation(locationAndCountry
);
// title cache is assembled from all name/location parts, separated by ", "
// NOTE(review): the second argument (true) presumably marks the title cache as protected - confirm
237 String titleCache
= CdmUtils
.concat(", ", new String
[]{instituteName
, lowerInstitutionName
, higherInstitutionName
, location
, country
});
238 collection
.setTitleCache(titleCache
, true);
/**
 * Reads the acronyms file and collects its lines as string arrays.
 * <p>
 * {@code acronymsFile} is opened through a {@link FileInputStream}, decoded with
 * the charset name "UTF8" and parsed by a {@link CSVReader} using the tab
 * character as separator. Lines read inside the loop are added to the result.
 * <p>
 * Any {@link Exception} is logged (exception, cause and message) and rethrown
 * wrapped in a {@link RuntimeException}.
 * <p>
 * NOTE(review): the handling of zero-length lines, the closing of the reader and
 * the return statement are not visible here - confirm them.
 *
 * @return NOTE(review): presumably the collected lines ({@code result}) - confirm
 */
246 private List
<String
[]> getLines() {
247 List
<String
[]> result
= new ArrayList
<String
[]>();
250 InputStream inStream
= new FileInputStream(acronymsFile
);
251 InputStreamReader inputStreamReader
= new InputStreamReader(inStream
, "UTF8");
252 CSVReader reader
= new CSVReader(inputStreamReader
, '\t');
// NOTE(review): this first line is overwritten by the loop condition below before
// any visible use, so it appears to be skipped (presumably a header line) - confirm
253 String
[] nextLine
= reader
.readNext();
256 while ((nextLine
= reader
.readNext()) != null) {
257 if (nextLine
.length
== 0){
260 result
.add(nextLine
);
263 } catch (Exception e
) {
264 logger
.error(e
+ " " + e
.getCause() + " " + e
.getMessage());
265 for(StackTraceElement ste
: e
.getStackTrace()) {
// the original exception is kept as the cause
268 throw new RuntimeException(e
);
279 public static void main(String
[] args
) {
281 DipteraCollectionImport collectionImport
= new DipteraCollectionImport();
282 collectionImport
.invoke(cdmDestination
);
283 // String titleCache = "Peru. Mouth of Rio Pachitea. ST 2R SMT. [fig. of male abdomen]";
284 // String collectionCode = collectionImport.getCollectionCode(titleCache);
285 // System.out.println(collectionCode);
286 } catch (Exception e
) {