latest updates to the Globis import
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / globis / GlobisImageImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.globis;
11
12 import java.io.IOException;
13 import java.net.MalformedURLException;
14 import java.net.URI;
15 import java.sql.ResultSet;
16 import java.sql.SQLException;
17 import java.util.HashMap;
18 import java.util.HashSet;
19 import java.util.Map;
20 import java.util.Set;
21
22 import org.apache.http.client.ClientProtocolException;
23 import org.apache.log4j.Logger;
24 import org.springframework.stereotype.Component;
25
26 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
27 import eu.etaxonomy.cdm.common.UriUtils;
28 import eu.etaxonomy.cdm.io.common.IOValidator;
29 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
30 import eu.etaxonomy.cdm.io.globis.validation.GlobisImageImportValidator;
31 import eu.etaxonomy.cdm.model.common.CdmBase;
32 import eu.etaxonomy.cdm.model.common.Language;
33 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
34 import eu.etaxonomy.cdm.model.description.SpecimenDescription;
35 import eu.etaxonomy.cdm.model.description.TextData;
36 import eu.etaxonomy.cdm.model.media.Media;
37 import eu.etaxonomy.cdm.model.occurrence.Specimen;
38 import eu.etaxonomy.cdm.model.reference.Reference;
39 import eu.etaxonomy.cdm.model.taxon.Taxon;
40
41
42 /**
43 * @author a.mueller
44 * @created 20.02.2010
45 * @version 1.0
46 */
47 @Component
48 public class GlobisImageImport extends GlobisImportBase<Taxon> {
49 private static final Logger logger = Logger.getLogger(GlobisImageImport.class);
50
51 private int modCount = 10000;
52 private static final String pluralString = "images";
53 private static final String dbTableName = "Einzelbilder";
54 private static final Class cdmTargetClass = Media
55 .class; //not needed
56
57 private static final String IMAGE_NAMESPACE = "Einzelbilder";
58
/**
 * Creates the image import step and hands the plural label, the source
 * table name and the target class of this step to the base class.
 */
public GlobisImageImport(){
    super(pluralString, dbTableName, cdmTargetClass);
}
62
63
64
65
66 /* (non-Javadoc)
67 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#getIdQuery()
68 */
69 @Override
70 protected String getIdQuery() {
71 String strRecordQuery =
72 " SELECT BildId " +
73 " FROM " + dbTableName;
74 return strRecordQuery;
75 }
76
77
78
79
80 /* (non-Javadoc)
81 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
82 */
83 @Override
84 protected String getRecordQuery(GlobisImportConfigurator config) {
85 String strRecordQuery =
86 " SELECT i.*, NULL as Created_When, NULL as Created_Who," +
87 " NULL as Updated_who, NULL as Updated_When, NULL as Notes, st.SpecCurrspecID " +
88 " FROM " + getTableName() + " i " +
89 " LEFT JOIN specTax st ON i.spectaxID = st.SpecTaxID " +
90 " WHERE ( i.BildId IN (" + ID_LIST_TOKEN + ") )";
91 return strRecordQuery;
92 }
93
94
95
96 /* (non-Javadoc)
97 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doPartition(eu.etaxonomy.cdm.io.common.ResultSetPartitioner, eu.etaxonomy.cdm.io.globis.GlobisImportState)
98 */
99 @Override
100 public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
101 boolean success = true;
102
103 Set<Media> objectsToSave = new HashSet<Media>();
104
105 Map<String, Specimen> typeMap = (Map<String, Specimen>) partitioner.getObjectMap(GlobisSpecTaxImport.TYPE_NAMESPACE);
106
107 Map<String, Taxon> taxonMap = (Map<String, Taxon>) partitioner.getObjectMap(TAXON_NAMESPACE);
108 // Map<String, DerivedUnit> ecoFactDerivedUnitMap = (Map<String, DerivedUnit>) partitioner.getObjectMap(ECO_FACT_DERIVED_UNIT_NAMESPACE);
109
110 ResultSet rs = partitioner.getResultSet();
111
112 try {
113
114 int i = 0;
115
116 //for each reference
117 while (rs.next()){
118
119 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
120
121 Integer bildID = rs.getInt("BildID");
122
123 Integer spectaxID = nullSafeInt(rs, "spectaxID");
124
125 Integer taxonID = nullSafeInt(rs, "SpecCurrspecID");
126
127 String copyright = rs.getString("copyright");
128
129 //ignore:
130 // [file lab2], same as Dateiname04 but less data
131 // Dateipfad
132 Set<Media> recordMedia = new HashSet<Media>();
133
134 try {
135
136 //make image path
137 String pathShort = rs.getString("Dateipfad_kurz");
138 String fileOS = rs.getString("file OS");
139 pathShort= pathShort.replace(fileOS, "");
140 String newPath = state.getConfig().getImageBaseUrl();
141 String path = pathShort.replace("image:Webversionen/", newPath);
142
143 Media singleMedia = makeMedia(state, rs, "file OS", "Legende 1", path, objectsToSave );
144 recordMedia.add(singleMedia);
145 singleMedia = makeMedia(state, rs, "Dateinamen02", "Legende 2", path, objectsToSave );
146 recordMedia.add(singleMedia);
147 singleMedia = makeMedia(state, rs, "Dateinamen03", "Legende 3", path, objectsToSave );
148 recordMedia.add(singleMedia);
149 singleMedia = makeMedia(state, rs, "Dateinamen04", "Legende 4", path, objectsToSave );
150 recordMedia.add(singleMedia);
151
152 if (spectaxID != null){
153 String collectionCode = transformCopyright2CollectionCode(copyright);
154 String id = GlobisSpecTaxImport.getTypeId(spectaxID, collectionCode);
155 Specimen typeSpecimen = typeMap.get(id);
156 if (typeSpecimen != null){
157 DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(typeSpecimen);
158 for (Media media: recordMedia){
159 facade.addDerivedUnitMedia(media);
160 }
161 }else{
162 //TODO
163 }
164 }else{
165 //TODO
166 }
167
168
169
170
171 } catch (Exception e) {
172 logger.warn("Exception in Einzelbilder: bildID " + bildID + ". " + e.getMessage());
173 e.printStackTrace();
174 }
175
176 }
177
178 logger.info(pluralString + " to save: " + objectsToSave.size());
179 getMediaService().save(objectsToSave);
180
181 return success;
182 } catch (SQLException e) {
183 logger.error("SQLException:" + e);
184 return false;
185 }
186 }
187
188 private Media makeMedia(GlobisImportState state, ResultSet rs, String fileNameAttr, String legendAttr, String path, Set<Media> objectsToSave) throws SQLException {
189 Media media = null;
190 String fileName = rs.getString(fileNameAttr);
191 String legend = rs.getString(legendAttr);
192 Integer bildID = rs.getInt("BildID");
193
194 URI uri = URI.create(path+fileName);
195
196 // Media media = ImageInfo.NewInstanceWithMetaData(uri, null);
197
198 try {
199 boolean readMediaData = state.getConfig().isDoReadMediaData();
200 if (isBlank(legend) && readMediaData){
201 if (UriUtils.isOk(UriUtils.getResponse(uri, null))){
202 logger.warn("Image exists but legend is null " + uri + ", bildID" + bildID );
203 }else{
204 return null;
205 }
206 }
207
208 media = this.getImageMedia(uri.toString(), readMediaData, false);
209 media.putTitle(Language.ENGLISH(), legend);
210 this.doIdCreatedUpdatedNotes(state, media, rs, bildID, IMAGE_NAMESPACE);
211
212 objectsToSave.add(media);
213
214
215 } catch (MalformedURLException e) {
216 e.printStackTrace();
217 } catch (ClientProtocolException e) {
218 e.printStackTrace();
219 } catch (IOException e) {
220 e.printStackTrace();
221 }
222
223 return media;
224 }
225
226 private String transformCopyright2CollectionCode(String copyright){
227
228 if (isBlank(copyright)){
229 return "";
230 }else if(copyright.matches("Museum f.?r Naturkunde der Humboldt-Universit.?t, Berlin")){
231 return "MFNB";
232 }else if(copyright.matches("Staatliches Museum f.?r Tierkunde Dresden")){
233 return "SMTD";
234 }else if(copyright.equals("Natural History Museum, London")){
235 return "BMNH";
236 }else if(copyright.matches("Zoologische Staatssammlung M.?nchen")){
237 return "ZSSM";
238 }else if(copyright.matches("Staatliches Museum f.?r Naturkunde Karlsruhe")){
239 return "SMNK";
240 }else if(copyright.matches("Deutsches Entomologisches Institut M.?ncheberg")){
241 return "DEIE";
242 }else if(copyright.equals("Forschungsinstitut und Naturmuseum Senckenberg")){
243 return "SMFM";
244 }else if(copyright.matches("Mus.?um National d.?Histoire Naturelle, Paris")){
245 return "MNHN";
246 }else if(copyright.equals("Naturhistorisches Museum Wien")){
247 return "NHMW";
248 }else if(copyright.equals("Naturhistoriska Riksmuseet Stockholm")){
249 return "NRMS";
250 }else if(copyright.matches("Staatliches Museum f.?r Naturkunde Stuttgart")){
251 return "SMNS";
252 }else if(copyright.equals("United States National Museum of Natural History, Washington")){
253 return "USNM";
254 }else if(copyright.matches("Zentrum f.?r Biodokumentation des Saarlandes")){
255 return "ZFBS";
256 }else if(copyright.equals("Zoological Museum, University of Copenhagen")){
257 return "ZMUC";
258 }else if(copyright.equals("Zoologisches Forschungsinstitut und Museum \"Alexander Koenig\", Bonn")){
259 return "ZFMK";
260 }else if(copyright.equals("Zoologisches Forschungsmuseum \"Alexander Koenig\", Bonn")){
261 return "ZFMK";
262 }else if(copyright.matches("Zoologisches Institut der Martin-Luther-Universit.?t Halle-Wittenberg")){
263 return "ZIUH";
264 }else if(copyright.matches("Zoologisches Institut Universit.?t T.?bingen")){
265 return "ZIUT";
266 }else{
267 logger.warn("Unknown copyright entry: " + copyright);
268 return "";
269 }
270
271
272 }
273
274
275
276 /* (non-Javadoc)
277 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
278 */
279 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
280 String nameSpace;
281 Class cdmClass;
282 Set<String> idSet;
283 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
284 try{
285 Set<String> currSpecIdSet = new HashSet<String>();
286 Set<String> typeIdSet = new HashSet<String>();
287
288 while (rs.next()){
289 handleForeignKey(rs, currSpecIdSet, "SpecCurrspecID");
290 handleTypeKey(rs, typeIdSet, "spectaxID", "copyright");
291 }
292
293 //taxon map
294 nameSpace = TAXON_NAMESPACE;
295 cdmClass = Taxon.class;
296 idSet = currSpecIdSet;
297 Map<String, Taxon> taxonMap = (Map<String, Taxon>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
298 result.put(nameSpace, taxonMap);
299
300 //type map
301 nameSpace = GlobisSpecTaxImport.TYPE_NAMESPACE;
302 cdmClass = Specimen.class;
303 idSet = typeIdSet;
304 Map<String, Specimen> typeMap = (Map<String, Specimen>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
305 result.put(nameSpace, typeMap);
306
307
308 } catch (SQLException e) {
309 throw new RuntimeException(e);
310 }
311 return result;
312 }
313
314 private void handleTypeKey(ResultSet rs, Set<String> idSet, String specTaxIdAttr, String copyrightAttr)
315 throws SQLException {
316 Integer specTaxId = nullSafeInt(rs, specTaxIdAttr);
317 if (specTaxId != null){
318 String copyright = rs.getString(copyrightAttr);
319 if (isNotBlank(copyright)){
320 String id = GlobisSpecTaxImport.getTypeId(specTaxId, transformCopyright2CollectionCode(copyright));
321 idSet.add(id);
322 }
323 }
324 }
325
326 /* (non-Javadoc)
327 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
328 */
329 @Override
330 protected boolean doCheck(GlobisImportState state){
331 IOValidator<GlobisImportState> validator = new GlobisImageImportValidator();
332 return validator.validate(state);
333 }
334
335
336 /* (non-Javadoc)
337 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
338 */
339 protected boolean isIgnore(GlobisImportState state){
340 return ! state.getConfig().isDoImages();
341 }
342
343
344
345
346
347 }