5b26a8e85206af94129b6abe7df2539d3511f424
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / berlinModel / in / BerlinModelNameFactsImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.berlinModel.in;
11
12 import static eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer.NAME_FACT_ALSO_PUBLISHED_IN;
13 import static eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer.NAME_FACT_BIBLIOGRAPHY;
14 import static eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer.NAME_FACT_PROTOLOGUE;
15
16 import java.io.File;
17 import java.io.IOException;
18 import java.net.MalformedURLException;
19 import java.net.URI;
20 import java.net.URISyntaxException;
21 import java.net.URL;
22 import java.sql.ResultSet;
23 import java.sql.SQLException;
24 import java.util.HashMap;
25 import java.util.HashSet;
26 import java.util.Map;
27 import java.util.Set;
28
29 import org.apache.commons.lang.StringUtils;
30 import org.apache.http.HttpException;
31 import org.apache.log4j.Logger;
32 import org.springframework.stereotype.Component;
33
34 import eu.etaxonomy.cdm.common.CdmUtils;
35 import eu.etaxonomy.cdm.common.media.ImageInfo;
36 import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelNameFactsImportValidator;
37 import eu.etaxonomy.cdm.io.common.IOValidator;
38 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
39 import eu.etaxonomy.cdm.model.agent.Person;
40 import eu.etaxonomy.cdm.model.common.CdmBase;
41 import eu.etaxonomy.cdm.model.common.Language;
42 import eu.etaxonomy.cdm.model.description.Feature;
43 import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
44 import eu.etaxonomy.cdm.model.description.TextData;
45 import eu.etaxonomy.cdm.model.media.ImageFile;
46 import eu.etaxonomy.cdm.model.media.Media;
47 import eu.etaxonomy.cdm.model.media.MediaRepresentation;
48 import eu.etaxonomy.cdm.model.media.MediaRepresentationPart;
49 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
50 import eu.etaxonomy.cdm.model.reference.Reference;
51
52
53 /**
54 * @author a.mueller
55 * @created 20.03.2008
56 * @version 1.0
57 */
58 @Component
59 public class BerlinModelNameFactsImport extends BerlinModelImportBase {
60 private static final Logger logger = Logger.getLogger(BerlinModelNameFactsImport.class);
61
62 public static final String NAMESPACE = "NameFact";
63
64 /**
65 * write info message after modCount iterations
66 */
67 private int modCount = 500;
68 private static final String pluralString = "name facts";
69 private static final String dbTableName = "NameFact";
70
71
72 public BerlinModelNameFactsImport(){
73 super(dbTableName, pluralString);
74 }
75
76
77
78
79 @Override
80 protected String getIdQuery(BerlinModelImportState state) {
81 if (StringUtils.isNotEmpty(state.getConfig().getNameIdTable())){
82 String result = super.getIdQuery(state);
83 result += " WHERE ptNameFk IN (SELECT NameId FROM " + state.getConfig().getNameIdTable() + ")";
84 return result;
85 }else{
86 return super.getIdQuery(state);
87 }
88 }
89
90
91
92
93 /* (non-Javadoc)
94 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
95 */
96 @Override
97 protected String getRecordQuery(BerlinModelImportConfigurator config) {
98 String strQuery =
99 " SELECT NameFact.*, Name.NameID as nameId, NameFactCategory.NameFactCategory " +
100 " FROM NameFact INNER JOIN " +
101 " Name ON NameFact.PTNameFk = Name.NameId INNER JOIN "+
102 " NameFactCategory ON NameFactCategory.NameFactCategoryID = NameFact.NameFactCategoryFK " +
103 " WHERE (NameFactId IN ("+ ID_LIST_TOKEN+") )";
104 return strQuery;
105 }
106
107 /* (non-Javadoc)
108 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#doPartition(eu.etaxonomy.cdm.io.berlinModel.in.ResultSetPartitioner, eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
109 */
110 public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) {
111 boolean success = true ;
112 BerlinModelImportConfigurator config = state.getConfig();
113 Set<TaxonNameBase> nameToSave = new HashSet<TaxonNameBase>();
114 Map<String, TaxonNameBase> nameMap = (Map<String, TaxonNameBase>) partitioner.getObjectMap(BerlinModelTaxonNameImport.NAMESPACE);
115 Map<String, Reference> biblioRefMap = partitioner.getObjectMap(BerlinModelReferenceImport.BIBLIO_REFERENCE_NAMESPACE);
116 Map<String, Reference> nomRefMap = partitioner.getObjectMap(BerlinModelReferenceImport.NOM_REFERENCE_NAMESPACE);
117
118 ResultSet rs = partitioner.getResultSet();
119
120 Reference<?> sourceRef = state.getTransactionalSourceReference();
121 try {
122 int i = 0;
123 //for each reference
124 while (rs.next() && (config.getMaximumNumberOfNameFacts() == 0 || i < config.getMaximumNumberOfNameFacts())){
125
126 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("NameFacts handled: " + (i-1));}
127
128 int nameFactId = rs.getInt("nameFactId");
129 int nameId = rs.getInt("nameId");
130 Object nameFactRefFkObj = rs.getObject("nameFactRefFk");
131 String nameFactRefDetail = rs.getString("nameFactRefDetail");
132
133 String category = CdmUtils.Nz(rs.getString("NameFactCategory"));
134 String nameFact = CdmUtils.Nz(rs.getString("nameFact"));
135
136 TaxonNameBase taxonNameBase = nameMap.get(String.valueOf(nameId));
137 String nameFactRefFk = String.valueOf(nameFactRefFkObj);
138 Reference citation = getReferenceOnlyFromMaps(biblioRefMap,
139 nomRefMap, nameFactRefFk);
140
141 if (taxonNameBase != null){
142 //PROTOLOGUE
143 if (category.equalsIgnoreCase(NAME_FACT_PROTOLOGUE)){
144 //Reference ref = (Reference)taxonNameBase.getNomenclaturalReference();
145 //ref = Book.NewInstance();
146 try{
147 Media media = getMedia(nameFact, config.getMediaUrl(), config.getMediaPath());
148 if (media.getRepresentations().size() > 0){
149 TaxonNameDescription description = TaxonNameDescription.NewInstance();
150 TextData protolog = TextData.NewInstance(Feature.PROTOLOGUE());
151 protolog.addMedia(media);
152 protolog.addSource(String.valueOf(nameFactId), NAMESPACE, null, null, null, null);
153 description.addElement(protolog);
154 taxonNameBase.addDescription(description);
155 if (citation != null){
156 description.addSource(null, null, citation, null);
157 protolog.addSource(null, null, citation, nameFactRefDetail, null, null);
158 }
159 }//end NAME_FACT_PROTOLOGUE
160 }catch(NullPointerException e){
161 logger.warn("MediaUrl and/or MediaPath not set. Could not get protologue.");
162 success = false;
163 }
164 }else if (category.equalsIgnoreCase(NAME_FACT_ALSO_PUBLISHED_IN)){
165 if (StringUtils.isNotBlank(nameFact)){
166 TaxonNameDescription description = TaxonNameDescription.NewInstance();
167 TextData additionalPublication = TextData.NewInstance(Feature.ADDITIONAL_PUBLICATION());
168 //TODO language
169 Language language = Language.DEFAULT();
170 additionalPublication.putText(language, nameFact);
171 additionalPublication.addSource(String.valueOf(nameFactId), NAMESPACE, citation,
172 nameFactRefDetail, null, null);
173 description.addElement(additionalPublication);
174 taxonNameBase.addDescription(description);
175 }
176 }else if (category.equalsIgnoreCase(NAME_FACT_BIBLIOGRAPHY)){
177 if (StringUtils.isNotBlank(nameFact)){
178 TaxonNameDescription description = TaxonNameDescription.NewInstance();
179 TextData bibliography = TextData.NewInstance(Feature.CITATION());
180 //TODO language
181 Language language = Language.DEFAULT();
182 bibliography.putText(language, nameFact);
183 bibliography.addSource(String.valueOf(nameFactId), NAMESPACE, citation,
184 nameFactRefDetail, null, null);
185 description.addElement(bibliography);
186 taxonNameBase.addDescription(description);
187 }
188 }else {
189 //TODO
190 logger.warn("NameFactCategory '" + category + "' not yet implemented");
191 success = false;
192 }
193
194 //TODO
195 // DoubtfulFlag bit Checked
196 // PublishFlag bit Checked
197 // Created_When datetime Checked
198 // Updated_When datetime Checked
199 // Created_Who nvarchar(255) Checked
200 // Updated_Who nvarchar(255) Checked
201 // Notes nvarchar(1000) Checked
202
203 nameToSave.add(taxonNameBase);
204 }else{
205 //TODO
206 logger.warn("TaxonName for NameFact " + nameFactId + " does not exist in store");
207 success = false;
208 }
209 //put
210 }
211 if (config.getMaximumNumberOfNameFacts() != 0 && i >= config.getMaximumNumberOfNameFacts() - 1){
212 logger.warn("ONLY " + config.getMaximumNumberOfNameFacts() + " NAMEFACTS imported !!!" )
213 ;};
214 logger.info("Names to save: " + nameToSave.size());
215 getNameService().save(nameToSave);
216 return success;
217 } catch (SQLException e) {
218 logger.error("SQLException:" + e);
219 return false;
220 }
221
222 }
223
224 /* (non-Javadoc)
225 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
226 */
227 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
228 String nameSpace;
229 Class cdmClass;
230 Set<String> idSet;
231 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
232
233 try{
234 Set<String> nameIdSet = new HashSet<String>();
235 Set<String> referenceIdSet = new HashSet<String>();
236 while (rs.next()){
237 handleForeignKey(rs, nameIdSet, "PTnameFk");
238 handleForeignKey(rs, referenceIdSet, "nameFactRefFk");
239 }
240
241 //name map
242 nameSpace = BerlinModelTaxonNameImport.NAMESPACE;
243 cdmClass = TaxonNameBase.class;
244 idSet = nameIdSet;
245 Map<String, Person> objectMap = (Map<String, Person>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
246 result.put(nameSpace, objectMap);
247
248 //nom reference map
249 nameSpace = BerlinModelReferenceImport.NOM_REFERENCE_NAMESPACE;
250 cdmClass = Reference.class;
251 idSet = referenceIdSet;
252 Map<String, Reference> nomReferenceMap = (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
253 result.put(nameSpace, nomReferenceMap);
254
255 //biblio reference map
256 nameSpace = BerlinModelReferenceImport.BIBLIO_REFERENCE_NAMESPACE;
257 cdmClass = Reference.class;
258 idSet = referenceIdSet;
259 Map<String, Reference> biblioReferenceMap = (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
260 result.put(nameSpace, biblioReferenceMap);
261
262
263 } catch (SQLException e) {
264 throw new RuntimeException(e);
265 }
266 return result;
267 }
268
269
270 //FIXME gibt es da keine allgemeine Methode in common?
271 public Media getMedia(String nameFact, URL mediaUrl, File mediaPath){
272 if (mediaUrl == null){
273 logger.warn("Media Url should not be null");
274 return null;
275 }
276 String mimeTypeTif = "image/tiff";
277 String mimeTypeJpg = "image/jpeg";
278 String mimeTypePng = "image/png";
279 String mimeTypePdf = "application/pdf";
280 String suffixTif = "tif";
281 String suffixJpg = "jpg";
282 String suffixPng = "png";
283 String suffixPdf = "pdf";
284
285 String sep = File.separator;
286 Integer size = null;
287
288 logger.debug("Getting media for NameFact: " + nameFact);
289
290 Media media = Media.NewInstance();
291
292 String mediaUrlString = mediaUrl.toString();
293
294 //tiff
295 String urlStringTif = mediaUrlString + "tif/" + nameFact + "." + suffixTif;
296 File file = new File(mediaPath, "tif" + sep + nameFact + "." + suffixTif);
297 MediaRepresentation representationTif = MediaRepresentation.NewInstance(mimeTypeTif, suffixTif);
298 if (file.exists()){
299 representationTif.addRepresentationPart(makeImage(urlStringTif, size, file));
300 }
301 if(representationTif.getParts().size() > 0){
302 media.addRepresentation(representationTif);
303 }
304 // end tif
305 // jpg
306 boolean fileExists = true;
307 int jpgCount = 0;
308 MediaRepresentation representationJpg = MediaRepresentation.NewInstance(mimeTypeJpg, suffixJpg);
309 while(fileExists){
310 String urlStringJpeg = mediaUrlString + "cmd_jpg/" + nameFact + "_page_000" + jpgCount + "." + suffixJpg;
311 file = new File(mediaPath, "cmd_jpg" + sep + nameFact + "_page_000" + jpgCount + "." + suffixJpg);
312 jpgCount++;
313 if (file.exists()){
314 representationJpg.addRepresentationPart(makeImage(urlStringJpeg, size, file));
315 }else{
316 fileExists = false;
317 }
318 }
319 if(representationJpg.getParts().size() > 0){
320 media.addRepresentation(representationJpg);
321 }
322 // end jpg
323 //png
324 String urlStringPng = mediaUrlString + "png/" + nameFact + "." + suffixPng;
325 file = new File(mediaPath, "png" + sep + nameFact + "." + suffixPng);
326 MediaRepresentation representationPng = MediaRepresentation.NewInstance(mimeTypePng, suffixPng);
327 if (file.exists()){
328 representationPng.addRepresentationPart(makeImage(urlStringPng, size, file));
329 }else{
330 fileExists = true;
331 int pngCount = 0;
332 while (fileExists){
333 pngCount++;
334 urlStringPng = mediaUrlString + "png/" + nameFact + "00" + pngCount + "." + suffixPng;
335 file = new File(mediaPath, "png" + sep + nameFact + "00" + pngCount + "." + suffixPng);
336
337 if (file.exists()){
338 representationPng.addRepresentationPart(makeImage(urlStringPng, size, file));
339 }else{
340 fileExists = false;
341 }
342 }
343 }
344 if(representationPng.getParts().size() > 0){
345 media.addRepresentation(representationPng);
346 }
347 //end png
348 //pdf
349 String urlStringPdf = mediaUrlString + "pdf/" + nameFact + "." + suffixPdf;
350 URI uriPdf;
351 try {
352 uriPdf = new URI(urlStringPdf);
353 file = new File(mediaPath, "pdf" + sep + nameFact + "." + suffixPdf);
354 MediaRepresentation representationPdf = MediaRepresentation.NewInstance(mimeTypePdf, suffixPdf);
355 if (file.exists()){
356 representationPdf.addRepresentationPart(MediaRepresentationPart.NewInstance(uriPdf, size));
357 }else{
358 fileExists = true;
359 int pdfCount = 0;
360 while (fileExists){
361 pdfCount++;
362 urlStringPdf = mediaUrlString + "pdf/" + nameFact + "00" + pdfCount + "." + suffixPdf;
363 file = new File(mediaPath, "pdf/" + sep + nameFact + "00" + pdfCount + "." + suffixPdf);
364
365 if (file.exists()){
366 representationPdf.addRepresentationPart(MediaRepresentationPart.NewInstance(uriPdf, size));
367 }else{
368 fileExists = false;
369 }
370 }
371 }
372 if(representationPdf.getParts().size() > 0){
373 media.addRepresentation(representationPdf);
374 }
375 } catch (URISyntaxException e) {
376 e.printStackTrace();
377 logger.error("URISyntaxException" + urlStringPdf);
378 }
379 //end pdf
380
381 if(logger.isDebugEnabled()){
382 for (MediaRepresentation rep : media.getRepresentations()){
383 for (MediaRepresentationPart part : rep.getParts()){
384 logger.debug("in representation: " + part.getUri());
385 }
386 }
387 }
388
389 return media;
390 }
391
392
393 private ImageFile makeImage(String imageUri, Integer size, File file){
394 ImageInfo imageMetaData = null;
395 URI uri;
396 try {
397 uri = new URI(imageUri);
398 try {
399 imageMetaData = ImageInfo.NewInstance(uri, 0);
400 } catch (IOException e) {
401 logger.error("IOError reading image metadata." , e);
402 } catch (HttpException e) {
403 logger.error("HttpException reading image metadata." , e);
404 }
405 ImageFile image = ImageFile.NewInstance(uri, size, imageMetaData);
406 return image;
407 } catch (URISyntaxException e1) {
408 logger.warn("URISyntaxException: " + imageUri);
409 return null;
410 }
411
412 }
413
414
415 /* (non-Javadoc)
416 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
417 */
418 @Override
419 protected boolean doCheck(BerlinModelImportState state){
420 IOValidator<BerlinModelImportState> validator = new BerlinModelNameFactsImportValidator();
421 return validator.validate(state);
422 }
423
424
425 /* (non-Javadoc)
426 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
427 */
428 protected boolean isIgnore(BerlinModelImportState state){
429 return ! state.getConfig().isDoNameFacts();
430 }
431
432
433
434 //for testing only
435 public static void main(String[] args) {
436
437 BerlinModelNameFactsImport nf = new BerlinModelNameFactsImport();
438
439 URL url;
440 try {
441 url = new URL("http://wp5.e-taxonomy.eu/dataportal/cichorieae/media/protolog/");
442 File path = new File("/Volumes/protolog/protolog/");
443 if(path.exists()){
444 String fact = "Acanthocephalus_amplexifolius";
445 // make getMedia public for this to work
446 Media media = nf.getMedia(fact, url, path);
447 logger.info(media);
448 for (MediaRepresentation rep : media.getRepresentations()){
449 logger.info(rep.getMimeType());
450 for (MediaRepresentationPart part : rep.getParts()){
451 logger.info(part.getUri());
452 }
453 }
454 }
455 } catch (MalformedURLException e) {
456 e.printStackTrace();
457 }
458 }
459 }