2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.globis
;
12 import java
.io
.IOException
;
13 import java
.net
.MalformedURLException
;
15 import java
.sql
.ResultSet
;
16 import java
.sql
.SQLException
;
17 import java
.util
.HashMap
;
18 import java
.util
.HashSet
;
21 import java
.util
.UUID
;
22 import java
.util
.regex
.Matcher
;
23 import java
.util
.regex
.Pattern
;
25 import org
.apache
.http
.client
.ClientProtocolException
;
26 import org
.apache
.log4j
.Logger
;
27 import org
.springframework
.stereotype
.Component
;
29 import eu
.etaxonomy
.cdm
.api
.facade
.DerivedUnitFacade
;
30 import eu
.etaxonomy
.cdm
.common
.UriUtils
;
31 import eu
.etaxonomy
.cdm
.io
.common
.IOValidator
;
32 import eu
.etaxonomy
.cdm
.io
.common
.ResultSetPartitioner
;
33 import eu
.etaxonomy
.cdm
.io
.globis
.validation
.GlobisImageImportValidator
;
34 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
35 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
36 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
37 import eu
.etaxonomy
.cdm
.model
.common
.Marker
;
38 import eu
.etaxonomy
.cdm
.model
.common
.MarkerType
;
39 import eu
.etaxonomy
.cdm
.model
.common
.OriginalSourceType
;
40 import eu
.etaxonomy
.cdm
.model
.media
.Media
;
41 import eu
.etaxonomy
.cdm
.model
.name
.IZoologicalName
;
42 import eu
.etaxonomy
.cdm
.model
.name
.TaxonName
;
43 import eu
.etaxonomy
.cdm
.model
.occurrence
.Collection
;
44 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivedUnit
;
45 import eu
.etaxonomy
.cdm
.model
.occurrence
.DeterminationEvent
;
46 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
47 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
48 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
57 public class GlobisImageImport
extends GlobisImportBase
<Taxon
> {
58 private static final Logger logger
= Logger
.getLogger(GlobisImageImport
.class);
60 private int modCount
= 1000;
62 private UUID uuidArtNonSpecTaxMarkerType
= UUID
.fromString("be362085-0f5b-4314-96d1-78b9b129ef6d") ;
63 private static final String pluralString
= "images";
64 private static final String dbTableName
= "Einzelbilder";
65 private static final Class
<?
> cdmTargetClass
= Media
.class; //not needed
67 private static UUID uuidGartRef
= UUID
.fromString("af85470f-6e54-4304-9d29-fd117cd56161");
/**
 * Creates the image import step, handing the log label, the source table
 * name and the target class to the base class constructor.
 */
public GlobisImageImport() {
    super(
        pluralString,
        dbTableName,
        cdmTargetClass
    );
}
77 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#getIdQuery()
/**
 * Builds the SQL statement for the id query and returns it.
 * The visible part of the statement ends with " FROM " followed by dbTableName.
 * NOTE(review): the original source line between the assignment and the FROM part
 * is not present in this listing (presumably the SELECT clause) - confirm against the
 * complete file before changing this query.
 *
 * @return the id query string
 */
80 protected String
getIdQuery() {
81 String strRecordQuery
=
83 " FROM " + dbTableName
;
84 return strRecordQuery
;
91 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
94 protected String
getRecordQuery(GlobisImportConfigurator config
) {
95 String strRecordQuery
=
96 " SELECT i.*, NULL as Created_When, NULL as Created_Who," +
97 " NULL as Updated_who, NULL as Updated_When, NULL as Notes, st.SpecCurrspecID " +
98 " FROM " + getTableName() + " i " +
99 " LEFT JOIN specTax st ON i.spectaxID = st.SpecTaxID " +
100 " WHERE ( i.BildId IN (" + ID_LIST_TOKEN
+ ") )";
101 return strRecordQuery
;
107 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doPartition(eu.etaxonomy.cdm.io.common.ResultSetPartitioner, eu.etaxonomy.cdm.io.globis.GlobisImportState)
/**
 * Handles one partition of image records.
 * Visible steps per record: read the record columns, create the media objects
 * (makeAllMedia), look up an existing type specimen in typeMap when "Motiv" starts
 * with "type specimen", create a new preserved specimen when none was found, add
 * sources / annotation / marker to the specimen, attach the media through a
 * DerivedUnitFacade and create a DeterminationEvent for taxon and specimen.
 * After the records are handled the collected media are saved via the media service.
 * NOTE(review): several original lines of this method are not present in this listing
 * (loop header, try, some else branches, closing braces, declarations of i / title / taxon
 * and the return statement). The comments below describe only the visible statements.
 *
 * @param partitioner supplies the related-object maps and the result set
 * @param state the import state, passed on to the helper methods
 * @return not visible in this listing; a local flag "success" is initialised to true
 */
110 public boolean doPartition(ResultSetPartitioner partitioner
, GlobisImportState state
) {
111 boolean success
= true;
113 Set
<Media
> objectsToSave
= new HashSet
<>();
// related objects loaded for this partition, keyed by their id in the source
115 Map
<String
, DerivedUnit
> typeMap
= partitioner
.getObjectMap(TYPE_NAMESPACE
);
117 Map
<String
, Taxon
> taxonMap
= partitioner
.getObjectMap(TAXON_NAMESPACE
);
118 Map
<String
, TaxonName
> specTaxNameMap
= partitioner
.getObjectMap(SPEC_TAX_NAMESPACE
);
120 ResultSet rs
= partitioner
.getResultSet();
// reference with uuidGartRef; used below as citation of the specimenId source
122 Reference refGart
= getReferenceService().find(uuidGartRef
);
// progress message each time the counter reaches a multiple of modCount
132 if ((i
++ % modCount
) == 0 && i
!= 1 ){ logger
.info(pluralString
+ " handled: " + (i
-1));}
134 Integer bildID
= rs
.getInt("BildID");
135 Integer spectaxID
= nullSafeInt(rs
, "spectaxID");
136 Integer taxonID
= nullSafeInt(rs
, "SpecCurrspecID");
137 String copyright
= rs
.getString("copyright");
138 String specimenId
= rs
.getString("specimenID");
139 String bemerkungen
= rs
.getString("Bemerkungen");
140 String artNotSpecTax
= rs
.getString("Art non spectax");
141 String motiv
= rs
.getString("Motiv");
144 // [file lab2], same as Dateiname04 but less data
147 Set
<Media
> recordMedia
= new HashSet
<Media
>();
// fills recordMedia with the media of this record; objectsToSave is passed along as well
151 makeAllMedia(state
, rs
, recordMedia
, objectsToSave
);
155 DerivedUnit specimen
= null;
156 if (spectaxID
!= null){
157 //try to find type specimen
158 if (isNotBlank(motiv
) && (motiv
.startsWith("type specimen"))){
159 String collectionCode
= transformCopyright2CollectionCode(copyright
);
160 String id
= GlobisSpecTaxImport
.getTypeId(spectaxID
, collectionCode
);
161 specimen
= typeMap
.get(id
);
164 //try to find specTaxName
165 IZoologicalName specTaxTaxonName
= specTaxNameMap
.get(String
.valueOf(spectaxID
));
// title is used further down as part of the title cache of a newly created specimen
166 if (specTaxTaxonName
!= null){
167 title
= " taxon name " + specTaxTaxonName
.getTitleCache();
169 title
= " spectaxID " + spectaxID
;
172 title
= " name " + getNameFromFileOs(rs
) + (isBlank(specimenId
)?
"" : " (specimenId: " + specimenId
+ ")");
// no specimen found so far: create a preserved specimen and assign its collection
176 if (specimen
== null){
177 specimen
= DerivedUnit
.NewPreservedSpecimenInstance();
178 specimen
.setTitleCache("Specimen for " + title
);
179 String collectionCode
= transformCopyright2CollectionCode(copyright
);
181 Collection collection
= getCollection(collectionCode
);
182 specimen
.setCollection(collection
);
// import source: BildID in the IMAGE_NAMESPACE
187 specimen
.addSource(OriginalSourceType
.Import
, String
.valueOf(bildID
), IMAGE_NAMESPACE
, state
.getTransactionalSourceReference(), null);
189 //GART id (specimenID)
190 if (isNotBlank(specimenId
)){
191 specimen
.addSource(OriginalSourceType
.Lineage
, specimenId
, "specimenId", refGart
, null);
// the "Bemerkungen" column is stored as annotation on the specimen
194 if (isNotBlank(bemerkungen
)){
195 Annotation annotation
= Annotation
.NewInstance(bemerkungen
, null, null);
196 specimen
.addAnnotation(annotation
);
// attach every media of this record to the specimen via the facade
199 DerivedUnitFacade facade
= DerivedUnitFacade
.NewInstance(specimen
);
200 for (Media media
: recordMedia
){
201 facade
.addDerivedUnitMedia(media
);
// "ja" (case-insensitive) sets a true marker; the warning below is logged for other values
// NOTE(review): the else keyword between the two statements is not visible in this listing
204 if (isNotBlank(artNotSpecTax
)){
205 if (artNotSpecTax
.equalsIgnoreCase("ja")){
206 MarkerType artNotSpecTaxMarker
= getMarkerType(state
, uuidArtNonSpecTaxMarkerType
, "Art non spectax", "This marker is true if in the orginal data the 'Art non spectax' was 'ja'", null) ;
207 specimen
.addMarker(Marker
.NewInstance(artNotSpecTaxMarker
, true));
209 logger
.warn(artNotSpecTax
+ " is not a valid value for 'Art non spectax' (BildID: " + bildID
+ ")" );
213 if (spectaxID
!= null){
215 //add to image gallery (discuss if this is also needed if taxon is already added to type specimen
216 // Taxon taxon = taxonMap.get(String.valueOf(taxonID));
217 IZoologicalName specTaxTaxonName
= specTaxNameMap
.get(String
.valueOf(spectaxID
));
220 // if (taxon == null){
221 //// taxon = specTaxMap.get(String.valueOf(spectaxID));
222 //// specTaxName = specTaxMap.g
224 // if (taxon == null){
225 // logger.warn("No taxon available for specTaxID: " + spectaxID);
227 // name = CdmBase.deproxy(taxon.getName(), ZoologicalName.class);
232 if (specTaxTaxonName
== null){
233 logger
.warn("Name could not be found for spectaxID: " + spectaxID
+ " in BildID: " + bildID
);
// taxon is taken from the taxa of the name; the statements below create a new Taxon without sec reference
// NOTE(review): the condition guarding the creation is not visible in this listing
236 for (Taxon specTaxTaxon
: specTaxTaxonName
.getTaxa()){
237 taxon
= specTaxTaxon
;
241 Reference undefinedSec
= null;
242 taxon
= Taxon
.NewInstance(specTaxTaxonName
, undefinedSec
);
// creates a determination event for taxon and specimen
245 DeterminationEvent
.NewInstance(taxon
, specimen
);
252 // if (taxon != null){
253 // TaxonDescription taxonDescription = getTaxonDescription(taxon, true, true);
254 // if (taxonDescription.getElements().size() == 0){
255 // TextData textData = TextData.NewInstance(Feature.IMAGE());
256 // taxonDescription.addElement(textData);
258 // Set<DescriptionElementBase> elements = taxonDescription.getElements();
259 // TextData textData = CdmBase.deproxy(elements.iterator().next(), TextData.class);
260 // for (Media media: recordMedia){
261 // textData.addMedia(media);
// any exception raised here is logged as a warning together with the bildID
266 } catch (Exception e
) {
267 logger
.warn("Exception in Einzelbilder: bildID " + bildID
+ ". " + e
.getMessage());
// persist all media collected in objectsToSave
273 logger
.info(pluralString
+ " to save: " + objectsToSave
.size());
274 getMediaService().save(objectsToSave
);
277 } catch (SQLException e
) {
278 logger
.error("SQLException:" + e
);
/**
 * Returns the Collection for the given collection code.
 * NOTE(review): the body of this method is not present in this listing, so how the
 * collection is looked up or created cannot be stated here - check the complete file.
 *
 * @param collectionCode the collection code, as produced by transformCopyright2CollectionCode() in doPartition()
 * @return the collection belonging to the code
 */
283 private Collection
getCollection(String collectionCode
) {
/**
 * Derives a name from the "file OS" column of the current record.
 * The column value is matched as a whole against a file name pattern; group 1 is the
 * leading part of the file name in front of the "_xxxx[-x][_Nrddd]_xx.jpg" style suffix.
 * A warning containing the file name is logged by the last visible statement.
 * NOTE(review): the return statements and the else branch are not present in this
 * listing; presumably group 1 is returned on a match and the warning belongs to the
 * non-matching case - confirm against the complete file.
 *
 * @param rs the result set positioned on the current record
 * @return the name derived from the file name
 * @throws SQLException if the column can not be read
 */
291 private String
getNameFromFileOs(ResultSet rs
) throws SQLException
{
292 String fileOS
= rs
.getString("file OS");
// the pattern is compiled on every call
293 Pattern pattern
= Pattern
.compile("(.+)(_.{4}(-.{1,3})?(_Nr\\d{3,4})?_.{2,3}\\.jpg)");
294 Matcher matcher
= pattern
.matcher(fileOS
);
// matches() requires the complete file name to fit the pattern
295 if (matcher
.matches()){
296 String match
= matcher
.group(1);
299 logger
.warn("FileOS does not match: " + fileOS
);
307 private void makeAllMedia(GlobisImportState state
, ResultSet rs
, Set
<Media
> recordMedia
, Set
<Media
> objectsToSave
) throws SQLException
{
309 String pathShort
= rs
.getString("Dateipfad_kurz");
310 String fileOS
= rs
.getString("file OS");
311 pathShort
= pathShort
.replace(fileOS
, "");
312 String newPath
= state
.getConfig().getImageBaseUrl();
313 String path
= pathShort
.replace("image:Webversionen/", newPath
);
315 Media singleMedia
= makeMedia(state
, rs
, "file OS", "Legende 1", path
, objectsToSave
);
316 recordMedia
.add(singleMedia
);
317 singleMedia
= makeMedia(state
, rs
, "Dateinamen02", "Legende 2", path
, objectsToSave
);
318 recordMedia
.add(singleMedia
);
319 singleMedia
= makeMedia(state
, rs
, "Dateinamen03", "Legende 3", path
, objectsToSave
);
320 recordMedia
.add(singleMedia
);
321 singleMedia
= makeMedia(state
, rs
, "Dateinamen04", "Legende 4", path
, objectsToSave
);
322 recordMedia
.add(singleMedia
);
/**
 * Creates one media object for a file name / legend column pair of the current record.
 * The URI is the given path followed by the file name, with blanks encoded as "%20".
 * Visible steps: when the legend is blank and media data are to be read, the URI is
 * requested and a warning is logged if the response is ok; then the media is created
 * with getImageMedia(), gets the legend as English title, is passed to
 * doIdCreatedUpdatedNotes() with the BildID and is added to objectsToSave.
 * NOTE(review): the declaration of "media", the try keyword, the else branch of the
 * response check, the bodies of the catch blocks and the return statement are not
 * present in this listing - what is returned in the failure cases cannot be stated here.
 *
 * @param state the import state providing the configuration
 * @param rs the result set positioned on the current record
 * @param fileNameAttr name of the column holding the file name
 * @param legendAttr name of the column holding the legend
 * @param path the path prepended to the file name
 * @param objectsToSave receives the created media
 * @return the media object
 * @throws SQLException if a column can not be read
 */
326 private Media
makeMedia(GlobisImportState state
, ResultSet rs
, String fileNameAttr
, String legendAttr
, String path
, Set
<Media
> objectsToSave
) throws SQLException
{
328 String fileName
= rs
.getString(fileNameAttr
);
329 String legend
= rs
.getString(legendAttr
);
330 Integer bildID
= rs
.getInt("BildID");
// a null fileName ends up as the text "null" in the URI string (string concatenation)
332 String uriStr
= path
+fileName
;
// only blanks are encoded; other characters are passed to URI.create() unchanged
333 uriStr
= uriStr
.replace(" ", "%20");
335 URI uri
= URI
.create(uriStr
);
337 // Media media = ImageInfo.NewInstanceWithMetaData(uri, null);
340 boolean readMediaData
= state
.getConfig().isDoReadMediaData();
// the URI is only requested when media data are read and the legend is blank
341 if (isBlank(legend
) && readMediaData
){
342 if (UriUtils
.isOk(UriUtils
.getResponse(uri
, null))){
343 logger
.warn("Image exists but legend is null " + uri
+ ", bildID" + bildID
);
349 media
= this.getImageMedia(uri
.toString(), readMediaData
);
350 media
.putTitle(Language
.ENGLISH(), legend
);
351 this.doIdCreatedUpdatedNotes(state
, media
, rs
, bildID
, IMAGE_NAMESPACE
);
353 objectsToSave
.add(media
);
356 } catch (MalformedURLException e
) {
358 } catch (ClientProtocolException e
) {
360 } catch (IOException e
) {
/**
 * Maps the text of the copyright column to a collection code.
 * The copyright text is compared with a fixed list of institution names, either with
 * equals() or with matches(); an entry that fits none of them is logged as a warning.
 * In the regular expressions ".?" stands at the positions of non-ASCII letters
 * (e.g. "f.?r", "Universit.?t", "M.?nchen"), presumably to accept different encodings
 * of the umlauts in the source data.
 * NOTE(review): the return statements of all branches are not present in this listing,
 * so the codes returned per institution cannot be stated here.
 *
 * @param copyright the value of the copyright column, may be blank
 * @return the collection code for the copyright text
 */
367 private String
transformCopyright2CollectionCode(String copyright
){
369 if (isBlank(copyright
)){
371 }else if(copyright
.matches("Museum f.?r Naturkunde der Humboldt-Universit.?t, Berlin")){
373 }else if(copyright
.matches("Staatliches Museum f.?r Tierkunde Dresden")){
375 }else if(copyright
.equals("Natural History Museum, London")){
377 }else if(copyright
.matches("Zoologische Staatssammlung M.?nchen")){
379 }else if(copyright
.matches("Staatliches Museum f.?r Naturkunde Karlsruhe")){
381 }else if(copyright
.matches("Deutsches Entomologisches Institut M.?ncheberg")){
383 }else if(copyright
.equals("Forschungsinstitut und Naturmuseum Senckenberg")){
385 }else if(copyright
.matches("Mus.?um National d.?Histoire Naturelle, Paris")){
387 }else if(copyright
.equals("Naturhistorisches Museum Wien")){
389 }else if(copyright
.equals("Naturhistoriska Riksmuseet Stockholm")){
391 }else if(copyright
.matches("Staatliches Museum f.?r Naturkunde Stuttgart")){
393 }else if(copyright
.equals("United States National Museum of Natural History, Washington")){
395 }else if(copyright
.matches("Zentrum f.?r Biodokumentation des Saarlandes")){
397 }else if(copyright
.equals("Zoological Museum, University of Copenhagen")){
// two spellings of the Bonn museum are handled by separate branches
399 }else if(copyright
.equals("Zoologisches Forschungsinstitut und Museum \"Alexander Koenig\", Bonn")){
401 }else if(copyright
.equals("Zoologisches Forschungsmuseum \"Alexander Koenig\", Bonn")){
403 }else if(copyright
.matches("Zoologisches Institut der Martin-Luther-Universit.?t Halle-Wittenberg")){
405 }else if(copyright
.matches("Zoologisches Institut Universit.?t T.?bingen")){
408 logger
.warn("Unknown copyright entry: " + copyright
);
/**
 * Loads the objects related to the records of a partition.
 * Visible steps: the ids of the "SpecCurrspecID" and "spectaxID" columns and the type ids
 * built from "spectaxID" and "copyright" are collected into id sets; the names for the
 * spectax ids and the derived units for the type namespace are then fetched with
 * getSourcedObjectsByIdInSource() and stored in the result map under their namespace.
 * Loading of taxa by SpecCurrspecID is commented out. A SQLException is rethrown as
 * RuntimeException.
 * NOTE(review): the declarations of nameSpace / cdmClass / idSet, the try keyword, the
 * loop over the result set, the assignment of idSet before the second lookup and the
 * return statement are not present in this listing.
 *
 * @param rs the result set of the partition
 * @param state the import state
 * @return map from namespace to the map of related objects keyed by source id
 */
415 public Map
<Object
, Map
<String
, ?
extends CdmBase
>> getRelatedObjectsForPartition(ResultSet rs
, GlobisImportState state
) {
419 Map
<Object
, Map
<String
, ?
extends CdmBase
>> result
= new HashMap
<Object
, Map
<String
, ?
extends CdmBase
>>();
421 Set
<String
> currSpecIdSet
= new HashSet
<>();
422 Set
<String
> specTaxIdSet
= new HashSet
<>();
423 Set
<String
> typeIdSet
= new HashSet
<>();
// collect the ids of the current record into the id sets
426 handleForeignKey(rs
, currSpecIdSet
, "SpecCurrspecID");
427 handleForeignKey(rs
, specTaxIdSet
, "spectaxID");
428 handleTypeKey(rs
, typeIdSet
, "spectaxID", "copyright");
// names that were imported with a spectax id
432 nameSpace
= SPEC_TAX_NAMESPACE
;
433 cdmClass
= TaxonName
.class;
434 idSet
= specTaxIdSet
;
435 Map
<String
, TaxonName
> specTaxNameMap
= (Map
<String
, TaxonName
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
436 result
.put(nameSpace
, specTaxNameMap
);
439 // nameSpace = TAXON_NAMESPACE;
440 // cdmClass = Taxon.class;
441 // idSet = currSpecIdSet;
442 // Map<String, Taxon> taxonMap = (Map<String, Taxon>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
443 // result.put(nameSpace, taxonMap);
// derived units stored under the type namespace of GlobisSpecTaxImport
// NOTE(review): no assignment of idSet is visible before this lookup (presumably typeIdSet) - confirm
447 nameSpace
= GlobisSpecTaxImport
.TYPE_NAMESPACE
;
448 cdmClass
= DerivedUnit
.class;
450 Map
<String
, DerivedUnit
> typeMap
= (Map
<String
, DerivedUnit
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
451 result
.put(nameSpace
, typeMap
);
454 } catch (SQLException e
) {
455 throw new RuntimeException(e
);
/**
 * Builds the type id of the current record from its spectax id and its copyright text.
 * The id is only computed when the spectax id is not null and the copyright is not
 * blank; it is created by GlobisSpecTaxImport.getTypeId() from the spectax id and the
 * collection code derived from the copyright.
 * NOTE(review): the statement that uses the computed id is not present in this listing;
 * presumably it is added to idSet - confirm against the complete file.
 *
 * @param rs the result set positioned on the current record
 * @param idSet the set of type ids (not referenced by the visible statements)
 * @param specTaxIdAttr name of the column holding the spectax id
 * @param copyrightAttr name of the column holding the copyright text
 * @throws SQLException if a column can not be read
 */
460 private void handleTypeKey(ResultSet rs
, Set
<String
> idSet
, String specTaxIdAttr
, String copyrightAttr
) throws SQLException
{
461 Integer specTaxId
= nullSafeInt(rs
, specTaxIdAttr
);
462 if (specTaxId
!= null){
463 String copyright
= rs
.getString(copyrightAttr
);
464 if (isNotBlank(copyright
)){
// same id construction as used in doPartition() for the typeMap lookup
465 String id
= GlobisSpecTaxImport
.getTypeId(specTaxId
, transformCopyright2CollectionCode(copyright
));
472 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
475 protected boolean doCheck(GlobisImportState state
){
476 IOValidator
<GlobisImportState
> validator
= new GlobisImageImportValidator();
477 return validator
.validate(state
);
482 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
485 protected boolean isIgnore(GlobisImportState state
){
486 return ! state
.getConfig().isDoImages();
493 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doInvoke(eu.etaxonomy.cdm.io.globis.GlobisImportState)
496 protected void doInvoke(GlobisImportState state
) {
497 Reference refGart
= ReferenceFactory
.newGeneric();
498 refGart
.setTitleCache("GART");
499 refGart
.setUuid(uuidGartRef
);
500 getReferenceService().saveOrUpdate(refGart
);
501 super.doInvoke(state
);