2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.globis
;
12 import java
.io
.IOException
;
13 import java
.net
.MalformedURLException
;
15 import java
.sql
.ResultSet
;
16 import java
.sql
.SQLException
;
17 import java
.util
.HashMap
;
18 import java
.util
.HashSet
;
21 import java
.util
.UUID
;
22 import java
.util
.regex
.Matcher
;
23 import java
.util
.regex
.Pattern
;
25 import org
.apache
.http
.client
.ClientProtocolException
;
26 import org
.apache
.log4j
.Logger
;
27 import org
.springframework
.stereotype
.Component
;
29 import eu
.etaxonomy
.cdm
.api
.facade
.DerivedUnitFacade
;
30 import eu
.etaxonomy
.cdm
.common
.UriUtils
;
31 import eu
.etaxonomy
.cdm
.io
.common
.IOValidator
;
32 import eu
.etaxonomy
.cdm
.io
.common
.ResultSetPartitioner
;
33 import eu
.etaxonomy
.cdm
.io
.globis
.validation
.GlobisImageImportValidator
;
34 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
35 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
36 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
37 import eu
.etaxonomy
.cdm
.model
.common
.Marker
;
38 import eu
.etaxonomy
.cdm
.model
.common
.MarkerType
;
39 import eu
.etaxonomy
.cdm
.model
.common
.OriginalSourceType
;
40 import eu
.etaxonomy
.cdm
.model
.media
.Media
;
41 import eu
.etaxonomy
.cdm
.model
.name
.ZoologicalName
;
42 import eu
.etaxonomy
.cdm
.model
.occurrence
.Collection
;
43 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivedUnit
;
44 import eu
.etaxonomy
.cdm
.model
.occurrence
.DeterminationEvent
;
45 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
46 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
47 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
56 public class GlobisImageImport
extends GlobisImportBase
<Taxon
> {
57 private static final Logger logger
= Logger
.getLogger(GlobisImageImport
.class);
59 private int modCount
= 1000;
61 private UUID uuidArtNonSpecTaxMarkerType
= UUID
.fromString("be362085-0f5b-4314-96d1-78b9b129ef6d") ;
62 private static final String pluralString
= "images";
63 private static final String dbTableName
= "Einzelbilder";
64 private static final Class
<?
> cdmTargetClass
= Media
.class; //not needed
66 private static UUID uuidGartRef
= UUID
.fromString("af85470f-6e54-4304-9d29-fd117cd56161");
/**
 * Creates the image import and hands the plural label ("images"), the source table
 * name ("Einzelbilder") and the CDM target class to the base class constructor.
 */
public GlobisImageImport() {
    super(pluralString, dbTableName, cdmTargetClass);
}
76 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#getIdQuery()
// Returns the SQL string used as id query for this import. The visible part of the statement
// ends with " FROM " followed by the source table name (dbTableName).
// NOTE(review): the line that starts the statement (the SELECT clause) is not visible in this
// listing - confirm which id column is selected; getRecordQuery filters on i.BildId.
79 protected String
getIdQuery() {
80 String strRecordQuery
=
82 " FROM " + dbTableName
;
83 return strRecordQuery
;
90 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#getRecordQuery(eu.etaxonomy.cdm.io.globis.GlobisImportConfigurator)
93 protected String
getRecordQuery(GlobisImportConfigurator config
) {
94 String strRecordQuery
=
95 " SELECT i.*, NULL as Created_When, NULL as Created_Who," +
96 " NULL as Updated_who, NULL as Updated_When, NULL as Notes, st.SpecCurrspecID " +
97 " FROM " + getTableName() + " i " +
98 " LEFT JOIN specTax st ON i.spectaxID = st.SpecTaxID " +
99 " WHERE ( i.BildId IN (" + ID_LIST_TOKEN
+ ") )";
100 return strRecordQuery
;
106 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doPartition(eu.etaxonomy.cdm.io.common.ResultSetPartitioner, eu.etaxonomy.cdm.io.globis.GlobisImportState)
// Imports one partition of image records ("Einzelbilder") from the partitioner's result set.
//
// Visible steps for a record:
//  - reads BildID, spectaxID, SpecCurrspecID, copyright, specimenID, Bemerkungen,
//    "Art non spectax" and Motiv from the result set;
//  - creates the media of the record via makeAllMedia (also collected in objectsToSave);
//  - if spectaxID is set and Motiv starts with "type specimen", looks up an existing type
//    specimen in typeMap, keyed by GlobisSpecTaxImport.getTypeId(spectaxID, collectionCode);
//  - builds a title from the spec tax name, the spectaxID or the file name;
//  - if no specimen was found, creates a preserved specimen titled "Specimen for <title>" and
//    assigns the collection derived from the copyright entry;
//  - adds an Import source (BildID, IMAGE_NAMESPACE), a Lineage source for a non-blank
//    specimenID (reference "GART") and an annotation for non-blank Bemerkungen;
//  - attaches all media of the record to the specimen through a DerivedUnitFacade;
//  - adds the 'Art non spectax' marker when the value equals "ja" (case-insensitive);
//  - if spectaxID is set, resolves the spec tax name, takes a taxon from the name's taxa or
//    creates a new Taxon with a null sec reference, and creates a DeterminationEvent.
// An Exception is logged as a warning that includes the bildID. objectsToSave is saved through
// the media service. A SQLException is logged as an error.
//
// NOTE(review): several lines are not visible in this listing (loop header, "try", "else"
// branches, the declarations of i, title and taxon, closing braces and the return statement),
// so the exact branch structure and the returned value must be confirmed against the full file.
109 public boolean doPartition(ResultSetPartitioner partitioner
, GlobisImportState state
) {
110 boolean success
= true;
112 Set
<Media
> objectsToSave
= new HashSet
<Media
>();
114 Map
<String
, DerivedUnit
> typeMap
= (Map
<String
, DerivedUnit
>) partitioner
.getObjectMap(TYPE_NAMESPACE
);
116 Map
<String
, Taxon
> taxonMap
= (Map
<String
, Taxon
>) partitioner
.getObjectMap(TAXON_NAMESPACE
);
117 Map
<String
, ZoologicalName
> specTaxNameMap
= (Map
<String
, ZoologicalName
>) partitioner
.getObjectMap(SPEC_TAX_NAMESPACE
);
119 ResultSet rs
= partitioner
.getResultSet();
121 Reference
<?
> refGart
= getReferenceService().find(uuidGartRef
);
// progress message each time modCount records have been handled, but not for the first record
131 if ((i
++ % modCount
) == 0 && i
!= 1 ){ logger
.info(pluralString
+ " handled: " + (i
-1));}
133 Integer bildID
= rs
.getInt("BildID");
134 Integer spectaxID
= nullSafeInt(rs
, "spectaxID");
135 Integer taxonID
= nullSafeInt(rs
, "SpecCurrspecID");
136 String copyright
= rs
.getString("copyright");
137 String specimenId
= rs
.getString("specimenID");
138 String bemerkungen
= rs
.getString("Bemerkungen");
139 String artNotSpecTax
= rs
.getString("Art non spectax");
140 String motiv
= rs
.getString("Motiv");
143 // [file lab2], same as Dateiname04 but less data
146 Set
<Media
> recordMedia
= new HashSet
<Media
>();
150 makeAllMedia(state
, rs
, recordMedia
, objectsToSave
);
// specimen: an existing type specimen is looked up first; a new one is created further down if none was found
154 DerivedUnit specimen
= null;
155 if (spectaxID
!= null){
156 //try to find type specimen
157 if (isNotBlank(motiv
) && (motiv
.startsWith("type specimen"))){
158 String collectionCode
= transformCopyright2CollectionCode(copyright
);
159 String id
= GlobisSpecTaxImport
.getTypeId(spectaxID
, collectionCode
);
160 specimen
= typeMap
.get(id
);
163 //try to find specTaxName
164 ZoologicalName specTaxTaxonName
= specTaxNameMap
.get(String
.valueOf(spectaxID
));
165 if (specTaxTaxonName
!= null){
166 title
= " taxon name " + specTaxTaxonName
.getTitleCache();
168 title
= " spectaxID " + spectaxID
;
171 title
= " name " + getNameFromFileOs(rs
) + (isBlank(specimenId
)?
"" : " (specimenId: " + specimenId
+ ")");
175 if (specimen
== null){
176 specimen
= DerivedUnit
.NewPreservedSpecimenInstance();
177 specimen
.setTitleCache("Specimen for " + title
);
178 String collectionCode
= transformCopyright2CollectionCode(copyright
);
180 Collection collection
= getCollection(collectionCode
);
181 specimen
.setCollection(collection
);
186 specimen
.addSource(OriginalSourceType
.Import
, String
.valueOf(bildID
), IMAGE_NAMESPACE
, state
.getTransactionalSourceReference(), null);
188 //GART id (specimenID)
189 if (isNotBlank(specimenId
)){
190 specimen
.addSource(OriginalSourceType
.Lineage
, specimenId
, "specimenId", refGart
, null);
193 if (isNotBlank(bemerkungen
)){
194 Annotation annotation
= Annotation
.NewInstance(bemerkungen
, null, null);
195 specimen
.addAnnotation(annotation
);
// attach all media created for this record to the specimen
198 DerivedUnitFacade facade
= DerivedUnitFacade
.NewInstance(specimen
);
199 for (Media media
: recordMedia
){
200 facade
.addDerivedUnitMedia(media
);
203 if (isNotBlank(artNotSpecTax
)){
204 if (artNotSpecTax
.equalsIgnoreCase("ja")){
205 MarkerType artNotSpecTaxMarker
= getMarkerType(state
, uuidArtNonSpecTaxMarkerType
, "Art non spectax", "This marker is true if in the orginal data the 'Art non spectax' was 'ja'", null) ;
206 specimen
.addMarker(Marker
.NewInstance(artNotSpecTaxMarker
, true));
208 logger
.warn(artNotSpecTax
+ " is not a valid value for 'Art non spectax' (BildID: " + bildID
+ ")" );
// link the specimen to a taxon of the spec tax name through a determination event
212 if (spectaxID
!= null){
214 //add to image gallery (discuss if this is also needed if taxon is already added to type specimen
215 // Taxon taxon = taxonMap.get(String.valueOf(taxonID));
216 ZoologicalName specTaxTaxonName
= specTaxNameMap
.get(String
.valueOf(spectaxID
));
219 // if (taxon == null){
220 //// taxon = specTaxMap.get(String.valueOf(spectaxID));
221 //// specTaxName = specTaxMap.g
223 // if (taxon == null){
224 // logger.warn("No taxon available for specTaxID: " + spectaxID);
226 // name = CdmBase.deproxy(taxon.getName(), ZoologicalName.class);
231 if (specTaxTaxonName
== null){
232 logger
.warn("Name could not be found for spectaxID: " + spectaxID
+ " in BildID: " + bildID
);
235 for (Taxon specTaxTaxon
: specTaxTaxonName
.getTaxa()){
236 taxon
= specTaxTaxon
;
240 Reference
<?
> undefinedSec
= null;
241 taxon
= Taxon
.NewInstance(specTaxTaxonName
, undefinedSec
);
244 DeterminationEvent
.NewInstance(taxon
, specimen
);
251 // if (taxon != null){
252 // TaxonDescription taxonDescription = getTaxonDescription(taxon, true, true);
253 // if (taxonDescription.getElements().size() == 0){
254 // TextData textData = TextData.NewInstance(Feature.IMAGE());
255 // taxonDescription.addElement(textData);
257 // Set<DescriptionElementBase> elements = taxonDescription.getElements();
258 // TextData textData = CdmBase.deproxy(elements.iterator().next(), TextData.class);
259 // for (Media media: recordMedia){
260 // textData.addMedia(media);
265 } catch (Exception e
) {
266 logger
.warn("Exception in Einzelbilder: bildID " + bildID
+ ". " + e
.getMessage());
272 logger
.info(pluralString
+ " to save: " + objectsToSave
.size());
273 getMediaService().save(objectsToSave
);
276 } catch (SQLException e
) {
277 logger
.error("SQLException:" + e
);
// Supplies the Collection that doPartition assigns to a newly created specimen for the given
// collection code.
// NOTE(review): the method body is not visible in this listing; how the collection is obtained
// (and what happens for an unknown or null code) must be confirmed against the full file.
282 private Collection
getCollection(String collectionCode
) {
// Derives a name from the "file OS" column of the current record.
// The file name is matched against a pattern consisting of a leading part (group 1) followed by
// a suffix of the form "_xxxx[-x..][_Nrddd[d]]_xx[x].jpg"; on a match group 1 is extracted,
// and a warning "FileOS does not match" is logged for the non-matching case.
// Declares SQLException because the column is read from the result set.
// NOTE(review): the return statements and the "else" line are not visible in this listing -
// confirm what is returned for matching and non-matching file names. A null "file OS" value
// would make pattern.matcher(...) throw a NullPointerException.
290 private String
getNameFromFileOs(ResultSet rs
) throws SQLException
{
291 String fileOS
= rs
.getString("file OS");
292 Pattern pattern
= Pattern
.compile("(.+)(_.{4}(-.{1,3})?(_Nr\\d{3,4})?_.{2,3}\\.jpg)");
293 Matcher matcher
= pattern
.matcher(fileOS
);
294 if (matcher
.matches()){
295 String match
= matcher
.group(1);
298 logger
.warn("FileOS does not match: " + fileOS
);
306 private void makeAllMedia(GlobisImportState state
, ResultSet rs
, Set
<Media
> recordMedia
, Set
<Media
> objectsToSave
) throws SQLException
{
308 String pathShort
= rs
.getString("Dateipfad_kurz");
309 String fileOS
= rs
.getString("file OS");
310 pathShort
= pathShort
.replace(fileOS
, "");
311 String newPath
= state
.getConfig().getImageBaseUrl();
312 String path
= pathShort
.replace("image:Webversionen/", newPath
);
314 Media singleMedia
= makeMedia(state
, rs
, "file OS", "Legende 1", path
, objectsToSave
);
315 recordMedia
.add(singleMedia
);
316 singleMedia
= makeMedia(state
, rs
, "Dateinamen02", "Legende 2", path
, objectsToSave
);
317 recordMedia
.add(singleMedia
);
318 singleMedia
= makeMedia(state
, rs
, "Dateinamen03", "Legende 3", path
, objectsToSave
);
319 recordMedia
.add(singleMedia
);
320 singleMedia
= makeMedia(state
, rs
, "Dateinamen04", "Legende 4", path
, objectsToSave
);
321 recordMedia
.add(singleMedia
);
// Creates the media object for one image file of the current record.
//
// fileNameAttr / legendAttr name the result set columns holding the file name and the legend.
// The URI is built from path + file name, with blanks replaced by "%20".
// If the legend is blank and media data are to be read (config flag isDoReadMediaData), the URI
// is requested through UriUtils and a warning is logged when the image exists.
// The media is created with getImageMedia, gets the legend as English title, is passed to
// doIdCreatedUpdatedNotes together with the BildID and IMAGE_NAMESPACE, and is added to
// objectsToSave.
// MalformedURLException, ClientProtocolException and IOException are caught here.
// NOTE(review): the declaration of "media", the "try" line, the branch taken when the image
// does not exist, the bodies of the catch clauses and the return statement are not visible in
// this listing - confirm in particular whether null can be returned, since makeAllMedia adds
// the result to its media set without a null check.
325 private Media
makeMedia(GlobisImportState state
, ResultSet rs
, String fileNameAttr
, String legendAttr
, String path
, Set
<Media
> objectsToSave
) throws SQLException
{
327 String fileName
= rs
.getString(fileNameAttr
);
328 String legend
= rs
.getString(legendAttr
);
329 Integer bildID
= rs
.getInt("BildID");
331 String uriStr
= path
+fileName
;
332 uriStr
= uriStr
.replace(" ", "%20");
334 URI uri
= URI
.create(uriStr
);
336 // Media media = ImageInfo.NewInstanceWithMetaData(uri, null);
339 boolean readMediaData
= state
.getConfig().isDoReadMediaData();
340 if (isBlank(legend
) && readMediaData
){
341 if (UriUtils
.isOk(UriUtils
.getResponse(uri
, null))){
342 logger
.warn("Image exists but legend is null " + uri
+ ", bildID" + bildID
);
348 media
= this.getImageMedia(uri
.toString(), readMediaData
, false);
349 media
.putTitle(Language
.ENGLISH(), legend
);
350 this.doIdCreatedUpdatedNotes(state
, media
, rs
, bildID
, IMAGE_NAMESPACE
);
352 objectsToSave
.add(media
);
355 } catch (MalformedURLException e
) {
357 } catch (ClientProtocolException e
) {
359 } catch (IOException e
) {
// Maps the copyright entry of an image record to a collection code.
// The entry is compared against a fixed list of institution names; a blank entry is handled
// first, and an entry matching none of the names is logged as "Unknown copyright entry".
// Names compared with matches(...) contain ".?" at some positions, so any single character
// (or none) is accepted there - presumably to tolerate differently encoded umlauts and accents
// in the source data (TODO confirm).
// NOTE(review): the return statements are not visible in this listing, so the collection code
// produced by each branch (including the blank and unknown cases) must be taken from the
// full file.
366 private String
transformCopyright2CollectionCode(String copyright
){
368 if (isBlank(copyright
)){
370 }else if(copyright
.matches("Museum f.?r Naturkunde der Humboldt-Universit.?t, Berlin")){
372 }else if(copyright
.matches("Staatliches Museum f.?r Tierkunde Dresden")){
374 }else if(copyright
.equals("Natural History Museum, London")){
376 }else if(copyright
.matches("Zoologische Staatssammlung M.?nchen")){
378 }else if(copyright
.matches("Staatliches Museum f.?r Naturkunde Karlsruhe")){
380 }else if(copyright
.matches("Deutsches Entomologisches Institut M.?ncheberg")){
382 }else if(copyright
.equals("Forschungsinstitut und Naturmuseum Senckenberg")){
384 }else if(copyright
.matches("Mus.?um National d.?Histoire Naturelle, Paris")){
386 }else if(copyright
.equals("Naturhistorisches Museum Wien")){
388 }else if(copyright
.equals("Naturhistoriska Riksmuseet Stockholm")){
390 }else if(copyright
.matches("Staatliches Museum f.?r Naturkunde Stuttgart")){
392 }else if(copyright
.equals("United States National Museum of Natural History, Washington")){
394 }else if(copyright
.matches("Zentrum f.?r Biodokumentation des Saarlandes")){
396 }else if(copyright
.equals("Zoological Museum, University of Copenhagen")){
398 }else if(copyright
.equals("Zoologisches Forschungsinstitut und Museum \"Alexander Koenig\", Bonn")){
400 }else if(copyright
.equals("Zoologisches Forschungsmuseum \"Alexander Koenig\", Bonn")){
402 }else if(copyright
.matches("Zoologisches Institut der Martin-Luther-Universit.?t Halle-Wittenberg")){
404 }else if(copyright
.matches("Zoologisches Institut Universit.?t T.?bingen")){
407 logger
.warn("Unknown copyright entry: " + copyright
);
// Collects the CDM objects that the records of one partition refer to, grouped by namespace.
//
// Three id sets are filled from the result set: SpecCurrspecID values, spectaxID values and
// type ids (built by handleTypeKey from spectaxID and copyright).
// For SPEC_TAX_NAMESPACE the ZoologicalName objects are loaded for the spectaxID set via
// getCommonService().getSourcedObjectsByIdInSource(...); for GlobisSpecTaxImport.TYPE_NAMESPACE
// the DerivedUnit objects are loaded the same way. Both maps are put into the result under
// their namespace. The taxon lookup (TAXON_NAMESPACE) is commented out.
// A SQLException is rethrown wrapped in a RuntimeException.
// NOTE(review): the "try" line, the loop over the result set, the declarations of nameSpace,
// cdmClass and idSet, the assignment of idSet before the type lookup and the return statement
// are not visible in this listing - confirm that the type lookup uses the type id set.
414 public Map
<Object
, Map
<String
, ?
extends CdmBase
>> getRelatedObjectsForPartition(ResultSet rs
, GlobisImportState state
) {
418 Map
<Object
, Map
<String
, ?
extends CdmBase
>> result
= new HashMap
<Object
, Map
<String
, ?
extends CdmBase
>>();
420 Set
<String
> currSpecIdSet
= new HashSet
<String
>();
421 Set
<String
> specTaxIdSet
= new HashSet
<String
>();
422 Set
<String
> typeIdSet
= new HashSet
<String
>();
425 handleForeignKey(rs
, currSpecIdSet
, "SpecCurrspecID");
426 handleForeignKey(rs
, specTaxIdSet
, "spectaxID");
427 handleTypeKey(rs
, typeIdSet
, "spectaxID", "copyright");
431 nameSpace
= SPEC_TAX_NAMESPACE
;
432 cdmClass
= ZoologicalName
.class;
433 idSet
= specTaxIdSet
;
434 Map
<String
, ZoologicalName
> specTaxNameMap
= (Map
<String
, ZoologicalName
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
435 result
.put(nameSpace
, specTaxNameMap
);
438 // nameSpace = TAXON_NAMESPACE;
439 // cdmClass = Taxon.class;
440 // idSet = currSpecIdSet;
441 // Map<String, Taxon> taxonMap = (Map<String, Taxon>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
442 // result.put(nameSpace, taxonMap);
446 nameSpace
= GlobisSpecTaxImport
.TYPE_NAMESPACE
;
447 cdmClass
= DerivedUnit
.class;
449 Map
<String
, DerivedUnit
> typeMap
= (Map
<String
, DerivedUnit
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
450 result
.put(nameSpace
, typeMap
);
453 } catch (SQLException e
) {
454 throw new RuntimeException(e
);
// Builds the type id for the current record from its spec tax id and its copyright entry.
// Nothing is computed when the spec tax id is null or the copyright entry is blank; otherwise
// the id is GlobisSpecTaxImport.getTypeId(specTaxId, collection code of the copyright entry).
// Declares SQLException because both values are read from the result set.
// NOTE(review): the statement that uses "id" is not visible in this listing - presumably it is
// added to idSet; confirm against the full file.
459 private void handleTypeKey(ResultSet rs
, Set
<String
> idSet
, String specTaxIdAttr
, String copyrightAttr
) throws SQLException
{
460 Integer specTaxId
= nullSafeInt(rs
, specTaxIdAttr
);
461 if (specTaxId
!= null){
462 String copyright
= rs
.getString(copyrightAttr
);
463 if (isNotBlank(copyright
)){
464 String id
= GlobisSpecTaxImport
.getTypeId(specTaxId
, transformCopyright2CollectionCode(copyright
));
471 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
474 protected boolean doCheck(GlobisImportState state
){
475 IOValidator
<GlobisImportState
> validator
= new GlobisImageImportValidator();
476 return validator
.validate(state
);
481 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
483 protected boolean isIgnore(GlobisImportState state
){
484 return ! state
.getConfig().isDoImages();
491 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doInvoke(eu.etaxonomy.cdm.io.globis.GlobisImportState)
494 protected void doInvoke(GlobisImportState state
) {
495 Reference refGart
= ReferenceFactory
.newGeneric();
496 refGart
.setTitleCache("GART");
497 refGart
.setUuid(uuidGartRef
);
498 getReferenceService().saveOrUpdate(refGart
);
499 super.doInvoke(state
);