2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.globis
;
12 import java
.io
.IOException
;
13 import java
.net
.MalformedURLException
;
14 import java
.sql
.ResultSet
;
15 import java
.sql
.SQLException
;
16 import java
.util
.HashMap
;
17 import java
.util
.HashSet
;
20 import java
.util
.UUID
;
21 import java
.util
.regex
.Matcher
;
22 import java
.util
.regex
.Pattern
;
24 import org
.apache
.http
.client
.ClientProtocolException
;
25 import org
.apache
.logging
.log4j
.LogManager
;
26 import org
.apache
.logging
.log4j
.Logger
;
27 import org
.springframework
.stereotype
.Component
;
29 import eu
.etaxonomy
.cdm
.common
.URI
;
30 import eu
.etaxonomy
.cdm
.common
.UriUtils
;
31 import eu
.etaxonomy
.cdm
.facade
.DerivedUnitFacade
;
32 import eu
.etaxonomy
.cdm
.io
.common
.IOValidator
;
33 import eu
.etaxonomy
.cdm
.io
.common
.ResultSetPartitioner
;
34 import eu
.etaxonomy
.cdm
.io
.globis
.validation
.GlobisImageImportValidator
;
35 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
36 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
37 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
38 import eu
.etaxonomy
.cdm
.model
.common
.Marker
;
39 import eu
.etaxonomy
.cdm
.model
.common
.MarkerType
;
40 import eu
.etaxonomy
.cdm
.model
.media
.Media
;
41 import eu
.etaxonomy
.cdm
.model
.name
.IZoologicalName
;
42 import eu
.etaxonomy
.cdm
.model
.name
.TaxonName
;
43 import eu
.etaxonomy
.cdm
.model
.occurrence
.Collection
;
44 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivedUnit
;
45 import eu
.etaxonomy
.cdm
.model
.occurrence
.DeterminationEvent
;
46 import eu
.etaxonomy
.cdm
.model
.reference
.OriginalSourceType
;
47 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
48 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
49 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
57 public class GlobisImageImport
extends GlobisImportBase
<Taxon
> {
59 private static final long serialVersionUID
= 5697033145326415146L;
60 private static final Logger logger
= LogManager
.getLogger();
62 private int modCount
= 1000;
64 private UUID uuidArtNonSpecTaxMarkerType
= UUID
.fromString("be362085-0f5b-4314-96d1-78b9b129ef6d") ;
65 private static final String pluralString
= "images";
66 private static final String dbTableName
= "Einzelbilder";
67 private static final Class
<?
> cdmTargetClass
= Media
.class; //not needed
69 private static UUID uuidGartRef
= UUID
.fromString("af85470f-6e54-4304-9d29-fd117cd56161");
/**
 * Creates the import, handing the plural label, the source table name and the
 * target class of this import to the superclass constructor.
 */
public GlobisImageImport() {
    super(
            pluralString,
            dbTableName,
            cdmTargetClass);
}
// Returns the id query string for this import. The visible part of the
// concatenation ends with a FROM clause over dbTableName.
// NOTE(review): the fragment between the assignment and the FROM clause
// (original line 78, presumably the SELECT part) is not present in this
// view of the file -- confirm against the full source.
76 protected String
getIdQuery() {
77 String strRecordQuery
=
79 " FROM " + dbTableName
;
80 return strRecordQuery
;
84 protected String
getRecordQuery(GlobisImportConfigurator config
) {
85 String strRecordQuery
=
86 " SELECT i.*, NULL as Created_When, NULL as Created_Who," +
87 " NULL as Updated_who, NULL as Updated_When, NULL as Notes, st.SpecCurrspecID " +
88 " FROM " + getTableName() + " i " +
89 " LEFT JOIN specTax st ON i.spectaxID = st.SpecTaxID " +
90 " WHERE ( i.BildId IN (" + ID_LIST_TOKEN
+ ") )";
91 return strRecordQuery
;
// Handles one partition of image records.
// Visible flow: fetch the related-object maps (types, taxa, specTax names) and
// the result set from the partitioner, load the GART reference, then per record
// read the columns, create the media (makeAllMedia), find or create the
// specimen, attach sources / annotation / media / marker, and create a
// determination event. Finally the collected media are saved via the media
// service. A SQLException is logged as an error.
// NOTE(review): several original lines are not present in this view (the
// record loop header, the try statements, else branches, the declarations of
// i, title and taxon, and the return statement) -- confirm against the full
// source before relying on the comments below.
95 public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner
, GlobisImportState state
) {
96 boolean success
= true;
// media created while handling this partition; saved in one call at the end
98 Set
<Media
> objectsToSave
= new HashSet
<>();
100 @SuppressWarnings("unchecked")
101 Map
<String
, DerivedUnit
> typeMap
= partitioner
.getObjectMap(TYPE_NAMESPACE
);
103 @SuppressWarnings("unchecked")
104 Map
<String
, Taxon
> taxonMap
= partitioner
.getObjectMap(TAXON_NAMESPACE
);
105 @SuppressWarnings("unchecked")
106 Map
<String
, TaxonName
> specTaxNameMap
= partitioner
.getObjectMap(SPEC_TAX_NAMESPACE
);
108 ResultSet rs
= partitioner
.getResultSet();
// reference with the uuid that doInvoke assigns to the "GART" reference
110 Reference refGart
= getReferenceService().find(uuidGartRef
);
// progress message every modCount records
120 if ((i
++ % modCount
) == 0 && i
!= 1 ){ logger
.info(pluralString
+ " handled: " + (i
-1));}
// column values of the current record
122 Integer bildID
= rs
.getInt("BildID");
123 Integer spectaxID
= nullSafeInt(rs
, "spectaxID");
124 Integer taxonID
= nullSafeInt(rs
, "SpecCurrspecID");
125 String copyright
= rs
.getString("copyright");
126 String specimenId
= rs
.getString("specimenID");
127 String bemerkungen
= rs
.getString("Bemerkungen");
128 String artNotSpecTax
= rs
.getString("Art non spectax");
129 String motiv
= rs
.getString("Motiv");
132 // [file lab2], same as Dateiname04 but less data
// media belonging to the current record only
135 Set
<Media
> recordMedia
= new HashSet
<>();
139 makeAllMedia(state
, rs
, recordMedia
, objectsToSave
);
143 DerivedUnit specimen
= null;
144 if (spectaxID
!= null){
145 //try to find type specimen
146 if (isNotBlank(motiv
) && (motiv
.startsWith("type specimen"))){
147 String collectionCode
= transformCopyright2CollectionCode(copyright
);
148 String id
= GlobisSpecTaxImport
.getTypeId(spectaxID
, collectionCode
);
149 specimen
= typeMap
.get(id
);
152 //try to find specTaxName
153 IZoologicalName specTaxTaxonName
= specTaxNameMap
.get(String
.valueOf(spectaxID
));
// title is one of three forms: taxon name, spectaxID, or file-derived name
154 if (specTaxTaxonName
!= null){
155 title
= " taxon name " + specTaxTaxonName
.getTitleCache();
157 title
= " spectaxID " + spectaxID
;
160 title
= " name " + getNameFromFileOs(rs
) + (isBlank(specimenId
)?
"" : " (specimenId: " + specimenId
+ ")");
// no existing type specimen was found: create a new preserved specimen
164 if (specimen
== null){
165 specimen
= DerivedUnit
.NewPreservedSpecimenInstance();
166 specimen
.setTitleCache("Specimen for " + title
, true);
167 String collectionCode
= transformCopyright2CollectionCode(copyright
);
169 Collection collection
= getCollection(collectionCode
);
170 specimen
.setCollection(collection
);
// import source: BildID within IMAGE_NAMESPACE
174 specimen
.addSource(OriginalSourceType
.Import
, String
.valueOf(bildID
), IMAGE_NAMESPACE
, state
.getTransactionalSourceReference(), null);
176 //GART id (specimenID)
177 if (isNotBlank(specimenId
)){
178 specimen
.addSource(OriginalSourceType
.Lineage
, specimenId
, "specimenId", refGart
, null);
181 if (isNotBlank(bemerkungen
)){
182 Annotation annotation
= Annotation
.NewInstance(bemerkungen
, null, null);
183 specimen
.addAnnotation(annotation
);
// attach all media of this record to the specimen via the facade
186 DerivedUnitFacade facade
= DerivedUnitFacade
.NewInstance(specimen
);
187 for (Media media
: recordMedia
){
188 facade
.addDerivedUnitMedia(media
);
// "ja" (case-insensitive) sets the marker; the warning below is logged for
// other values (NOTE(review): the else line is not visible here -- confirm)
191 if (isNotBlank(artNotSpecTax
)){
192 if (artNotSpecTax
.equalsIgnoreCase("ja")){
193 MarkerType artNotSpecTaxMarker
= getMarkerType(state
, uuidArtNonSpecTaxMarkerType
, "Art non spectax", "This marker is true if in the orginal data the 'Art non spectax' was 'ja'", null) ;
194 specimen
.addMarker(Marker
.NewInstance(artNotSpecTaxMarker
, true));
196 logger
.warn(artNotSpecTax
+ " is not a valid value for 'Art non spectax' (BildID: " + bildID
+ ")" );
200 if (spectaxID
!= null){
202 //add to image gallery (discuss if this is also needed if taxon is already added to type specimen
203 // Taxon taxon = taxonMap.get(String.valueOf(taxonID));
204 IZoologicalName specTaxTaxonName
= specTaxNameMap
.get(String
.valueOf(spectaxID
));
207 // if (taxon == null){
208 //// taxon = specTaxMap.get(String.valueOf(spectaxID));
209 //// specTaxName = specTaxMap.g
211 // if (taxon == null){
212 // logger.warn("No taxon available for specTaxID: " + spectaxID);
214 // name = CdmBase.deproxy(taxon.getName(), ZoologicalName.class);
219 if (specTaxTaxonName
== null){
220 logger
.warn("Name could not be found for spectaxID: " + spectaxID
+ " in BildID: " + bildID
);
// each iteration overwrites taxon with the current taxon of the name
223 for (Taxon specTaxTaxon
: specTaxTaxonName
.getTaxa()){
224 taxon
= specTaxTaxon
;
// NOTE(review): presumably only executed when no taxon was found above; the
// guarding condition is not visible in this view -- confirm
228 Reference undefinedSec
= null;
229 taxon
= Taxon
.NewInstance(specTaxTaxonName
, undefinedSec
);
232 DeterminationEvent
.NewInstance(taxon
, specimen
);
239 // if (taxon != null){
240 // TaxonDescription taxonDescription = getTaxonDescription(taxon, true, true);
241 // if (taxonDescription.getElements().size() == 0){
242 // TextData textData = TextData.NewInstance(Feature.IMAGE());
243 // taxonDescription.addElement(textData);
245 // Set<DescriptionElementBase> elements = taxonDescription.getElements();
246 // TextData textData = CdmBase.deproxy(elements.iterator().next(), TextData.class);
247 // for (Media media: recordMedia){
248 // textData.addMedia(media);
// an exception for a single record is logged as a warning (message only)
253 } catch (Exception e
) {
254 logger
.warn("Exception in Einzelbilder: bildID " + bildID
+ ". " + e
.getMessage());
// save all media collected for this partition
260 logger
.info(pluralString
+ " to save: " + objectsToSave
.size());
261 getMediaService().save(objectsToSave
);
264 } catch (SQLException e
) {
265 logger
.error("SQLException:" + e
);
// Returns the Collection for the given collection code; doPartition sets the
// result on newly created specimens.
// NOTE(review): the body of this method is not present in this view of the
// file, so how the collection is obtained cannot be stated here.
270 private Collection
getCollection(String collectionCode
) {
// Derives a name from the "file OS" column of the current record.
// The column value is matched against a pattern whose first group captures
// everything in front of a suffix of the form
// _xxxx[-x..xxx][_Nr<3-4 digits>]_xx[x].jpg; on a match group 1 is read into
// match, and a warning containing "FileOS does not match" is logged for the
// non-matching case.
// NOTE(review): the return statements are not present in this view --
// confirm which value is returned in each case.
275 private String
getNameFromFileOs(ResultSet rs
) throws SQLException
{
276 String fileOS
= rs
.getString("file OS");
277 Pattern pattern
= Pattern
.compile("(.+)(_.{4}(-.{1,3})?(_Nr\\d{3,4})?_.{2,3}\\.jpg)");
278 Matcher matcher
= pattern
.matcher(fileOS
);
279 if (matcher
.matches()){
280 String match
= matcher
.group(1);
283 logger
.warn("FileOS does not match: " + fileOS
);
288 private void makeAllMedia(GlobisImportState state
, ResultSet rs
, Set
<Media
> recordMedia
, Set
<Media
> objectsToSave
) throws SQLException
{
290 String pathShort
= rs
.getString("Dateipfad_kurz");
291 String fileOS
= rs
.getString("file OS");
292 pathShort
= pathShort
.replace(fileOS
, "");
293 String newPath
= state
.getConfig().getImageBaseUrl();
294 String path
= pathShort
.replace("image:Webversionen/", newPath
);
296 Media singleMedia
= makeMedia(state
, rs
, "file OS", "Legende 1", path
, objectsToSave
);
297 recordMedia
.add(singleMedia
);
298 singleMedia
= makeMedia(state
, rs
, "Dateinamen02", "Legende 2", path
, objectsToSave
);
299 recordMedia
.add(singleMedia
);
300 singleMedia
= makeMedia(state
, rs
, "Dateinamen03", "Legende 3", path
, objectsToSave
);
301 recordMedia
.add(singleMedia
);
302 singleMedia
= makeMedia(state
, rs
, "Dateinamen04", "Legende 4", path
, objectsToSave
);
303 recordMedia
.add(singleMedia
);
// Creates one Media for the file named in column fileNameAttr, titled (in
// English) with the value of column legendAttr.
// Visible flow: build the URI from path + file name (spaces replaced by
// "%20"); if the legend is blank and media data is to be read, log a warning
// when the URI responds OK; create the image media, set its title, run
// doIdCreatedUpdatedNotes with IMAGE_NAMESPACE, and add it to objectsToSave.
// NOTE(review): the try statement, the declaration of media, the bodies of
// the catch clauses and the return statement are not present in this view --
// confirm how the exceptions are handled and what is returned.
307 private Media
makeMedia(GlobisImportState state
, ResultSet rs
, String fileNameAttr
, String legendAttr
, String path
, Set
<Media
> objectsToSave
) throws SQLException
{
309 String fileName
= rs
.getString(fileNameAttr
);
310 String legend
= rs
.getString(legendAttr
);
311 Integer bildID
= rs
.getInt("BildID");
// only blanks are escaped; no other URI encoding is applied here
313 String uriStr
= path
+fileName
;
314 uriStr
= uriStr
.replace(" ", "%20");
316 URI uri
= URI
.create(uriStr
);
318 // Media media = ImageInfo.NewInstanceWithMetaData(uri, null);
// the remote check is only made when the legend is blank AND reading media data is enabled
321 boolean readMediaData
= state
.getConfig().isDoReadMediaData();
322 if (isBlank(legend
) && readMediaData
){
323 if (UriUtils
.isOk(UriUtils
.getResponse(uri
, null))){
324 logger
.warn("Image exists but legend is null " + uri
+ ", bildID" + bildID
);
330 media
= this.getImageMedia(uri
.toString(), readMediaData
);
331 media
.putTitle(Language
.ENGLISH(), legend
);
332 this.doIdCreatedUpdatedNotes(state
, media
, rs
, bildID
, IMAGE_NAMESPACE
);
334 objectsToSave
.add(media
);
337 } catch (MalformedURLException e
) {
339 } catch (ClientProtocolException e
) {
341 } catch (IOException e
) {
// Maps the copyright text of a record to a collection code by comparing it
// against a fixed list of institution names. Names are compared with equals(),
// or with matches() where the pattern contains ".?" (any single optional
// character) -- presumably to tolerate differently encoded umlauts/accents;
// confirm. A blank copyright is handled first, and a warning
// ("Unknown copyright entry") is logged for the remaining case.
// NOTE(review): the body of every branch (the returned codes) is not present
// in this view of the file -- confirm the code returned per institution.
348 private String
transformCopyright2CollectionCode(String copyright
){
350 if (isBlank(copyright
)){
352 }else if(copyright
.matches("Museum f.?r Naturkunde der Humboldt-Universit.?t, Berlin")){
354 }else if(copyright
.matches("Staatliches Museum f.?r Tierkunde Dresden")){
356 }else if(copyright
.equals("Natural History Museum, London")){
358 }else if(copyright
.matches("Zoologische Staatssammlung M.?nchen")){
360 }else if(copyright
.matches("Staatliches Museum f.?r Naturkunde Karlsruhe")){
362 }else if(copyright
.matches("Deutsches Entomologisches Institut M.?ncheberg")){
364 }else if(copyright
.equals("Forschungsinstitut und Naturmuseum Senckenberg")){
366 }else if(copyright
.matches("Mus.?um National d.?Histoire Naturelle, Paris")){
368 }else if(copyright
.equals("Naturhistorisches Museum Wien")){
370 }else if(copyright
.equals("Naturhistoriska Riksmuseet Stockholm")){
372 }else if(copyright
.matches("Staatliches Museum f.?r Naturkunde Stuttgart")){
374 }else if(copyright
.equals("United States National Museum of Natural History, Washington")){
376 }else if(copyright
.matches("Zentrum f.?r Biodokumentation des Saarlandes")){
378 }else if(copyright
.equals("Zoological Museum, University of Copenhagen")){
380 }else if(copyright
.equals("Zoologisches Forschungsinstitut und Museum \"Alexander Koenig\", Bonn")){
382 }else if(copyright
.equals("Zoologisches Forschungsmuseum \"Alexander Koenig\", Bonn")){
384 }else if(copyright
.matches("Zoologisches Institut der Martin-Luther-Universit.?t Halle-Wittenberg")){
386 }else if(copyright
.matches("Zoologisches Institut Universit.?t T.?bingen")){
389 logger
.warn("Unknown copyright entry: " + copyright
);
// Collects the objects related to the records of one partition, keyed by
// namespace.
// Visible flow: gather the SpecCurrspecID, spectaxID and type ids from the
// result set, then load the TaxonName objects for SPEC_TAX_NAMESPACE and the
// DerivedUnit objects for TYPE_NAMESPACE via the common service and put both
// maps into the result. A SQLException is rethrown wrapped in a
// RuntimeException.
// NOTE(review): the try statement, the loop over the result set, the
// declarations of nameSpace and idSet, the assignment of idSet before the type
// lookup (original line 427) and the return statement are not present in this
// view -- confirm against the full source.
396 public Map
<Object
, Map
<String
, ?
extends CdmBase
>> getRelatedObjectsForPartition(ResultSet rs
, GlobisImportState state
) {
400 Map
<Object
, Map
<String
, ?
extends CdmBase
>> result
= new HashMap
<>();
// id sets filled from the result set
402 Set
<String
> currSpecIdSet
= new HashSet
<>();
403 Set
<String
> specTaxIdSet
= new HashSet
<>();
404 Set
<String
> typeIdSet
= new HashSet
<>();
407 handleForeignKey(rs
, currSpecIdSet
, "SpecCurrspecID");
408 handleForeignKey(rs
, specTaxIdSet
, "spectaxID");
409 handleTypeKey(rs
, typeIdSet
, "spectaxID", "copyright");
// names referenced by spectaxID
413 nameSpace
= SPEC_TAX_NAMESPACE
;
414 idSet
= specTaxIdSet
;
415 Map
<String
, TaxonName
> specTaxNameMap
= getCommonService().getSourcedObjectsByIdInSourceC(TaxonName
.class, idSet
, nameSpace
);
416 result
.put(nameSpace
, specTaxNameMap
);
419 // nameSpace = TAXON_NAMESPACE;
420 // idSet = currSpecIdSet;
421 // Map<String, Taxon> taxonMap = getCommonService().getSourcedObjectsByIdInSourceC(Taxon.class, idSet, nameSpace);
422 // result.put(nameSpace, taxonMap);
// type specimens
426 nameSpace
= GlobisImportBase
.TYPE_NAMESPACE
;
428 Map
<String
, DerivedUnit
> typeMap
= getCommonService().getSourcedObjectsByIdInSourceC(DerivedUnit
.class, idSet
, nameSpace
);
429 result
.put(nameSpace
, typeMap
);
432 } catch (SQLException e
) {
433 throw new RuntimeException(e
);
// Computes the type id for the current record: when the spec tax id column is
// not null and the copyright column is not blank, the id is built by
// GlobisSpecTaxImport.getTypeId from the spec tax id and the collection code
// derived from the copyright.
// NOTE(review): the statement that uses the computed id (original line 444,
// presumably adding it to idSet) is not present in this view -- confirm.
438 private void handleTypeKey(ResultSet rs
, Set
<String
> idSet
, String specTaxIdAttr
, String copyrightAttr
) throws SQLException
{
439 Integer specTaxId
= nullSafeInt(rs
, specTaxIdAttr
);
440 if (specTaxId
!= null){
441 String copyright
= rs
.getString(copyrightAttr
);
442 if (isNotBlank(copyright
)){
443 String id
= GlobisSpecTaxImport
.getTypeId(specTaxId
, transformCopyright2CollectionCode(copyright
));
450 protected boolean doCheck(GlobisImportState state
){
451 IOValidator
<GlobisImportState
> validator
= new GlobisImageImportValidator();
452 return validator
.validate(state
);
456 protected boolean isIgnore(GlobisImportState state
){
457 return ! state
.getConfig().isDoImages();
461 protected void doInvoke(GlobisImportState state
) {
462 Reference refGart
= ReferenceFactory
.newGeneric();
463 refGart
.setTitleCache("GART", true);
464 refGart
.setUuid(uuidGartRef
);
465 getReferenceService().saveOrUpdate(refGart
);
466 super.doInvoke(state
);