2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.globis
;
12 import java
.io
.IOException
;
13 import java
.net
.MalformedURLException
;
15 import java
.sql
.ResultSet
;
16 import java
.sql
.SQLException
;
17 import java
.util
.HashMap
;
18 import java
.util
.HashSet
;
21 import java
.util
.UUID
;
22 import java
.util
.regex
.Matcher
;
23 import java
.util
.regex
.Pattern
;
25 import org
.apache
.http
.client
.ClientProtocolException
;
26 import org
.apache
.log4j
.Logger
;
27 import org
.springframework
.stereotype
.Component
;
29 import eu
.etaxonomy
.cdm
.api
.facade
.DerivedUnitFacade
;
30 import eu
.etaxonomy
.cdm
.common
.UriUtils
;
31 import eu
.etaxonomy
.cdm
.io
.common
.IOValidator
;
32 import eu
.etaxonomy
.cdm
.io
.common
.ResultSetPartitioner
;
33 import eu
.etaxonomy
.cdm
.io
.globis
.validation
.GlobisImageImportValidator
;
34 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
35 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
36 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
37 import eu
.etaxonomy
.cdm
.model
.common
.Marker
;
38 import eu
.etaxonomy
.cdm
.model
.common
.MarkerType
;
39 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
40 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
41 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
42 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
43 import eu
.etaxonomy
.cdm
.model
.media
.Media
;
44 import eu
.etaxonomy
.cdm
.model
.name
.ZoologicalName
;
45 import eu
.etaxonomy
.cdm
.model
.occurrence
.Collection
;
46 import eu
.etaxonomy
.cdm
.model
.occurrence
.DeterminationEvent
;
47 import eu
.etaxonomy
.cdm
.model
.occurrence
.Specimen
;
48 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
49 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
50 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
51 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
60 public class GlobisImageImport
extends GlobisImportBase
<Taxon
> {
61 private static final Logger logger
= Logger
.getLogger(GlobisImageImport
.class);
63 private int modCount
= 10000;
65 private UUID uuidArtNonSpecTaxMarkerType
= UUID
.fromString("be362085-0f5b-4314-96d1-78b9b129ef6d") ;
66 private static final String pluralString
= "images";
67 private static final String dbTableName
= "Einzelbilder";
68 private static final Class cdmTargetClass
= Media
.class; //not needed
70 private static final String IMAGE_NAMESPACE
= "Einzelbilder";
71 private static UUID uuidGartRef
= UUID
.fromString("af85470f-6e54-4304-9d29-fd117cd56161");
/**
 * Creates the image import and hands the log label ({@code pluralString}), the
 * source table name ({@code dbTableName}) and the target class constant to the
 * base class constructor.
 */
public GlobisImageImport(){
    super(pluralString, dbTableName, cdmTargetClass);
}
81 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#getIdQuery()
// Builds the id query over the image table (dbTableName) and returns it.
// NOTE(review): the visible text has no SELECT clause in front of " FROM " (listing
// line 86 is absent here). getRecordQuery filters on i.BildId, so the missing part
// presumably selects BildId -- confirm against the complete file.
84 protected String
getIdQuery() {
85 String strRecordQuery
=
87 " FROM " + dbTableName
;
88 return strRecordQuery
;
95 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
98 protected String
getRecordQuery(GlobisImportConfigurator config
) {
99 String strRecordQuery
=
100 " SELECT i.*, NULL as Created_When, NULL as Created_Who," +
101 " NULL as Updated_who, NULL as Updated_When, NULL as Notes, st.SpecCurrspecID " +
102 " FROM " + getTableName() + " i " +
103 " LEFT JOIN specTax st ON i.spectaxID = st.SpecTaxID " +
104 " WHERE ( i.BildId IN (" + ID_LIST_TOKEN
+ ") )";
105 return strRecordQuery
;
111 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doPartition(eu.etaxonomy.cdm.io.common.ResultSetPartitioner, eu.etaxonomy.cdm.io.globis.GlobisImportState)
// Imports one partition of image records.
// Visible steps per record:
//  - read BildID, spectaxID, SpecCurrspecID, copyright, specimenID, Bemerkungen,
//    'Art non spectax' and Motiv from the result set
//  - create the media of the record via makeAllMedia
//  - if spectaxID is set and Motiv starts with "type specimen", look up an existing
//    type specimen in typeMap by the id built from spectaxID and the collection code
//  - if no specimen was found, create a new one titled "Specimen for " + title and
//    assign the collection derived from the copyright
//  - register BildID (IMAGE_NAMESPACE) and, if present, the GART specimenId as sources
//  - add Bemerkungen as annotation, attach the media through a DerivedUnitFacade,
//    set the 'Art non spectax' marker for the value "ja" (any other non-blank value is logged)
//  - if spectaxID is set, create a DeterminationEvent linking a taxon and the specimen
// Exceptions of a single record are logged as warning; after the records the collected
// media are saved through the media service; an SQLException is logged as error.
// NOTE(review): this listing omits several original lines (loop header, else branches,
// declarations of i/title/taxon, closing braces, the return statement), so the exact
// nesting of the statements below cannot be confirmed from here.
// NOTE(review): taxonID and taxonMap are only referenced by commented-out code in the
// visible part.
114 public boolean doPartition(ResultSetPartitioner partitioner
, GlobisImportState state
) {
115 boolean success
= true;
117 Set
<Media
> objectsToSave
= new HashSet
<Media
>();
// object maps of this partition, fetched by namespace (presumably those assembled in
// getRelatedObjectsForPartition -- confirm)
119 Map
<String
, Specimen
> typeMap
= (Map
<String
, Specimen
>) partitioner
.getObjectMap(GlobisSpecTaxImport
.TYPE_NAMESPACE
);
121 Map
<String
, Taxon
> taxonMap
= (Map
<String
, Taxon
>) partitioner
.getObjectMap(TAXON_NAMESPACE
);
122 Map
<String
, ZoologicalName
> specTaxNameMap
= (Map
<String
, ZoologicalName
>) partitioner
.getObjectMap(GlobisSpecTaxImport
.SPEC_TAX_NAMESPACE
);
124 ResultSet rs
= partitioner
.getResultSet();
// the reference that doInvoke stored under uuidGartRef (title "GART")
126 Reference
<?
> refGart
= getReferenceService().find(uuidGartRef
);
// progress message whenever the counter (before its increment) is a multiple of modCount,
// except for the very first record
136 if ((i
++ % modCount
) == 0 && i
!= 1 ){ logger
.info(pluralString
+ " handled: " + (i
-1));}
138 Integer bildID
= rs
.getInt("BildID");
139 Integer spectaxID
= nullSafeInt(rs
, "spectaxID");
140 Integer taxonID
= nullSafeInt(rs
, "SpecCurrspecID");
141 String copyright
= rs
.getString("copyright");
142 String specimenId
= rs
.getString("specimenID");
143 String bemerkungen
= rs
.getString("Bemerkungen");
144 String artNotSpecTax
= rs
.getString("Art non spectax");
145 String motiv
= rs
.getString("Motiv");
148 // [file lab2], same as Dateiname04 but less data
151 Set
<Media
> recordMedia
= new HashSet
<Media
>();
155 makeAllMedia(state
, rs
, recordMedia
, objectsToSave
);
159 Specimen specimen
= null;
160 if (spectaxID
!= null){
161 //try to find type specimen
162 if (isNotBlank(motiv
) && (motiv
.startsWith("type specimen"))){
163 String collectionCode
= transformCopyright2CollectionCode(copyright
);
164 String id
= GlobisSpecTaxImport
.getTypeId(spectaxID
, collectionCode
);
165 specimen
= typeMap
.get(id
);
168 //try to find specTaxName
169 ZoologicalName specTaxTaxonName
= specTaxNameMap
.get(String
.valueOf(spectaxID
));
170 if (specTaxTaxonName
!= null){
171 title
= " taxon name " + specTaxTaxonName
.getTitleCache();
173 title
= " spectaxID " + spectaxID
;
176 title
= " name " + getNameFromFileOs(rs
) + (isBlank(specimenId
)?
"" : " (specimenId: " + specimenId
+ ")");
// no existing (type) specimen available: create a new one for this record
180 if (specimen
== null){
181 specimen
= Specimen
.NewInstance();
182 specimen
.setTitleCache("Specimen for " + title
);
183 String collectionCode
= transformCopyright2CollectionCode(copyright
);
185 Collection collection
= getCollection(collectionCode
);
186 specimen
.setCollection(collection
);
191 specimen
.addSource(String
.valueOf(bildID
), IMAGE_NAMESPACE
, state
.getTransactionalSourceReference(), null);
193 //GART id (specimenID)
194 if (isNotBlank(specimenId
)){
195 specimen
.addSource(specimenId
, "", refGart
, null);
198 if (isNotBlank(bemerkungen
)){
199 Annotation annotation
= Annotation
.NewInstance(bemerkungen
, null, null);
200 specimen
.addAnnotation(annotation
);
// NOTE(review): recordMedia receives whatever makeMedia returned; confirm that it cannot
// contain null here
203 DerivedUnitFacade facade
= DerivedUnitFacade
.NewInstance(specimen
);
204 for (Media media
: recordMedia
){
205 facade
.addDerivedUnitMedia(media
);
208 if (isNotBlank(artNotSpecTax
)){
209 if (artNotSpecTax
.equalsIgnoreCase("ja")){
210 MarkerType artNotSpecTaxMarker
= getMarkerType(state
, uuidArtNonSpecTaxMarkerType
, "Art non spectax", "This marker is true if in the orginal data the 'Art non spectax' was 'ja'", null) ;
211 specimen
.addMarker(Marker
.NewInstance(artNotSpecTaxMarker
, true));
213 logger
.warn(artNotSpecTax
+ " is not a valid value for 'Art non spectax' (BildID: " + bildID
+ ")" );
217 if (spectaxID
!= null){
219 //add to image gallery (discuss if this is also needed if taxon is already added to type specimen
220 // Taxon taxon = taxonMap.get(String.valueOf(taxonID));
221 ZoologicalName specTaxTaxonName
= specTaxNameMap
.get(String
.valueOf(spectaxID
));
224 // if (taxon == null){
225 //// taxon = specTaxMap.get(String.valueOf(spectaxID));
226 //// specTaxName = specTaxMap.g
228 // if (taxon == null){
229 // logger.warn("No taxon available for specTaxID: " + spectaxID);
231 // name = CdmBase.deproxy(taxon.getName(), ZoologicalName.class);
// NOTE(review): the branch structure after the null check is not visible; confirm that
// the loop over getTaxa() is not reached when specTaxTaxonName is null
236 if (specTaxTaxonName
== null){
237 logger
.warn("Name could not be found for spectaxID: " + spectaxID
+ " in BildID: " + bildID
);
240 for (Taxon specTaxTaxon
: specTaxTaxonName
.getTaxa()){
241 taxon
= specTaxTaxon
;
245 Reference undefinedSec
= null;
246 taxon
= Taxon
.NewInstance(specTaxTaxonName
, undefinedSec
);
249 DeterminationEvent determinationEvent
= DeterminationEvent
.NewInstance();
250 determinationEvent
.setTaxon(taxon
);
251 determinationEvent
.setIdentifiedUnit(specimen
);
257 // if (taxon != null){
258 // TaxonDescription taxonDescription = getTaxonDescription(taxon, true, true);
259 // if (taxonDescription.getElements().size() == 0){
260 // TextData textData = TextData.NewInstance(Feature.IMAGE());
261 // taxonDescription.addElement(textData);
263 // Set<DescriptionElementBase> elements = taxonDescription.getElements();
264 // TextData textData = CdmBase.deproxy(elements.iterator().next(), TextData.class);
265 // for (Media media: recordMedia){
266 // textData.addMedia(media);
// a failing record is only logged (message without stack trace)
271 } catch (Exception e
) {
272 logger
.warn("Exception in Einzelbilder: bildID " + bildID
+ ". " + e
.getMessage());
278 logger
.info(pluralString
+ " to save: " + objectsToSave
.size());
279 getMediaService().save(objectsToSave
);
282 } catch (SQLException e
) {
283 logger
.error("SQLException:" + e
);
// Returns the Collection for the given collection code; doPartition passes the code
// derived from the copyright column and assigns the result to a newly created specimen.
// NOTE(review): the method body is not part of this listing -- how the collection is
// looked up or created must be confirmed in the complete file.
288 private Collection
getCollection(String collectionCode
) {
// Derives a name from the "file OS" column of the current record: the value is matched
// against "(.+)(_...._..\.jpg)" and, on a match, the first group (everything in front of
// the "_...._...jpg" tail) is extracted; otherwise a warning is logged.
// NOTE(review): the return statements are not visible in this listing.
// NOTE(review): a null "file OS" value would make pattern.matcher(..) throw a
// NullPointerException -- confirm whether the column can be null.
296 private String
getNameFromFileOs(ResultSet rs
) throws SQLException
{
297 String fileOS
= rs
.getString("file OS");
298 Pattern pattern
= Pattern
.compile("(.+)(_...._..\\.jpg)");
299 Matcher matcher
= pattern
.matcher(fileOS
);
300 if (matcher
.matches()){
301 String match
= matcher
.group(1);
304 logger
.warn("FileOS does not match: " + fileOS
);
312 private void makeAllMedia(GlobisImportState state
, ResultSet rs
, Set
<Media
> recordMedia
, Set
<Media
> objectsToSave
) throws SQLException
{
314 String pathShort
= rs
.getString("Dateipfad_kurz");
315 String fileOS
= rs
.getString("file OS");
316 pathShort
= pathShort
.replace(fileOS
, "");
317 String newPath
= state
.getConfig().getImageBaseUrl();
318 String path
= pathShort
.replace("image:Webversionen/", newPath
);
320 Media singleMedia
= makeMedia(state
, rs
, "file OS", "Legende 1", path
, objectsToSave
);
321 recordMedia
.add(singleMedia
);
322 singleMedia
= makeMedia(state
, rs
, "Dateinamen02", "Legende 2", path
, objectsToSave
);
323 recordMedia
.add(singleMedia
);
324 singleMedia
= makeMedia(state
, rs
, "Dateinamen03", "Legende 3", path
, objectsToSave
);
325 recordMedia
.add(singleMedia
);
326 singleMedia
= makeMedia(state
, rs
, "Dateinamen04", "Legende 4", path
, objectsToSave
);
327 recordMedia
.add(singleMedia
);
// Creates one Media for the file named in column fileNameAttr of the current record.
// Visible steps:
//  - read file name, legend (column legendAttr) and BildID
//  - build the URI from path + file name, with blanks replaced by "%20"
//  - if the legend is blank and media data are to be read, request the URI and log a
//    warning when the image exists
//  - create the media via getImageMedia, set the legend as English title, apply
//    doIdCreatedUpdatedNotes with IMAGE_NAMESPACE and add the media to objectsToSave
// MalformedURLException, ClientProtocolException and IOException are caught.
// NOTE(review): the declaration of 'media', the try statement, the catch bodies and the
// return statements are not visible in this listing.
// NOTE(review): a null file name is concatenated as the text "null", and URI.create
// throws IllegalArgumentException for strings that are still invalid after the blank
// replacement -- confirm how callers deal with empty Dateinamen02..04 columns.
331 private Media
makeMedia(GlobisImportState state
, ResultSet rs
, String fileNameAttr
, String legendAttr
, String path
, Set
<Media
> objectsToSave
) throws SQLException
{
333 String fileName
= rs
.getString(fileNameAttr
);
334 String legend
= rs
.getString(legendAttr
);
335 Integer bildID
= rs
.getInt("BildID");
337 String uriStr
= path
+fileName
;
338 uriStr
= uriStr
.replace(" ", "%20");
340 URI uri
= URI
.create(uriStr
);
342 // Media media = ImageInfo.NewInstanceWithMetaData(uri, null);
345 boolean readMediaData
= state
.getConfig().isDoReadMediaData();
346 if (isBlank(legend
) && readMediaData
){
347 if (UriUtils
.isOk(UriUtils
.getResponse(uri
, null))){
348 logger
.warn("Image exists but legend is null " + uri
+ ", bildID" + bildID
);
354 media
= this.getImageMedia(uri
.toString(), readMediaData
, false);
355 media
.putTitle(Language
.ENGLISH(), legend
);
356 this.doIdCreatedUpdatedNotes(state
, media
, rs
, bildID
, IMAGE_NAMESPACE
);
358 objectsToSave
.add(media
);
361 } catch (MalformedURLException e
) {
363 } catch (ClientProtocolException e
) {
365 } catch (IOException e
) {
// Maps the text of the copyright column to a collection code.
// A blank copyright is handled first; afterwards the text is compared with a fixed list
// of institution names. Names with special characters are compared via String.matches,
// where ".?" accepts zero or one arbitrary character at that position (presumably to
// tolerate differently encoded umlauts/accents -- confirm); the other names are
// compared with equals. A text that fits no entry is logged as warning.
// NOTE(review): the value returned by each branch is not visible in this listing.
372 private String
transformCopyright2CollectionCode(String copyright
){
374 if (isBlank(copyright
)){
376 }else if(copyright
.matches("Museum f.?r Naturkunde der Humboldt-Universit.?t, Berlin")){
378 }else if(copyright
.matches("Staatliches Museum f.?r Tierkunde Dresden")){
380 }else if(copyright
.equals("Natural History Museum, London")){
382 }else if(copyright
.matches("Zoologische Staatssammlung M.?nchen")){
384 }else if(copyright
.matches("Staatliches Museum f.?r Naturkunde Karlsruhe")){
386 }else if(copyright
.matches("Deutsches Entomologisches Institut M.?ncheberg")){
388 }else if(copyright
.equals("Forschungsinstitut und Naturmuseum Senckenberg")){
390 }else if(copyright
.matches("Mus.?um National d.?Histoire Naturelle, Paris")){
392 }else if(copyright
.equals("Naturhistorisches Museum Wien")){
394 }else if(copyright
.equals("Naturhistoriska Riksmuseet Stockholm")){
396 }else if(copyright
.matches("Staatliches Museum f.?r Naturkunde Stuttgart")){
398 }else if(copyright
.equals("United States National Museum of Natural History, Washington")){
400 }else if(copyright
.matches("Zentrum f.?r Biodokumentation des Saarlandes")){
402 }else if(copyright
.equals("Zoological Museum, University of Copenhagen")){
404 }else if(copyright
.equals("Zoologisches Forschungsinstitut und Museum \"Alexander Koenig\", Bonn")){
406 }else if(copyright
.equals("Zoologisches Forschungsmuseum \"Alexander Koenig\", Bonn")){
408 }else if(copyright
.matches("Zoologisches Institut der Martin-Luther-Universit.?t Halle-Wittenberg")){
410 }else if(copyright
.matches("Zoologisches Institut Universit.?t T.?bingen")){
413 logger
.warn("Unknown copyright entry: " + copyright
);
423 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
// Collects the already imported objects that the records of this partition refer to.
// The result maps a namespace to a map of objects (String key -> CdmBase):
//  - GlobisSpecTaxImport.SPEC_TAX_NAMESPACE -> TaxonBase objects for specTaxIdSet
//  - TAXON_NAMESPACE                        -> Taxon objects for currSpecIdSet
//  - GlobisSpecTaxImport.TYPE_NAMESPACE     -> Specimen objects
// Each map is loaded with getCommonService().getSourcedObjectsByIdInSource(class, ids,
// namespace). An SQLException is rethrown wrapped in a RuntimeException.
// NOTE(review): the declarations of nameSpace/cdmClass/idSet, the try statement, the
// loop over the result set and the return statement are not visible in this listing.
// NOTE(review): no visible line fills specTaxIdSet, and no visible line assigns idSet
// before the TYPE_NAMESPACE lookup (typeIdSet is filled by handleTypeKey) -- confirm
// both against the complete file.
// NOTE(review): SPEC_TAX_NAMESPACE is loaded here with class TaxonBase, whereas
// doPartition casts the map of the same namespace to Map<String, ZoologicalName> --
// verify which type is intended.
425 public Map
<Object
, Map
<String
, ?
extends CdmBase
>> getRelatedObjectsForPartition(ResultSet rs
) {
429 Map
<Object
, Map
<String
, ?
extends CdmBase
>> result
= new HashMap
<Object
, Map
<String
, ?
extends CdmBase
>>();
431 Set
<String
> currSpecIdSet
= new HashSet
<String
>();
432 Set
<String
> specTaxIdSet
= new HashSet
<String
>();
433 Set
<String
> typeIdSet
= new HashSet
<String
>();
// gather the ids referenced by the record(s)
436 handleForeignKey(rs
, currSpecIdSet
, "SpecCurrspecID");
437 handleTypeKey(rs
, typeIdSet
, "spectaxID", "copyright");
// objects registered under the specTax namespace
441 nameSpace
= GlobisSpecTaxImport
.SPEC_TAX_NAMESPACE
;
442 cdmClass
= TaxonBase
.class;
443 idSet
= specTaxIdSet
;
444 Map
<String
, TaxonBase
> specTaxMap
= (Map
<String
, TaxonBase
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
445 result
.put(nameSpace
, specTaxMap
);
// taxa referenced through SpecCurrspecID
448 nameSpace
= TAXON_NAMESPACE
;
449 cdmClass
= Taxon
.class;
450 idSet
= currSpecIdSet
;
451 Map
<String
, Taxon
> taxonMap
= (Map
<String
, Taxon
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
452 result
.put(nameSpace
, taxonMap
);
// type specimens
456 nameSpace
= GlobisSpecTaxImport
.TYPE_NAMESPACE
;
457 cdmClass
= Specimen
.class;
459 Map
<String
, Specimen
> typeMap
= (Map
<String
, Specimen
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
460 result
.put(nameSpace
, typeMap
);
463 } catch (SQLException e
) {
464 throw new RuntimeException(e
);
// Computes the type specimen id of the current record: if column specTaxIdAttr is not
// null and column copyrightAttr is not blank, the id is built with
// GlobisSpecTaxImport.getTypeId from the specTax id and the collection code derived
// from the copyright.
// NOTE(review): the statement that uses the computed id (presumably adding it to idSet)
// is not visible in this listing -- confirm against the complete file.
469 private void handleTypeKey(ResultSet rs
, Set
<String
> idSet
, String specTaxIdAttr
, String copyrightAttr
)
470 throws SQLException
{
471 Integer specTaxId
= nullSafeInt(rs
, specTaxIdAttr
);
472 if (specTaxId
!= null){
473 String copyright
= rs
.getString(copyrightAttr
);
474 if (isNotBlank(copyright
)){
475 String id
= GlobisSpecTaxImport
.getTypeId(specTaxId
, transformCopyright2CollectionCode(copyright
));
482 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
485 protected boolean doCheck(GlobisImportState state
){
486 IOValidator
<GlobisImportState
> validator
= new GlobisImageImportValidator();
487 return validator
.validate(state
);
492 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
494 protected boolean isIgnore(GlobisImportState state
){
495 return ! state
.getConfig().isDoImages();
502 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doInvoke(eu.etaxonomy.cdm.io.globis.GlobisImportState)
505 protected void doInvoke(GlobisImportState state
) {
506 Reference refGart
= ReferenceFactory
.newGeneric();
507 refGart
.setTitleCache("GART");
508 refGart
.setUuid(uuidGartRef
);
509 getReferenceService().saveOrUpdate(refGart
);
510 super.doInvoke(state
);