latest developments in DwcA import
author Andreas Müller <a.mueller@bgbm.org>
Mon, 19 Mar 2012 17:45:55 +0000 (17:45 +0000)
committer Andreas Müller <a.mueller@bgbm.org>
Mon, 19 Mar 2012 17:45:55 +0000 (17:45 +0000)
19 files changed:
.gitattributes
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/CsvStream.java
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/DwcTaxonCsv2CdmTaxonConverter.java
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/DwcTaxonCsv2CdmTaxonRelationConverter.java
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/DwcaImport.java
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/DwcaImportConfigurator.java
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/DwcaImportState.java
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/GbifDescriptionCsv2CdmConverter.java
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/GbifVernacularNameCsv2CdmConverter.java
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/IImportMapping.java
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/INamespace.java [new file with mode: 0644]
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/INamespaceReader.java [new file with mode: 0644]
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/IPartitionableConverter.java [new file with mode: 0644]
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/IReader.java
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/InMemoryMapping.java
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/LookAheadStream.java [new file with mode: 0644]
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/MappingEntry.java [new file with mode: 0644]
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/PartitionableConverterBase.java [moved from cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/ConverterBase.java with 76% similarity]
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/StreamPartitioner.java [new file with mode: 0644]

index 966fa0f74ddc9cf9da55671fe8f276e2aecd70f9..8cbb4e9bcb39050861db631e1eb1efbf28a48c19 100644 (file)
@@ -288,7 +288,6 @@ cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/common/mapping/out/IndexCounter.java
 cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/common/mapping/out/MethodMapper.java -text
 cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/common/mapping/out/ObjectChangeMapper.java -text
 cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/TermUri.java -text
-cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/ConverterBase.java -text
 cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/CsvStream.java -text
 cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/CsvStreamItem.java -text
 cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/DwcTaxonCsv2CdmTaxonConverter.java -text
@@ -304,10 +303,17 @@ cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/IConverter.java -text
 cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/IConverterInput.java -text
 cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/IConverterOutput.java -text
 cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/IImportMapping.java -text
+cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/INamespace.java -text
+cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/INamespaceReader.java -text
+cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/IPartitionableConverter.java -text
 cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/IReader.java -text
 cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/InMemoryMapping.java -text
 cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/ListReader.java -text
+cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/LookAheadStream.java -text
 cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/MappedCdmBase.java -text
+cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/MappingEntry.java -text
+cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/PartitionableConverterBase.java -text
+cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/StreamPartitioner.java -text
 cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/jaxb/Archive.java -text
 cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/jaxb/ArchiveEntryBase.java -text
 cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/jaxb/Core.java -text
index dfaec39556aa6e9b86bdd85a40a25cfb5885f2dd..350e5a403d7a760b4b80f2b3219665b18096b02c 100644 (file)
@@ -28,7 +28,7 @@ import eu.etaxonomy.cdm.io.dwca.jaxb.Field;
  * @date 17.10.2011\r
  *\r
  */\r
-public class CsvStream implements IReader<CsvStreamItem>{\r
+public class CsvStream implements INamespaceReader<CsvStreamItem>{\r
        @SuppressWarnings("unused")\r
        private static Logger logger = Logger.getLogger(CsvStream.class);\r
 \r
@@ -134,4 +134,7 @@ public class CsvStream implements IReader<CsvStreamItem>{
        public String getFilesLocation() {\r
                return this.archiveEntry.getFiles().getLocation();\r
        }\r
+\r
+       \r
+       \r
 }\r
index f89c61142ef1e6bbbf7acfe20ae6728afe3d4c33..038c62e45581bae2444b2630c0c2a1c0d9167665 100644 (file)
 package eu.etaxonomy.cdm.io.dwca.in;\r
 \r
 import java.util.ArrayList;\r
+import java.util.HashMap;\r
 import java.util.List;\r
 import java.util.Map;\r
+import java.util.Set;\r
 \r
 import org.apache.commons.lang.StringUtils;\r
 import org.apache.log4j.Logger;\r
@@ -38,7 +40,7 @@ import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
  * @date 22.11.2011\r
  *\r
  */\r
-public class DwcTaxonCsv2CdmTaxonConverter extends ConverterBase<DwcaImportState> implements IConverter<CsvStreamItem, IReader<CdmBase>, String>{\r
+public class DwcTaxonCsv2CdmTaxonConverter extends PartitionableConverterBase<DwcaImportState> implements IPartitionableConverter<CsvStreamItem, IReader<CdmBase>, String>{\r
        @SuppressWarnings("unused")\r
        private static Logger logger = Logger.getLogger(DwcTaxonCsv2CdmTaxonConverter.class);\r
 \r
@@ -313,11 +315,24 @@ public class DwcTaxonCsv2CdmTaxonConverter extends ConverterBase<DwcaImportState
                }\r
                        \r
                return result;\r
+\r
        }\r
        \r
+// ********************** PARTITIONABLE ****************************************/\r
+\r
+\r
+       @Override\r
+       protected void makeForeignKeysForItem(CsvStreamItem next, Map<String, Set<String>> result) {\r
+               //do nothing, no foreign keys needed here\r
+       }\r
+       \r
+//** ***************************** TO STRING *********************************************/\r
+       \r
        @Override\r
        public String toString(){\r
                return this.getClass().getName();\r
        }\r
 \r
+\r
+       \r
 }\r
index a05283274045857f07a112620c769c60505e21a9..5722e60b78aa58f7014bed87f34d5d2037a06ca6 100644 (file)
@@ -10,6 +10,7 @@
 package eu.etaxonomy.cdm.io.dwca.in;\r
 \r
 import java.util.ArrayList;\r
+import java.util.HashMap;\r
 import java.util.HashSet;\r
 import java.util.List;\r
 import java.util.Map;\r
@@ -32,8 +33,8 @@ import eu.etaxonomy.cdm.model.taxon.TaxonBase;
  * @date 23.11.2011\r
  *\r
  */\r
-public class DwcTaxonCsv2CdmTaxonRelationConverter<STATE extends DwcaImportState> extends ConverterBase<DwcaImportState> \r
-                                               implements IConverter<CsvStreamItem, IReader<CdmBase>, String>{\r
+public class DwcTaxonCsv2CdmTaxonRelationConverter<STATE extends DwcaImportState> extends PartitionableConverterBase<DwcaImportState> \r
+                                               implements IPartitionableConverter<CsvStreamItem, INamespaceReader<CdmBase>, String>{\r
        private static Logger logger = Logger.getLogger(DwcTaxonCsv2CdmTaxonRelationConverter.class);\r
 \r
        private static final String ID = "id";\r
@@ -251,11 +252,21 @@ public class DwcTaxonCsv2CdmTaxonRelationConverter<STATE extends DwcaImportState
        }\r
 \r
 \r
+//**************************** PARTITIONABLE ************************************************\r
 \r
+\r
+\r
+\r
+       protected void makeForeignKeysForItem(CsvStreamItem item, Map<String, Set<String>> fkMap){\r
+               //do nothing, there are no foreign keys yet to handle \r
+       }\r
+       \r
+//************************************* TO STRING ********************************************\r
        \r
        @Override\r
        public String toString(){\r
                return this.getClass().getName();\r
        }\r
 \r
+\r
 }\r
index 5d23fc1b34ad7902b6dc33143b07b34cc186b472..01984150771262162d2956acfd302440fc051418 100644 (file)
@@ -9,6 +9,8 @@
 package eu.etaxonomy.cdm.io.dwca.in;\r
 \r
 import java.net.URI;\r
+import java.util.Map;\r
+import java.util.Set;\r
 \r
 import org.apache.log4j.Logger;\r
 import org.springframework.stereotype.Component;\r
@@ -39,24 +41,74 @@ public class DwcaImport extends CdmImportBase<DwcaImportConfigurator, DwcaImport
                IReader<CsvStream> stream = streamConverter.getStreamStream(state);\r
                while (stream.hasNext()){\r
                        CsvStream csvStream = stream.read();\r
-                       while (csvStream.hasNext()){\r
-                               CsvStreamItem item = csvStream.read();\r
-                               TransactionStatus tx = startTransaction();\r
-                               handleCsvStreamItem(state, item);\r
-                               commitTransaction(tx);\r
+                       \r
+                       if (state.getConfig().isUsePartitions()){\r
+                               StreamPartitioner<CsvStreamItem> partitionStream = new StreamPartitioner<CsvStreamItem>(csvStream, null, 1000);\r
+                               \r
+                               while (partitionStream.hasNext()){\r
+                                       TransactionStatus tx = startTransaction();\r
+                                       \r
+                                       handlePartitionedStreamItem(state, partitionStream);\r
+                                       commitTransaction(tx);\r
+                               }\r
+                       }else {\r
+                                       \r
+                               while (csvStream.hasNext()){\r
+                                               TransactionStatus tx = startTransaction();\r
+                                               \r
+                                               CsvStreamItem item = csvStream.read();\r
+                                               handleCsvStreamItem(state, item);\r
+                                               \r
+                                               commitTransaction(tx);\r
+                                       }\r
                        }\r
-                       finalizeStream(csvStream, state);\r
+\r
+                       finalizeStream(csvStream, state);\r
                }\r
                return;\r
        }\r
 \r
+       private void handlePartitionedStreamItem(DwcaImportState state,  StreamPartitioner<CsvStreamItem> partStream) {\r
+               IPartitionableConverter<CsvStreamItem, IReader<CdmBase>, String> converter = getConverter(partStream.getTerm(), state);\r
+               if (converter == null){\r
+                       state.setSuccess(false);\r
+                       return;\r
+               }\r
+               \r
+               IReader<CsvStreamItem> inputStream = partStream.read();\r
+               Map<String, Set<String>> foreignKeys = converter.getPartitionForeignKeys(inputStream);\r
+               IImportMapping mapping = state.getMapping();\r
+               IImportMapping partialMapping = mapping.getPartialMapping(foreignKeys);\r
+               state.loadRelatedObjects(partialMapping);\r
+               \r
+//             while (inputStream.hasNext()){\r
+//                     IReader<MappedCdmBase> resultReader = converter.map(inputStream.read(), xx);\r
+//                     \r
+//                     xx;\r
+//                     while (resultReader.hasNext()){\r
+//                             \r
+//                             MappedCdmBase mappedCdmBase = (resultReader.read());\r
+//                             CdmBase cdmBase = mappedCdmBase.getCdmBase();\r
+//                             save(cdmBase, state, item.getLocation());\r
+//                             if (mappedCdmBase.getSourceId() != null && cdmBase.isInstanceOf(IdentifiableEntity.class)){\r
+//                                     IdentifiableEntity<?> entity = CdmBase.deproxy(cdmBase, IdentifiableEntity.class);\r
+//                                     \r
+//                                     String namespace = mappedCdmBase.getNamespace();\r
+//                                     state.putMapping(namespace,mappedCdmBase.getSourceId(), entity);\r
+//                             }\r
+//                     }\r
+//             }\r
+               return;\r
+               \r
+       }\r
+\r
        /**\r
         * @param state\r
         * @param item\r
         * @return\r
         */\r
        private void handleCsvStreamItem(DwcaImportState state, CsvStreamItem item) {\r
-               IConverter<CsvStreamItem, IReader<CdmBase>, String> converter = getConverter(item, state);\r
+               IConverter<CsvStreamItem, IReader<CdmBase>, String> converter = getConverter(item.term, state);\r
                if (converter == null){\r
                        state.setSuccess(false);\r
                        return;\r
@@ -114,8 +166,7 @@ public class DwcaImport extends CdmImportBase<DwcaImportConfigurator, DwcaImport
                }\r
        }\r
 \r
-       private IConverter<CsvStreamItem,IReader<CdmBase>, String> getConverter(CsvStreamItem item, DwcaImportState state) {\r
-               TermUri namespace = item.term;\r
+       private IPartitionableConverter<CsvStreamItem,IReader<CdmBase>, String> getConverter(TermUri namespace, DwcaImportState state) {\r
                if (namespace.equals(TermUri.DWC_TAXON)){\r
                        if (! state.isTaxaCreated()){\r
                                return new DwcTaxonCsv2CdmTaxonConverter(state);\r
index 7f1302b2a80c48390311c3fbd355acc47844c57e..d252d74044bc5c74da029a194a95a74db6233bd5 100644 (file)
@@ -29,6 +29,8 @@ public class DwcaImportConfigurator extends ImportConfiguratorBase<DwcaImportSta
        private static final Logger logger = Logger.getLogger(DwcaImportConfigurator.class);\r
        private static IInputTransformer defaultTransformer = new DwcaImportTransformer();\r
        \r
+       private boolean usePartitions;\r
+       \r
 //     //new\r
 //     private boolean doSpecimen = true;  //reads the specimen worksheet\r
 //     private boolean doAreaLevels = true;  //reads the areaLevels worksheet\r
@@ -53,8 +55,8 @@ public class DwcaImportConfigurator extends ImportConfiguratorBase<DwcaImportSta
 ////   private boolean includeSynonymsForTaxonMatching = false;\r
 \r
                \r
-       \r
-       \r
+\r
+\r
        @SuppressWarnings("unchecked")\r
        protected void makeIoClassList(){\r
                ioClassList = new Class[]{\r
@@ -96,12 +98,21 @@ public class DwcaImportConfigurator extends ImportConfiguratorBase<DwcaImportSta
                //TODO\r
                if (this.sourceReference == null){\r
                        logger.warn("getSource Reference not yet fully implemented");\r
-                       sourceReference = ReferenceFactory.newDatabase();\r
-                       sourceReference.setTitleCache("Specimen import", true);\r
+                       sourceReference = ReferenceFactory.newGeneric();\r
+                       sourceReference.setTitleCache("DwC-A import", true);\r
                }\r
                return sourceReference;\r
        }\r
+\r
+       \r
        \r
+       public boolean isUsePartitions() {\r
+               return usePartitions;\r
+       }\r
+\r
+       public void setUsePartitions(boolean usePartitions) {\r
+               this.usePartitions = usePartitions;\r
+       }\r
        \r
        \r
 }\r
index f57c8989c41a5b01c31fe0b5193eece1b74f59de..13a312821cb32adf74d138440194874cb3f78517 100644 (file)
@@ -12,6 +12,7 @@ package eu.etaxonomy.cdm.io.dwca.in;
 \r
 import java.util.ArrayList;\r
 import java.util.HashMap;\r
+import java.util.HashSet;\r
 import java.util.List;\r
 import java.util.Map;\r
 import java.util.Set;\r
@@ -23,17 +24,16 @@ import eu.etaxonomy.cdm.io.common.ImportStateBase;
 import eu.etaxonomy.cdm.io.dwca.in.InMemoryMapping.CdmKey;\r
 import eu.etaxonomy.cdm.model.common.CdmBase;\r
 import eu.etaxonomy.cdm.model.common.IdentifiableEntity;\r
-import eu.etaxonomy.cdm.model.taxon.TaxonBase;\r
 \r
 /**\r
  * @author a.mueller\r
  * @created 23.11.2011\r
  */\r
 public class DwcaImportState extends ImportStateBase<DwcaImportConfigurator, DwcaImport>{\r
-       @SuppressWarnings("unused")\r
        private static final Logger logger = Logger.getLogger(DwcaImportState.class);\r
 \r
        boolean taxaCreated;\r
+       private Map<String, Map<String, IdentifiableEntity>> partitionStore;\r
        \r
        private IImportMapping mapping = new InMemoryMapping();\r
        \r
@@ -57,6 +57,9 @@ public class DwcaImportState extends ImportStateBase<DwcaImportConfigurator, Dwc
                this.taxaCreated = taxaCreated;\r
        }\r
        \r
+//********************* MAPPING ACCESS *********************************\r
+       //TODO this may move to an external class soon\r
+       \r
        public void putMapping(String namespace, Integer sourceKey, IdentifiableEntity<?> destinationObject){\r
                mapping.putMapping(namespace, sourceKey, destinationObject);\r
        }\r
@@ -72,13 +75,26 @@ public class DwcaImportState extends ImportStateBase<DwcaImportConfigurator, Dwc
        \r
        public <CLASS extends IdentifiableEntity> List<CLASS> get(String namespace, String sourceKey,Class<CLASS> destinationClass){\r
                List<CLASS> result = new ArrayList<CLASS>(); \r
-               Set<CdmKey> keySet = mapping.get(namespace, sourceKey);\r
-               for (CdmKey<CLASS> key: keySet){\r
-                       if (destinationClass == null || destinationClass.isAssignableFrom(key.clazz)){\r
-                               IIdentifiableEntityService<CLASS> service = getCurrentIO().getServiceByClass(key.clazz);\r
-                               CLASS entity = CdmBase.deproxy(service.find(key.id), key.clazz);\r
-                               result.add(entity);\r
+               if (this.partitionStore != null){\r
+                       Map<String, IdentifiableEntity> namespaceMap = this.partitionStore.get(namespace);\r
+                       if (namespaceMap != null){\r
+                               IdentifiableEntity cdmBase = namespaceMap.get(sourceKey);\r
+                               if (cdmBase.isInstanceOf(destinationClass)){\r
+                                       CLASS typedCdmBase = CdmBase.deproxy(cdmBase, destinationClass);\r
+                                       result.add(typedCdmBase);\r
+                               }\r
+                               \r
+                       }\r
+               }else{\r
+                       Set<CdmKey> keySet = mapping.get(namespace, sourceKey);\r
+                       for (CdmKey<CLASS> key: keySet){\r
+                               if (destinationClass == null || destinationClass.isAssignableFrom(key.clazz)){\r
+                                       IIdentifiableEntityService<CLASS> service = getCurrentIO().getServiceByClass(key.clazz);\r
+                                       CLASS entity = CdmBase.deproxy(service.find(key.id), key.clazz);\r
+                                       result.add(entity);\r
+                               }\r
                        }\r
+                       return result;\r
                }\r
                return result;\r
        }\r
@@ -86,8 +102,92 @@ public class DwcaImportState extends ImportStateBase<DwcaImportConfigurator, Dwc
        public boolean exists(String namespace, String sourceKey,Class<?> destinationClass){\r
                return mapping.exists(namespace, sourceKey, destinationClass);\r
        }\r
+       \r
+       \r
+       public  void loadRelatedObjects (IImportMapping mapping){\r
+                Map<String, Map<String, IdentifiableEntity>> result = new HashMap<String, Map<String,IdentifiableEntity>>();\r
+               \r
+               List<MappingEntry<String, String, Class, Integer>> mappingEntryList = mapping.getEntryList();\r
+               \r
+               //order ids by destination classes\r
+               Map<Class, Set<Integer>> destinationNamespaceMap = new HashMap<Class, Set<Integer>>(); \r
+               for (MappingEntry<String, String, Class, Integer> entry : mappingEntryList){\r
+                       Set<Integer> idSet = destinationNamespaceMap.get(entry.destinationNamespace);\r
+                       if (idSet == null){\r
+                               idSet = new HashSet<Integer>();\r
+                               destinationNamespaceMap.put(entry.destinationNamespace, idSet);\r
+                       }\r
+                       idSet.add(entry.destinationId);\r
+               }\r
+               \r
+               //retrieve cdm objects per class\r
+               Map<Class, Map<Integer, IdentifiableEntity>> classMap = new HashMap<Class, Map<Integer,IdentifiableEntity>>();\r
+               for (Class<?> cdmClass :destinationNamespaceMap.keySet()){\r
+                       IIdentifiableEntityService<?> classService = getCurrentIO().getServiceByClass(cdmClass);\r
+                       Set<Integer> idSet = destinationNamespaceMap.get(cdmClass);\r
+                       List<? extends IdentifiableEntity> relatedObjects = classService.findById(idSet);\r
+                       \r
+                       //put into id map\r
+                       Map<Integer, IdentifiableEntity> idMap = new HashMap<Integer, IdentifiableEntity>();\r
+                       for (IdentifiableEntity identEnt : relatedObjects){\r
+                               idMap.put(identEnt.getId(), identEnt);\r
+                       }\r
+                       \r
+                       //add to class map\r
+                       classMap.put(cdmClass, idMap);\r
+               }\r
+               \r
+               //fill related object map\r
+               for (MappingEntry<String, String, Class, Integer> entry : mappingEntryList){\r
+                       Map<String, IdentifiableEntity> namespaceMap = getOrMakeNamespaceMap(result, entry.namespace);\r
+                       IdentifiableEntity cdmBase = getCdmObject(classMap, entry);\r
+                       if (cdmBase != null){\r
+                               namespaceMap.put(entry.sourceKey, cdmBase);\r
+                       }else{\r
+                               logger.info("CdmBase not found for mapping entry.");\r
+                       }\r
+               }\r
+               \r
+               //store\r
+               this.partitionStore = result;\r
+               \r
+       }\r
+\r
+\r
+//     public Map<String, Map<String, IdentifiableEntity>> getPartitionStore() {\r
+//             return partitionStore;\r
+//     }\r
+\r
+       public void unloadPartitionStore(Map<String, Map<String, IdentifiableEntity>> partitionStore) {\r
+               this.partitionStore = new HashMap<String, Map<String,IdentifiableEntity>>();\r
+       }\r
+\r
+       public IImportMapping getMapping() {\r
+               return this.mapping;\r
+       }\r
 \r
        \r
+       private Map<String, IdentifiableEntity> getOrMakeNamespaceMap(Map<String, Map<String, IdentifiableEntity>> relatedObjectMap2, String namespace) {\r
+               Map<String, IdentifiableEntity> namespaceMap = relatedObjectMap2.get(namespace);\r
+               if (namespaceMap == null){\r
+                       namespaceMap = new HashMap<String, IdentifiableEntity>();\r
+                       relatedObjectMap2.put(namespace, namespaceMap);\r
+               }\r
+               return namespaceMap;\r
+       }\r
+       \r
+\r
+       private IdentifiableEntity getCdmObject(Map<Class, Map<Integer, IdentifiableEntity>> classMap,\r
+                       MappingEntry<String, String, Class, Integer> entry) {\r
+               Class<?> cdmClass = entry.destinationNamespace;\r
+               Integer cdmKey = entry.destinationId;\r
+               Map<Integer, IdentifiableEntity> idMap = classMap.get(cdmClass);\r
+               if (idMap != null){\r
+                       return idMap.get(cdmKey);\r
+               }else{\r
+                       return null;\r
+               }\r
+       }\r
        \r
        \r
 \r
index 7ff32ae733c1513d1b416106771e116191e14944..71755394b80291dd2938e2537e902f58c975910f 100644 (file)
@@ -12,6 +12,7 @@ package eu.etaxonomy.cdm.io.dwca.in;
 import java.util.ArrayList;\r
 import java.util.List;\r
 import java.util.Map;\r
+import java.util.Set;\r
 \r
 import org.apache.log4j.Logger;\r
 \r
@@ -24,7 +25,9 @@ import eu.etaxonomy.cdm.model.taxon.Taxon;
  * @date 22.11.2011\r
  *\r
  */\r
-public class GbifDescriptionCsv2CdmConverter extends ConverterBase<DwcaImportState>  implements IConverter<CsvStreamItem, IReader<CdmBase>, String>{\r
+public class GbifDescriptionCsv2CdmConverter extends PartitionableConverterBase<DwcaImportState>  \r
+                                               implements IPartitionableConverter<CsvStreamItem, IReader<CdmBase>, String>{\r
+       \r
        @SuppressWarnings("unused")\r
        private static final Logger logger = Logger.getLogger(GbifDescriptionCsv2CdmConverter.class);\r
 \r
@@ -67,9 +70,20 @@ public class GbifDescriptionCsv2CdmConverter extends ConverterBase<DwcaImportSta
                return null;\r
        }\r
 \r
+       \r
+//********************** PARTITIONABLE **************************************/\r
+\r
+       @Override\r
+       protected void makeForeignKeysForItem(CsvStreamItem next, Map<String, Set<String>> result) {\r
+               logger.warn("Not yet implemented");     \r
+       }\r
+       \r
+//******************* TO STRING ******************************************/\r
+       \r
        @Override\r
        public String toString(){\r
                return this.getClass().getName();\r
        }\r
 \r
+\r
 }\r
index c1bc9c7c46397c6681d3b5a732545047077a1c37..647622453c1b21ceea19b9f0ac7fb5e309cb84bf 100644 (file)
 package eu.etaxonomy.cdm.io.dwca.in;\r
 \r
 import java.util.ArrayList;\r
+import java.util.HashMap;\r
 import java.util.List;\r
 import java.util.Map;\r
+import java.util.Set;\r
 \r
 import org.apache.log4j.Logger;\r
 \r
@@ -24,7 +26,7 @@ import eu.etaxonomy.cdm.model.taxon.Taxon;
  * @date 22.11.2011\r
  *\r
  */\r
-public class GbifVernacularNameCsv2CdmConverter extends ConverterBase<DwcaImportState> implements IConverter<CsvStreamItem, IReader<CdmBase>, String>{\r
+public class GbifVernacularNameCsv2CdmConverter extends PartitionableConverterBase<DwcaImportState> implements IPartitionableConverter<CsvStreamItem, IReader<CdmBase>, String> {\r
        @SuppressWarnings("unused")\r
        private static final Logger logger = Logger.getLogger(GbifVernacularNameCsv2CdmConverter.class);\r
        private static final String CORE_ID = "coreId";\r
@@ -70,10 +72,26 @@ public class GbifVernacularNameCsv2CdmConverter extends ConverterBase<DwcaImport
                // TODO Auto-generated method stub\r
                return null;\r
        }\r
+\r
+\r
+\r
+//**************************** PARTITIONABLE ************************************************\r
+\r
+       @Override\r
+       protected void makeForeignKeysForItem(CsvStreamItem next, Map<String, Set<String>> result) {\r
+               // TODO Auto-generated method stub\r
+               \r
+       }\r
+       \r
+//************************ STRING ************************************************/\r
        \r
        @Override\r
        public String toString(){\r
                return this.getClass().getName();\r
        }\r
 \r
+\r
+\r
+\r
+\r
 }\r
index acb465c265936236bb7a5006385f8ea8f8abdbc1..ad8fb63691f7cb552c2098b1614c9a05e7ac1447 100644 (file)
@@ -8,6 +8,8 @@
 */\r
 package eu.etaxonomy.cdm.io.dwca.in;\r
 \r
+import java.util.List;\r
+import java.util.Map;\r
 import java.util.Set;\r
 \r
 import eu.etaxonomy.cdm.io.dwca.in.InMemoryMapping.CdmKey;\r
@@ -56,5 +58,19 @@ public interface IImportMapping {
         */\r
        public boolean exists(String namespace, String sourceKey,Class<?> destinationClass);\r
 \r
+       /**\r
+        * Returns the mapping for only those objects addressed by the namespacedSourceKeys parameter\r
+        * @param namespacedSourceKeys\r
+        * @return\r
+        */\r
+       public IImportMapping getPartialMapping(Map<String, Set<String>> namespacedSourceKeys);\r
 \r
+       /**\r
+        * Returns a list for all mapping entries.\r
+        * @return\r
+        */\r
+       public List<MappingEntry<String, String, Class, Integer>> getEntryList();\r
+       \r
+       \r
+       \r
 }\r
diff --git a/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/INamespace.java b/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/INamespace.java
new file mode 100644 (file)
index 0000000..189917f
--- /dev/null
@@ -0,0 +1,28 @@
+// $Id$\r
+/**\r
+* Copyright (C) 2009 EDIT\r
+* European Distributed Institute of Taxonomy \r
+* http://www.e-taxonomy.eu\r
+* \r
+* The contents of this file are subject to the Mozilla Public License Version 1.1\r
+* See LICENSE.TXT at the top of this package for the full license terms.\r
+*/\r
+package eu.etaxonomy.cdm.io.dwca.in;\r
+\r
+import eu.etaxonomy.cdm.io.dwca.TermUri;\r
+\r
+/**\r
+ * @author a.mueller\r
+ * @date 10.03.2012\r
+ *\r
+ */\r
+public interface INamespace {\r
+\r
+       \r
+       /**\r
+        * Returns the namespace of the items included in the stream\r
+        * @return the term\r
+        */\r
+       public TermUri getTerm();\r
+       \r
+}\r
diff --git a/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/INamespaceReader.java b/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/INamespaceReader.java
new file mode 100644 (file)
index 0000000..9bced6e
--- /dev/null
@@ -0,0 +1,21 @@
+// $Id$\r
+/**\r
+* Copyright (C) 2009 EDIT\r
+* European Distributed Institute of Taxonomy \r
+* http://www.e-taxonomy.eu\r
+* \r
+* The contents of this file are subject to the Mozilla Public License Version 1.1\r
+* See LICENSE.TXT at the top of this package for the full license terms.\r
+*/\r
+package eu.etaxonomy.cdm.io.dwca.in;\r
+\r
+\r
+/**\r
+ * @author a.mueller\r
+ * @date 23.11.2011\r
+ *\r
+ */\r
+public interface INamespaceReader<TYPE extends Object> extends IConverterOutput<IReader<TYPE>>, INamespace, IReader<TYPE>{\r
+\r
+\r
+}\r
diff --git a/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/IPartitionableConverter.java b/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/IPartitionableConverter.java
new file mode 100644 (file)
index 0000000..cb83f75
--- /dev/null
@@ -0,0 +1,30 @@
+/**\r
+* Copyright (C) 2009 EDIT\r
+* European Distributed Institute of Taxonomy \r
+* http://www.e-taxonomy.eu\r
+* \r
+* The contents of this file are subject to the Mozilla Public License Version 1.1\r
+* See LICENSE.TXT at the top of this package for the full license terms.\r
+*/\r
+package eu.etaxonomy.cdm.io.dwca.in;\r
+\r
+import java.util.Map;\r
+import java.util.Set;\r
+\r
+/**\r
+ * Interface for converters that allow partitioned conversion.\r
+ * @author a.mueller\r
+ *\r
+ */\r
+public interface IPartitionableConverter<IN extends IConverterInput<CsvStreamItem>, OUT extends IConverterOutput, OBJ extends Object> extends IConverter<IN, OUT, OBJ> {\r
+       \r
+       \r
+       /**\r
+        * Returns those foreign keys included in the input streams partition separated by namespaces.\r
+        * @return\r
+        */\r
+       //TODO make instream a more generic type of stream\r
+       public Map<String, Set<String>> getPartitionForeignKeys(IReader<CsvStreamItem> instream);\r
+       \r
+       \r
+}\r
index fa3d23fd5615f4b533d04b8bb26cb56fd070d07c..fed360086e45c52010272caece486f3085c64744 100644 (file)
@@ -9,6 +9,7 @@
 */\r
 package eu.etaxonomy.cdm.io.dwca.in;\r
 \r
+\r
 /**\r
  * @author a.mueller\r
  * @date 23.11.2011\r
@@ -28,4 +29,6 @@ public interface IReader<TYPE extends Object> extends IConverterOutput<IReader<T
         * @return True if there is a next object, false otherwise.\r
         */\r
        boolean hasNext();\r
+       \r
+       \r
 }\r
index 86988deb792649047f59eb4a835a695d04ba3179..f69e3754c3fcc677327e0c440a4a7163ae31c65a 100644 (file)
@@ -8,11 +8,15 @@
 */\r
 package eu.etaxonomy.cdm.io.dwca.in;\r
 \r
+import java.util.ArrayList;\r
 import java.util.HashMap;\r
 import java.util.HashSet;\r
+import java.util.List;\r
 import java.util.Map;\r
+import java.util.Map.Entry;\r
 import java.util.Set;\r
 \r
+import eu.etaxonomy.cdm.io.dwca.in.InMemoryMapping.CdmKey;\r
 import eu.etaxonomy.cdm.model.common.IdentifiableEntity;\r
 \r
 /**\r
@@ -31,7 +35,7 @@ public class InMemoryMapping implements IImportMapping {
                Class<CLASS> clazz;\r
                int id;\r
                \r
-               private CdmKey(IdentifiableEntity object){\r
+               private CdmKey(IdentifiableEntity<?> object){\r
                        this.clazz = (Class)object.getClass();\r
                        this.id = object.getId();\r
                }\r
@@ -47,8 +51,14 @@ public class InMemoryMapping implements IImportMapping {
                putMapping(namespace, String.valueOf(sourceKey), destinationObject);\r
        }\r
                \r
+\r
+       /**\r
+        * Maps the given source key to the destination object within the given namespace.\r
+        * The object itself is not stored: it is wrapped in a {@link CdmKey} (which keeps the\r
+        * object's class and id) and handed to the CdmKey based overload of this method.\r
+        *\r
+        * @param namespace the namespace the source key belongs to\r
+        * @param sourceKey the key of the object in the source\r
+        * @param destinationObject the object the source key is mapped to\r
+        */\r
        @Override\r
        public void putMapping(String namespace, String sourceKey, IdentifiableEntity destinationObject){\r
+               CdmKey<IdentifiableEntity<?>> cdmKey = new CdmKey(destinationObject);\r
+               putMapping(namespace, sourceKey, cdmKey);\r
+       }\r
+       \r
+       public void putMapping(String namespace, String sourceKey, CdmKey<IdentifiableEntity<?>> cdmKey){\r
                Map<String, Set<CdmKey>> namespaceMap = mapping.get(namespace);\r
                if (namespaceMap == null){\r
                        namespaceMap = new HashMap<String, Set<CdmKey>>();\r
@@ -60,7 +70,7 @@ public class InMemoryMapping implements IImportMapping {
                        namespaceMap.put(sourceKey, keySet);\r
                }\r
                \r
-               keySet.add(new CdmKey(destinationObject));\r
+               keySet.add(cdmKey);\r
        }\r
        \r
        @Override\r
@@ -86,4 +96,35 @@ public class InMemoryMapping implements IImportMapping {
                return false;\r
        }\r
 \r
+       /**\r
+        * Builds a new in-memory mapping that contains only the entries of this mapping\r
+        * which belong to the given source keys.\r
+        *\r
+        * @param namespacedSourceKeys the requested source keys; each map key is a namespace,\r
+        *        each value the set of source keys requested for that namespace\r
+        * @return a new mapping holding the destination keys of all requested source keys\r
+        */\r
+       @Override\r
+       public IImportMapping getPartialMapping( Map<String, Set<String>> namespacedSourceKeys) {\r
+               InMemoryMapping subset = new InMemoryMapping();\r
+               for (Entry<String,Set<String>> requested : namespacedSourceKeys.entrySet()){\r
+                       String namespace = requested.getKey();\r
+                       Set<String> sourceKeys = requested.getValue();\r
+                       for (String sourceKey : sourceKeys){\r
+                               // NOTE(review): assumes get(..) never returns null for unknown keys - confirm\r
+                               Set<CdmKey> mappedKeys = this.get(namespace, sourceKey);\r
+                               for (CdmKey mappedKey : mappedKeys){\r
+                                       subset.putMapping(namespace, sourceKey, mappedKey);\r
+                               }\r
+                       }\r
+               }\r
+               return subset;\r
+       }\r
+\r
+\r
+       /**\r
+        * Flattens the nested namespace / source id / destination key structure of this mapping\r
+        * into a list with one entry per destination key.\r
+        *\r
+        * @return a new list of entries, each holding source namespace, source id,\r
+        *         destination class and destination id\r
+        */\r
+       @Override\r
+       public List<MappingEntry<String, String, Class, Integer>> getEntryList() {\r
+               List<MappingEntry<String, String, Class, Integer>> entries = new ArrayList<MappingEntry<String,String,Class,Integer>>();\r
+               for (Entry<String, Map<String, Set<CdmKey>>> byNamespace : mapping.entrySet() ){\r
+                       Map<String, Set<CdmKey>> keysBySourceId = byNamespace.getValue();\r
+                       for (Entry<String, Set<CdmKey>> bySourceId : keysBySourceId.entrySet() ){\r
+                               for (CdmKey cdmKey : bySourceId.getValue()){\r
+                                       MappingEntry<String, String, Class, Integer> entry = new MappingEntry<String, String, Class, Integer>(\r
+                                                       byNamespace.getKey(), bySourceId.getKey(), cdmKey.clazz, cdmKey.id);\r
+                                       entries.add(entry);\r
+                               }\r
+                       }\r
+               }\r
+               return entries;\r
+       }\r
+\r
 }\r
diff --git a/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/LookAheadStream.java b/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/LookAheadStream.java
new file mode 100644 (file)
index 0000000..64b5020
--- /dev/null
@@ -0,0 +1,89 @@
+/**\r
+* Copyright (C) 2007 EDIT\r
+* European Distributed Institute of Taxonomy \r
+* http://www.e-taxonomy.eu\r
+* \r
+* The contents of this file are subject to the Mozilla Public License Version 1.1\r
+* See LICENSE.TXT at the top of this package for the full license terms.\r
+*/\r
+package eu.etaxonomy.cdm.io.dwca.in;\r
+\r
+import java.util.Queue;\r
+import java.util.concurrent.LinkedBlockingQueue;\r
+\r
+import org.apache.log4j.Logger;\r
+\r
+import eu.etaxonomy.cdm.io.dwca.TermUri;\r
+\r
+\r
+/**\r
+ * Wraps an {@link INamespaceReader} and allows reading items ahead of the regular consumer.\r
+ * Items read by the look-ahead methods are buffered in a FIFO queue; {@link #read()} returns\r
+ * the buffered items first (in the order they were read) before it reads from the wrapped\r
+ * stream again.\r
+ *\r
+ * @param <ITEM> the type of the items read from the stream\r
+ * @author a.mueller\r
+ *\r
+ */\r
+public class LookAheadStream<ITEM> implements INamespaceReader<ITEM>{\r
+       @SuppressWarnings("unused")\r
+       private static final Logger logger = Logger.getLogger(LookAheadStream.class);\r
+       \r
+       /** Items already read from the wrapped stream but not yet returned by {@link #read()}. */\r
+       private final Queue<ITEM> fifo = new LinkedBlockingQueue<ITEM>();\r
+       \r
+       private final INamespaceReader<ITEM> stream;\r
+\r
+       /**\r
+        * @param stream the stream to wrap, must not be <code>null</code>\r
+        * @throws IllegalArgumentException if stream is <code>null</code>\r
+        */\r
+       public LookAheadStream(INamespaceReader<ITEM> stream) {\r
+               super();\r
+               // validate before assigning so no half initialized instance state is created\r
+               if (stream == null){\r
+                       throw new IllegalArgumentException("Stream may not be null.");\r
+               }\r
+               this.stream = stream;\r
+       }\r
+       \r
+       /**\r
+        * Returns the next item. Buffered look-ahead items are returned first,\r
+        * afterwards the wrapped stream is read directly.\r
+        */\r
+       @Override\r
+       public ITEM read(){\r
+               if (! fifo.isEmpty()){\r
+                       return fifo.remove();\r
+               }else{\r
+                       return stream.read();\r
+               }\r
+       }\r
+       \r
+       /**\r
+        * Reads the next item from the wrapped stream and buffers it, so a later\r
+        * {@link #read()} returns it again.\r
+        * @return the item read, or <code>null</code> if the wrapped stream returned <code>null</code>\r
+        */\r
+       public ITEM readLookAhead(){\r
+               ITEM result = stream.read();\r
+               // the queue implementation rejects null elements, so a null item is passed through unbuffered\r
+               if (result != null){\r
+                       fifo.add(result);\r
+               }\r
+               return result;\r
+       }\r
+       \r
+       /**\r
+        * Same as {@link #readLookAhead()} but limited by the size of the look-ahead buffer.\r
+        * @param max the maximum number of items the look-ahead buffer may hold\r
+        * @return the item read, or <code>null</code> if the buffer already holds <code>max</code>\r
+        *         items or the wrapped stream has no further item\r
+        */\r
+       public ITEM readLookAhead(int max){\r
+               if (hasNextLookAhead(max)){\r
+                       return readLookAhead();\r
+               }else{\r
+                       return null;\r
+               }\r
+       }\r
+       \r
+       /**\r
+        * @param max the maximum number of items the look-ahead buffer may hold\r
+        * @return <code>true</code> if the buffer holds less than <code>max</code> items and the\r
+        *         wrapped stream has a further item\r
+        */\r
+       public boolean hasNextLookAhead(int max){\r
+               // the buffer may grow only while it is smaller than max\r
+               return fifo.size() < max && stream.hasNext();\r
+       }\r
+\r
+       /**\r
+        * @return the number of items currently held in the look-ahead buffer\r
+        */\r
+       public int sizeLookAhead(){\r
+               return fifo.size();\r
+       }\r
+       \r
+       /**\r
+        * @return <code>true</code> if the look-ahead buffer holds at least one item\r
+        */\r
+       public boolean hasLookAhead(){\r
+               return ! fifo.isEmpty();\r
+       }\r
+\r
+       /**\r
+        * @return <code>true</code> if either a buffered item or an item of the wrapped stream is available\r
+        */\r
+       @Override\r
+       public boolean hasNext() {\r
+               return ! fifo.isEmpty() || stream.hasNext();\r
+       }\r
+\r
+       @Override\r
+       public TermUri getTerm() {\r
+               return stream.getTerm();\r
+       }\r
+\r
+       \r
+}\r
diff --git a/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/MappingEntry.java b/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/MappingEntry.java
new file mode 100644 (file)
index 0000000..f6218f6
--- /dev/null
@@ -0,0 +1,34 @@
+/**\r
+* Copyright (C) 2007 EDIT\r
+* European Distributed Institute of Taxonomy \r
+* http://www.e-taxonomy.eu\r
+* \r
+* The contents of this file are subject to the Mozilla Public License Version 1.1\r
+* See LICENSE.TXT at the top of this package for the full license terms.\r
+*/\r
+package eu.etaxonomy.cdm.io.dwca.in;\r
+\r
+/**\r
+ * Plain value holder for a single mapping entry: a source object, identified by its\r
+ * namespace and key, together with the destination it is mapped to, identified by the\r
+ * destination namespace and the destination id.\r
+ *\r
+ * @param <SOURCE_NS> type of the source namespace\r
+ * @param <SOURCE_KEY> type of the source key\r
+ * @param <DEST_NS> type of the destination namespace\r
+ * @param <DEST_KEY> type of the destination id\r
+ * @author a.mueller\r
+ * @created 19.03.2012\r
+ */\r
+public class MappingEntry<SOURCE_NS, SOURCE_KEY, DEST_NS, DEST_KEY> {\r
+\r
+       SOURCE_NS namespace;\r
+       SOURCE_KEY sourceKey;\r
+       DEST_NS destinationNamespace;\r
+       DEST_KEY destinationId;\r
+\r
+       /**\r
+        * @param namespace the namespace of the source key\r
+        * @param sourceKey the key of the source object\r
+        * @param destinationNamespace the namespace of the destination\r
+        * @param destinationKey the id of the destination, stored as <code>destinationId</code>\r
+        */\r
+       public MappingEntry(SOURCE_NS namespace, SOURCE_KEY sourceKey,\r
+                       DEST_NS destinationNamespace, DEST_KEY destinationKey) {\r
+               this.destinationId = destinationKey;\r
+               this.destinationNamespace = destinationNamespace;\r
+               this.sourceKey = sourceKey;\r
+               this.namespace = namespace;\r
+       }\r
+\r
+}\r
similarity index 76%
rename from cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/ConverterBase.java
rename to cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/PartitionableConverterBase.java
index 38939b242ad2d053cfc7e99fd5b755a86dbe6dd4..45e11667fc66c816836239e5c5ea292441e14aa5 100644 (file)
@@ -9,6 +9,8 @@
 */\r
 package eu.etaxonomy.cdm.io.dwca.in;\r
 \r
+import java.util.HashMap;\r
+import java.util.Map;\r
 import java.util.Set;\r
 \r
 import org.apache.commons.lang.StringUtils;\r
@@ -25,9 +27,10 @@ import eu.etaxonomy.cdm.io.dwca.TermUri;
  * @date 23.11.2011\r
  *\r
  */\r
-public class ConverterBase<STATE extends IoStateBase> {\r
-       @SuppressWarnings("unused")\r
-       private static final Logger logger = Logger.getLogger(ConverterBase.class);\r
+public abstract class PartitionableConverterBase<STATE extends IoStateBase> \r
+               /*implements IPartitionableConverter<CsvStreamItem, IReader<CdmBase>, String> */ {\r
+       \r
+       private static final Logger logger = Logger.getLogger(PartitionableConverterBase.class);\r
 \r
        protected STATE state;\r
        \r
@@ -89,5 +92,19 @@ public class ConverterBase<STATE extends IoStateBase> {
        protected boolean exists(TermUri term, CsvStreamItem item) {\r
                return ! StringUtils.isBlank(getValue(item, term));\r
        }\r
+       \r
+\r
+       \r
+       /**\r
+        * Reads the given stream until it has no further item and lets\r
+        * {@link #makeForeignKeysForItem(CsvStreamItem, Map)} register the foreign keys of each item.\r
+        * The stream is consumed by this call.\r
+        *\r
+        * @param instream the reader providing the items of the partition\r
+        * @return the map filled by the calls to <code>makeForeignKeysForItem</code>,\r
+        *         empty if the stream had no items\r
+        */\r
+       public Map<String, Set<String>> getPartitionForeignKeys(IReader<CsvStreamItem> instream) {\r
+               Map<String, Set<String>> foreignKeys = new HashMap<String, Set<String>>();\r
+               for ( ; instream.hasNext() ; ){\r
+                       makeForeignKeysForItem(instream.read(), foreignKeys);\r
+               }\r
+               return foreignKeys;\r
+       }\r
+\r
+       /**\r
+        * Called by {@link #getPartitionForeignKeys(IReader)} once for every item of the partition.\r
+        * Implementations are expected to add the foreign keys of the item to the given map\r
+        * (presumably namespace to set of foreign keys - confirm against the implementing classes).\r
+        *\r
+        * @param next the item currently read from the partition\r
+        * @param result the map that accumulates the foreign keys of the whole partition\r
+        */\r
+       protected abstract void makeForeignKeysForItem(CsvStreamItem next, Map<String, Set<String>> result);\r
 \r
 }\r
diff --git a/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/StreamPartitioner.java b/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/StreamPartitioner.java
new file mode 100644 (file)
index 0000000..146b863
--- /dev/null
@@ -0,0 +1,76 @@
+/**\r
+* Copyright (C) 2007 EDIT\r
+* European Distributed Institute of Taxonomy \r
+* http://www.e-taxonomy.eu\r
+* \r
+* The contents of this file are subject to the Mozilla Public License Version 1.1\r
+* See LICENSE.TXT at the top of this package for the full license terms.\r
+*/\r
+package eu.etaxonomy.cdm.io.dwca.in;\r
+\r
+import java.util.ArrayList;\r
+import java.util.List;\r
+\r
+import org.apache.log4j.Logger;\r
+\r
+import eu.etaxonomy.cdm.io.dwca.TermUri;\r
+\r
+\r
+/**\r
+ * Reader that is meant to split an input stream into partitions of at most\r
+ * <code>partitionSize</code> items. The input stream is wrapped in a {@link LookAheadStream}.\r
+ * <BR>\r
+ * NOTE(review): the implementation is incomplete - {@link #read()} currently discards the\r
+ * conversion results and always returns <code>null</code>.\r
+ *\r
+ * @author a.mueller\r
+ *\r
+ */\r
+public class StreamPartitioner<ITEM extends IConverterInput>  implements INamespaceReader<IReader<ITEM>>{\r
+       private static final Logger logger = Logger.getLogger(StreamPartitioner.class);\r
+       \r
+       private int partitionSize;\r
+       private LookAheadStream<ITEM> stream;\r
+       private IConverter<ITEM, IConverterOutput, Object> converter;\r
+       \r
+       /**\r
+        * @param reader the input stream to partition; wrapped in a {@link LookAheadStream}\r
+        * @param converter the converter applied to the items of a partition in {@link #read()}\r
+        * @param size the partition size; unboxed on assignment, so <code>null</code> results in a NullPointerException\r
+        */\r
+       public StreamPartitioner(INamespaceReader<ITEM> reader, IConverter converter, Integer size){\r
+                this.stream = new LookAheadStream<ITEM>(reader);\r
+                this.converter = converter;\r
+                this.partitionSize = size;\r
+       }\r
+       \r
+       /**\r
+        * Collects look-ahead items from the stream for as long as\r
+        * <code>stream.hasNextLookAhead(partitionSize)</code> reports that more can be read.\r
+        * @return the items collected, possibly empty\r
+        */\r
+       private List<ITEM> readPartition(){\r
+               List<ITEM> partitionItems = new ArrayList<ITEM>();\r
+               while ( stream.hasNextLookAhead(partitionSize)){\r
+                       ITEM next = stream.readLookAhead(partitionSize);\r
+                       partitionItems.add(next);\r
+               }\r
+               return partitionItems;\r
+       }\r
+\r
+       /**\r
+        * @return <code>true</code> if the wrapped stream has a further (buffered or unread) item\r
+        */\r
+       public boolean hasNext() {\r
+               return stream.hasNext();\r
+       }\r
+\r
+       /**\r
+        * Reads the next partition and passes each of its items to the converter.\r
+        * NOTE(review): unfinished - the converter results are not used and the method always\r
+        * returns <code>null</code>; items read as look-ahead are not removed from the stream here.\r
+        * @return currently always <code>null</code>\r
+        */\r
+       @Override\r
+       public IReader<ITEM> read() {\r
+               List<ITEM> partitionItems = readPartition();\r
+               for (ITEM partitionItem : partitionItems){\r
+                       // result is assigned but never used\r
+                       IReader<MappedCdmBase> newItem = converter.map(partitionItem);\r
+               }\r
+               \r
+               // keeps reading look-ahead items until readLookAhead returns null; only logs a warning per item\r
+               while (stream.readLookAhead(partitionSize) != null){\r
+                       //TODO\r
+                       //should this method return a reader of OUTPUT items instead of a List of input items??\r
+                       logger.warn("Unclear what todo here");\r
+               }\r
+               \r
+               // TODO Auto-generated method stub\r
+               return null;\r
+       }\r
+       \r
+       /**\r
+        * @return the term of the wrapped stream\r
+        */\r
+       @Override\r
+       public TermUri getTerm() {\r
+               return stream.getTerm();\r
+       }\r
+       \r
+       \r
+       \r
+               \r
+       \r
+\r
+}\r