cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/common/mapping/out/MethodMapper.java -text
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/common/mapping/out/ObjectChangeMapper.java -text
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/TermUri.java -text
-cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/ConverterBase.java -text
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/CsvStream.java -text
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/CsvStreamItem.java -text
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/DwcTaxonCsv2CdmTaxonConverter.java -text
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/IConverterInput.java -text
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/IConverterOutput.java -text
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/IImportMapping.java -text
+cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/INamespace.java -text
+cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/INamespaceReader.java -text
+cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/IPartitionableConverter.java -text
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/IReader.java -text
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/InMemoryMapping.java -text
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/ListReader.java -text
+cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/LookAheadStream.java -text
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/MappedCdmBase.java -text
+cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/MappingEntry.java -text
+cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/PartitionableConverterBase.java -text
+cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/in/StreamPartitioner.java -text
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/jaxb/Archive.java -text
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/jaxb/ArchiveEntryBase.java -text
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/dwca/jaxb/Core.java -text
* @date 17.10.2011\r
*\r
*/\r
-public class CsvStream implements IReader<CsvStreamItem>{\r
+public class CsvStream implements INamespaceReader<CsvStreamItem>{\r
@SuppressWarnings("unused")\r
private static Logger logger = Logger.getLogger(CsvStream.class);\r
\r
public String getFilesLocation() {\r
return this.archiveEntry.getFiles().getLocation();\r
}\r
+\r
+ \r
+ \r
}\r
package eu.etaxonomy.cdm.io.dwca.in;\r
\r
import java.util.ArrayList;\r
+import java.util.HashMap;\r
import java.util.List;\r
import java.util.Map;\r
+import java.util.Set;\r
\r
import org.apache.commons.lang.StringUtils;\r
import org.apache.log4j.Logger;\r
* @date 22.11.2011\r
*\r
*/\r
-public class DwcTaxonCsv2CdmTaxonConverter extends ConverterBase<DwcaImportState> implements IConverter<CsvStreamItem, IReader<CdmBase>, String>{\r
+public class DwcTaxonCsv2CdmTaxonConverter extends PartitionableConverterBase<DwcaImportState> implements IPartitionableConverter<CsvStreamItem, IReader<CdmBase>, String>{\r
@SuppressWarnings("unused")\r
private static Logger logger = Logger.getLogger(DwcTaxonCsv2CdmTaxonConverter.class);\r
\r
}\r
\r
return result;\r
+\r
}\r
\r
+// ********************** PARTITIONABLE ****************************************/\r
+\r
+\r
+ /**\r
+ * Partition hook for collecting the foreign keys referenced by a stream item.\r
+ * This converter registers none, so the result map is left untouched.\r
+ * @param next the stream item (not read)\r
+ * @param result foreign keys per namespace (not modified)\r
+ */\r
+ @Override\r
+ protected void makeForeignKeysForItem(CsvStreamItem next, Map<String, Set<String>> result) {\r
+ //do nothing, no foreign keys needed here\r
+ }\r
+ \r
+//** ***************************** TO STRING *********************************************/\r
+ \r
@Override\r
public String toString(){\r
return this.getClass().getName();\r
}\r
\r
+\r
+ \r
}\r
package eu.etaxonomy.cdm.io.dwca.in;\r
\r
import java.util.ArrayList;\r
+import java.util.HashMap;\r
import java.util.HashSet;\r
import java.util.List;\r
import java.util.Map;\r
* @date 23.11.2011\r
*\r
*/\r
-public class DwcTaxonCsv2CdmTaxonRelationConverter<STATE extends DwcaImportState> extends ConverterBase<DwcaImportState> \r
- implements IConverter<CsvStreamItem, IReader<CdmBase>, String>{\r
+public class DwcTaxonCsv2CdmTaxonRelationConverter<STATE extends DwcaImportState> extends PartitionableConverterBase<DwcaImportState> \r
+ implements IPartitionableConverter<CsvStreamItem, INamespaceReader<CdmBase>, String>{\r
private static Logger logger = Logger.getLogger(DwcTaxonCsv2CdmTaxonRelationConverter.class);\r
\r
private static final String ID = "id";\r
}\r
\r
\r
+//**************************** PARTITIONABLE ************************************************\r
\r
+\r
+\r
+\r
+    /**\r
+     * Partition hook for collecting the foreign keys referenced by a stream item.\r
+     * This converter does not register any foreign keys yet, so the map is left untouched.\r
+     * @param item the stream item (not read)\r
+     * @param fkMap foreign keys per namespace (not modified)\r
+     */\r
+    @Override\r
+    protected void makeForeignKeysForItem(CsvStreamItem item, Map<String, Set<String>> fkMap){\r
+        //do nothing, there are no foreign keys yet to handle\r
+    }\r
+ \r
+//************************************* TO STRING ********************************************\r
\r
@Override\r
public String toString(){\r
return this.getClass().getName();\r
}\r
\r
+\r
}\r
package eu.etaxonomy.cdm.io.dwca.in;\r
\r
import java.net.URI;\r
+import java.util.Map;\r
+import java.util.Set;\r
\r
import org.apache.log4j.Logger;\r
import org.springframework.stereotype.Component;\r
IReader<CsvStream> stream = streamConverter.getStreamStream(state);\r
while (stream.hasNext()){\r
CsvStream csvStream = stream.read();\r
- while (csvStream.hasNext()){\r
- CsvStreamItem item = csvStream.read();\r
- TransactionStatus tx = startTransaction();\r
- handleCsvStreamItem(state, item);\r
- commitTransaction(tx);\r
+ \r
+ if (state.getConfig().isUsePartitions()){\r
+ StreamPartitioner<CsvStreamItem> partitionStream = new StreamPartitioner<CsvStreamItem>(csvStream, null, 1000);\r
+ \r
+ while (partitionStream.hasNext()){\r
+ TransactionStatus tx = startTransaction();\r
+ \r
+ handlePartitionedStreamItem(state, partitionStream);\r
+ commitTransaction(tx);\r
+ }\r
+ }else {\r
+ \r
+ while (csvStream.hasNext()){\r
+ TransactionStatus tx = startTransaction();\r
+ \r
+ CsvStreamItem item = csvStream.read();\r
+ handleCsvStreamItem(state, item);\r
+ \r
+ commitTransaction(tx);\r
+ }\r
}\r
- finalizeStream(csvStream, state);\r
+\r
+ finalizeStream(csvStream, state);\r
}\r
return;\r
}\r
\r
+ /**\r
+ * Prepares the import state for the next partition of the given stream:\r
+ * looks up the converter for the stream's term, reads one partition, asks the\r
+ * converter for the foreign keys of that partition and loads the objects these\r
+ * keys map to into the state. Converting and saving the partition items is not\r
+ * yet enabled (see the commented block below).\r
+ * @param state the import state; marked as unsuccessful if no converter is found\r
+ * @param partStream the partitioned stream to read the next partition from\r
+ */\r
+ private void handlePartitionedStreamItem(DwcaImportState state, StreamPartitioner<CsvStreamItem> partStream) {\r
+ IPartitionableConverter<CsvStreamItem, IReader<CdmBase>, String> converter = getConverter(partStream.getTerm(), state);\r
+ if (converter == null){\r
+ // NOTE(review): nothing is read from partStream on this path, so a caller looping on\r
+ // partStream.hasNext() presumably never terminates - confirm\r
+ state.setSuccess(false);\r
+ return;\r
+ }\r
+ \r
+ // NOTE(review): StreamPartitioner.read() appears to return null at the moment, which\r
+ // would make the next call fail - confirm before enabling partitions\r
+ IReader<CsvStreamItem> inputStream = partStream.read();\r
+ Map<String, Set<String>> foreignKeys = converter.getPartitionForeignKeys(inputStream);\r
+ //restrict the global mapping to the keys needed by this partition and preload the mapped objects\r
+ IImportMapping mapping = state.getMapping();\r
+ IImportMapping partialMapping = mapping.getPartialMapping(foreignKeys);\r
+ state.loadRelatedObjects(partialMapping);\r
+ \r
+ //disabled draft: map every item of the partition, save the results and register their source ids\r
+// while (inputStream.hasNext()){\r
+// IReader<MappedCdmBase> resultReader = converter.map(inputStream.read(), xx);\r
+// \r
+// xx;\r
+// while (resultReader.hasNext()){\r
+// \r
+// MappedCdmBase mappedCdmBase = (resultReader.read());\r
+// CdmBase cdmBase = mappedCdmBase.getCdmBase();\r
+// save(cdmBase, state, item.getLocation());\r
+// if (mappedCdmBase.getSourceId() != null && cdmBase.isInstanceOf(IdentifiableEntity.class)){\r
+// IdentifiableEntity<?> entity = CdmBase.deproxy(cdmBase, IdentifiableEntity.class);\r
+// \r
+// String namespace = mappedCdmBase.getNamespace();\r
+// state.putMapping(namespace,mappedCdmBase.getSourceId(), entity);\r
+// }\r
+// }\r
+// }\r
+ return;\r
+ \r
+ }\r
+\r
/**\r
* @param state\r
* @param item\r
* @return\r
*/\r
private void handleCsvStreamItem(DwcaImportState state, CsvStreamItem item) {\r
- IConverter<CsvStreamItem, IReader<CdmBase>, String> converter = getConverter(item, state);\r
+ IConverter<CsvStreamItem, IReader<CdmBase>, String> converter = getConverter(item.term, state);\r
if (converter == null){\r
state.setSuccess(false);\r
return;\r
}\r
}\r
\r
- private IConverter<CsvStreamItem,IReader<CdmBase>, String> getConverter(CsvStreamItem item, DwcaImportState state) {\r
- TermUri namespace = item.term;\r
+ private IPartitionableConverter<CsvStreamItem,IReader<CdmBase>, String> getConverter(TermUri namespace, DwcaImportState state) {\r
if (namespace.equals(TermUri.DWC_TAXON)){\r
if (! state.isTaxaCreated()){\r
return new DwcTaxonCsv2CdmTaxonConverter(state);\r
private static final Logger logger = Logger.getLogger(DwcaImportConfigurator.class);\r
private static IInputTransformer defaultTransformer = new DwcaImportTransformer();\r
\r
+ private boolean usePartitions;\r
+ \r
// //new\r
// private boolean doSpecimen = true; //reads the specimen worksheet\r
// private boolean doAreaLevels = true; //reads the areaLevels worksheet\r
//// private boolean includeSynonymsForTaxonMatching = false;\r
\r
\r
- \r
- \r
+\r
+\r
@SuppressWarnings("unchecked")\r
protected void makeIoClassList(){\r
ioClassList = new Class[]{\r
//TODO\r
if (this.sourceReference == null){\r
logger.warn("getSource Reference not yet fully implemented");\r
- sourceReference = ReferenceFactory.newDatabase();\r
- sourceReference.setTitleCache("Specimen import", true);\r
+ sourceReference = ReferenceFactory.newGeneric();\r
+ sourceReference.setTitleCache("DwC-A import", true);\r
}\r
return sourceReference;\r
}\r
+\r
+ \r
\r
+ /**\r
+ * @return the value of the <code>usePartitions</code> flag\r
+ */\r
+ public boolean isUsePartitions() {\r
+ return usePartitions;\r
+ }\r
+\r
+ /**\r
+ * @param usePartitions the new value of the <code>usePartitions</code> flag\r
+ */\r
+ public void setUsePartitions(boolean usePartitions) {\r
+ this.usePartitions = usePartitions;\r
+ }\r
\r
\r
}\r
\r
import java.util.ArrayList;\r
import java.util.HashMap;\r
+import java.util.HashSet;\r
import java.util.List;\r
import java.util.Map;\r
import java.util.Set;\r
import eu.etaxonomy.cdm.io.dwca.in.InMemoryMapping.CdmKey;\r
import eu.etaxonomy.cdm.model.common.CdmBase;\r
import eu.etaxonomy.cdm.model.common.IdentifiableEntity;\r
-import eu.etaxonomy.cdm.model.taxon.TaxonBase;\r
\r
/**\r
* @author a.mueller\r
* @created 23.11.2011\r
*/\r
public class DwcaImportState extends ImportStateBase<DwcaImportConfigurator, DwcaImport>{\r
- @SuppressWarnings("unused")\r
private static final Logger logger = Logger.getLogger(DwcaImportState.class);\r
\r
boolean taxaCreated;\r
+ private Map<String, Map<String, IdentifiableEntity>> partitionStore;\r
\r
private IImportMapping mapping = new InMemoryMapping();\r
\r
this.taxaCreated = taxaCreated;\r
}\r
\r
+//********************* MAPPING ACCESS *********************************\r
+ //TODO this may move to an external class soon\r
+ \r
public void putMapping(String namespace, Integer sourceKey, IdentifiableEntity<?> destinationObject){\r
mapping.putMapping(namespace, sourceKey, destinationObject);\r
}\r
\r
public <CLASS extends IdentifiableEntity> List<CLASS> get(String namespace, String sourceKey,Class<CLASS> destinationClass){\r
List<CLASS> result = new ArrayList<CLASS>(); \r
- Set<CdmKey> keySet = mapping.get(namespace, sourceKey);\r
- for (CdmKey<CLASS> key: keySet){\r
- if (destinationClass == null || destinationClass.isAssignableFrom(key.clazz)){\r
- IIdentifiableEntityService<CLASS> service = getCurrentIO().getServiceByClass(key.clazz);\r
- CLASS entity = CdmBase.deproxy(service.find(key.id), key.clazz);\r
- result.add(entity);\r
+ if (this.partitionStore != null){\r
+ Map<String, IdentifiableEntity> namespaceMap = this.partitionStore.get(namespace);\r
+ if (namespaceMap != null){\r
+ IdentifiableEntity cdmBase = namespaceMap.get(sourceKey);\r
+ if (cdmBase.isInstanceOf(destinationClass)){\r
+ CLASS typedCdmBase = CdmBase.deproxy(cdmBase, destinationClass);\r
+ result.add(typedCdmBase);\r
+ }\r
+ \r
+ }\r
+ }else{\r
+ Set<CdmKey> keySet = mapping.get(namespace, sourceKey);\r
+ for (CdmKey<CLASS> key: keySet){\r
+ if (destinationClass == null || destinationClass.isAssignableFrom(key.clazz)){\r
+ IIdentifiableEntityService<CLASS> service = getCurrentIO().getServiceByClass(key.clazz);\r
+ CLASS entity = CdmBase.deproxy(service.find(key.id), key.clazz);\r
+ result.add(entity);\r
+ }\r
}\r
+ return result;\r
}\r
return result;\r
}\r
public boolean exists(String namespace, String sourceKey,Class<?> destinationClass){\r
return mapping.exists(namespace, sourceKey, destinationClass);\r
}\r
+ \r
+ \r
+ public void loadRelatedObjects (IImportMapping mapping){\r
+ Map<String, Map<String, IdentifiableEntity>> result = new HashMap<String, Map<String,IdentifiableEntity>>();\r
+ \r
+ List<MappingEntry<String, String, Class, Integer>> mappingEntryList = mapping.getEntryList();\r
+ \r
+ //order ids by destination classes\r
+ Map<Class, Set<Integer>> destinationNamespaceMap = new HashMap<Class, Set<Integer>>(); \r
+ for (MappingEntry<String, String, Class, Integer> entry : mappingEntryList){\r
+ Set<Integer> idSet = destinationNamespaceMap.get(entry.destinationNamespace);\r
+ if (idSet == null){\r
+ idSet = new HashSet<Integer>();\r
+ destinationNamespaceMap.put(entry.destinationNamespace, idSet);\r
+ }\r
+ idSet.add(entry.destinationId);\r
+ }\r
+ \r
+ //retrieve cdm objects per class\r
+ Map<Class, Map<Integer, IdentifiableEntity>> classMap = new HashMap<Class, Map<Integer,IdentifiableEntity>>();\r
+ for (Class<?> cdmClass :destinationNamespaceMap.keySet()){\r
+ IIdentifiableEntityService<?> classService = getCurrentIO().getServiceByClass(cdmClass);\r
+ Set<Integer> idSet = destinationNamespaceMap.get(cdmClass);\r
+ List<? extends IdentifiableEntity> relatedObjects = classService.findById(idSet);\r
+ \r
+ //put into id map\r
+ Map<Integer, IdentifiableEntity> idMap = new HashMap<Integer, IdentifiableEntity>();\r
+ for (IdentifiableEntity identEnt : relatedObjects){\r
+ idMap.put(identEnt.getId(), identEnt);\r
+ }\r
+ \r
+ //add to class map\r
+ classMap.put(cdmClass, idMap);\r
+ }\r
+ \r
+ //fill related object map\r
+ for (MappingEntry<String, String, Class, Integer> entry : mappingEntryList){\r
+ Map<String, IdentifiableEntity> namespaceMap = getOrMakeNamespaceMap(result, entry.namespace);\r
+ IdentifiableEntity cdmBase = getCdmObject(classMap, entry);\r
+ if (cdmBase != null){\r
+ namespaceMap.put(entry.sourceKey, cdmBase);\r
+ }else{\r
+ logger.info("CdmBase not found for mapping entry.");\r
+ }\r
+ }\r
+ \r
+ //store\r
+ this.partitionStore = result;\r
+ \r
+ }\r
+\r
+\r
+// public Map<String, Map<String, IdentifiableEntity>> getPartitionStore() {\r
+// return partitionStore;\r
+// }\r
+\r
+    /**\r
+     * Discards the objects loaded for the current partition.\r
+     * The store is reset to <code>null</code> rather than to an empty map because\r
+     * {@link #get(String, String, Class)} consults the mapping only if no partition\r
+     * store is present; an empty store would make every lookup return nothing.\r
+     * @param partitionStore not used, kept for backward compatibility\r
+     */\r
+    public void unloadPartitionStore(Map<String, Map<String, IdentifiableEntity>> partitionStore) {\r
+        this.partitionStore = null;\r
+    }\r
+\r
+ public IImportMapping getMapping() {\r
+ return this.mapping;\r
+ }\r
\r
\r
+    /**\r
+     * Returns the map registered for the given namespace. If none is registered\r
+     * yet, a new empty map is created, registered and returned.\r
+     * @param relatedObjectMap2 the map of namespace maps\r
+     * @param namespace the namespace to look up\r
+     * @return the namespace map, never <code>null</code>\r
+     */\r
+    private Map<String, IdentifiableEntity> getOrMakeNamespaceMap(Map<String, Map<String, IdentifiableEntity>> relatedObjectMap2, String namespace) {\r
+        Map<String, IdentifiableEntity> existing = relatedObjectMap2.get(namespace);\r
+        if (existing != null){\r
+            return existing;\r
+        }\r
+        Map<String, IdentifiableEntity> created = new HashMap<String, IdentifiableEntity>();\r
+        relatedObjectMap2.put(namespace, created);\r
+        return created;\r
+    }\r
+ \r
+\r
+    /**\r
+     * Looks up the object the given mapping entry points to.\r
+     * @param classMap the loaded objects, by destination class and id\r
+     * @param entry the mapping entry holding destination class and id\r
+     * @return the object, or <code>null</code> if nothing is loaded for the class or the id\r
+     */\r
+    private IdentifiableEntity getCdmObject(Map<Class, Map<Integer, IdentifiableEntity>> classMap,\r
+            MappingEntry<String, String, Class, Integer> entry) {\r
+        Map<Integer, IdentifiableEntity> objectsById = classMap.get(entry.destinationNamespace);\r
+        if (objectsById == null){\r
+            return null;\r
+        }\r
+        return objectsById.get(entry.destinationId);\r
+    }\r
\r
\r
\r
import java.util.ArrayList;\r
import java.util.List;\r
import java.util.Map;\r
+import java.util.Set;\r
\r
import org.apache.log4j.Logger;\r
\r
* @date 22.11.2011\r
*\r
*/\r
-public class GbifDescriptionCsv2CdmConverter extends ConverterBase<DwcaImportState> implements IConverter<CsvStreamItem, IReader<CdmBase>, String>{\r
+public class GbifDescriptionCsv2CdmConverter extends PartitionableConverterBase<DwcaImportState> \r
+ implements IPartitionableConverter<CsvStreamItem, IReader<CdmBase>, String>{\r
+ \r
@SuppressWarnings("unused")\r
private static final Logger logger = Logger.getLogger(GbifDescriptionCsv2CdmConverter.class);\r
\r
return null;\r
}\r
\r
+ \r
+//********************** PARTITIONABLE **************************************/\r
+\r
+ /**\r
+ * Partition hook for collecting the foreign keys referenced by a stream item.\r
+ * Not implemented for this converter: only a warning is logged and the result\r
+ * map is left untouched.\r
+ * @param next the stream item (not read)\r
+ * @param result foreign keys per namespace (not modified)\r
+ */\r
+ @Override\r
+ protected void makeForeignKeysForItem(CsvStreamItem next, Map<String, Set<String>> result) {\r
+ logger.warn("Not yet implemented"); \r
+ }\r
+ \r
+//******************* TO STRING ******************************************/\r
+ \r
@Override\r
public String toString(){\r
return this.getClass().getName();\r
}\r
\r
+\r
}\r
package eu.etaxonomy.cdm.io.dwca.in;\r
\r
import java.util.ArrayList;\r
+import java.util.HashMap;\r
import java.util.List;\r
import java.util.Map;\r
+import java.util.Set;\r
\r
import org.apache.log4j.Logger;\r
\r
* @date 22.11.2011\r
*\r
*/\r
-public class GbifVernacularNameCsv2CdmConverter extends ConverterBase<DwcaImportState> implements IConverter<CsvStreamItem, IReader<CdmBase>, String>{\r
+public class GbifVernacularNameCsv2CdmConverter extends PartitionableConverterBase<DwcaImportState> implements IPartitionableConverter<CsvStreamItem, IReader<CdmBase>, String> {\r
@SuppressWarnings("unused")\r
private static final Logger logger = Logger.getLogger(GbifVernacularNameCsv2CdmConverter.class);\r
private static final String CORE_ID = "coreId";\r
// TODO Auto-generated method stub\r
return null;\r
}\r
+\r
+\r
+\r
+//**************************** PARTITIONABLE ************************************************\r
+\r
+ @Override\r
+ protected void makeForeignKeysForItem(CsvStreamItem next, Map<String, Set<String>> result) {\r
+ // TODO Auto-generated method stub\r
+ \r
+ }\r
+ \r
+//************************ STRING ************************************************/\r
\r
@Override\r
public String toString(){\r
return this.getClass().getName();\r
}\r
\r
+\r
+\r
+\r
+\r
}\r
*/\r
package eu.etaxonomy.cdm.io.dwca.in;\r
\r
+import java.util.List;\r
+import java.util.Map;\r
import java.util.Set;\r
\r
import eu.etaxonomy.cdm.io.dwca.in.InMemoryMapping.CdmKey;\r
*/\r
public boolean exists(String namespace, String sourceKey,Class<?> destinationClass);\r
\r
+ /**\r
+ * Returns the mapping for only those objects addressed by the namespacedSourceKeys parameter.\r
+ * @param namespacedSourceKeys the source keys to include, grouped by namespace\r
+ * @return\r
+ */\r
+ public IImportMapping getPartialMapping(Map<String, Set<String>> namespacedSourceKeys);\r
\r
+ /**\r
+ * Returns a list for all mapping entries.\r
+ * @return\r
+ */\r
+ public List<MappingEntry<String, String, Class, Integer>> getEntryList();\r
+ \r
+ \r
+ \r
}\r
--- /dev/null
+// $Id$\r
+/**\r
+* Copyright (C) 2009 EDIT\r
+* European Distributed Institute of Taxonomy \r
+* http://www.e-taxonomy.eu\r
+* \r
+* The contents of this file are subject to the Mozilla Public License Version 1.1\r
+* See LICENSE.TXT at the top of this package for the full license terms.\r
+*/\r
+package eu.etaxonomy.cdm.io.dwca.in;\r
+\r
+import eu.etaxonomy.cdm.io.dwca.TermUri;\r
+\r
+/**\r
+ * Implemented by objects that can report the term (namespace) they belong to.\r
+ * @author a.mueller\r
+ * @date 10.03.2012\r
+ *\r
+ */\r
+public interface INamespace {\r
+\r
+ \r
+ /**\r
+ * Returns the namespace of the items included in the stream.\r
+ * @return the term\r
+ */\r
+ public TermUri getTerm();\r
+ \r
+}\r
--- /dev/null
+// $Id$\r
+/**\r
+* Copyright (C) 2009 EDIT\r
+* European Distributed Institute of Taxonomy \r
+* http://www.e-taxonomy.eu\r
+* \r
+* The contents of this file are subject to the Mozilla Public License Version 1.1\r
+* See LICENSE.TXT at the top of this package for the full license terms.\r
+*/\r
+package eu.etaxonomy.cdm.io.dwca.in;\r
+\r
+\r
+/**\r
+ * A reader that is also an {@link INamespace} and a converter output.\r
+ * The interface adds no methods of its own; it only combines\r
+ * {@link IReader}, {@link INamespace} and {@link IConverterOutput}.\r
+ * @param <TYPE> the type of the items returned by the reader\r
+ * @author a.mueller\r
+ * @date 23.11.2011\r
+ *\r
+ */\r
+public interface INamespaceReader<TYPE extends Object> extends IConverterOutput<IReader<TYPE>>, INamespace, IReader<TYPE>{\r
+\r
+\r
+}\r
--- /dev/null
+/**\r
+* Copyright (C) 2009 EDIT\r
+* European Distributed Institute of Taxonomy \r
+* http://www.e-taxonomy.eu\r
+* \r
+* The contents of this file are subject to the Mozilla Public License Version 1.1\r
+* See LICENSE.TXT at the top of this package for the full license terms.\r
+*/\r
+package eu.etaxonomy.cdm.io.dwca.in;\r
+\r
+import java.util.Map;\r
+import java.util.Set;\r
+\r
+/**\r
+ * Interface for converters that allow partitioned converting, i.e. converters\r
+ * that can report in advance which foreign keys a partition of the input refers to.\r
+ * @author a.mueller\r
+ *\r
+ */\r
+public interface IPartitionableConverter<IN extends IConverterInput<CsvStreamItem>, OUT extends IConverterOutput, OBJ extends Object> extends IConverter<IN, OUT, OBJ> {\r
+ \r
+ \r
+ /**\r
+ * Returns the foreign keys included in the given partition of the input stream,\r
+ * grouped by namespace.\r
+ * @param instream the reader providing the items of the partition\r
+ * @return the foreign keys (map values) per namespace (map keys)\r
+ */\r
+ //TODO make instream a more generic type of stream\r
+ public Map<String, Set<String>> getPartitionForeignKeys(IReader<CsvStreamItem> instream);\r
+ \r
+ \r
+}\r
*/\r
package eu.etaxonomy.cdm.io.dwca.in;\r
\r
+\r
/**\r
* @author a.mueller\r
* @date 23.11.2011\r
* @return True if there is a next object, false otherwise.\r
*/\r
boolean hasNext();\r
+ \r
+ \r
}\r
*/\r
package eu.etaxonomy.cdm.io.dwca.in;\r
\r
+import java.util.ArrayList;\r
import java.util.HashMap;\r
import java.util.HashSet;\r
+import java.util.List;\r
import java.util.Map;\r
+import java.util.Map.Entry;\r
import java.util.Set;\r
\r
+import eu.etaxonomy.cdm.io.dwca.in.InMemoryMapping.CdmKey;\r
import eu.etaxonomy.cdm.model.common.IdentifiableEntity;\r
\r
/**\r
Class<CLASS> clazz;\r
int id;\r
\r
- private CdmKey(IdentifiableEntity object){\r
+ private CdmKey(IdentifiableEntity<?> object){\r
this.clazz = (Class)object.getClass();\r
this.id = object.getId();\r
}\r
putMapping(namespace, String.valueOf(sourceKey), destinationObject);\r
}\r
\r
+\r
@Override\r
public void putMapping(String namespace, String sourceKey, IdentifiableEntity destinationObject){\r
+ CdmKey<IdentifiableEntity<?>> cdmKey = new CdmKey(destinationObject);\r
+ putMapping(namespace, sourceKey, cdmKey);\r
+ }\r
+ \r
+ public void putMapping(String namespace, String sourceKey, CdmKey<IdentifiableEntity<?>> cdmKey){\r
Map<String, Set<CdmKey>> namespaceMap = mapping.get(namespace);\r
if (namespaceMap == null){\r
namespaceMap = new HashMap<String, Set<CdmKey>>();\r
namespaceMap.put(sourceKey, keySet);\r
}\r
\r
- keySet.add(new CdmKey(destinationObject));\r
+ keySet.add(cdmKey);\r
}\r
\r
@Override\r
return false;\r
}\r
\r
+    /**\r
+     * Creates a new in-memory mapping holding only the entries of this mapping\r
+     * that are addressed by the given source keys.\r
+     * @param namespacedSourceKeys the source keys to copy, grouped by namespace\r
+     * @return the new partial mapping\r
+     */\r
+    @Override\r
+    public IImportMapping getPartialMapping( Map<String, Set<String>> namespacedSourceKeys) {\r
+        InMemoryMapping subset = new InMemoryMapping();\r
+        for (String namespace : namespacedSourceKeys.keySet()){\r
+            Set<String> sourceKeys = namespacedSourceKeys.get(namespace);\r
+            for (String sourceKey : sourceKeys){\r
+                //copy every destination key registered for this source key\r
+                Set<CdmKey> registeredKeys = this.get(namespace, sourceKey);\r
+                for (CdmKey registeredKey : registeredKeys){\r
+                    subset.putMapping(namespace, sourceKey, registeredKey);\r
+                }\r
+            }\r
+        }\r
+        return subset;\r
+    }\r
+\r
+\r
+    /**\r
+     * Flattens this mapping into a list with one entry per combination of\r
+     * source namespace, source key and destination key.\r
+     * @return the list of all mapping entries\r
+     */\r
+    @Override\r
+    public List<MappingEntry<String, String, Class, Integer>> getEntryList() {\r
+        List<MappingEntry<String, String, Class, Integer>> entries = new ArrayList<MappingEntry<String,String,Class,Integer>>();\r
+        for (String sourceNamespace : mapping.keySet()){\r
+            Map<String, Set<CdmKey>> keysBySourceId = mapping.get(sourceNamespace);\r
+            for (Entry<String, Set<CdmKey>> idEntry : keysBySourceId.entrySet()){\r
+                String sourceId = idEntry.getKey();\r
+                for (CdmKey cdmKey : idEntry.getValue()){\r
+                    MappingEntry<String, String, Class, Integer> flat =\r
+                            new MappingEntry<String, String, Class, Integer>(sourceNamespace, sourceId, cdmKey.clazz, cdmKey.id);\r
+                    entries.add(flat);\r
+                }\r
+            }\r
+        }\r
+        return entries;\r
+    }\r
+\r
}\r
--- /dev/null
+/**\r
+* Copyright (C) 2007 EDIT\r
+* European Distributed Institute of Taxonomy \r
+* http://www.e-taxonomy.eu\r
+* \r
+* The contents of this file are subject to the Mozilla Public License Version 1.1\r
+* See LICENSE.TXT at the top of this package for the full license terms.\r
+*/\r
+package eu.etaxonomy.cdm.io.dwca.in;\r
+\r
+import java.util.Queue;\r
+import java.util.concurrent.LinkedBlockingQueue;\r
+\r
+import org.apache.log4j.Logger;\r
+\r
+import eu.etaxonomy.cdm.io.dwca.TermUri;\r
+\r
+\r
+/**\r
+ * @author a.mueller\r
+ *\r
+ */\r
+public class LookAheadStream<ITEM> implements INamespaceReader<ITEM>{\r
+ @SuppressWarnings("unused")\r
+ private static final Logger logger = Logger.getLogger(LookAheadStream.class);\r
+ \r
+ private Queue<ITEM> fifo = new LinkedBlockingQueue<ITEM>();\r
+ \r
+ private INamespaceReader<ITEM> stream;\r
+\r
+ public LookAheadStream(INamespaceReader<ITEM> stream) {\r
+ super();\r
+ this.stream = stream;\r
+ if (stream == null){\r
+ throw new RuntimeException("Stream may not be null.");\r
+ }\r
+ }\r
+ \r
+ public ITEM read(){\r
+ if (! fifo.isEmpty()){\r
+ return fifo.remove();\r
+ }else{\r
+ return stream.read();\r
+ }\r
+ }\r
+ \r
+ public ITEM readLookAhead(){\r
+ ITEM result = stream.read();\r
+ fifo.add(result);\r
+ return result;\r
+ }\r
+ \r
+ public ITEM readLookAhead(int max){\r
+ if (max < fifo.size()){\r
+ ITEM result = stream.read();\r
+ fifo.add(result);\r
+ return result;\r
+ }else{\r
+ return null;\r
+ }\r
+ }\r
+ \r
+ public boolean hasNextLookAhead(int max){\r
+ if (max < fifo.size() && stream.hasNext()){\r
+ return true;\r
+ }else{\r
+ return false;\r
+ }\r
+ }\r
+\r
+ public int sizeLookAhead(){\r
+ return fifo.size();\r
+ }\r
+ \r
+ public boolean hasLookAhead(){\r
+ return ! fifo.isEmpty();\r
+ }\r
+\r
+ public boolean hasNext() {\r
+ return ! fifo.isEmpty() || stream.hasNext();\r
+ }\r
+\r
+ @Override\r
+ public TermUri getTerm() {\r
+ return stream.getTerm();\r
+ }\r
+\r
+ \r
+}\r
--- /dev/null
+/**\r
+* Copyright (C) 2007 EDIT\r
+* European Distributed Institute of Taxonomy \r
+* http://www.e-taxonomy.eu\r
+* \r
+* The contents of this file are subject to the Mozilla Public License Version 1.1\r
+* See LICENSE.TXT at the top of this package for the full license terms.\r
+*/\r
+package eu.etaxonomy.cdm.io.dwca.in;\r
+\r
+/**\r
+ * Simple value holder for one mapping entry: a key in a source namespace and\r
+ * the key in a destination namespace it is mapped to.\r
+ * @param <SOURCE_NS> type of the source namespace\r
+ * @param <SOURCE_KEY> type of the source key\r
+ * @param <DEST_NS> type of the destination namespace\r
+ * @param <DEST_KEY> type of the destination key\r
+ * @author a.mueller\r
+ * @created 19.03.2012\r
+ *\r
+ */\r
+public class MappingEntry<SOURCE_NS extends Object, SOURCE_KEY extends Object,\r
+            DEST_NS extends Object, DEST_KEY extends Object> {\r
+\r
+    //source side\r
+    SOURCE_NS namespace;\r
+    SOURCE_KEY sourceKey;\r
+\r
+    //destination side\r
+    DEST_NS destinationNamespace;\r
+    DEST_KEY destinationId;\r
+\r
+    /**\r
+     * @param namespace the source namespace\r
+     * @param sourceKey the key within the source namespace\r
+     * @param destinationNamespace the destination namespace\r
+     * @param destinationKey the key within the destination namespace\r
+     */\r
+    public MappingEntry(SOURCE_NS namespace, SOURCE_KEY sourceKey,\r
+            DEST_NS destinationNamespace, DEST_KEY destinationKey) {\r
+        this.destinationId = destinationKey;\r
+        this.destinationNamespace = destinationNamespace;\r
+        this.sourceKey = sourceKey;\r
+        this.namespace = namespace;\r
+    }\r
+\r
+}\r
*/\r
package eu.etaxonomy.cdm.io.dwca.in;\r
\r
+import java.util.HashMap;\r
+import java.util.Map;\r
import java.util.Set;\r
\r
import org.apache.commons.lang.StringUtils;\r
* @date 23.11.2011\r
*\r
*/\r
-public class ConverterBase<STATE extends IoStateBase> {\r
- @SuppressWarnings("unused")\r
- private static final Logger logger = Logger.getLogger(ConverterBase.class);\r
+public abstract class PartitionableConverterBase<STATE extends IoStateBase> \r
+ /*implements IPartitionableConverter<CsvStreamItem, IReader<CdmBase>, String> */ {\r
+ \r
+ private static final Logger logger = Logger.getLogger(PartitionableConverterBase.class);\r
\r
protected STATE state;\r
\r
protected boolean exists(TermUri term, CsvStreamItem item) {\r
return ! StringUtils.isBlank(getValue(item, term));\r
}\r
+ \r
+\r
+ \r
+    /**\r
+     * Reads the given stream to its end and collects the foreign keys of every\r
+     * item via {@link #makeForeignKeysForItem(CsvStreamItem, Map)}.\r
+     * @param instream the reader providing the items of the partition\r
+     * @return the collected foreign keys per namespace, never <code>null</code>\r
+     */\r
+    public Map<String, Set<String>> getPartitionForeignKeys(IReader<CsvStreamItem> instream) {\r
+        Map<String, Set<String>> foreignKeys = new HashMap<String, Set<String>>();\r
+        for ( ; instream.hasNext(); ){\r
+            makeForeignKeysForItem(instream.read(), foreignKeys);\r
+        }\r
+        return foreignKeys;\r
+    }\r
+\r
+ protected abstract void makeForeignKeysForItem(CsvStreamItem next, Map<String, Set<String>> result);\r
\r
}\r
--- /dev/null
+/**\r
+* Copyright (C) 2007 EDIT\r
+* European Distributed Institute of Taxonomy \r
+* http://www.e-taxonomy.eu\r
+* \r
+* The contents of this file are subject to the Mozilla Public License Version 1.1\r
+* See LICENSE.TXT at the top of this package for the full license terms.\r
+*/\r
+package eu.etaxonomy.cdm.io.dwca.in;\r
+\r
+import java.util.ArrayList;\r
+import java.util.List;\r
+\r
+import org.apache.log4j.Logger;\r
+\r
+import eu.etaxonomy.cdm.io.dwca.TermUri;\r
+\r
+\r
+/**\r
+ * Reader meant to split an input stream into partitions of a given maximum size.\r
+ * The input is wrapped into a {@link LookAheadStream}. The class is unfinished:\r
+ * {@link #read()} does not return a partition yet (see the TODOs there).\r
+ * @author a.mueller\r
+ *\r
+ */\r
+public class StreamPartitioner<ITEM extends IConverterInput> implements INamespaceReader<IReader<ITEM>>{\r
+ private static final Logger logger = Logger.getLogger(StreamPartitioner.class);\r
+ \r
+ //maximum look-ahead size passed to the wrapped stream\r
+ private int partitionSize;\r
+ private LookAheadStream<ITEM> stream;\r
+ private IConverter<ITEM, IConverterOutput, Object> converter;\r
+ \r
+ /**\r
+ * @param reader the stream to partition\r
+ * @param converter the converter applied to the partition items in {@link #read()}\r
+ * @param size the partition size; unboxed here, so it must not be <code>null</code>\r
+ */\r
+ public StreamPartitioner(INamespaceReader<ITEM> reader, IConverter converter, Integer size){\r
+ this.stream = new LookAheadStream<ITEM>(reader);\r
+ this.converter = converter;\r
+ this.partitionSize = size;\r
+ }\r
+ \r
+ /**\r
+ * Reads items ahead for as long as the wrapped stream reports that further\r
+ * look-ahead is possible for the partition size, and returns them as a list.\r
+ */\r
+ private List<ITEM> readPartition(){\r
+ List<ITEM> partitionItems = new ArrayList<ITEM>();\r
+ while ( stream.hasNextLookAhead(partitionSize)){\r
+ ITEM next = stream.readLookAhead(partitionSize);\r
+ partitionItems.add(next);\r
+ }\r
+ return partitionItems;\r
+ }\r
+\r
+ /**\r
+ * @return <code>true</code> if the wrapped stream has a next item\r
+ */\r
+ public boolean hasNext() {\r
+ return stream.hasNext();\r
+ }\r
+\r
+ /**\r
+ * Unfinished: reads a partition, maps each of its items with the converter\r
+ * (the results are discarded) and always returns <code>null</code>.\r
+ */\r
+ @Override\r
+ public IReader<ITEM> read() {\r
+ List<ITEM> partitionItems = readPartition();\r
+ for (ITEM partitionItem : partitionItems){\r
+ // NOTE(review): fails if the converter passed to the constructor was null - confirm callers\r
+ IReader<MappedCdmBase> newItem = converter.map(partitionItem);\r
+ }\r
+ \r
+ while (stream.readLookAhead(partitionSize) != null){\r
+ //TODO\r
+ //should this method return a reader of OUTPUT items instead of a List of input items??\r
+ logger.warn("Unclear what todo here");\r
+ }\r
+ \r
+ // TODO return a reader for the partition; returning null leaves callers without a stream\r
+ return null;\r
+ }\r
+ \r
+ /**\r
+ * @return the term of the wrapped stream\r
+ */\r
+ @Override\r
+ public TermUri getTerm() {\r
+ return stream.getTerm();\r
+ }\r
+ \r
+ \r
+ \r
+ \r
+ \r
+\r
+}\r