sdd import matches descriptions to existing taxa
author Andreas Kohlbecker <a.kohlbecker@bgbm.org>
Mon, 13 Sep 2010 14:41:25 +0000 (14:41 +0000)
committer Andreas Kohlbecker <a.kohlbecker@bgbm.org>
Mon, 13 Sep 2010 14:41:25 +0000 (14:41 +0000)
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/sdd/SDDDescriptionIO.java
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/sdd/SDDImportConfigurator.java

index ec8175c9fd07a0a28fb82f783747f8a14af29992..899986d1724eda14b40515fedda262200f06ca4d 100644 (file)
@@ -11,7 +11,6 @@ package eu.etaxonomy.cdm.io.sdd;
 \r
 import java.io.File;\r
 import java.net.MalformedURLException;\r
-import java.net.URI;\r
 import java.net.URL;\r
 import java.text.SimpleDateFormat;\r
 import java.util.ArrayList;\r
@@ -24,26 +23,27 @@ import java.util.Map;
 import java.util.Set;\r
 \r
 import org.apache.log4j.Logger;\r
+import org.apache.log4j.spi.Configurator;\r
 import org.jdom.Element;\r
 import org.jdom.Namespace;\r
 import org.joda.time.DateTime;\r
 import org.springframework.stereotype.Component;\r
 import org.springframework.transaction.TransactionStatus;\r
 \r
-import eu.etaxonomy.cdm.api.service.IAgentService;\r
-import eu.etaxonomy.cdm.api.service.IVersionableService;//rajout\r
 import eu.etaxonomy.cdm.api.service.IDescriptionService;\r
 import eu.etaxonomy.cdm.api.service.IReferenceService;\r
+import eu.etaxonomy.cdm.api.service.ITaxonService;\r
 import eu.etaxonomy.cdm.api.service.ITermService;\r
+import eu.etaxonomy.cdm.api.service.config.ITaxonServiceConfigurator;\r
+import eu.etaxonomy.cdm.api.service.config.impl.TaxonServiceConfiguratorImpl;\r
+import eu.etaxonomy.cdm.api.service.pager.Pager;\r
 import eu.etaxonomy.cdm.common.mediaMetaData.ImageMetaData;\r
+import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper;\r
 import eu.etaxonomy.cdm.io.common.CdmImportBase;\r
 import eu.etaxonomy.cdm.io.common.ICdmImport;\r
 import eu.etaxonomy.cdm.io.common.IImportConfigurator;\r
 import eu.etaxonomy.cdm.io.common.ImportHelper;\r
-import eu.etaxonomy.cdm.model.agent.Contact;\r
-import eu.etaxonomy.cdm.model.agent.Institution;\r
 import eu.etaxonomy.cdm.model.agent.Person;\r
-import eu.etaxonomy.cdm.model.agent.Address;//rajout\r
 import eu.etaxonomy.cdm.model.agent.Team;\r
 import eu.etaxonomy.cdm.model.common.Annotation;\r
 import eu.etaxonomy.cdm.model.common.AnnotationType;\r
@@ -58,13 +58,13 @@ import eu.etaxonomy.cdm.model.common.MarkerType;
 import eu.etaxonomy.cdm.model.common.Representation;\r
 import eu.etaxonomy.cdm.model.common.TermBase;\r
 import eu.etaxonomy.cdm.model.common.TermVocabulary;\r
-import eu.etaxonomy.cdm.model.common.User;\r
 import eu.etaxonomy.cdm.model.common.VersionableEntity;\r
 import eu.etaxonomy.cdm.model.description.CategoricalData;\r
 import eu.etaxonomy.cdm.model.description.Feature;\r
 import eu.etaxonomy.cdm.model.description.FeatureNode;\r
 import eu.etaxonomy.cdm.model.description.FeatureTree;\r
 import eu.etaxonomy.cdm.model.description.MeasurementUnit;\r
+import eu.etaxonomy.cdm.model.description.Modifier;\r
 import eu.etaxonomy.cdm.model.description.QuantitativeData;\r
 import eu.etaxonomy.cdm.model.description.State;\r
 import eu.etaxonomy.cdm.model.description.StateData;\r
@@ -72,26 +72,25 @@ import eu.etaxonomy.cdm.model.description.StatisticalMeasure;
 import eu.etaxonomy.cdm.model.description.StatisticalMeasurementValue;\r
 import eu.etaxonomy.cdm.model.description.TaxonDescription;\r
 import eu.etaxonomy.cdm.model.description.TextData;\r
-import eu.etaxonomy.cdm.model.description.Modifier;\r
-import eu.etaxonomy.cdm.model.taxon.TaxonomicTree;\r
-import eu.etaxonomy.cdm.model.taxon.TaxonNode;\r
+import eu.etaxonomy.cdm.model.location.NamedArea;\r
+import eu.etaxonomy.cdm.model.media.IdentifiableMediaEntity;\r
 import eu.etaxonomy.cdm.model.media.ImageFile;\r
 import eu.etaxonomy.cdm.model.media.Media;\r
 import eu.etaxonomy.cdm.model.media.MediaRepresentation;\r
-import eu.etaxonomy.cdm.model.media.IdentifiableMediaEntity;\r
 import eu.etaxonomy.cdm.model.media.MediaRepresentationPart;\r
 import eu.etaxonomy.cdm.model.media.Rights;\r
 import eu.etaxonomy.cdm.model.name.NonViralName;\r
 import eu.etaxonomy.cdm.model.name.TaxonNameBase;\r
 import eu.etaxonomy.cdm.model.occurrence.Specimen;\r
 import eu.etaxonomy.cdm.model.reference.IArticle;\r
-import eu.etaxonomy.cdm.model.reference.IDatabase;\r
-import eu.etaxonomy.cdm.model.reference.IGeneric;\r
 import eu.etaxonomy.cdm.model.reference.ReferenceBase;\r
 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;\r
-import eu.etaxonomy.cdm.model.taxon.Taxon;\r
 import eu.etaxonomy.cdm.model.taxon.Synonym;\r
-import eu.etaxonomy.cdm.model.location.NamedArea;\r
+import eu.etaxonomy.cdm.model.taxon.Taxon;\r
+import eu.etaxonomy.cdm.model.taxon.TaxonBase;\r
+import eu.etaxonomy.cdm.model.taxon.TaxonNode;\r
+import eu.etaxonomy.cdm.model.taxon.TaxonomicTree;\r
+import eu.etaxonomy.cdm.persistence.query.MatchMode;\r
 \r
 /**\r
  * @author h.fradin\r
@@ -477,6 +476,25 @@ public class SDDDescriptionIO extends CdmImportBase<SDDImportConfigurator, SDDIm
                importDescriptiveConcepts(elDataset, sddNamespace, sddConfig);\r
                importCharacters(elDataset, sddNamespace, sddConfig, success);\r
                importCharacterTrees(elDataset, sddNamespace, sddConfig, success);\r
+               \r
+               //FIXME (a.mueller) \r
+               MarkerType editorMarkerType = MarkerType.NewInstance("Editor", "editor", "edt") ;\r
+               MarkerType geographicAreaMarkerType = MarkerType.NewInstance("", "SDDGeographicArea", "ga");\r
+               MarkerType descriptiveConceptMarkerType = MarkerType.NewInstance("Descriptive Concept", "DescriptiveConcept", "DC");\r
+               markerTypes.add(editorMarkerType);\r
+               markerTypes.add(geographicAreaMarkerType);\r
+               markerTypes.add(descriptiveConceptMarkerType);\r
+\r
+               //saving of all imported data into the CDM db\r
+               saveFeatures();\r
+               saveModifiers();\r
+               saveStates();\r
+               saveMarkerType();\r
+               saveAreas(geographicAreaMarkerType);            \r
+               saveUnits();\r
+               saveStatisticalMeasure();               \r
+               saveAnnotationType();\r
+               \r
                importCodedDescriptions(elDataset, sddNamespace, sddConfig, success);\r
                importAgents(elDataset, sddNamespace, sddConfig, success);\r
                importPublications(elDataset, sddNamespace, sddConfig, success);\r
@@ -485,12 +503,6 @@ public class SDDDescriptionIO extends CdmImportBase<SDDImportConfigurator, SDDIm
                importGeographicAreas(elDataset, sddNamespace, sddConfig);\r
                importSpecimens(elDataset,sddNamespace, sddConfig);\r
                        \r
-               MarkerType editorMarkerType = MarkerType.NewInstance("Editor", "editor", "edt") ;\r
-               MarkerType geographicAreaMarkerType = MarkerType.NewInstance("", "SDDGeographicArea", "ga");\r
-               MarkerType descriptiveConceptMarkerType = MarkerType.NewInstance("Descriptive Concept", "DescriptiveConcept", "DC");\r
-               markerTypes.add(editorMarkerType);\r
-               markerTypes.add(geographicAreaMarkerType);\r
-               markerTypes.add(descriptiveConceptMarkerType);\r
                \r
                \r
                if ((authors != null)||(editors != null)) {\r
@@ -527,6 +539,7 @@ public class SDDDescriptionIO extends CdmImportBase<SDDImportConfigurator, SDDIm
                        descriptionService.save(taxonDescription);\r
                }\r
 \r
+\r
                \r
                for (Iterator<String> refCD = taxonDescriptions.keySet().iterator() ; refCD.hasNext() ;){\r
                        String ref = refCD.next();\r
@@ -554,16 +567,7 @@ public class SDDDescriptionIO extends CdmImportBase<SDDImportConfigurator, SDDIm
 \r
                //sddConfig.setSourceReference(sourceReference);\r
 \r
-               //saving of all imported data into the CDM db\r
-               ITermService termService = getTermService();\r
-               \r
-               \r
-               for (Iterator<Modifier> k = modifiers.values().iterator() ; k.hasNext() ;){\r
-                       Modifier modifier = k.next();\r
-                       termService.save(modifier);\r
-               }\r
-               \r
-               //termService.save(descriptiveConceptMarkerType);\r
+\r
                if (descriptiveConcepts != null) {\r
                        for (Iterator<Feature> feat = descriptiveConcepts.iterator() ; feat.hasNext() ;) {\r
                                Marker marker = Marker.NewInstance();\r
@@ -572,11 +576,7 @@ public class SDDDescriptionIO extends CdmImportBase<SDDImportConfigurator, SDDIm
                                feature.addMarker(marker);\r
                        }\r
                }\r
-               \r
-               for (Iterator<State> k = states.values().iterator() ; k.hasNext() ;){\r
-                       State state = k.next();\r
-                       termService.save(state);\r
-               }\r
+               saveFeatures();\r
                \r
                /*Marker markerd = Marker.NewInstance();\r
                markerd.setMarkerType(descriptiveConceptMarker);\r
@@ -589,45 +589,10 @@ public class SDDDescriptionIO extends CdmImportBase<SDDImportConfigurator, SDDIm
                fiture.addRecommendedModifierEnumeration(termVocabularyState);\r
                termService.save(modif);\r
                termService.save(fiture);*/\r
-               \r
-               for (Iterator<Feature> k = features.values().iterator() ; k.hasNext() ;){\r
-                       Feature feature = k.next();\r
-                       termService.save(feature); \r
-               }\r
-               \r
-               for(Iterator<MarkerType> k = markerTypes.iterator() ; k.hasNext() ;){\r
-                       MarkerType markerType = k.next();\r
-                       termService.save(markerType);\r
-               }\r
-               \r
+       \r
                //XIMtermService.save(editorMarkerType);\r
                \r
                //XIMtermService.save(geographicAreaMarkerType);\r
-               for (Iterator<NamedArea> k = namedAreas.values().iterator() ; k.hasNext() ;) {\r
-                       Marker marker = Marker.NewInstance();\r
-                       marker.setMarkerType(geographicAreaMarkerType);\r
-                       NamedArea area = k.next();\r
-                       area.addMarker(marker);\r
-                       //getTermService().save(area);\r
-                       termService.save(area);\r
-               }               \r
-               \r
-               if (units != null) {\r
-                       for (Iterator<MeasurementUnit> k = units.values().iterator() ; k.hasNext() ;){\r
-                               MeasurementUnit unit = k.next();\r
-                               if (unit != null) {\r
-                                       termService.save(unit); \r
-                               }\r
-                       }\r
-               }\r
-               for (Iterator<StatisticalMeasure> k = statisticalMeasures.iterator() ; k.hasNext() ;) {\r
-                       StatisticalMeasure sm = k.next();\r
-                       termService.save(sm); \r
-               }\r
-               for (Iterator<AnnotationType> at = annotationTypes.iterator() ; at.hasNext() ;) {\r
-                       AnnotationType annotationType = at.next();\r
-                       termService.save(annotationType); \r
-               }\r
 \r
                IReferenceService referenceService = getReferenceService();\r
                // referenceService.saveReference(sourceReference); \r
@@ -650,6 +615,74 @@ public class SDDDescriptionIO extends CdmImportBase<SDDImportConfigurator, SDDIm
                }\r
                logger.info("end of persistence ...");\r
                \r
+               \r
+       }\r
+\r
+       // Persists every AnnotationType held in annotationTypes through the term service.\r
+       private void saveAnnotationType() {\r
+               for (AnnotationType collectedType : annotationTypes) {\r
+                       getTermService().save(collectedType);\r
+               }\r
+       }\r
+\r
+       // Persists every StatisticalMeasure held in statisticalMeasures through the term service.\r
+       private void saveStatisticalMeasure() {\r
+               for (StatisticalMeasure measure : statisticalMeasures) {\r
+                       getTermService().save(measure);\r
+               }\r
+       }\r
+\r
+       private void saveUnits() {\r
+               if (units == null) {\r
+                       return; // no unit map available, nothing to persist\r
+               }\r
+               for (MeasurementUnit measurementUnit : units.values()) {\r
+                       if (measurementUnit != null) { // null map values are skipped\r
+                               getTermService().save(measurementUnit);\r
+                       }\r
+               }\r
+       }\r
+\r
+       // Attaches a fresh Marker of the given type to each NamedArea in namedAreas and saves the area.\r
+       private void saveAreas(MarkerType geographicAreaMarkerType) {\r
+               for (NamedArea area : namedAreas.values()) {\r
+                       Marker areaMarker = Marker.NewInstance();\r
+                       areaMarker.setMarkerType(geographicAreaMarkerType);\r
+                       area.addMarker(areaMarker);\r
+                       // NOTE(review): the caller appears to run this before importGeographicAreas() - confirm namedAreas is filled by then\r
+                       getTermService().save(area);\r
+               }\r
+       }\r
+\r
+       // Persists every State stored as a value of the states map through the term service.\r
+       private void saveStates() {\r
+               for (State collectedState : states.values()) {\r
+                       getTermService().save(collectedState);\r
+               }\r
+       }\r
+\r
+       // Persists every MarkerType held in markerTypes through the term service.\r
+       private void saveMarkerType() {\r
+               for (MarkerType collectedType : markerTypes) {\r
+                       getTermService().save(collectedType);\r
+               }\r
+       }\r
+\r
+       // Persists every Modifier stored as a value of the modifiers map.\r
+       // The term service is looked up once per modifier.\r
+       private void saveModifiers() {\r
+               for (Modifier collectedModifier : modifiers.values()) {\r
+                       getTermService().save(collectedModifier);\r
+               }\r
+       }\r
+\r
+       // Persists every Feature stored as a value of the features map.\r
+       // The term service is fetched once, before the loop, and reused for each save.\r
+       private void saveFeatures() {\r
+               ITermService service = getTermService();\r
+               for (Feature collectedFeature : features.values()) {\r
+                       service.save(collectedFeature);\r
+               }\r
        }\r
 \r
        // imports the default language of the dataset\r
@@ -1004,6 +1037,9 @@ public class SDDDescriptionIO extends CdmImportBase<SDDImportConfigurator, SDDIm
                logger.info("start CodedDescriptions ...");\r
                Element elCodedDescriptions = elDataset.getChild("CodedDescriptions",sddNamespace);\r
                // <CodedDescription id="D101">\r
+               \r
+\r
+               ITaxonService taxonService = getTaxonService();\r
 \r
                if (elCodedDescriptions != null) {\r
                        List<Element> listCodedDescriptions = elCodedDescriptions.getChildren("CodedDescription", sddNamespace);\r
@@ -1032,19 +1068,46 @@ public class SDDDescriptionIO extends CdmImportBase<SDDImportConfigurator, SDDIm
                                        if (elScope != null) {\r
                                                Element elTaxonName = elScope.getChild("TaxonName",sddNamespace);\r
                                                ref = elTaxonName.getAttributeValue("ref");\r
-\r
                                                NonViralName taxonNameBase = taxonNameBases.get(ref);\r
-                                               taxon = Taxon.NewInstance(taxonNameBase, sec);\r
+                                               \r
+                                               if(sddConfig.isDoMatchTaxa()){\r
+                                                       taxon = getTaxonService().findBestMatchingTaxon(taxonNameBase.getTitleCache());\r
+                                               }\r
+                                               \r
+                                               if(taxon != null){\r
+                                                       logger.info("using existing Taxon" + taxon.getTitleCache());\r
+                                                       if(!taxonNameBase.getUuid().equals(taxon.getName().getUuid())){\r
+                                                               logger.warn("TaxonNameBase entity of existing taxon does not match Name in list -> replacing Name in list");\r
+                                                               taxonNameBase = HibernateProxyHelper.deproxy(taxon.getName(), NonViralName.class);\r
+                                                       }                               \r
+                                               } else {                                                        \r
+                                                       logger.info("creating new Taxon from TaxonName" + taxonNameBase.getTitleCache());\r
+                                                       taxon = Taxon.NewInstance(taxonNameBase, sec);\r
+                                               }\r
                                        }\r
+                                       \r
                                        else {//in case no taxon is linked to the description, a new one is created\r
                                                NonViralName tnb = NonViralName.NewInstance(null);\r
                                                String id = new String(""+taxonNamesCount);\r
                                                IdentifiableSource source = IdentifiableSource.NewInstance(id, "TaxonName");\r
                                                importRepresentation(elCodedDescription, sddNamespace, tnb, id, sddConfig);\r
-                                               tnb.addSource(source);\r
-                                               taxonNameBases.put(id ,tnb);\r
-                                               taxonNamesCount++;\r
-                                               taxon = Taxon.NewInstance(tnb, sec);\r
+                                               \r
+                                               if(sddConfig.isDoMatchTaxa()){\r
+                                                       taxon = getTaxonService().findBestMatchingTaxon(tnb.getTitleCache());\r
+                                               }\r
+                                               \r
+                                               if(taxon != null){\r
+                                                       tnb = HibernateProxyHelper.deproxy(taxon.getName(), NonViralName.class);\r
+//                                                     taxonNameBases.put(id ,tnb);\r
+//                                                     taxonNamesCount++;\r
+                                                       logger.info("using existing Taxon" + taxon.getTitleCache());\r
+                                               } else {\r
+                                                       tnb.addSource(source);\r
+                                                       taxonNameBases.put(id ,tnb);\r
+                                                       taxonNamesCount++;                                              \r
+                                                       logger.info("creating new Taxon from TaxonName" + tnb.getTitleCache());\r
+                                                       taxon = Taxon.NewInstance(tnb, sec);\r
+                                               }\r
                                        }\r
 \r
                                        String refCitation = "";\r
@@ -1202,7 +1265,7 @@ public class SDDDescriptionIO extends CdmImportBase<SDDImportConfigurator, SDDIm
 \r
                                } catch (Exception e) {\r
                                        //FIXME\r
-                                       logger.warn("Import of CodedDescription " + j + " failed.");\r
+                                       logger.warn("Import of CodedDescription " + j + " failed.", e);\r
                                        success = false;\r
                                }\r
                                if ((++j % modCount) == 0){ logger.info("CodedDescriptions handled: " + j);}\r
index 82d15bf7ab07eea3e3b43b75eed01d2ecd210663..fb3bb5219bd8224f1b9909d04450871740df49ee 100644 (file)
@@ -38,6 +38,8 @@ public class SDDImportConfigurator extends ImportConfiguratorBase implements IIm
        //TODO\r
        private static IInputTransformer defaultTransformer = null;\r
        \r
+       private boolean doMatchTaxa = false;\r
+       \r
        //xml xmlNamespace\r
        Namespace sddNamespace;\r
 \r
@@ -149,4 +151,18 @@ public class SDDImportConfigurator extends ImportConfiguratorBase implements IIm
        public void setSddNamespace(Namespace xmlNamespace) {\r
                this.sddNamespace = xmlNamespace;\r
        }\r
+\r
+       /**\r
+        * @param doMatchTaxa if true, the import looks up an existing taxon (findBestMatchingTaxon on the name's title cache) and only creates a new Taxon when none is found\r
+        */\r
+       public void setDoMatchTaxa(boolean doMatchTaxa) {\r
+               this.doMatchTaxa = doMatchTaxa;\r
+       }\r
+\r
+       /**\r
+        * @return whether the import should reuse a matching existing taxon instead of always creating a new one; false unless changed via setDoMatchTaxa\r
+        */\r
+       public boolean isDoMatchTaxa() {\r
+               return doMatchTaxa;\r
+       }\r
 }\r