latest markup updates
author Andreas Müller <a.mueller@bgbm.org>
Tue, 30 Apr 2013 15:14:02 +0000 (15:14 +0000)
committer Andreas Müller <a.mueller@bgbm.org>
Tue, 30 Apr 2013 15:14:02 +0000 (15:14 +0000)
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupImportBase.java
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupImportState.java
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupKeyImport.java

index 74186f2c7c40202fc6fdb29285342b6b09071c70..58610f4c0db7277bef90fbe8e461fef9429c71ac 100644 (file)
@@ -211,7 +211,7 @@ public abstract class MarkupImportBase  {
 \r
 \r
        /**\r
-        * Returns the value of a given attribute name and returns the attribute from the attributes map. \r
+        * Returns the value of a given attribute name and removes the attribute from the attributes map. \r
         * @param attributes\r
         * @param attrName\r
         * @return\r
index d08ef84587b7bb9c8938a7ac63dd51b0cd6ff671..805534a20d5b2c0ff65c9cc88f740b3f3f607782 100644 (file)
@@ -39,6 +39,7 @@ public class MarkupImportState extends XmlImportState<MarkupImportConfigurator,
        \r
 \r
        private UnmatchedLeads unmatchedLeads;\r
+       private boolean onlyNumberedTaxaExist; //attribute in <key>\r
 \r
        private Set<FeatureNode> featureNodesToSave = new HashSet<FeatureNode>();\r
        \r
@@ -238,7 +239,13 @@ public class MarkupImportState extends XmlImportState<MarkupImportConfigurator,
        public UUID putAreaUuid(String key, UUID value) {\r
                return areaMap.put(key, value);\r
        }\r
-       \r
-       \r
+\r
+       public boolean isOnlyNumberedTaxaExist() {\r
+               return onlyNumberedTaxaExist;\r
+       }\r
+\r
+       public void setOnlyNumberedTaxaExist(boolean onlyNumberedTaxaExist) {\r
+               this.onlyNumberedTaxaExist = onlyNumberedTaxaExist;\r
+       }\r
 \r
 }
\ No newline at end of file
index 85be04814fa750d5854b95ea96ca74666c6652ee..707a7e67971f87b05e70f0a635bc3040a5886382 100644 (file)
@@ -14,6 +14,7 @@ import java.util.List;
 import java.util.Map;\r
 import java.util.Set;\r
 \r
+import javax.xml.stream.Location;\r
 import javax.xml.stream.XMLEventReader;\r
 import javax.xml.stream.XMLStreamException;\r
 import javax.xml.stream.events.Attribute;\r
@@ -27,6 +28,7 @@ import eu.etaxonomy.cdm.model.common.Language;
 import eu.etaxonomy.cdm.model.description.KeyStatement;\r
 import eu.etaxonomy.cdm.model.description.PolytomousKey;\r
 import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;\r
+import eu.etaxonomy.cdm.model.name.BotanicalName;\r
 import eu.etaxonomy.cdm.model.name.NonViralName;\r
 import eu.etaxonomy.cdm.model.name.Rank;\r
 import eu.etaxonomy.cdm.model.taxon.Taxon;\r
@@ -43,6 +45,8 @@ public class MarkupKeyImport  extends MarkupImportBase  {
        \r
        private static final String COUPLET = "couplet";\r
        private static final String IS_SPOTCHARACTERS = "isSpotcharacters";\r
+       private static final String ONLY_NUMBERED_TAXA_EXIST = "onlyNumberedTaxaExist";\r
+       private static final String EXISTS = "exists";\r
        private static final String KEYNOTES = "keynotes";\r
        private static final String KEY_TITLE = "keyTitle";\r
        private static final String QUESTION = "question";\r
@@ -65,6 +69,8 @@ public class MarkupKeyImport  extends MarkupImportBase  {
                        String message = "Attribute isSpotcharacters not yet implemented for <key>";\r
                        fireWarningEvent(message, parentEvent, 4);\r
                }\r
+               boolean onlyNumberedTaxaExist = checkAndRemoveAttributeValue(attributes, ONLY_NUMBERED_TAXA_EXIST, "true");\r
+               state.setOnlyNumberedTaxaExist(onlyNumberedTaxaExist);\r
                \r
                PolytomousKey key = PolytomousKey.NewInstance();\r
                key.addTaxonomicScope(state.getCurrentTaxon());\r
@@ -75,7 +81,9 @@ public class MarkupKeyImport  extends MarkupImportBase  {
                        XMLEvent next = readNoWhitespace(reader);\r
                        if (isMyEndingElement(next, parentEvent)) {\r
                                save(key, state);\r
+                               //reset state\r
                                state.setCurrentKey(null);\r
+                               state.setOnlyNumberedTaxaExist(false);\r
                                return;\r
                        } else if (isEndingElement(next, KEYNOTES)){\r
                                popUnimplemented(next.asEndElement());\r
@@ -161,6 +169,8 @@ public class MarkupKeyImport  extends MarkupImportBase  {
                if (parentNode != null){\r
                        for (PolytomousKeyNode childNode : childList){\r
                                parentNode.addChild(childNode);\r
+                               //just to be on the safe side\r
+                               parentNode.refreshNodeNumbering();\r
                        }\r
                }else if (isNotBlank(num)){\r
                        UnmatchedLeadsKey unmatchedKey = UnmatchedLeadsKey.NewInstance(state.getCurrentKey(), num);\r
@@ -168,6 +178,8 @@ public class MarkupKeyImport  extends MarkupImportBase  {
                        for(PolytomousKeyNode nodeToMatch: nodes){\r
                                for (PolytomousKeyNode childNode : childList){\r
                                        nodeToMatch.addChild(childNode);\r
+                                       //just to be on the safe side\r
+                                       nodeToMatch.refreshNodeNumbering();\r
                                }\r
                                state.getUnmatchedLeads().removeNode(unmatchedKey, nodeToMatch);\r
                        }\r
@@ -181,7 +193,7 @@ public class MarkupKeyImport  extends MarkupImportBase  {
        private void handleQuestion(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, List<PolytomousKeyNode> nodesList) throws XMLStreamException {\r
                // attributes\r
                Map<String, Attribute> attributes = getAttributes(parentEvent);\r
-               //needed only for data lineage\r
+               //TODO needed only for data lineage\r
                String questionNum = getAndRemoveRequiredAttributeValue(parentEvent, attributes, NUM);\r
                \r
                PolytomousKeyNode myNode = PolytomousKeyNode.NewInstance();\r
@@ -230,7 +242,7 @@ public class MarkupKeyImport  extends MarkupImportBase  {
                String num = getOnlyAttribute(next, NUM, true);\r
                String cData = getCData(state, reader, next, false);\r
                if (isNotBlank(cData) && ! cData.equals(num)){\r
-                       String message = "CData ('%s') not handled in <toCouplet>";\r
+                       String message = "CData ('%s') not be handled in <toCouplet>";\r
                        message = String.format(message, cData);\r
                        fireWarningEvent(message, next, 4);\r
                }\r
@@ -241,19 +253,34 @@ public class MarkupKeyImport  extends MarkupImportBase  {
        private void handleToTaxon(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode node) throws XMLStreamException {\r
                Map<String, Attribute> attributes = getAttributes(parentEvent);\r
                String num = getAndRemoveAttributeValue(attributes, NUM);\r
-               String taxonStr = getCData(state, reader, parentEvent, false).trim();\r
-               if (taxonStr.endsWith(".")){\r
-                       taxonStr = taxonStr.substring(0, taxonStr.length()-1).trim();\r
-               }\r
+               boolean taxonNotExists = checkAndRemoveAttributeValue(attributes, EXISTS, "false");\r
+               \r
+               String taxonCData = getCData(state, reader, parentEvent, false).trim();\r
+               \r
                //TODO ?\r
-               taxonStr = makeTaxonKey(taxonStr, state.getCurrentTaxon());\r
-               UnmatchedLeadsKey unmatched = UnmatchedLeadsKey.NewInstance(num, taxonStr);\r
-               state.getUnmatchedLeads().addKey(unmatched, node);\r
+               String taxonKeyStr = makeTaxonKey(taxonCData, state.getCurrentTaxon(), parentEvent.getLocation());\r
+               taxonNotExists = taxonNotExists || (isBlank(num) && state.isOnlyNumberedTaxaExist());\r
+               if (taxonNotExists){\r
+                       Taxon taxon = Taxon.NewInstance(BotanicalName.NewInstance(Rank.UNKNOWN_RANK()), null);\r
+                       taxon.getName().setTitleCache(taxonKeyStr, true);\r
+                       node.setTaxon(taxon);\r
+               }else{\r
+                       UnmatchedLeadsKey unmatched = UnmatchedLeadsKey.NewInstance(num, taxonKeyStr);\r
+                       state.getUnmatchedLeads().addKey(unmatched, node);\r
+               }\r
                return;\r
        }\r
        \r
        \r
-       private String makeTaxonKey(String strGoto, Taxon taxon) {\r
+       /**\r
+        * Creates a string that represents the given taxon. The method tries to replace e.g.\r
+        * abbreviated genus epithets by their full names etc.\r
+        * @param strGoto\r
+        * @param taxon\r
+        * @param location \r
+        * @return\r
+        */\r
+       private String makeTaxonKey(String strGoto, Taxon taxon, Location location) {\r
                String result = "";\r
                if (strGoto == null){\r
                        return "";\r
@@ -262,12 +289,16 @@ public class MarkupKeyImport  extends MarkupImportBase  {
                NonViralName<?> name = CdmBase.deproxy(taxon.getName(), NonViralName.class);\r
                String strGenusName = name.getGenusOrUninomial();\r
                \r
-               \r
-               strGoto = strGoto.replaceAll("\\([^\\(\\)]*\\)", "");  //replace all brackets\r
+               String bracketPattern = ".*\\([^\\(\\)]*\\).*";\r
+               if (strGoto.matches(bracketPattern)){\r
+                       fireWarningEvent("toTaxon has bracket", makeLocationStr(location), 4);\r
+                       strGoto = strGoto.replaceAll(bracketPattern, "");  //replace all brackets\r
+               }\r
                strGoto = strGoto.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace\r
                \r
                strGoto = strGoto.trim();  \r
                String[] split = strGoto.split("\\s");\r
+               //handle single epithets and markers\r
                for (int i = 0; i<split.length; i++){\r
                        String single = split[i];\r
                        if (isGenusAbbrev(single, strGenusName)){\r
@@ -281,6 +312,10 @@ public class MarkupKeyImport  extends MarkupImportBase  {
                        }\r
                        result = (result + " " + split[i]).trim();\r
                }\r
+               //remove trailing "."\r
+               if (result.endsWith(".")){\r
+                       result = result.substring(0, result.length()-1).trim();\r
+               }\r
                return result;\r
        }\r
        \r
@@ -343,6 +378,8 @@ public class MarkupKeyImport  extends MarkupImportBase  {
                for (PolytomousKeyNode matchingNode : matchingNodes){\r
                        state.getUnmatchedLeads().removeNode(leadsKey, matchingNode);\r
                        matchingNode.setTaxon(taxon);\r
+                       //just to be on the safe side\r
+                       matchingNode.refreshNodeNumbering();\r
                        state.getPolytomousKeyNodesToSave().add(matchingNode);\r
                }\r
                return matchingNodes;\r