some changes to the update script
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / markup / MarkupKeyImport.java
index e819d5def477bcbf756bb967b26fa9c438aec2ea..af17fbe3336a2a2912cfe1656e357afdac8db0ac 100644 (file)
@@ -22,6 +22,7 @@ import javax.xml.stream.events.XMLEvent;
 \r
 import org.apache.log4j.Logger;\r
 \r
+import eu.etaxonomy.cdm.common.CdmUtils;\r
 import eu.etaxonomy.cdm.io.markup.UnmatchedLeads.UnmatchedLeadsKey;\r
 import eu.etaxonomy.cdm.model.common.CdmBase;\r
 import eu.etaxonomy.cdm.model.common.Language;\r
@@ -36,24 +37,11 @@ import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
 /**\r
  * @author a.mueller\r
  * @created 26.04.2013\r
- * \r
  */\r
 public class MarkupKeyImport  extends MarkupImportBase  {\r
        @SuppressWarnings("unused")\r
        private static final Logger logger = Logger.getLogger(MarkupKeyImport.class);\r
        \r
-       private static final String COUPLET = "couplet";\r
-       private static final String IS_SPOTCHARACTERS = "isSpotcharacters";\r
-       private static final String ONLY_NUMBERED_TAXA_EXIST = "onlyNumberedTaxaExist";\r
-       private static final String EXISTS = "exists";\r
-       private static final String KEYNOTES = "keynotes";\r
-       private static final String KEY_TITLE = "keyTitle";\r
-       private static final String QUESTION = "question";\r
-       private static final String TEXT = "text";\r
-       private static final String TO_COUPLET = "toCouplet";\r
-       private static final String TO_KEY = "toKey";\r
-       private static final String TO_TAXON = "toTaxon";\r
-\r
        \r
        public MarkupKeyImport(MarkupDocumentImport docImport) {\r
                super(docImport);\r
@@ -205,11 +193,7 @@ public class MarkupKeyImport  extends MarkupImportBase  {
                                return;\r
                        } else if (isStartingElement(next, TEXT)) {\r
                                String text = getCData(state, reader, next);\r
-                               Language language = state.getDefaultLanguage();\r
-                               if (language == null){\r
-                                       language = Language.DEFAULT();\r
-                               }\r
-                               KeyStatement statement = KeyStatement.NewInstance(language, text);\r
+                               KeyStatement statement = KeyStatement.NewInstance(getDefaultLanguage(state), text);\r
                                myNode.setStatement(statement);\r
                        } else if (isStartingElement(next, COUPLET)) {\r
                                //TODO test\r
@@ -254,9 +238,8 @@ public class MarkupKeyImport  extends MarkupImportBase  {
                String num = getAndRemoveAttributeValue(attributes, NUM);\r
                boolean taxonNotExists = checkAndRemoveAttributeValue(attributes, EXISTS, "false");\r
                \r
-               String taxonCData = getCData(state, reader, parentEvent, false).trim();\r
+               String taxonCData = handleInnerToTaxon(state, reader, parentEvent, node).trim();\r
                \r
-               //TODO ?\r
                String taxonKeyStr = makeTaxonKey(taxonCData, state.getCurrentTaxon(), parentEvent.getLocation());\r
                taxonNotExists = taxonNotExists || (isBlank(num) && state.isOnlyNumberedTaxaExist());\r
                if (taxonNotExists){\r
@@ -275,6 +258,41 @@ public class MarkupKeyImport  extends MarkupImportBase  {
        }\r
        \r
        \r
+       /**\r
+        * Reads the events inside a toTaxon element until its end element and returns the concatenated character data.\r
+        * Strings returned by handleSimpleAnnotation are combined via CdmUtils.concat (separator "; ") as modifying text.\r
+        * @param state the import state; handed to handleSimpleAnnotation and used to look up the default language\r
+        * @param reader the XML event reader the inner events are read from\r
+        * @param parentEvent the event of the enclosing element; reading stops once isMyEndingElement matches it\r
+        * @param node the key node on which the modifying text is set (default language) if that text is not blank\r
+        * @return the collected character data, "" if none was found; footnote refs are not yet handled\r
+        * @throws XMLStreamException presumably on read failures (IllegalStateException if events end before the end tag)\r
+        */\r
+       private String handleInnerToTaxon(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode node) throws XMLStreamException {\r
+               String taxonText = "";\r
+               String modifyingText = null;\r
+               while (reader.hasNext()) {\r
+                       XMLEvent next = readNoWhitespace(reader);\r
+                       if (isMyEndingElement(next, parentEvent)) {\r
+                               if (isNotBlank(modifyingText)){\r
+                                       node.putModifyingText(getDefaultLanguage(state), modifyingText);\r
+                               }\r
+                               return taxonText;\r
+                       } else if (next.isCharacters()) {\r
+                               taxonText += next.asCharacters().getData();\r
+                       } else if (isStartingElement(next, ANNOTATION)) {\r
+                               String annotation = handleSimpleAnnotation(state, reader, next);\r
+                               modifyingText = CdmUtils.concat("; ", modifyingText, annotation);\r
+                       } else if (isStartingElement(next, FOOTNOTE_REF)) {\r
+                               handleNotYetImplementedElement(next);\r
+                       } else {\r
+                               handleUnexpectedElement(next);\r
+                       }\r
+               }\r
+               throw new IllegalStateException("Event has no closing tag");\r
+\r
+       }\r
+\r
        /**\r
         * Creates a string that represents the given taxon. The string will try to replace e.g.\r
         * abbreviated genus epithets by its full name etc.\r
@@ -313,14 +331,14 @@ public class MarkupKeyImport  extends MarkupImportBase  {
                        }\r
                        if (isInfraSpecificMarker(single)){\r
                                String strSpeciesEpi = name.getSpecificEpithet();\r
-                               if (isBlank(result)){\r
+                               if (isBlank(result) && isNotBlank(strSpeciesEpi)){\r
                                        result += strGenusName + " " + strSpeciesEpi;\r
                                }\r
                        }\r
                        result = (result + " " + split[i]).trim();\r
                }\r
-               //remove trailing "."\r
-               while (result.endsWith(".")){\r
+               //remove trailing "." except for "sp."\r
+               while (result.matches(".*(?<!sp)\\.$")){\r
                        result = result.substring(0, result.length()-1).trim();\r
                }\r
                return result;\r
@@ -339,15 +357,7 @@ public class MarkupKeyImport  extends MarkupImportBase  {
                }\r
        }\r
        \r
-       private boolean isGenusAbbrev(String single, String strGenusName) {\r
-               if (! single.matches("[A-Z]\\.?")) {\r
-                       return false;\r
-               }else if (single.length() == 0 || strGenusName == null || strGenusName.length() == 0){\r
-                       return false; \r
-               }else{\r
-                       return single.charAt(0) == strGenusName.charAt(0);\r
-               }\r
-       }\r
+\r
        \r
        \r
 //******************************** recognize nodes ***********/\r
@@ -372,7 +382,7 @@ public class MarkupKeyImport  extends MarkupImportBase  {
                        }\r
                }\r
                //report missing match, if num exists\r
-               if (matchingNodes.isEmpty() && num != null){\r
+               if (matchingNodes.isEmpty() /* TODO redo comment && num != null  */){\r
                        String message = "Taxon has <num> attribute in taxontitle but no matching key nodes exist: %s, Key: %s";\r
                        message = String.format(message, num, leadsKey.toString());\r
                        fireWarningEvent(message, event, 1);\r