Markup import updates
author Andreas Müller <a.mueller@bgbm.org>
Mon, 29 Apr 2013 23:08:59 +0000 (23:08 +0000)
committer Andreas Müller <a.mueller@bgbm.org>
Mon, 29 Apr 2013 23:08:59 +0000 (23:08 +0000)
.gitattributes
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupDocumentImport.java
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupDocumentImportNoComponent.java
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupImportBase.java
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupKeyImport.java [new file with mode: 0644]
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupSpecimenImport.java

index 6e2a6ae0576e58e82a1a692db164c1dc1e9363e8..8e5ea7383e0e8ebcba4f245c8b27abb2f5cb699f 100644 (file)
@@ -413,6 +413,7 @@ cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupImportBase.java -text
 cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupImportConfigurator.java -text
 cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupImportState.java -text
 cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupInputStream.java -text
+cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupKeyImport.java -text
 cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupSpecimenImport.java -text
 cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupTransformer.java -text
 cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/UnmatchedLeads.java -text
index 99c979e45c588c6ea47ff7bc95542f6e15c0a02a..682532aab5d39890bde3d23c9d7ad4e3800f9c7a 100644 (file)
@@ -52,6 +52,7 @@ import eu.etaxonomy.cdm.model.taxon.Taxon;
  */\r
 @Component\r
 public class MarkupDocumentImport extends XmlImportBase<MarkupImportConfigurator, MarkupImportState> implements ICdmIO<MarkupImportState> {\r
+       @SuppressWarnings("unused")\r
        private static final Logger logger = Logger.getLogger(MarkupDocumentImport.class);\r
 \r
 \r
index f30fa647ba8e6ecd816009ce3f3682b5f5b1fac0..7f44aa49451fc718b369ef5577d9885ed51467c1 100644 (file)
@@ -30,6 +30,7 @@ import javax.xml.stream.events.StartElement;
 import javax.xml.stream.events.XMLEvent;\r
 \r
 import org.apache.commons.lang.StringUtils;\r
+import org.apache.commons.lang.WordUtils;\r
 import org.apache.log4j.Logger;\r
 \r
 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;\r
@@ -134,7 +135,6 @@ public class MarkupDocumentImportNoComponent extends MarkupImportBase {
        private static final String COLLECTOR = "collector";\r
        private static final String COLLECTION = "collection";\r
        private static final String COORDINATES = "coordinates";\r
-       private static final String COUPLET = "couplet";\r
        private static final String DATES = "dates";\r
        private static final String DEDICATION = "dedication";\r
        private static final String DEFAULT_MEDIA_URL = "defaultMediaUrl";\r
@@ -167,12 +167,9 @@ public class MarkupDocumentImportNoComponent extends MarkupImportBase {
        private static final String INFRANK = "infrank";\r
        private static final String INFRAUT = "infraut";\r
        private static final String INFRPARAUT = "infrparaut";\r
-       private static final String IS_SPOTCHARACTERS = "isSpotcharacters";\r
        private static final String ISSUE = "issue";\r
        private static final String ITALICS = "italics";\r
        private static final String KEY = "key";\r
-       private static final String KEY_TITLE = "keyTitle";\r
-       private static final String KEYNOTES = "keynotes";\r
        private static final String LIFE_CYCLE_PERIODS = "lifeCyclePeriods";\r
        private static final String LOCALITY = "locality";\r
        private static final String LOST = "lost";\r
@@ -184,7 +181,6 @@ public class MarkupDocumentImportNoComponent extends MarkupImportBase {
        private static final String NOT_FOUND = "notFound";\r
        private static final String NOT_SEEN = "notSeen";\r
        private static final String NOTES = "notes";\r
-       private static final String NUM = "num";\r
        private static final String ORIGINAL_DETERMINATION = "originalDetermination";\r
        private static final String PAGES = "pages";\r
        private static final String PARAUT = "paraut";\r
@@ -193,7 +189,6 @@ public class MarkupDocumentImportNoComponent extends MarkupImportBase {
        private static final String PUBNAME = "pubname";\r
        private static final String PUBTITLE = "pubtitle";\r
        private static final String PUBTYPE = "pubtype";\r
-       private static final String QUESTION = "question";\r
        private static final String QUOTE = "quote";\r
        private static final String RANK = "rank";\r
        private static final String REF = "ref";\r
@@ -204,11 +199,7 @@ public class MarkupDocumentImportNoComponent extends MarkupImportBase {
        private static final String TAXON = "taxon";\r
        private static final String TAXONTITLE = "taxontitle";\r
        private static final String TAXONTYPE = "taxontype";\r
-       private static final String TEXT = "text";\r
        private static final String TEXT_SECTION = "textSection";\r
-       private static final String TO_COUPLET = "toCouplet";\r
-       private static final String TO_KEY = "toKey";\r
-       private static final String TO_TAXON = "toTaxon";\r
        private static final String TYPE = "type";\r
        private static final String TYPE_STATUS = "typeStatus";\r
        private static final String TREATMENT = "treatment";\r
@@ -235,15 +226,18 @@ public class MarkupDocumentImportNoComponent extends MarkupImportBase {
 \r
        private IEditGeoService editGeoService;\r
        \r
+       private MarkupKeyImport keyImport;\r
+       \r
        public MarkupDocumentImportNoComponent(MarkupDocumentImport docImport) {\r
                super(docImport);\r
                this.editGeoService = docImport.getEditGeoService();\r
+               keyImport = new MarkupKeyImport(docImport);\r
        }\r
 \r
        public void doInvoke(MarkupImportState state) throws XMLStreamException { \r
                XMLEventReader reader = state.getReader();\r
                \r
-               // publication\r
+               // publication (= root element)\r
                String elName = PUBLICATION;\r
                boolean hasPublication = false;\r
                \r
@@ -268,8 +262,7 @@ public class MarkupDocumentImportNoComponent extends MarkupImportBase {
 \r
        }\r
 \r
-       private void handlePublication(MarkupImportState state, XMLEventReader reader, XMLEvent currentEvent, \r
-                       String elName) throws XMLStreamException {\r
+       private void handlePublication(MarkupImportState state, XMLEventReader reader, XMLEvent currentEvent, String elName) throws XMLStreamException {\r
 \r
                // attributes\r
                StartElement element = currentEvent.asStartElement();\r
@@ -491,7 +484,7 @@ public class MarkupDocumentImportNoComponent extends MarkupImportBase {
                                                fireWarningEvent(warning, next, 12);\r
                                        }\r
                                        \r
-                                       makeKeyNodes(state, parentEvent, taxonTitle);\r
+                                       keyImport.makeKeyNodes(state, parentEvent, taxonTitle);\r
                                        state.setCurrentTaxon(null);\r
                                        state.setCurrentTaxonNum(null);\r
                                        save(taxon, state);\r
@@ -524,7 +517,7 @@ public class MarkupDocumentImportNoComponent extends MarkupImportBase {
                                } else if (isStartingElement(next, TEXT_SECTION)) {\r
                                        handleNotYetImplementedElement(next);\r
                                } else if (isStartingElement(next, KEY)) {\r
-                                       handleKey(state, reader, next);\r
+                                       keyImport.handleKey(state, reader, next);\r
                                } else if (isStartingElement(next, NOMENCLATURE)) {\r
                                        handleNomenclature(state, reader, next);\r
                                        hasNomenclature = true;\r
@@ -573,279 +566,6 @@ public class MarkupDocumentImportNoComponent extends MarkupImportBase {
                throw new IllegalStateException("<Taxon> has no closing tag");\r
        }\r
 \r
-       private void makeKeyNodes(MarkupImportState state, XMLEvent event, String taxonTitle) {\r
-               Taxon taxon = state.getCurrentTaxon();\r
-               String num = state.getCurrentTaxonNum();\r
-               \r
-               String nameString = CdmBase.deproxy(taxon.getName(), NonViralName.class).getNameCache();\r
-//             String nameString = taxonTitle;\r
-               \r
-               //try to find matching lead nodes \r
-               UnmatchedLeadsKey leadsKey = UnmatchedLeadsKey.NewInstance(num, nameString);\r
-               Set<PolytomousKeyNode> matchingNodes = handleMatchingNodes(state, taxon, leadsKey);\r
-               \r
-               if (num != null){//same without using the num\r
-                       UnmatchedLeadsKey noNumLeadsKey = UnmatchedLeadsKey.NewInstance("", nameString);\r
-                       Set<PolytomousKeyNode> noNumMatchingNodes = handleMatchingNodes(state, taxon, noNumLeadsKey);\r
-                       if(noNumMatchingNodes.size() > 0){\r
-                               String message ="Taxon matches additional key node when not considering <num> attribute in taxontitle. This may be correct but may also indicate an error.";\r
-                               fireWarningEvent(message, event, 1);\r
-                       }\r
-               }\r
-               //report missing match, if num exists\r
-               if (matchingNodes.isEmpty() && num != null){\r
-                       String message = "Taxon has <num> attribute in taxontitle but no matching key nodes exist: %s, Key: %s";\r
-                       message = String.format(message, num, leadsKey.toString());\r
-                       fireWarningEvent(message, event, 1);\r
-               }\r
-               \r
-       }\r
-       \r
-       private Set<PolytomousKeyNode> handleMatchingNodes(MarkupImportState state, Taxon taxon, UnmatchedLeadsKey leadsKey) {\r
-               Set<PolytomousKeyNode> matchingNodes = state.getUnmatchedLeads().getNodes(leadsKey);\r
-               for (PolytomousKeyNode matchingNode : matchingNodes){\r
-                       state.getUnmatchedLeads().removeNode(leadsKey, matchingNode);\r
-                       matchingNode.setTaxon(taxon);\r
-                       state.getPolytomousKeyNodesToSave().add(matchingNode);\r
-               }\r
-               return matchingNodes;\r
-       }\r
-\r
-       private void handleKey(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {\r
-               // attributes\r
-               Map<String, Attribute> attributes = getAttributes(parentEvent);\r
-               String isSpotcharacters = getAndRemoveAttributeValue(attributes, IS_SPOTCHARACTERS);\r
-               if (isNotBlank(isSpotcharacters) ) {\r
-                       //TODO isSpotcharacters\r
-                       String message = "Attribute isSpotcharacters not yet implemented for <key>";\r
-                       fireWarningEvent(message, parentEvent, 4);\r
-               }\r
-               \r
-               PolytomousKey key = PolytomousKey.NewInstance();\r
-               key.addTaxonomicScope(state.getCurrentTaxon());\r
-               state.setCurrentKey(key);\r
-               \r
-               boolean isFirstCouplet = true;\r
-               while (reader.hasNext()) {\r
-                       XMLEvent next = readNoWhitespace(reader);\r
-                       if (isMyEndingElement(next, parentEvent)) {\r
-                               save(key, state);\r
-                               state.setCurrentKey(null);\r
-                               return;\r
-                       } else if (isEndingElement(next, KEYNOTES)){\r
-                               popUnimplemented(next.asEndElement());\r
-                       } else if (isStartingElement(next, KEY_TITLE)) {\r
-                               handleKeyTitle(state, reader, next);\r
-                       } else if (isStartingElement(next, KEYNOTES)) {\r
-                               //TODO\r
-                               handleNotYetImplementedElement(next);\r
-                       } else if (isStartingElement(next, COUPLET)) {\r
-                               PolytomousKeyNode node = null;\r
-                               if (isFirstCouplet){\r
-                                       node = key.getRoot();\r
-                                       isFirstCouplet = false;\r
-                               }\r
-                               handleCouplet(state, reader, next, node);\r
-                       } else {\r
-                               handleUnexpectedElement(next);\r
-                       }\r
-               }\r
-               throw new IllegalStateException("<key> has no closing tag");\r
-       }\r
-\r
-       /**\r
-        * @param state\r
-        * @param reader\r
-        * @param key\r
-        * @param next\r
-        * @throws XMLStreamException\r
-        */\r
-       private void handleKeyTitle(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {\r
-               PolytomousKey key = state.getCurrentKey();\r
-               String keyTitle = getCData(state, reader, parentEvent);\r
-               String standardTitles = "(?i)(Key\\sto\\sthe\\s(genera|species|varieties|forms))";\r
-               \r
-               if (isNotBlank(keyTitle) ){\r
-                       if (!state.getConfig().isReplaceStandardKeyTitles() || ! keyTitle.matches(standardTitles)){\r
-                               key.setTitleCache(keyTitle, true);\r
-                       }\r
-               }\r
-       }\r
-\r
-       private void handleCouplet(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode parentNode) throws XMLStreamException {\r
-               String num = getOnlyAttribute(parentEvent, NUM, true);\r
-               List<PolytomousKeyNode> childList = new ArrayList<PolytomousKeyNode>(); \r
-               \r
-               while (reader.hasNext()) {\r
-                       XMLEvent next = readNoWhitespace(reader);\r
-                       if (isMyEndingElement(next, parentEvent)) {\r
-                               completeCouplet(state, parentEvent, parentNode, num, childList);\r
-                               return;\r
-                       } else if (isStartingElement(next, QUESTION)) {\r
-                               handleQuestion(state, reader, next, childList);\r
-                       } else if (isStartingElement(next, KEYNOTES)) {\r
-                               //TODO\r
-                               handleNotYetImplementedElement(next);\r
-                       } else if (isEndingElement(next, KEYNOTES)) {\r
-                               //TODO\r
-                               popUnimplemented(next.asEndElement());\r
-                       } else {\r
-                               handleUnexpectedElement(next);\r
-                       }\r
-               }\r
-               throw new IllegalStateException("<couplet> has no closing tag");\r
-       }\r
-\r
-       /**\r
-        * @param state\r
-        * @param parentEvent\r
-        * @param parentNode\r
-        * @param num\r
-        * @param childList\r
-        */\r
-       private void completeCouplet(MarkupImportState state, XMLEvent parentEvent,\r
-                       PolytomousKeyNode parentNode, String num, List<PolytomousKeyNode> childList) {\r
-               if (parentNode != null){\r
-                       for (PolytomousKeyNode childNode : childList){\r
-                               parentNode.addChild(childNode);\r
-                       }\r
-               }else if (isNotBlank(num)){\r
-                       UnmatchedLeadsKey unmatchedKey = UnmatchedLeadsKey.NewInstance(state.getCurrentKey(), num);\r
-                       Set<PolytomousKeyNode> nodes = state.getUnmatchedLeads().getNodes(unmatchedKey);\r
-                       for(PolytomousKeyNode nodeToMatch: nodes){\r
-                               for (PolytomousKeyNode childNode : childList){\r
-                                       nodeToMatch.addChild(childNode);\r
-                               }\r
-                               state.getUnmatchedLeads().removeNode(unmatchedKey, nodeToMatch);\r
-                       }\r
-               }else{\r
-                       String message = "Parent num could not be matched. Please check if num (%s) is correct";\r
-                       message = String.format(message, num);\r
-                       fireWarningEvent(message, parentEvent, 6);\r
-               }\r
-       }\r
-\r
-       private void handleQuestion(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, List<PolytomousKeyNode> nodesList) throws XMLStreamException {\r
-               // attributes\r
-               Map<String, Attribute> attributes = getAttributes(parentEvent);\r
-               //needed only for data lineage\r
-               String questionNum = getAndRemoveRequiredAttributeValue(parentEvent, attributes, NUM);\r
-               \r
-               PolytomousKeyNode myNode = PolytomousKeyNode.NewInstance();\r
-               myNode.setKey(state.getCurrentKey());  //to avoid NPE while computing num in PolytomousKeyNode in case this node is not matched correctly with a parent\r
-               nodesList.add(myNode);\r
-               \r
-               while (reader.hasNext()) {\r
-                       XMLEvent next = readNoWhitespace(reader);\r
-                       if (isMyEndingElement(next, parentEvent)) {\r
-                               return;\r
-                       } else if (isStartingElement(next, TEXT)) {\r
-                               String text = getCData(state, reader, next);\r
-                               KeyStatement statement = KeyStatement.NewInstance(text);\r
-                               myNode.setStatement(statement);\r
-                       } else if (isStartingElement(next, COUPLET)) {\r
-                               //TODO test\r
-                               handleCouplet(state, reader, next, myNode);\r
-                       } else if (isStartingElement(next, TO_COUPLET)) {\r
-                               handleToCouplet(state, reader, next, myNode);\r
-                       } else if (isStartingElement(next, TO_TAXON)) {\r
-                               handleToTaxon(state, reader, next, myNode);\r
-                       } else if (isStartingElement(next, TO_KEY)) {\r
-                               //TODO\r
-                               handleNotYetImplementedElement(next);\r
-                       } else if (isEndingElement(next, TO_KEY)){\r
-                               //TODO\r
-                               popUnimplemented(next.asEndElement());\r
-                       } else if (isStartingElement(next, KEYNOTES)) {\r
-                               //TODO\r
-                               handleNotYetImplementedElement(next);\r
-                       } else if (isEndingElement(next, KEYNOTES)){\r
-                               //TODO\r
-                               popUnimplemented(next.asEndElement());\r
-                       } else {\r
-                               handleUnexpectedElement(next);\r
-                       }\r
-               }\r
-               throw new IllegalStateException("<question> has no closing tag");\r
-       }\r
-\r
-       private void handleToCouplet(MarkupImportState state, XMLEventReader reader, XMLEvent next, PolytomousKeyNode node) throws XMLStreamException {\r
-               String num = getOnlyAttribute(next, NUM, true);\r
-               String cData = getCData(state, reader, next, false);\r
-               if (isNotBlank(cData) && ! cData.equals(num)){\r
-                       String message = "CData ('%s') not handled in <toCouplet>";\r
-                       message = String.format(message, cData);\r
-                       fireWarningEvent(message, next, 4);\r
-               }\r
-               UnmatchedLeadsKey unmatched = UnmatchedLeadsKey.NewInstance(state.getCurrentKey(), num);\r
-               state.getUnmatchedLeads().addKey(unmatched, node);\r
-       }\r
-\r
-       private void handleToTaxon(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode node) throws XMLStreamException {\r
-               Map<String, Attribute> attributes = getAttributes(parentEvent);\r
-               String num = getAndRemoveAttributeValue(attributes, NUM);\r
-               String taxonStr = getCData(state, reader, parentEvent, false);\r
-               //TODO ?\r
-               taxonStr = makeTaxonKey(taxonStr, state.getCurrentTaxon());\r
-               UnmatchedLeadsKey unmatched = UnmatchedLeadsKey.NewInstance(num, taxonStr);\r
-               state.getUnmatchedLeads().addKey(unmatched, node);\r
-               return;\r
-       }\r
-       \r
-       private String makeTaxonKey(String strGoto, Taxon taxon) {\r
-               String result = "";\r
-               if (strGoto == null){\r
-                       return "";\r
-               }\r
-               \r
-               NonViralName<?> name = CdmBase.deproxy(taxon.getName(), NonViralName.class);\r
-               String strGenusName = name.getGenusOrUninomial();\r
-               \r
-               \r
-               strGoto = strGoto.replaceAll("\\([^\\(\\)]*\\)", "");  //replace all brackets\r
-               strGoto = strGoto.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace\r
-               \r
-               strGoto = strGoto.trim();  \r
-               String[] split = strGoto.split("\\s");\r
-               for (int i = 0; i<split.length; i++){\r
-                       String single = split[i];\r
-                       if (isGenusAbbrev(single, strGenusName)){\r
-                               split[i] = strGenusName;\r
-                       }\r
-                       if (isInfraSpecificMarker(single)){\r
-                               String strSpeciesEpi = name.getSpecificEpithet();\r
-                               if (isBlank(result)){\r
-                                       result += strGenusName + " " + strSpeciesEpi;\r
-                               }\r
-                       }\r
-                       result = (result + " " + split[i]).trim();\r
-               }\r
-               return result;\r
-       }\r
-       \r
-\r
-       private boolean isInfraSpecificMarker(String single) {\r
-               try {\r
-                       if (Rank.getRankByAbbreviation(single).isInfraSpecific()){\r
-                               return true;\r
-                       }else{\r
-                               return false;\r
-                       }\r
-               } catch (UnknownCdmTypeException e) {\r
-                       return false;\r
-               }\r
-       }\r
-       \r
-       private boolean isGenusAbbrev(String single, String strGenusName) {\r
-               if (! single.matches("[A-Z]\\.?")) {\r
-                       return false;\r
-               }else if (single.length() == 0 || strGenusName == null || strGenusName.length() == 0){\r
-                       return false; \r
-               }else{\r
-                       return single.charAt(0) == strGenusName.charAt(0);\r
-               }\r
-       }\r
-\r
        /**\r
         * @param state\r
         * @param reader\r
@@ -1952,40 +1672,6 @@ public class MarkupDocumentImportNoComponent extends MarkupImportBase {
                return createAuthor(collectorStr);\r
        }\r
 \r
-       private String getCData(MarkupImportState state, XMLEventReader reader, XMLEvent next) throws XMLStreamException {\r
-               return getCData(state, reader, next, true);\r
-       }\r
-               \r
-       /**\r
-        * Reads character data. Any element other than character data or the ending\r
-        * tag will fire an unexpected element event.\r
-        * \r
-        * @param state\r
-        * @param reader\r
-        * @param next\r
-        * @return\r
-        * @throws XMLStreamException\r
-        */\r
-       private String getCData(MarkupImportState state, XMLEventReader reader, XMLEvent next,boolean checkAttributes) throws XMLStreamException {\r
-               if (checkAttributes){\r
-                       checkNoAttributes(next);\r
-               }\r
-\r
-               String text = "";\r
-               while (reader.hasNext()) {\r
-                       XMLEvent myNext = readNoWhitespace(reader);\r
-                       if (isMyEndingElement(myNext, next)) {\r
-                               return text;\r
-                       } else if (myNext.isCharacters()) {\r
-                               text += myNext.asCharacters().getData();\r
-                       } else {\r
-                               handleUnexpectedElement(myNext);\r
-                       }\r
-               }\r
-               throw new IllegalStateException("Event has no closing tag");\r
-\r
-       }\r
-\r
        /**\r
         * Creates the name defined by a nom tag. Adds it to the given homotypical\r
         * group (if not null).\r
@@ -2160,7 +1846,11 @@ public class MarkupDocumentImportNoComponent extends MarkupImportBase {
                                } else if (rank.isInfraGeneric()) {\r
                                        name.setInfraGenericEpithet(toFirstCapital(value));\r
                                } else if (rank.isSpecies()) {\r
-                                       name.setSpecificEpithet(value.toLowerCase());\r
+                                       if (isFirstCapitalWord(value)){ //capital letters are allowed for species epithet in case of person names (e.g. Manilkara Welwitschii Engl.\r
+                                               name.setSpecificEpithet(value);\r
+                                       }else{\r
+                                               name.setSpecificEpithet(value.toLowerCase());\r
+                                       }\r
                                } else if (rank.isInfraSpecific()) {\r
                                        name.setInfraSpecificEpithet(value.toLowerCase());\r
                                } else {\r
@@ -2333,7 +2023,7 @@ public class MarkupDocumentImportNoComponent extends MarkupImportBase {
                return name;\r
        }\r
 \r
-       private void handleName(MarkupImportState state, XMLEventReader reader,\r
+       private void handleName(MarkupImportState state, XMLEventReader reader, \r
                        XMLEvent parentEvent, Map<String, String> nameMap)\r
                        throws XMLStreamException {\r
                String classValue = getClassOnlyAttribute(parentEvent);\r
@@ -3129,7 +2819,7 @@ public class MarkupDocumentImportNoComponent extends MarkupImportBase {
                        } else if (next.isCharacters()) {\r
                                text += next.asCharacters().getData();\r
                        } else {\r
-                               handleUnexpectedEndElement(next.asEndElement());\r
+                               handleUnexpectedElement(next);\r
                        }\r
                }\r
                throw new IllegalStateException("<DistributionLocality> has no closing tag");\r
index 8b4e2d9a75b24f2d89ec4526ad84822cbb4d6954..74186f2c7c40202fc6fdb29285342b6b09071c70 100644 (file)
@@ -27,13 +27,13 @@ import javax.xml.stream.events.StartElement;
 import javax.xml.stream.events.XMLEvent;\r
 \r
 import org.apache.commons.lang.StringUtils;\r
+import org.apache.commons.lang.WordUtils;\r
 import org.apache.log4j.Logger;\r
 \r
 import eu.etaxonomy.cdm.api.service.IClassificationService;\r
 import eu.etaxonomy.cdm.api.service.ITermService;\r
 import eu.etaxonomy.cdm.common.CdmUtils;\r
 import eu.etaxonomy.cdm.io.common.CdmImportBase.TermMatchMode;\r
-import eu.etaxonomy.cdm.io.common.XmlImportBase;\r
 import eu.etaxonomy.cdm.io.common.events.IIoEvent;\r
 import eu.etaxonomy.cdm.io.common.events.IoProblemEvent;\r
 import eu.etaxonomy.cdm.model.common.AnnotationType;\r
@@ -56,13 +56,13 @@ import eu.etaxonomy.cdm.model.taxon.TaxonBase;
 /**\r
  * @author a.mueller\r
  * @created 04.08.2008\r
- * @version 1.0\r
  */\r
 public abstract class MarkupImportBase  {\r
        @SuppressWarnings("unused")\r
        private static final Logger logger = Logger.getLogger(MarkupImportBase.class);\r
        \r
        protected static final String CLASS = "class";\r
+       protected static final String NUM = "num";\r
 \r
        protected MarkupDocumentImport docImport;\r
        \r
@@ -474,7 +474,7 @@ public abstract class MarkupImportBase  {
         */\r
        protected void handleNotYetImplementedElement(XMLEvent event) {\r
                QName qName = event.asStartElement().getName();\r
-               boolean isTopLevel = unhandledElements.size() == 0;\r
+               boolean isTopLevel = unhandledElements.isEmpty();\r
                unhandledElements.push(qName);\r
                if (isTopLevel){\r
                        fireNotYetImplementedElement(event.getLocation(), qName, 1);\r
@@ -569,6 +569,23 @@ public abstract class MarkupImportBase  {
        }\r
        \r
 \r
+       /**\r
+        * Checks if the given string is left unchanged by WordUtils.capitalizeFully, i.e. each word starts with a capital letter and has no further capital letter. Words are delimited either by whitespace (default delimiters) or by '-' only.\r
+        * @param word the string to be checked. Usually should be a single word. NOTE(review): null presumably results in a NullPointerException as capitalizeFully(null) returns null - confirm.\r
+        * @return true if one of the two comparisons above matches, false otherwise\r
+        */\r
+       protected boolean isFirstCapitalWord(String word) {\r
+               if (WordUtils.capitalizeFully(word).equals(word)){\r
+                       return true;\r
+               }else if (WordUtils.capitalizeFully(word,new char[]{'-'}).equals(word)){\r
+                       //hyphenated words like Le-Testui (which is a species epithet): with '-' as delimiter each part must start with a capital letter\r
+                       return true;\r
+               }else{\r
+                       return false;\r
+               }\r
+       }\r
+       \r
+\r
        /**\r
         * Read next event. Ignore whitespace events.\r
         * @param reader\r
@@ -687,6 +704,53 @@ public abstract class MarkupImportBase  {
                return docImport.getLanguage(state, uuid, label, text, labelAbbrev, voc);\r
        }\r
        \r
+// *************************************** Concrete methods **********************************************/\r
+       \r
+       \r
+       /**\r
+        * Reads the character data of the element started by <code>next</code>. Convenience overload which\r
+        * delegates to the 4-argument method with <code>checkAttributes = true</code>.\r
+        *\r
+        * @see #getCData(MarkupImportState, XMLEventReader, XMLEvent, boolean)\r
+        * @param state the import state, passed through to the delegate\r
+        * @param reader the event reader, passed through to the delegate\r
+        * @param next the starting event of the element to read, passed through to the delegate\r
+        * @return the result of the delegate call\r
+        * @throws XMLStreamException if thrown by the delegate\r
+        */\r
+       protected String getCData(MarkupImportState state, XMLEventReader reader, XMLEvent next) throws XMLStreamException {\r
+               return getCData(state, reader, next, true);\r
+       }\r
+               \r
+       /**\r
+        * Reads character data until the ending tag of the element started by <code>next</code> is found. Any event other than character data or that ending tag is passed to handleUnexpectedElement.\r
+        * @param state the import state; not used within this method body\r
+        * @param reader the event reader the events are read from\r
+        * @param next the starting event of the element whose character data is to be read\r
+        * @param checkAttributes if true, checkNoAttributes(next) is called before reading\r
+        * @return the concatenated character data, the empty string if no character data was read\r
+        * @throws XMLStreamException declared for the reader access (presumably thrown by readNoWhitespace - confirm)\r
+        * @throws IllegalStateException if the reader has no further events before the ending tag was found\r
+        */\r
+       protected String getCData(MarkupImportState state, XMLEventReader reader, XMLEvent next,boolean checkAttributes) throws XMLStreamException {\r
+               if (checkAttributes){\r
+                       checkNoAttributes(next);\r
+               }\r
+\r
+               String text = "";\r
+               while (reader.hasNext()) {\r
+                       XMLEvent myNext = readNoWhitespace(reader);\r
+                       if (isMyEndingElement(myNext, next)) {\r
+                               return text;\r
+                       } else if (myNext.isCharacters()) {\r
+                               text += myNext.asCharacters().getData();  //multiple character events are concatenated\r
+                       } else {\r
+                               handleUnexpectedElement(myNext);  //reading continues afterwards unless this call throws\r
+                       }\r
+               }\r
+               throw new IllegalStateException("Event has no closing tag");\r
+\r
+       }\r
        \r
 //********************************************** OLD *************************************     \r
 \r
diff --git a/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupKeyImport.java b/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupKeyImport.java
new file mode 100644 (file)
index 0000000..85be048
--- /dev/null
@@ -0,0 +1,351 @@
+/**\r
+ * Copyright (C) 2009 EDIT\r
+ * European Distributed Institute of Taxonomy\r
+ * http://www.e-taxonomy.eu\r
+ *\r
+ * The contents of this file are subject to the Mozilla Public License Version 1.1\r
+ * See LICENSE.TXT at the top of this package for the full license terms.\r
+ */\r
+\r
+package eu.etaxonomy.cdm.io.markup;\r
+\r
+import java.util.ArrayList;\r
+import java.util.List;\r
+import java.util.Map;\r
+import java.util.Set;\r
+\r
+import javax.xml.stream.XMLEventReader;\r
+import javax.xml.stream.XMLStreamException;\r
+import javax.xml.stream.events.Attribute;\r
+import javax.xml.stream.events.XMLEvent;\r
+\r
+import org.apache.log4j.Logger;\r
+\r
+import eu.etaxonomy.cdm.io.markup.UnmatchedLeads.UnmatchedLeadsKey;\r
+import eu.etaxonomy.cdm.model.common.CdmBase;\r
+import eu.etaxonomy.cdm.model.common.Language;\r
+import eu.etaxonomy.cdm.model.description.KeyStatement;\r
+import eu.etaxonomy.cdm.model.description.PolytomousKey;\r
+import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;\r
+import eu.etaxonomy.cdm.model.name.NonViralName;\r
+import eu.etaxonomy.cdm.model.name.Rank;\r
+import eu.etaxonomy.cdm.model.taxon.Taxon;\r
+import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;\r
+\r
+/**\r
+ * @author a.mueller\r
+ * @created 26.04.2013\r
+ * \r
+ */\r
+public class MarkupKeyImport  extends MarkupImportBase  {\r
+       @SuppressWarnings("unused")\r
+       private static final Logger logger = Logger.getLogger(MarkupKeyImport.class);\r
+       \r
+       private static final String COUPLET = "couplet";\r
+       private static final String IS_SPOTCHARACTERS = "isSpotcharacters";\r
+       private static final String KEYNOTES = "keynotes";\r
+       private static final String KEY_TITLE = "keyTitle";\r
+       private static final String QUESTION = "question";\r
+       private static final String TEXT = "text";\r
+       private static final String TO_COUPLET = "toCouplet";\r
+       private static final String TO_KEY = "toKey";\r
+       private static final String TO_TAXON = "toTaxon";\r
+\r
+       \r
+       /**\r
+        * Creates the import helper for key elements.\r
+        * \r
+        * @param docImport the document import, handed on to the super constructor\r
+        */\r
+       public MarkupKeyImport(MarkupDocumentImport docImport) {\r
+               super(docImport);\r
+       }\r
+       \r
+       public void handleKey(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {\r
+               // attributes\r
+               Map<String, Attribute> attributes = getAttributes(parentEvent);\r
+               String isSpotcharacters = getAndRemoveAttributeValue(attributes, IS_SPOTCHARACTERS);\r
+               if (isNotBlank(isSpotcharacters) ) {\r
+                       //TODO isSpotcharacters\r
+                       String message = "Attribute isSpotcharacters not yet implemented for <key>";\r
+                       fireWarningEvent(message, parentEvent, 4);\r
+               }\r
+               \r
+               PolytomousKey key = PolytomousKey.NewInstance();\r
+               key.addTaxonomicScope(state.getCurrentTaxon());\r
+               state.setCurrentKey(key);\r
+               \r
+               boolean isFirstCouplet = true;\r
+               while (reader.hasNext()) {\r
+                       XMLEvent next = readNoWhitespace(reader);\r
+                       if (isMyEndingElement(next, parentEvent)) {\r
+                               save(key, state);\r
+                               state.setCurrentKey(null);\r
+                               return;\r
+                       } else if (isEndingElement(next, KEYNOTES)){\r
+                               popUnimplemented(next.asEndElement());\r
+                       } else if (isStartingElement(next, KEY_TITLE)) {\r
+                               handleKeyTitle(state, reader, next);\r
+                       } else if (isStartingElement(next, KEYNOTES)) {\r
+                               //TODO\r
+                               handleNotYetImplementedElement(next);\r
+                       } else if (isStartingElement(next, COUPLET)) {\r
+                               PolytomousKeyNode node = null;\r
+                               if (isFirstCouplet){\r
+                                       node = key.getRoot();\r
+                                       isFirstCouplet = false;\r
+                               }\r
+                               handleCouplet(state, reader, next, node);\r
+                       } else {\r
+                               handleUnexpectedElement(next);\r
+                       }\r
+               }\r
+               throw new IllegalStateException("<key> has no closing tag");\r
+       }\r
+\r
+\r
+       /**\r
+        * Reads the character data of a key title element and sets it as protected title\r
+        * cache of the current key. A blank title is ignored. A title matching the standard\r
+        * titles (French ones if the default language is French, English ones otherwise)\r
+        * is also ignored if the configuration asks for replacing standard key titles.\r
+        * \r
+        * @param state\r
+        * @param reader\r
+        * @param parentEvent the start element of the key title\r
+        * @throws XMLStreamException\r
+        */\r
+       private void handleKeyTitle(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {\r
+               PolytomousKey currentKey = state.getCurrentKey();\r
+               String title = getCData(state, reader, parentEvent);\r
+\r
+               Language defaultLanguage = state.getDefaultLanguage();\r
+               boolean isFrench = defaultLanguage != null && defaultLanguage.equals(Language.FRENCH());\r
+               String standardTitles = isFrench\r
+                               ? "(?i)(Cl\u00e9\\sdes\\s(genres|esp\u00e8ces))"\r
+                               : "(?i)(Key\\sto\\sthe\\s(genera|species|varieties|forms))";\r
+\r
+               if (! isNotBlank(title)){\r
+                       return;\r
+               }\r
+               boolean keepTitle = !state.getConfig().isReplaceStandardKeyTitles() || ! title.matches(standardTitles);\r
+               if (keepTitle){\r
+                       currentKey.setTitleCache(title, true);\r
+               }\r
+       }\r
+       \r
+\r
+       /**\r
+        * Handles a couplet element. The key nodes created for the questions of the\r
+        * couplet are collected and passed to completeCouplet once the ending tag is read.\r
+        * \r
+        * @param state\r
+        * @param reader\r
+        * @param parentEvent the start element of the couplet\r
+        * @param parentNode the node the questions are attached to, may be <code>null</code>\r
+        * @throws XMLStreamException\r
+        */\r
+       private void handleCouplet(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode parentNode) throws XMLStreamException {\r
+               String coupletNum = getOnlyAttribute(parentEvent, NUM, true);\r
+               List<PolytomousKeyNode> questionNodes = new ArrayList<PolytomousKeyNode>();\r
+\r
+               while (reader.hasNext()) {\r
+                       XMLEvent event = readNoWhitespace(reader);\r
+                       if (isMyEndingElement(event, parentEvent)) {\r
+                               completeCouplet(state, parentEvent, parentNode, coupletNum, questionNodes);\r
+                               return;\r
+                       }\r
+\r
+                       if (isStartingElement(event, QUESTION)) {\r
+                               handleQuestion(state, reader, event, questionNodes);\r
+                       } else if (isStartingElement(event, KEYNOTES)) {\r
+                               //TODO\r
+                               handleNotYetImplementedElement(event);\r
+                       } else if (isEndingElement(event, KEYNOTES)) {\r
+                               //TODO\r
+                               popUnimplemented(event.asEndElement());\r
+                       } else {\r
+                               handleUnexpectedElement(event);\r
+                       }\r
+               }\r
+               throw new IllegalStateException("<couplet> has no closing tag");\r
+       }\r
+       \r
+\r
+       /**\r
+        * Attaches the question nodes of a finished couplet to their parent node(s).\r
+        * If <code>parentNode</code> is given all children are added to it. Otherwise the\r
+        * children are added to all nodes registered as unmatched leads for the current key\r
+        * and the couplet's <code>num</code>; these nodes are removed from the unmatched\r
+        * leads afterwards. A warning is fired if <code>num</code> is blank or if no\r
+        * unmatched lead exists for it, as the children are not attached in both cases.\r
+        * \r
+        * @param state\r
+        * @param parentEvent the start element of the couplet, used for warnings\r
+        * @param parentNode the node to attach the children to, may be <code>null</code>\r
+        * @param num the num attribute of the couplet\r
+        * @param childList the nodes created for the questions of the couplet\r
+        */\r
+       private void completeCouplet(MarkupImportState state, XMLEvent parentEvent,\r
+                       PolytomousKeyNode parentNode, String num, List<PolytomousKeyNode> childList) {\r
+               if (parentNode != null){\r
+                       for (PolytomousKeyNode childNode : childList){\r
+                               parentNode.addChild(childNode);\r
+                       }\r
+               }else if (isNotBlank(num)){\r
+                       UnmatchedLeadsKey unmatchedKey = UnmatchedLeadsKey.NewInstance(state.getCurrentKey(), num);\r
+                       //iterate over a snapshot as removeNode changes the unmatched leads within the loop\r
+                       List<PolytomousKeyNode> nodes = new ArrayList<PolytomousKeyNode>(state.getUnmatchedLeads().getNodes(unmatchedKey));\r
+                       if (nodes.isEmpty()){\r
+                               //without this warning the questions of the couplet get lost silently\r
+                               String message = "No unmatched lead found for couplet num (%s). The questions of this couplet are not attached to any key node.";\r
+                               message = String.format(message, num);\r
+                               fireWarningEvent(message, parentEvent, 6);\r
+                       }\r
+                       for(PolytomousKeyNode nodeToMatch: nodes){\r
+                               for (PolytomousKeyNode childNode : childList){\r
+                                       nodeToMatch.addChild(childNode);\r
+                               }\r
+                               state.getUnmatchedLeads().removeNode(unmatchedKey, nodeToMatch);\r
+                       }\r
+               }else{\r
+                       String message = "Parent num could not be matched. Please check if num (%s) is correct";\r
+                       message = String.format(message, num);\r
+                       fireWarningEvent(message, parentEvent, 6);\r
+               }\r
+       }\r
+\r
+       /**\r
+        * Handles a question element. A new key node belonging to the current key is\r
+        * created and added to <code>nodesList</code>. The text element becomes the\r
+        * statement of the node, nested couplets are attached to the node, and toCouplet\r
+        * and toTaxon elements register the node as unmatched lead.\r
+        * \r
+        * @param state\r
+        * @param reader\r
+        * @param parentEvent the start element of the question\r
+        * @param nodesList the list the new node is added to\r
+        * @throws XMLStreamException\r
+        */\r
+       private void handleQuestion(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, List<PolytomousKeyNode> nodesList) throws XMLStreamException {\r
+               // attributes\r
+               Map<String, Attribute> attrMap = getAttributes(parentEvent);\r
+               //the num of a question is needed only for data lineage and is not evaluated yet,\r
+               //the attribute is still read and removed from the map\r
+               getAndRemoveRequiredAttributeValue(parentEvent, attrMap, NUM);\r
+\r
+               PolytomousKeyNode questionNode = PolytomousKeyNode.NewInstance();\r
+               questionNode.setKey(state.getCurrentKey());  //to avoid NPE while computing num in PolytomousKeyNode in case this node is not matched correctly with a parent\r
+               nodesList.add(questionNode);\r
+\r
+               while (reader.hasNext()) {\r
+                       XMLEvent event = readNoWhitespace(reader);\r
+                       if (isMyEndingElement(event, parentEvent)) {\r
+                               return;\r
+                       }\r
+\r
+                       if (isStartingElement(event, TEXT)) {\r
+                               String statementText = getCData(state, reader, event);\r
+                               Language statementLanguage = state.getDefaultLanguage();\r
+                               if (statementLanguage == null){\r
+                                       statementLanguage = Language.DEFAULT();\r
+                               }\r
+                               questionNode.setStatement(KeyStatement.NewInstance(statementLanguage, statementText));\r
+                       } else if (isStartingElement(event, COUPLET)) {\r
+                               //TODO test\r
+                               handleCouplet(state, reader, event, questionNode);\r
+                       } else if (isStartingElement(event, TO_COUPLET)) {\r
+                               handleToCouplet(state, reader, event, questionNode);\r
+                       } else if (isStartingElement(event, TO_TAXON)) {\r
+                               handleToTaxon(state, reader, event, questionNode);\r
+                       } else if (isStartingElement(event, TO_KEY) || isStartingElement(event, KEYNOTES)) {\r
+                               //TODO toKey and keynotes\r
+                               handleNotYetImplementedElement(event);\r
+                       } else if (isEndingElement(event, TO_KEY) || isEndingElement(event, KEYNOTES)){\r
+                               //TODO toKey and keynotes\r
+                               popUnimplemented(event.asEndElement());\r
+                       } else {\r
+                               handleUnexpectedElement(event);\r
+                       }\r
+               }\r
+               throw new IllegalStateException("<question> has no closing tag");\r
+       }\r
+\r
+       /**\r
+        * Handles a toCouplet element by registering <code>node</code> as unmatched lead\r
+        * for the current key and the num attribute of the element. Character data that\r
+        * is not blank and differs from the num is reported with a warning.\r
+        * \r
+        * @param state\r
+        * @param reader\r
+        * @param next the start element of toCouplet\r
+        * @param node the key node leading to the couplet\r
+        * @throws XMLStreamException\r
+        */\r
+       private void handleToCouplet(MarkupImportState state, XMLEventReader reader, XMLEvent next, PolytomousKeyNode node) throws XMLStreamException {\r
+               String targetNum = getOnlyAttribute(next, NUM, true);\r
+               String content = getCData(state, reader, next, false);\r
+\r
+               boolean hasUnhandledContent = isNotBlank(content) && ! content.equals(targetNum);\r
+               if (hasUnhandledContent){\r
+                       fireWarningEvent(String.format("CData ('%s') not handled in <toCouplet>", content), next, 4);\r
+               }\r
+\r
+               UnmatchedLeadsKey leadsKey = UnmatchedLeadsKey.NewInstance(state.getCurrentKey(), targetNum);\r
+               state.getUnmatchedLeads().addKey(leadsKey, node);\r
+       }\r
+\r
+       /**\r
+        * Handles a toTaxon element by registering <code>node</code> as unmatched lead\r
+        * for the num attribute of the element and the taxon string computed by\r
+        * makeTaxonKey from the character data. A trailing dot is removed from the\r
+        * character data before.\r
+        * \r
+        * @param state\r
+        * @param reader\r
+        * @param parentEvent the start element of toTaxon\r
+        * @param node the key node leading to the taxon\r
+        * @throws XMLStreamException\r
+        */\r
+       private void handleToTaxon(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode node) throws XMLStreamException {\r
+               Map<String, Attribute> attrMap = getAttributes(parentEvent);\r
+               String leadNum = getAndRemoveAttributeValue(attrMap, NUM);\r
+\r
+               String taxonText = getCData(state, reader, parentEvent, false).trim();\r
+               boolean hasTrailingDot = taxonText.endsWith(".");\r
+               if (hasTrailingDot){\r
+                       taxonText = taxonText.substring(0, taxonText.length()-1).trim();\r
+               }\r
+\r
+               //TODO ?\r
+               String taxonKey = makeTaxonKey(taxonText, state.getCurrentTaxon());\r
+               UnmatchedLeadsKey leadsKey = UnmatchedLeadsKey.NewInstance(leadNum, taxonKey);\r
+               state.getUnmatchedLeads().addKey(leadsKey, node);\r
+       }\r
+       \r
+       \r
+       /**\r
+        * Computes the taxon string used for an unmatched leads key from the text of a\r
+        * toTaxon element. Parts in brackets are removed and whitespace is normalized.\r
+        * A token recognized as abbreviation of the genus name of <code>taxon</code> is\r
+        * replaced by the genus name. If a token is an infraspecific rank marker and the\r
+        * result is still blank, genus name and specific epithet of <code>taxon</code>\r
+        * are put in front.\r
+        * NOTE(review): <code>taxon</code> and its name are dereferenced without null\r
+        * check - confirm callers always provide a current taxon with a name.\r
+        * \r
+        * @param strGoto the text of the toTaxon element\r
+        * @param taxon the taxon providing genus name and specific epithet\r
+        * @return the computed string, the empty string if <code>strGoto</code> is <code>null</code>\r
+        */\r
+       private String makeTaxonKey(String strGoto, Taxon taxon) {\r
+               String result = "";\r
+               if (strGoto == null){\r
+                       return "";\r
+               }\r
+               \r
+               NonViralName<?> name = CdmBase.deproxy(taxon.getName(), NonViralName.class);\r
+               String strGenusName = name.getGenusOrUninomial();\r
+               \r
+               \r
+               strGoto = strGoto.replaceAll("\\([^\\(\\)]*\\)", "");  //remove all bracketed parts not containing further brackets\r
+               strGoto = strGoto.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace\r
+               \r
+               strGoto = strGoto.trim();  \r
+               String[] split = strGoto.split("\\s");\r
+               for (int i = 0; i<split.length; i++){\r
+                       String single = split[i];\r
+                       //expand an abbreviated genus name\r
+                       if (isGenusAbbrev(single, strGenusName)){\r
+                               split[i] = strGenusName;\r
+                       }\r
+                       //a rank marker with nothing in front: prepend genus and species epithet of the taxon\r
+                       if (isInfraSpecificMarker(single)){\r
+                               String strSpeciesEpi = name.getSpecificEpithet();\r
+                               if (isBlank(result)){\r
+                                       result += strGenusName + " " + strSpeciesEpi;\r
+                               }\r
+                       }\r
+                       //append the (possibly replaced) token, separated by a single blank\r
+                       result = (result + " " + split[i]).trim();\r
+               }\r
+               return result;\r
+       }\r
+       \r
+\r
+       /**\r
+        * Checks if the given string is the abbreviation of an infraspecific rank.\r
+        * \r
+        * @param single the string to check\r
+        * @return <code>true</code> if a rank is found for the abbreviation and this rank\r
+        *         is infraspecific, <code>false</code> if it is not infraspecific or if\r
+        *         the abbreviation is unknown\r
+        */\r
+       private boolean isInfraSpecificMarker(String single) {\r
+               try {\r
+                       return Rank.getRankByAbbreviation(single).isInfraSpecific();\r
+               } catch (UnknownCdmTypeException e) {\r
+                       //not a known rank abbreviation, therefore no rank marker\r
+                       return false;\r
+               }\r
+       }\r
+       \r
+       /**\r
+        * Checks if the given string is an abbreviation of the genus name, which is a\r
+        * single capital letter, optionally followed by a dot, equal to the first\r
+        * character of the genus name.\r
+        * \r
+        * @param single the string to check\r
+        * @param strGenusName the genus name, may be <code>null</code> or empty\r
+        * @return <code>true</code> if <code>single</code> abbreviates <code>strGenusName</code>\r
+        */\r
+       private boolean isGenusAbbrev(String single, String strGenusName) {\r
+               if (! single.matches("[A-Z]\\.?")) {\r
+                       return false;\r
+               }\r
+               //single has at least 1 character here as the pattern requires a capital letter\r
+               boolean hasGenusName = strGenusName != null && strGenusName.length() > 0;\r
+               return hasGenusName && single.charAt(0) == strGenusName.charAt(0);\r
+       }\r
+       \r
+       \r
+//******************************** recognize nodes ***********/\r
+\r
+       public void makeKeyNodes(MarkupImportState state, XMLEvent event, String taxonTitle) {\r
+               Taxon taxon = state.getCurrentTaxon();\r
+               String num = state.getCurrentTaxonNum();\r
+               \r
+               String nameString = CdmBase.deproxy(taxon.getName(), NonViralName.class).getNameCache();\r
+//             String nameString = taxonTitle;\r
+               \r
+               //try to find matching lead nodes \r
+               UnmatchedLeadsKey leadsKey = UnmatchedLeadsKey.NewInstance(num, nameString);\r
+               Set<PolytomousKeyNode> matchingNodes = handleMatchingNodes(state, taxon, leadsKey);\r
+               \r
+               if (num != null){//same without using the num\r
+                       UnmatchedLeadsKey noNumLeadsKey = UnmatchedLeadsKey.NewInstance("", nameString);\r
+                       Set<PolytomousKeyNode> noNumMatchingNodes = handleMatchingNodes(state, taxon, noNumLeadsKey);\r
+                       if(noNumMatchingNodes.size() > 0){\r
+                               String message ="Taxon matches additional key node when not considering <num> attribute in taxontitle. This may be correct but may also indicate an error.";\r
+                               fireWarningEvent(message, event, 1);\r
+                       }\r
+               }\r
+               //report missing match, if num exists\r
+               if (matchingNodes.isEmpty() && num != null){\r
+                       String message = "Taxon has <num> attribute in taxontitle but no matching key nodes exist: %s, Key: %s";\r
+                       message = String.format(message, num, leadsKey.toString());\r
+                       fireWarningEvent(message, event, 1);\r
+               }\r
+               \r
+       }\r
+       \r
+       /**\r
+        * Retrieves the nodes registered as unmatched leads for the given key. Each of\r
+        * them is removed from the unmatched leads, gets <code>taxon</code> assigned and\r
+        * is added to the polytomous key nodes to save.\r
+        * \r
+        * @param state\r
+        * @param taxon the taxon to set on the matching nodes\r
+        * @param leadsKey the key to look up in the unmatched leads\r
+        * @return the nodes retrieved for <code>leadsKey</code>\r
+        */\r
+       private Set<PolytomousKeyNode> handleMatchingNodes(MarkupImportState state, Taxon taxon, UnmatchedLeadsKey leadsKey) {\r
+               Set<PolytomousKeyNode> matchingNodes = state.getUnmatchedLeads().getNodes(leadsKey);\r
+               for (PolytomousKeyNode matchingNode : matchingNodes){\r
+                       //NOTE(review): removes from the unmatched leads while iterating the result of getNodes - presumably getNodes returns a copy, confirm\r
+                       state.getUnmatchedLeads().removeNode(leadsKey, matchingNode);\r
+                       matchingNode.setTaxon(taxon);\r
+                       state.getPolytomousKeyNodesToSave().add(matchingNode);\r
+               }\r
+               return matchingNodes;\r
+       }\r
+\r
+}\r
index a583c06a037d26787b80e8608af90cdba8fe20bd..2c7917415b817db34b8245ed3c71d1821056d406 100644 (file)
@@ -16,15 +16,12 @@ import org.apache.log4j.Logger;
  * @created 30.05.2012\r
  * \r
  */\r
-public class MarkupSpecimenImport  {\r
+public class MarkupSpecimenImport   extends MarkupImportBase  {\r
+       @SuppressWarnings("unused")\r
        private static final Logger logger = Logger.getLogger(MarkupSpecimenImport.class);\r
 \r
-       private MarkupDocumentImport docImport;\r
-       \r
-\r
        public MarkupSpecimenImport(MarkupDocumentImport docImport) {\r
-               super();\r
-               this.docImport = docImport;\r
+               super(docImport);\r
        }\r
 \r
 \r