correcting utf8 chars
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / eflora / EfloraTaxonImport.java
index d43dbcd877f9bdb9f3f4bcdc258bf91bf4fe67d8..f8d6d620554c9db078fd02a3f57e5ecfba193034 100644 (file)
@@ -671,7 +671,7 @@ public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<Eflor
                String chromosomesPart = getChromosomesPart(value);\r
                String references = value.replace(chromosomesPart, "").trim();\r
                chromosomesPart = chromosomesPart.replace(":", "").trim();\r
-               return addDescriptionElement(taxon, chromosomesPart, chromosomeFeature, references);    \r
+               return addDescriptionElement(state, taxon, chromosomesPart, chromosomeFeature, references);     \r
        }\r
 \r
 \r
@@ -755,8 +755,7 @@ public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<Eflor
         * @param attribute\r
         * @return\r
         */\r
-       private TextData handleDescriptiveElement(EfloraImportState state,\r
-                       Element element, Taxon taxon, String classValue) {\r
+       private TextData handleDescriptiveElement(EfloraImportState state, Element element, Taxon taxon, String classValue) {\r
                TextData result = null;\r
                Feature feature = getFeature(classValue, state);\r
                if (feature == null){\r
@@ -765,7 +764,7 @@ public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<Eflor
                        String value = element.getValue();\r
                        value = replaceStart(value, "Notes");\r
                        value = replaceStart(value, "Note");\r
-                       result = addDescriptionElement(taxon, value, feature, null);\r
+                       result = addDescriptionElement(state, taxon, value, feature, null);\r
                }\r
                return result;\r
        }\r
@@ -789,7 +788,7 @@ public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<Eflor
                String value = element.getTextNormalize();\r
                value = replaceStart(value, "Uses");\r
                Feature feature = Feature.USES();\r
-               return addDescriptionElement(taxon, value, feature, null);\r
+               return addDescriptionElement(state, taxon, value, feature, null);\r
                \r
        }\r
 \r
@@ -807,7 +806,7 @@ public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<Eflor
                value = replaceStart(value, "Distribution");\r
                Feature feature = Feature.DISTRIBUTION();\r
                //distribution parsing almost impossible as there is lots of freetext in the distribution tag\r
-               return addDescriptionElement(taxon, value, feature, null);\r
+               return addDescriptionElement(state, taxon, value, feature, null);\r
        }\r
 \r
 \r
@@ -829,7 +828,7 @@ public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<Eflor
                        value = replaceStart(value, "Habitat");\r
                        feature = getFeature("Habitat", state);\r
                }\r
-               return addDescriptionElement(taxon, value, feature, null);\r
+               return addDescriptionElement(state, taxon, value, feature, null);\r
        }\r
 \r
 \r
@@ -842,13 +841,26 @@ public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<Eflor
                if (value.startsWith(replacementString) ){\r
                        value = value.substring(replacementString.length()).trim();\r
                }\r
-               while (value.startsWith("-") ){\r
+               while (value.startsWith("-") || value.startsWith("–") ){\r
                        value = value.substring("-".length()).trim();\r
                }\r
                return value;\r
        }\r
 \r
 \r
+       /** Removes replacementString from the end of value, if present, and trims the remainder.\r
+        * @param value the text to shorten; null is returned as null\r
+        * @param replacementString the suffix to remove when value ends with it\r
+        * @return value unchanged (and untrimmed) if it does not end with the suffix, otherwise the shortened, trimmed text */\r
+       protected String removeTrailing(String value, String replacementString) {\r
+               if (value == null){\r
+                       return null;\r
+               }\r
+               if (value.endsWith(replacementString) ){\r
+                       value = value.substring(0, value.length() - replacementString.length()).trim();\r
+               }\r
+               return value;\r
+       }\r
 \r
        /**\r
         * @param state\r
@@ -938,9 +950,9 @@ public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<Eflor
                \r
                List<Element> elements = elNom.getChildren();\r
                for (Element element : elements){\r
-                       if (element.getName().equals("name")){\r
+                       if (element.getName().equals("name") || element.getName().equals("homonym") ){\r
                                if (taxonBaseClassType == false){\r
-                                       logger.warn("Name tag not allowed in non taxon nom tag");\r
+                                       logger.warn("Name or homonym tag not allowed in non taxon nom tag");\r
                                }\r
                        }else{\r
                                unhandledNomChildren.add(element.getName());\r
@@ -995,7 +1007,7 @@ public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<Eflor
         */\r
        protected String removeStartingTypeRefMinus(String typeRef) {\r
                typeRef = replaceStart(typeRef, "-");\r
-               typeRef = replaceStart(typeRef, "\97");\r
+               typeRef = replaceStart(typeRef, "");\r
                typeRef = replaceStart(typeRef, "\u002d");\r
                typeRef = replaceStart(typeRef, "\u2013");\r
                typeRef = replaceStart(typeRef, "--");\r
@@ -1022,7 +1034,7 @@ public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<Eflor
                //create name\r
                BotanicalName nameType = (BotanicalName)parser.parseFullName(typeText, NomenclaturalCode.ICBN, Rank.SPECIES());\r
                ((NameTypeDesignation) typeDesignation).setTypeName(nameType);\r
-               //TODO wie können NameTypes den Namen zugeordnet werden? -  wird aber vom Portal via NameCache matching gemacht\r
+               //TODO wie können NameTypes den Namen zugeordnet werden? -  wird aber vom Portal via NameCache matching gemacht\r
        }\r
 \r
 \r
@@ -1181,7 +1193,7 @@ public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<Eflor
                                        logger.warn("Unhandled name class: " +  classValue);\r
                                }\r
                        }else if(element.getName().equals("homonym")){\r
-                               handleHomonym(element, name);\r
+                               handleHomonym(state, element, name);\r
                        }else{\r
                                // child element is not "name"\r
                                unhandledNomChildren.add(element.getName());\r
@@ -1205,7 +1217,7 @@ public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<Eflor
                }\r
                \r
                //test nom element has no text\r
-               if (StringUtils.isNotBlank(elNom.getTextNormalize().replace("\97", "").replace("\u002d","").replace("\u2013", ""))){\r
+               if (StringUtils.isNotBlank(elNom.getTextNormalize().replace("", "").replace("\u002d","").replace("\u2013", ""))){\r
                        String strElNom = elNom.getTextNormalize();\r
                        if ("?".equals(strElNom)){\r
                                handleQuestionMark(name, taxon);\r
@@ -1231,7 +1243,7 @@ public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<Eflor
 \r
 \r
        //merge with handleNomTaxon     \r
-       private void handleHomonym(Element elHomonym, NonViralName upperName) {\r
+       private void handleHomonym(EfloraImportState state, Element elHomonym, NonViralName upperName) {\r
                verifyNoAttribute(elHomonym);\r
                \r
                //hommonym name\r
@@ -1250,6 +1262,8 @@ public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<Eflor
                                homonymName.setSpecificEpithet(value);\r
                        }else if (classValue.equalsIgnoreCase("author")){\r
                                handleNameAuthors(elName, homonymName);\r
+                       }else if (classValue.equalsIgnoreCase("paraut")){\r
+                               handleBasionymAuthor(state, elName, homonymName, true);\r
                        }else if (classValue.equalsIgnoreCase("pub")){\r
                                handleNomenclaturalReference(homonymName, value);\r
                        }else if (classValue.equalsIgnoreCase("note")){\r
@@ -1269,7 +1283,12 @@ public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<Eflor
                        TimePeriod nameYear = upperName.getNomenclaturalReference().getDatePublished();\r
                        homonymIsLater = homonymYear.getStart().compareTo(nameYear.getStart())  > 0;\r
                }else{\r
-                       logger.warn("Classification name has no nomenclatural reference");\r
+                       if (upperName.getNomenclaturalReference() == null){\r
+                               logger.warn("Homonym parent does not have a nomenclatural reference or year: " + upperName.getTitleCache());\r
+                       }\r
+                       if (homonymName.getNomenclaturalReference() == null){\r
+                               logger.warn("Homonym does not have a nomenclatural reference or year: " + homonymName.getTitleCache());\r
+                       }\r
                }\r
                if (homonymIsLater){\r
                        homonymName.addRelationshipToName(upperName, relType, null);\r
@@ -1640,8 +1659,8 @@ public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<Eflor
         * @param element\r
         * @param name\r
         */\r
-       private void handleBasionymAuthor(EfloraImportState state, Element element, NonViralName name, boolean overwrite) {\r
-               String strAuthor = element.getValue().trim();\r
+       private void handleBasionymAuthor(EfloraImportState state, Element elBasionymAuthor, NonViralName name, boolean overwrite) {\r
+               String strAuthor = elBasionymAuthor.getValue().trim();\r
                Pattern reBasionymAuthor = Pattern.compile("^\\(.*\\)$");\r
                if (reBasionymAuthor.matcher(strAuthor).matches()){\r
                        strAuthor = strAuthor.substring(1, strAuthor.length()-1);\r
@@ -1672,6 +1691,9 @@ public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<Eflor
                if (strAuthor.endsWith(",")){\r
                        strAuthor = strAuthor.substring(0, strAuthor.length() -1);\r
                }\r
+               if (strAuthor.indexOf("(") > -1 || strAuthor.indexOf(")") > -1){\r
+                       logger.warn("Author has brackets. Basionym authors should be handled in separate tags: " + strAuthor);\r
+               }\r
                TeamOrPersonBase[] team = getTeam(strAuthor);\r
                name.setCombinationAuthorTeam(team[0]);\r
                name.setExCombinationAuthorTeam(team[1]);\r
@@ -1714,7 +1736,7 @@ public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<Eflor
        }\r
 \r
 \r
-       private TeamOrPersonBase parseSingleTeam(String strBaseAuthor) {\r
+       protected TeamOrPersonBase parseSingleTeam(String strBaseAuthor) {\r
                TeamOrPersonBase result;\r
                String[] split = strBaseAuthor.split("&");\r
                if (split.length > 1){\r
@@ -1784,7 +1806,7 @@ public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<Eflor
                                        logger.warn("Unhandled feature: " + classValue);\r
                                }else{\r
                                        String value = element.getValue();\r
-                                       addDescriptionElement(taxon, value, feature, null);\r
+                                       addDescriptionElement(state, taxon, value, feature, null);\r
                                }\r
                                \r
                        }\r
@@ -1946,7 +1968,7 @@ public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<Eflor
         * @param value\r
         * @param taxonNameBase \r
         */\r
-       private void handleGenus(String value, TaxonNameBase taxonName) {\r
+       protected void handleGenus(String value, TaxonNameBase taxonName) {\r
                Matcher matcher = rexGenusAuthor.matcher(value);\r
                if (matcher.find()){\r
                        String author = matcher.group();\r
@@ -2036,14 +2058,16 @@ public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<Eflor
 \r
 \r
        /**\r
+        * @param state \r
         * @param taxon\r
         * @param value\r
         * @param feature\r
         * @return \r
         */\r
-       private TextData addDescriptionElement(Taxon taxon, String value, Feature feature, String references) {\r
+       private TextData addDescriptionElement(EfloraImportState state, Taxon taxon, String value, Feature feature, String references) {\r
                TextData textData = TextData.NewInstance(feature);\r
-               textData.putText(value, Language.ENGLISH());\r
+               Language textLanguage = getDefaultLanguage(state);\r
+               textData.putText(value, textLanguage);\r
                TaxonDescription description = getDescription(taxon);\r
                description.addElement(textData);\r
                if (references != null){\r
@@ -2052,6 +2076,24 @@ public class EfloraTaxonImport  extends EfloraImportBase implements ICdmIO<Eflor
                return textData;\r
        }\r
 \r
+       /** Returns the language term for the configured default language uuid, reusing the one kept on the state when its uuid matches; returns Language.DEFAULT() when no uuid is configured. */ private Language getDefaultLanguage(EfloraImportState state) {\r
+               UUID defaultLanguageUuid = state.getConfig().getDefaultLanguageUuid();\r
+               if (defaultLanguageUuid != null){\r
+                       Language result = state.getDefaultLanguage();\r
+                       if (result == null || ! result.getUuid().equals(defaultLanguageUuid)){ /* nothing kept on the state yet, or kept for another uuid */\r
+                               result = (Language)getTermService().find(defaultLanguageUuid);\r
+                               state.setDefaultLanguage(result);\r
+                               if (result == null){\r
+                                       logger.warn("Default language for " + defaultLanguageUuid +  " does not exist.");\r
+                               }\r
+                       }\r
+                       return result; /* null if the term service found no term for the uuid */\r
+               }else{\r
+                       return Language.DEFAULT();\r
+               }\r
+       }\r
+\r
+\r
        /**\r
         * @param elNomenclature\r
         */\r