ref#10446 normalize to IPNI standard
author Andreas Müller <a.mueller@bgbm.org>
Thu, 1 Feb 2024 21:50:59 +0000 (22:50 +0100)
committer Andreas Müller <a.mueller@bgbm.org>
Thu, 1 Feb 2024 21:50:59 +0000 (22:50 +0100)
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/wfo/out/WfoBackboneExport.java
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/wfo/out/WfoBackboneExportConfigurator.java
cdmlib-model/src/main/java/eu/etaxonomy/cdm/strategy/cache/agent/TeamDefaultCacheStrategy.java
cdmlib-model/src/test/java/eu/etaxonomy/cdm/strategy/cache/agent/TeamDefaultCacheStrategyTest.java

index 906c8dcb56a44e02bad69acb3ab2fccab2dcd317..c888fa221a32562c5a2aba05ff7d944fa6bb4363 100644 (file)
@@ -63,6 +63,7 @@ import eu.etaxonomy.cdm.model.taxon.TaxonBase;
 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
 import eu.etaxonomy.cdm.model.taxon.TaxonNodeStatus;
 import eu.etaxonomy.cdm.model.term.IdentifierType;
+import eu.etaxonomy.cdm.strategy.cache.agent.TeamDefaultCacheStrategy;
 
 /**
  * Classification or taxon tree exporter into WFO Backbone format.
@@ -639,7 +640,8 @@ public class WfoBackboneExport
 
             //authorship
             //TODO 3 handle empty authorship cache warning
-            csvLine[table.getIndex(WfoBackboneExportTable.NAME_AUTHORSHIP)] = normalizedAuthor(name);
+            csvLine[table.getIndex(WfoBackboneExportTable.NAME_AUTHORSHIP)]
+                    = normalizedAuthor(state, name);
 
             //family (use familystr if provided, otherwise try to compute from the family taxon
             String familyStr = state.getFamilyStr();
@@ -707,17 +709,19 @@ public class WfoBackboneExport
         return wfoId;
     }
 
-    //TODO 2 make it public somewhere in author formatter
-    private String normalizedAuthor(TaxonName name) {
-        if (isBlank(name.getAuthorshipCache())) {
+    private String normalizedAuthor(WfoBackboneExportState state, TaxonName name) {  //returns the normalized authorship cache of the name, or null if name or cache is null
+        if (name == null) {  //no name => no authorship
             return null;
+        } else if (state.getConfig().isNormalizeAuthorsToIpniStandard()) {  //IPNI style requested: delegate to the shared normalizer (which is null-safe)
+            return TeamDefaultCacheStrategy.removeWhitespaces(name.getAuthorshipCache());
+        } else {  //otherwise only collapse whitespace runs to a single blank and trim
+            String result = name.getAuthorshipCache();
+            if (result == null) {  //keep null as null, callers test the result with isBlank
+                return null;
+            }else {
+                return result.replaceAll("\\s+", " ").trim();
+            }
         }
-        String result = name.getAuthorshipCache();
-        result = result.replaceAll("\\.\\s+", ".")
-                .replaceAll("\\.\\&", ". &")
-                .replaceAll("\\.ex\\s+", ". ex ")
-                ;
-        return result;
     }
 
     private Set<TaxonName> getOrthographicVariants(TaxonName name) {
@@ -759,8 +763,9 @@ public class WfoBackboneExport
 
         //authorship, take from mainname if it does not exist
         //TODO 3 take from csvLine of both names
-        if (isBlank(normalizedAuthor(name))) {
-            csvLine[table.getIndex(WfoBackboneExportTable.NAME_AUTHORSHIP)] = normalizedAuthor(mainName);
+        if (isBlank(normalizedAuthor(state, name))) {
+            csvLine[table.getIndex(WfoBackboneExportTable.NAME_AUTHORSHIP)]
+                    = normalizedAuthor(state, mainName);
         }
 
         //nom. ref, take from main name if it does not exist
index 5276a5821625c2466e667c43c672ff16bbc60735..be9580c718f91f6333c3d8a34bf7d918aa0c4388 100644 (file)
@@ -40,6 +40,8 @@ public class WfoBackboneExportConfigurator
 
     private String sourceLinkBaseUrl = null;
 
+    private boolean normalizeAuthorsToIpniStandard = true;
+
     private static final WfoBackboneExportTransformer transformer = new WfoBackboneExportTransformer();
 
 //************************* FACTORY ******************************/
@@ -147,4 +149,11 @@ public class WfoBackboneExportConfigurator
     public void setSourceLinkBaseUrl(String sourceLinkBaseUrl) {
         this.sourceLinkBaseUrl = sourceLinkBaseUrl;
     }
+
+    public boolean isNormalizeAuthorsToIpniStandard() {  //if true (the default) the export normalizes authorships via TeamDefaultCacheStrategy.removeWhitespaces
+        return normalizeAuthorsToIpniStandard;
+    }
+    public void setNormalizeAuthorsToIpniStandard(boolean normalizeAuthorsToIpniStandard) {  //false: the export only collapses whitespace runs and trims
+        this.normalizeAuthorsToIpniStandard = normalizeAuthorsToIpniStandard;
+    }
 }
\ No newline at end of file
index 8d3cc750894bd6951d3a975020a9fa73ce306978..0615374e706a7b2ef25f1cf239e3ae81e4ef9511 100644 (file)
@@ -22,7 +22,9 @@ import eu.etaxonomy.cdm.strategy.StrategyBase;
 /**
  * @author AM
  */
-public class TeamDefaultCacheStrategy extends StrategyBase implements INomenclaturalAuthorCacheStrategy<Team> {
+public class TeamDefaultCacheStrategy
+        extends StrategyBase
+        implements INomenclaturalAuthorCacheStrategy<Team> {
 
     private static final long serialVersionUID = 8375295443642690479L;
     @SuppressWarnings("unused")
@@ -220,4 +222,26 @@ public class TeamDefaultCacheStrategy extends StrategyBase implements INomenclat
         return str + ET_AL_TEAM_CONCATINATION_ABBREV + "al.";
     }
 
+    /**
+     * Normalizes the whitespaces in an abbreviated authorship string
+     * to be compliant with the IPNI abbreviated authorship standard:
+     * no whitespace after "." (except before "&" and "ex") and none before ",".
+     * @param authorship the authorship string to normalize, may be <code>null</code>
+     * @return the trimmed authorship without certain whitespaces, <code>null</code> if the input is <code>null</code>
+     */
+    public static String removeWhitespaces(String authorship) {
+        if (authorship == null) {
+            return null;
+        }
+        String result = authorship
+                .replaceAll("\\.\\s+", ".") //remove whitespace after "." (re-added by the next 2 rules where required)
+                .replaceAll("\\.\\&", ". &")  //... but add whitespace between "." and a directly following "&"
+                .replaceAll("\\.ex\\s", ". ex ") //...and add whitespace between "." and "ex" if "ex" is followed by whitespace
+                .replaceAll("\\s+", " ")  //replace multiple whitespaces by a single blank
+                .replaceAll("\\s+,", ",")  //remove whitespaces before ","
+                .trim()                    //remove leading and trailing whitespace
+                ;
+        return result;
+    }
+
 }
\ No newline at end of file
index e1f96f477530777a55c2bbf5b2058e10e703a370..21def977806069ff8810d3ac962c82b283451b74 100644 (file)
@@ -239,4 +239,38 @@ public class TeamDefaultCacheStrategyTest {
                person1.setGivenName("O.");
                Assert.assertEquals("team1 title cache should be P1FN, O.", "P1FN, O.", team1.getTitleCache());
        }
+
+       @Test
+       public final void testRemoveWhitespaces() {
+           String author = null;  //null input is returned as null
+           Assert.assertEquals(null, TeamDefaultCacheStrategy.removeWhitespaces(author));
+
+           author = "  ";  //whitespace only input results in an empty string
+           Assert.assertEquals("", TeamDefaultCacheStrategy.removeWhitespaces(author));
+
+           author = "Mill. ";  //trailing whitespace after "." is removed
+           Assert.assertEquals("Mill.", TeamDefaultCacheStrategy.removeWhitespaces(author));
+
+           author = " Miller ";  //leading and trailing whitespace is removed
+           Assert.assertEquals("Result should always be trimed", "Miller", TeamDefaultCacheStrategy.removeWhitespaces(author));
+
+           author = "A. Mill.";  //single whitespace after an initial is removed
+        Assert.assertEquals("A.Mill.", TeamDefaultCacheStrategy.removeWhitespaces(author));
+
+        author = "A. Mill.";  //NOTE(review): same input as the previous case - presumably another whitespace variant was intended, confirm
+        Assert.assertEquals("A.Mill.", TeamDefaultCacheStrategy.removeWhitespaces(author));
+
+        author = "A.   Mill.";  //multiple whitespaces after "." are removed
+        Assert.assertEquals("A.Mill.", TeamDefaultCacheStrategy.removeWhitespaces(author));
+
+        author = "A.   Mill. & B. Kohl.-Haber";  //whitespace between "." and "&" is kept
+        Assert.assertEquals("A.Mill. & B.Kohl.-Haber", TeamDefaultCacheStrategy.removeWhitespaces(author));
+
+        author = "A.   Mill. ,J. N. Bohl. f.& B. Kohl.-Haber";  //whitespace before "," is removed, missing whitespace before "&" is added
+        Assert.assertEquals("A.Mill.,J.N.Bohl.f. & B.Kohl.-Haber", TeamDefaultCacheStrategy.removeWhitespaces(author));
+
+        author = " (Ab. ex CD. , All , Bet & J.Vall.) A.  Mill.ex Kohl.";  //"ex" after "." is surrounded by single blanks, with and without whitespace in the input
+        Assert.assertEquals("(Ab. ex CD., All, Bet & J.Vall.) A.Mill. ex Kohl.", TeamDefaultCacheStrategy.removeWhitespaces(author));
+
+       }
 }