ref #10178: fuzzy name searching without Authors
author Belen Escobari <b.escobari@bo.berlin>
Mon, 19 Jun 2023 08:23:07 +0000 (10:23 +0200)
committer Belen Escobari <b.escobari@bo.berlin>
Mon, 19 Jun 2023 08:23:07 +0000 (10:23 +0200)
cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/CdmUtilsBelen.java
cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/INameService.java
cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/NameServiceImpl.java
cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/NameServiceImplementBelen.java
cdmlib-services/src/test/java/eu/etaxonomy/cdm/api/service/NameServiceImplTest.java
cdmlib-services/src/test/java/eu/etaxonomy/cdm/api/service/NameServiceImplementBelenTest.java
cdmlib-services/src/test/resources/eu/etaxonomy/cdm/api/service/NameServiceImplTest.testFindMatchingNames.xml

index 662b7cc430900aad36135daf66a28f1ac36e56a7..7d60cef46794a6042dc4dc27673d026a4e753c69 100644 (file)
@@ -89,4 +89,16 @@ public class CdmUtilsBelen {
                String output = firstPart + lastTwoChar;
                return output;
        }
+
+       /**
+        * Passes the given string through the normalization helpers of this class in a fixed
+        * order: {@code replaceSpecialCharacters}, {@code soundalike},
+        * {@code replacerGenderEnding} and finally {@code removeDuplicate}.
+        * {@code replaceInitialCharacter} is not applied here.
+        *
+        * @param x the string to normalize
+        * @return the result of the last helper in the chain
+        */
+       public static String normalized(String x) {
+           final String afterSpecialCharacters = replaceSpecialCharacters(x);
+           final String afterSoundalike = soundalike(afterSpecialCharacters);
+           final String afterGenderEnding = replacerGenderEnding(afterSoundalike);
+           return removeDuplicate(afterGenderEnding);
+       }
 }
index 438077612e38806fe40dd773b64a84508d72dfb0..5730579571abef919e457d7984788ae65bf8f119 100644 (file)
@@ -635,5 +635,5 @@ public interface INameService
      */
     public DeleteResult isDeletable(UUID nameUuid, DeleteConfiguratorBase config, UUID taxonUuid);
 
-    public List<DoubleResult<TaxonNameParts, Integer>> findMatchingNames(String taxonName, int maxDistanceGenus, int maxDisEpith, int limit);
+    public List<DoubleResult<TaxonNameParts, Integer>> findMatchingNames(String taxonName, Integer maxDistanceGenus, Integer maxDisEpith);
 }
index 862d7248b08d956ac369c3f06a616406707d709f..8126fe29aa65b0e8b8b3f81c8217d7dd408517eb 100644 (file)
@@ -53,6 +53,7 @@ import eu.etaxonomy.cdm.api.service.search.SearchResult;
 import eu.etaxonomy.cdm.api.service.search.SearchResultBuilder;
 import eu.etaxonomy.cdm.api.util.TaxonNamePartsFilter;
 import eu.etaxonomy.cdm.common.CdmUtils;
+import eu.etaxonomy.cdm.common.CdmUtilsBelen;
 import eu.etaxonomy.cdm.common.DoubleResult;
 import eu.etaxonomy.cdm.common.URI;
 import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
@@ -1310,62 +1311,188 @@ public class NameServiceImpl
      */
     //TODO work in progress
     @Override
-       public List<DoubleResult<TaxonNameParts, Integer>> findMatchingNames(String taxonName, int maxDistanceGenus, int maxDisEpith, int limit) {
+       public List<DoubleResult<TaxonNameParts, Integer>> findMatchingNames(String taxonName, Integer maxDistanceGenus, Integer maxDisEpith) {
 
        //0. Normalizing and parsing
 
-//     TODO Remove all qualifiers such as cf., aff., ?, <i>, x, etc.
+//     TODO? Remove all qualifiers such as cf., aff., ?, <i>, x, etc.
 
        TaxonName name = (TaxonName) NonViralNameParserImpl.NewInstance().parseFullName(taxonName);
-               String genusQuery = name.getGenusOrUninomial();
+
+       String genusQuery = name.getGenusOrUninomial();
                String epithetQuery = name.getSpecificEpithet();
+               int distance=0;
+               int epithetDistance=0;
+               // phonetic normalization of query (genus)
 
-               //1. Genus pre-filter
+               String tempGenusPhon= NameServiceImplementBelen.replaceInitialCharacter(genusQuery);
+               String tempGenus = CdmUtilsBelen.normalized(tempGenusPhon);
 
-               String initial= genusQuery.substring(0,1) + "*";
-               List<String> tempGenusList = dao.distinctGenusOrUninomial(initial, null, null); //list of all genera in the database starting with the initial letter of the query
-               List<String> genusList= new ArrayList <>(); // compare the length of query and the length of the database name. When the difference is less than the variable "limit", add the genus into the list
+               String tempEpithPhon= NameServiceImplementBelen.replaceInitialCharacter(epithetQuery);
+               String tempEpith= CdmUtilsBelen.normalized(tempEpithPhon);
 
-               for (String x:tempGenusList) {
-                   if (Math.abs(x.length()-genusQuery.length())<=limit) {
-                       genusList.add(x);
+               //1. Genus pre-filter
+
+               // set a list with all names in DB starting with the first character of query
+
+               String initial= tempGenus.substring(0,1).toUpperCase() + "*";
+               List<String> tempGenusListNormal = dao.distinctGenusOrUninomial(initial, null, null);
+
+               //set a list with all genera in the database starting with the initial letter of the PHONETIC TRANSFORMATION query
+           String initialPho = tempGenusPhon.substring(0,1).toUpperCase()+"*";
+           List <String> tempGenusListPhon=new ArrayList<>();
+           if (!initial.equals(initialPho)) {
+               tempGenusListPhon = dao.distinctGenusOrUninomial(initialPho, null, null);
+           }
+
+           //add genera that have a phonetic match
+           List<String> genusList= new ArrayList <>();
+               genusList.addAll(tempGenusListPhon);
+
+               // see word file Step 1. Rule 3.
+
+               for (String x:tempGenusListNormal) {
+
+                   if (Math.abs(x.length()-genusQuery.length())<=2) {
+
+                       if(genusQuery.length()<5) {
+
+                           if (genusQuery.substring(0,1).equals(x.substring(0,1)) ||
+                                   genusQuery.substring((genusQuery.length()-1),genusQuery.length()).equals(x.substring((x.length()-1),x.length()))) {
+                               genusList.add(x);
+                           }
+                       } else if (genusQuery.length()==5) {
+                           if (genusQuery.substring(0,2).equals(x.substring(0,2)) ||
+                                   genusQuery.substring((genusQuery.length()-3),genusQuery.length()).equals(x.substring((x.length()-3),x.length()))){
+                               genusList.add(x);
+                           }
+                       } else if (genusQuery.length()>5){
+                           if (genusQuery.substring(0,3).equals(x.substring(0,3)) ||
+                                   genusQuery.substring((genusQuery.length()-3),genusQuery.length()).equals(x.substring((x.length()-3),x.length()))){
+                               genusList.add(x);
+                           }
+                       }
                    }
                }
+
+               //deduplicate list
+
+                Set<String> noDuplicate = new HashSet<>(genusList);
+               genusList.clear();
+               genusList.addAll(noDuplicate);
+
+               //2. comparison of genus
+
+               if (maxDistanceGenus==null) {
+                   maxDistanceGenus=4;
+               }
+
+//             List<String>tempListGenus = new ArrayList<>();
+               String queryDocu;
                List<DoubleResult<TaxonNameParts,Integer>> fullTaxonNamePartsList = new ArrayList<>();
 
-               //2. comparison of:
+               for (String genusNameInDB:genusList) {
+                   String genusNameInDBNor=NameServiceImplementBelen.replaceInitialCharacter(genusNameInDB);
+                   genusNameInDBNor=CdmUtilsBelen.normalized(genusNameInDBNor);
+                   if (NameServiceImplementBelen.trimCommonChar(tempGenus, genusNameInDBNor).trim().isEmpty()) {
+                       queryDocu="";
+                   } else {
+                       queryDocu=NameServiceImplementBelen.trimCommonChar(tempGenus, genusNameInDBNor);
+                   }
 
-                   //genus
-               for (String genusNameInDB : genusList) {
-                   int distance = CdmUtils.modifiedDamerauLevenshteinDistance(genusQuery, genusNameInDB);
-            if (distance <= maxDistanceGenus) {
-                List<TaxonNameParts> tempParts = dao.findTaxonNameParts(Optional.of(genusNameInDB),null, null, null, null, null, null, null, null);
-                for (TaxonNameParts namePart: tempParts) {
-                    fullTaxonNamePartsList.add(new DoubleResult<TaxonNameParts, Integer>(namePart, distance));
+                   if (queryDocu=="") {
+                       distance = 0;
+                   } else {
+                       String inputShort= queryDocu.split(" ")[0];
+                       String DbShort=queryDocu.split(" ")[1];
+                       distance = CdmUtils.modifiedDamerauLevenshteinDistance(inputShort,DbShort);
+                   }
+
+           //3. genus post-filter
+                   int lengthTemp = genusQuery.length();
+                   int lengthDB=genusNameInDB.length();
+                   int half=Math.max(lengthTemp,lengthDB)/2;
+//Genera that match in at least 50% are kept. i.e., if genus length = 6(or7) then at least 3 characters must match AND the initial character must match in all cases where ED >1
+                   if (distance <=maxDistanceGenus) {
+                       List<TaxonNameParts> tempParts1 = dao.findTaxonNameParts(Optional.of(genusNameInDB),null, null, null, null, null, null, null, null);
+                for (TaxonNameParts namePart1: tempParts1) {
+                    fullTaxonNamePartsList.add(new DoubleResult<TaxonNameParts, Integer>(namePart1, distance));
                 }
-            }
+                   } else if(half<maxDistanceGenus && tempGenus.substring(0,1).equals(genusNameInDBNor.substring(0,1))) {
+                       List<TaxonNameParts> tempParts2 = dao.findTaxonNameParts(Optional.of(genusNameInDB),null, null, null, null, null, null, null, null);
+                for (TaxonNameParts namePart2: tempParts2) {
+                    fullTaxonNamePartsList.add(new DoubleResult<TaxonNameParts, Integer>(namePart2, distance));
+                }
+                   }
+               }
+
+               // 4. epithet pre-filter
+               List<DoubleResult<TaxonNameParts,Integer>> fullTaxonNamePartsList2 = new ArrayList<>();
+
+               for (DoubleResult<TaxonNameParts, Integer> nameX: fullTaxonNamePartsList) {
+                   if (nameX.getFirstResult().getSpecificEpithet().length()-tempEpith.length()>4) {
+                       fullTaxonNamePartsList2.add(nameX);
+                       fullTaxonNamePartsList=fullTaxonNamePartsList2;
+                   }
+
+
+               }
+               // 5. comparison of epithet
+               if (maxDisEpith==null) {
+                   maxDisEpith=4;
                }
-                   //epithet
-                  //add epithet distance
+
                List <DoubleResult<TaxonNameParts, Integer>> epithetList = new ArrayList<>();
+               String queryDocu2;
                for (DoubleResult<TaxonNameParts, Integer> part: fullTaxonNamePartsList) {
-                   int epithetDistance = CdmUtils.modifiedDamerauLevenshteinDistance(epithetQuery, part.getFirstResult().getSpecificEpithet());
-            if (epithetDistance <= maxDisEpith) {
+
+                   String epithetInDB = part.getFirstResult().getSpecificEpithet();
+                   int lengthEpithetInDB=epithetInDB.length();
+                   int lengthEpithetQuery=epithetQuery.length();
+                   int half=Math.max(lengthEpithetInDB,lengthEpithetQuery)/2;
+
+                   String epithetinDBNorm=NameServiceImplementBelen.replaceInitialCharacter(epithetInDB);
+                   epithetinDBNorm=CdmUtilsBelen.normalized(epithetinDBNorm);
+                   if (NameServiceImplementBelen.trimCommonChar(tempEpith, epithetinDBNorm).trim().isEmpty()) {
+                       queryDocu2="";
+                   } else {
+                       queryDocu2=NameServiceImplementBelen.trimCommonChar(tempEpith, epithetinDBNorm);
+                   }
+
+            if (queryDocu2=="") {
+                epithetDistance=0;
+                   } else {
+                       String inputShort= queryDocu2.split(" ")[0];
+                String DbShort=queryDocu2.split(" ")[1];
+                epithetDistance= CdmUtils.modifiedDamerauLevenshteinDistance(inputShort,DbShort);
+                   }
+
+            int totalDist = part.getSecondResult() + epithetDistance;
+            part.setSecondResult(totalDist)  ;
+
+               // 6. species post-filter
+
+                   if (totalDist <= maxDisEpith) {
                 epithetList.add(part);
-                part.setSecondResult(part.getSecondResult() + epithetDistance)  ;
-//                tempMap.add(part, fullTaxonNamePartsList.get(part) + epithetDistance); // need to check how the final distance is calculated
-            }else {
-                //do nothing
-                //tempMap.remove(part);
-            }
+                   }else if (half<maxDisEpith) {
+                       if ((tempEpith.substring(0,1).equals(epithetInDB.substring(0,1))
+                               && epithetDistance==2||epithetDistance==3)||
+                               (tempEpith.substring(0,3).equals(epithetInDB.substring(0,3))
+                                       && epithetDistance==4)) {
+                           epithetList.add(part);
+                       }
+                   }
                }
 
                Collections.sort(epithetList, (o1,o2)->o1.getSecondResult().compareTo(o2.getSecondResult()) );
-//             tempMap.entrySet().stream().forEach(e->result);
-//
-//             Map<TaxonNameParts,Integer> sortedBestMatches = tempMap.entrySet().stream().sorted(Map.Entry.comparingByValue()).limit(limit)
-//                   .collect(Collectors.toMap(
-//                              Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e1, LinkedHashMap::new));
-        return epithetList.subList(0, Math.min(limit,epithetList.size()));
+
+               List <DoubleResult<TaxonNameParts, Integer>> exactResults = NameServiceImplementBelen.exactResults(epithetList);
+               List <DoubleResult<TaxonNameParts, Integer>> bestResults = NameServiceImplementBelen.bestResults(epithetList);
+
+               if(!exactResults.isEmpty()) {
+                   return exactResults;
+               } else {
+                   return bestResults;
+               }
        }
 }
\ No newline at end of file
index 8f06a5f58dcfe68f146517320aabd78b23edc616..5172750e8b0b2c71bf7e5c2ec34bae88cb734307 100644 (file)
@@ -3,17 +3,17 @@ package eu.etaxonomy.cdm.api.service;
 import java.util.ArrayList;
 import java.util.List;
 
+import eu.etaxonomy.cdm.common.DoubleResult;
+import eu.etaxonomy.cdm.persistence.dto.TaxonNameParts;
+
 public class NameServiceImplementBelen {
-       private String tempInputName;
-       private String tempDatabaseName;
-       private String shortenedInputName;
-       private String shortenedDatabaseName;
+
 
 // Phonetic changes performed ONLY on the initial characters of each String
 
-       public String replaceInitialCharacter(String inp) {
+       public static String replaceInitialCharacter(String inp) {
            String input=inp.toLowerCase();
-               String output="";
+               String output=input;
                String[][] phoneticChange = {
                                {"ae","e"},{"cn","n"},{"ct","t"},{"cz","c"},
                                {"dj","d"},{"ea","e"},{"eu","u"},{"gn","n"},
@@ -33,8 +33,13 @@ public class NameServiceImplementBelen {
 
 // trim common characters between query and document
 
-       public List <String> trimCommonChar(String inputName, String databaseName) {
+       public static String trimCommonChar(String inputName, String databaseName) {
 
+           String result;
+           String shortenedInputName="";
+           String shortenedDatabaseName="";
+           String tempInputName;
+           String tempDatabaseName;
         // trim common leading characters of query and document
 
         int inputNameLength = inputName.length();
@@ -53,24 +58,43 @@ public class NameServiceImplementBelen {
         tempInputName = inputName.substring(i);
         tempDatabaseName = databaseName.substring(i);
 
-        List <String> list= new ArrayList<>();
-
         // trim common tailing characters between query and document
 
         int restantInputNameLenght = tempInputName.length();
         int restantDatabaseNameLenght = tempDatabaseName.length();
         int shortestString = Math.min(restantInputNameLenght, restantDatabaseNameLenght);
-
-        for (int x = 0; x < shortestString; x++) {
+        int x;
+        for (x = 0; x < shortestString; x++) {
             if (tempInputName.charAt(restantInputNameLenght - x - 1) != tempDatabaseName
                     .charAt(restantDatabaseNameLenght - x - 1)) {
                 break;
             }
-            shortenedInputName = tempInputName.substring(0, restantInputNameLenght - x - 1);
-            shortenedDatabaseName = tempDatabaseName.substring(0, restantDatabaseNameLenght - x - 1);
 
         }
-        list.add(shortenedInputName +" "+ shortenedDatabaseName);
-        return list;
+        shortenedInputName = tempInputName.substring(0, restantInputNameLenght - x);
+        shortenedDatabaseName = tempDatabaseName.substring(0, restantDatabaseNameLenght - x);
+
+        result = shortenedInputName +" "+ shortenedDatabaseName;
+        return result;
     }
+
+       /**
+        * Collects the entries whose distance (second result) is 0.
+        *
+        * @param list the scored candidates
+        * @return a new list holding the zero-distance entries in the order of {@code list}
+        */
+       public static List <DoubleResult<TaxonNameParts, Integer>> exactResults (List <DoubleResult<TaxonNameParts, Integer>> list){
+           // start from a copy of the input and drop everything that is not an exact hit
+           List <DoubleResult<TaxonNameParts, Integer>> zeroDistance = new ArrayList<>(list);
+           zeroDistance.removeIf(candidate -> candidate.getSecondResult()!=0);
+           return zeroDistance;
+       }
+
+       /**
+        * Collects the entries whose distance (second result) is 1, 2, 3 or 4.
+        *
+        * @param list the scored candidates
+        * @return a new list holding the matching entries in the order of {@code list}
+        */
+       public static List <DoubleResult<TaxonNameParts, Integer>> bestResults (List <DoubleResult<TaxonNameParts, Integer>> list){
+           List <DoubleResult<TaxonNameParts, Integer>> withinRange = new ArrayList<>();
+           for (DoubleResult<TaxonNameParts, Integer> candidate:list) {
+               int score = candidate.getSecondResult();
+               if (score<1 || score>4) {
+                   // exact hits (0) and anything beyond 4 are not part of this list
+                   continue;
+               }
+               withinRange.add(candidate);
+           }
+           return withinRange;
+       }
 }
index e80834e80e233243f623ab14e9d2d61a504af7f5..53391d98490741f34fd751579fd23ee0fd695826 100644 (file)
@@ -1103,25 +1103,41 @@ public class NameServiceImplTest extends CdmTransactionalIntegrationTest {
     @Test
     @DataSet(loadStrategy=CleanSweepInsertLoadStrategy.class, value="NameServiceImplTest.testFindMatchingNames.xml")
     public void testFindingMatchingNames () {
+        String inputName;
+        List<DoubleResult<TaxonNameParts, Integer>> matchResult;
+        DoubleResult<TaxonNameParts, Integer> matchRes;
 
-        List<DoubleResult<TaxonNameParts, Integer>> matchResult = nameService.findMatchingNames("Gynxya asrerotciha", 0, 0, 10);
+        // if the query has an exact match on the DB, return the exact match
+        inputName = "Nectandra magnoliifolia";
+        matchResult = nameService.findMatchingNames(inputName, null, null);
         Assert.assertEquals(1, matchResult.size());
-
-        matchResult = nameService.findMatchingNames("Gynxya asrerotciha", 1, 0, 10);
+        matchRes= matchResult.get(0);
+        Assert.assertEquals("Nectandra", matchRes.getFirstResult().getGenusOrUninomial());
+        Assert.assertEquals("magnoliifolia", matchRes.getFirstResult().getSpecificEpithet());
+        Assert.assertEquals(20, (int)matchRes.getFirstResult().getTaxonNameId());
+        Assert.assertEquals("10989f63-c52f-4704-9574-2cc0676afe01", matchRes.getFirstResult().getTaxonNameUuid().toString());
+        Assert.assertEquals(0,(int) matchRes.getSecondResult());
+
+        inputName = "Nectandra surinamensis";
+        matchResult = nameService.findMatchingNames(inputName, null, null);
         Assert.assertEquals(2, matchResult.size());
+        matchRes= matchResult.get(0);
+        Assert.assertEquals("Nectandra", matchRes.getFirstResult().getGenusOrUninomial());
+        Assert.assertEquals("surinamensis", matchRes.getFirstResult().getSpecificEpithet());
+        Assert.assertEquals(27, (int) matchRes.getFirstResult().getTaxonNameId());
+        Assert.assertEquals("b184664e-798b-4b50-8807-2163a4de796c", matchRes.getFirstResult().getTaxonNameUuid().toString());
+        Assert.assertEquals(0,(int) matchRes.getSecondResult());
+
+        matchRes= matchResult.get(1);
+        Assert.assertEquals("Nectandra", matchRes.getFirstResult().getGenusOrUninomial());
+        Assert.assertEquals("surinamensis", matchRes.getFirstResult().getSpecificEpithet());
+        Assert.assertEquals(28, (int) matchRes.getFirstResult().getTaxonNameId());
+        Assert.assertEquals("b9c8c3ba-bc78-4229-ae7d-b3f7bf23ec85", matchRes.getFirstResult().getTaxonNameUuid().toString());
+        Assert.assertEquals(0,(int) matchRes.getSecondResult());
+
+
+        // if the query does not have an exact match on the DB, return the best matches
+        //TODO
 
-        DoubleResult<TaxonNameParts, Integer> gynxyaAsrerotciha = matchResult.get(0);
-        Assert.assertNotNull(gynxyaAsrerotciha);
-        Assert.assertEquals(10, (int)gynxyaAsrerotciha.getFirstResult().getTaxonNameId());
-        Assert.assertEquals("Gynxya", matchResult.get(0).getFirstResult().getGenusOrUninomial());
-        Assert.assertEquals("asrerotciha", matchResult.get(0).getFirstResult().getSpecificEpithet());
-        Assert.assertEquals("Distance should be 0", 0, (int)gynxyaAsrerotciha.getSecondResult());
-
-        DoubleResult<TaxonNameParts, Integer> gynxyasAsrerotciha = matchResult.get(1);
-        Assert.assertNotNull(gynxyasAsrerotciha);
-        Assert.assertEquals(12, (int)gynxyasAsrerotciha.getFirstResult().getTaxonNameId());
-        Assert.assertEquals("Gynxyas", matchResult.get(1).getFirstResult().getGenusOrUninomial());
-        Assert.assertEquals("asrerotciha", matchResult.get(1).getFirstResult().getSpecificEpithet());
-        Assert.assertEquals("Distance should be 1", 1, (int)gynxyasAsrerotciha.getSecondResult());
     }
 }
\ No newline at end of file
index e7a49402575359fa22d7516d0a95f7221d772ada..851275cafdf1d30f7b10c18e94709dc05209e9c1 100644 (file)
@@ -22,26 +22,26 @@ public class NameServiceImplementBelenTest {
         NameServiceImplementBelen test=new NameServiceImplementBelen();
 
         String name = "euphorbia";
-        Assert.assertEquals("uphorbia", test.replaceInitialCharacter(name));
+        Assert.assertEquals("uphorbia", NameServiceImplementBelen.replaceInitialCharacter(name));
         name = "Cnemidia";
-        Assert.assertEquals("nemidia", test.replaceInitialCharacter(name));
+        Assert.assertEquals("nemidia", NameServiceImplementBelen.replaceInitialCharacter(name));
         name = "Gnaphalium";
-        Assert.assertEquals("naphalium", test.replaceInitialCharacter(name));
+        Assert.assertEquals("naphalium", NameServiceImplementBelen.replaceInitialCharacter(name));
         name = "Philodendron";
-        Assert.assertEquals("filodendron", test.replaceInitialCharacter(name));
+        Assert.assertEquals("filodendron", NameServiceImplementBelen.replaceInitialCharacter(name));
         name = "Tsuga";
-        Assert.assertEquals("suga", test.replaceInitialCharacter(name));
+        Assert.assertEquals("suga", NameServiceImplementBelen.replaceInitialCharacter(name));
         name = "Czerniaevia";
-        Assert.assertEquals("cerniaevia", test.replaceInitialCharacter(name));
+        Assert.assertEquals("cerniaevia", NameServiceImplementBelen.replaceInitialCharacter(name));
     }
 
     @Test
     public void testTrimCommonChar() {
         NameServiceImplementBelen test=new NameServiceImplementBelen();
-        String query ="this is a query string";
-        String document = "this is a database string";
+        String query ="Nectandra";
+        String document = "Nectalisma";
 
-        Assert.assertEquals("query", test.trimCommonChar(query, document).get(0).toString().split(" ")[0]);
-        Assert.assertEquals("database", test.trimCommonChar(query, document).get(0).toString().split(" ")[1]);
+        Assert.assertEquals("ndr", NameServiceImplementBelen.trimCommonChar(query, document).split(" ")[0]);
+        Assert.assertEquals("lism", NameServiceImplementBelen.trimCommonChar(query, document).split(" ")[1]);
     }
 }
index bbe5bcd762096937a010c8b37beadf3fbe713b59..844b7c9e948c2d171d940f0ef72085dccbf05bd8 100644 (file)
@@ -1,8 +1,29 @@
 <?xml version='1.0' encoding='UTF-8'?>
 <dataset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="../../database/schema/dataset.xsd">
-  <TAXONNAME NAMETYPE="ICNAFP" ID="10" UUID="6dbd41d1-fe13-4d9c-bb58-31f051c2c384" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Gynxya asrerotciha"  AUTHORSHIPCACHE=""       NAMECACHE="Gynxya asrerotciha"  GENUSORUNINOMIAL="Gynxya"  SPECIFICEPITHET="asrerotciha" COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
-  <TAXONNAME NAMETYPE="ICNAFP" ID="11" UUID="f9e9c13f-5fa5-48d3-88cf-712c921a099e" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Gynxsa axrerotciha"  AUTHORSHIPCACHE="[null]" NAMECACHE="Gynxsa axrerotciha"  GENUSORUNINOMIAL="Gynxsa"  SPECIFICEPITHET="axrerotciha"                              BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
-  <TAXONNAME NAMETYPE="ICNAFP" ID="12" UUID="e1e66264-f16a-4df9-80fd-6ab5028a3c28" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Gynxyas asrerotciha" AUTHORSHIPCACHE="Turl."  NAMECACHE="Gynxyas asrerotciha" GENUSORUNINOMIAL="Gynxyas" SPECIFICEPITHET="asrerotciha" COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="10" UUID="6dbd41d1-fe13-4d9c-bb58-31f051c2c384" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Nectandra abortiens"         AUTHORSHIPCACHE=""       NAMECACHE="Nectandra abortiens"        GENUSORUNINOMIAL="Nectandra"    SPECIFICEPITHET="abortiens"     COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="11" UUID="f9e9c13f-5fa5-48d3-88cf-712c921a099e" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Nectandra acuminata"         AUTHORSHIPCACHE="[null]" NAMECACHE="Nectandra acuminata"        GENUSORUNINOMIAL="Nectandra"    SPECIFICEPITHET="acuminata"                                  BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="12" UUID="e1e66264-f16a-4df9-80fd-6ab5028a3c28" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Nectandra acutangula"        AUTHORSHIPCACHE="Turl."  NAMECACHE="Nectandra acutangula"       GENUSORUNINOMIAL="Nectandra"    SPECIFICEPITHET="acutangula"    COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="13" UUID="9062eb8a-2139-4432-bcd7-5dc058b4e91a" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Nectandra cinnamomoides"     AUTHORSHIPCACHE="Turl."  NAMECACHE="Nectandra cinnamomoides"    GENUSORUNINOMIAL="Nectandra"    SPECIFICEPITHET="cinnamomoides" COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="14" UUID="11a18f85-d426-4e41-b094-e1ecb1af1b37" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Nectandra glandulifolia"     AUTHORSHIPCACHE="Turl."  NAMECACHE="Nectandra glandulifolia"    GENUSORUNINOMIAL="Nectandra"    SPECIFICEPITHET="glandulifolia" COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="15" UUID="c3fb2798-3cd4-4d29-ba6c-dfc0debee4b4" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Nectandra guadaripo"         AUTHORSHIPCACHE="Turl."  NAMECACHE="Nectandra guadaripo"        GENUSORUNINOMIAL="Nectandra"    SPECIFICEPITHET="guadaripo"     COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="16" UUID="a17ae30e-d2c1-4d38-a06f-641fd952215d" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Nectandra laevis"            AUTHORSHIPCACHE="Turl."  NAMECACHE="Nectandra laevis"           GENUSORUNINOMIAL="Nectandra"    SPECIFICEPITHET="laevis"        COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="17" UUID="13fff68e-58bf-4ada-acda-7e772b2c2fdc" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Nectandra leucocome"         AUTHORSHIPCACHE="Turl."  NAMECACHE="Nectandra leucocome"        GENUSORUNINOMIAL="Nectandra"    SPECIFICEPITHET="leucocome"     COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="18" UUID="36a2592e-707d-4c11-a137-0251b143ed61" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Nectandra leucothyrsus"      AUTHORSHIPCACHE="Turl."  NAMECACHE="Nectandra leucothyrsus"     GENUSORUNINOMIAL="Nectandra"    SPECIFICEPITHET="leucothyrsus"  COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="19" UUID="34918b5d-cd11-47b0-9593-eea48dfa5033" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Nectandra magnoliaefolia"    AUTHORSHIPCACHE="Turl."  NAMECACHE="Nectandra magnoliaefolia"   GENUSORUNINOMIAL="Nectandra"    SPECIFICEPITHET="magnoliaefolia" COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="20" UUID="10989f63-c52f-4704-9574-2cc0676afe01" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Nectandra magnoliifolia"     AUTHORSHIPCACHE="Turl."  NAMECACHE="Nectandra magnoliifolia"    GENUSORUNINOMIAL="Nectandra"    SPECIFICEPITHET="magnoliifolia" COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="21" UUID="cae90b7a-5deb-4838-940f-f85bb685286e" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Nectandra nigra"             AUTHORSHIPCACHE="Turl."  NAMECACHE="Nectandra nigra"            GENUSORUNINOMIAL="Nectandra"    SPECIFICEPITHET="nigra"         COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="22" UUID="8ad82243-b902-4eb6-990d-59774454b6e7" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Nectandra nigrita"           AUTHORSHIPCACHE="Turl."  NAMECACHE="Nectandra nigrita"          GENUSORUNINOMIAL="Nectandra"    SPECIFICEPITHET="nigrita"       COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="23" UUID="ffb79b15-879f-4271-a3c9-ff182252cc39" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Nectandra petenensis"        AUTHORSHIPCACHE="Turl."  NAMECACHE="Nectandra petenensis"       GENUSORUNINOMIAL="Nectandra"    SPECIFICEPITHET="petenensis"    COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="24" UUID="ecfac2f2-5ff4-4ce3-9ad0-12058867383e" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Nectandra salicina"          AUTHORSHIPCACHE="Turl."  NAMECACHE="Nectandra salicina"         GENUSORUNINOMIAL="Nectandra"    SPECIFICEPITHET="salicina"      COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="25" UUID="cf152170-fafe-48c6-8f19-c4d51acd0e77" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Nectandra saligna"           AUTHORSHIPCACHE="Turl."  NAMECACHE="Nectandra saligna"          GENUSORUNINOMIAL="Nectandra"    SPECIFICEPITHET="saligna"       COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="26" UUID="2364789a-5877-4857-b24c-fd66eb14eb82" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Nectandra sanguinea"         AUTHORSHIPCACHE="Turl."  NAMECACHE="Nectandra sanguinea"        GENUSORUNINOMIAL="Nectandra"    SPECIFICEPITHET="sanguinea"     COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="27" UUID="b184664e-798b-4b50-8807-2163a4de796c" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Nectandra surinamensis"      AUTHORSHIPCACHE="Turl."  NAMECACHE="Nectandra surinamensis"     GENUSORUNINOMIAL="Nectandra"    SPECIFICEPITHET="surinamensis"  COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="28" UUID="b9c8c3ba-bc78-4229-ae7d-b3f7bf23ec85" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Nectandra surinamensis"      AUTHORSHIPCACHE="Mez."   NAMECACHE="Nectandra surinamensis"     GENUSORUNINOMIAL="Nectandra"    SPECIFICEPITHET="surinamensis"  COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="29" UUID="2e406c49-8c3a-4d1a-a38e-f5f8ed470e4c" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Nectalisma natans"           AUTHORSHIPCACHE="Turl."  NAMECACHE="Nectalisma natans"          GENUSORUNINOMIAL="Nectalisma"   SPECIFICEPITHET="natans"        COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="30" UUID="ce57d385-acf5-471f-80b2-2942f65a7161" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Nectalisma aequatium"        AUTHORSHIPCACHE="Turl."  NAMECACHE="Nectalisma aequatium"       GENUSORUNINOMIAL="Nectalisma"   SPECIFICEPITHET="aequatium"     COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="31" UUID="c990da38-d8ca-409b-bb43-f59323e5683b" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Nectandra phylium"           AUTHORSHIPCACHE="Turl."  NAMECACHE="Nectandra phylium"          GENUSORUNINOMIAL="Nectandra"    SPECIFICEPITHET="phylium"       COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="32" UUID="617697ab-5cfe-445f-b65f-6b596a324ade" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Philodendron alatum"         AUTHORSHIPCACHE="Turl."  NAMECACHE="Philodendron alatum"        GENUSORUNINOMIAL="Philodendron" SPECIFICEPITHET="alatum"        COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
+  <TAXONNAME NAMETYPE="ICNAFP" ID="33" UUID="0e16e411-e472-48ab-8b32-da9d3968092c" PROTECTEDTITLECACHE="false" PROTECTEDAUTHORSHIPCACHE="false" PROTECTEDFULLTITLECACHE="false" PROTECTEDNAMECACHE="false" TITLECACHE="Philodendron aristeguietae"  AUTHORSHIPCACHE="Turl."  NAMECACHE="Philodendron aristeguietae" GENUSORUNINOMIAL="Philodendron" SPECIFICEPITHET="aristeguietae" COMBINATIONAUTHORSHIP_ID="1" BINOMHYBRID="false" HYBRIDFORMULA="false" MONOMHYBRID="false" TRINOMHYBRID="false" ANAMORPHIC="false" RANK_ID="765"/>
 
   <AGENTBASE DTYPE="Person" ID="1" UUID="a598ab3f-b33b-4b4b-b237-d616fcb6b5b1" TITLECACHE="Turland, N.J." PROTECTEDTITLECACHE="true" NOMENCLATURALTITLE="Turl." FAMILYNAME="Turland" INITIALS="N.J." />