ref #10178: new methods for normalization of names
[cdmlib.git] / cdmlib-commons / src / main / java / eu / etaxonomy / cdm / common / CdmUtilsBelen.java
1 package eu.etaxonomy.cdm.common;
2
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
6
/**
 * Static string helpers for the normalization of names: whitespace
 * clean-up, replacement of accented lower-case letters by ASCII letters,
 * lower-casing, sound-alike substitution, removal of repeated letters and
 * unification of two-letter endings.
 *
 * <p>All methods are stateless. None of them accepts {@code null}; passing
 * {@code null} results in a {@link NullPointerException}.
 */
public class CdmUtilsBelen {

    /** Matches one or more consecutive whitespace characters. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /**
     * Sound-alike substitutions as {pattern, replacement} pairs. They are
     * applied one after the other in exactly this order.
     */
    private static final String[][] SOUNDALIKE_REPLACEMENTS = {
        {"ae", "e"},
        {"ia", "a"},
        {"oe", "i"},
        {"oi", "a"},
        {"sc", "s"}
    };

    /** Two-letter endings that {@link #replacerGenderEnding(String)} turns into "a". */
    private static final String[] GENDER_ENDINGS = {
        "is", "us", "ys", "es", "im", "as", "um", "os"
    };

    /**
     * Collapses every run of whitespace into a single blank and removes
     * leading and trailing whitespace.
     *
     * @param inputName the text to clean up; must not be {@code null}
     * @return the text with single blanks between words and no surrounding whitespace
     * @throws NullPointerException if {@code inputName} is {@code null}
     */
    public static String deleteEmptySpaces(String inputName) {
        return WHITESPACE_RUN.matcher(inputName).replaceAll(" ").trim();
    }

    /**
     * Replaces a fixed set of accented lower-case Latin letters by their
     * plain ASCII base letter. Upper-case letters and all other characters
     * are left untouched.
     *
     * @param str the text to convert; must not be {@code null}
     * @return a string of the same length with the known accented letters replaced
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String replaceSpecialCharacters(String str) {
        char[] chars = str.toCharArray();
        for (int i = 0; i < chars.length; i++) {
            chars[i] = toAsciiLetter(chars[i]);
        }
        return new String(chars);
    }

    /**
     * Maps one accented lower-case letter to its ASCII base letter. The
     * letters are written as Unicode escapes so that the result does not
     * depend on the encoding used to compile this file.
     */
    private static char toAsciiLetter(char c) {
        switch (c) {
            case '\u00e0': // a with grave
            case '\u00e1': // a with acute
            case '\u00e2': // a with circumflex
            case '\u00e3': // a with tilde
            case '\u00e4': // a with diaeresis
            case '\u00e5': // a with ring above
                return 'a';
            case '\u00e8': // e with grave
            case '\u00e9': // e with acute
            case '\u00ea': // e with circumflex
            case '\u00eb': // e with diaeresis
                return 'e';
            case '\u00f2': // o with grave
            case '\u00f3': // o with acute
            case '\u00f4': // o with circumflex
            case '\u00f5': // o with tilde
            case '\u00f6': // o with diaeresis
            case '\u00f8': // o with stroke
                return 'o';
            case '\u00ec': // i with grave
            case '\u00ed': // i with acute
            case '\u00ee': // i with circumflex
            case '\u00ef': // i with diaeresis
                return 'i';
            case '\u00f9': // u with grave
            case '\u00fa': // u with acute
            case '\u00fb': // u with circumflex
            case '\u00fc': // u with diaeresis
                return 'u';
            case '\u00f1': // n with tilde
                return 'n';
            case '\u00e7': // c with cedilla
                return 'c';
            default:
                return c;
        }
    }

    /**
     * Returns a new list holding the lower-case form of every element, in
     * the same order. The input list is not modified.
     *
     * <p>Lower-casing uses {@link Locale#ROOT}, so the result does not
     * depend on the default locale of the JVM (for example, "I" always
     * becomes "i", also under a Turkish default locale).
     *
     * @param list the strings to convert; neither the list nor its elements may be {@code null}
     * @return a new, modifiable list with the lower-cased strings
     * @throws NullPointerException if {@code list} or one of its elements is {@code null}
     */
    public static List<String> listToLowerCase(List<String> list) {
        List<String> lowerCaseList = new ArrayList<>(list.size());
        for (String element : list) {
            lowerCaseList.add(element.toLowerCase(Locale.ROOT));
        }
        return lowerCaseList;
    }

    /**
     * Replaces letter combinations that sound similar: "ae" by "e", "ia" by
     * "a", "oe" by "i", "oi" by "a" and "sc" by "s". Matching is
     * case-sensitive.
     *
     * <p>The substitutions run sequentially in the order listed, each on the
     * result of the previous one. A later pattern can therefore match text
     * produced by an earlier substitution: "oae" first becomes "oe" and
     * then "i".
     *
     * @param inputName the text to convert; must not be {@code null}
     * @return the text after all substitutions
     * @throws NullPointerException if {@code inputName} is {@code null}
     */
    public static String soundalike(String inputName) {
        String result = inputName;
        for (String[] pair : SOUNDALIKE_REPLACEMENTS) {
            // replace() leaves the string unchanged when the pattern is absent,
            // so no preceding contains() check is needed.
            result = result.replace(pair[0], pair[1]);
        }
        return result;
    }

    /**
     * Collapses every run of identical adjacent characters into a single
     * character, e.g. "aabbc" becomes "abc". The comparison is
     * case-sensitive. An empty string is returned unchanged.
     *
     * @param input the text to process; must not be {@code null}
     * @return the text without directly repeated characters
     * @throws NullPointerException if {@code input} is {@code null}
     */
    public static String removeDuplicate(String input) {
        if (input.isEmpty()) {
            // Nothing to collapse, and there is no first character to seed the scan with.
            return input;
        }
        StringBuilder result = new StringBuilder(input.length());
        char previous = input.charAt(0);
        result.append(previous);
        for (int i = 1; i < input.length(); i++) {
            char current = input.charAt(i);
            if (current != previous) {
                result.append(current);
                previous = current;
            }
        }
        return result.toString();
    }

    /**
     * Normalizes the ending of a word: if the last two characters are one
     * of "is", "us", "ys", "es", "im", "as", "um" or "os", they are replaced
     * by a single "a". Matching is case-sensitive. Any other input,
     * including input shorter than two characters, is returned unchanged.
     *
     * @param input the word to normalize; must not be {@code null}
     * @return the word with a unified ending
     * @throws NullPointerException if {@code input} is {@code null}
     */
    public static String replacerGenderEnding(String input) {
        int stemLength = input.length() - 2;
        if (stemLength < 0) {
            // Too short to carry a two-letter ending.
            return input;
        }
        String ending = input.substring(stemLength);
        for (String genderEnding : GENDER_ENDINGS) {
            if (ending.equals(genderEnding)) {
                return input.substring(0, stemLength) + "a";
            }
        }
        return input;
    }
}
91 }
92 }