Project

General

Profile

« Previous | Next » 

Revision da9ff9c4

Added by Andreas Müller about 5 years ago

ref #8041 correct regex quotation method implemented in CdmUtils

View differences:

cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/CdmUtils.java
467 467
        return false;
468 468
    }
469 469

  
470
    /**
471
     * Removes all non-word character (i.e. *, @, %, line breaks, etc.)
472
     * from the given string
473
     * @param string the string which should be trimmed
474
     * @return the trimmed string
475
     */
476
    static public String trimNonWordCharacters(String string){
477
        return replaceNonWordCharacters(string, "");
478
    }
479

  
480
    /**
481
     * Replaces all non-word character (i.e. *, @, %, line breaks, etc.)
482
     * with the given replacement string
483
     * @param string the string which should be trimmed
484
     * @param replacement the replacement for the non-word characters
485
     * @return the trimmed string
486
     */
487
    static public String replaceNonWordCharacters(String string, String replacement){
488
        return string.replaceAll("\\W", replacement);
489
    }
490

  
491 470
    /**
492 471
     * Returns <code>false</code> if string is null, "" or string.trim() is ""
493 472
     * @see isNotEmpty(String string)
......
686 665
        return true;
687 666
    }
688 667

  
668
    /**
669
     * Transforms a search string which allows wildcard "*" into a
670
     * java regular expression such that all other characters are handled as normal text.
671
     * @param regEx
672
     * @return
673
     */
674
    public static String quoteRegExWithWildcard(String regEx){
675
        return Pattern.quote(regEx).replace("*", "\\E.*\\Q").replace("\\Q\\E", "");
676
    }
677

  
689 678
}
cdmlib-commons/src/test/java/eu/etaxonomy/cdm/common/CdmUtilsTest.java
14 14
import java.io.File;
15 15
import java.io.IOException;
16 16
import java.io.InputStream;
17
import java.util.regex.Pattern;
17 18

  
18 19
import org.apache.log4j.Level;
19 20
import org.apache.log4j.Logger;
......
107 108

  
108 109
    }
109 110

  
111
    @Test
112
    public void testquoteRegExWithWildcard(){
113
        String regExBase = ".(*$[ms^";
114
        String regEx = CdmUtils.quoteRegExWithWildcard(regExBase);
115
        Assert.assertEquals("\\Q.(\\E.*\\Q$[ms^\\E", regEx);
116
        boolean matches = ".(*$[ms^".matches(regEx);
117
        Assert.assertTrue(matches);
118
        matches = ".(aaaaaa$[ms^".matches(regEx);
119
        Assert.assertTrue(matches);
120
        matches = "b(aaaaaa$[ms^".matches(regEx);
121
        Assert.assertFalse(matches);
122

  
123
        regEx = CdmUtils.quoteRegExWithWildcard("*abc*");
124
        Assert.assertEquals(".*\\Qabc\\E.*", regEx);
125
        Assert.assertTrue("abc".matches(regEx));
126
        Assert.assertTrue("a80/(--e*wabc?äe".matches(regEx));
127

  
128
    }
129

  
110 130
    /**
111 131
     * This test can be used for functional testing of any task but should
112 132
     * never be committed when failing.
113 133
     */
114 134
    @Test
115 135
    public void testSomething(){
116
       // Regex: "MCL" + 1-3 digits, followed by up to four nested, optional "-digits" groups
       String MCL = "MCL[0-9]{1,3}(\\-[0-9]{1,4}(\\-[0-9]{1,4}(\\-[0-9]{1,3}(\\-[0-9]{1,3})?)?)?)?";
117
//        String MCL = "a{1,3}";
118
        // NOTE(review): filter is assigned but never read in this method
        String filter = "Acc "+MCL;
119

  
120
       String notes = "Acc: 0x is Hieracium djimilense subsp. neotericum Zahn MCL293-3140-00-630";
121
       String result;
122
       // notes starting with "Acc:"
       if (notes.matches("Acc:.*")){
123
           // no result if the note ends with a literal "$" or with an MCL code
           if (notes.matches("Acc: .*\\$$") || (notes.matches("Acc: .*"+MCL))){
124
               result = null;
125
           // "$" or MCL code followed by "{...}": keep the text from after the first "{" up to (excluding) the last character
           }else if (notes.matches("Acc: .*(\\$|"+MCL+")\\s*\\{.*\\}")){
126
               notes = notes.substring(notes.indexOf("{")+1, notes.length()-1);
127
               result = notes;
128
           // same as above for "[...]"
           }else if (notes.matches("Acc: .*(\\$|"+MCL+")\\s*\\[.*\\]")){
129
               notes = notes.substring(notes.indexOf("[")+1, notes.length()-1);
130
               result = notes;
131
           }else{
132
               // none of the patterns matched: warn and keep the note unchanged
               logger.warn("Namenote: " + notes);
133
               result = notes;
134
           }
135
       // notes starting with "Syn:" go through the same checks as the "Acc:" notes above
       }else if (notes.matches("Syn:.*")){
136
           if (notes.matches("Syn: .*\\$$") || (notes.matches("Syn: .*"+MCL))){
137
               result = null;
138
           }else if (notes.matches("Syn: .*(\\$|"+MCL+")\\s*\\{.*\\}")){
139
               notes = notes.substring(notes.indexOf("{")+1, notes.length()-1);
140
               result = notes;
141
           }else if (notes.matches("Syn: .*(\\$|"+MCL+")\\s*\\[.*\\]")){
142
               notes = notes.substring(notes.indexOf("[")+1, notes.length()-1);
143
               result = notes;
144
           }else{
145
               logger.warn("Namenote: " + notes);
146
               result = notes;
147
           }
148
       }else{
149
           // neither "Acc:" nor "Syn:": the note is used unchanged
           result = notes;
150
       }
151
       System.out.println(result);
136
        String str = ".(*$[ms^";
137
        // NOTE(review): this quotes the literal text "str", not the variable str declared above;
        // patQuote is not read afterwards (its only use is commented out below)
        String patQuote = Pattern.quote("str");
138
//        System.out.println(patQuote);
139
//        String matchQuote = Matcher.quoteReplacement(str);
140
//        System.out.println(matchQuote);
141
//        System.out.println(CdmUtils.quoteRegExWithWildcard(str));
152 142
    }
153 143

  
144

  
145

  
146

  
147

  
154 148
}

Also available in: Unified diff