Project

General

Profile

Download (11.9 KB) Statistics
| Branch: | Tag: | Revision:
1
/*******************************************************************************
2
 * Copyright (c) 2000, 2006 IBM Corporation and others.
3
 * All rights reserved. This program and the accompanying materials
4
 * are made available under the terms of the Eclipse Public License v1.0
5
 * which accompanies this distribution, and is available at
6
 * http://www.eclipse.org/legal/epl-v10.html
7
 *
8
 * Contributors:
9
 *     IBM Corporation - initial API and implementation
10
 *******************************************************************************/
11

    
12
package org.eclipse.ui.internal.navigator;
13

    
14
import java.util.Vector;
15

    
16
/**
17
 * A string pattern matcher, suppporting "*" and "?" wildcards.
18
 */
19
public class StringMatcher {
20
	protected String fPattern;
21

    
22
	protected int fLength; // pattern length
23

    
24
	protected boolean fIgnoreWildCards;
25

    
26
	protected boolean fIgnoreCase;
27

    
28
	protected boolean fHasLeadingStar;
29

    
30
	protected boolean fHasTrailingStar;
31

    
32
	protected String fSegments[]; // the given pattern is split into *
33

    
34
	// separated segments
35

    
36
	/* boundary value beyond which we don't need to search in the text */
37
	protected int fBound = 0;
38

    
39
	protected static final char fSingleWildCard = '\u0000';
40

    
41
	/**
42
	 * 
43
	 */
44
	static class Position {
45
		int start; // inclusive
46

    
47
		int end; // exclusive
48

    
49
		Position(int start, int end) {
50
			this.start = start;
51
			this.end = end;
52
		}
53

    
54
		int getStart() {
55
			return start;
56
		}
57

    
58
		int getEnd() {
59
			return end;
60
		}
61
	}
62

    
63
	/**
64
	 * StringMatcher constructor takes in a String object that is a simple
65
	 * pattern which may contain '*' for 0 and many characters and '?' for
66
	 * exactly one character.
67
	 * 
68
	 * Literal '*' and '?' characters must be escaped in the pattern e.g., "\*"
69
	 * means literal "*", etc.
70
	 * 
71
	 * Escaping any other character (including the escape character itself),
72
	 * just results in that character in the pattern. e.g., "\a" means "a" and
73
	 * "\\" means "\"
74
	 * 
75
	 * If invoking the StringMatcher with string literals in Java, don't forget
76
	 * escape characters are represented by "\\".
77
	 * 
78
	 * @param pattern
79
	 *            the pattern to match text against
80
	 * @param ignoreCase
81
	 *            if true, case is ignored
82
	 * @param ignoreWildCards
83
	 *            if true, wild cards and their escape sequences are ignored
84
	 *            (everything is taken literally).
85
	 */
86
	public StringMatcher(String pattern, boolean ignoreCase,
87
			boolean ignoreWildCards) {
88
		if (pattern == null) {
89
			throw new IllegalArgumentException();
90
		}
91
		fIgnoreCase = ignoreCase;
92
		fIgnoreWildCards = ignoreWildCards;
93
		fPattern = pattern;
94
		fLength = pattern.length();
95

    
96
		if (fIgnoreWildCards) {
97
			parseNoWildCards();
98
		} else {
99
			parseWildCards();
100
		}
101
	}
102

    
103
	/**
104
	 * Find the first occurrence of the pattern between
105
	 * <code>start</code)(inclusive) 
106
	 * and <code>end</code>(exclusive).  
107
	 * @param  text  the String object to search in 
108
	 * @param  start  the starting index of the search range, inclusive
109
	 * @param  end  the ending index of the search range, exclusive
110
	 * @return an <code>StringMatcher.Position</code> object that keeps the starting 
111
	 * (inclusive) and ending positions (exclusive) of the first occurrence of the 
112
	 * pattern in the specified range of the text; return null if not found or subtext
113
	 * is empty (start==end). A pair of zeros is returned if pattern is empty string
114
	 * Note that for pattern like "*abc*" with leading and trailing stars, position of "abc"
115
	 * is returned. For a pattern like"*??*" in text "abcdf", (1,3) is returned
116
	 */
117
	public StringMatcher.Position find(String text, int start, int end) {
118
		if (text == null) {
119
			throw new IllegalArgumentException();
120
		}
121

    
122
		int tlen = text.length();
123
		if (start < 0) {
124
			start = 0;
125
		}
126
		if (end > tlen) {
127
			end = tlen;
128
		}
129
		if (end < 0 || start >= end) {
130
			return null;
131
		}
132
		if (fLength == 0) {
133
			return new Position(start, start);
134
		}
135
		if (fIgnoreWildCards) {
136
			int x = posIn(text, start, end);
137
			if (x < 0) {
138
				return null;
139
			}
140
			return new Position(x, x + fLength);
141
		}
142

    
143
		int segCount = fSegments.length;
144
		if (segCount == 0) {
145
			return new Position(start, end);
146
		}
147

    
148
		int curPos = start;
149
		int matchStart = -1;
150
		int i;
151
		for (i = 0; i < segCount && curPos < end; ++i) {
152
			String current = fSegments[i];
153
			int nextMatch = regExpPosIn(text, curPos, end, current);
154
			if (nextMatch < 0) {
155
				return null;
156
			}
157
			if (i == 0) {
158
				matchStart = nextMatch;
159
			}
160
			curPos = nextMatch + current.length();
161
		}
162
		if (i < segCount) {
163
			return null;
164
		}
165
		return new Position(matchStart, curPos);
166
	}
167

    
168
	/**
169
	 * match the given <code>text</code> with the pattern
170
	 * 
171
	 * @return true if matched eitherwise false
172
	 * @param text
173
	 *            a String object
174
	 */
175
	public boolean match(String text) {
176
		if (text == null) {
177
			return false;
178
		}
179
		return match(text, 0, text.length());
180
	}
181

    
182
	/**
183
	 * Given the starting (inclusive) and the ending (exclusive) positions in
184
	 * the <code>text</code>, determine if the given substring matches with
185
	 * aPattern
186
	 * 
187
	 * @return true if the specified portion of the text matches the pattern
188
	 * @param text
189
	 *            a String object that contains the substring to match
190
	 * @param start
191
	 *            marks the starting position (inclusive) of the substring
192
	 * @param end
193
	 *            marks the ending index (exclusive) of the substring
194
	 */
195
	public boolean match(String text, int start, int end) {
196
		if (null == text) {
197
			throw new IllegalArgumentException();
198
		}
199

    
200
		if (start > end) {
201
			return false;
202
		}
203

    
204
		if (fIgnoreWildCards) {
205
			return (end - start == fLength)
206
					&& fPattern.regionMatches(fIgnoreCase, 0, text, start,
207
							fLength);
208
		}
209
		int segCount = fSegments.length;
210
		if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar)) {
211
			// contains
212
			// only
213
			// '*'(s)
214
			return true;
215
		}
216
		if (start == end) {
217
			return fLength == 0;
218
		}
219
		if (fLength == 0) {
220
			return start == end;
221
		}
222

    
223
		int tlen = text.length();
224
		if (start < 0) {
225
			start = 0;
226
		}
227
		if (end > tlen) {
228
			end = tlen;
229
		}
230

    
231
		int tCurPos = start;
232
		int bound = end - fBound;
233
		if (bound < 0) {
234
			return false;
235
		}
236
		int i = 0;
237
		String current = fSegments[i];
238
		int segLength = current.length();
239

    
240
		/* process first segment */
241
		if (!fHasLeadingStar) {
242
			if (!regExpRegionMatches(text, start, current, 0, segLength)) {
243
				return false;
244
			}
245
			++i;
246
			tCurPos = tCurPos + segLength;
247

    
248
		}
249
		if ((fSegments.length == 1) && (!fHasLeadingStar)
250
				&& (!fHasTrailingStar)) {
251
			// only one segment to match, no wildcards specified
252
			return tCurPos == end;
253
		}
254
		/* process middle segments */
255
		while (i < segCount) {
256
			current = fSegments[i];
257
			int currentMatch;
258
			int k = current.indexOf(fSingleWildCard);
259
			if (k < 0) {
260
				currentMatch = textPosIn(text, tCurPos, end, current);
261
				if (currentMatch < 0) {
262
					return false;
263
				}
264
			} else {
265
				currentMatch = regExpPosIn(text, tCurPos, end, current);
266
				if (currentMatch < 0) {
267
					return false;
268
				}
269
			}
270
			tCurPos = currentMatch + current.length();
271
			i++;
272
		}
273

    
274
		/* process final segment */
275
		if (!fHasTrailingStar && tCurPos != end) {
276
			int clen = current.length();
277
			return regExpRegionMatches(text, end - clen, current, 0, clen);
278
		}
279
		return i == segCount;
280
	}
281

    
282
	/**
283
	 * This method parses the given pattern into segments seperated by wildcard
284
	 * '*' characters. Since wildcards are not being used in this case, the
285
	 * pattern consists of a single segment.
286
	 */
287
	private void parseNoWildCards() {
288
		fSegments = new String[1];
289
		fSegments[0] = fPattern;
290
		fBound = fLength;
291
	}
292

    
293
	/**
294
	 * Parses the given pattern into segments seperated by wildcard '*'
295
	 * characters.
296
	 * 
297
	 */
298
	private void parseWildCards() {
299
		if (fPattern.startsWith("*")) { //$NON-NLS-1$
300
			fHasLeadingStar = true;
301
		}
302
		if (fPattern.endsWith("*")) {//$NON-NLS-1$
303
			/* make sure it's not an escaped wildcard */
304
			if (fLength > 1 && fPattern.charAt(fLength - 2) != '\\') {
305
				fHasTrailingStar = true;
306
			}
307
		}
308

    
309
		Vector temp = new Vector();
310

    
311
		int pos = 0;
312
		StringBuffer buf = new StringBuffer();
313
		while (pos < fLength) {
314
			char c = fPattern.charAt(pos++);
315
			switch (c) {
316
			case '\\':
317
				if (pos >= fLength) {
318
					buf.append(c);
319
				} else {
320
					char next = fPattern.charAt(pos++);
321
					/* if it's an escape sequence */
322
					if (next == '*' || next == '?' || next == '\\') {
323
						buf.append(next);
324
					} else {
325
						/* not an escape sequence, just insert literally */
326
						buf.append(c);
327
						buf.append(next);
328
					}
329
				}
330
				break;
331
			case '*':
332
				if (buf.length() > 0) {
333
					/* new segment */
334
					temp.addElement(buf.toString());
335
					fBound += buf.length();
336
					buf.setLength(0);
337
				}
338
				break;
339
			case '?':
340
				/* append special character representing single match wildcard */
341
				buf.append(fSingleWildCard);
342
				break;
343
			default:
344
				buf.append(c);
345
			}
346
		}
347

    
348
		/* add last buffer to segment list */
349
		if (buf.length() > 0) {
350
			temp.addElement(buf.toString());
351
			fBound += buf.length();
352
		}
353

    
354
		fSegments = new String[temp.size()];
355
		temp.copyInto(fSegments);
356
	}
357

    
358
	/**
359
	 * @param text
360
	 *            a string which contains no wildcard
361
	 * @param start
362
	 *            the starting index in the text for search, inclusive
363
	 * @param end
364
	 *            the stopping point of search, exclusive
365
	 * @return the starting index in the text of the pattern , or -1 if not
366
	 *         found
367
	 */
368
	protected int posIn(String text, int start, int end) {// no wild card in
369
		// pattern
370
		int max = end - fLength;
371

    
372
		if (!fIgnoreCase) {
373
			int i = text.indexOf(fPattern, start);
374
			if (i == -1 || i > max) {
375
				return -1;
376
			}
377
			return i;
378
		}
379

    
380
		for (int i = start; i <= max; ++i) {
381
			if (text.regionMatches(true, i, fPattern, 0, fLength)) {
382
				return i;
383
			}
384
		}
385

    
386
		return -1;
387
	}
388

    
389
	/**
390
	 * @param text
391
	 *            a simple regular expression that may only contain '?'(s)
392
	 * @param start
393
	 *            the starting index in the text for search, inclusive
394
	 * @param end
395
	 *            the stopping point of search, exclusive
396
	 * @param p
397
	 *            a simple regular expression that may contains '?'
398
	 * @return the starting index in the text of the pattern , or -1 if not
399
	 *         found
400
	 */
401
	protected int regExpPosIn(String text, int start, int end, String p) {
402
		int plen = p.length();
403

    
404
		int max = end - plen;
405
		for (int i = start; i <= max; ++i) {
406
			if (regExpRegionMatches(text, i, p, 0, plen)) {
407
				return i;
408
			}
409
		}
410
		return -1;
411
	}
412

    
413
	/**
414
	 * 
415
	 * @return boolean
416
	 * @param text
417
	 *            a String to match
418
	 * @param tStart
419
	 *            indicates the starting index of match, inclusive
420
	 * @param p
421
	 *            a simple regular expression that may contain '?'
422
	 * @param pStart
423
	 * @param plen
424
	 */
425
	protected boolean regExpRegionMatches(String text, int tStart, String p,
426
			int pStart, int plen) {
427
		while (plen-- > 0) {
428
			char tchar = text.charAt(tStart++);
429
			char pchar = p.charAt(pStart++);
430

    
431
			/* process wild cards */
432
			if (!fIgnoreWildCards) {
433
				/* skip single wild cards */
434
				if (pchar == fSingleWildCard) {
435
					continue;
436
				}
437
			}
438
			if (pchar == tchar) {
439
				continue;
440
			}
441
			if (fIgnoreCase) {
442
				if (Character.toUpperCase(tchar) == Character
443
						.toUpperCase(pchar)) {
444
					continue;
445
				}
446
				// comparing after converting to upper case doesn't handle all
447
				// cases;
448
				// also compare after converting to lower case
449
				if (Character.toLowerCase(tchar) == Character
450
						.toLowerCase(pchar)) {
451
					continue;
452
				}
453
			}
454
			return false;
455
		}
456
		return true;
457
	}
458

    
459
	/**
460
	 * @param text
461
	 *            the string to match
462
	 * @param start
463
	 *            the starting index in the text for search, inclusive
464
	 * @param end
465
	 *            the stopping point of search, exclusive
466
	 * @param p
467
	 *            a string that has no wildcard
468
	 * @return the starting index in the text of the pattern , or -1 if not
469
	 *         found
470
	 */
471
	protected int textPosIn(String text, int start, int end, String p) {
472

    
473
		int plen = p.length();
474
		int max = end - plen;
475

    
476
		if (!fIgnoreCase) {
477
			int i = text.indexOf(p, start);
478
			if (i == -1 || i > max) {
479
				return -1;
480
			}
481
			return i;
482
		}
483

    
484
		for (int i = start; i <= max; ++i) {
485
			if (text.regionMatches(true, i, p, 0, plen)) {
486
				return i;
487
			}
488
		}
489

    
490
		return -1;
491
	}
492
}
(28-28/31)