001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.camel.core.xml.scan;
018    
019    import java.util.ArrayList;
020    import java.util.List;
021    import java.util.StringTokenizer;
022    
023    /**
024     * PathMatcher implementation for Ant-style path patterns. Examples are provided
025     * below.
026     * <p>
027     * Part of this mapping code has been kindly borrowed from <a
028     * href="http://ant.apache.org">Apache Ant</a>.
029     * <p>
030     * The mapping matches URLs using the following rules:<br>
031     * <ul>
032     * <li>? matches one character</li>
033     * <li>* matches zero or more characters</li>
034     * <li>** matches zero or more 'directories' in a path</li>
035     * </ul>
036     * <p>
037     * Some examples:<br>
038     * <ul>
039     * <li><code>com/t?st.jsp</code> - matches <code>com/test.jsp</code> but also
040     * <code>com/tast.jsp</code> or <code>com/txst.jsp</code></li>
041     * <li><code>com/*.jsp</code> - matches all <code>.jsp</code> files in the
042     * <code>com</code> directory</li>
043     * <li><code>com/&#42;&#42;/test.jsp</code> - matches all <code>test.jsp</code>
044     * files underneath the <code>com</code> path</li>
045     * <li><code>org/springframework/&#42;&#42;/*.jsp</code> - matches all
046     * <code>.jsp</code> files underneath the <code>org/springframework</code> path</li>
047     * <li><code>org/&#42;&#42;/servlet/bla.jsp</code> - matches
048     * <code>org/springframework/servlet/bla.jsp</code> but also
049     * <code>org/springframework/testing/servlet/bla.jsp</code> and
050     * <code>org/servlet/bla.jsp</code></li>
051     * </ul>
052     * 
053     * @author Alef Arendsen
054     * @author Juergen Hoeller
055     * @author Rob Harrop
056     * @since 16.07.2003
057     */
058    public class AntPathMatcher {
059    
060        /** Default path separator: "/" */
061        public static final String DEFAULT_PATH_SEPARATOR = "/";
062    
063        private String pathSeparator = DEFAULT_PATH_SEPARATOR;
064    
065        /**
066         * Set the path separator to use for pattern parsing. Default is "/", as in
067         * Ant.
068         */
069        public void setPathSeparator(String pathSeparator) {
070            this.pathSeparator = pathSeparator != null ? pathSeparator : DEFAULT_PATH_SEPARATOR;
071        }
072    
073        public boolean isPattern(String path) {
074            return path.indexOf('*') != -1 || path.indexOf('?') != -1;
075        }
076    
077        public boolean match(String pattern, String path) {
078            return doMatch(pattern, path, true);
079        }
080    
081        public boolean matchStart(String pattern, String path) {
082            return doMatch(pattern, path, false);
083        }
084    
085        /**
086         * Actually match the given <code>path</code> against the given
087         * <code>pattern</code>.
088         * 
089         * @param pattern the pattern to match against
090         * @param path the path String to test
091         * @param fullMatch whether a full pattern match is required (else a pattern
092         *            match as far as the given base path goes is sufficient)
093         * @return <code>true</code> if the supplied <code>path</code> matched,
094         *         <code>false</code> if it didn't
095         */
096        protected boolean doMatch(String pattern, String path, boolean fullMatch) {
097            if (path.startsWith(this.pathSeparator) != pattern.startsWith(this.pathSeparator)) {
098                return false;
099            }
100    
101            String[] pattDirs = tokenizeToStringArray(pattern, this.pathSeparator);
102            String[] pathDirs = tokenizeToStringArray(path, this.pathSeparator);
103    
104            int pattIdxStart = 0;
105            int pattIdxEnd = pattDirs.length - 1;
106            int pathIdxStart = 0;
107            int pathIdxEnd = pathDirs.length - 1;
108    
109            // Match all elements up to the first **
110            while (pattIdxStart <= pattIdxEnd && pathIdxStart <= pathIdxEnd) {
111                String patDir = pattDirs[pattIdxStart];
112                if ("**".equals(patDir)) {
113                    break;
114                }
115                if (!matchStrings(patDir, pathDirs[pathIdxStart])) {
116                    return false;
117                }
118                pattIdxStart++;
119                pathIdxStart++;
120            }
121    
122            if (pathIdxStart > pathIdxEnd) {
123                // Path is exhausted, only match if rest of pattern is * or **'s
124                if (pattIdxStart > pattIdxEnd) {
125                    return pattern.endsWith(this.pathSeparator) ? path.endsWith(this.pathSeparator) : !path
126                        .endsWith(this.pathSeparator);
127                }
128                if (!fullMatch) {
129                    return true;
130                }
131                if (pattIdxStart == pattIdxEnd && pattDirs[pattIdxStart].equals("*")
132                    && path.endsWith(this.pathSeparator)) {
133                    return true;
134                }
135                for (int i = pattIdxStart; i <= pattIdxEnd; i++) {
136                    if (!pattDirs[i].equals("**")) {
137                        return false;
138                    }
139                }
140                return true;
141            } else if (pattIdxStart > pattIdxEnd) {
142                // String not exhausted, but pattern is. Failure.
143                return false;
144            } else if (!fullMatch && "**".equals(pattDirs[pattIdxStart])) {
145                // Path start definitely matches due to "**" part in pattern.
146                return true;
147            }
148    
149            // up to last '**'
150            while (pattIdxStart <= pattIdxEnd && pathIdxStart <= pathIdxEnd) {
151                String patDir = pattDirs[pattIdxEnd];
152                if (patDir.equals("**")) {
153                    break;
154                }
155                if (!matchStrings(patDir, pathDirs[pathIdxEnd])) {
156                    return false;
157                }
158                pattIdxEnd--;
159                pathIdxEnd--;
160            }
161            if (pathIdxStart > pathIdxEnd) {
162                // String is exhausted
163                for (int i = pattIdxStart; i <= pattIdxEnd; i++) {
164                    if (!pattDirs[i].equals("**")) {
165                        return false;
166                    }
167                }
168                return true;
169            }
170    
171            while (pattIdxStart != pattIdxEnd && pathIdxStart <= pathIdxEnd) {
172                int patIdxTmp = -1;
173                for (int i = pattIdxStart + 1; i <= pattIdxEnd; i++) {
174                    if (pattDirs[i].equals("**")) {
175                        patIdxTmp = i;
176                        break;
177                    }
178                }
179                if (patIdxTmp == pattIdxStart + 1) {
180                    // '**/**' situation, so skip one
181                    pattIdxStart++;
182                    continue;
183                }
184                // Find the pattern between padIdxStart & padIdxTmp in str between
185                // strIdxStart & strIdxEnd
186                int patLength = patIdxTmp - pattIdxStart - 1;
187                int strLength = pathIdxEnd - pathIdxStart + 1;
188                int foundIdx = -1;
189    
190            strLoop:
191                for (int i = 0; i <= strLength - patLength; i++) {
192                    for (int j = 0; j < patLength; j++) {
193                        String subPat = (String)pattDirs[pattIdxStart + j + 1];
194                        String subStr = (String)pathDirs[pathIdxStart + i + j];
195                        if (!matchStrings(subPat, subStr)) {
196                            continue strLoop;
197                        }
198                    }
199                    foundIdx = pathIdxStart + i;
200                    break;
201                }
202    
203                if (foundIdx == -1) {
204                    return false;
205                }
206    
207                pattIdxStart = patIdxTmp;
208                pathIdxStart = foundIdx + patLength;
209            }
210    
211            for (int i = pattIdxStart; i <= pattIdxEnd; i++) {
212                if (!pattDirs[i].equals("**")) {
213                    return false;
214                }
215            }
216    
217            return true;
218        }
219    
220        /**
221         * Tests whether or not a string matches against a pattern. The pattern may
222         * contain two special characters:<br>
223         * '*' means zero or more characters<br>
224         * '?' means one and only one character
225         * 
226         * @param pattern pattern to match against. Must not be <code>null</code>.
227         * @param str string which must be matched against the pattern. Must not be
228         *            <code>null</code>.
229         * @return <code>true</code> if the string matches against the pattern, or
230         *         <code>false</code> otherwise.
231         */
232        private boolean matchStrings(String pattern, String str) {
233            char[] patArr = pattern.toCharArray();
234            char[] strArr = str.toCharArray();
235            int patIdxStart = 0;
236            int patIdxEnd = patArr.length - 1;
237            int strIdxStart = 0;
238            int strIdxEnd = strArr.length - 1;
239            char ch;
240    
241            boolean containsStar = false;
242            for (int i = 0; i < patArr.length; i++) {
243                if (patArr[i] == '*') {
244                    containsStar = true;
245                    break;
246                }
247            }
248    
249            if (!containsStar) {
250                // No '*'s, so we make a shortcut
251                if (patIdxEnd != strIdxEnd) {
252                    return false; // Pattern and string do not have the same size
253                }
254                for (int i = 0; i <= patIdxEnd; i++) {
255                    ch = patArr[i];
256                    if (ch != '?') {
257                        if (ch != strArr[i]) {
258                            return false;
259                            // Character mismatch
260                        }
261                    }
262                }
263                return true; // String matches against pattern
264            }
265    
266            if (patIdxEnd == 0) {
267                return true; // Pattern contains only '*', which matches anything
268            }
269    
270            // Process characters before first star
271            while ((ch = patArr[patIdxStart]) != '*' && strIdxStart <= strIdxEnd) {
272                if (ch != '?') {
273                    if (ch != strArr[strIdxStart]) {
274                        return false;
275                        // Character mismatch
276                    }
277                }
278                patIdxStart++;
279                strIdxStart++;
280            }
281            if (strIdxStart > strIdxEnd) {
282                // All characters in the string are used. Check if only '*'s are
283                // left in the pattern. If so, we succeeded. Otherwise failure.
284                for (int i = patIdxStart; i <= patIdxEnd; i++) {
285                    if (patArr[i] != '*') {
286                        return false;
287                    }
288                }
289                return true;
290            }
291    
292            // Process characters after last star
293            while ((ch = patArr[patIdxEnd]) != '*' && strIdxStart <= strIdxEnd) {
294                if (ch != '?') {
295                    if (ch != strArr[strIdxEnd]) {
296                        return false;
297                        // Character mismatch
298                    }
299                }
300                patIdxEnd--;
301                strIdxEnd--;
302            }
303            if (strIdxStart > strIdxEnd) {
304                // All characters in the string are used. Check if only '*'s are
305                // left in the pattern. If so, we succeeded. Otherwise failure.
306                for (int i = patIdxStart; i <= patIdxEnd; i++) {
307                    if (patArr[i] != '*') {
308                        return false;
309                    }
310                }
311                return true;
312            }
313    
314            // process pattern between stars. padIdxStart and patIdxEnd point
315            // always to a '*'.
316            while (patIdxStart != patIdxEnd && strIdxStart <= strIdxEnd) {
317                int patIdxTmp = -1;
318                for (int i = patIdxStart + 1; i <= patIdxEnd; i++) {
319                    if (patArr[i] == '*') {
320                        patIdxTmp = i;
321                        break;
322                    }
323                }
324                if (patIdxTmp == patIdxStart + 1) {
325                    // Two stars next to each other, skip the first one.
326                    patIdxStart++;
327                    continue;
328                }
329                // Find the pattern between padIdxStart & padIdxTmp in str between
330                // strIdxStart & strIdxEnd
331                int patLength = patIdxTmp - patIdxStart - 1;
332                int strLength = strIdxEnd - strIdxStart + 1;
333                int foundIdx = -1;
334            strLoop: 
335                for (int i = 0; i <= strLength - patLength; i++) {
336                    for (int j = 0; j < patLength; j++) {
337                        ch = patArr[patIdxStart + j + 1];
338                        if (ch != '?') {
339                            if (ch != strArr[strIdxStart + i + j]) {
340                                continue strLoop;
341                            }
342                        }
343                    }
344    
345                    foundIdx = strIdxStart + i;
346                    break;
347                }
348    
349                if (foundIdx == -1) {
350                    return false;
351                }
352    
353                patIdxStart = patIdxTmp;
354                strIdxStart = foundIdx + patLength;
355            }
356    
357            // All characters in the string are used. Check if only '*'s are left
358            // in the pattern. If so, we succeeded. Otherwise failure.
359            for (int i = patIdxStart; i <= patIdxEnd; i++) {
360                if (patArr[i] != '*') {
361                    return false;
362                }
363            }
364    
365            return true;
366        }
367    
368        /**
369         * Given a pattern and a full path, determine the pattern-mapped part.
370         * <p>
371         * For example:
372         * <ul>
373         * <li>'<code>/docs/cvs/commit.html</code>' and '
374         * <code>/docs/cvs/commit.html</code> -> ''</li>
375         * <li>'<code>/docs/*</code>' and '<code>/docs/cvs/commit</code> -> '
376         * <code>cvs/commit</code>'</li>
377         * <li>'<code>/docs/cvs/*.html</code>' and '
378         * <code>/docs/cvs/commit.html</code> -> '<code>commit.html</code>'</li>
379         * <li>'<code>/docs/**</code>' and '<code>/docs/cvs/commit</code> -> '
380         * <code>cvs/commit</code>'</li>
381         * <li>'<code>/docs/**\/*.html</code>' and '
382         * <code>/docs/cvs/commit.html</code> -> '<code>cvs/commit.html</code>'</li>
383         * <li>'<code>/*.html</code>' and '<code>/docs/cvs/commit.html</code> -> '
384         * <code>docs/cvs/commit.html</code>'</li>
385         * <li>'<code>*.html</code>' and '<code>/docs/cvs/commit.html</code> -> '
386         * <code>/docs/cvs/commit.html</code>'</li>
387         * <li>'<code>*</code>' and '<code>/docs/cvs/commit.html</code> -> '
388         * <code>/docs/cvs/commit.html</code>'</li>
389         * </ul>
390         * <p>
391         * Assumes that {@link #match} returns <code>true</code> for '
392         * <code>pattern</code>' and '<code>path</code>', but does
393         * <strong>not</strong> enforce this.
394         */
395        public String extractPathWithinPattern(String pattern, String path) {
396            String[] patternParts = tokenizeToStringArray(pattern, this.pathSeparator);
397            String[] pathParts = tokenizeToStringArray(path, this.pathSeparator);
398    
399            StringBuffer buffer = new StringBuffer();
400    
401            // Add any path parts that have a wildcarded pattern part.
402            int puts = 0;
403            for (int i = 0; i < patternParts.length; i++) {
404                String patternPart = patternParts[i];
405                if ((patternPart.indexOf('*') > -1 || patternPart.indexOf('?') > -1) && pathParts.length >= i + 1) {
406                    if (puts > 0 || (i == 0 && !pattern.startsWith(this.pathSeparator))) {
407                        buffer.append(this.pathSeparator);
408                    }
409                    buffer.append(pathParts[i]);
410                    puts++;
411                }
412            }
413    
414            // Append any trailing path parts.
415            for (int i = patternParts.length; i < pathParts.length; i++) {
416                if (puts > 0 || i > 0) {
417                    buffer.append(this.pathSeparator);
418                }
419                buffer.append(pathParts[i]);
420            }
421    
422            return buffer.toString();
423        }
424    
425        /**
426         * Tokenize the given String into a String array via a StringTokenizer.
427         * Trims tokens and omits empty tokens.
428         * <p>
429         * The given delimiters string is supposed to consist of any number of
430         * delimiter characters. Each of those characters can be used to separate
431         * tokens. A delimiter is always a single character; for multi-character
432         * delimiters, consider using <code>delimitedListToStringArray</code>
433         * 
434         * @param str the String to tokenize
435         * @param delimiters the delimiter characters, assembled as String (each of
436         *            those characters is individually considered as delimiter).
437         * @return an array of the tokens
438         * @see java.util.StringTokenizer
439         * @see java.lang.String#trim()
440         */
441        public static String[] tokenizeToStringArray(String str, String delimiters) {
442            if (str == null) {
443                return null;
444            }
445            StringTokenizer st = new StringTokenizer(str, delimiters);
446            List<String> tokens = new ArrayList<String>();
447            while (st.hasMoreTokens()) {
448                String token = st.nextToken();
449                token = token.trim();
450                if (token.length() > 0) {
451                    tokens.add(token);
452                }
453            }
454            return (String[])tokens.toArray(new String[tokens.size()]);
455        }
456    
457    }