Thanks to everyone for their input. I wrote a more complete conversion than any of the previous answers:
/** * Converts a standard POSIX Shell globbing pattern into a regular expression * pattern. The result can be used with the standard {@link java.util.regex} API to * recognize strings which match the glob pattern. * <p/> * See also, the POSIX Shell language: * http://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_13_01 * * @param pattern A glob pattern. * @return A regex pattern to recognize the given glob pattern. */ public static final String convertGlobToRegex(String pattern) { StringBuilder sb = new StringBuilder(pattern.length()); int inGroup = 0; int inClass = 0; int firstIndexInClass = -1; char[] arr = pattern.toCharArray(); for (int i = 0; i < arr.length; i++) { char ch = arr[i]; switch (ch) { case '\\': if (++i >= arr.length) { sb.append('\\'); } else { char next = arr[i]; switch (next) { case ',': // escape not needed break; case 'Q': case 'E': // extra escape needed sb.append('\\'); default: sb.append('\\'); } sb.append(next); } break; case '*': if (inClass == 0) sb.append(".*"); else sb.append('*'); break; case '?': if (inClass == 0) sb.append('.'); else sb.append('?'); break; case '[': inClass++; firstIndexInClass = i+1; sb.append('['); break; case ']': inClass--; sb.append(']'); break; case '.': case '(': case ')': case '+': case '|': case '^': case '$': case '@': case '%': if (inClass == 0 || (firstIndexInClass == i && ch == '^')) sb.append('\\'); sb.append(ch); break; case '!': if (firstIndexInClass == i) sb.append('^'); else sb.append('!'); break; case '{': inGroup++; sb.append('('); break; case '}': inGroup--; sb.append(')'); break; case ',': if (inGroup > 0) sb.append('|'); else sb.append(','); break; default: sb.append(ch); } } return sb.toString(); }
And the module checks if this works:
public class StringUtils_ConvertGlobToRegex_Test { @Test public void star_becomes_dot_star() throws Exception { assertEquals("gl.*b", StringUtils.convertGlobToRegex("gl*b")); } @Test public void escaped_star_is_unchanged() throws Exception { assertEquals("gl\\*b", StringUtils.convertGlobToRegex("gl\\*b")); } @Test public void question_mark_becomes_dot() throws Exception { assertEquals("gl.b", StringUtils.convertGlobToRegex("gl?b")); } @Test public void escaped_question_mark_is_unchanged() throws Exception { assertEquals("gl\\?b", StringUtils.convertGlobToRegex("gl\\?b")); } @Test public void character_classes_dont_need_conversion() throws Exception { assertEquals("gl[-o]b", StringUtils.convertGlobToRegex("gl[-o]b")); } @Test public void escaped_classes_are_unchanged() throws Exception { assertEquals("gl\\[-o\\]b", StringUtils.convertGlobToRegex("gl\\[-o\\]b")); } @Test public void negation_in_character_classes() throws Exception { assertEquals("gl[^an!pz]b", StringUtils.convertGlobToRegex("gl[!an!pz]b")); } @Test public void nested_negation_in_character_classes() throws Exception { assertEquals("gl[[^an]!pz]b", StringUtils.convertGlobToRegex("gl[[!an]!pz]b")); } @Test public void escape_carat_if_it_is_the_first_char_in_a_character_class() throws Exception { assertEquals("gl[\\^o]b", StringUtils.convertGlobToRegex("gl[^o]b")); } @Test public void metachars_are_escaped() throws Exception { assertEquals("gl..*\\.\\(\\)\\+\\|\\^\\$\\@\\%b", StringUtils.convertGlobToRegex("gl?*.()+|^$@%b")); } @Test public void metachars_in_character_classes_dont_need_escaping() throws Exception { assertEquals("gl[?*.()+|^$@%]b", StringUtils.convertGlobToRegex("gl[?*.()+|^$@%]b")); } @Test public void escaped_backslash_is_unchanged() throws Exception { assertEquals("gl\\\\b", StringUtils.convertGlobToRegex("gl\\\\b")); } @Test public void slashQ_and_slashE_are_escaped() throws Exception { assertEquals("\\\\Qglob\\\\E", StringUtils.convertGlobToRegex("\\Qglob\\E")); } @Test public void braces_are_turned_into_groups() throws Exception { assertEquals("(glob|regex)", StringUtils.convertGlobToRegex("{glob,regex}")); } @Test public void escaped_braces_are_unchanged() throws Exception { assertEquals("\\{glob\\}", StringUtils.convertGlobToRegex("\\{glob\\}")); } @Test public void commas_dont_need_escaping() throws Exception { assertEquals("(glob,regex),", StringUtils.convertGlobToRegex("{glob\\,regex},")); } }
Neil Traft Jun 28 '13 at 16:58 2013-06-28 16:58
source share