Fixing exclude/include wildcard handling

Wayne Davison wayned at users.sourceforge.net
Tue May 7 21:37:02 EST 2002


Here's a first cut at replacing the fnmatch() call with some code that
knows how to distinguish "**" from "*" and also knows how to match at
the tail-end of the path (at any point following a slash).  The new
function is based on Rich Salz's wildmat() code, which means that it
doesn't currently handle POSIX "[[:alpha:]]"-style character classes.
If we want that added, we could probably merge some of the
lib/fnmatch.c code into this.

See if you like this.  I tested the new function with a simple wildmat
test suite that I found on the net and modified:

    http://www.clari.net/~wayne/testwild.c

I've done only limited rsync testing with the new code, so be careful.

..wayne..

---8<------8<------8<------8<---cut here--->8------>8------>8------>8---
Index: Makefile.in
--- Makefile.in	2002/04/08 06:23:34	1.84
+++ Makefile.in	2002/05/08 04:19:50
@@ -23,7 +23,7 @@
 .SUFFIXES:
 .SUFFIXES: .c .o
 
-LIBOBJ=lib/fnmatch.o lib/compat.o lib/snprintf.o lib/mdfour.o \
+LIBOBJ=lib/wildmat.o lib/compat.o lib/snprintf.o lib/mdfour.o \
 	lib/permstring.o \
 	@LIBOBJS@
 ZLIBOBJ=zlib/deflate.o zlib/infblock.o zlib/infcodes.o zlib/inffast.o \
Index: access.c
--- access.c	2002/04/11 02:25:53	1.5
+++ access.c	2002/05/08 04:19:50
@@ -27,7 +27,7 @@
 static int match_hostname(char *host, char *tok)
 {
 	if (!host || !*host) return 0;
-	return (fnmatch(tok, host, 0) == 0);
+	return wildmat(host, tok);
 }
 
 
Index: authenticate.c
--- authenticate.c	2002/01/24 02:33:45	1.19
+++ authenticate.c	2002/05/08 04:19:50
@@ -239,7 +239,7 @@
 	if (!users) return NULL;
 
 	for (tok=strtok(users," ,\t"); tok; tok = strtok(NULL," ,\t")) {
-		if (fnmatch(tok, user, 0) == 0) break;
+		if (wildmat(user, tok)) break;
 	}
 	free(users);
 
Index: exclude.c
--- exclude.c	2002/04/11 02:25:53	1.44
+++ exclude.c	2002/05/08 04:19:50
@@ -55,18 +55,7 @@
 	if (!ret->pattern) out_of_memory("make_exclude");
 
 	if (strpbrk(pattern, "*[?")) {
-	    ret->regular_exp = 1;
-	    ret->fnmatch_flags = FNM_PATHNAME;
-	    if (strstr(pattern, "**")) {
-		    static int tested;
-		    if (!tested) {
-			    tested = 1;
-			    if (fnmatch("a/b/*", "a/b/c/d", FNM_PATHNAME)==0) {
-				    rprintf(FERROR,"WARNING: fnmatch FNM_PATHNAME is broken on your system\n");
-			    }
-		    }
-		    ret->fnmatch_flags = 0;
-	    }
+		ret->wild_match = 1;
 	}
 
 	if (strlen(pattern) > 1 && pattern[strlen(pattern)-1] == '/') {
@@ -107,10 +96,10 @@
 		pattern++;
 	}
 
-	if (ex->regular_exp) {
-		if (fnmatch(pattern, name, ex->fnmatch_flags) == 0) {
+	if (ex->wild_match) {
+		if (match_start? wildmat(name, pattern)
+			       : wildmat_tail(name, pattern))
 			return 1;
-		}
 	} else {
 		int l1 = strlen(name);
 		int l2 = strlen(pattern);
Index: rsync.h
--- rsync.h	2002/04/11 02:18:51	1.131
+++ rsync.h	2002/05/08 04:19:51
@@ -173,11 +173,7 @@
 #endif
 #endif
 
-#ifdef HAVE_FNMATCH
-#include <fnmatch.h>
-#else
-#include "lib/fnmatch.h"
-#endif
+#include "lib/wildmat.h"
 
 #ifdef HAVE_GLOB_H
 #include <glob.h>
@@ -392,8 +388,7 @@
 
 struct exclude_struct {
 	char *pattern;
-	int regular_exp;
-	int fnmatch_flags;
+	int wild_match;
 	int include;
 	int directory;
 	int local;
Index: token.c
--- token.c	2002/04/08 08:35:30	1.22
+++ token.c	2002/05/08 04:19:53
@@ -51,7 +51,7 @@
 	strlower(fname);
 
 	for (tok=strtok(dont," ");tok;tok=strtok(NULL," ")) {
-		if (fnmatch(tok, fname, 0) == 0) {
+		if (wildmat(fname, tok)) {
 			compression_level = 0;
 			break;
 		}
Index: lib/wildmat.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ lib/wildmat.c	Tue May  7 20:59:28 2002
@@ -0,0 +1,114 @@
+/*
+**  Do shell-style pattern matching for ?, \, [], and * characters.
+**  It is 8bit clean.
+**
+**  Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
+**  Rich $alz is now <rsalz at bbn.com>.
+**
+**  Modified by Wayne Davison to special-case '/' matching and to fix
+**  the character-class code.
+*/
+
+#ifndef NO_CONFIG_H /* for some tests */
+#include "config.h"
+#endif
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif
+
+/* What character marks an inverted character class? */
+#define NEGATE_CLASS '!'
+
+#define FALSE 0
+#define TRUE 1
+
+int
+wildmat(const char *text, const char *p)	/* TRUE iff ALL of text matches pattern p */
+{
+    int last, matched, special;	/* special: "**" was seen, or the class is negated */
+
+    for ( ; *p; text++, p++) {
+	if (*text == '\0' && *p != '*')	/* only "*" may match empty text */
+	    return FALSE;
+	switch (*p) {
+	case '\\':
+	    /* Literal match with following character. */
+	    p++;
+	    /* FALLTHROUGH */
+	default:
+	    if (*text != *p)
+		return FALSE;
+	    continue;
+	case '?':
+	    /* Match any single character except '/'. */
+	    if (*text == '/')
+		return FALSE;
+	    continue;
+	case '*':
+	    if (*++p == '*') {
+		while (*++p == '*') {}	/* a run of two or more stars counts as one "**" */
+		special = TRUE;
+	    }
+	    else
+		special = FALSE;
+	    if (*p == '\0') {
+		/* Trailing "**" matches everything; trailing "*" only if no '/' remains. */
+		return special? TRUE : strchr(text, '/') == 0;
+	    }
+	    for ( ; *text; text++) {	/* try the rest of the pattern at each position */
+		if (wildmat(text, p))
+		    return TRUE;
+		if (!special && *text == '/')	/* a single "*" may not cross a '/' */
+		    return FALSE;
+	    }
+	    return FALSE;
+	case '[':
+	    special = *++p == NEGATE_CLASS ? TRUE : FALSE;
+	    if (special)
+		/* Inverted character class. */
+		p++;
+	    last = 0400;	/* 0400 == 256: placeholder until a class member is read */
+	    matched = FALSE;
+	    if (*p == ']' || *p == '-') {	/* a leading ']' or '-' is a literal member */
+		last = *p++;
+		if (*text == last)
+		    matched = TRUE;
+	    }
+	    for ( ; *p != ']'; last = *p++) {
+		if (!*p)	/* pattern ended before ']': no match */
+		    return FALSE;
+		if (*p == '-' && p[1] && p[1] != ']') {
+		    if (*text <= *++p && *text >= last)	/* range last..*p, compared as plain char */
+			matched = TRUE;
+		}
+		else if (*text == *p)
+		    matched = TRUE;
+	    }
+	    if (matched == special)	/* miss on a normal class, or hit on a negated one */
+		return FALSE;
+	    continue;
+	}
+    }
+
+    return *text == '\0';	/* pattern used up: text must be used up as well */
+}
+
+/* Match p against all of text, then against each suffix following a '/'; TRUE on first hit. */
+
+int
+wildmat_tail(const char *text, const char *p)
+{
+    while (1) {
+	if (wildmat(text, p))
+	    return TRUE;
+	if (!(text = strchr(text, '/')))	/* no further '/': nothing left to try */
+	    break;
+	text++;	/* resume just past the '/' */
+    }
+    return FALSE;
+}
Index: lib/wildmat.h
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ lib/wildmat.h	Tue May  7 21:09:53 2002
@@ -0,0 +1,4 @@
+/* wildmat.h -- prototypes for lib/wildmat.c; both return nonzero on a match */
+
+int wildmat(const char *text, const char *pattern);
+int wildmat_tail(const char *text, const char *pattern);
---8<------8<------8<------8<---cut here--->8------>8------>8------>8---





More information about the rsync mailing list