[SCM] The rsync repository. - branch master updated

Rsync CVS commit messages rsync-cvs at lists.samba.org
Mon Jan 10 01:48:00 UTC 2022


The branch, master has been updated
       via  6b8db0f6 Add an arg-protection idiom using backslash-escapes
       via  3b2804c8 Tweak a comment.
      from  ff1792ed Improve `--copy-links` description.

https://git.samba.org/?p=rsync.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 6b8db0f6440b28d26ef807d17517715c47e62bd9
Author: Wayne Davison <wayne at opencoder.net>
Date:   Sun Jan 9 17:35:39 2022 -0800

    Add an arg-protection idiom using backslash-escapes
    
    The new default is to protect args and options from unintended shell
    interpretation using backslash escapes.  See the new `--old-args` option
    for a way to get the old-style splitting.  This idiom was chosen over
    making `--protect-args` enabled by default because it is more backward
    compatible (e.g. it works with rrsync). Fixes #272.

commit 3b2804c8158a32e3d3b232430e48955a7dfc9178
Author: Wayne Davison <wayne at opencoder.net>
Date:   Sun Jan 9 14:03:31 2022 -0800

    Tweak a comment.

-----------------------------------------------------------------------

Summary of changes:
 NEWS.md                                  |  18 +++-
 main.c                                   |   6 +-
 options.c                                | 143 ++++++++++++++++++++-----------
 packaging/{cull_options => cull-options} |  14 +--
 rsync.1.md                               |  87 ++++++++++++-------
 support/lsh.sh                           |   3 +-
 support/rrsync                           |   6 +-
 7 files changed, 177 insertions(+), 100 deletions(-)
 rename packaging/{cull_options => cull-options} (89%)


Changeset truncated at 500 lines:

diff --git a/NEWS.md b/NEWS.md
index dc523136..8bab61cf 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,7 +4,23 @@
 
 ## Changes in this version:
 
-### OUTPUT CHANGES:
+### BEHAVIOR CHANGES:
+
+ - A new form of arg protection was added that works similarly to the older
+   `--protect-args` (`-s`) option but in a way that avoids breaking things like
+   rrsync (the restricted rsync script): rsync now uses backslash escaping for
+   sending "shell-active" characters to the remote shell. This includes spaces,
+   so fetching a remote file via a simple quoted filename value now works by
+   default without any extra quoting:
+
+   ```shell
+       rsync -aiv host:'a simple file.pdf' .
+   ```
+
+   Wildcards are not escaped in filename args, but they are escaped in options
+   like the `--suffix` and `--usermap` values.  If your rsync script depends on
+   the old arg-splitting behavior, either run it with the `--old-args` option
+   or `export RSYNC_OLD_ARGS=1` in the script's environment.
 
  - A long-standing bug was preventing rsync from figuring out the current
    locale's decimal point character, which made rsync always output numbers
diff --git a/main.c b/main.c
index ef7a3010..0378f3f3 100644
--- a/main.c
+++ b/main.c
@@ -607,11 +607,7 @@ static pid_t do_cmd(char *cmd, char *machine, char *user, char **remote_argv, in
 				rprintf(FERROR, "internal: args[] overflowed in do_cmd()\n");
 				exit_cleanup(RERR_SYNTAX);
 			}
-			if (**remote_argv == '-') {
-				if (asprintf(args + argc++, "./%s", *remote_argv++) < 0)
-					out_of_memory("do_cmd");
-			} else
-				args[argc++] = *remote_argv++;
+			args[argc++] = safe_arg(NULL, *remote_argv++);
 			remote_argc--;
 		}
 	}
diff --git a/options.c b/options.c
index 75165adf..2dba06e3 100644
--- a/options.c
+++ b/options.c
@@ -102,6 +102,7 @@ int filesfrom_fd = -1;
 char *filesfrom_host = NULL;
 int eol_nulls = 0;
 int protect_args = -1;
+int old_style_args = -1;
 int human_readable = 1;
 int recurse = 0;
 int mkpath_dest_arg = 0;
@@ -780,6 +781,8 @@ static struct poptOption long_options[] = {
   {"files-from",       0,  POPT_ARG_STRING, &files_from, 0, 0, 0 },
   {"from0",           '0', POPT_ARG_VAL,    &eol_nulls, 1, 0, 0},
   {"no-from0",         0,  POPT_ARG_VAL,    &eol_nulls, 0, 0, 0},
+  {"old-args",         0,  POPT_ARG_VAL,    &old_style_args, 1, 0, 0},
+  {"no-old-args",      0,  POPT_ARG_VAL,    &old_style_args, 0, 0, 0},
   {"protect-args",    's', POPT_ARG_VAL,    &protect_args, 1, 0, 0},
   {"no-protect-args",  0,  POPT_ARG_VAL,    &protect_args, 0, 0, 0},
   {"no-s",             0,  POPT_ARG_VAL,    &protect_args, 0, 0, 0},
@@ -1922,6 +1925,13 @@ int parse_arguments(int *argc_p, const char ***argv_p)
 		max_alloc = size;
 	}
 
+	if (old_style_args < 0) {
+		if (!am_server && (arg = getenv("RSYNC_OLD_ARGS")) != NULL && *arg)
+			old_style_args = atoi(arg) ? 1 : 0;
+		else
+			old_style_args = 0;
+	}
+
 	if (protect_args < 0) {
 		if (am_server)
 			protect_args = 0;
@@ -2447,6 +2457,61 @@ int parse_arguments(int *argc_p, const char ***argv_p)
 }
 
 
+static char SPLIT_ARG_WHEN_OLD[1];
+
+/**
+ * Do backslash quoting of any weird chars in "arg", append the resulting
+ * string to the end of the "opt" (which gets a "=" appended if it is not
+ * an empty or NULL string), and return the (perhaps malloced) result.
+ * If opt is NULL, arg is considered a filename arg that allows wildcards.
+ * If it is "" or any other value, it is considered an option.
+ **/
+char *safe_arg(const char *opt, const char *arg)
+{
+#define SHELL_CHARS "!#$&;|<>(){}\"' \t\\"
+#define WILD_CHARS  "*?[]" /* We don't allow remote brace expansion */
+	BOOL is_filename_arg = !opt;
+	char *escapes = is_filename_arg ? SHELL_CHARS : WILD_CHARS SHELL_CHARS;
+	BOOL escape_leading_dash = is_filename_arg && *arg == '-';
+	int len1 = opt && *opt ? strlen(opt) + 1 : 0;
+	int len2 = strlen(arg);
+	int extras = escape_leading_dash ? 2 : 0;
+	char *ret;
+	if (!protect_args && (!old_style_args || (!is_filename_arg && opt != SPLIT_ARG_WHEN_OLD))) {
+		const char *f;
+		for (f = arg; *f; f++) {
+			if (strchr(escapes, *f))
+				extras++;
+		}
+	}
+	if (!len1 && !extras)
+		return (char*)arg;
+	ret = new_array(char, len1 + len2 + extras + 1);
+	if (len1) {
+		memcpy(ret, opt, len1-1);
+		ret[len1-1] = '=';
+	}
+	if (escape_leading_dash) {
+		ret[len1++] = '.';
+		ret[len1++] = '/';
+		extras -= 2;
+	}
+	if (!extras)
+		memcpy(ret + len1, arg, len2);
+	else {
+		const char *f = arg;
+		char *t = ret + len1;
+		while (*f) {
+			if (strchr(escapes, *f))
+				*t++ = '\\';
+			*t++ = *f++;
+		}
+	}
+	ret[len1+len2+extras] = '\0';
+	return ret;
+}
+
+
 /**
  * Construct a filtered list of options to pass through from the
  * client to the server.
@@ -2590,9 +2655,7 @@ void server_options(char **args, int *argc_p)
 			set++;
 		else
 			set = iconv_opt;
-		if (asprintf(&arg, "--iconv=%s", set) < 0)
-			goto oom;
-		args[ac++] = arg;
+		args[ac++] = safe_arg("--iconv", set);
 	}
 #endif
 
@@ -2658,33 +2721,24 @@ void server_options(char **args, int *argc_p)
 	}
 
 	if (backup_dir) {
+		/* This split idiom allows for ~/path expansion via the shell. */
 		args[ac++] = "--backup-dir";
-		args[ac++] = backup_dir;
+		args[ac++] = safe_arg("", backup_dir);
 	}
 
 	/* Only send --suffix if it specifies a non-default value. */
-	if (strcmp(backup_suffix, backup_dir ? "" : BACKUP_SUFFIX) != 0) {
-		/* We use the following syntax to avoid weirdness with '~'. */
-		if (asprintf(&arg, "--suffix=%s", backup_suffix) < 0)
-			goto oom;
-		args[ac++] = arg;
-	}
+	if (strcmp(backup_suffix, backup_dir ? "" : BACKUP_SUFFIX) != 0)
+		args[ac++] = safe_arg("--suffix", backup_suffix);
 
-	if (checksum_choice) {
-		if (asprintf(&arg, "--checksum-choice=%s", checksum_choice) < 0)
-			goto oom;
-		args[ac++] = arg;
-	}
+	if (checksum_choice)
+		args[ac++] = safe_arg("--checksum-choice", checksum_choice);
 
 	if (do_compression == CPRES_ZLIBX)
 		args[ac++] = "--new-compress";
 	else if (compress_choice && do_compression == CPRES_ZLIB)
 		args[ac++] = "--old-compress";
-	else if (compress_choice) {
-		if (asprintf(&arg, "--compress-choice=%s", compress_choice) < 0)
-			goto oom;
-		args[ac++] = arg;
-	}
+	else if (compress_choice)
+		args[ac++] = safe_arg("--compress-choice", compress_choice);
 
 	if (am_sender) {
 		if (max_delete > 0) {
@@ -2693,14 +2747,10 @@ void server_options(char **args, int *argc_p)
 			args[ac++] = arg;
 		} else if (max_delete == 0)
 			args[ac++] = "--max-delete=-1";
-		if (min_size >= 0) {
-			args[ac++] = "--min-size";
-			args[ac++] = min_size_arg;
-		}
-		if (max_size >= 0) {
-			args[ac++] = "--max-size";
-			args[ac++] = max_size_arg;
-		}
+		if (min_size >= 0)
+			args[ac++] = safe_arg("--min-size", min_size_arg);
+		if (max_size >= 0)
+			args[ac++] = safe_arg("--max-size", max_size_arg);
 		if (delete_before)
 			args[ac++] = "--delete-before";
 		else if (delete_during == 2)
@@ -2724,17 +2774,12 @@ void server_options(char **args, int *argc_p)
 		if (do_stats)
 			args[ac++] = "--stats";
 	} else {
-		if (skip_compress) {
-			if (asprintf(&arg, "--skip-compress=%s", skip_compress) < 0)
-				goto oom;
-			args[ac++] = arg;
-		}
+		if (skip_compress)
+			args[ac++] = safe_arg("--skip-compress", skip_compress);
 	}
 
-	if (max_alloc_arg && max_alloc != DEFAULT_MAX_ALLOC) {
-		args[ac++] = "--max-alloc";
-		args[ac++] = max_alloc_arg;
-	}
+	if (max_alloc_arg && max_alloc != DEFAULT_MAX_ALLOC)
+		args[ac++] = safe_arg("--max-alloc", max_alloc_arg);
 
 	/* --delete-missing-args needs the cooperation of both sides, but
 	 * the sender can handle --ignore-missing-args by itself. */
@@ -2759,7 +2804,7 @@ void server_options(char **args, int *argc_p)
 	if (partial_dir && am_sender) {
 		if (partial_dir != tmp_partialdir) {
 			args[ac++] = "--partial-dir";
-			args[ac++] = partial_dir;
+			args[ac++] = safe_arg("", partial_dir);
 		}
 		if (delay_updates)
 			args[ac++] = "--delay-updates";
@@ -2782,17 +2827,11 @@ void server_options(char **args, int *argc_p)
 		args[ac++] = "--use-qsort";
 
 	if (am_sender) {
-		if (usermap) {
-			if (asprintf(&arg, "--usermap=%s", usermap) < 0)
-				goto oom;
-			args[ac++] = arg;
-		}
+		if (usermap)
+			args[ac++] = safe_arg("--usermap", usermap);
 
-		if (groupmap) {
-			if (asprintf(&arg, "--groupmap=%s", groupmap) < 0)
-				goto oom;
-			args[ac++] = arg;
-		}
+		if (groupmap)
+			args[ac++] = safe_arg("--groupmap", groupmap);
 
 		if (ignore_existing)
 			args[ac++] = "--ignore-existing";
@@ -2803,7 +2842,7 @@ void server_options(char **args, int *argc_p)
 
 		if (tmpdir) {
 			args[ac++] = "--temp-dir";
-			args[ac++] = tmpdir;
+			args[ac++] = safe_arg("", tmpdir);
 		}
 
 		if (do_fsync)
@@ -2816,7 +2855,7 @@ void server_options(char **args, int *argc_p)
 			 */
 			for (i = 0; i < basis_dir_cnt; i++) {
 				args[ac++] = alt_dest_opt(0);
-				args[ac++] = basis_dir[i];
+				args[ac++] = safe_arg("", basis_dir[i]);
 			}
 		}
 	}
@@ -2841,7 +2880,7 @@ void server_options(char **args, int *argc_p)
 	if (files_from && (!am_sender || filesfrom_host)) {
 		if (filesfrom_host) {
 			args[ac++] = "--files-from";
-			args[ac++] = files_from;
+			args[ac++] = safe_arg("", files_from);
 			if (eol_nulls)
 				args[ac++] = "--from0";
 		} else {
@@ -2884,7 +2923,7 @@ void server_options(char **args, int *argc_p)
 			exit_cleanup(RERR_SYNTAX);
 		}
 		for (j = 1; j <= remote_option_cnt; j++)
-			args[ac++] = (char*)remote_options[j];
+			args[ac++] = safe_arg(SPLIT_ARG_WHEN_OLD, remote_options[j]);
 	}
 
 	*argc_p = ac;
diff --git a/packaging/cull_options b/packaging/cull-options
similarity index 89%
rename from packaging/cull_options
rename to packaging/cull-options
index ca061121..955c21f1 100755
--- a/packaging/cull_options
+++ b/packaging/cull-options
@@ -6,7 +6,7 @@
 import re, argparse
 
 short_no_arg = { }
-short_with_num = { '@': 1 };
+short_with_num = { '@': 1 }
 long_opts = { # These include some extra long-args that BackupPC uses:
         'block-size': 1,
         'daemon': -1,
@@ -57,11 +57,13 @@ def main():
                 continue
 
             if last_long_opt:
-                m = re.search(r'args\[ac\+\+\] = ([^["\s]+);', line)
+                m = re.search(r'args\[ac\+\+\] = safe_arg\("", ([^[("\s]+)\);', line)
                 if m:
                     long_opts[last_long_opt] = 2
                     last_long_opt = None
                     continue
+                if 'args[ac++] = ' in line:
+                    last_long_opt = None
 
             m = re.search(r'return "--([^"]+-dest)";', line)
             if m:
@@ -73,7 +75,9 @@ def main():
             if not m:
                 m = re.search(r'args\[ac\+\+\] = "--([^"=]+)=', line)
                 if not m:
-                    m = re.search(r'fmt = .*: "--([^"=]+)=', line)
+                    m = re.search(r'args\[ac\+\+\] = safe_arg\("--([^"=]+)"', line)
+                    if not m:
+                        m = re.search(r'fmt = .*: "--([^"=]+)=', line)
             if m:
                 long_opts[m.group(1)] = 1
                 last_long_opt = None
@@ -81,7 +85,7 @@ def main():
     long_opts['files-from'] = 3
 
     txt = """\
-### START of options data produced by the cull_options script. ###
+### START of options data produced by the cull-options script. ###
 
 # To disable a short-named option, add its letter to this string:
 """
@@ -119,7 +123,7 @@ def main():
         print("}")
     else:
         print(");")
-    print("\n### END of options data produced by the cull_options script. ###")
+    print("\n### END of options data produced by the cull-options script. ###")
 
 
 def str_assign(name, val, comment=None):
diff --git a/rsync.1.md b/rsync.1.md
index 2ad467c8..f94e468c 100644
--- a/rsync.1.md
+++ b/rsync.1.md
@@ -159,22 +159,16 @@ the hostname omitted.  For instance, all these work:
 
 >     rsync -av host:file1 :file2 host:file{3,4} /dest/
 >     rsync -av host::modname/file{1,2} host::modname/file3 /dest/
->     rsync -av host::modname/file1 ::modname/file{3,4}
+>     rsync -av host::modname/file1 ::modname/file{3,4} /dest/
 
-Older versions of rsync required using quoted spaces in the SRC, like these
+**Older versions of rsync** required using quoted spaces in the SRC, like these
 examples:
 
 >     rsync -av host:'dir1/file1 dir2/file2' /dest
 >     rsync host::'modname/dir1/file1 modname/dir2/file2' /dest
 
-This word-splitting still works (by default) in the latest rsync, but is not as
-easy to use as the first method.
-
-If you need to transfer a filename that contains whitespace, you can either
-specify the `--protect-args` (`-s`) option, or you'll need to escape the
-whitespace in a way that the remote shell will understand.  For instance:
-
->     rsync -av host:'file\ name\ with\ spaces' /dest
+This word-splitting only works in a modern rsync by using `--old-args` (or its
+environment variable) and making sure that `--protect-args` is not enabled.
 
 # CONNECTING TO AN RSYNC DAEMON
 
@@ -351,6 +345,7 @@ detailed description below for a complete description.
 --append                 append data onto shorter files
 --append-verify          --append w/old data in file checksum
 --dirs, -d               transfer directories without recursing
+--old-dirs, --old-d      works like --dirs when talking to old rsync
 --mkpath                 create the destination's path component
 --links, -l              copy symlinks as symlinks
 --copy-links, -L         transform symlink into referent file/dir
@@ -438,6 +433,7 @@ detailed description below for a complete description.
 --include-from=FILE      read include patterns from FILE
 --files-from=FILE        read list of source-file names from FILE
 --from0, -0              all *-from/filter files are delimited by 0s
+--old-args               disable the modern arg-protection idiom
 --protect-args, -s       no space-splitting; wildcard chars only
 --copy-as=USER[:GROUP]   specify user & optional group for the copy
 --address=ADDRESS        bind address for outgoing socket to daemon
@@ -1962,11 +1958,10 @@ your home directory (remove the '=' for that).
     cause rsync to have a different idea about what data to expect next over
     the socket, and that will make it fail in a cryptic fashion.
 
-    Note that it is best to use a separate `--remote-option` for each option
-    you want to pass.  This makes your usage compatible with the
-    `--protect-args` option.  If that option is off, any spaces in your remote
-    options will be split by the remote shell unless you take steps to protect
-    them.
+    Note that you should use a separate `-M` option for each remote option you
+    want to pass.  On older rsync versions, the presence of any spaces in the
+    remote-option arg could cause it to be split into separate remote args, but
+    this requires the use of `--old-args` in a modern rsync.
 
     When performing a local transfer, the "local" side is the sender and the
     "remote" side is the receiver.
@@ -2182,36 +2177,52 @@ your home directory (remove the '=' for that).
     files specified in a `--filter` rule.  It does not affect `--cvs-exclude`
     (since all names read from a .cvsignore file are split on whitespace).
 
+0. `--old-args`
+
+    This option tells rsync to stop trying to protect the arg values from
+    unintended word-splitting or other misinterpretation by using its new
+    backslash-escape idiom.  The newest default is for remote filenames to only
+    allow wildcards characters to be interpretated by the shell while
+    protecting other shell-interpreted characters (and the args of options get
+    even wildcards escaped).  The only active wildcard characters on the remote
+    side are: `*`, `?`, `[`, & `]`.
+
+    If you have a script that wants to use old-style arg splitting, this option
+    should get it going.
+
+    You may also control this setting via the RSYNC_OLD_ARGS environment
+    variable.  If this variable has a non-zero value, this setting will be
+    enabled by default, otherwise it will be disabled by default.  Either state
+    is overridden by a manually specified positive or negative version of this
+    option (the negative is `--no-old-args`).
+
 0.  `--protect-args`, `-s`
 
     This option sends all filenames and most options to the remote rsync
-    without allowing the remote shell to interpret them.  This means that
-    spaces are not split in names, and any non-wildcard special characters are
-    not translated (such as `~`, `$`, `;`, `&`, etc.).  Wildcards are expanded
-    on the remote host by rsync (instead of the shell doing it).
+    without allowing the remote shell to interpret them.  Wildcards are
+    expanded on the remote host by rsync instead of the shell doing it.
+
+    This is similar to the new-style backslash-escaping of args that was added
+    in 3.2.4, but supports some extra features and doesn't rely on backslash
+    escaping in the remote shell.
 
     If you use this option with `--iconv`, the args related to the remote side
     will also be translated from the local to the remote character-set.  The
     translation happens before wild-cards are expanded.  See also the
     `--files-from` option.
 
-    You may also control this option via the RSYNC_PROTECT_ARGS environment
-    variable.  If this variable has a non-zero value, this option will be
+    You may also control this setting via the RSYNC_PROTECT_ARGS environment
+    variable.  If this variable has a non-zero value, this setting will be
     enabled by default, otherwise it will be disabled by default.  Either state
     is overridden by a manually specified positive or negative version of this
     option (note that `--no-s` and `--no-protect-args` are the negative
-    versions).  Since this option was first introduced in 3.0.0, you'll need to
-    make sure it's disabled if you ever need to interact with a remote rsync
-    that is older than that.
+    versions).
 
-    Rsync can also be configured (at build time) to have this option enabled by
-    default (with is overridden by both the environment and the command-line).
-    Run `rsync --version` to check if this is the case, as it will display
-    "default protect-args" or "optional protect-args" depending on how it was
-    compiled.
+    You may need to disable this option when interacting with an older rsync
+    (one prior to 3.0.0).
 
-    This option will eventually become a new default setting at some
-    as-yet-undetermined point in the future.
+    Note that this option is incompatible with the use of the restricted rsync
+    script (`rrsync`) since it hides options from the script's inspection.
 
 0.  `--copy-as=USER[:GROUP]`
 
@@ -2679,7 +2690,9 @@ your home directory (remove the '=' for that).
     The `--usermap` option implies the `--owner` option while the `--groupmap`
     option implies the `--group` option.
 


-- 
The rsync repository.



More information about the rsync-cvs mailing list