Advertisement
Guest User

Linux user namespaces example application

a guest
Nov 14th, 2017
376
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 11.68 KB | None | 0 0
  1.        /* userns_child_exec.c
  2.  
  3.           Licensed under GNU General Public License v2 or later
  4.  
  5.           Create a child process that executes a shell command in new
  6.           namespace(s); allow UID and GID mappings to be specified when
  7.           creating a user namespace.
  8.        */
  9.        #define _GNU_SOURCE
  10.        #include <sched.h>
  11.        #include <unistd.h>
  12.        #include <stdlib.h>
  13.        #include <sys/wait.h>
  14.        #include <signal.h>
  15.        #include <fcntl.h>
  16.        #include <stdio.h>
  17.        #include <string.h>
  18.        #include <limits.h>
  19.        #include <errno.h>
  20.  
  /* Minimal fatal-error helper: report 'msg' followed by the text that
     describes the current value of 'errno' (via perror()), then
     terminate the calling process with status EXIT_FAILURE. */

  #define errExit(msg)            \
      do {                        \
          perror(msg);            \
          exit(EXIT_FAILURE);     \
      } while (0)
  26.  
  /* Arguments handed to the start function of the cloned child */
  struct child_args {
      /* Command (and its arguments) that the child passes to execvp() */
      char **argv;

      /* Pipe used to synchronize parent and child: [0] is the read end,
         [1] is the write end */
      int pipe_fd[2];
  };

  /* Set to 1 by the -v option; enables the parent's progress messages */
  static int verbose;
  33.  
  /* Print the usage message for this program on stderr and terminate
     with status EXIT_FAILURE. This function does not return.

     'pname' is the program name shown in the first line of the message.

     The help text lives in a table instead of being emitted through a
     function-like macro: the former 'fpe' macro carried its own trailing
     semicolon and stayed defined for the remainder of the file. The
     bytes written to stderr are unchanged, including the four-space
     prefix that is also emitted in front of the empty separator lines. */

  static void
  usage(char *pname)
  {
      static const char *const help_lines[] = {
          "-i          New IPC namespace\n",
          "-m          New mount namespace\n",
          "-n          New network namespace\n",
          "-p          New PID namespace\n",
          "-u          New UTS namespace\n",
          "-U          New user namespace\n",
          "-M uid_map  Specify UID map for user namespace\n",
          "-G gid_map  Specify GID map for user namespace\n",
          "-z          Map user's UID and GID to 0 in user namespace\n",
          "            (equivalent to: -M '0 <uid> 1' -G '0 <gid> 1')\n",
          "-v          Display verbose messages\n",
          "\n",
          "If -z, -M, or -G is specified, -U is required.\n",
          "It is not permitted to specify both -z and either -M or -G.\n",
          "\n",
          "Map strings for -M and -G consist of records of the form:\n",
          "\n",
          "    ID-inside-ns   ID-outside-ns   len\n",
          "\n",
          "A map string can contain multiple records, separated by commas;\n",
          "the commas are replaced by newlines before writing to map files.\n",
      };
      size_t i;

      fprintf(stderr, "Usage: %s [options] cmd [arg...]\n\n", pname);
      fprintf(stderr, "Create a child process that executes a shell "
              "command in a new user namespace,\n"
              "and possibly also other new namespace(s).\n\n");
      fprintf(stderr, "Options can be:\n\n");

      /* Every help line is indented by four spaces */
      for (i = 0; i < sizeof help_lines / sizeof help_lines[0]; i++)
          fprintf(stderr, "    %s", help_lines[i]);

      exit(EXIT_FAILURE);
  }
  69.  
  /* Update the mapping file 'map_file', with the value provided in
     'mapping', a string that defines a UID or GID mapping. A UID or
     GID mapping consists of one or more newline-delimited records
     of the form:

         ID_inside-ns    ID-outside-ns   length

     Requiring the user to supply a string that contains newlines is
     of course inconvenient for command-line use. Thus, we permit the
     use of commas to delimit records in this string, and replace them
     with newlines before writing the string to the file.

     Note that 'mapping' is modified in place by that replacement.
     'map_file' must already exist; it is opened without O_CREAT.

     Any failure (open error, write error, or short write) is reported
     on stderr and terminates the process with EXIT_FAILURE. */

  static void
  update_map(char *mapping, const char *map_file)
  {
      size_t map_len;     /* Length of 'mapping' */
      ssize_t written;    /* Result of the single write() call */
      char *comma;
      int fd;

      /* Replace commas in mapping string with newlines */

      for (comma = strchr(mapping, ','); comma != NULL;
              comma = strchr(comma + 1, ','))
          *comma = '\n';

      map_len = strlen(mapping);

      fd = open(map_file, O_RDWR);
      if (fd == -1) {
          fprintf(stderr, "ERROR: open %s: %s\n", map_file,
                  strerror(errno));
          exit(EXIT_FAILURE);
      }

      /* The whole map goes out in one write(). user_namespaces(7)
         documents that a map file may be written only once, so a short
         write is treated as fatal rather than retried. 'errno' is only
         meaningful when write() returned -1, hence the two separate
         diagnostics. */

      written = write(fd, mapping, map_len);
      if (written == -1) {
          fprintf(stderr, "ERROR: write %s: %s\n", map_file,
                  strerror(errno));
          exit(EXIT_FAILURE);
      }
      if ((size_t) written != map_len) {
          fprintf(stderr, "ERROR: write %s: short write (%ld of %lu bytes)\n",
                  map_file, (long) written, (unsigned long) map_len);
          exit(EXIT_FAILURE);
      }

      close(fd);
  }
  110.  
  111.        /* Linux 3.19 made a change in the handling of setgroups(2) and the
  112.           'gid_map' file to address a security issue. The issue allowed
  113.           *unprivileged* users to employ user namespaces in order to drop
  114.           The upshot of the 3.19 changes is that in order to update the
  115.           'gid_maps' file, use of the setgroups() system call in this
  116.           user namespace must first be disabled by writing "deny" to one of
  117.           the /proc/PID/setgroups files for this namespace.  That is the
  118.           purpose of the following function. */
  119.  
  120.        static void
  121.        proc_setgroups_write(pid_t child_pid, char *str)
  122.        {
  123.            char setgroups_path[PATH_MAX];
  124.            int fd;
  125.  
  126.            snprintf(setgroups_path, PATH_MAX, "/proc/%ld/setgroups",
  127.                    (long) child_pid);
  128.  
  129.            fd = open(setgroups_path, O_RDWR);
  130.            if (fd == -1) {
  131.  
  132.                /* We may be on a system that doesn't support
  133.                   /proc/PID/setgroups. In that case, the file won't exist,
  134.                   and the system won't impose the restrictions that Linux 3.19
  135.                   added. That's fine: we don't need to do anything in order
  136.                   to permit 'gid_map' to be updated.
  137.  
  138.                   However, if the error from open() was something other than
  139.                   the ENOENT error that is expected for that case,  let the
  140.                   user know. */
  141.  
  142.                if (errno != ENOENT)
  143.                    fprintf(stderr, "ERROR: open %s: %s\n", setgroups_path,
  144.                        strerror(errno));
  145.                return;
  146.            }
  147.  
  148.            if (write(fd, str, strlen(str)) == -1)
  149.                fprintf(stderr, "ERROR: write %s: %s\n", setgroups_path,
  150.                    strerror(errno));
  151.  
  152.            close(fd);
  153.        }
  154.  
  155.        static int              /* Start function for cloned child */
  156.        childFunc(void *arg)
  157.        {
  158.            struct child_args *args = (struct child_args *) arg;
  159.            char ch;
  160.  
  161.            /* Wait until the parent has updated the UID and GID mappings.
  162.               See the comment in main(). We wait for end of file on a
  163.               pipe that will be closed by the parent process once it has
  164.               updated the mappings. */
  165.  
  166.            close(args->pipe_fd[1]);    /* Close our descriptor for the write
  167.                                           end of the pipe so that we see EOF
  168.                                           when parent closes its descriptor */
  169.            if (read(args->pipe_fd[0], &ch, 1) != 0) {
  170.                fprintf(stderr,
  171.                        "Failure in child: read from pipe returned != 0\n");
  172.                exit(EXIT_FAILURE);
  173.            }
  174.  
  175.            close(args->pipe_fd[0]);
  176.  
  177.            /* Execute a shell command */
  178.  
  179.            printf("About to exec %s\n", args->argv[0]);
  180.            execvp(args->argv[0], args->argv);
  181.            errExit("execvp");
  182.        }
  183.  
  /* Size in bytes (1 MiB) of the stack provided to the cloned child */
  #define STACK_SIZE (1 << 20)

  /* Memory backing the child's stack; main() passes clone() the address
     just past the end of this array */
  static char child_stack[STACK_SIZE];
  187.  
  188.        int
  189.        main(int argc, char *argv[])
  190.        {
  191.            int flags, opt, map_zero;
  192.            pid_t child_pid;
  193.            struct child_args args;
  194.            char *uid_map, *gid_map;
  195.            const int MAP_BUF_SIZE = 100;
  196.            char map_buf[MAP_BUF_SIZE];
  197.            char map_path[PATH_MAX];
  198.  
  199.            /* Parse command-line options. The initial '+' character in
  200.               the final getopt() argument prevents GNU-style permutation
  201.               of command-line options. That's useful, since sometimes
  202.               the 'command' to be executed by this program itself
  203.               has command-line options. We don't want getopt() to treat
  204.               those as options to this program. */
  205.  
  206.            flags = 0;
  207.            verbose = 0;
  208.            gid_map = NULL;
  209.            uid_map = NULL;
  210.            map_zero = 0;
  211.            while ((opt = getopt(argc, argv, "+imnpuUM:G:zv")) != -1) {
  212.                switch (opt) {
  213.                case 'i': flags |= CLONE_NEWIPC;        break;
  214.                case 'm': flags |= CLONE_NEWNS;         break;
  215.                case 'n': flags |= CLONE_NEWNET;        break;
  216.                case 'p': flags |= CLONE_NEWPID;        break;
  217.                case 'u': flags |= CLONE_NEWUTS;        break;
  218.                case 'v': verbose = 1;                  break;
  219.                case 'z': map_zero = 1;                 break;
  220.                case 'M': uid_map = optarg;             break;
  221.                case 'G': gid_map = optarg;             break;
  222.                case 'U': flags |= CLONE_NEWUSER;       break;
  223.                default:  usage(argv[0]);
  224.                }
  225.            }
  226.  
  227.            /* -M or -G without -U is nonsensical */
  228.  
  229.            if (((uid_map != NULL || gid_map != NULL || map_zero) &&
  230.                        !(flags & CLONE_NEWUSER)) ||
  231.                    (map_zero && (uid_map != NULL || gid_map != NULL)))
  232.                usage(argv[0]);
  233.  
  234.            args.argv = &argv[optind];
  235.  
  236.            /* We use a pipe to synchronize the parent and child, in order to
  237.               ensure that the parent sets the UID and GID maps before the child
  238.               calls execve(). This ensures that the child maintains its
  239.               capabilities during the execve() in the common case where we
  240.               want to map the child's effective user ID to 0 in the new user
  241.               namespace. Without this synchronization, the child would lose
  242.               its capabilities if it performed an execve() with nonzero
  243.               user IDs (see the capabilities(7) man page for details of the
  244.               transformation of a process's capabilities during execve()). */
  245.  
  246.            if (pipe(args.pipe_fd) == -1)
  247.                errExit("pipe");
  248.  
  249.            /* Create the child in new namespace(s) */
  250.  
  251.            child_pid = clone(childFunc, child_stack + STACK_SIZE,
  252.                              flags | SIGCHLD, &args);
  253.            if (child_pid == -1)
  254.                errExit("clone");
  255.  
  256.            /* Parent falls through to here */
  257.  
  258.            if (verbose)
  259.                printf("%s: PID of child created by clone() is %ld\n",
  260.                        argv[0], (long) child_pid);
  261.  
  262.            /* Update the UID and GID maps in the child */
  263.  
  264.            if (uid_map != NULL || map_zero) {
  265.                snprintf(map_path, PATH_MAX, "/proc/%ld/uid_map",
  266.                        (long) child_pid);
  267.                if (map_zero) {
  268.                    snprintf(map_buf, MAP_BUF_SIZE, "0 %ld 1", (long) getuid());
  269.                    uid_map = map_buf;
  270.                }
  271.                update_map(uid_map, map_path);
  272.            }
  273.  
  274.            if (gid_map != NULL || map_zero) {
  275.                proc_setgroups_write(child_pid, "deny");
  276.  
  277.                snprintf(map_path, PATH_MAX, "/proc/%ld/gid_map",
  278.                        (long) child_pid);
  279.                if (map_zero) {
  280.                    snprintf(map_buf, MAP_BUF_SIZE, "0 %ld 1", (long) getgid());
  281.                    gid_map = map_buf;
  282.                }
  283.                update_map(gid_map, map_path);
  284.            }
  285.  
  286.            /* Close the write end of the pipe, to signal to the child that we
  287.               have updated the UID and GID maps */
  288.  
  289.            close(args.pipe_fd[1]);
  290.  
  291.            if (waitpid(child_pid, NULL, 0) == -1)      /* Wait for child */
  292.                errExit("waitpid");
  293.  
  294.            if (verbose)
  295.                printf("%s: terminating\n", argv[0]);
  296.  
  297.            exit(EXIT_SUCCESS);
  298.        }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement