Advertisement
Guest User

Untitled

a guest
Feb 22nd, 2019
176
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Diff 113.73 KB | None | 0 0
  1. diff -ruNb a//Documentation/filesystems/Locking b//Documentation/filesystems/Locking
  2. --- a//Documentation/filesystems/Locking    2012-10-12 21:48:25.000000000 +0100
  3. +++ b//Documentation/filesystems/Locking    2012-10-21 15:32:26.594986267 +0100
  4. @@ -62,6 +62,7 @@
  5.     int (*removexattr) (struct dentry *, const char *);
  6.     int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
  7.     void (*update_time)(struct inode *, struct timespec *, int);
  8. +   struct file *(*open)(struct dentry *,struct file *,const struct cred *);
  9.  
  10.  locking rules:
  11.     all may block
  12. @@ -89,7 +90,7 @@
  13.  removexattr:   yes
  14.  fiemap:        no
  15.  update_time:   no
  16. -
  17. +open:      no
  18.     Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
  19.  victim.
  20.     cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
  21. diff -ruNb a//Documentation/filesystems/overlayfs.txt b//Documentation/filesystems/overlayfs.txt
  22. --- a//Documentation/filesystems/overlayfs.txt  1970-01-01 01:00:00.000000000 +0100
  23. +++ b//Documentation/filesystems/overlayfs.txt  2012-10-21 15:34:04.765813939 +0100
  24. @@ -0,0 +1,199 @@
  25. +Written by: Neil Brown <neilb@suse.de>
  26. +
  27. +Overlay Filesystem
  28. +==================
  29. +
  30. +This document describes a prototype for a new approach to providing
  31. +overlay-filesystem functionality in Linux (sometimes referred to as
  32. +union-filesystems).  An overlay-filesystem tries to present a
  33. +filesystem which is the result of overlaying one filesystem on top
  34. +of the other.
  35. +
  36. +The result will inevitably fail to look exactly like a normal
  37. +filesystem for various technical reasons.  The expectation is that
  38. +many use cases will be able to ignore these differences.
  39. +
  40. +This approach is 'hybrid' because the objects that appear in the
  41. +filesystem do not all appear to belong to that filesystem.  In many
  42. +cases an object accessed in the union will be indistinguishable
  43. +from accessing the corresponding object from the original filesystem.
  44. +This is most obvious from the 'st_dev' field returned by stat(2).
  45. +
  46. +While directories will report an st_dev from the overlay-filesystem,
  47. +all non-directory objects will report an st_dev from the lower or
  48. +upper filesystem that is providing the object.  Similarly st_ino will
  49. +only be unique when combined with st_dev, and both of these can change
  50. +over the lifetime of a non-directory object.  Many applications and
  51. +tools ignore these values and will not be affected.
  52. +
  53. +Upper and Lower
  54. +---------------
  55. +
  56. +An overlay filesystem combines two filesystems - an 'upper' filesystem
  57. +and a 'lower' filesystem.  When a name exists in both filesystems, the
  58. +object in the 'upper' filesystem is visible while the object in the
  59. +'lower' filesystem is either hidden or, in the case of directories,
  60. +merged with the 'upper' object.
  61. +
  62. +It would be more correct to refer to an upper and lower 'directory
  63. +tree' rather than 'filesystem' as it is quite possible for both
  64. +directory trees to be in the same filesystem and there is no
  65. +requirement that the root of a filesystem be given for either upper or
  66. +lower.
  67. +
  68. +The lower filesystem can be any filesystem supported by Linux and does
  69. +not need to be writable.  The lower filesystem can even be another
  70. +overlayfs.  The upper filesystem will normally be writable and if it
  71. +is it must support the creation of trusted.* extended attributes, and
  72. +must provide valid d_type in readdir responses, at least for symbolic
  73. +links - so NFS is not suitable.
  74. +
  75. +A read-only overlay of two read-only filesystems may use any
  76. +filesystem type.
  77. +
  78. +Directories
  79. +-----------
  80. +
  81. +Overlaying mainly involves directories.  If a given name appears in both
  82. +upper and lower filesystems and refers to a non-directory in either,
  83. +then the lower object is hidden - the name refers only to the upper
  84. +object.
  85. +
  86. +Where both upper and lower objects are directories, a merged directory
  87. +is formed.
  88. +
  89. +At mount time, the two directories given as mount options are combined
  90. +into a merged directory:
  91. +
  92. +  mount -t overlayfs overlayfs -olowerdir=/lower,upperdir=/upper /overlay
  93. +
  94. +Then whenever a lookup is requested in such a merged directory, the
  95. +lookup is performed in each actual directory and the combined result
  96. +is cached in the dentry belonging to the overlay filesystem.  If both
  97. +actual lookups find directories, both are stored and a merged
  98. +directory is created, otherwise only one is stored: the upper if it
  99. +exists, else the lower.
  100. +
  101. +Only the lists of names from directories are merged.  Other content
  102. +such as metadata and extended attributes are reported for the upper
  103. +directory only.  These attributes of the lower directory are hidden.
  104. +
  105. +whiteouts and opaque directories
  106. +--------------------------------
  107. +
  108. +In order to support rm and rmdir without changing the lower
  109. +filesystem, an overlay filesystem needs to record in the upper filesystem
  110. +that files have been removed.  This is done using whiteouts and opaque
  111. +directories (non-directories are always opaque).
  112. +
  113. +The overlay filesystem uses extended attributes with a
  114. +"trusted.overlay."  prefix to record these details.
  115. +
  116. +A whiteout is created as a symbolic link with target
  117. +"(overlay-whiteout)" and with xattr "trusted.overlay.whiteout" set to "y".
  118. +When a whiteout is found in the upper level of a merged directory, any
  119. +matching name in the lower level is ignored, and the whiteout itself
  120. +is also hidden.
  121. +
  122. +A directory is made opaque by setting the xattr "trusted.overlay.opaque"
  123. +to "y".  Where the upper filesystem contains an opaque directory, any
  124. +directory in the lower filesystem with the same name is ignored.
  125. +
  126. +readdir
  127. +-------
  128. +
  129. +When a 'readdir' request is made on a merged directory, the upper and
  130. +lower directories are each read and the name lists merged in the
  131. +obvious way (upper is read first, then lower - entries that already
  132. +exist are not re-added).  This merged name list is cached in the
  133. +'struct file' and so remains as long as the file is kept open.  If the
  134. +directory is opened and read by two processes at the same time, they
  135. +will each have separate caches.  A seekdir to the start of the
  136. +directory (offset 0) followed by a readdir will cause the cache to be
  137. +discarded and rebuilt.
  138. +
  139. +This means that changes to the merged directory do not appear while a
  140. +directory is being read.  This is unlikely to be noticed by many
  141. +programs.
  142. +
  143. +seek offsets are assigned sequentially when the directories are read.
  144. +Thus if
  145. +  - read part of a directory
  146. +  - remember an offset, and close the directory
  147. +  - re-open the directory some time later
  148. +  - seek to the remembered offset
  149. +
  150. +there may be little correlation between the old and new locations in
  151. +the list of filenames, particularly if anything has changed in the
  152. +directory.
  153. +
  154. +Readdir on directories that are not merged is simply handled by the
  155. +underlying directory (upper or lower).
  156. +
  157. +
  158. +Non-directories
  159. +---------------
  160. +
  161. +Objects that are not directories (files, symlinks, device-special
  162. +files etc.) are presented either from the upper or lower filesystem as
  163. +appropriate.  When a file in the lower filesystem is accessed in a way
  164. +that requires write-access, such as opening for write access, changing
  165. +some metadata etc., the file is first copied from the lower filesystem
  166. +to the upper filesystem (copy_up).  Note that creating a hard-link
  167. +also requires copy_up, though of course creation of a symlink does
  168. +not.
  169. +
  170. +The copy_up may turn out to be unnecessary, for example if the file is
  171. +opened for read-write but the data is not modified.
  172. +
  173. +The copy_up process first makes sure that the containing directory
  174. +exists in the upper filesystem - creating it and any parents as
  175. +necessary.  It then creates the object with the same metadata (owner,
  176. +mode, mtime, symlink-target etc.) and then if the object is a file, the
  177. +data is copied from the lower to the upper filesystem.  Finally any
  178. +extended attributes are copied up.
  179. +
  180. +Once the copy_up is complete, the overlay filesystem simply
  181. +provides direct access to the newly created file in the upper
  182. +filesystem - future operations on the file are barely noticed by the
  183. +overlay filesystem (though an operation on the name of the file such as
  184. +rename or unlink will of course be noticed and handled).
  185. +
  186. +
  187. +Non-standard behavior
  188. +---------------------
  189. +
  190. +The copy_up operation essentially creates a new, identical file and
  191. +moves it over to the old name.  The new file may be on a different
  192. +filesystem, so both st_dev and st_ino of the file may change.
  193. +
  194. +Any open files referring to this inode will access the old data and
  195. +metadata.  Similarly any file locks obtained before copy_up will not
  196. +apply to the copied up file.
  197. +
  198. +On a file opened with O_RDONLY, fchmod(2), fchown(2), futimesat(2)
  199. +and fsetxattr(2) will fail with EROFS.
  200. +
  201. +If a file with multiple hard links is copied up, then this will
  202. +"break" the link.  Changes will not be propagated to other names
  203. +referring to the same inode.
  204. +
  205. +Symlinks in /proc/PID/ and /proc/PID/fd which point to a non-directory
  206. +object in overlayfs will not contain valid absolute paths, only
  207. +relative paths leading up to the filesystem's root.  This will be
  208. +fixed in the future.
  209. +
  210. +Some operations are not atomic, for example a crash during copy_up or
  211. +rename will leave the filesystem in an inconsistent state.  This will
  212. +be addressed in the future.
  213. +
  214. +Changes to underlying filesystems
  215. +---------------------------------
  216. +
  217. +Offline changes, when the overlay is not mounted, are allowed to either
  218. +the upper or the lower trees.
  219. +
  220. +Changes to the underlying filesystems while part of a mounted overlay
  221. +filesystem are not allowed.  If the underlying filesystem is changed,
  222. +the behavior of the overlay is undefined, though it will not result in
  223. +a crash or deadlock.
  224. diff -ruNb a//Documentation/filesystems/vfs.txt b//Documentation/filesystems/vfs.txt
  225. --- a//Documentation/filesystems/vfs.txt    2012-10-12 21:48:25.000000000 +0100
  226. +++ b//Documentation/filesystems/vfs.txt    2012-10-21 15:32:26.595986134 +0100
  227. @@ -364,6 +364,8 @@
  228.     ssize_t (*listxattr) (struct dentry *, char *, size_t);
  229.     int (*removexattr) (struct dentry *, const char *);
  230.     void (*update_time)(struct inode *, struct timespec *, int);
  231. +   struct file *(*open) (struct dentry *, struct file *,
  232. +                 const struct cred *);
  233.  };
  234.  
  235.  Again, all methods are called without any locks being held, unless
  236. @@ -476,6 +478,12 @@
  237.     an inode.  If this is not defined the VFS will update the inode itself
  238.     and call mark_inode_dirty_sync.
  239.  
  240. +  open: this is an alternative to f_op->open(), the difference is that this
  241. +   method may return any open file, not necessarily originating from the
  242. +   same filesystem as the one i_op->open() was called on.  It may be useful
  243. +   for stacking filesystems which want to allow native I/O directly on
  244. +   underlying files.
  245. +
  246.  The Address Space Object
  247.  ========================
  248.  
  249. diff -ruNb a//fs/ecryptfs/main.c b//fs/ecryptfs/main.c
  250. --- a//fs/ecryptfs/main.c   2012-10-12 21:48:25.000000000 +0100
  251. +++ b//fs/ecryptfs/main.c   2012-10-21 15:34:17.524102063 +0100
  252. @@ -544,6 +544,13 @@
  253.     s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
  254.     s->s_blocksize = path.dentry->d_sb->s_blocksize;
  255.     s->s_magic = ECRYPTFS_SUPER_MAGIC;
  256. +   s->s_stack_depth = path.dentry->d_sb->s_stack_depth + 1;
  257. +
  258. +   rc = -EINVAL;
  259. +   if (s->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
  260. +       printk(KERN_ERR "eCryptfs: maximum fs stacking depth exceeded\n");
  261. +       goto out_free;
  262. +   }
  263.  
  264.     inode = ecryptfs_get_inode(path.dentry->d_inode, s);
  265.     rc = PTR_ERR(inode);
  266. diff -ruNb a//fs/Kconfig b//fs/Kconfig
  267. --- a//fs/Kconfig   2012-10-12 21:48:25.000000000 +0100
  268. +++ b//fs/Kconfig   2012-10-21 15:33:23.868301470 +0100
  269. @@ -67,6 +67,7 @@
  270.  
  271.  source "fs/autofs4/Kconfig"
  272.  source "fs/fuse/Kconfig"
  273. +source "fs/overlayfs/Kconfig"
  274.  
  275.  config CUSE
  276.     tristate "Character device in Userspace support"
  277. diff -ruNb a//fs/Makefile b//fs/Makefile
  278. --- a//fs/Makefile  2012-10-12 21:48:25.000000000 +0100
  279. +++ b//fs/Makefile  2012-10-21 15:33:23.868301470 +0100
  280. @@ -106,6 +106,7 @@
  281.  obj-$(CONFIG_AUTOFS4_FS)   += autofs4/
  282.  obj-$(CONFIG_ADFS_FS)      += adfs/
  283.  obj-$(CONFIG_FUSE_FS)      += fuse/
  284. +obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/
  285.  obj-$(CONFIG_UDF_FS)       += udf/
  286.  obj-$(CONFIG_SUN_OPENPROMFS)   += openpromfs/
  287.  obj-$(CONFIG_OMFS_FS)      += omfs/
  288. diff -ruNb a//fs/namei.c b//fs/namei.c
  289. --- a//fs/namei.c   2012-10-12 21:48:25.000000000 +0100
  290. +++ b//fs/namei.c   2012-10-21 15:35:00.151382436 +0100
  291. @@ -315,6 +315,36 @@
  292.  }
  293.  
  294.  /**
  295. + * inode_only_permission  -  check access rights to a given inode only
  296. + * @inode: inode to check permissions on
  297. + * @mask:  right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...)
  298. + *
  299. + * Used to check read/write/execute permissions on an inode directly, we do
  300. + * not check filesystem permissions.
  301. + */
  302. +int inode_only_permission(struct inode *inode, int mask)
  303. +{
  304. +   int retval;
  305. +
  306. +   /*
  307. +    * Nobody gets write access to an immutable file.
  308. +    */
  309. +   if (unlikely(mask & MAY_WRITE) && IS_IMMUTABLE(inode))
  310. +       return -EACCES;
  311. +
  312. +   retval = do_inode_permission(inode, mask);
  313. +   if (retval)
  314. +       return retval;
  315. +
  316. +   retval = devcgroup_inode_permission(inode, mask);
  317. +   if (retval)
  318. +       return retval;
  319. +
  320. +   return security_inode_permission(inode, mask);
  321. +}
  322. +EXPORT_SYMBOL(inode_only_permission);
  323. +
  324. +/**
  325.   * inode_permission  -  check for access rights to a given inode
  326.   * @inode: inode to check permission on
  327.   * @mask:  right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...)
  328. @@ -328,8 +358,6 @@
  329.   */
  330.  int inode_permission(struct inode *inode, int mask)
  331.  {
  332. -   int retval;
  333. -
  334.     if (unlikely(mask & MAY_WRITE)) {
  335.         umode_t mode = inode->i_mode;
  336.  
  337. @@ -339,23 +367,9 @@
  338.         if (IS_RDONLY(inode) &&
  339.             (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
  340.             return -EROFS;
  341. -
  342. -       /*
  343. -        * Nobody gets write access to an immutable file.
  344. -        */
  345. -       if (IS_IMMUTABLE(inode))
  346. -           return -EACCES;
  347.     }
  348.  
  349. -   retval = do_inode_permission(inode, mask);
  350. -   if (retval)
  351. -       return retval;
  352. -
  353. -   retval = devcgroup_inode_permission(inode, mask);
  354. -   if (retval)
  355. -       return retval;
  356. -
  357. -   return security_inode_permission(inode, mask);
  358. +   return inode_only_permission(inode, mask);
  359.  }
  360.  
  361.  /**
  362. diff -ruNb a//fs/namespace.c b//fs/namespace.c
  363. --- a//fs/namespace.c   2012-10-12 21:48:25.000000000 +0100
  364. +++ b//fs/namespace.c   2012-10-21 15:33:09.262261274 +0100
  365. @@ -1327,6 +1327,24 @@
  366.     release_mounts(&umount_list);
  367.  }
  368.  
  369. +struct vfsmount *clone_private_mount(struct path *path)
  370. +{
  371. +   struct mount *old_mnt = real_mount(path->mnt);
  372. +   struct mount *new_mnt;
  373. +
  374. +   if (IS_MNT_UNBINDABLE(old_mnt))
  375. +       return ERR_PTR(-EINVAL);
  376. +
  377. +   down_read(&namespace_sem);
  378. +   new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
  379. +   up_read(&namespace_sem);
  380. +   if (!new_mnt)
  381. +       return ERR_PTR(-ENOMEM);
  382. +
  383. +   return &new_mnt->mnt;
  384. +}
  385. +EXPORT_SYMBOL_GPL(clone_private_mount);
  386. +
  387.  int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
  388.            struct vfsmount *root)
  389.  {
  390. diff -ruNb a//fs/open.c b//fs/open.c
  391. --- a//fs/open.c    2012-10-12 21:48:25.000000000 +0100
  392. +++ b//fs/open.c    2012-10-21 15:32:26.596986001 +0100
  393. @@ -667,8 +667,7 @@
  394.     return 0;
  395.  }
  396.  
  397. -static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt,
  398. -                  struct file *f,
  399. +static struct file *do_dentry_open(struct path *path, struct file *f,
  400.                    int (*open)(struct inode *, struct file *),
  401.                    const struct cred *cred)
  402.  {
  403. @@ -676,15 +675,16 @@
  404.     struct inode *inode;
  405.     int error;
  406.  
  407. +   path_get(path);
  408.     f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
  409.                 FMODE_PREAD | FMODE_PWRITE;
  410.  
  411.     if (unlikely(f->f_flags & O_PATH))
  412.         f->f_mode = FMODE_PATH;
  413.  
  414. -   inode = dentry->d_inode;
  415. +   inode = path->dentry->d_inode;
  416.     if (f->f_mode & FMODE_WRITE) {
  417. -       error = __get_file_write_access(inode, mnt);
  418. +       error = __get_file_write_access(inode, path->mnt);
  419.         if (error)
  420.             goto cleanup_file;
  421.         if (!special_file(inode->i_mode))
  422. @@ -692,8 +692,7 @@
  423.     }
  424.  
  425.     f->f_mapping = inode->i_mapping;
  426. -   f->f_path.dentry = dentry;
  427. -   f->f_path.mnt = mnt;
  428. +   f->f_path = *path;
  429.     f->f_pos = 0;
  430.     file_sb_list_add(f, inode->i_sb);
  431.  
  432. @@ -740,24 +739,22 @@
  433.              * here, so just reset the state.
  434.              */
  435.             file_reset_write(f);
  436. -           mnt_drop_write(mnt);
  437. +           mnt_drop_write(path->mnt);
  438.         }
  439.     }
  440.     file_sb_list_del(f);
  441.     f->f_path.dentry = NULL;
  442.     f->f_path.mnt = NULL;
  443.  cleanup_file:
  444. -   dput(dentry);
  445. -   mntput(mnt);
  446. +   path_put(path);
  447.     return ERR_PTR(error);
  448.  }
  449.  
  450. -static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
  451. -               struct file *f,
  452. +static struct file *__dentry_open(struct path *path, struct file *f,
  453.                 int (*open)(struct inode *, struct file *),
  454.                 const struct cred *cred)
  455.  {
  456. -   struct file *res = do_dentry_open(dentry, mnt, f, open, cred);
  457. +   struct file *res = do_dentry_open(path, f, open, cred);
  458.     if (!IS_ERR(res)) {
  459.         int error = open_check_o_direct(f);
  460.         if (error) {
  461. @@ -792,14 +789,14 @@
  462.  struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
  463.         int (*open)(struct inode *, struct file *))
  464.  {
  465. +   struct path path = { .dentry = dentry, .mnt = nd->path.mnt };
  466.     const struct cred *cred = current_cred();
  467.  
  468.     if (IS_ERR(nd->intent.open.file))
  469.         goto out;
  470.     if (IS_ERR(dentry))
  471.         goto out_err;
  472. -   nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt),
  473. -                        nd->intent.open.file,
  474. +   nd->intent.open.file = __dentry_open(&path, nd->intent.open.file,
  475.                          open, cred);
  476.  out:
  477.     return nd->intent.open.file;
  478. @@ -831,9 +828,7 @@
  479.     } else {
  480.         struct file *res;
  481.  
  482. -       path_get(&nd->path);
  483. -       res = do_dentry_open(nd->path.dentry, nd->path.mnt,
  484. -                    filp, NULL, cred);
  485. +       res = vfs_open(&nd->path, filp, cred);
  486.         if (!IS_ERR(res)) {
  487.             int error;
  488.  
  489. @@ -860,27 +855,48 @@
  490.  struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
  491.              const struct cred *cred)
  492.  {
  493. -   int error;
  494.     struct file *f;
  495. +   struct file *ret;
  496. +   struct path path = { .dentry = dentry, .mnt = mnt };
  497.  
  498.     validate_creds(cred);
  499.  
  500.     /* We must always pass in a valid mount pointer. */
  501.     BUG_ON(!mnt);
  502.  
  503. -   error = -ENFILE;
  504. +   ret = ERR_PTR(-ENFILE);
  505.     f = get_empty_filp();
  506. -   if (f == NULL) {
  507. -       dput(dentry);
  508. -       mntput(mnt);
  509. -       return ERR_PTR(error);
  510. +   if (f != NULL) {
  511. +       f->f_flags = flags;
  512. +       ret = vfs_open(&path, f, cred);
  513.     }
  514. +   path_put(&path);
  515.  
  516. -   f->f_flags = flags;
  517. -   return __dentry_open(dentry, mnt, f, NULL, cred);
  518. +   return ret;
  519.  }
  520.  EXPORT_SYMBOL(dentry_open);
  521.  
  522. +/**
  523. + * vfs_open - open the file at the given path
  524. + * @path: path to open
  525. + * @filp: newly allocated file with f_flags initialized
  526. + * @cred: credentials to use
  527. + *
  528. + * Open the file.  If successful, the returned file will have acquired
  529. + * an additional reference for path.
  530. + */
  531. +struct file *vfs_open(struct path *path, struct file *filp,
  532. +             const struct cred *cred)
  533. +{
  534. +   struct inode *inode = path->dentry->d_inode;
  535. +
  536. +   if (inode->i_op->open)
  537. +       return inode->i_op->open(path->dentry, filp, cred);
  538. +   else
  539. +       return __dentry_open(path, filp, NULL, cred);
  540. +}
  541. +EXPORT_SYMBOL(vfs_open);
  542. +
  543.  static void __put_unused_fd(struct files_struct *files, unsigned int fd)
  544.  {
  545.     struct fdtable *fdt = files_fdtable(files);
  546. diff -ruNb a//fs/overlayfs/copy_up.c b//fs/overlayfs/copy_up.c
  547. --- a//fs/overlayfs/copy_up.c   1970-01-01 01:00:00.000000000 +0100
  548. +++ b//fs/overlayfs/copy_up.c   2012-10-21 15:33:23.868301470 +0100
  549. @@ -0,0 +1,385 @@
  550. +/*
  551. + *
  552. + * Copyright (C) 2011 Novell Inc.
  553. + *
  554. + * This program is free software; you can redistribute it and/or modify it
  555. + * under the terms of the GNU General Public License version 2 as published by
  556. + * the Free Software Foundation.
  557. + */
  558. +
  559. +#include <linux/fs.h>
  560. +#include <linux/slab.h>
  561. +#include <linux/file.h>
  562. +#include <linux/splice.h>
  563. +#include <linux/xattr.h>
  564. +#include <linux/security.h>
  565. +#include <linux/uaccess.h>
  566. +#include <linux/sched.h>
  567. +#include "overlayfs.h"
  568. +
  569. +#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
  570. +
  571. +static int ovl_copy_up_xattr(struct dentry *old, struct dentry *new)
  572. +{
  573. +   ssize_t list_size, size;
  574. +   char *buf, *name, *value;
  575. +   int error;
  576. +
  577. +   if (!old->d_inode->i_op->getxattr ||
  578. +       !new->d_inode->i_op->getxattr)
  579. +       return 0;
  580. +
  581. +   list_size = vfs_listxattr(old, NULL, 0);
  582. +   if (list_size <= 0) {
  583. +       if (list_size == -EOPNOTSUPP)
  584. +           return 0;
  585. +       return list_size;
  586. +   }
  587. +
  588. +   buf = kzalloc(list_size, GFP_KERNEL);
  589. +   if (!buf)
  590. +       return -ENOMEM;
  591. +
  592. +   error = -ENOMEM;
  593. +   value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL);
  594. +   if (!value)
  595. +       goto out;
  596. +
  597. +   list_size = vfs_listxattr(old, buf, list_size);
  598. +   if (list_size <= 0) {
  599. +       error = list_size;
  600. +       goto out_free_value;
  601. +   }
  602. +
  603. +   for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
  604. +       size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX);
  605. +       if (size <= 0) {
  606. +           error = size;
  607. +           goto out_free_value;
  608. +       }
  609. +       error = vfs_setxattr(new, name, value, size, 0);
  610. +       if (error)
  611. +           goto out_free_value;
  612. +   }
  613. +
  614. +out_free_value:
  615. +   kfree(value);
  616. +out:
  617. +   kfree(buf);
  618. +   return error;
  619. +}
  620. +
  621. +static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
  622. +{
  623. +   struct file *old_file;
  624. +   struct file *new_file;
  625. +   int error = 0;
  626. +
  627. +   if (len == 0)
  628. +       return 0;
  629. +
  630. +   old_file = ovl_path_open(old, O_RDONLY);
  631. +   if (IS_ERR(old_file))
  632. +       return PTR_ERR(old_file);
  633. +
  634. +   new_file = ovl_path_open(new, O_WRONLY);
  635. +   if (IS_ERR(new_file)) {
  636. +       error = PTR_ERR(new_file);
  637. +       goto out_fput;
  638. +   }
  639. +
  640. +   /* FIXME: copy up sparse files efficiently */
  641. +   while (len) {
  642. +       loff_t offset = new_file->f_pos;
  643. +       size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
  644. +       long bytes;
  645. +
  646. +       if (len < this_len)
  647. +           this_len = len;
  648. +
  649. +       if (signal_pending_state(TASK_KILLABLE, current)) {
  650. +           error = -EINTR;
  651. +           break;
  652. +       }
  653. +
  654. +       bytes = do_splice_direct(old_file, &offset, new_file, this_len,
  655. +                SPLICE_F_MOVE);
  656. +       if (bytes <= 0) {
  657. +           error = bytes;
  658. +           break;
  659. +       }
  660. +
  661. +       len -= bytes;
  662. +   }
  663. +
  664. +   fput(new_file);
  665. +out_fput:
  666. +   fput(old_file);
  667. +   return error;
  668. +}
  669. +
  670. +static char *ovl_read_symlink(struct dentry *realdentry)
  671. +{
  672. +   int res;
  673. +   char *buf;
  674. +   struct inode *inode = realdentry->d_inode;
  675. +   mm_segment_t old_fs;
  676. +
  677. +   res = -EINVAL;
  678. +   if (!inode->i_op->readlink)
  679. +       goto err;
  680. +
  681. +   res = -ENOMEM;
  682. +   buf = (char *) __get_free_page(GFP_KERNEL);
  683. +   if (!buf)
  684. +       goto err;
  685. +
  686. +   old_fs = get_fs();
  687. +   set_fs(get_ds());
  688. +   /* The cast to a user pointer is valid due to the set_fs() */
  689. +   res = inode->i_op->readlink(realdentry,
  690. +                   (char __user *)buf, PAGE_SIZE - 1);
  691. +   set_fs(old_fs);
  692. +   if (res < 0) {
  693. +       free_page((unsigned long) buf);
  694. +       goto err;
  695. +   }
  696. +   buf[res] = '\0';
  697. +
  698. +   return buf;
  699. +
  700. +err:
  701. +   return ERR_PTR(res);
  702. +}
  703. +
  704. +static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
  705. +{
  706. +   struct iattr attr = {
  707. +       .ia_valid =
  708. +            ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
  709. +       .ia_atime = stat->atime,
  710. +       .ia_mtime = stat->mtime,
  711. +   };
  712. +
  713. +   return notify_change(upperdentry, &attr);
  714. +}
  715. +
  716. +static int ovl_set_mode(struct dentry *upperdentry, umode_t mode)
  717. +{
  718. +   struct iattr attr = {
  719. +       .ia_valid = ATTR_MODE,
  720. +       .ia_mode = mode,
  721. +   };
  722. +
  723. +   return notify_change(upperdentry, &attr);
  724. +}
  725. +
  726. +static int ovl_copy_up_locked(struct dentry *upperdir, struct dentry *dentry,
  727. +                 struct path *lowerpath, struct kstat *stat,
  728. +                 const char *link)
  729. +{
  730. +   int err;
  731. +   struct path newpath;
  732. +   umode_t mode = stat->mode;
  733. +
  734. +   /* Can't properly set mode on creation because of the umask */
  735. +   stat->mode &= S_IFMT;
  736. +
  737. +   ovl_path_upper(dentry, &newpath);
  738. +   WARN_ON(newpath.dentry);
  739. +   newpath.dentry = ovl_upper_create(upperdir, dentry, stat, link);
  740. +   if (IS_ERR(newpath.dentry))
  741. +       return PTR_ERR(newpath.dentry);
  742. +
  743. +   if (S_ISREG(stat->mode)) {
  744. +       err = ovl_copy_up_data(lowerpath, &newpath, stat->size);
  745. +       if (err)
  746. +           goto err_remove;
  747. +   }
  748. +
  749. +   err = ovl_copy_up_xattr(lowerpath->dentry, newpath.dentry);
  750. +   if (err)
  751. +       goto err_remove;
  752. +
  753. +   mutex_lock(&newpath.dentry->d_inode->i_mutex);
  754. +   if (!S_ISLNK(stat->mode))
  755. +       err = ovl_set_mode(newpath.dentry, mode);
  756. +   if (!err)
  757. +       err = ovl_set_timestamps(newpath.dentry, stat);
  758. +   mutex_unlock(&newpath.dentry->d_inode->i_mutex);
  759. +   if (err)
  760. +       goto err_remove;
  761. +
  762. +   ovl_dentry_update(dentry, newpath.dentry);
  763. +
  764. +   /*
  765. +    * Easiest way to get rid of the lower dentry reference is to
  766. +    * drop this dentry.  This is neither needed nor possible for
  767. +    * directories.
  768. +    */
  769. +   if (!S_ISDIR(stat->mode))
  770. +       d_drop(dentry);
  771. +
  772. +   return 0;
  773. +
  774. +err_remove:
  775. +   if (S_ISDIR(stat->mode))
  776. +       vfs_rmdir(upperdir->d_inode, newpath.dentry);
  777. +   else
  778. +       vfs_unlink(upperdir->d_inode, newpath.dentry);
  779. +
  780. +   dput(newpath.dentry);
  781. +
  782. +   return err;
  783. +}
  784. +
  785. +/*
  786. + * Copy up a single dentry
  787. + *
  788. + * Directory renames only allowed on "pure upper" (already created on
  789. + * upper filesystem, never copied up).  Directories which are on lower or
  790. + * are merged may not be renamed.  For these -EXDEV is returned and
  791. + * userspace has to deal with it.  This means, when copying up a
  792. + * directory we can rely on it and ancestors being stable.
  793. + *
  794. + * Non-directory renames start with copy up of source if necessary.  The
  795. + * actual rename will only proceed once the copy up was successful.  Copy
  796. + * up uses upper parent i_mutex for exclusion.  Since rename can change
  797. + * d_parent it is possible that the copy up will lock the old parent.  At
  798. + * that point the file will have already been copied up anyway.
  799. + */
  800. +static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
  801. +              struct path *lowerpath, struct kstat *stat)
  802. +{
  803. +   int err;
  804. +   struct kstat pstat;
  805. +   struct path parentpath;
  806. +   struct dentry *upperdir;
  807. +   const struct cred *old_cred;
  808. +   struct cred *override_cred;
  809. +   char *link = NULL;
  810. +
  811. +   ovl_path_upper(parent, &parentpath);
  812. +   upperdir = parentpath.dentry;
  813. +
  814. +   err = vfs_getattr(parentpath.mnt, parentpath.dentry, &pstat);
  815. +   if (err)
  816. +       return err;
  817. +
  818. +   if (S_ISLNK(stat->mode)) {
  819. +       link = ovl_read_symlink(lowerpath->dentry);
  820. +       if (IS_ERR(link))
  821. +           return PTR_ERR(link);
  822. +   }
  823. +
  824. +   err = -ENOMEM;
  825. +   override_cred = prepare_creds();
  826. +   if (!override_cred)
  827. +       goto out_free_link;
  828. +
  829. +   override_cred->fsuid = stat->uid;
  830. +   override_cred->fsgid = stat->gid;
  831. +   /*
  832. +    * CAP_SYS_ADMIN for copying up extended attributes
  833. +    * CAP_DAC_OVERRIDE for create
  834. +    * CAP_FOWNER for chmod, timestamp update
  835. +    * CAP_FSETID for chmod
  836. +    * CAP_MKNOD for mknod
  837. +    */
  838. +   cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  839. +   cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
  840. +   cap_raise(override_cred->cap_effective, CAP_FOWNER);
  841. +   cap_raise(override_cred->cap_effective, CAP_FSETID);
  842. +   cap_raise(override_cred->cap_effective, CAP_MKNOD);
  843. +   old_cred = override_creds(override_cred);
  844. +
  845. +   mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
  846. +   if (ovl_path_type(dentry) != OVL_PATH_LOWER) {
  847. +       err = 0;
  848. +   } else {
  849. +       err = ovl_copy_up_locked(upperdir, dentry, lowerpath,
  850. +                    stat, link);
  851. +       if (!err) {
  852. +           /* Restore timestamps on parent (best effort) */
  853. +           ovl_set_timestamps(upperdir, &pstat);
  854. +       }
  855. +   }
  856. +
  857. +   mutex_unlock(&upperdir->d_inode->i_mutex);
  858. +
  859. +   revert_creds(old_cred);
  860. +   put_cred(override_cred);
  861. +
  862. +out_free_link:
  863. +   if (link)
  864. +       free_page((unsigned long) link);
  865. +
  866. +   return err;
  867. +}
  868. +
  869. +int ovl_copy_up(struct dentry *dentry)
  870. +{
  871. +   int err;
  872. +
  873. +   err = 0;
  874. +   while (!err) {
  875. +       struct dentry *next;
  876. +       struct dentry *parent;
  877. +       struct path lowerpath;
  878. +       struct kstat stat;
  879. +       enum ovl_path_type type = ovl_path_type(dentry);
  880. +
  881. +       if (type != OVL_PATH_LOWER)
  882. +           break;
  883. +
  884. +       next = dget(dentry);
  885. +       /* find the topmost dentry not yet copied up */
  886. +       for (;;) {
  887. +           parent = dget_parent(next);
  888. +
  889. +           type = ovl_path_type(parent);
  890. +           if (type != OVL_PATH_LOWER)
  891. +               break;
  892. +
  893. +           dput(next);
  894. +           next = parent;
  895. +       }
  896. +
  897. +       ovl_path_lower(next, &lowerpath);
  898. +       err = vfs_getattr(lowerpath.mnt, lowerpath.dentry, &stat);
  899. +       if (!err)
  900. +           err = ovl_copy_up_one(parent, next, &lowerpath, &stat);
  901. +
  902. +       dput(parent);
  903. +       dput(next);
  904. +   }
  905. +
  906. +   return err;
  907. +}
  908. +
  909. +/* Optimize by not copying up the file first and truncating later */
  910. +int ovl_copy_up_truncate(struct dentry *dentry, loff_t size)
  911. +{
  912. +   int err;
  913. +   struct kstat stat;
  914. +   struct path lowerpath;
  915. +   struct dentry *parent = dget_parent(dentry);
  916. +
  917. +   err = ovl_copy_up(parent);
  918. +   if (err)
  919. +       goto out_dput_parent;
  920. +
  921. +   ovl_path_lower(dentry, &lowerpath);
  922. +   err = vfs_getattr(lowerpath.mnt, lowerpath.dentry, &stat);
  923. +   if (err)
  924. +       goto out_dput_parent;
  925. +
  926. +   if (size < stat.size)
  927. +       stat.size = size;
  928. +
  929. +   err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat);
  930. +
  931. +out_dput_parent:
  932. +   dput(parent);
  933. +   return err;
  934. +}
  935. diff -ruNb a//fs/overlayfs/dir.c b//fs/overlayfs/dir.c
  936. --- a//fs/overlayfs/dir.c   1970-01-01 01:00:00.000000000 +0100
  937. +++ b//fs/overlayfs/dir.c   2012-10-21 15:35:40.472972180 +0100
  938. @@ -0,0 +1,604 @@
  939. +/*
  940. + *
  941. + * Copyright (C) 2011 Novell Inc.
  942. + *
  943. + * This program is free software; you can redistribute it and/or modify it
  944. + * under the terms of the GNU General Public License version 2 as published by
  945. + * the Free Software Foundation.
  946. + */
  947. +
  948. +#include <linux/fs.h>
  949. +#include <linux/namei.h>
  950. +#include <linux/xattr.h>
  951. +#include <linux/security.h>
  952. +#include <linux/cred.h>
  953. +#include "overlayfs.h"
  954. +
  955. +static const char *ovl_whiteout_symlink = "(overlay-whiteout)";
  956. +
  957. +static int ovl_whiteout(struct dentry *upperdir, struct dentry *dentry)
  958. +{
  959. +   int err;
  960. +   struct dentry *newdentry;
  961. +   const struct cred *old_cred;
  962. +   struct cred *override_cred;
  963. +
  964. +   /* FIXME: recheck lower dentry to see if whiteout is really needed */
  965. +
  966. +   err = -ENOMEM;
  967. +   override_cred = prepare_creds();
  968. +   if (!override_cred)
  969. +       goto out;
  970. +
  971. +   /*
  972. +    * CAP_SYS_ADMIN for setxattr
  973. +    * CAP_DAC_OVERRIDE for symlink creation
  974. +    * CAP_FOWNER for unlink in sticky directory
  975. +    */
  976. +   cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  977. +   cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
  978. +   cap_raise(override_cred->cap_effective, CAP_FOWNER);
  979. +   override_cred->fsuid = 0;
  980. +   override_cred->fsgid = 0;
  981. +   old_cred = override_creds(override_cred);
  982. +
  983. +   newdentry = lookup_one_len(dentry->d_name.name, upperdir,
  984. +                  dentry->d_name.len);
  985. +   err = PTR_ERR(newdentry);
  986. +   if (IS_ERR(newdentry))
  987. +       goto out_put_cred;
  988. +
  989. +   /* Just been removed within the same locked region */
  990. +   WARN_ON(newdentry->d_inode);
  991. +
  992. +   err = vfs_symlink(upperdir->d_inode, newdentry, ovl_whiteout_symlink);
  993. +   if (err)
  994. +       goto out_dput;
  995. +
  996. +   ovl_dentry_version_inc(dentry->d_parent);
  997. +
  998. +   err = vfs_setxattr(newdentry, ovl_whiteout_xattr, "y", 1, 0);
  999. +   if (err)
  1000. +       vfs_unlink(upperdir->d_inode, newdentry);
  1001. +
  1002. +out_dput:
  1003. +   dput(newdentry);
  1004. +out_put_cred:
  1005. +   revert_creds(old_cred);
  1006. +   put_cred(override_cred);
  1007. +out:
  1008. +   if (err) {
  1009. +       /*
  1010. +        * There's no way to recover from failure to whiteout.
  1011. +        * What should we do?  Log a big fat error and... ?
  1012. +        */
  1013. +       printk(KERN_ERR "overlayfs: ERROR - failed to whiteout '%s'\n",
  1014. +              dentry->d_name.name);
  1015. +   }
  1016. +
  1017. +   return err;
  1018. +}
  1019. +
  1020. +static struct dentry *ovl_lookup_create(struct dentry *upperdir,
  1021. +                   struct dentry *template)
  1022. +{
  1023. +   int err;
  1024. +   struct dentry *newdentry;
  1025. +   struct qstr *name = &template->d_name;
  1026. +
  1027. +   newdentry = lookup_one_len(name->name, upperdir, name->len);
  1028. +   if (IS_ERR(newdentry))
  1029. +       return newdentry;
  1030. +
  1031. +   if (newdentry->d_inode) {
  1032. +       const struct cred *old_cred;
  1033. +       struct cred *override_cred;
  1034. +
  1035. +       /* No need to check whiteout if lower parent is non-existent */
  1036. +       err = -EEXIST;
  1037. +       if (!ovl_dentry_lower(template->d_parent))
  1038. +           goto out_dput;
  1039. +
  1040. +       if (!S_ISLNK(newdentry->d_inode->i_mode))
  1041. +           goto out_dput;
  1042. +
  1043. +       err = -ENOMEM;
  1044. +       override_cred = prepare_creds();
  1045. +       if (!override_cred)
  1046. +           goto out_dput;
  1047. +
  1048. +       /*
  1049. +        * CAP_SYS_ADMIN for getxattr
  1050. +        * CAP_FOWNER for unlink in sticky directory
  1051. +        */
  1052. +       cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  1053. +       cap_raise(override_cred->cap_effective, CAP_FOWNER);
  1054. +       old_cred = override_creds(override_cred);
  1055. +
  1056. +       err = -EEXIST;
  1057. +       if (ovl_is_whiteout(newdentry))
  1058. +           err = vfs_unlink(upperdir->d_inode, newdentry);
  1059. +
  1060. +       revert_creds(old_cred);
  1061. +       put_cred(override_cred);
  1062. +       if (err)
  1063. +           goto out_dput;
  1064. +
  1065. +       dput(newdentry);
  1066. +       newdentry = lookup_one_len(name->name, upperdir, name->len);
  1067. +       if (IS_ERR(newdentry)) {
  1068. +           ovl_whiteout(upperdir, template);
  1069. +           return newdentry;
  1070. +       }
  1071. +
  1072. +       /*
  1073. +        * Whiteout just been successfully removed, parent
  1074. +        * i_mutex is still held, there's no way the lookup
  1075. +        * could return positive.
  1076. +        */
  1077. +       WARN_ON(newdentry->d_inode);
  1078. +   }
  1079. +
  1080. +   return newdentry;
  1081. +
  1082. +out_dput:
  1083. +   dput(newdentry);
  1084. +   return ERR_PTR(err);
  1085. +}
  1086. +
  1087. +struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry,
  1088. +               struct kstat *stat, const char *link)
  1089. +{
  1090. +   int err;
  1091. +   struct dentry *newdentry;
  1092. +   struct inode *dir = upperdir->d_inode;
  1093. +
  1094. +   newdentry = ovl_lookup_create(upperdir, dentry);
  1095. +   if (IS_ERR(newdentry))
  1096. +       goto out;
  1097. +
  1098. +   switch (stat->mode & S_IFMT) {
  1099. +   case S_IFREG:
  1100. +       err = vfs_create(dir, newdentry, stat->mode, NULL);
  1101. +       break;
  1102. +
  1103. +   case S_IFDIR:
  1104. +       err = vfs_mkdir(dir, newdentry, stat->mode);
  1105. +       break;
  1106. +
  1107. +   case S_IFCHR:
  1108. +   case S_IFBLK:
  1109. +   case S_IFIFO:
  1110. +   case S_IFSOCK:
  1111. +       err = vfs_mknod(dir, newdentry, stat->mode, stat->rdev);
  1112. +       break;
  1113. +
  1114. +   case S_IFLNK:
  1115. +       err = vfs_symlink(dir, newdentry, link);
  1116. +       break;
  1117. +
  1118. +   default:
  1119. +       err = -EPERM;
  1120. +   }
  1121. +   if (err) {
  1122. +       if (ovl_dentry_is_opaque(dentry))
  1123. +           ovl_whiteout(upperdir, dentry);
  1124. +       dput(newdentry);
  1125. +       newdentry = ERR_PTR(err);
  1126. +   } else if (WARN_ON(!newdentry->d_inode)) {
  1127. +       /*
  1128. +        * Not quite sure if non-instantiated dentry is legal or not.
  1129. +        * VFS doesn't seem to care so check and warn here.
  1130. +        */
  1131. +       dput(newdentry);
  1132. +       newdentry = ERR_PTR(-ENOENT);
  1133. +   }
  1134. +
  1135. +out:
  1136. +   return newdentry;
  1137. +
  1138. +}
  1139. +
  1140. +static int ovl_set_opaque(struct dentry *upperdentry)
  1141. +{
  1142. +   int err;
  1143. +   const struct cred *old_cred;
  1144. +   struct cred *override_cred;
  1145. +
  1146. +   override_cred = prepare_creds();
  1147. +   if (!override_cred)
  1148. +       return -ENOMEM;
  1149. +
  1150. +   /* CAP_SYS_ADMIN for setxattr of "trusted" namespace */
  1151. +   cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  1152. +   old_cred = override_creds(override_cred);
  1153. +   err = vfs_setxattr(upperdentry, ovl_opaque_xattr, "y", 1, 0);
  1154. +   revert_creds(old_cred);
  1155. +   put_cred(override_cred);
  1156. +
  1157. +   return err;
  1158. +}
  1159. +
  1160. +static int ovl_remove_opaque(struct dentry *upperdentry)
  1161. +{
  1162. +   int err;
  1163. +   const struct cred *old_cred;
  1164. +   struct cred *override_cred;
  1165. +
  1166. +   override_cred = prepare_creds();
  1167. +   if (!override_cred)
  1168. +       return -ENOMEM;
  1169. +
  1170. +   /* CAP_SYS_ADMIN for removexattr of "trusted" namespace */
  1171. +   cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  1172. +   old_cred = override_creds(override_cred);
  1173. +   err = vfs_removexattr(upperdentry, ovl_opaque_xattr);
  1174. +   revert_creds(old_cred);
  1175. +   put_cred(override_cred);
  1176. +
  1177. +   return err;
  1178. +}
  1179. +
  1180. +static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
  1181. +            struct kstat *stat)
  1182. +{
  1183. +   int err;
  1184. +   enum ovl_path_type type;
  1185. +   struct path realpath;
  1186. +
  1187. +   type = ovl_path_real(dentry, &realpath);
  1188. +   err = vfs_getattr(realpath.mnt, realpath.dentry, stat);
  1189. +   if (err)
  1190. +       return err;
  1191. +
  1192. +   stat->dev = dentry->d_sb->s_dev;
  1193. +   stat->ino = dentry->d_inode->i_ino;
  1194. +
  1195. +   /*
  1196. +    * It's probably not worth it to count subdirs to get the
  1197. +    * correct link count.  nlink=1 seems to pacify 'find' and
  1198. +    * other utilities.
  1199. +    */
  1200. +   if (type == OVL_PATH_MERGE)
  1201. +       stat->nlink = 1;
  1202. +
  1203. +   return 0;
  1204. +}
  1205. +
  1206. +static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
  1207. +                const char *link)
  1208. +{
  1209. +   int err;
  1210. +   struct dentry *newdentry;
  1211. +   struct dentry *upperdir;
  1212. +   struct inode *inode;
  1213. +   struct kstat stat = {
  1214. +       .mode = mode,
  1215. +       .rdev = rdev,
  1216. +   };
  1217. +
  1218. +   err = -ENOMEM;
  1219. +   inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata);
  1220. +   if (!inode)
  1221. +       goto out;
  1222. +
  1223. +   err = ovl_copy_up(dentry->d_parent);
  1224. +   if (err)
  1225. +       goto out_iput;
  1226. +
  1227. +   upperdir = ovl_dentry_upper(dentry->d_parent);
  1228. +   mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
  1229. +
  1230. +   newdentry = ovl_upper_create(upperdir, dentry, &stat, link);
  1231. +   err = PTR_ERR(newdentry);
  1232. +   if (IS_ERR(newdentry))
  1233. +       goto out_unlock;
  1234. +
  1235. +   ovl_dentry_version_inc(dentry->d_parent);
  1236. +   if (ovl_dentry_is_opaque(dentry) && S_ISDIR(mode)) {
  1237. +       err = ovl_set_opaque(newdentry);
  1238. +       if (err) {
  1239. +           vfs_rmdir(upperdir->d_inode, newdentry);
  1240. +           ovl_whiteout(upperdir, dentry);
  1241. +           goto out_dput;
  1242. +       }
  1243. +   }
  1244. +   ovl_dentry_update(dentry, newdentry);
  1245. +   ovl_copyattr(newdentry->d_inode, inode);
  1246. +   d_instantiate(dentry, inode);
  1247. +   inode = NULL;
  1248. +   newdentry = NULL;
  1249. +   err = 0;
  1250. +
  1251. +out_dput:
  1252. +   dput(newdentry);
  1253. +out_unlock:
  1254. +   mutex_unlock(&upperdir->d_inode->i_mutex);
  1255. +out_iput:
  1256. +   iput(inode);
  1257. +out:
  1258. +   return err;
  1259. +}
  1260. +
  1261. +static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
  1262. +           struct nameidata *nd)
  1263. +{
  1264. +   return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
  1265. +}
  1266. +
  1267. +static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
  1268. +{
  1269. +   return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
  1270. +}
  1271. +
  1272. +static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
  1273. +            dev_t rdev)
  1274. +{
  1275. +   return ovl_create_object(dentry, mode, rdev, NULL);
  1276. +}
  1277. +
  1278. +static int ovl_symlink(struct inode *dir, struct dentry *dentry,
  1279. +            const char *link)
  1280. +{
  1281. +   return ovl_create_object(dentry, S_IFLNK, 0, link);
  1282. +}
  1283. +
  1284. +static int ovl_do_remove(struct dentry *dentry, bool is_dir)
  1285. +{
  1286. +   int err;
  1287. +   enum ovl_path_type type;
  1288. +   struct path realpath;
  1289. +   struct dentry *upperdir;
  1290. +
  1291. +   err = ovl_copy_up(dentry->d_parent);
  1292. +   if (err)
  1293. +       return err;
  1294. +
  1295. +   upperdir = ovl_dentry_upper(dentry->d_parent);
  1296. +   mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
  1297. +   type = ovl_path_real(dentry, &realpath);
  1298. +   if (type != OVL_PATH_LOWER) {
  1299. +       err = -ESTALE;
  1300. +       if (realpath.dentry->d_parent != upperdir)
  1301. +           goto out_d_drop;
  1302. +
  1303. +       /* FIXME: create whiteout up front and rename to target */
  1304. +
  1305. +       if (is_dir)
  1306. +           err = vfs_rmdir(upperdir->d_inode, realpath.dentry);
  1307. +       else
  1308. +           err = vfs_unlink(upperdir->d_inode, realpath.dentry);
  1309. +       if (err)
  1310. +           goto out_d_drop;
  1311. +
  1312. +       ovl_dentry_version_inc(dentry->d_parent);
  1313. +   }
  1314. +
  1315. +   if (type != OVL_PATH_UPPER || ovl_dentry_is_opaque(dentry))
  1316. +       err = ovl_whiteout(upperdir, dentry);
  1317. +
  1318. +   /*
  1319. +    * Keeping this dentry hashed would mean having to release
  1320. +    * upperpath/lowerpath, which could only be done if we are the
  1321. +    * sole user of this dentry.  Too tricky...  Just unhash for
  1322. +    * now.
  1323. +    */
  1324. +out_d_drop:
  1325. +   d_drop(dentry);
  1326. +   mutex_unlock(&upperdir->d_inode->i_mutex);
  1327. +
  1328. +   return err;
  1329. +}
  1330. +
  1331. +static int ovl_unlink(struct inode *dir, struct dentry *dentry)
  1332. +{
  1333. +   return ovl_do_remove(dentry, false);
  1334. +}
  1335. +
  1336. +/* rmdir: a non-upper-only dir must pass ovl_check_empty_and_clear() first */
  1337. +static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
  1338. +{
  1339. +   int err;
  1340. +   enum ovl_path_type type;
  1341. +
  1342. +   type = ovl_path_type(dentry);
  1343. +   if (type != OVL_PATH_UPPER) {
  1344. +       err = ovl_check_empty_and_clear(dentry, type);
  1345. +       if (err)
  1346. +           return err;
  1347. +   }
  1348. +
  1349. +   return ovl_do_remove(dentry, true);
  1350. +}
  1351. +
  1352. +/* Link @old to @new on upper after copying up @old and @new's parent */
  1353. +static int ovl_link(struct dentry *old, struct inode *newdir,
  1354. +           struct dentry *new)
  1355. +{
  1356. +   int err;
  1357. +   struct dentry *newdentry, *upperdir;
  1358. +   struct inode *newinode;
  1359. +
  1360. +   err = ovl_copy_up(old);
  1361. +   if (err)
  1362. +       goto out;
  1363. +
  1364. +   err = ovl_copy_up(new->d_parent);
  1365. +   if (err)
  1366. +       goto out;
  1367. +
  1368. +   upperdir = ovl_dentry_upper(new->d_parent);
  1369. +   mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
  1370. +   newdentry = ovl_lookup_create(upperdir, new);
  1371. +   err = PTR_ERR(newdentry);
  1372. +   if (IS_ERR(newdentry))
  1373. +       goto out_unlock;
  1374. +
  1375. +   err = vfs_link(ovl_dentry_upper(old), upperdir->d_inode, newdentry);
  1376. +   if (!err) {
  1377. +       if (WARN_ON(!newdentry->d_inode)) {
  1378. +           dput(newdentry);
  1379. +           err = -ENOENT;
  1380. +           goto out_unlock;
  1381. +       }
  1382. +       newinode = ovl_new_inode(old->d_sb, newdentry->d_inode->i_mode,
  1383. +               new->d_fsdata);
  1384. +       if (!newinode) {
  1385. +           /* Don't report success; undo the upper link (best effort) */
  1386. +           err = -ENOMEM;
  1387. +           vfs_unlink(upperdir->d_inode, newdentry);
  1388. +           goto link_fail;
  1389. +       }
  1390. +       /* Attributes come from the new upper inode, not its parent dir */
  1391. +       ovl_copyattr(newdentry->d_inode, newinode);
  1392. +       ovl_dentry_version_inc(new->d_parent);
  1393. +       ovl_dentry_update(new, newdentry);
  1394. +       d_instantiate(new, newinode);
  1395. +   } else {
  1396. +link_fail:
  1397. +       if (ovl_dentry_is_opaque(new))
  1398. +           ovl_whiteout(upperdir, new);
  1399. +       dput(newdentry);
  1400. +   }
  1401. +out_unlock:
  1402. +   mutex_unlock(&upperdir->d_inode->i_mutex);
  1403. +out:
  1404. +   return err;
  1405. +}
  1406. +
  1407. +static int ovl_rename(struct inode *olddir, struct dentry *old,
  1408. +           struct inode *newdir, struct dentry *new)
  1409. +{
  1410. +   int err;
  1411. +   enum ovl_path_type old_type;
  1412. +   enum ovl_path_type new_type;
  1413. +   struct dentry *old_upperdir;
  1414. +   struct dentry *new_upperdir;
  1415. +   struct dentry *olddentry;
  1416. +   struct dentry *newdentry;
  1417. +   struct dentry *trap;
  1418. +   bool old_opaque;
  1419. +   bool new_opaque;
  1420. +   bool new_create = false;
  1421. +   bool is_dir = S_ISDIR(old->d_inode->i_mode);
  1422. +
  1423. +   /* Don't copy up directory trees */
  1424. +   old_type = ovl_path_type(old);
  1425. +   if (old_type != OVL_PATH_UPPER && is_dir)
  1426. +       return -EXDEV;
  1427. +
  1428. +   if (new->d_inode) {
  1429. +       new_type = ovl_path_type(new);
  1430. +
  1431. +       if (new_type == OVL_PATH_LOWER && old_type == OVL_PATH_LOWER) {
  1432. +           if (ovl_dentry_lower(old)->d_inode ==
  1433. +               ovl_dentry_lower(new)->d_inode)
  1434. +               return 0;
  1435. +       }
  1436. +       if (new_type != OVL_PATH_LOWER && old_type != OVL_PATH_LOWER) {
  1437. +           if (ovl_dentry_upper(old)->d_inode ==
  1438. +               ovl_dentry_upper(new)->d_inode)
  1439. +               return 0;
  1440. +       }
  1441. +
  1442. +       if (new_type != OVL_PATH_UPPER &&
  1443. +           S_ISDIR(new->d_inode->i_mode)) {
  1444. +           err = ovl_check_empty_and_clear(new, new_type);
  1445. +           if (err)
  1446. +               return err;
  1447. +       }
  1448. +   } else {
  1449. +       new_type = OVL_PATH_UPPER;
  1450. +   }
  1451. +
  1452. +   err = ovl_copy_up(old);
  1453. +   if (err)
  1454. +       return err;
  1455. +
  1456. +   err = ovl_copy_up(new->d_parent);
  1457. +   if (err)
  1458. +       return err;
  1459. +
  1460. +   old_upperdir = ovl_dentry_upper(old->d_parent);
  1461. +   new_upperdir = ovl_dentry_upper(new->d_parent);
  1462. +
  1463. +   trap = lock_rename(new_upperdir, old_upperdir);
  1464. +
  1465. +   olddentry = ovl_dentry_upper(old);
  1466. +   newdentry = ovl_dentry_upper(new);
  1467. +   if (newdentry) {
  1468. +       dget(newdentry);
  1469. +   } else {
  1470. +       new_create = true;
  1471. +       newdentry = ovl_lookup_create(new_upperdir, new);
  1472. +       err = PTR_ERR(newdentry);
  1473. +       if (IS_ERR(newdentry))
  1474. +           goto out_unlock;
  1475. +   }
  1476. +
  1477. +   err = -ESTALE;
  1478. +   if (olddentry->d_parent != old_upperdir)
  1479. +       goto out_dput;
  1480. +   if (newdentry->d_parent != new_upperdir)
  1481. +       goto out_dput;
  1482. +   if (olddentry == trap)
  1483. +       goto out_dput;
  1484. +   if (newdentry == trap)
  1485. +       goto out_dput;
  1486. +
  1487. +   old_opaque = ovl_dentry_is_opaque(old);
  1488. +   new_opaque = ovl_dentry_is_opaque(new) || new_type != OVL_PATH_UPPER;
  1489. +
  1490. +   if (is_dir && !old_opaque && new_opaque) {
  1491. +       err = ovl_set_opaque(olddentry);
  1492. +       if (err)
  1493. +           goto out_dput;
  1494. +   }
  1495. +
  1496. +   err = vfs_rename(old_upperdir->d_inode, olddentry,
  1497. +            new_upperdir->d_inode, newdentry);
  1498. +
  1499. +   if (err) {
  1500. +       if (new_create && ovl_dentry_is_opaque(new))
  1501. +           ovl_whiteout(new_upperdir, new);
  1502. +       if (is_dir && !old_opaque && new_opaque)
  1503. +           ovl_remove_opaque(olddentry);
  1504. +       goto out_dput;
  1505. +   }
  1506. +
  1507. +   if (old_type != OVL_PATH_UPPER || old_opaque)
  1508. +       err = ovl_whiteout(old_upperdir, old);
  1509. +   if (is_dir && old_opaque && !new_opaque)
  1510. +       ovl_remove_opaque(olddentry);
  1511. +
  1512. +   if (old_opaque != new_opaque)
  1513. +       ovl_dentry_set_opaque(old, new_opaque);
  1514. +
  1515. +   ovl_dentry_version_inc(old->d_parent);
  1516. +   ovl_dentry_version_inc(new->d_parent);
  1517. +
  1518. +out_dput:
  1519. +   dput(newdentry);
  1520. +out_unlock:
  1521. +   unlock_rename(new_upperdir, old_upperdir);
  1522. +   return err;
  1523. +}
  1524. +
  1525. +const struct inode_operations ovl_dir_inode_operations = {
  1526. +   .lookup     = ovl_lookup,
  1527. +   .mkdir      = ovl_mkdir,
  1528. +   .symlink    = ovl_symlink,
  1529. +   .unlink     = ovl_unlink,
  1530. +   .rmdir      = ovl_rmdir,
  1531. +   .rename     = ovl_rename,
  1532. +   .link       = ovl_link,
  1533. +   .setattr    = ovl_setattr,
  1534. +   .create     = ovl_create,
  1535. +   .mknod      = ovl_mknod,
  1536. +   .permission = ovl_permission,
  1537. +   .getattr    = ovl_dir_getattr,
  1538. +   .setxattr   = ovl_setxattr,
  1539. +   .getxattr   = ovl_getxattr,
  1540. +   .listxattr  = ovl_listxattr,
  1541. +   .removexattr    = ovl_removexattr,
  1542. +};
  1543. diff -ruNb a//fs/overlayfs/inode.c b//fs/overlayfs/inode.c
  1544. --- a//fs/overlayfs/inode.c 1970-01-01 01:00:00.000000000 +0100
  1545. +++ b//fs/overlayfs/inode.c 2012-10-21 15:35:10.213032386 +0100
  1546. @@ -0,0 +1,375 @@
  1547. +/*
  1548. + *
  1549. + * Copyright (C) 2011 Novell Inc.
  1550. + *
  1551. + * This program is free software; you can redistribute it and/or modify it
  1552. + * under the terms of the GNU General Public License version 2 as published by
  1553. + * the Free Software Foundation.
  1554. + */
  1555. +
  1556. +#include <linux/fs.h>
  1557. +#include <linux/slab.h>
  1558. +#include <linux/xattr.h>
  1559. +#include "overlayfs.h"
  1560. +
  1561. +int ovl_setattr(struct dentry *dentry, struct iattr *attr)
  1562. +{
  1563. +   struct dentry *upperdentry;
  1564. +   int err;
  1565. +
  1566. +   if ((attr->ia_valid & ATTR_SIZE) && !ovl_dentry_upper(dentry))
  1567. +       err = ovl_copy_up_truncate(dentry, attr->ia_size);
  1568. +   else
  1569. +       err = ovl_copy_up(dentry);
  1570. +   if (err)
  1571. +       return err;
  1572. +
  1573. +   upperdentry = ovl_dentry_upper(dentry);
  1574. +
  1575. +   if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
  1576. +       attr->ia_valid &= ~ATTR_MODE;
  1577. +
  1578. +   mutex_lock(&upperdentry->d_inode->i_mutex);
  1579. +   err = notify_change(upperdentry, attr);
  1580. +   mutex_unlock(&upperdentry->d_inode->i_mutex);
  1581. +
  1582. +   return err;
  1583. +}
  1584. +
  1585. +static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry,
  1586. +            struct kstat *stat)
  1587. +{
  1588. +   struct path realpath;
  1589. +
  1590. +   ovl_path_real(dentry, &realpath);
  1591. +   return vfs_getattr(realpath.mnt, realpath.dentry, stat);
  1592. +}
  1593. +
  1594. +int ovl_permission(struct inode *inode, int mask)
  1595. +{
  1596. +   struct ovl_entry *oe;
  1597. +   struct dentry *alias = NULL;
  1598. +   struct inode *realinode;
  1599. +   struct dentry *realdentry;
  1600. +   bool is_upper;
  1601. +   int err;
  1602. +
  1603. +   if (S_ISDIR(inode->i_mode)) {
  1604. +       oe = inode->i_private;
  1605. +   } else if (mask & MAY_NOT_BLOCK) {
  1606. +       return -ECHILD;
  1607. +   } else {
  1608. +       /*
  1609. +        * For non-directories find an alias and get the info
  1610. +        * from there.
  1611. +        */
  1612. +       spin_lock(&inode->i_lock);
  1613. +       if (WARN_ON(list_empty(&inode->i_dentry))) {
  1614. +           spin_unlock(&inode->i_lock);
  1615. +           return -ENOENT;
  1616. +       }
  1617. +       alias = list_entry(inode->i_dentry.next,
  1618. +                  struct dentry, d_alias);
  1619. +       dget(alias);
  1620. +       spin_unlock(&inode->i_lock);
  1621. +       oe = alias->d_fsdata;
  1622. +   }
  1623. +
  1624. +   realdentry = ovl_entry_real(oe, &is_upper);
  1625. +
  1626. +   /* Careful in RCU walk mode */
  1627. +   realinode = ACCESS_ONCE(realdentry->d_inode);
  1628. +   if (!realinode) {
  1629. +       WARN_ON(!(mask & MAY_NOT_BLOCK));
  1630. +       err = -ENOENT;
  1631. +       goto out_dput;
  1632. +   }
  1633. +
  1634. +   if (mask & MAY_WRITE) {
  1635. +       umode_t mode = realinode->i_mode;
  1636. +
  1637. +       /*
  1638. +        * Writes will always be redirected to upper layer, so
  1639. +        * ignore lower layer being read-only.
  1640. +        *
  1641. +        * If the overlay itself is read-only then proceed
  1642. +        * with the permission check, don't return EROFS.
  1643. +        * This will only happen if this is the lower layer of
  1644. +        * another overlayfs.
  1645. +        *
  1646. +        * If upper fs becomes read-only after the overlay was
  1647. +        * constructed return EROFS to prevent modification of
  1648. +        * upper layer.
  1649. +        */
  1650. +       err = -EROFS;
  1651. +       if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) &&
  1652. +           (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
  1653. +           goto out_dput;
  1654. +   }
  1655. +
  1656. +   err = inode_only_permission(realinode, mask);
  1657. +out_dput:
  1658. +   dput(alias);
  1659. +   return err;
  1660. +}
  1661. +
  1662. +/* Cookie returned by ovl_follow_link() and consumed by ovl_put_link() */
  1663. +struct ovl_link_data {
  1664. +   struct dentry *realdentry; /* dentry the real ->follow_link() ran on */
  1665. +   void *cookie;              /* value the real ->follow_link() returned */
  1666. +};
  1667. +
  1668. +static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd)
  1669. +{
  1670. +   void *ret;
  1671. +   struct dentry *realdentry;
  1672. +   struct inode *realinode;
  1673. +
  1674. +   realdentry = ovl_dentry_real(dentry);
  1675. +   realinode = realdentry->d_inode;
  1676. +
  1677. +   if (WARN_ON(!realinode->i_op->follow_link))
  1678. +       return ERR_PTR(-EPERM);
  1679. +
  1680. +   ret = realinode->i_op->follow_link(realdentry, nd);
  1681. +   if (IS_ERR(ret))
  1682. +       return ret;
  1683. +
  1684. +   if (realinode->i_op->put_link) {
  1685. +       struct ovl_link_data *data;
  1686. +
  1687. +       data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
  1688. +       if (!data) {
  1689. +           realinode->i_op->put_link(realdentry, nd, ret);
  1690. +           return ERR_PTR(-ENOMEM);
  1691. +       }
  1692. +       data->realdentry = realdentry;
  1693. +       data->cookie = ret;
  1694. +
  1695. +       return data;
  1696. +   } else {
  1697. +       return NULL;
  1698. +   }
  1699. +}
  1700. +
  1701. +static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
  1702. +{
  1703. +   struct inode *realinode;
  1704. +   struct ovl_link_data *data = c;
  1705. +
  1706. +   if (!data)
  1707. +       return;
  1708. +
  1709. +   realinode = data->realdentry->d_inode;
  1710. +   realinode->i_op->put_link(data->realdentry, nd, data->cookie);
  1711. +   kfree(data);
  1712. +}
  1713. +
  1714. +static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
  1715. +{
  1716. +   struct path realpath;
  1717. +   struct inode *realinode;
  1718. +
  1719. +   ovl_path_real(dentry, &realpath);
  1720. +   realinode = realpath.dentry->d_inode;
  1721. +
  1722. +   if (!realinode->i_op->readlink)
  1723. +       return -EINVAL;
  1724. +
  1725. +   touch_atime(&realpath);
  1726. +
  1727. +   return realinode->i_op->readlink(realpath.dentry, buf, bufsiz);
  1728. +}
  1729. +
  1730. +
  1731. +static bool ovl_is_private_xattr(const char *name)
  1732. +{
  1733. +   return strncmp(name, "trusted.overlay.", 14) == 0;
  1734. +}
  1735. +
  1736. +int ovl_setxattr(struct dentry *dentry, const char *name,
  1737. +        const void *value, size_t size, int flags)
  1738. +{
  1739. +   int err;
  1740. +   struct dentry *upperdentry;
  1741. +
  1742. +   if (ovl_is_private_xattr(name))
  1743. +       return -EPERM;
  1744. +
  1745. +   err = ovl_copy_up(dentry);
  1746. +   if (err)
  1747. +       return err;
  1748. +
  1749. +   upperdentry = ovl_dentry_upper(dentry);
  1750. +   return  vfs_setxattr(upperdentry, name, value, size, flags);
  1751. +}
  1752. +
  1753. +ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
  1754. +            void *value, size_t size)
  1755. +{
  1756. +   if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE &&
  1757. +       ovl_is_private_xattr(name))
  1758. +       return -ENODATA;
  1759. +
  1760. +   return vfs_getxattr(ovl_dentry_real(dentry), name, value, size);
  1761. +}
  1762. +
  1763. +ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
  1764. +{
  1765. +   ssize_t res;
  1766. +   int off;
  1767. +
  1768. +   res = vfs_listxattr(ovl_dentry_real(dentry), list, size);
  1769. +   if (res <= 0 || size == 0)
  1770. +       return res;
  1771. +
  1772. +   if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE)
  1773. +       return res;
  1774. +
  1775. +   /* filter out private xattrs */
  1776. +   for (off = 0; off < res;) {
  1777. +       char *s = list + off;
  1778. +       size_t slen = strlen(s) + 1;
  1779. +
  1780. +       BUG_ON(off + slen > res);
  1781. +
  1782. +       if (ovl_is_private_xattr(s)) {
  1783. +           res -= slen;
  1784. +           memmove(s, s + slen, res - off);
  1785. +       } else {
  1786. +           off += slen;
  1787. +       }
  1788. +   }
  1789. +
  1790. +   return res;
  1791. +}
  1792. +
  1793. +int ovl_removexattr(struct dentry *dentry, const char *name)
  1794. +{
  1795. +   int err;
  1796. +   struct path realpath;
  1797. +   enum ovl_path_type type;
  1798. +
  1799. +   if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE &&
  1800. +       ovl_is_private_xattr(name))
  1801. +       return -ENODATA;
  1802. +
  1803. +   type = ovl_path_real(dentry, &realpath);
  1804. +   if (type == OVL_PATH_LOWER) {
  1805. +       err = vfs_getxattr(realpath.dentry, name, NULL, 0);
  1806. +       if (err < 0)
  1807. +           return err;
  1808. +
  1809. +       err = ovl_copy_up(dentry);
  1810. +       if (err)
  1811. +           return err;
  1812. +
  1813. +       ovl_path_upper(dentry, &realpath);
  1814. +   }
  1815. +
  1816. +   return vfs_removexattr(realpath.dentry, name);
  1817. +}
  1818. +
  1819. +static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
  1820. +                 struct dentry *realdentry)
  1821. +{
  1822. +   if (type != OVL_PATH_LOWER)
  1823. +       return false;
  1824. +
  1825. +   if (special_file(realdentry->d_inode->i_mode))
  1826. +       return false;
  1827. +
  1828. +   if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
  1829. +       return false;
  1830. +
  1831. +   return true;
  1832. +}
  1833. +
  1834. +static struct file *ovl_open(struct dentry *dentry, struct file *file,
  1835. +                const struct cred *cred)
  1836. +{
  1837. +   int err;
  1838. +   struct path realpath;
  1839. +   enum ovl_path_type type;
  1840. +
  1841. +   type = ovl_path_real(dentry, &realpath);
  1842. +   if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) {
  1843. +       if (file->f_flags & O_TRUNC)
  1844. +           err = ovl_copy_up_truncate(dentry, 0);
  1845. +       else
  1846. +           err = ovl_copy_up(dentry);
  1847. +       if (err)
  1848. +           return ERR_PTR(err);
  1849. +
  1850. +       ovl_path_upper(dentry, &realpath);
  1851. +   }
  1852. +
  1853. +   return vfs_open(&realpath, file, cred);
  1854. +}
  1855. +
  1856. +static const struct inode_operations ovl_file_inode_operations = { /* installed by ovl_new_inode() for regular files, sockets, block/char devices and FIFOs */
  1857. +   .setattr    = ovl_setattr,
  1858. +   .permission = ovl_permission,
  1859. +   .getattr    = ovl_getattr,
  1860. +   .setxattr   = ovl_setxattr, /* refuses overlayfs-private names, otherwise copies up and sets on the upper dentry */
  1861. +   .getxattr   = ovl_getxattr, /* reads from the real dentry */
  1862. +   .listxattr  = ovl_listxattr, /* filters private names when the parent is a merged directory */
  1863. +   .removexattr    = ovl_removexattr,
  1864. +   .open       = ovl_open, /* may copy the file up before opening the real path */
  1865. +};
  1866. +
  1867. +static const struct inode_operations ovl_symlink_inode_operations = { /* installed by ovl_new_inode() for S_IFLNK inodes */
  1868. +   .setattr    = ovl_setattr,
  1869. +   .follow_link    = ovl_follow_link, /* delegates to the real inode's ->follow_link */
  1870. +   .put_link   = ovl_put_link, /* releases the wrapper cookie returned by ovl_follow_link() */
  1871. +   .readlink   = ovl_readlink, /* delegates to the real inode's ->readlink */
  1872. +   .getattr    = ovl_getattr,
  1873. +   .setxattr   = ovl_setxattr,
  1874. +   .getxattr   = ovl_getxattr,
  1875. +   .listxattr  = ovl_listxattr,
  1876. +   .removexattr    = ovl_removexattr,
  1877. +};
  1878. +
  1879. +struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
  1880. +               struct ovl_entry *oe)
  1881. +{
  1882. +   struct inode *inode;
  1883. +
  1884. +   inode = new_inode(sb);
  1885. +   if (!inode)
  1886. +       return NULL;
  1887. +
  1888. +   mode &= S_IFMT;
  1889. +
  1890. +   inode->i_ino = get_next_ino();
  1891. +   inode->i_mode = mode;
  1892. +   inode->i_flags |= S_NOATIME | S_NOCMTIME;
  1893. +
  1894. +   switch (mode) {
  1895. +   case S_IFDIR:
  1896. +       inode->i_private = oe;
  1897. +       inode->i_op = &ovl_dir_inode_operations;
  1898. +       inode->i_fop = &ovl_dir_operations;
  1899. +       break;
  1900. +
  1901. +   case S_IFLNK:
  1902. +       inode->i_op = &ovl_symlink_inode_operations;
  1903. +       break;
  1904. +
  1905. +   case S_IFREG:
  1906. +   case S_IFSOCK:
  1907. +   case S_IFBLK:
  1908. +   case S_IFCHR:
  1909. +   case S_IFIFO:
  1910. +       inode->i_op = &ovl_file_inode_operations;
  1911. +       break;
  1912. +
  1913. +   default:
  1914. +       WARN(1, "illegal file type: %i\n", mode);
  1915. +       iput(inode);
  1916. +       inode = NULL;
  1917. +   }
  1918. +
  1919. +   return inode;
  1920. +
  1921. +}
  1922. diff -ruNb a//fs/overlayfs/Kconfig b//fs/overlayfs/Kconfig
  1923. --- a//fs/overlayfs/Kconfig 1970-01-01 01:00:00.000000000 +0100
  1924. +++ b//fs/overlayfs/Kconfig 2012-10-21 15:33:23.868301470 +0100
  1925. @@ -0,0 +1,4 @@
  1926. +config OVERLAYFS_FS
  1927. +   tristate "Overlay filesystem support"
  1928. +   help
  1929. +     Add support for overlay filesystem.
  1930. diff -ruNb a//fs/overlayfs/Makefile b//fs/overlayfs/Makefile
  1931. --- a//fs/overlayfs/Makefile    1970-01-01 01:00:00.000000000 +0100
  1932. +++ b//fs/overlayfs/Makefile    2012-10-21 15:33:23.868301470 +0100
  1933. @@ -0,0 +1,7 @@
  1934. +#
  1935. +# Makefile for the overlay filesystem.
  1936. +#
  1937. +
  1938. +obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o
  1939. +
  1940. +overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o
  1941. diff -ruNb a//fs/overlayfs/overlayfs.h b//fs/overlayfs/overlayfs.h
  1942. --- a//fs/overlayfs/overlayfs.h 1970-01-01 01:00:00.000000000 +0100
  1943. +++ b//fs/overlayfs/overlayfs.h 2012-10-21 15:35:40.472972180 +0100
  1944. @@ -0,0 +1,70 @@
  1945. +/*
  1946. + *
  1947. + * Copyright (C) 2011 Novell Inc.
  1948. + *
  1949. + * This program is free software; you can redistribute it and/or modify it
  1950. + * under the terms of the GNU General Public License version 2 as published by
  1951. + * the Free Software Foundation.
  1952. + */
  1953. +
  1954. +struct ovl_entry;
  1955. +
  1956. +enum ovl_path_type { /* classification returned by ovl_path_type() */
  1957. +   OVL_PATH_UPPER, /* an upper dentry exists and the entry is not a merged directory */
  1958. +   OVL_PATH_MERGE, /* directory with both an upper and a lower dentry */
  1959. +   OVL_PATH_LOWER, /* no upper dentry */
  1960. +};
  1961. +
  1962. +extern const char *ovl_opaque_xattr;
  1963. +extern const char *ovl_whiteout_xattr;
  1964. +extern const struct dentry_operations ovl_dentry_operations;
  1965. +
  1966. +enum ovl_path_type ovl_path_type(struct dentry *dentry);
  1967. +u64 ovl_dentry_version_get(struct dentry *dentry);
  1968. +void ovl_dentry_version_inc(struct dentry *dentry);
  1969. +void ovl_path_upper(struct dentry *dentry, struct path *path);
  1970. +void ovl_path_lower(struct dentry *dentry, struct path *path);
  1971. +enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
  1972. +struct dentry *ovl_dentry_upper(struct dentry *dentry);
  1973. +struct dentry *ovl_dentry_lower(struct dentry *dentry);
  1974. +struct dentry *ovl_dentry_real(struct dentry *dentry);
  1975. +struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper);
  1976. +bool ovl_dentry_is_opaque(struct dentry *dentry);
  1977. +void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque);
  1978. +bool ovl_is_whiteout(struct dentry *dentry);
  1979. +void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
  1980. +struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
  1981. +             struct nameidata *nd);
  1982. +struct file *ovl_path_open(struct path *path, int flags);
  1983. +
  1984. +struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry,
  1985. +               struct kstat *stat, const char *link);
  1986. +
  1987. +/* readdir.c */
  1988. +extern const struct file_operations ovl_dir_operations;
  1989. +int ovl_check_empty_and_clear(struct dentry *dentry, enum ovl_path_type type);
  1990. +
  1991. +/* inode.c */
  1992. +int ovl_setattr(struct dentry *dentry, struct iattr *attr);
  1993. +int ovl_permission(struct inode *inode, int mask);
  1994. +int ovl_setxattr(struct dentry *dentry, const char *name,
  1995. +        const void *value, size_t size, int flags);
  1996. +ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
  1997. +            void *value, size_t size);
  1998. +ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
  1999. +int ovl_removexattr(struct dentry *dentry, const char *name);
  2000. +
  2001. +struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
  2002. +               struct ovl_entry *oe);
  2003. +static inline void ovl_copyattr(struct inode *from, struct inode *to)
  2004. +{
  2005. +   to->i_uid = from->i_uid;
  2006. +   to->i_gid = from->i_gid;
  2007. +}
  2008. +
  2009. +/* dir.c */
  2010. +extern const struct inode_operations ovl_dir_inode_operations;
  2011. +
  2012. +/* copy_up.c */
  2013. +int ovl_copy_up(struct dentry *dentry);
  2014. +int ovl_copy_up_truncate(struct dentry *dentry, loff_t size);
  2015. diff -ruNb a//fs/overlayfs/readdir.c b//fs/overlayfs/readdir.c
  2016. --- a//fs/overlayfs/readdir.c   1970-01-01 01:00:00.000000000 +0100
  2017. +++ b//fs/overlayfs/readdir.c   2012-10-21 15:33:23.870301202 +0100
  2018. @@ -0,0 +1,566 @@
  2019. +/*
  2020. + *
  2021. + * Copyright (C) 2011 Novell Inc.
  2022. + *
  2023. + * This program is free software; you can redistribute it and/or modify it
  2024. + * under the terms of the GNU General Public License version 2 as published by
  2025. + * the Free Software Foundation.
  2026. + */
  2027. +
  2028. +#include <linux/fs.h>
  2029. +#include <linux/slab.h>
  2030. +#include <linux/namei.h>
  2031. +#include <linux/file.h>
  2032. +#include <linux/xattr.h>
  2033. +#include <linux/rbtree.h>
  2034. +#include <linux/security.h>
  2035. +#include <linux/cred.h>
  2036. +#include "overlayfs.h"
  2037. +
  2038. +struct ovl_cache_entry { /* one directory entry held in a readdir cache list */
  2039. +   const char *name; /* NUL-terminated copy of the name, stored right behind the struct by ovl_cache_entry_new() */
  2040. +   unsigned int len; /* length of name, terminator not counted */
  2041. +   unsigned int type; /* d_type as passed to the fill callback */
  2042. +   u64 ino; /* inode number as passed to the fill callback */
  2043. +   bool is_whiteout; /* false on creation; set by ovl_dir_mark_whiteouts() */
  2044. +   struct list_head l_node; /* linkage in the cache list */
  2045. +   struct rb_node node; /* linkage in the name-ordered rb-tree (upper entries only) */
  2046. +};
  2047. +
  2048. +struct ovl_readdir_data { /* state handed to the ovl_fill_upper()/ovl_fill_lower() callbacks */
  2049. +   struct rb_root *root; /* name index of the upper entries; valid only inside ovl_dir_read_merged() */
  2050. +   struct list_head *list; /* list that collects the resulting cache entries */
  2051. +   struct list_head *middle; /* marker in front of which lower entries are queued */
  2052. +   struct dentry *dir; /* upper directory used for lookups in ovl_dir_mark_whiteouts() */
  2053. +   int count; /* callbacks seen during the current vfs_readdir() pass */
  2054. +   int err; /* error recorded by a fill callback during the current pass */
  2055. +};
  2056. +
  2057. +struct ovl_dir_file { /* per-open state of an overlay directory (file->private_data) */
  2058. +   bool is_real; /* readdir/llseek are passed straight through to realfile */
  2059. +   bool is_cached; /* cache holds a merged listing */
  2060. +   struct list_head cursor; /* marker inside cache; the entry after it is emitted next */
  2061. +   u64 cache_version; /* ovl_dentry_version_get() value sampled when the cache was built */
  2062. +   struct list_head cache; /* list of ovl_cache_entry, linked through l_node */
  2063. +   struct file *realfile; /* open real directory; reset to NULL by ovl_dir_reset() once the directory is merged */
  2064. +};
  2065. +
  2066. +static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
  2067. +{
  2068. +   return container_of(n, struct ovl_cache_entry, node);
  2069. +}
  2070. +
  2071. +static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
  2072. +                           const char *name, int len)
  2073. +{
  2074. +   struct rb_node *node = root->rb_node;
  2075. +   int cmp;
  2076. +
  2077. +   while (node) {
  2078. +       struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
  2079. +
  2080. +       cmp = strncmp(name, p->name, len);
  2081. +       if (cmp > 0)
  2082. +           node = p->node.rb_right;
  2083. +       else if (cmp < 0 || len < p->len)
  2084. +           node = p->node.rb_left;
  2085. +       else
  2086. +           return p;
  2087. +   }
  2088. +
  2089. +   return NULL;
  2090. +}
  2091. +
  2092. +static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len,
  2093. +                          u64 ino, unsigned int d_type)
  2094. +{
  2095. +   struct ovl_cache_entry *p;
  2096. +
  2097. +   p = kmalloc(sizeof(*p) + len + 1, GFP_KERNEL);
  2098. +   if (p) {
  2099. +       char *name_copy = (char *) (p + 1);
  2100. +       memcpy(name_copy, name, len);
  2101. +       name_copy[len] = '\0';
  2102. +       p->name = name_copy;
  2103. +       p->len = len;
  2104. +       p->type = d_type;
  2105. +       p->ino = ino;
  2106. +       p->is_whiteout = false;
  2107. +   }
  2108. +
  2109. +   return p;
  2110. +}
  2111. +
  2112. +static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
  2113. +                 const char *name, int len, u64 ino,
  2114. +                 unsigned int d_type)
  2115. +{
  2116. +   struct rb_node **newp = &rdd->root->rb_node;
  2117. +   struct rb_node *parent = NULL;
  2118. +   struct ovl_cache_entry *p;
  2119. +
  2120. +   while (*newp) {
  2121. +       int cmp;
  2122. +       struct ovl_cache_entry *tmp;
  2123. +
  2124. +       parent = *newp;
  2125. +       tmp = ovl_cache_entry_from_node(*newp);
  2126. +       cmp = strncmp(name, tmp->name, len);
  2127. +       if (cmp > 0)
  2128. +           newp = &tmp->node.rb_right;
  2129. +       else if (cmp < 0 || len < tmp->len)
  2130. +           newp = &tmp->node.rb_left;
  2131. +       else
  2132. +           return 0;
  2133. +   }
  2134. +
  2135. +   p = ovl_cache_entry_new(name, len, ino, d_type);
  2136. +   if (p == NULL)
  2137. +       return -ENOMEM;
  2138. +
  2139. +   list_add_tail(&p->l_node, rdd->list);
  2140. +   rb_link_node(&p->node, parent, newp);
  2141. +   rb_insert_color(&p->node, rdd->root);
  2142. +
  2143. +   return 0;
  2144. +}
  2145. +
  2146. +static int ovl_fill_lower(void *buf, const char *name, int namelen,
  2147. +               loff_t offset, u64 ino, unsigned int d_type)
  2148. +{
  2149. +   struct ovl_readdir_data *rdd = buf;
  2150. +   struct ovl_cache_entry *p;
  2151. +
  2152. +   rdd->count++;
  2153. +   p = ovl_cache_entry_find(rdd->root, name, namelen);
  2154. +   if (p) {
  2155. +       list_move_tail(&p->l_node, rdd->middle);
  2156. +   } else {
  2157. +       p = ovl_cache_entry_new(name, namelen, ino, d_type);
  2158. +       if (p == NULL)
  2159. +           rdd->err = -ENOMEM;
  2160. +       else
  2161. +           list_add_tail(&p->l_node, rdd->middle);
  2162. +   }
  2163. +
  2164. +   return rdd->err;
  2165. +}
  2166. +
  2167. +static void ovl_cache_free(struct list_head *list)
  2168. +{
  2169. +   struct ovl_cache_entry *p;
  2170. +   struct ovl_cache_entry *n;
  2171. +
  2172. +   list_for_each_entry_safe(p, n, list, l_node)
  2173. +       kfree(p);
  2174. +
  2175. +   INIT_LIST_HEAD(list);
  2176. +}
  2177. +
  2178. +static int ovl_fill_upper(void *buf, const char *name, int namelen,
  2179. +             loff_t offset, u64 ino, unsigned int d_type)
  2180. +{
  2181. +   struct ovl_readdir_data *rdd = buf;
  2182. +
  2183. +   rdd->count++;
  2184. +   return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
  2185. +}
  2186. +
  2187. +static inline int ovl_dir_read(struct path *realpath,
  2188. +                  struct ovl_readdir_data *rdd, filldir_t filler)
  2189. +{
  2190. +   struct file *realfile;
  2191. +   int err;
  2192. +
  2193. +   realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
  2194. +   if (IS_ERR(realfile))
  2195. +       return PTR_ERR(realfile);
  2196. +
  2197. +   do {
  2198. +       rdd->count = 0;
  2199. +       rdd->err = 0;
  2200. +       err = vfs_readdir(realfile, filler, rdd);
  2201. +       if (err >= 0)
  2202. +           err = rdd->err;
  2203. +   } while (!err && rdd->count);
  2204. +   fput(realfile);
  2205. +
  2206. +   return 0;
  2207. +}
  2208. +
  2209. +static void ovl_dir_reset(struct file *file)
  2210. +{
  2211. +   struct ovl_dir_file *od = file->private_data;
  2212. +   enum ovl_path_type type = ovl_path_type(file->f_path.dentry);
  2213. +
  2214. +   if (ovl_dentry_version_get(file->f_path.dentry) != od->cache_version) {
  2215. +       list_del_init(&od->cursor);
  2216. +       ovl_cache_free(&od->cache);
  2217. +       od->is_cached = false;
  2218. +   }
  2219. +   WARN_ON(!od->is_real && type != OVL_PATH_MERGE);
  2220. +   if (od->is_real && type == OVL_PATH_MERGE) {
  2221. +       fput(od->realfile);
  2222. +       od->realfile = NULL;
  2223. +       od->is_real = false;
  2224. +   }
  2225. +}
  2226. +
  2227. +static int ovl_dir_mark_whiteouts(struct ovl_readdir_data *rdd) /* set is_whiteout on the DT_LNK entries of rdd->list that ovl_is_whiteout() confirms; returns 0 or -ENOMEM */
  2228. +{
  2229. +   struct ovl_cache_entry *p;
  2230. +   struct dentry *dentry;
  2231. +   const struct cred *old_cred;
  2232. +   struct cred *override_cred;
  2233. +
  2234. +   override_cred = prepare_creds();
  2235. +   if (!override_cred) {
  2236. +       ovl_cache_free(rdd->list); /* drop the entries collected so far before failing */
  2237. +       return -ENOMEM;
  2238. +   }
  2239. +
  2240. +   /*
  2241. +    * CAP_SYS_ADMIN for getxattr
  2242. +    * CAP_DAC_OVERRIDE for lookup
  2243. +    */
  2244. +   cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  2245. +   cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
  2246. +   old_cred = override_creds(override_cred);
  2247. +
  2248. +   mutex_lock(&rdd->dir->d_inode->i_mutex); /* lookup_one_len() below runs with the directory's i_mutex held */
  2249. +   list_for_each_entry(p, rdd->list, l_node) {
  2250. +       if (p->type != DT_LNK) /* only symlink-typed entries are checked */
  2251. +           continue;
  2252. +
  2253. +       dentry = lookup_one_len(p->name, rdd->dir, p->len);
  2254. +       if (IS_ERR(dentry)) /* lookup failed: the entry keeps is_whiteout == false */
  2255. +           continue;
  2256. +
  2257. +       p->is_whiteout = ovl_is_whiteout(dentry);
  2258. +       dput(dentry);
  2259. +   }
  2260. +   mutex_unlock(&rdd->dir->d_inode->i_mutex);
  2261. +
  2262. +   revert_creds(old_cred); /* restore the caller's credentials, then drop our reference */
  2263. +   put_cred(override_cred);
  2264. +
  2265. +   return 0;
  2266. +}
  2267. +
  2268. +static inline int ovl_dir_read_merged(struct path *upperpath, /* build the merged listing of upper and lower directory in rdd->list; returns 0 or a negative errno */
  2269. +                     struct path *lowerpath,
  2270. +                     struct ovl_readdir_data *rdd)
  2271. +{
  2272. +   int err;
  2273. +   struct rb_root root = RB_ROOT; /* name index of the upper entries; lives only for this call */
  2274. +   struct list_head middle; /* temporary marker; initialised by list_add() below */
  2275. +
  2276. +   rdd->root = &root;
  2277. +   if (upperpath->dentry) { /* the upper directory may be absent */
  2278. +       rdd->dir = upperpath->dentry;
  2279. +       err = ovl_dir_read(upperpath, rdd, ovl_fill_upper);
  2280. +       if (err)
  2281. +           goto out;
  2282. +
  2283. +       err = ovl_dir_mark_whiteouts(rdd);
  2284. +       if (err)
  2285. +           goto out;
  2286. +   }
  2287. +   /*
  2288. +    * Insert lowerpath entries before upperpath ones, this allows
  2289. +    * offsets to be reasonably constant
  2290. +    */
  2291. +   list_add(&middle, rdd->list); /* marker goes to the list head; ovl_fill_lower() queues entries in front of it */
  2292. +   rdd->middle = &middle;
  2293. +   err = ovl_dir_read(lowerpath, rdd, ovl_fill_lower);
  2294. +   list_del(&middle); /* the marker is on this stack frame and must not stay linked */
  2295. +out:
  2296. +   rdd->root = NULL; /* root is on this stack frame; do not leave a dangling pointer */
  2297. +
  2298. +   return err;
  2299. +}
  2300. +
  2301. +static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
  2302. +{
  2303. +   struct list_head *l;
  2304. +   loff_t off;
  2305. +
  2306. +   l = od->cache.next;
  2307. +   for (off = 0; off < pos; off++) {
  2308. +       if (l == &od->cache)
  2309. +           break;
  2310. +       l = l->next;
  2311. +   }
  2312. +   list_move_tail(&od->cursor, l);
  2313. +}
  2314. +
  2315. +static int ovl_readdir(struct file *file, void *buf, filldir_t filler) /* readdir for overlay directories: pass through to the real directory or emit entries from the merged cache */
  2316. +{
  2317. +   struct ovl_dir_file *od = file->private_data;
  2318. +   int res;
  2319. +
  2320. +   if (!file->f_pos) /* at offset 0: revalidate the cache and the real-file state */
  2321. +       ovl_dir_reset(file);
  2322. +
  2323. +   if (od->is_real) { /* not merged: the real directory does all the work */
  2324. +       res = vfs_readdir(od->realfile, filler, buf);
  2325. +       file->f_pos = od->realfile->f_pos; /* mirror the real file's position */
  2326. +
  2327. +       return res;
  2328. +   }
  2329. +
  2330. +   if (!od->is_cached) { /* build the merged cache on first use */
  2331. +       struct path lowerpath;
  2332. +       struct path upperpath;
  2333. +       struct ovl_readdir_data rdd = { .list = &od->cache };
  2334. +
  2335. +       ovl_path_lower(file->f_path.dentry, &lowerpath);
  2336. +       ovl_path_upper(file->f_path.dentry, &upperpath);
  2337. +
  2338. +       res = ovl_dir_read_merged(&upperpath, &lowerpath, &rdd);
  2339. +       if (res) {
  2340. +           ovl_cache_free(rdd.list); /* discard the partial listing */
  2341. +           return res;
  2342. +       }
  2343. +
  2344. +       od->cache_version = ovl_dentry_version_get(file->f_path.dentry);
  2345. +       od->is_cached = true;
  2346. +
  2347. +       ovl_seek_cursor(od, file->f_pos); /* resume at the current offset */
  2348. +   }
  2349. +
  2350. +   while (od->cursor.next != &od->cache) { /* until the cursor reaches the end of the cache */
  2351. +       int over;
  2352. +       loff_t off;
  2353. +       struct ovl_cache_entry *p;
  2354. +
  2355. +       p = list_entry(od->cursor.next, struct ovl_cache_entry, l_node);
  2356. +       off = file->f_pos;
  2357. +       if (!p->is_whiteout) { /* whiteouts are never reported to the caller */
  2358. +           over = filler(buf, p->name, p->len, off, p->ino,
  2359. +                     p->type);
  2360. +           if (over) /* filler returned non-zero: stop without consuming this entry */
  2361. +               break;
  2362. +       }
  2363. +       file->f_pos++; /* whiteouts consume an offset too, so f_pos indexes the cache list */
  2364. +       list_move(&od->cursor, &p->l_node); /* step the cursor past the entry just handled */
  2365. +   }
  2366. +
  2367. +   return 0;
  2368. +}
  2369. +
  2370. +static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
  2371. +{
  2372. +   loff_t res;
  2373. +   struct ovl_dir_file *od = file->private_data;
  2374. +
  2375. +   mutex_lock(&file->f_dentry->d_inode->i_mutex);
  2376. +   if (!file->f_pos)
  2377. +       ovl_dir_reset(file);
  2378. +
  2379. +   if (od->is_real) {
  2380. +       res = vfs_llseek(od->realfile, offset, origin);
  2381. +       file->f_pos = od->realfile->f_pos;
  2382. +   } else {
  2383. +       res = -EINVAL;
  2384. +
  2385. +       switch (origin) {
  2386. +       case SEEK_CUR:
  2387. +           offset += file->f_pos;
  2388. +           break;
  2389. +       case SEEK_SET:
  2390. +           break;
  2391. +       default:
  2392. +           goto out_unlock;
  2393. +       }
  2394. +       if (offset < 0)
  2395. +           goto out_unlock;
  2396. +
  2397. +       if (offset != file->f_pos) {
  2398. +           file->f_pos = offset;
  2399. +           if (od->is_cached)
  2400. +               ovl_seek_cursor(od, offset);
  2401. +       }
  2402. +       res = offset;
  2403. +   }
  2404. +out_unlock:
  2405. +   mutex_unlock(&file->f_dentry->d_inode->i_mutex);
  2406. +
  2407. +   return res;
  2408. +}
  2409. +
  2410. +static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
  2411. +            int datasync)
  2412. +{
  2413. +   struct ovl_dir_file *od = file->private_data;
  2414. +
  2415. +   /* May need to reopen directory if it got copied up */
  2416. +   if (!od->realfile) {
  2417. +       struct path upperpath;
  2418. +
  2419. +       ovl_path_upper(file->f_path.dentry, &upperpath);
  2420. +       od->realfile = ovl_path_open(&upperpath, O_RDONLY);
  2421. +       if (IS_ERR(od->realfile))
  2422. +           return PTR_ERR(od->realfile);
  2423. +   }
  2424. +
  2425. +   return vfs_fsync_range(od->realfile, start, end, datasync);
  2426. +}
  2427. +
  2428. +static int ovl_dir_release(struct inode *inode, struct file *file)
  2429. +{
  2430. +   struct ovl_dir_file *od = file->private_data;
  2431. +
  2432. +   list_del(&od->cursor);
  2433. +   ovl_cache_free(&od->cache);
  2434. +   if (od->realfile)
  2435. +       fput(od->realfile);
  2436. +   kfree(od);
  2437. +
  2438. +   return 0;
  2439. +}
  2440. +
  2441. +static int ovl_dir_open(struct inode *inode, struct file *file)
  2442. +{
  2443. +   struct path realpath;
  2444. +   struct file *realfile;
  2445. +   struct ovl_dir_file *od;
  2446. +   enum ovl_path_type type;
  2447. +
  2448. +   od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
  2449. +   if (!od)
  2450. +       return -ENOMEM;
  2451. +
  2452. +   type = ovl_path_real(file->f_path.dentry, &realpath);
  2453. +   realfile = ovl_path_open(&realpath, file->f_flags);
  2454. +   if (IS_ERR(realfile)) {
  2455. +       kfree(od);
  2456. +       return PTR_ERR(realfile);
  2457. +   }
  2458. +   INIT_LIST_HEAD(&od->cache);
  2459. +   INIT_LIST_HEAD(&od->cursor);
  2460. +   od->is_cached = false;
  2461. +   od->realfile = realfile;
  2462. +   od->is_real = (type != OVL_PATH_MERGE);
  2463. +   file->private_data = od;
  2464. +
  2465. +   return 0;
  2466. +}
  2467. +
  2468. +const struct file_operations ovl_dir_operations = { /* file ops that ovl_new_inode() installs on directory inodes */
  2469. +   .read       = generic_read_dir,
  2470. +   .open       = ovl_dir_open, /* allocates struct ovl_dir_file and opens the real directory */
  2471. +   .readdir    = ovl_readdir, /* pass-through or merged listing */
  2472. +   .llseek     = ovl_dir_llseek,
  2473. +   .fsync      = ovl_dir_fsync, /* syncs the real directory, reopening the upper one if needed */
  2474. +   .release    = ovl_dir_release, /* frees the cache and drops the real file */
  2475. +};
  2476. +
  2477. +static int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
  2478. +{
  2479. +   int err;
  2480. +   struct path lowerpath;
  2481. +   struct path upperpath;
  2482. +   struct ovl_cache_entry *p;
  2483. +   struct ovl_readdir_data rdd = { .list = list };
  2484. +
  2485. +   ovl_path_upper(dentry, &upperpath);
  2486. +   ovl_path_lower(dentry, &lowerpath);
  2487. +
  2488. +   err = ovl_dir_read_merged(&upperpath, &lowerpath, &rdd);
  2489. +   if (err)
  2490. +       return err;
  2491. +
  2492. +   err = 0;
  2493. +
  2494. +   list_for_each_entry(p, list, l_node) {
  2495. +       if (p->is_whiteout)
  2496. +           continue;
  2497. +
  2498. +       if (p->name[0] == '.') {
  2499. +           if (p->len == 1)
  2500. +               continue;
  2501. +           if (p->len == 2 && p->name[1] == '.')
  2502. +               continue;
  2503. +       }
  2504. +       err = -ENOTEMPTY;
  2505. +       break;
  2506. +   }
  2507. +
  2508. +   return err;
  2509. +}
  2510. +
  2511. +static int ovl_remove_whiteouts(struct dentry *dir, struct list_head *list) /* mark the upper directory of @dir opaque, then unlink every whiteout named in @list from it; returns 0, -ENOMEM or the setxattr error */
  2512. +{
  2513. +   struct path upperpath;
  2514. +   struct dentry *upperdir;
  2515. +   struct ovl_cache_entry *p;
  2516. +   const struct cred *old_cred;
  2517. +   struct cred *override_cred;
  2518. +   int err;
  2519. +
  2520. +   ovl_path_upper(dir, &upperpath);
  2521. +   upperdir = upperpath.dentry;
  2522. +
  2523. +   override_cred = prepare_creds();
  2524. +   if (!override_cred)
  2525. +       return -ENOMEM;
  2526. +
  2527. +   /*
  2528. +    * CAP_DAC_OVERRIDE for lookup and unlink
  2529. +    * CAP_SYS_ADMIN for setxattr of "trusted" namespace
  2530. +    * CAP_FOWNER for unlink in sticky directory
  2531. +    */
  2532. +   cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
  2533. +   cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  2534. +   cap_raise(override_cred->cap_effective, CAP_FOWNER);
  2535. +   old_cred = override_creds(override_cred);
  2536. +
  2537. +   err = vfs_setxattr(upperdir, ovl_opaque_xattr, "y", 1, 0); /* set the opaque marker before any whiteout is removed */
  2538. +   if (err)
  2539. +       goto out_revert_creds;
  2540. +
  2541. +   mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
  2542. +   list_for_each_entry(p, list, l_node) {
  2543. +       struct dentry *dentry;
  2544. +       int ret;
  2545. +
  2546. +       if (!p->is_whiteout)
  2547. +           continue;
  2548. +
  2549. +       dentry = lookup_one_len(p->name, upperdir, p->len);
  2550. +       if (IS_ERR(dentry)) { /* best effort: log the failure and go on with the next entry */
  2551. +           printk(KERN_WARNING
  2552. +               "overlayfs: failed to lookup whiteout %.*s: %li\n",
  2553. +               p->len, p->name, PTR_ERR(dentry));
  2554. +           continue;
  2555. +       }
  2556. +       ret = vfs_unlink(upperdir->d_inode, dentry);
  2557. +       dput(dentry);
  2558. +       if (ret) /* unlink failures are logged only; they do not change the return value */
  2559. +           printk(KERN_WARNING
  2560. +               "overlayfs: failed to unlink whiteout %.*s: %i\n",
  2561. +               p->len, p->name, ret);
  2562. +   }
  2563. +   mutex_unlock(&upperdir->d_inode->i_mutex);
  2564. +
  2565. +out_revert_creds:
  2566. +   revert_creds(old_cred); /* restore the caller's credentials, then drop our reference */
  2567. +   put_cred(override_cred);
  2568. +
  2569. +   return err;
  2570. +}
  2571. +
  2572. +int ovl_check_empty_and_clear(struct dentry *dentry, enum ovl_path_type type)
  2573. +{
  2574. +   int err;
  2575. +   LIST_HEAD(list);
  2576. +
  2577. +   err = ovl_check_empty_dir(dentry, &list);
  2578. +   if (!err && type == OVL_PATH_MERGE)
  2579. +       err = ovl_remove_whiteouts(dentry, &list);
  2580. +
  2581. +   ovl_cache_free(&list);
  2582. +
  2583. +   return err;
  2584. +}
  2585. diff -ruNb a//fs/overlayfs/super.c b//fs/overlayfs/super.c
  2586. --- a//fs/overlayfs/super.c 1970-01-01 01:00:00.000000000 +0100
  2587. +++ b//fs/overlayfs/super.c 2012-10-21 15:35:40.473972046 +0100
  2588. @@ -0,0 +1,665 @@
  2589. +/*
  2590. + *
  2591. + * Copyright (C) 2011 Novell Inc.
  2592. + *
  2593. + * This program is free software; you can redistribute it and/or modify it
  2594. + * under the terms of the GNU General Public License version 2 as published by
  2595. + * the Free Software Foundation.
  2596. + */
  2597. +
  2598. +#include <linux/fs.h>
  2599. +#include <linux/namei.h>
  2600. +#include <linux/xattr.h>
  2601. +#include <linux/security.h>
  2602. +#include <linux/mount.h>
  2603. +#include <linux/slab.h>
  2604. +#include <linux/parser.h>
  2605. +#include <linux/module.h>
  2606. +#include <linux/cred.h>
  2607. +#include <linux/sched.h>
  2608. +#include <linux/seq_file.h>
  2609. +#include "overlayfs.h"
  2610. +
  2611. +MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
  2612. +MODULE_DESCRIPTION("Overlay filesystem");
  2613. +MODULE_LICENSE("GPL");
  2614. +
  2615. +struct ovl_config { /* embedded in struct ovl_fs, whose comment describes these as the directory pathnames kept for show_options */
  2616. +   char *lowerdir; /* pathname of the lower directory */
  2617. +   char *upperdir; /* pathname of the upper directory */
  2618. +};
  2619. +
  2620. +/* private information held for overlayfs's superblock */
  2621. +struct ovl_fs { /* reached through sb->s_fs_info */
  2622. +   struct vfsmount *upper_mnt; /* mount that ovl_path_upper() puts into path->mnt */
  2623. +   struct vfsmount *lower_mnt; /* mount that ovl_path_lower() puts into path->mnt */
  2624. +   /* pathnames of lower and upper dirs, for show_options */
  2625. +   struct ovl_config config;
  2626. +};
  2627. +
  2628. +/* private information held for every overlayfs dentry */
  2629. +struct ovl_entry { /* reached through dentry->d_fsdata */
  2630. +   /*
  2631. +    * Keep "double reference" on upper dentries, so that
  2632. +    * d_delete() doesn't think it's OK to reset d_inode to NULL.
  2633. +    */
  2634. +   struct dentry *__upperdentry; /* NULL when there is no upper dentry; read via ovl_upperdentry_dereference() */
  2635. +   struct dentry *lowerdentry; /* NULL when there is no lower dentry */
  2636. +   union {
  2637. +       struct {
  2638. +           u64 version; /* presumably the value behind ovl_dentry_version_get()/_inc() - confirm */
  2639. +           bool opaque; /* presumably the flag behind ovl_dentry_is_opaque()/_set_opaque() - confirm */
  2640. +       };
  2641. +       struct rcu_head rcu; /* shares storage with version/opaque; NOTE(review): presumably used only for RCU-deferred freeing - confirm */
  2642. +   };
  2643. +};
  2644. +
  2645. +const char *ovl_whiteout_xattr = "trusted.overlay.whiteout";
  2646. +const char *ovl_opaque_xattr = "trusted.overlay.opaque";
  2647. +
  2648. +
  2649. +enum ovl_path_type ovl_path_type(struct dentry *dentry)
  2650. +{
  2651. +   struct ovl_entry *oe = dentry->d_fsdata;
  2652. +
  2653. +   if (oe->__upperdentry) {
  2654. +       if (oe->lowerdentry && S_ISDIR(dentry->d_inode->i_mode))
  2655. +           return OVL_PATH_MERGE;
  2656. +       else
  2657. +           return OVL_PATH_UPPER;
  2658. +   } else {
  2659. +       return OVL_PATH_LOWER;
  2660. +   }
  2661. +}
  2662. +
  2663. +static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
  2664. +{
  2665. +   struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry);
  2666. +   smp_read_barrier_depends();
  2667. +   return upperdentry;
  2668. +}
  2669. +
  2670. +void ovl_path_upper(struct dentry *dentry, struct path *path)
  2671. +{
  2672. +   struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
  2673. +   struct ovl_entry *oe = dentry->d_fsdata;
  2674. +
  2675. +   path->mnt = ofs->upper_mnt;
  2676. +   path->dentry = ovl_upperdentry_dereference(oe);
  2677. +}
  2678. +
  2679. +void ovl_path_lower(struct dentry *dentry, struct path *path)
  2680. +{
  2681. +   struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
  2682. +   struct ovl_entry *oe = dentry->d_fsdata;
  2683. +
  2684. +   path->mnt = ofs->lower_mnt;
  2685. +   path->dentry = oe->lowerdentry;
  2686. +}
  2687. +
  2688. +enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
  2689. +{
  2690. +
  2691. +   enum ovl_path_type type = ovl_path_type(dentry);
  2692. +
  2693. +   if (type == OVL_PATH_LOWER)
  2694. +       ovl_path_lower(dentry, path);
  2695. +   else
  2696. +       ovl_path_upper(dentry, path);
  2697. +
  2698. +   return type;
  2699. +}
  2700. +
  2701. +struct dentry *ovl_dentry_upper(struct dentry *dentry)
  2702. +{
  2703. +   struct ovl_entry *oe = dentry->d_fsdata;
  2704. +
  2705. +   return ovl_upperdentry_dereference(oe);
  2706. +}
  2707. +
  2708. +struct dentry *ovl_dentry_lower(struct dentry *dentry)
  2709. +{
  2710. +   struct ovl_entry *oe = dentry->d_fsdata;
  2711. +
  2712. +   return oe->lowerdentry;
  2713. +}
  2714. +
  2715. +struct dentry *ovl_dentry_real(struct dentry *dentry)
  2716. +{
  2717. +   struct ovl_entry *oe = dentry->d_fsdata;
  2718. +   struct dentry *realdentry;
  2719. +
  2720. +   realdentry = ovl_upperdentry_dereference(oe);
  2721. +   if (!realdentry)
  2722. +       realdentry = oe->lowerdentry;
  2723. +
  2724. +   return realdentry;
  2725. +}
  2726. +
  2727. +struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper)
  2728. +{
  2729. +   struct dentry *realdentry;
  2730. +
  2731. +   realdentry = ovl_upperdentry_dereference(oe);
  2732. +   if (realdentry) {
  2733. +       *is_upper = true;
  2734. +   } else {
  2735. +       realdentry = oe->lowerdentry;
  2736. +       *is_upper = false;
  2737. +   }
  2738. +   return realdentry;
  2739. +}
  2740. +
  2741. +bool ovl_dentry_is_opaque(struct dentry *dentry)
  2742. +{
  2743. +   struct ovl_entry *oe = dentry->d_fsdata;
  2744. +   return oe->opaque;
  2745. +}
  2746. +
  2747. +void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque)
  2748. +{
  2749. +   struct ovl_entry *oe = dentry->d_fsdata;
  2750. +   oe->opaque = opaque;
  2751. +}
  2752. +
  2753. +void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
  2754. +{
  2755. +   struct ovl_entry *oe = dentry->d_fsdata;
  2756. +
  2757. +   WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex));
  2758. +   WARN_ON(oe->__upperdentry);
  2759. +   BUG_ON(!upperdentry->d_inode);
  2760. +   smp_wmb();
  2761. +   oe->__upperdentry = dget(upperdentry);
  2762. +}
  2763. +
  2764. +void ovl_dentry_version_inc(struct dentry *dentry)
  2765. +{
  2766. +   struct ovl_entry *oe = dentry->d_fsdata;
  2767. +
  2768. +   WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
  2769. +   oe->version++;
  2770. +}
  2771. +
  2772. +u64 ovl_dentry_version_get(struct dentry *dentry)
  2773. +{
  2774. +   struct ovl_entry *oe = dentry->d_fsdata;
  2775. +
  2776. +   WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
  2777. +   return oe->version;
  2778. +}
  2779. +
  2780. +bool ovl_is_whiteout(struct dentry *dentry)
  2781. +{
  2782. +   int res;
  2783. +   char val;
  2784. +
  2785. +   if (!dentry)
  2786. +       return false;
  2787. +   if (!dentry->d_inode)
  2788. +       return false;
  2789. +   if (!S_ISLNK(dentry->d_inode->i_mode))
  2790. +       return false;
  2791. +
  2792. +   res = vfs_getxattr(dentry, ovl_whiteout_xattr, &val, 1);
  2793. +   if (res == 1 && val == 'y')
  2794. +       return true;
  2795. +
  2796. +   return false;
  2797. +}
  2798. +
  2799. +static bool ovl_is_opaquedir(struct dentry *dentry)
  2800. +{
  2801. +   int res;
  2802. +   char val;
  2803. +
  2804. +   if (!S_ISDIR(dentry->d_inode->i_mode))
  2805. +       return false;
  2806. +
  2807. +   res = vfs_getxattr(dentry, ovl_opaque_xattr, &val, 1);
  2808. +   if (res == 1 && val == 'y')
  2809. +       return true;
  2810. +
  2811. +   return false;
  2812. +}
  2813. +
  2814. +static void ovl_entry_free(struct rcu_head *head)
  2815. +{
  2816. +   struct ovl_entry *oe = container_of(head, struct ovl_entry, rcu);
  2817. +   kfree(oe);
  2818. +}
  2819. +
  2820. +static void ovl_dentry_release(struct dentry *dentry)
  2821. +{
  2822. +   struct ovl_entry *oe = dentry->d_fsdata;
  2823. +
  2824. +   if (oe) {
  2825. +       dput(oe->__upperdentry);
  2826. +       dput(oe->__upperdentry);
  2827. +       dput(oe->lowerdentry);
  2828. +       call_rcu(&oe->rcu, ovl_entry_free);
  2829. +   }
  2830. +}
  2831. +
  2832. +const struct dentry_operations ovl_dentry_operations = {
  2833. +   .d_release = ovl_dentry_release,
  2834. +};
  2835. +
  2836. +static struct ovl_entry *ovl_alloc_entry(void)
  2837. +{
  2838. +   return kzalloc(sizeof(struct ovl_entry), GFP_KERNEL);
  2839. +}
  2840. +
  2841. +static inline struct dentry *ovl_lookup_real(struct dentry *dir,
  2842. +                        struct qstr *name)
  2843. +{
  2844. +   struct dentry *dentry;
  2845. +
  2846. +   mutex_lock(&dir->d_inode->i_mutex);
  2847. +   dentry = lookup_one_len(name->name, dir, name->len);
  2848. +   mutex_unlock(&dir->d_inode->i_mutex);
  2849. +
  2850. +   if (IS_ERR(dentry)) {
  2851. +       if (PTR_ERR(dentry) == -ENOENT)
  2852. +           dentry = NULL;
  2853. +   } else if (!dentry->d_inode) {
  2854. +       dput(dentry);
  2855. +       dentry = NULL;
  2856. +   }
  2857. +   return dentry;
  2858. +}
  2859. +
  2860. +static int ovl_do_lookup(struct dentry *dentry)
  2861. +{
  2862. +   struct ovl_entry *oe;
  2863. +   struct dentry *upperdir;
  2864. +   struct dentry *lowerdir;
  2865. +   struct dentry *upperdentry = NULL;
  2866. +   struct dentry *lowerdentry = NULL;
  2867. +   struct inode *inode = NULL;
  2868. +   int err;
  2869. +
  2870. +   err = -ENOMEM;
  2871. +   oe = ovl_alloc_entry();
  2872. +   if (!oe)
  2873. +       goto out;
  2874. +
  2875. +   upperdir = ovl_dentry_upper(dentry->d_parent);
  2876. +   lowerdir = ovl_dentry_lower(dentry->d_parent);
  2877. +
  2878. +   if (upperdir) {
  2879. +       upperdentry = ovl_lookup_real(upperdir, &dentry->d_name);
  2880. +       err = PTR_ERR(upperdentry);
  2881. +       if (IS_ERR(upperdentry))
  2882. +           goto out_put_dir;
  2883. +
  2884. +       if (lowerdir && upperdentry &&
  2885. +           (S_ISLNK(upperdentry->d_inode->i_mode) ||
  2886. +            S_ISDIR(upperdentry->d_inode->i_mode))) {
  2887. +           const struct cred *old_cred;
  2888. +           struct cred *override_cred;
  2889. +
  2890. +           err = -ENOMEM;
  2891. +           override_cred = prepare_creds();
  2892. +           if (!override_cred)
  2893. +               goto out_dput_upper;
  2894. +
  2895. +           /* CAP_SYS_ADMIN needed for getxattr */
  2896. +           cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  2897. +           old_cred = override_creds(override_cred);
  2898. +
  2899. +           if (ovl_is_opaquedir(upperdentry)) {
  2900. +               oe->opaque = true;
  2901. +           } else if (ovl_is_whiteout(upperdentry)) {
  2902. +               dput(upperdentry);
  2903. +               upperdentry = NULL;
  2904. +               oe->opaque = true;
  2905. +           }
  2906. +           revert_creds(old_cred);
  2907. +           put_cred(override_cred);
  2908. +       }
  2909. +   }
  2910. +   if (lowerdir && !oe->opaque) {
  2911. +       lowerdentry = ovl_lookup_real(lowerdir, &dentry->d_name);
  2912. +       err = PTR_ERR(lowerdentry);
  2913. +       if (IS_ERR(lowerdentry))
  2914. +           goto out_dput_upper;
  2915. +   }
  2916. +
  2917. +   if (lowerdentry && upperdentry &&
  2918. +       (!S_ISDIR(upperdentry->d_inode->i_mode) ||
  2919. +        !S_ISDIR(lowerdentry->d_inode->i_mode))) {
  2920. +       dput(lowerdentry);
  2921. +       lowerdentry = NULL;
  2922. +       oe->opaque = true;
  2923. +   }
  2924. +
  2925. +   if (lowerdentry || upperdentry) {
  2926. +       struct dentry *realdentry;
  2927. +
  2928. +       realdentry = upperdentry ? upperdentry : lowerdentry;
  2929. +       err = -ENOMEM;
  2930. +       inode = ovl_new_inode(dentry->d_sb, realdentry->d_inode->i_mode,
  2931. +                     oe);
  2932. +       if (!inode)
  2933. +           goto out_dput;
  2934. +       ovl_copyattr(realdentry->d_inode, inode);
  2935. +   }
  2936. +
  2937. +   if (upperdentry)
  2938. +       oe->__upperdentry = dget(upperdentry);
  2939. +
  2940. +   if (lowerdentry)
  2941. +       oe->lowerdentry = lowerdentry;
  2942. +
  2943. +   dentry->d_fsdata = oe;
  2944. +   dentry->d_op = &ovl_dentry_operations;
  2945. +   d_add(dentry, inode);
  2946. +
  2947. +   return 0;
  2948. +
  2949. +out_dput:
  2950. +   dput(lowerdentry);
  2951. +out_dput_upper:
  2952. +   dput(upperdentry);
  2953. +out_put_dir:
  2954. +   kfree(oe);
  2955. +out:
  2956. +   return err;
  2957. +}
  2958. +
  2959. +struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
  2960. +             struct nameidata *nd)
  2961. +{
  2962. +   int err = ovl_do_lookup(dentry);
  2963. +
  2964. +   if (err)
  2965. +       return ERR_PTR(err);
  2966. +
  2967. +   return NULL;
  2968. +}
  2969. +
  2970. +struct file *ovl_path_open(struct path *path, int flags)
  2971. +{
  2972. +   path_get(path);
  2973. +   return dentry_open(path->dentry, path->mnt, flags, current_cred());
  2974. +}
  2975. +
  2976. +static void ovl_put_super(struct super_block *sb)
  2977. +{
  2978. +   struct ovl_fs *ufs = sb->s_fs_info;
  2979. +
  2980. +   if (!(sb->s_flags & MS_RDONLY))
  2981. +       mnt_drop_write(ufs->upper_mnt);
  2982. +
  2983. +   mntput(ufs->upper_mnt);
  2984. +   mntput(ufs->lower_mnt);
  2985. +
  2986. +   kfree(ufs->config.lowerdir);
  2987. +   kfree(ufs->config.upperdir);
  2988. +   kfree(ufs);
  2989. +}
  2990. +
  2991. +static int ovl_remount_fs(struct super_block *sb, int *flagsp, char *data)
  2992. +{
  2993. +   int flags = *flagsp;
  2994. +   struct ovl_fs *ufs = sb->s_fs_info;
  2995. +
  2996. +   /* When remounting rw or ro, we need to adjust the write access to the
  2997. +    * upper fs.
  2998. +    */
  2999. +   if (((flags ^ sb->s_flags) & MS_RDONLY) == 0)
  3000. +       /* No change to readonly status */
  3001. +       return 0;
  3002. +
  3003. +   if (flags & MS_RDONLY) {
  3004. +       mnt_drop_write(ufs->upper_mnt);
  3005. +       return 0;
  3006. +   } else
  3007. +       return mnt_want_write(ufs->upper_mnt);
  3008. +}
  3009. +
  3010. +/**
  3011. + * ovl_statfs - report statistics of the upper layer filesystem
  3012. + * @dentry: A dentry of the overlayfs mount; only its super block is used
  3013. + * @buf: The struct kstatfs to fill in with stats
  3014. + *
  3015. + * Get the filesystem statistics.  As writes always target the upper layer
  3016. + * filesystem, pass the statfs to that filesystem (-ENOSYS if it has none).
  3017. + */
  3018. +static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
  3019. +{
  3020. +   struct dentry *root_dentry = dentry->d_sb->s_root;
  3021. +   struct path path;
  3022. +   ovl_path_upper(root_dentry, &path);
  3023. +
  3024. +   if (!path.dentry->d_sb->s_op->statfs)
  3025. +       return -ENOSYS;
  3026. +   return path.dentry->d_sb->s_op->statfs(path.dentry, buf);
  3027. +}
  3028. +
  3029. +/**
  3030. + * ovl_show_options
  3031. + *
  3032. + * Prints the mount options for a given superblock.
  3033. + * Returns zero; does not fail.
  3034. + */
  3035. +static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
  3036. +{
  3037. +   struct super_block *sb = dentry->d_sb;
  3038. +   struct ovl_fs *ufs = sb->s_fs_info;
  3039. +
  3040. +   seq_printf(m, ",lowerdir=%s", ufs->config.lowerdir);
  3041. +   seq_printf(m, ",upperdir=%s", ufs->config.upperdir);
  3042. +   return 0;
  3043. +}
  3044. +
  3045. +static const struct super_operations ovl_super_operations = {
  3046. +   .put_super  = ovl_put_super,
  3047. +   .remount_fs = ovl_remount_fs,
  3048. +   .statfs     = ovl_statfs,
  3049. +   .show_options   = ovl_show_options,
  3050. +};
  3051. +
  3052. +enum {
  3053. +   Opt_lowerdir,
  3054. +   Opt_upperdir,
  3055. +   Opt_err,
  3056. +};
  3057. +
  3058. +static const match_table_t ovl_tokens = {
  3059. +   {Opt_lowerdir,          "lowerdir=%s"},
  3060. +   {Opt_upperdir,          "upperdir=%s"},
  3061. +   {Opt_err,           NULL}
  3062. +};
  3063. +
  3064. +static int ovl_parse_opt(char *opt, struct ovl_config *config)
  3065. +{
  3066. +   char *p;
  3067. +
  3068. +   config->upperdir = NULL;
  3069. +   config->lowerdir = NULL;
  3070. +
  3071. +   while ((p = strsep(&opt, ",")) != NULL) {
  3072. +       int token;
  3073. +       substring_t args[MAX_OPT_ARGS];
  3074. +
  3075. +       if (!*p)
  3076. +           continue;
  3077. +
  3078. +       token = match_token(p, ovl_tokens, args);
  3079. +       switch (token) {
  3080. +       case Opt_upperdir:
  3081. +           kfree(config->upperdir);
  3082. +           config->upperdir = match_strdup(&args[0]);
  3083. +           if (!config->upperdir)
  3084. +               return -ENOMEM;
  3085. +           break;
  3086. +
  3087. +       case Opt_lowerdir:
  3088. +           kfree(config->lowerdir);
  3089. +           config->lowerdir = match_strdup(&args[0]);
  3090. +           if (!config->lowerdir)
  3091. +               return -ENOMEM;
  3092. +           break;
  3093. +
  3094. +       default:
  3095. +           return -EINVAL;
  3096. +       }
  3097. +   }
  3098. +   return 0;
  3099. +}
  3100. +
  3101. +/* Fill in the overlayfs super block from the lowerdir=/upperdir= options */
  3102. +static int ovl_fill_super(struct super_block *sb, void *data, int silent)
  3103. +{
  3104. +   struct path lowerpath;
  3105. +   struct path upperpath;
  3106. +   struct inode *root_inode;
  3107. +   struct dentry *root_dentry;
  3108. +   struct ovl_entry *oe;
  3109. +   struct ovl_fs *ufs;
  3110. +   int err;
  3111. +
  3112. +   err = -ENOMEM;
  3113. +   ufs = kmalloc(sizeof(struct ovl_fs), GFP_KERNEL);
  3114. +   if (!ufs)
  3115. +       goto out;
  3116. +   /* Allocate the root entry now, while err is still -ENOMEM */
  3117. +   oe = ovl_alloc_entry();
  3118. +   if (oe == NULL)
  3119. +       goto out_free_ufs;
  3120. +   /* A failed parse may still have duplicated one option; free it too */
  3121. +   err = ovl_parse_opt((char *) data, &ufs->config);
  3122. +   if (err)
  3123. +       goto out_free_oe;
  3124. +
  3125. +   err = -EINVAL;
  3126. +   if (!ufs->config.upperdir || !ufs->config.lowerdir) {
  3127. +       printk(KERN_ERR "overlayfs: missing upperdir or lowerdir\n");
  3128. +       goto out_free_oe;
  3129. +   }
  3130. +
  3131. +   err = kern_path(ufs->config.upperdir, LOOKUP_FOLLOW, &upperpath);
  3132. +   if (err)
  3133. +       goto out_free_oe;
  3134. +
  3135. +   err = kern_path(ufs->config.lowerdir, LOOKUP_FOLLOW, &lowerpath);
  3136. +   if (err)
  3137. +       goto out_put_upperpath;
  3138. +
  3139. +   err = -ENOTDIR;
  3140. +   if (!S_ISDIR(upperpath.dentry->d_inode->i_mode) ||
  3141. +       !S_ISDIR(lowerpath.dentry->d_inode->i_mode))
  3142. +       goto out_put_lowerpath;
  3143. +
  3144. +   sb->s_stack_depth = max(upperpath.mnt->mnt_sb->s_stack_depth,
  3145. +               lowerpath.mnt->mnt_sb->s_stack_depth) + 1;
  3146. +
  3147. +   err = -EINVAL;
  3148. +   if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
  3149. +       printk(KERN_ERR "overlayfs: maximum fs stacking depth exceeded\n");
  3150. +       goto out_put_lowerpath;
  3151. +   }
  3152. +
  3153. +   ufs->upper_mnt = clone_private_mount(&upperpath);
  3154. +   err = PTR_ERR(ufs->upper_mnt);
  3155. +   if (IS_ERR(ufs->upper_mnt)) {
  3156. +       printk(KERN_ERR "overlayfs: failed to clone upperpath\n");
  3157. +       goto out_put_lowerpath;
  3158. +   }
  3159. +
  3160. +   ufs->lower_mnt = clone_private_mount(&lowerpath);
  3161. +   err = PTR_ERR(ufs->lower_mnt);
  3162. +   if (IS_ERR(ufs->lower_mnt)) {
  3163. +       printk(KERN_ERR "overlayfs: failed to clone lowerpath\n");
  3164. +       goto out_put_upper_mnt;
  3165. +   }
  3166. +
  3167. +   /*
  3168. +    * Make lower_mnt R/O.  That way fchmod/fchown on lower file
  3169. +    * will fail instead of modifying lower fs.
  3170. +    */
  3171. +   ufs->lower_mnt->mnt_flags |= MNT_READONLY;
  3172. +
  3173. +   /* If the upper fs is r/o, we mark overlayfs r/o too */
  3174. +   if (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)
  3175. +       sb->s_flags |= MS_RDONLY;
  3176. +
  3177. +   if (!(sb->s_flags & MS_RDONLY)) {
  3178. +       err = mnt_want_write(ufs->upper_mnt);
  3179. +       if (err)
  3180. +           goto out_put_lower_mnt;
  3181. +   }
  3182. +
  3183. +   err = -ENOMEM;
  3184. +   root_inode = ovl_new_inode(sb, S_IFDIR, oe);
  3185. +   if (!root_inode)
  3186. +       goto out_drop_write;
  3187. +
  3188. +   root_dentry = d_make_root(root_inode);
  3189. +   if (!root_dentry)
  3190. +       goto out_drop_write;
  3191. +
  3192. +   mntput(upperpath.mnt);
  3193. +   mntput(lowerpath.mnt);
  3194. +
  3195. +   oe->__upperdentry = dget(upperpath.dentry);
  3196. +   oe->lowerdentry = lowerpath.dentry;
  3197. +
  3198. +   root_dentry->d_fsdata = oe;
  3199. +   root_dentry->d_op = &ovl_dentry_operations;
  3200. +
  3201. +   sb->s_op = &ovl_super_operations;
  3202. +   sb->s_root = root_dentry;
  3203. +   sb->s_fs_info = ufs;
  3204. +
  3205. +   return 0;
  3206. +
  3207. +out_drop_write:
  3208. +   if (!(sb->s_flags & MS_RDONLY))
  3209. +       mnt_drop_write(ufs->upper_mnt);
  3210. +out_put_lower_mnt:
  3211. +   mntput(ufs->lower_mnt);
  3212. +out_put_upper_mnt:
  3213. +   mntput(ufs->upper_mnt);
  3214. +out_put_lowerpath:
  3215. +   path_put(&lowerpath);
  3216. +out_put_upperpath:
  3217. +   path_put(&upperpath);
  3218. +out_free_oe:
  3219. +   kfree(oe);
  3220. +   /* ovl_parse_opt() has run: each string is NULL or allocated */
  3221. +   kfree(ufs->config.lowerdir);
  3222. +   kfree(ufs->config.upperdir);
  3223. +out_free_ufs:
  3224. +   kfree(ufs);
  3225. +out:
  3226. +   return err;
  3227. +}
  3228. +
  3229. +static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
  3230. +               const char *dev_name, void *raw_data)
  3231. +{
  3232. +   return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
  3233. +}
  3234. +
  3235. +static struct file_system_type ovl_fs_type = {
  3236. +   .owner      = THIS_MODULE,
  3237. +   .name       = "overlayfs",
  3238. +   .mount      = ovl_mount,
  3239. +   .kill_sb    = kill_anon_super,
  3240. +};
  3241. +
  3242. +static int __init ovl_init(void)
  3243. +{
  3244. +   return register_filesystem(&ovl_fs_type);
  3245. +}
  3246. +
  3247. +static void __exit ovl_exit(void)
  3248. +{
  3249. +   unregister_filesystem(&ovl_fs_type);
  3250. +}
  3251. +
  3252. +module_init(ovl_init);
  3253. +module_exit(ovl_exit);
  3254. diff -ruNb a//fs/splice.c b//fs/splice.c
  3255. --- a//fs/splice.c  2012-10-12 21:48:25.000000000 +0100
  3256. +++ b//fs/splice.c  2012-10-21 15:32:47.265212787 +0100
  3257. @@ -1305,6 +1305,7 @@
  3258.  
  3259.     return ret;
  3260.  }
  3261. +EXPORT_SYMBOL(do_splice_direct);
  3262.  
  3263.  static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
  3264.                    struct pipe_inode_info *opipe,
  3265. diff -ruNb a//include/linux/fs.h b//include/linux/fs.h
  3266. --- a//include/linux/fs.h   2012-10-12 21:48:25.000000000 +0100
  3267. +++ b//include/linux/fs.h   2012-10-21 15:35:00.152382302 +0100
  3268. @@ -499,6 +499,12 @@
  3269.   */
  3270.  #include <linux/quota.h>
  3271.  
  3272. +/*
  3273. + * Maximum number of layers of fs stack.  Needs to be limited to
  3274. + * prevent kernel stack overflow
  3275. + */
  3276. +#define FILESYSTEM_MAX_STACK_DEPTH 2
  3277. +
  3278.  /**
  3279.   * enum positive_aop_returns - aop return codes with specific semantics
  3280.   *
  3281. @@ -1542,6 +1548,11 @@
  3282.  
  3283.     /* Being remounted read-only */
  3284.     int s_readonly_remount;
  3285. +
  3286. +   /*
  3287. +    * Indicates how deep in a filesystem stack this SB is
  3288. +    */
  3289. +   int s_stack_depth;
  3290.  };
  3291.  
  3292.  /* superblock cache pruning functions */
  3293. @@ -1693,6 +1704,8 @@
  3294.     int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
  3295.               u64 len);
  3296.     int (*update_time)(struct inode *, struct timespec *, int);
  3297. +   struct file *(*open) (struct dentry *, struct file *,
  3298. +                 const struct cred *);
  3299.  } ____cacheline_aligned;
  3300.  
  3301.  struct seq_file;
  3302. @@ -2057,6 +2070,7 @@
  3303.  extern struct file *filp_open(const char *, int, umode_t);
  3304.  extern struct file *file_open_root(struct dentry *, struct vfsmount *,
  3305.                    const char *, int);
  3306. +extern struct file *vfs_open(struct path *, struct file *, const struct cred *);
  3307.  extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
  3308.                  const struct cred *);
  3309.  extern int filp_close(struct file *, fl_owner_t id);
  3310. @@ -2249,6 +2263,7 @@
  3311.  #endif
  3312.  extern int notify_change(struct dentry *, struct iattr *);
  3313.  extern int inode_permission(struct inode *, int);
  3314. +extern int inode_only_permission(struct inode *, int);
  3315.  extern int generic_permission(struct inode *, int);
  3316.  
  3317.  static inline bool execute_ok(struct inode *inode)
  3318. diff -ruNb a//include/linux/mount.h b//include/linux/mount.h
  3319. --- a//include/linux/mount.h    2012-10-12 21:48:25.000000000 +0100
  3320. +++ b//include/linux/mount.h    2012-10-21 15:33:09.262261274 +0100
  3321. @@ -66,6 +66,9 @@
  3322.  extern void mnt_unpin(struct vfsmount *mnt);
  3323.  extern int __mnt_is_readonly(struct vfsmount *mnt);
  3324.  
  3325. +struct path;
  3326. +extern struct vfsmount *clone_private_mount(struct path *path);
  3327. +
  3328.  struct file_system_type;
  3329.  extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
  3330.                       int flags, const char *name,
  3331. diff -ruNb a//MAINTAINERS b//MAINTAINERS
  3332. --- a//MAINTAINERS  2012-10-12 21:48:25.000000000 +0100
  3333. +++ b//MAINTAINERS  2012-10-21 15:34:04.767813670 +0100
  3334. @@ -5066,6 +5066,13 @@
  3335.  F: include/scsi/osd_*
  3336.  F: fs/exofs/
  3337.  
  3338. +OVERLAYFS FILESYSTEM
  3339. +M: Miklos Szeredi <miklos@szeredi.hu>
  3340. +L: linux-fsdevel@vger.kernel.org
  3341. +S: Supported
  3342. +F: fs/overlayfs/*
  3343. +F: Documentation/filesystems/overlayfs.txt
  3344. +
  3345.  P54 WIRELESS DRIVER
  3346.  M: Christian Lamparter <chunkeey@googlemail.com>
  3347.  L: linux-wireless@vger.kernel.org
  3348. diff -ruNb a//security/apparmor/apparmorfs.c b//security/apparmor/apparmorfs.c
  3349. --- a//security/apparmor/apparmorfs.c   2012-10-12 21:48:25.000000000 +0100
  3350. +++ b//security/apparmor/apparmorfs.c   2012-10-21 15:35:27.442720548 +0100
  3351. @@ -198,9 +198,22 @@
  3352.     { }
  3353.  };
  3354.  
  3355. +static struct aa_fs_entry aa_fs_entry_mount[] = {
  3356. +   AA_FS_FILE_STRING("mask", "mount umount"),
  3357. +   { }
  3358. +};
  3359. +
  3360. +static struct aa_fs_entry aa_fs_entry_namespaces[] = {
  3361. +   AA_FS_FILE_BOOLEAN("profile",       1),
  3362. +   AA_FS_FILE_BOOLEAN("pivot_root",    1),
  3363. +   { }
  3364. +};
  3365. +
  3366.  static struct aa_fs_entry aa_fs_entry_features[] = {
  3367.     AA_FS_DIR("domain",         aa_fs_entry_domain),
  3368.     AA_FS_DIR("file",           aa_fs_entry_file),
  3369. +   AA_FS_DIR("mount",          aa_fs_entry_mount),
  3370. +   AA_FS_DIR("namespaces",         aa_fs_entry_namespaces),
  3371.     AA_FS_FILE_U64("capability",        VFS_CAP_FLAGS_MASK),
  3372.     AA_FS_DIR("rlimit",         aa_fs_entry_rlimit),
  3373.     { }
  3374. diff -ruNb a//security/apparmor/audit.c b//security/apparmor/audit.c
  3375. --- a//security/apparmor/audit.c    2012-10-12 21:48:25.000000000 +0100
  3376. +++ b//security/apparmor/audit.c    2012-10-21 15:35:27.442720548 +0100
  3377. @@ -44,6 +44,10 @@
  3378.     "file_mmap",
  3379.     "file_mprotect",
  3380.  
  3381. +   "pivotroot",
  3382. +   "mount",
  3383. +   "umount",
  3384. +
  3385.     "create",
  3386.     "post_create",
  3387.     "bind",
  3388. diff -ruNb a//security/apparmor/domain.c b//security/apparmor/domain.c
  3389. --- a//security/apparmor/domain.c   2012-10-12 21:48:25.000000000 +0100
  3390. +++ b//security/apparmor/domain.c   2012-10-21 15:35:27.443720414 +0100
  3391. @@ -242,7 +242,7 @@
  3392.   *
  3393.   * Returns: refcounted profile, or NULL on failure (MAYBE NULL)
  3394.   */
  3395. -static struct aa_profile *x_table_lookup(struct aa_profile *profile, u32 xindex)
  3396. +struct aa_profile *x_table_lookup(struct aa_profile *profile, u32 xindex)
  3397.  {
  3398.     struct aa_profile *new_profile = NULL;
  3399.     struct aa_namespace *ns = profile->ns;
  3400. diff -ruNb a//security/apparmor/include/apparmor.h b//security/apparmor/include/apparmor.h
  3401. --- a//security/apparmor/include/apparmor.h 2012-10-12 21:48:25.000000000 +0100
  3402. +++ b//security/apparmor/include/apparmor.h 2012-10-21 15:35:27.443720414 +0100
  3403. @@ -29,8 +29,9 @@
  3404.  #define AA_CLASS_NET       4
  3405.  #define AA_CLASS_RLIMITS   5
  3406.  #define AA_CLASS_DOMAIN        6
  3407. +#define AA_CLASS_MOUNT     7
  3408.  
  3409. -#define AA_CLASS_LAST      AA_CLASS_DOMAIN
  3410. +#define AA_CLASS_LAST      AA_CLASS_MOUNT
  3411.  
  3412.  /* Control parameters settable through module/boot flags */
  3413.  extern enum audit_mode aa_g_audit;
  3414. diff -ruNb a//security/apparmor/include/audit.h b//security/apparmor/include/audit.h
  3415. --- a//security/apparmor/include/audit.h    2012-10-12 21:48:25.000000000 +0100
  3416. +++ b//security/apparmor/include/audit.h    2012-10-21 15:35:27.443720414 +0100
  3417. @@ -73,6 +73,10 @@
  3418.     OP_FMMAP,
  3419.     OP_FMPROT,
  3420.  
  3421. +   OP_PIVOTROOT,
  3422. +   OP_MOUNT,
  3423. +   OP_UMOUNT,
  3424. +
  3425.     OP_CREATE,
  3426.     OP_POST_CREATE,
  3427.     OP_BIND,
  3428. @@ -122,6 +126,13 @@
  3429.             unsigned long max;
  3430.         } rlim;
  3431.         struct {
  3432. +           const char *src_name;
  3433. +           const char *type;
  3434. +           const char *trans;
  3435. +           const char *data;
  3436. +           unsigned long flags;
  3437. +       } mnt;
  3438. +       struct {
  3439.             const char *target;
  3440.             u32 request;
  3441.             u32 denied;
  3442. diff -ruNb a//security/apparmor/include/domain.h b//security/apparmor/include/domain.h
  3443. --- a//security/apparmor/include/domain.h   2012-10-12 21:48:25.000000000 +0100
  3444. +++ b//security/apparmor/include/domain.h   2012-10-21 15:35:27.443720414 +0100
  3445. @@ -23,6 +23,8 @@
  3446.     char **table;
  3447.  };
  3448.  
  3449. +struct aa_profile *x_table_lookup(struct aa_profile *profile, u32 xindex);
  3450. +
  3451.  int apparmor_bprm_set_creds(struct linux_binprm *bprm);
  3452.  int apparmor_bprm_secureexec(struct linux_binprm *bprm);
  3453.  void apparmor_bprm_committing_creds(struct linux_binprm *bprm);
  3454. diff -ruNb a//security/apparmor/include/mount.h b//security/apparmor/include/mount.h
  3455. --- a//security/apparmor/include/mount.h    1970-01-01 01:00:00.000000000 +0100
  3456. +++ b//security/apparmor/include/mount.h    2012-10-21 15:35:27.443720414 +0100
  3457. @@ -0,0 +1,54 @@
  3458. +/*
  3459. + * AppArmor security module
  3460. + *
  3461. + * This file contains AppArmor mount mediation function definitions.
  3462. + *
  3463. + * Copyright 2012 Canonical Ltd.
  3464. + *
  3465. + * This program is free software; you can redistribute it and/or
  3466. + * modify it under the terms of the GNU General Public License as
  3467. + * published by the Free Software Foundation, version 2 of the
  3468. + * License.
  3469. + */
  3470. +
  3471. +#ifndef __AA_MOUNT_H
  3472. +#define __AA_MOUNT_H
  3473. +
  3474. +#include <linux/fs.h>
  3475. +#include <linux/path.h>
  3476. +
  3477. +#include "domain.h"
  3478. +#include "policy.h"
  3479. +
  3480. +/* mount perms */
  3481. +#define AA_MAY_PIVOTROOT   0x01
  3482. +#define AA_MAY_MOUNT       0x02
  3483. +#define AA_MAY_UMOUNT      0x04
  3484. +#define AA_AUDIT_DATA      0x40
  3485. +#define AA_CONT_MATCH      0x40
  3486. +
  3487. +#define AA_MS_IGNORE_MASK (MS_KERNMOUNT | MS_NOSEC | MS_ACTIVE | MS_BORN)
  3488. +
  3489. +int aa_remount(struct aa_profile *profile, struct path *path,
  3490. +          unsigned long flags, void *data);
  3491. +
  3492. +int aa_bind_mount(struct aa_profile *profile, struct path *path,
  3493. +         const char *old_name, unsigned long flags);
  3494. +
  3495. +
  3496. +int aa_mount_change_type(struct aa_profile *profile, struct path *path,
  3497. +            unsigned long flags);
  3498. +
  3499. +int aa_move_mount(struct aa_profile *profile, struct path *path,
  3500. +         const char *old_name);
  3501. +
  3502. +int aa_new_mount(struct aa_profile *profile, const char *dev_name,
  3503. +        struct path *path, const char *type, unsigned long flags,
  3504. +        void *data);
  3505. +
  3506. +int aa_umount(struct aa_profile *profile, struct vfsmount *mnt, int flags);
  3507. +
  3508. +int aa_pivotroot(struct aa_profile *profile, struct path *old_path,
  3509. +         struct path *new_path);
  3510. +
  3511. +#endif /* __AA_MOUNT_H */
  3512. diff -ruNb a//security/apparmor/lsm.c b//security/apparmor/lsm.c
  3513. --- a//security/apparmor/lsm.c  2012-10-12 21:48:25.000000000 +0100
  3514. +++ b//security/apparmor/lsm.c  2012-10-21 15:35:27.444720280 +0100
  3515. @@ -35,6 +35,7 @@
  3516.  #include "include/path.h"
  3517.  #include "include/policy.h"
  3518.  #include "include/procattr.h"
  3519. +#include "include/mount.h"
  3520.  
  3521.  /* Flag indicating whether initialization completed */
  3522.  int apparmor_initialized __initdata;
  3523. @@ -503,6 +504,60 @@
  3524.                !(vma->vm_flags & VM_SHARED) ? MAP_PRIVATE : 0);
  3525.  }
  3526.  
  3527. +static int apparmor_sb_mount(char *dev_name, struct path *path, char *type,
  3528. +                unsigned long flags, void *data)
  3529. +{
  3530. +   struct aa_profile *profile;
  3531. +   int error = 0;
  3532. +
  3533. +   /* Discard magic */
  3534. +   if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
  3535. +       flags &= ~MS_MGC_MSK;
  3536. +
  3537. +   flags &= ~AA_MS_IGNORE_MASK;
  3538. +
  3539. +   profile = __aa_current_profile();
  3540. +   if (!unconfined(profile)) {
  3541. +       if (flags & MS_REMOUNT)
  3542. +           error = aa_remount(profile, path, flags, data);
  3543. +       else if (flags & MS_BIND)
  3544. +           error = aa_bind_mount(profile, path, dev_name, flags);
  3545. +       else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE |
  3546. +                 MS_UNBINDABLE))
  3547. +           error = aa_mount_change_type(profile, path, flags);
  3548. +       else if (flags & MS_MOVE)
  3549. +           error = aa_move_mount(profile, path, dev_name);
  3550. +       else
  3551. +           error = aa_new_mount(profile, dev_name, path, type,
  3552. +                        flags, data);
  3553. +   }
  3554. +   return error;
  3555. +}
  3556. +
  3557. +static int apparmor_sb_umount(struct vfsmount *mnt, int flags)
  3558. +{
  3559. +   struct aa_profile *profile;
  3560. +   int error = 0;
  3561. +
  3562. +   profile = __aa_current_profile();
  3563. +   if (!unconfined(profile))
  3564. +       error = aa_umount(profile, mnt, flags);
  3565. +
  3566. +   return error;
  3567. +}
  3568. +
  3569. +static int apparmor_sb_pivotroot(struct path *old_path, struct path *new_path)
  3570. +{
  3571. +   struct aa_profile *profile;
  3572. +   int error = 0;
  3573. +
  3574. +   profile = __aa_current_profile();
  3575. +   if (!unconfined(profile))
  3576. +       error = aa_pivotroot(profile, old_path, new_path);
  3577. +
  3578. +   return error;
  3579. +}
  3580. +
  3581.  static int apparmor_getprocattr(struct task_struct *task, char *name,
  3582.                 char **value)
  3583.  {
  3584. @@ -622,6 +677,10 @@
  3585.     .capget =           apparmor_capget,
  3586.     .capable =          apparmor_capable,
  3587.  
  3588. +   .sb_mount =         apparmor_sb_mount,
  3589. +   .sb_umount =            apparmor_sb_umount,
  3590. +   .sb_pivotroot =         apparmor_sb_pivotroot,
  3591. +
  3592.     .path_link =            apparmor_path_link,
  3593.     .path_unlink =          apparmor_path_unlink,
  3594.     .path_symlink =         apparmor_path_symlink,
  3595. diff -ruNb a//security/apparmor/Makefile b//security/apparmor/Makefile
  3596. --- a//security/apparmor/Makefile   2012-10-12 21:48:25.000000000 +0100
  3597. +++ b//security/apparmor/Makefile   2012-10-21 15:35:27.442720548 +0100
  3598. @@ -4,11 +4,10 @@
  3599.  
  3600.  apparmor-y := apparmorfs.o audit.o capability.o context.o ipc.o lib.o match.o \
  3601.                path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \
  3602. -              resource.o sid.o file.o
  3603. +              resource.o sid.o file.o mount.o
  3604.  
  3605.  clean-files := capability_names.h rlim_names.h
  3606.  
  3607. -
  3608.  # Build a lower case string table of capability names
  3609.  # Transforms lines from
  3610.  #    #define CAP_DAC_OVERRIDE     1
  3611. diff -ruNb a//security/apparmor/mount.c b//security/apparmor/mount.c
  3612. --- a//security/apparmor/mount.c    1970-01-01 01:00:00.000000000 +0100
  3613. +++ b//security/apparmor/mount.c    2012-10-21 15:35:27.444720280 +0100
  3614. @@ -0,0 +1,620 @@
  3615. +/*
  3616. + * AppArmor security module
  3617. + *
  3618. + * This file contains AppArmor mediation of mount operations
  3619. + *
  3620. + * Copyright (C) 1998-2008 Novell/SUSE
  3621. + * Copyright 2009-2012 Canonical Ltd.
  3622. + *
  3623. + * This program is free software; you can redistribute it and/or
  3624. + * modify it under the terms of the GNU General Public License as
  3625. + * published by the Free Software Foundation, version 2 of the
  3626. + * License.
  3627. + */
  3628. +
  3629. +#include <linux/fs.h>
  3630. +#include <linux/mount.h>
  3631. +#include <linux/namei.h>
  3632. +
  3633. +#include "include/apparmor.h"
  3634. +#include "include/audit.h"
  3635. +#include "include/context.h"
  3636. +#include "include/domain.h"
  3637. +#include "include/file.h"
  3638. +#include "include/match.h"
  3639. +#include "include/mount.h"
  3640. +#include "include/path.h"
  3641. +#include "include/policy.h"
  3642. +
  3643. +
  3644. +static void audit_mnt_flags(struct audit_buffer *ab, unsigned long flags)
  3645. +{
  3646. +   if (flags & MS_RDONLY)
  3647. +       audit_log_format(ab, "ro");
  3648. +   else
  3649. +       audit_log_format(ab, "rw");
  3650. +   if (flags & MS_NOSUID)
  3651. +       audit_log_format(ab, ", nosuid");
  3652. +   if (flags & MS_NODEV)
  3653. +       audit_log_format(ab, ", nodev");
  3654. +   if (flags & MS_NOEXEC)
  3655. +       audit_log_format(ab, ", noexec");
  3656. +   if (flags & MS_SYNCHRONOUS)
  3657. +       audit_log_format(ab, ", sync");
  3658. +   if (flags & MS_REMOUNT)
  3659. +       audit_log_format(ab, ", remount");
  3660. +   if (flags & MS_MANDLOCK)
  3661. +       audit_log_format(ab, ", mand");
  3662. +   if (flags & MS_DIRSYNC)
  3663. +       audit_log_format(ab, ", dirsync");
  3664. +   if (flags & MS_NOATIME)
  3665. +       audit_log_format(ab, ", noatime");
  3666. +   if (flags & MS_NODIRATIME)
  3667. +       audit_log_format(ab, ", nodiratime");
  3668. +   if (flags & MS_BIND)
  3669. +       audit_log_format(ab, flags & MS_REC ? ", rbind" : ", bind");
  3670. +   if (flags & MS_MOVE)
  3671. +       audit_log_format(ab, ", move");
  3672. +   if (flags & MS_SILENT)
  3673. +       audit_log_format(ab, ", silent");
  3674. +   if (flags & MS_POSIXACL)
  3675. +       audit_log_format(ab, ", acl");
  3676. +   if (flags & MS_UNBINDABLE)
  3677. +       audit_log_format(ab, flags & MS_REC ? ", runbindable" :
  3678. +                ", unbindable");
  3679. +   if (flags & MS_PRIVATE)
  3680. +       audit_log_format(ab, flags & MS_REC ? ", rprivate" :
  3681. +                ", private");
  3682. +   if (flags & MS_SLAVE)
  3683. +       audit_log_format(ab, flags & MS_REC ? ", rslave" :
  3684. +                ", slave");
  3685. +   if (flags & MS_SHARED)
  3686. +       audit_log_format(ab, flags & MS_REC ? ", rshared" :
  3687. +                ", shared");
  3688. +   if (flags & MS_RELATIME)
  3689. +       audit_log_format(ab, ", relatime");
  3690. +   if (flags & MS_I_VERSION)
  3691. +       audit_log_format(ab, ", iversion");
  3692. +   if (flags & MS_STRICTATIME)
  3693. +       audit_log_format(ab, ", strictatime");
  3694. +   if (flags & MS_NOUSER)
  3695. +       audit_log_format(ab, ", nouser");
  3696. +}
  3697. +
  3698. +/**
  3699. + * audit_cb - call back for mount specific audit fields
  3700. + * @ab: audit_buffer  (NOT NULL)
  3701. + * @va: audit struct to audit values of  (NOT NULL)
  3702. + */
  3703. +static void audit_cb(struct audit_buffer *ab, void *va)
  3704. +{
  3705. +   struct common_audit_data *sa = va;
  3706. +
  3707. +   if (sa->aad->mnt.type) {
  3708. +       audit_log_format(ab, " fstype=");
  3709. +       audit_log_untrustedstring(ab, sa->aad->mnt.type);
  3710. +   }
  3711. +   if (sa->aad->mnt.src_name) {
  3712. +       audit_log_format(ab, " srcname=");
  3713. +       audit_log_untrustedstring(ab, sa->aad->mnt.src_name);
  3714. +   }
  3715. +   if (sa->aad->mnt.trans) {
  3716. +       audit_log_format(ab, " trans=");
  3717. +       audit_log_untrustedstring(ab, sa->aad->mnt.trans);
  3718. +   }
  3719. +   if (sa->aad->mnt.flags || sa->aad->op == OP_MOUNT) {
  3720. +       audit_log_format(ab, " flags=\"");
  3721. +       audit_mnt_flags(ab, sa->aad->mnt.flags);
  3722. +       audit_log_format(ab, "\"");
  3723. +   }
  3724. +   if (sa->aad->mnt.data) {
  3725. +       audit_log_format(ab, " options=");
  3726. +       audit_log_untrustedstring(ab, sa->aad->mnt.data);
  3727. +   }
  3728. +}
  3729. +
  3730. +/**
  3731. + * audit_mount - handle the auditing of mount operations
  3732. + * @profile: the profile being enforced  (NOT NULL)
  3733. + * @gfp: allocation flags
  3734. + * @op: operation being mediated (an OP_* code)
  3735. + * @name: name of object being mediated (MAYBE NULL)
  3736. + * @src_name: src_name of object being mediated (MAYBE_NULL)
  3737. + * @type: type of filesystem (MAYBE_NULL)
  3738. + * @trans: name of trans (MAYBE NULL)
  3739. + * @flags: filesystem independent mount flags
  3740. + * @data: filesystem specific mount data (MAYBE NULL)
  3741. + * @request: permissions requested
  3742. + * @perms: the permissions computed for the request (NOT NULL)
  3743. + * @info: extra information message (MAYBE NULL)
  3744. + * @error: 0 if operation allowed else failure error code
  3745. + *
  3746. + * Returns: %0 or error on failure
  3747. + */
  3748. +static int audit_mount(struct aa_profile *profile, gfp_t gfp, int op,
  3749. +              const char *name, const char *src_name,
  3750. +              const char *type, const char *trans,
  3751. +              unsigned long flags, const void *data, u32 request,
  3752. +              struct file_perms *perms, const char *info, int error)
  3753. +{
  3754. +   int audit_type = AUDIT_APPARMOR_AUTO;
  3755. +   struct common_audit_data sa;
  3756. +   struct apparmor_audit_data aad = { };
  3757. +
  3758. +   if (likely(!error)) {
  3759. +       u32 mask = perms->audit;
  3760. +
  3761. +       if (unlikely(AUDIT_MODE(profile) == AUDIT_ALL))
  3762. +           mask = 0xffff;
  3763. +
  3764. +       /* mask off perms that are not being force audited */
  3765. +       request &= mask;
  3766. +
  3767. +       if (likely(!request))
  3768. +           return 0;
  3769. +       audit_type = AUDIT_APPARMOR_AUDIT;
  3770. +   } else {
  3771. +       /* only report permissions that were denied */
  3772. +       request = request & ~perms->allow;
  3773. +
  3774. +       if (request & perms->kill)
  3775. +           audit_type = AUDIT_APPARMOR_KILL;
  3776. +
  3777. +       /* quiet known rejects, assumes quiet and kill do not overlap */
  3778. +       if ((request & perms->quiet) &&
  3779. +           AUDIT_MODE(profile) != AUDIT_NOQUIET &&
  3780. +           AUDIT_MODE(profile) != AUDIT_ALL)
  3781. +           request &= ~perms->quiet;
  3782. +
  3783. +       if (!request)
  3784. +           return COMPLAIN_MODE(profile) ?
  3785. +               complain_error(error) : error;
  3786. +   }
  3787. +
  3788. +   sa.type = LSM_AUDIT_DATA_NONE;
  3789. +   sa.aad = &aad;
  3790. +   sa.aad->op = op;
  3791. +   sa.aad->name = name;
  3792. +   sa.aad->mnt.src_name = src_name;
  3793. +   sa.aad->mnt.type = type;
  3794. +   sa.aad->mnt.trans = trans;
  3795. +   sa.aad->mnt.flags = flags;
  3796. +   if (data && (perms->audit & AA_AUDIT_DATA))
  3797. +       sa.aad->mnt.data = data;
  3798. +   sa.aad->info = info;
  3799. +   sa.aad->error = error;
  3800. +
  3801. +   return aa_audit(audit_type, profile, gfp, &sa, audit_cb);
  3802. +}
  3803. +
  3804. +/**
  3805. + * match_mnt_flags - Do an ordered match on mount flags
  3806. + * @dfa: dfa to match against
  3807. + * @state: state to start in
  3808. + * @flags: mount flags to match against (only bits 0-31 are examined)
  3809. + *
  3810. + * Mount flags are encoded as an ordered match. This is done instead of
  3811. + * checking against a simple bitmask, to allow for logical operations
  3812. + * on the flags.
  3813. + * Each set bit i feeds the symbol i + 1 to the dfa, lowest bit first.
  3814. + * Returns: next state after flags match
  3815. + */
  3816. +static unsigned int match_mnt_flags(struct aa_dfa *dfa, unsigned int state,
  3817. +                   unsigned long flags)
  3818. +{
  3819. +   unsigned int i;
  3820. +   /* 1UL: an int 1 << 31 overflows and sign-extends against @flags */
  3821. +   for (i = 0; i <= 31 ; ++i) {
  3822. +       if ((1UL << i) & flags)
  3823. +           state = aa_dfa_next(dfa, state, i + 1);
  3824. +   }
  3825. +
  3826. +   return state;
  3827. +}
  3828. +
  3829. +/**
  3830. + * compute_mnt_perms - compute mount permission associated with @state
  3831. + * @dfa: dfa to match against (NOT NULL)
  3832. + * @state: state match finished in
  3833. + *
  3834. + * Returns: mount permissions
  3835. + */
  3836. +static struct file_perms compute_mnt_perms(struct aa_dfa *dfa,
  3837. +                      unsigned int state)
  3838. +{
  3839. +   struct file_perms perms;
  3840. +
  3841. +   perms.kill = 0;
  3842. +   perms.allow = dfa_user_allow(dfa, state);
  3843. +   perms.audit = dfa_user_audit(dfa, state);
  3844. +   perms.quiet = dfa_user_quiet(dfa, state);
  3845. +   perms.xindex = dfa_user_xindex(dfa, state);
  3846. +
  3847. +   return perms;
  3848. +}
  3849. +
  3850. +static const char *const mnt_info_table[] = {
  3851. +   "match succeeded",      /* 0 */
  3852. +   "failed mntpnt match",      /* 1 */
  3853. +   "failed srcname match",     /* 2 */
  3854. +   "failed type match",        /* 3 */
  3855. +   "failed flags match",       /* 4 */
  3856. +   "failed data match"     /* 5 */
  3857. +};
  3858. +
  3858. +
  3859. +/*
  3860. + * Returns 0 on success else element that match failed in, this is the
  3861. + * index into the mnt_info_table above
  3862. + */
  3863. +static int do_match_mnt(struct aa_dfa *dfa, unsigned int start,
  3864. +           const char *mntpnt, const char *devname,
  3865. +           const char *type, unsigned long flags,
  3866. +           void *data, bool binary, struct file_perms *perms)
  3867. +{
  3868. +   unsigned int state;
  3869. +
  3870. +   state = aa_dfa_match(dfa, start, mntpnt);
  3871. +   state = aa_dfa_null_transition(dfa, state);
  3872. +   if (!state)
  3873. +       return 1;
  3874. +
  3875. +   if (devname)
  3876. +       state = aa_dfa_match(dfa, state, devname);
  3877. +   state = aa_dfa_null_transition(dfa, state);
  3878. +   if (!state)
  3879. +       return 2;
  3880. +
  3881. +   if (type)
  3882. +       state = aa_dfa_match(dfa, state, type);
  3883. +   state = aa_dfa_null_transition(dfa, state);
  3884. +   if (!state)
  3885. +       return 3;
  3886. +
  3887. +   state = match_mnt_flags(dfa, state, flags);
  3888. +   if (!state)
  3889. +       return 4;
  3890. +   *perms = compute_mnt_perms(dfa, state);
  3891. +   if (perms->allow & AA_MAY_MOUNT)
  3892. +       return 0;
  3893. +
  3894. +   /* only match data if not binary and the DFA flags data is expected */
  3895. +   if (data && !binary && (perms->allow & AA_CONT_MATCH)) {
  3896. +       state = aa_dfa_null_transition(dfa, state);
  3897. +       if (!state)
  3898. +           return 4;
  3899. +
  3900. +       state = aa_dfa_match(dfa, state, data);
  3901. +       if (!state)
  3902. +           return 5;
  3903. +       *perms = compute_mnt_perms(dfa, state);
  3904. +       if (perms->allow & AA_MAY_MOUNT)
  3905. +           return 0;
  3906. +   }
  3907. +
  3908. +   /* failed at end of flags match */
  3909. +   return 4;
  3910. +}
  3911. +
  3912. +/**
  3913. + * match_mnt - handle path matching for mount
  3914. + * @profile: the confining profile
  3915. + * @mntpnt: string for the mntpnt (NOT NULL)
  3916. + * @devname: string for the devname/src_name (MAYBE NULL)
  3917. + * @type: string for the dev type (MAYBE NULL)
  3918. + * @flags: mount flags to match
  3919. + * @data: fs mount data (MAYBE NULL)
  3920. + * @binary: whether @data is binary
  3921. + * @perms: Returns: permission found by the match
  3922. + * @info: Returns: information string about the match for logging
  3923. + *
  3924. + * Returns: 0 on success else error
  3925. + */
  3926. +static int match_mnt(struct aa_profile *profile, const char *mntpnt,
  3927. +            const char *devname, const char *type,
  3928. +            unsigned long flags, void *data, bool binary,
  3929. +            struct file_perms *perms, const char **info)
  3930. +{
  3931. +   int pos;
  3932. +
  3933. +   if (!profile->policy.dfa)
  3934. +       return -EACCES;
  3935. +
  3936. +   pos = do_match_mnt(profile->policy.dfa,
  3937. +              profile->policy.start[AA_CLASS_MOUNT],
  3938. +              mntpnt, devname, type, flags, data, binary, perms);
  3939. +   if (pos) {
  3940. +       *info = mnt_info_table[pos];
  3941. +       return -EACCES;
  3942. +   }
  3943. +
  3944. +   return 0;
  3945. +}
  3946. +
  3947. +static int path_flags(struct aa_profile *profile, struct path *path)
  3948. +{
  3949. +   return profile->path_flags |    /* parens needed: '|' binds tighter than '?:' */
  3950. +       (S_ISDIR(path->dentry->d_inode->i_mode) ? PATH_IS_DIR : 0);
  3951. +}
  3952. +
  3953. +int aa_remount(struct aa_profile *profile, struct path *path,
  3954. +          unsigned long flags, void *data)
  3955. +{
  3956. +   struct file_perms perms = { };
  3957. +   const char *name, *info = NULL;
  3958. +   char *buffer = NULL;
  3959. +   int binary, error;
  3960. +
  3961. +   binary = path->dentry->d_sb->s_type->fs_flags & FS_BINARY_MOUNTDATA;
  3962. +
  3963. +   error = aa_path_name(path, path_flags(profile, path), &buffer, &name,
  3964. +                &info);
  3965. +   if (error)
  3966. +       goto audit;
  3967. +
  3968. +   error = match_mnt(profile, name, NULL, NULL, flags, data, binary,
  3969. +             &perms, &info);
  3970. +
  3971. +audit:
  3972. +   error = audit_mount(profile, GFP_KERNEL, OP_MOUNT, name, NULL, NULL,
  3973. +               NULL, flags, data, AA_MAY_MOUNT, &perms, info,
  3974. +               error);
  3975. +   kfree(buffer);
  3976. +
  3977. +   return error;
  3978. +}
  3979. +
  3980. +int aa_bind_mount(struct aa_profile *profile, struct path *path,
  3981. +         const char *dev_name, unsigned long flags)
  3982. +{
  3983. +   struct file_perms perms = { };
  3984. +   char *buffer = NULL, *old_buffer = NULL;
  3985. +   const char *name, *old_name = NULL, *info = NULL;
  3986. +   struct path old_path;
  3987. +   int error;
  3988. +
  3989. +   if (!dev_name || !*dev_name)
  3990. +       return -EINVAL;
  3991. +
  3992. +   flags &= MS_REC | MS_BIND;
  3993. +
  3994. +   error = aa_path_name(path, path_flags(profile, path), &buffer, &name,
  3995. +                &info);
  3996. +   if (error)
  3997. +       goto audit;
  3998. +
  3999. +   error = kern_path(dev_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
  4000. +   if (error)
  4001. +       goto audit;
  4002. +
  4003. +   error = aa_path_name(&old_path, path_flags(profile, &old_path),
  4004. +                &old_buffer, &old_name, &info);
  4005. +   path_put(&old_path);
  4006. +   if (error)
  4007. +       goto audit;
  4008. +
  4009. +   error = match_mnt(profile, name, old_name, NULL, flags, NULL, 0,
  4010. +             &perms, &info);
  4011. +
  4012. +audit:
  4013. +   error = audit_mount(profile, GFP_KERNEL, OP_MOUNT, name, old_name,
  4014. +               NULL, NULL, flags, NULL, AA_MAY_MOUNT, &perms,
  4015. +               info, error);
  4016. +   kfree(buffer);
  4017. +   kfree(old_buffer);
  4018. +
  4019. +   return error;
  4020. +}
  4021. +
  4022. +int aa_mount_change_type(struct aa_profile *profile, struct path *path,
  4023. +            unsigned long flags)
  4024. +{
  4025. +   struct file_perms perms = { };
  4026. +   char *buffer = NULL;
  4027. +   const char *name, *info = NULL;
  4028. +   int error;
  4029. +
  4030. +   /* These are the flags allowed by do_change_type() */
  4031. +   flags &= (MS_REC | MS_SILENT | MS_SHARED | MS_PRIVATE | MS_SLAVE |
  4032. +         MS_UNBINDABLE);
  4033. +
  4034. +   error = aa_path_name(path, path_flags(profile, path), &buffer, &name,
  4035. +                &info);
  4036. +   if (error)
  4037. +       goto audit;
  4038. +
  4039. +   error = match_mnt(profile, name, NULL, NULL, flags, NULL, 0, &perms,
  4040. +             &info);
  4041. +
  4042. +audit:
  4043. +   error = audit_mount(profile, GFP_KERNEL, OP_MOUNT, name, NULL, NULL,
  4044. +               NULL, flags, NULL, AA_MAY_MOUNT, &perms, info,
  4045. +               error);
  4046. +   kfree(buffer);
  4047. +
  4048. +   return error;
  4049. +}
  4050. +
  4051. +int aa_move_mount(struct aa_profile *profile, struct path *path,
  4052. +         const char *orig_name)
  4053. +{
  4054. +   struct file_perms perms = { };
  4055. +   char *buffer = NULL, *old_buffer = NULL;
  4056. +   const char *name, *old_name = NULL, *info = NULL;
  4057. +   struct path old_path;
  4058. +   int error;
  4059. +
  4060. +   if (!orig_name || !*orig_name)
  4061. +       return -EINVAL;
  4062. +
  4063. +   error = aa_path_name(path, path_flags(profile, path), &buffer, &name,
  4064. +                &info);
  4065. +   if (error)
  4066. +       goto audit;
  4067. +
  4068. +   error = kern_path(orig_name, LOOKUP_FOLLOW, &old_path);
  4069. +   if (error)
  4070. +       goto audit;
  4071. +
  4072. +   error = aa_path_name(&old_path, path_flags(profile, &old_path),
  4073. +                &old_buffer, &old_name, &info);
  4074. +   path_put(&old_path);
  4075. +   if (error)
  4076. +       goto audit;
  4077. +
  4078. +   error = match_mnt(profile, name, old_name, NULL, MS_MOVE, NULL, 0,
  4079. +             &perms, &info);
  4080. +
  4081. +audit:
  4082. +   error = audit_mount(profile, GFP_KERNEL, OP_MOUNT, name, old_name,
  4083. +               NULL, NULL, MS_MOVE, NULL, AA_MAY_MOUNT, &perms,
  4084. +               info, error);
  4085. +   kfree(buffer);
  4086. +   kfree(old_buffer);
  4087. +
  4088. +   return error;
  4089. +}
  4090. +
  4091. +int aa_new_mount(struct aa_profile *profile, const char *orig_dev_name,
  4092. +        struct path *path, const char *type, unsigned long flags,
  4093. +        void *data)
  4094. +{
  4095. +   struct file_perms perms = { };
  4096. +   char *buffer = NULL, *dev_buffer = NULL;
  4097. +   const char *name = NULL, *dev_name = NULL, *info = NULL;
  4098. +   int binary = 1;
  4099. +   int error;
  4100. +
  4101. +   dev_name = orig_dev_name;
  4102. +   if (type) {
  4103. +       int requires_dev;
  4104. +       struct file_system_type *fstype = get_fs_type(type);
  4105. +       if (!fstype)
  4106. +           return -ENODEV;
  4107. +
  4108. +       binary = fstype->fs_flags & FS_BINARY_MOUNTDATA;
  4109. +       requires_dev = fstype->fs_flags & FS_REQUIRES_DEV;
  4110. +       put_filesystem(fstype);
  4111. +
  4112. +       if (requires_dev) {
  4113. +           struct path dev_path;
  4114. +
  4115. +           if (!dev_name || !*dev_name) {
  4116. +               error = -ENOENT;
  4117. +               goto out;
  4118. +           }
  4119. +
  4120. +           error = kern_path(dev_name, LOOKUP_FOLLOW, &dev_path);
  4121. +           if (error)
  4122. +               goto audit;
  4123. +
  4124. +           error = aa_path_name(&dev_path,
  4125. +                        path_flags(profile, &dev_path),
  4126. +                        &dev_buffer, &dev_name, &info);
  4127. +           path_put(&dev_path);
  4128. +           if (error)
  4129. +               goto audit;
  4130. +       }
  4131. +   }
  4132. +
  4133. +   error = aa_path_name(path, path_flags(profile, path), &buffer, &name,
  4134. +                &info);
  4135. +   if (error)
  4136. +       goto audit;
  4137. +
  4138. +   error = match_mnt(profile, name, dev_name, type, flags, data, binary,
  4139. +             &perms, &info);
  4140. +
  4141. +audit:
  4142. +   error = audit_mount(profile, GFP_KERNEL, OP_MOUNT, name,  dev_name,
  4143. +               type, NULL, flags, data, AA_MAY_MOUNT, &perms, info,
  4144. +               error);
  4145. +   kfree(buffer);
  4146. +   kfree(dev_buffer);
  4147. +
  4148. +out:
  4149. +   return error;
  4150. +
  4151. +}
  4152. +
  4153. +int aa_umount(struct aa_profile *profile, struct vfsmount *mnt, int flags)
  4154. +{
  4155. +   struct file_perms perms = { };
  4156. +   char *buffer = NULL;
  4157. +   const char *name, *info = NULL;
  4158. +   int error;
  4159. +
  4160. +   struct path path = { mnt, mnt->mnt_root };
  4161. +   error = aa_path_name(&path, path_flags(profile, &path), &buffer, &name,
  4162. +                &info);
  4163. +   if (error)
  4164. +       goto audit;
  4165. +
  4166. +   if (!error && profile->policy.dfa) {
  4167. +       unsigned int state;
  4168. +       state = aa_dfa_match(profile->policy.dfa,
  4169. +                    profile->policy.start[AA_CLASS_MOUNT],
  4170. +                    name);
  4171. +       perms = compute_mnt_perms(profile->policy.dfa, state);
  4172. +   }
  4173. +
  4174. +   if (AA_MAY_UMOUNT & ~perms.allow)
  4175. +       error = -EACCES;
  4176. +
  4177. +audit:
  4178. +   error = audit_mount(profile, GFP_KERNEL, OP_UMOUNT, name, NULL, NULL,
  4179. +               NULL, 0, NULL, AA_MAY_UMOUNT, &perms, info, error);
  4180. +   kfree(buffer);
  4181. +
  4182. +   return error;
  4183. +}
  4184. +
  4185. +int aa_pivotroot(struct aa_profile *profile, struct path *old_path,
  4186. +         struct path *new_path)
  4187. +{
  4188. +   struct file_perms perms = { };
  4189. +   struct aa_profile *target = NULL;
  4190. +   char *old_buffer = NULL, *new_buffer = NULL;
  4191. +   const char *old_name, *new_name = NULL, *info = NULL;
  4192. +   int error;
  4193. +
  4194. +   error = aa_path_name(old_path, path_flags(profile, old_path),
  4195. +                &old_buffer, &old_name, &info);
  4196. +   if (error)
  4197. +       goto audit;
  4198. +
  4199. +   error = aa_path_name(new_path, path_flags(profile, new_path),
  4200. +                &new_buffer, &new_name, &info);
  4201. +   if (error)
  4202. +       goto audit;
  4203. +
  4204. +   if (profile->policy.dfa) {
  4205. +       unsigned int state;
  4206. +       state = aa_dfa_match(profile->policy.dfa,
  4207. +                    profile->policy.start[AA_CLASS_MOUNT],
  4208. +                    new_name);
  4209. +       state = aa_dfa_null_transition(profile->policy.dfa, state);
  4210. +       state = aa_dfa_match(profile->policy.dfa, state, old_name);
  4211. +       perms = compute_mnt_perms(profile->policy.dfa, state);
  4212. +   }
  4213. +
  4214. +   if (AA_MAY_PIVOTROOT & perms.allow) {
  4215. +       if ((perms.xindex & AA_X_TYPE_MASK) == AA_X_TABLE) {
  4216. +           target = x_table_lookup(profile, perms.xindex);
  4217. +           if (!target)
  4218. +               error = -ENOENT;
  4219. +           else
  4220. +               error = aa_replace_current_profile(target);
  4221. +       }
  4222. +   } else
  4223. +       error = -EACCES;
  4224. +
  4225. +audit:
  4226. +   error = audit_mount(profile, GFP_KERNEL, OP_PIVOTROOT, new_name,
  4227. +               old_name, NULL, target ? target->base.name : NULL,
  4228. +               0, NULL,  AA_MAY_PIVOTROOT, &perms, info, error);
  4229. +   aa_put_profile(target);
  4230. +   kfree(old_buffer);
  4231. +   kfree(new_buffer);
  4232. +
  4233. +   return error;
  4234. +}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement