close_range(): fix the logics in descriptor table trimming
Merge tag 'pull-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull close_range() fix from Al Viro:
 "Fix the logic in descriptor table trimming"

* tag 'pull-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  close_range(): fix the logics in descriptor table trimming
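For context: the path being fixed is close_range(2) with CLOSE_RANGE_UNSHARE, where the kernel clones the caller's descriptor table and may now trim the clone. A minimal userspace sketch of that mode follows; it is illustrative only (arbitrary fd values, Linux 5.9+ for the syscall, glibc 2.34+ for the wrapper), not part of the commit:

/* Hedged example - not part of the commit. Exercises the
 * CLOSE_RANGE_UNSHARE path that this fix affects. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/close_range.h>	/* CLOSE_RANGE_UNSHARE */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int lo = open("/dev/null", O_RDONLY);	/* illustrative fds */
	int hi = open("/dev/null", O_RDONLY);

	/* Unshare the descriptor table, then close every fd in
	 * [lo, ~0U].  With the fix, the cloned table can be trimmed
	 * when the closed range covers the highest open descriptors
	 * (here it covers 'hi' as well). */
	if (close_range(lo, ~0U, CLOSE_RANGE_UNSHARE) < 0) {
		perror("close_range");
		return 1;
	}
	printf("fds %d..%d closed in a private table\n", lo, hi);
	return 0;
}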
commit 6cca119588
---
 fs/file.c | 93
diff --git a/fs/file.c b/fs/file.c
--- a/fs/file.c
+++ b/fs/file.c
@@ -272,59 +272,45 @@ static inline bool fd_is_open(unsigned int fd, const struct fdtable *fdt)
 	return test_bit(fd, fdt->open_fds);
 }
 
-static unsigned int count_open_files(struct fdtable *fdt)
-{
-	unsigned int size = fdt->max_fds;
-	unsigned int i;
-
-	/* Find the last open fd */
-	for (i = size / BITS_PER_LONG; i > 0; ) {
-		if (fdt->open_fds[--i])
-			break;
-	}
-	i = (i + 1) * BITS_PER_LONG;
-	return i;
-}
-
 /*
  * Note that a sane fdtable size always has to be a multiple of
  * BITS_PER_LONG, since we have bitmaps that are sized by this.
  *
- * 'max_fds' will normally already be properly aligned, but it
- * turns out that in the close_range() -> __close_range() ->
- * unshare_fd() -> dup_fd() -> sane_fdtable_size() we can end
- * up having a 'max_fds' value that isn't already aligned.
- *
- * Rather than make close_range() have to worry about this,
- * just make that BITS_PER_LONG alignment be part of a sane
- * fdtable size. Becuase that's really what it is.
+ * punch_hole is optional - when close_range() is asked to unshare
+ * and close, we don't need to copy descriptors in that range, so
+ * a smaller cloned descriptor table might suffice if the last
+ * currently opened descriptor falls into that range.
  */
-static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds)
+static unsigned int sane_fdtable_size(struct fdtable *fdt, struct fd_range *punch_hole)
 {
-	unsigned int count;
+	unsigned int last = find_last_bit(fdt->open_fds, fdt->max_fds);
 
-	count = count_open_files(fdt);
-	if (max_fds < NR_OPEN_DEFAULT)
-		max_fds = NR_OPEN_DEFAULT;
-	return ALIGN(min(count, max_fds), BITS_PER_LONG);
+	if (last == fdt->max_fds)
+		return NR_OPEN_DEFAULT;
+	if (punch_hole && punch_hole->to >= last && punch_hole->from <= last) {
+		last = find_last_bit(fdt->open_fds, punch_hole->from);
+		if (last == punch_hole->from)
+			return NR_OPEN_DEFAULT;
+	}
+	return ALIGN(last + 1, BITS_PER_LONG);
 }
 
 /*
- * Allocate a new files structure and copy contents from the
- * passed in files structure.
- * errorp will be valid only when the returned files_struct is NULL.
+ * Allocate a new descriptor table and copy contents from the passed in
+ * instance. Returns a pointer to cloned table on success, ERR_PTR()
+ * on failure. For 'punch_hole' see sane_fdtable_size().
  */
-struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp)
+struct files_struct *dup_fd(struct files_struct *oldf, struct fd_range *punch_hole)
 {
 	struct files_struct *newf;
 	struct file **old_fds, **new_fds;
 	unsigned int open_files, i;
 	struct fdtable *old_fdt, *new_fdt;
+	int error;
 
-	*errorp = -ENOMEM;
 	newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
 	if (!newf)
-		goto out;
+		return ERR_PTR(-ENOMEM);
 
 	atomic_set(&newf->count, 1);
 
@@ -341,7 +327,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp)
 
 	spin_lock(&oldf->file_lock);
 	old_fdt = files_fdtable(oldf);
-	open_files = sane_fdtable_size(old_fdt, max_fds);
+	open_files = sane_fdtable_size(old_fdt, punch_hole);
 
 	/*
 	 * Check whether we need to allocate a larger fd array and fd set.
@@ -354,14 +340,14 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp)
 
 	new_fdt = alloc_fdtable(open_files - 1);
 	if (!new_fdt) {
-		*errorp = -ENOMEM;
+		error = -ENOMEM;
 		goto out_release;
 	}
 
 	/* beyond sysctl_nr_open; nothing to do */
 	if (unlikely(new_fdt->max_fds < open_files)) {
 		__free_fdtable(new_fdt);
-		*errorp = -EMFILE;
+		error = -EMFILE;
 		goto out_release;
 	}
 
@@ -372,7 +358,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp)
 	 */
 	spin_lock(&oldf->file_lock);
 	old_fdt = files_fdtable(oldf);
-	open_files = sane_fdtable_size(old_fdt, max_fds);
+	open_files = sane_fdtable_size(old_fdt, punch_hole);
 	}
 
 	copy_fd_bitmaps(new_fdt, old_fdt, open_files / BITS_PER_LONG);
@@ -406,8 +392,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp)
 
 out_release:
 	kmem_cache_free(files_cachep, newf);
-out:
-	return NULL;
+	return ERR_PTR(error);
 }
 
 static struct fdtable *close_files(struct files_struct * files)
@@ -748,37 +733,25 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
 	if (fd > max_fd)
 		return -EINVAL;
 
-	if (flags & CLOSE_RANGE_UNSHARE) {
-		int ret;
-		unsigned int max_unshare_fds = NR_OPEN_MAX;
+	if ((flags & CLOSE_RANGE_UNSHARE) && atomic_read(&cur_fds->count) > 1) {
+		struct fd_range range = {fd, max_fd}, *punch_hole = &range;
 
 		/*
 		 * If the caller requested all fds to be made cloexec we always
 		 * copy all of the file descriptors since they still want to
 		 * use them.
 		 */
-		if (!(flags & CLOSE_RANGE_CLOEXEC)) {
-			/*
-			 * If the requested range is greater than the current
-			 * maximum, we're closing everything so only copy all
-			 * file descriptors beneath the lowest file descriptor.
-			 */
-			rcu_read_lock();
-			if (max_fd >= last_fd(files_fdtable(cur_fds)))
-				max_unshare_fds = fd;
-			rcu_read_unlock();
-		}
+		if (flags & CLOSE_RANGE_CLOEXEC)
+			punch_hole = NULL;
 
-		ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds);
-		if (ret)
-			return ret;
-
+		fds = dup_fd(cur_fds, punch_hole);
+		if (IS_ERR(fds))
+			return PTR_ERR(fds);
 		/*
 		 * We used to share our file descriptor table, and have now
 		 * created a private one, make sure we're using it below.
 		 */
-		if (fds)
-			swap(cur_fds, fds);
+		swap(cur_fds, fds);
 	}
 
 	if (flags & CLOSE_RANGE_CLOEXEC)
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -22,7 +22,6 @@
  * as this is the granularity returned by copy_fdset().
  */
 #define NR_OPEN_DEFAULT BITS_PER_LONG
-#define NR_OPEN_MAX ~0U
 
 struct fdtable {
 	unsigned int max_fds;
@@ -106,7 +105,10 @@ struct task_struct;
 
 void put_files_struct(struct files_struct *fs);
 int unshare_files(void);
-struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy;
+struct fd_range {
+	unsigned int from, to;
+};
+struct files_struct *dup_fd(struct files_struct *, struct fd_range *) __latent_entropy;
 void do_close_on_exec(struct files_struct *);
 int iterate_fd(struct files_struct *, unsigned,
 		int (*)(const void *, struct file *, unsigned),
@@ -115,8 +117,6 @@ int iterate_fd(struct files_struct *, unsigned,
 extern int close_fd(unsigned int fd);
 extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags);
 extern struct file *file_close_fd(unsigned int fd);
-extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
-		      struct files_struct **new_fdp);
 
 extern struct kmem_cache *files_cachep;
 
diff --git a/kernel/fork.c b/kernel/fork.c
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1756,33 +1756,30 @@ static int copy_files(unsigned long clone_flags, struct task_struct *tsk,
 		      int no_files)
 {
 	struct files_struct *oldf, *newf;
-	int error = 0;
 
 	/*
 	 * A background process may not have any files ...
 	 */
 	oldf = current->files;
 	if (!oldf)
-		goto out;
+		return 0;
 
 	if (no_files) {
 		tsk->files = NULL;
-		goto out;
+		return 0;
 	}
 
 	if (clone_flags & CLONE_FILES) {
 		atomic_inc(&oldf->count);
-		goto out;
+		return 0;
 	}
 
-	newf = dup_fd(oldf, NR_OPEN_MAX, &error);
-	if (!newf)
-		goto out;
+	newf = dup_fd(oldf, NULL);
+	if (IS_ERR(newf))
+		return PTR_ERR(newf);
 
 	tsk->files = newf;
-	error = 0;
-out:
-	return error;
+	return 0;
 }
 
 static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
@@ -3238,17 +3235,16 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
 /*
  * Unshare file descriptor table if it is being shared
  */
-int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
-	       struct files_struct **new_fdp)
+static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
 {
 	struct files_struct *fd = current->files;
-	int error = 0;
 
 	if ((unshare_flags & CLONE_FILES) &&
 	    (fd && atomic_read(&fd->count) > 1)) {
-		*new_fdp = dup_fd(fd, max_fds, &error);
-		if (!*new_fdp)
-			return error;
+		fd = dup_fd(fd, NULL);
+		if (IS_ERR(fd))
+			return PTR_ERR(fd);
+		*new_fdp = fd;
 	}
 
 	return 0;
@@ -3306,7 +3302,7 @@ int ksys_unshare(unsigned long unshare_flags)
 	err = unshare_fs(unshare_flags, &new_fs);
 	if (err)
 		goto bad_unshare_out;
-	err = unshare_fd(unshare_flags, NR_OPEN_MAX, &new_fd);
+	err = unshare_fd(unshare_flags, &new_fd);
 	if (err)
 		goto bad_unshare_cleanup_fs;
 	err = unshare_userns(unshare_flags, &new_cred);
@@ -3398,7 +3394,7 @@ int unshare_files(void)
 	struct files_struct *old, *copy = NULL;
 	int error;
 
-	error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, &copy);
+	error = unshare_fd(CLONE_FILES, &copy);
 	if (error || !copy)
 		return error;
 
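As a rough illustration of what the reworked sane_fdtable_size() computes, here is a userspace model of the trimming rule. It is a sketch under stated assumptions: last_bit() is a stand-in for the kernel's find_last_bit(), the macros are simplified rewrites of the kernel ones, and 64-bit longs are assumed:

/* Userspace model of the new trimming rule; names are illustrative. */
#include <stdio.h>

#define BITS_PER_LONG (8 * sizeof(long))
#define NR_OPEN_DEFAULT BITS_PER_LONG
#define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a))

struct fd_range { unsigned int from, to; };

/* Last set bit below 'size', or 'size' if none; mirrors find_last_bit(). */
static unsigned int last_bit(const unsigned long *map, unsigned int size)
{
	for (unsigned int i = size; i-- > 0; )
		if (map[i / BITS_PER_LONG] & (1UL << (i % BITS_PER_LONG)))
			return i;
	return size;
}

static unsigned int sane_fdtable_size(const unsigned long *open_fds,
				      unsigned int max_fds,
				      const struct fd_range *punch_hole)
{
	unsigned int last = last_bit(open_fds, max_fds);

	if (last == max_fds)			/* no descriptors open at all */
		return NR_OPEN_DEFAULT;
	if (punch_hole && punch_hole->to >= last && punch_hole->from <= last) {
		/* everything above the hole is closed too; look below it */
		last = last_bit(open_fds, punch_hole->from);
		if (last == punch_hole->from)
			return NR_OPEN_DEFAULT;
	}
	return ALIGN(last + 1, BITS_PER_LONG);
}

int main(void)
{
	/* fds 0-2 and fd 128 open (assuming 64-bit longs) */
	unsigned long open_fds[4] = { 0x7UL, 0, 0x1UL, 0 };
	struct fd_range hole = { 3, ~0U };

	/* Closing [3, ~0U] punches out fd 128, so 64 slots suffice. */
	printf("%u\n", sane_fdtable_size(open_fds, 256, &hole));  /* 64 */
	printf("%u\n", sane_fdtable_size(open_fds, 256, NULL));   /* 192 */
	return 0;
}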