1
linux/fs/ext2/super.c

1292 lines
35 KiB
C
Raw Normal View History

/*
* linux/fs/ext2/super.c
*
* Copyright (C) 1992, 1993, 1994, 1995
* Remy Card (card@masi.ibp.fr)
* Laboratoire MASI - Institut Blaise Pascal
* Universite Pierre et Marie Curie (Paris VI)
*
* from
*
* linux/fs/minix/inode.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
* Big-endian to little-endian byte-swapping/bitmaps by
* David S. Miller (davem@caip.rutgers.edu), 1995
*/
#include <linux/module.h>
#include <linux/string.h>
[PATCH] disk quotas fail when /etc/mtab is symlinked to /proc/mounts If /etc/mtab is a regular file all of the mount options (of a file system) are written to /etc/mtab by the mount command. The quota tools look there for the quota strings for their operation. If, however, /etc/mtab is a symlink to /proc/mounts (a "good thing" in some environments) the tools don't write anything - they assume the kernel will take care of things. While the quota options are sent down to the kernel via the mount system call and the file system codes handle them properly unfortunately there is no code to echo the quota strings into /proc/mounts and the quota tools fail in the symlink case. The attached patchs modify the EXT[2|3] and JFS codes to add the necessary hooks. The show_options function of each file system in these patches currently deal with only those things that seemed related to quotas; especially in the EXT3 case more can be done (later?). Jan Kara also noted the difficulty in moving these changes above the FS codes responding similarly to myself to Andrew's comment about possible VFS migration. Issue summary: - FS codes have to process the entire string of options anyway. - Only FS codes that use quotas must have a show_options function (for quotas to work properly) however quotas are only used in a small number of FS. - Since most of the quota using FS support other options these FS codes should have the a show_options function to show those options - and the quota echoing becomes virtually negligible. Based on feedback I have modified my patches from the original: JFS a missing patch has been restored to the posting EXT[2|3] and JFS always use the show_options function - Each FS has at least one FS specific option displayed - QUOTA output is under a CONFIG_QUOTA ifdef - a follow-on patch will add a multitude of options for each FS EXT[2|3] and JFS "quota" is treated as "usrquota" EXT3 journalled data check for journalled quota removed EXT[2|3] mount when quota specified but not compiled in - no changes from my original patch. I tested the patch and the codes warn but - still mount. With all due respection I believe the comments otherwise were a - misread of the patch. Please reread/test and comment. XFS patch removed - the XFS team already made the necessary changes EXT3 mixing old and new quotas are handled differently (not purely exclusive) - if old and new quotas for the same type are used together the old type is silently depricated for compatability (e.g. usrquota and usrjquota) - mixing of old and new quotas is an error (e.g. usrjquota and grpquota) Signed-off-by: Mark Bellon <mbellon@mvista.com> Acked-by: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Jan Kara <jack@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-06 15:16:54 -07:00
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/parser.h>
#include <linux/random.h>
#include <linux/buffer_head.h>
#include <linux/smp_lock.h>
#include <linux/vfs.h>
[PATCH] disk quotas fail when /etc/mtab is symlinked to /proc/mounts If /etc/mtab is a regular file all of the mount options (of a file system) are written to /etc/mtab by the mount command. The quota tools look there for the quota strings for their operation. If, however, /etc/mtab is a symlink to /proc/mounts (a "good thing" in some environments) the tools don't write anything - they assume the kernel will take care of things. While the quota options are sent down to the kernel via the mount system call and the file system codes handle them properly unfortunately there is no code to echo the quota strings into /proc/mounts and the quota tools fail in the symlink case. The attached patchs modify the EXT[2|3] and JFS codes to add the necessary hooks. The show_options function of each file system in these patches currently deal with only those things that seemed related to quotas; especially in the EXT3 case more can be done (later?). Jan Kara also noted the difficulty in moving these changes above the FS codes responding similarly to myself to Andrew's comment about possible VFS migration. Issue summary: - FS codes have to process the entire string of options anyway. - Only FS codes that use quotas must have a show_options function (for quotas to work properly) however quotas are only used in a small number of FS. - Since most of the quota using FS support other options these FS codes should have the a show_options function to show those options - and the quota echoing becomes virtually negligible. Based on feedback I have modified my patches from the original: JFS a missing patch has been restored to the posting EXT[2|3] and JFS always use the show_options function - Each FS has at least one FS specific option displayed - QUOTA output is under a CONFIG_QUOTA ifdef - a follow-on patch will add a multitude of options for each FS EXT[2|3] and JFS "quota" is treated as "usrquota" EXT3 journalled data check for journalled quota removed EXT[2|3] mount when quota specified but not compiled in - no changes from my original patch. I tested the patch and the codes warn but - still mount. With all due respection I believe the comments otherwise were a - misread of the patch. Please reread/test and comment. XFS patch removed - the XFS team already made the necessary changes EXT3 mixing old and new quotas are handled differently (not purely exclusive) - if old and new quotas for the same type are used together the old type is silently depricated for compatability (e.g. usrquota and usrjquota) - mixing of old and new quotas is an error (e.g. usrjquota and grpquota) Signed-off-by: Mark Bellon <mbellon@mvista.com> Acked-by: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Jan Kara <jack@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-06 15:16:54 -07:00
#include <linux/seq_file.h>
#include <linux/mount.h>
#include <asm/uaccess.h>
#include "ext2.h"
#include "xattr.h"
#include "acl.h"
#include "xip.h"
static void ext2_sync_super(struct super_block *sb,
struct ext2_super_block *es);
static int ext2_remount (struct super_block * sb, int * flags, char * data);
static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf);
void ext2_error (struct super_block * sb, const char * function,
const char * fmt, ...)
{
va_list args;
struct ext2_sb_info *sbi = EXT2_SB(sb);
struct ext2_super_block *es = sbi->s_es;
if (!(sb->s_flags & MS_RDONLY)) {
sbi->s_mount_state |= EXT2_ERROR_FS;
es->s_state =
cpu_to_le16(le16_to_cpu(es->s_state) | EXT2_ERROR_FS);
ext2_sync_super(sb, es);
}
va_start(args, fmt);
printk(KERN_CRIT "EXT2-fs error (device %s): %s: ",sb->s_id, function);
vprintk(fmt, args);
printk("\n");
va_end(args);
if (test_opt(sb, ERRORS_PANIC))
panic("EXT2-fs panic from previous error\n");
if (test_opt(sb, ERRORS_RO)) {
printk("Remounting filesystem read-only\n");
sb->s_flags |= MS_RDONLY;
}
}
void ext2_warning (struct super_block * sb, const char * function,
const char * fmt, ...)
{
va_list args;
va_start(args, fmt);
printk(KERN_WARNING "EXT2-fs warning (device %s): %s: ",
sb->s_id, function);
vprintk(fmt, args);
printk("\n");
va_end(args);
}
void ext2_update_dynamic_rev(struct super_block *sb)
{
struct ext2_super_block *es = EXT2_SB(sb)->s_es;
if (le32_to_cpu(es->s_rev_level) > EXT2_GOOD_OLD_REV)
return;
ext2_warning(sb, __FUNCTION__,
"updating to rev %d because of new feature flag, "
"running e2fsck is recommended",
EXT2_DYNAMIC_REV);
es->s_first_ino = cpu_to_le32(EXT2_GOOD_OLD_FIRST_INO);
es->s_inode_size = cpu_to_le16(EXT2_GOOD_OLD_INODE_SIZE);
es->s_rev_level = cpu_to_le32(EXT2_DYNAMIC_REV);
/* leave es->s_feature_*compat flags alone */
/* es->s_uuid will be set by e2fsck if empty */
/*
* The rest of the superblock fields should be zero, and if not it
* means they are likely already in use, so leave them alone. We
* can leave it up to e2fsck to clean up any inconsistencies there.
*/
}
static void ext2_put_super (struct super_block * sb)
{
int db_count;
int i;
struct ext2_sb_info *sbi = EXT2_SB(sb);
ext2_xattr_put_super(sb);
if (!(sb->s_flags & MS_RDONLY)) {
struct ext2_super_block *es = sbi->s_es;
es->s_state = cpu_to_le16(sbi->s_mount_state);
ext2_sync_super(sb, es);
}
db_count = sbi->s_gdb_count;
for (i = 0; i < db_count; i++)
if (sbi->s_group_desc[i])
brelse (sbi->s_group_desc[i]);
kfree(sbi->s_group_desc);
kfree(sbi->s_debts);
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
brelse (sbi->s_sbh);
sb->s_fs_info = NULL;
kfree(sbi);
return;
}
static struct kmem_cache * ext2_inode_cachep;
static struct inode *ext2_alloc_inode(struct super_block *sb)
{
struct ext2_inode_info *ei;
ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
#ifdef CONFIG_EXT2_FS_POSIX_ACL
ei->i_acl = EXT2_ACL_NOT_CACHED;
ei->i_default_acl = EXT2_ACL_NOT_CACHED;
#endif
ei->vfs_inode.i_version = 1;
return &ei->vfs_inode;
}
static void ext2_destroy_inode(struct inode *inode)
{
kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
}
static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
{
struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
SLAB_CTOR_CONSTRUCTOR) {
rwlock_init(&ei->i_meta_lock);
#ifdef CONFIG_EXT2_FS_XATTR
init_rwsem(&ei->xattr_sem);
#endif
inode_init_once(&ei->vfs_inode);
}
}
static int init_inodecache(void)
{
ext2_inode_cachep = kmem_cache_create("ext2_inode_cache",
sizeof(struct ext2_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
SLAB_MEM_SPREAD),
init_once, NULL);
if (ext2_inode_cachep == NULL)
return -ENOMEM;
return 0;
}
static void destroy_inodecache(void)
{
kmem_cache_destroy(ext2_inode_cachep);
}
static void ext2_clear_inode(struct inode *inode)
{
#ifdef CONFIG_EXT2_FS_POSIX_ACL
struct ext2_inode_info *ei = EXT2_I(inode);
if (ei->i_acl && ei->i_acl != EXT2_ACL_NOT_CACHED) {
posix_acl_release(ei->i_acl);
ei->i_acl = EXT2_ACL_NOT_CACHED;
}
if (ei->i_default_acl && ei->i_default_acl != EXT2_ACL_NOT_CACHED) {
posix_acl_release(ei->i_default_acl);
ei->i_default_acl = EXT2_ACL_NOT_CACHED;
}
#endif
}
[PATCH] disk quotas fail when /etc/mtab is symlinked to /proc/mounts If /etc/mtab is a regular file all of the mount options (of a file system) are written to /etc/mtab by the mount command. The quota tools look there for the quota strings for their operation. If, however, /etc/mtab is a symlink to /proc/mounts (a "good thing" in some environments) the tools don't write anything - they assume the kernel will take care of things. While the quota options are sent down to the kernel via the mount system call and the file system codes handle them properly unfortunately there is no code to echo the quota strings into /proc/mounts and the quota tools fail in the symlink case. The attached patchs modify the EXT[2|3] and JFS codes to add the necessary hooks. The show_options function of each file system in these patches currently deal with only those things that seemed related to quotas; especially in the EXT3 case more can be done (later?). Jan Kara also noted the difficulty in moving these changes above the FS codes responding similarly to myself to Andrew's comment about possible VFS migration. Issue summary: - FS codes have to process the entire string of options anyway. - Only FS codes that use quotas must have a show_options function (for quotas to work properly) however quotas are only used in a small number of FS. - Since most of the quota using FS support other options these FS codes should have the a show_options function to show those options - and the quota echoing becomes virtually negligible. Based on feedback I have modified my patches from the original: JFS a missing patch has been restored to the posting EXT[2|3] and JFS always use the show_options function - Each FS has at least one FS specific option displayed - QUOTA output is under a CONFIG_QUOTA ifdef - a follow-on patch will add a multitude of options for each FS EXT[2|3] and JFS "quota" is treated as "usrquota" EXT3 journalled data check for journalled quota removed EXT[2|3] mount when quota specified but not compiled in - no changes from my original patch. I tested the patch and the codes warn but - still mount. With all due respection I believe the comments otherwise were a - misread of the patch. Please reread/test and comment. XFS patch removed - the XFS team already made the necessary changes EXT3 mixing old and new quotas are handled differently (not purely exclusive) - if old and new quotas for the same type are used together the old type is silently depricated for compatability (e.g. usrquota and usrjquota) - mixing of old and new quotas is an error (e.g. usrjquota and grpquota) Signed-off-by: Mark Bellon <mbellon@mvista.com> Acked-by: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Jan Kara <jack@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-06 15:16:54 -07:00
static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs)
{
struct ext2_sb_info *sbi = EXT2_SB(vfs->mnt_sb);
if (sbi->s_mount_opt & EXT2_MOUNT_GRPID)
seq_puts(seq, ",grpid");
#if defined(CONFIG_QUOTA)
if (sbi->s_mount_opt & EXT2_MOUNT_USRQUOTA)
seq_puts(seq, ",usrquota");
if (sbi->s_mount_opt & EXT2_MOUNT_GRPQUOTA)
seq_puts(seq, ",grpquota");
#endif
#if defined(CONFIG_EXT2_FS_XIP)
if (sbi->s_mount_opt & EXT2_MOUNT_XIP)
seq_puts(seq, ",xip");
#endif
[PATCH] disk quotas fail when /etc/mtab is symlinked to /proc/mounts If /etc/mtab is a regular file all of the mount options (of a file system) are written to /etc/mtab by the mount command. The quota tools look there for the quota strings for their operation. If, however, /etc/mtab is a symlink to /proc/mounts (a "good thing" in some environments) the tools don't write anything - they assume the kernel will take care of things. While the quota options are sent down to the kernel via the mount system call and the file system codes handle them properly unfortunately there is no code to echo the quota strings into /proc/mounts and the quota tools fail in the symlink case. The attached patchs modify the EXT[2|3] and JFS codes to add the necessary hooks. The show_options function of each file system in these patches currently deal with only those things that seemed related to quotas; especially in the EXT3 case more can be done (later?). Jan Kara also noted the difficulty in moving these changes above the FS codes responding similarly to myself to Andrew's comment about possible VFS migration. Issue summary: - FS codes have to process the entire string of options anyway. - Only FS codes that use quotas must have a show_options function (for quotas to work properly) however quotas are only used in a small number of FS. - Since most of the quota using FS support other options these FS codes should have the a show_options function to show those options - and the quota echoing becomes virtually negligible. Based on feedback I have modified my patches from the original: JFS a missing patch has been restored to the posting EXT[2|3] and JFS always use the show_options function - Each FS has at least one FS specific option displayed - QUOTA output is under a CONFIG_QUOTA ifdef - a follow-on patch will add a multitude of options for each FS EXT[2|3] and JFS "quota" is treated as "usrquota" EXT3 journalled data check for journalled quota removed EXT[2|3] mount when quota specified but not compiled in - no changes from my original patch. I tested the patch and the codes warn but - still mount. With all due respection I believe the comments otherwise were a - misread of the patch. Please reread/test and comment. XFS patch removed - the XFS team already made the necessary changes EXT3 mixing old and new quotas are handled differently (not purely exclusive) - if old and new quotas for the same type are used together the old type is silently depricated for compatability (e.g. usrquota and usrjquota) - mixing of old and new quotas is an error (e.g. usrjquota and grpquota) Signed-off-by: Mark Bellon <mbellon@mvista.com> Acked-by: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Jan Kara <jack@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-06 15:16:54 -07:00
return 0;
}
#ifdef CONFIG_QUOTA
static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off);
static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off);
#endif
static struct super_operations ext2_sops = {
.alloc_inode = ext2_alloc_inode,
.destroy_inode = ext2_destroy_inode,
.read_inode = ext2_read_inode,
.write_inode = ext2_write_inode,
[PATCH] ext2 corruption - regression between 2.6.9 and 2.6.10 Whilst trying to stress test a Promise SX8 card, we stumbled across some nasty filesystem corruption in ext2. Our tests involved creating an ext2 partition, mounting, running several concurrent fsx's over it, umounting, and fsck'ing, all scripted[1]. The fsck would always return with errors. This regression was traced back to a change between 2.6.9 and 2.6.10, which moves the functionality of ext2_put_inode into ext2_clear_inode. The attached patch reverses this change, and eliminated the source of corruption. Mingming Cao <cmm@us.ibm.com> said: I think his patch for ext2 is correct. The corruption on ext3 is not the same issue he saw on ext2. I believe that's the race between discard reservation and reservation in-use that we already fixed it in 2.6.12- rc1. For the problem related to ext2, at the time when we design reservation for ext3, we decide we only need to discard the reservation at the last file close, so we have ext3_discard_reservation on iput_final- >ext3_clear_inode. The ext2 handle discard preallocation differently at that time, it discard the preallocation at each iput(), not in input_final(), so we think it's unnecessary to thrash it so frequently, and the right thing to do, as we did for ext3 reservation, discard preallocation on last iput(). So we moved the ext2_discard_preallocation from ext2_put_inode(0 to ext2_clear_inode. Since ext2 preallocation is doing pre-allocation on disk, so it is possible that at the unmount time, someone is still hold the reference of the inode, so the preallocation for a file is not discard yet, so we still mark those blocks allocated on disk, while they are not actually in the inode's block map, so fsck will catch/fix that error later. This is not a issue for ext3, as ext3 reservation(pre-allocation) is done in memory. Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-04-16 15:25:45 -07:00
.put_inode = ext2_put_inode,
.delete_inode = ext2_delete_inode,
.put_super = ext2_put_super,
.write_super = ext2_write_super,
.statfs = ext2_statfs,
.remount_fs = ext2_remount,
.clear_inode = ext2_clear_inode,
[PATCH] disk quotas fail when /etc/mtab is symlinked to /proc/mounts If /etc/mtab is a regular file all of the mount options (of a file system) are written to /etc/mtab by the mount command. The quota tools look there for the quota strings for their operation. If, however, /etc/mtab is a symlink to /proc/mounts (a "good thing" in some environments) the tools don't write anything - they assume the kernel will take care of things. While the quota options are sent down to the kernel via the mount system call and the file system codes handle them properly unfortunately there is no code to echo the quota strings into /proc/mounts and the quota tools fail in the symlink case. The attached patchs modify the EXT[2|3] and JFS codes to add the necessary hooks. The show_options function of each file system in these patches currently deal with only those things that seemed related to quotas; especially in the EXT3 case more can be done (later?). Jan Kara also noted the difficulty in moving these changes above the FS codes responding similarly to myself to Andrew's comment about possible VFS migration. Issue summary: - FS codes have to process the entire string of options anyway. - Only FS codes that use quotas must have a show_options function (for quotas to work properly) however quotas are only used in a small number of FS. - Since most of the quota using FS support other options these FS codes should have the a show_options function to show those options - and the quota echoing becomes virtually negligible. Based on feedback I have modified my patches from the original: JFS a missing patch has been restored to the posting EXT[2|3] and JFS always use the show_options function - Each FS has at least one FS specific option displayed - QUOTA output is under a CONFIG_QUOTA ifdef - a follow-on patch will add a multitude of options for each FS EXT[2|3] and JFS "quota" is treated as "usrquota" EXT3 journalled data check for journalled quota removed EXT[2|3] mount when quota specified but not compiled in - no changes from my original patch. I tested the patch and the codes warn but - still mount. With all due respection I believe the comments otherwise were a - misread of the patch. Please reread/test and comment. XFS patch removed - the XFS team already made the necessary changes EXT3 mixing old and new quotas are handled differently (not purely exclusive) - if old and new quotas for the same type are used together the old type is silently depricated for compatability (e.g. usrquota and usrjquota) - mixing of old and new quotas is an error (e.g. usrjquota and grpquota) Signed-off-by: Mark Bellon <mbellon@mvista.com> Acked-by: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Jan Kara <jack@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-06 15:16:54 -07:00
.show_options = ext2_show_options,
#ifdef CONFIG_QUOTA
.quota_read = ext2_quota_read,
.quota_write = ext2_quota_write,
#endif
};
static struct dentry *ext2_get_dentry(struct super_block *sb, void *vobjp)
{
__u32 *objp = vobjp;
unsigned long ino = objp[0];
__u32 generation = objp[1];
struct inode *inode;
struct dentry *result;
if (ino < EXT2_FIRST_INO(sb) && ino != EXT2_ROOT_INO)
return ERR_PTR(-ESTALE);
if (ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count))
return ERR_PTR(-ESTALE);
/* iget isn't really right if the inode is currently unallocated!!
* ext2_read_inode currently does appropriate checks, but
* it might be "neater" to call ext2_get_inode first and check
* if the inode is valid.....
*/
inode = iget(sb, ino);
if (inode == NULL)
return ERR_PTR(-ENOMEM);
if (is_bad_inode(inode) ||
(generation && inode->i_generation != generation)) {
/* we didn't find the right inode.. */
iput(inode);
return ERR_PTR(-ESTALE);
}
/* now to find a dentry.
* If possible, get a well-connected one
*/
result = d_alloc_anon(inode);
if (!result) {
iput(inode);
return ERR_PTR(-ENOMEM);
}
return result;
}
/* Yes, most of these are left as NULL!!
* A NULL value implies the default, which works with ext2-like file
* systems, but can be improved upon.
* Currently only get_parent is required.
*/
static struct export_operations ext2_export_ops = {
.get_parent = ext2_get_parent,
.get_dentry = ext2_get_dentry,
};
static unsigned long get_sb_block(void **data)
{
unsigned long sb_block;
char *options = (char *) *data;
if (!options || strncmp(options, "sb=", 3) != 0)
return 1; /* Default location */
options += 3;
sb_block = simple_strtoul(options, &options, 0);
if (*options && *options != ',') {
printk("EXT2-fs: Invalid sb specification: %s\n",
(char *) *data);
return 1;
}
if (*options == ',')
options++;
*data = (void *) options;
return sb_block;
}
enum {
Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
[PATCH] disk quotas fail when /etc/mtab is symlinked to /proc/mounts If /etc/mtab is a regular file all of the mount options (of a file system) are written to /etc/mtab by the mount command. The quota tools look there for the quota strings for their operation. If, however, /etc/mtab is a symlink to /proc/mounts (a "good thing" in some environments) the tools don't write anything - they assume the kernel will take care of things. While the quota options are sent down to the kernel via the mount system call and the file system codes handle them properly unfortunately there is no code to echo the quota strings into /proc/mounts and the quota tools fail in the symlink case. The attached patchs modify the EXT[2|3] and JFS codes to add the necessary hooks. The show_options function of each file system in these patches currently deal with only those things that seemed related to quotas; especially in the EXT3 case more can be done (later?). Jan Kara also noted the difficulty in moving these changes above the FS codes responding similarly to myself to Andrew's comment about possible VFS migration. Issue summary: - FS codes have to process the entire string of options anyway. - Only FS codes that use quotas must have a show_options function (for quotas to work properly) however quotas are only used in a small number of FS. - Since most of the quota using FS support other options these FS codes should have the a show_options function to show those options - and the quota echoing becomes virtually negligible. Based on feedback I have modified my patches from the original: JFS a missing patch has been restored to the posting EXT[2|3] and JFS always use the show_options function - Each FS has at least one FS specific option displayed - QUOTA output is under a CONFIG_QUOTA ifdef - a follow-on patch will add a multitude of options for each FS EXT[2|3] and JFS "quota" is treated as "usrquota" EXT3 journalled data check for journalled quota removed EXT[2|3] mount when quota specified but not compiled in - no changes from my original patch. I tested the patch and the codes warn but - still mount. With all due respection I believe the comments otherwise were a - misread of the patch. Please reread/test and comment. XFS patch removed - the XFS team already made the necessary changes EXT3 mixing old and new quotas are handled differently (not purely exclusive) - if old and new quotas for the same type are used together the old type is silently depricated for compatability (e.g. usrquota and usrjquota) - mixing of old and new quotas is an error (e.g. usrjquota and grpquota) Signed-off-by: Mark Bellon <mbellon@mvista.com> Acked-by: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Jan Kara <jack@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-06 15:16:54 -07:00
Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic,
Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug,
[PATCH] disk quotas fail when /etc/mtab is symlinked to /proc/mounts If /etc/mtab is a regular file all of the mount options (of a file system) are written to /etc/mtab by the mount command. The quota tools look there for the quota strings for their operation. If, however, /etc/mtab is a symlink to /proc/mounts (a "good thing" in some environments) the tools don't write anything - they assume the kernel will take care of things. While the quota options are sent down to the kernel via the mount system call and the file system codes handle them properly unfortunately there is no code to echo the quota strings into /proc/mounts and the quota tools fail in the symlink case. The attached patchs modify the EXT[2|3] and JFS codes to add the necessary hooks. The show_options function of each file system in these patches currently deal with only those things that seemed related to quotas; especially in the EXT3 case more can be done (later?). Jan Kara also noted the difficulty in moving these changes above the FS codes responding similarly to myself to Andrew's comment about possible VFS migration. Issue summary: - FS codes have to process the entire string of options anyway. - Only FS codes that use quotas must have a show_options function (for quotas to work properly) however quotas are only used in a small number of FS. - Since most of the quota using FS support other options these FS codes should have the a show_options function to show those options - and the quota echoing becomes virtually negligible. Based on feedback I have modified my patches from the original: JFS a missing patch has been restored to the posting EXT[2|3] and JFS always use the show_options function - Each FS has at least one FS specific option displayed - QUOTA output is under a CONFIG_QUOTA ifdef - a follow-on patch will add a multitude of options for each FS EXT[2|3] and JFS "quota" is treated as "usrquota" EXT3 journalled data check for journalled quota removed EXT[2|3] mount when quota specified but not compiled in - no changes from my original patch. I tested the patch and the codes warn but - still mount. With all due respection I believe the comments otherwise were a - misread of the patch. Please reread/test and comment. XFS patch removed - the XFS team already made the necessary changes EXT3 mixing old and new quotas are handled differently (not purely exclusive) - if old and new quotas for the same type are used together the old type is silently depricated for compatability (e.g. usrquota and usrjquota) - mixing of old and new quotas is an error (e.g. usrjquota and grpquota) Signed-off-by: Mark Bellon <mbellon@mvista.com> Acked-by: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Jan Kara <jack@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-06 15:16:54 -07:00
Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr,
Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota,
Opt_usrquota, Opt_grpquota
};
static match_table_t tokens = {
{Opt_bsd_df, "bsddf"},
{Opt_minix_df, "minixdf"},
{Opt_grpid, "grpid"},
{Opt_grpid, "bsdgroups"},
{Opt_nogrpid, "nogrpid"},
{Opt_nogrpid, "sysvgroups"},
{Opt_resgid, "resgid=%u"},
{Opt_resuid, "resuid=%u"},
{Opt_sb, "sb=%u"},
{Opt_err_cont, "errors=continue"},
{Opt_err_panic, "errors=panic"},
{Opt_err_ro, "errors=remount-ro"},
{Opt_nouid32, "nouid32"},
{Opt_nocheck, "check=none"},
{Opt_nocheck, "nocheck"},
{Opt_debug, "debug"},
{Opt_oldalloc, "oldalloc"},
{Opt_orlov, "orlov"},
{Opt_nobh, "nobh"},
{Opt_user_xattr, "user_xattr"},
{Opt_nouser_xattr, "nouser_xattr"},
{Opt_acl, "acl"},
{Opt_noacl, "noacl"},
{Opt_xip, "xip"},
[PATCH] disk quotas fail when /etc/mtab is symlinked to /proc/mounts If /etc/mtab is a regular file all of the mount options (of a file system) are written to /etc/mtab by the mount command. The quota tools look there for the quota strings for their operation. If, however, /etc/mtab is a symlink to /proc/mounts (a "good thing" in some environments) the tools don't write anything - they assume the kernel will take care of things. While the quota options are sent down to the kernel via the mount system call and the file system codes handle them properly unfortunately there is no code to echo the quota strings into /proc/mounts and the quota tools fail in the symlink case. The attached patchs modify the EXT[2|3] and JFS codes to add the necessary hooks. The show_options function of each file system in these patches currently deal with only those things that seemed related to quotas; especially in the EXT3 case more can be done (later?). Jan Kara also noted the difficulty in moving these changes above the FS codes responding similarly to myself to Andrew's comment about possible VFS migration. Issue summary: - FS codes have to process the entire string of options anyway. - Only FS codes that use quotas must have a show_options function (for quotas to work properly) however quotas are only used in a small number of FS. - Since most of the quota using FS support other options these FS codes should have the a show_options function to show those options - and the quota echoing becomes virtually negligible. Based on feedback I have modified my patches from the original: JFS a missing patch has been restored to the posting EXT[2|3] and JFS always use the show_options function - Each FS has at least one FS specific option displayed - QUOTA output is under a CONFIG_QUOTA ifdef - a follow-on patch will add a multitude of options for each FS EXT[2|3] and JFS "quota" is treated as "usrquota" EXT3 journalled data check for journalled quota removed EXT[2|3] mount when quota specified but not compiled in - no changes from my original patch. I tested the patch and the codes warn but - still mount. With all due respection I believe the comments otherwise were a - misread of the patch. Please reread/test and comment. XFS patch removed - the XFS team already made the necessary changes EXT3 mixing old and new quotas are handled differently (not purely exclusive) - if old and new quotas for the same type are used together the old type is silently depricated for compatability (e.g. usrquota and usrjquota) - mixing of old and new quotas is an error (e.g. usrjquota and grpquota) Signed-off-by: Mark Bellon <mbellon@mvista.com> Acked-by: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Jan Kara <jack@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-06 15:16:54 -07:00
{Opt_grpquota, "grpquota"},
{Opt_ignore, "noquota"},
[PATCH] disk quotas fail when /etc/mtab is symlinked to /proc/mounts If /etc/mtab is a regular file all of the mount options (of a file system) are written to /etc/mtab by the mount command. The quota tools look there for the quota strings for their operation. If, however, /etc/mtab is a symlink to /proc/mounts (a "good thing" in some environments) the tools don't write anything - they assume the kernel will take care of things. While the quota options are sent down to the kernel via the mount system call and the file system codes handle them properly unfortunately there is no code to echo the quota strings into /proc/mounts and the quota tools fail in the symlink case. The attached patchs modify the EXT[2|3] and JFS codes to add the necessary hooks. The show_options function of each file system in these patches currently deal with only those things that seemed related to quotas; especially in the EXT3 case more can be done (later?). Jan Kara also noted the difficulty in moving these changes above the FS codes responding similarly to myself to Andrew's comment about possible VFS migration. Issue summary: - FS codes have to process the entire string of options anyway. - Only FS codes that use quotas must have a show_options function (for quotas to work properly) however quotas are only used in a small number of FS. - Since most of the quota using FS support other options these FS codes should have the a show_options function to show those options - and the quota echoing becomes virtually negligible. Based on feedback I have modified my patches from the original: JFS a missing patch has been restored to the posting EXT[2|3] and JFS always use the show_options function - Each FS has at least one FS specific option displayed - QUOTA output is under a CONFIG_QUOTA ifdef - a follow-on patch will add a multitude of options for each FS EXT[2|3] and JFS "quota" is treated as "usrquota" EXT3 journalled data check for journalled quota removed EXT[2|3] mount when quota specified but not compiled in - no changes from my original patch. I tested the patch and the codes warn but - still mount. With all due respection I believe the comments otherwise were a - misread of the patch. Please reread/test and comment. XFS patch removed - the XFS team already made the necessary changes EXT3 mixing old and new quotas are handled differently (not purely exclusive) - if old and new quotas for the same type are used together the old type is silently depricated for compatability (e.g. usrquota and usrjquota) - mixing of old and new quotas is an error (e.g. usrjquota and grpquota) Signed-off-by: Mark Bellon <mbellon@mvista.com> Acked-by: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Jan Kara <jack@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-06 15:16:54 -07:00
{Opt_quota, "quota"},
{Opt_usrquota, "usrquota"},
{Opt_err, NULL}
};
static int parse_options (char * options,
struct ext2_sb_info *sbi)
{
char * p;
substring_t args[MAX_OPT_ARGS];
int option;
if (!options)
return 1;
while ((p = strsep (&options, ",")) != NULL) {
int token;
if (!*p)
continue;
token = match_token(p, tokens, args);
switch (token) {
case Opt_bsd_df:
clear_opt (sbi->s_mount_opt, MINIX_DF);
break;
case Opt_minix_df:
set_opt (sbi->s_mount_opt, MINIX_DF);
break;
case Opt_grpid:
set_opt (sbi->s_mount_opt, GRPID);
break;
case Opt_nogrpid:
clear_opt (sbi->s_mount_opt, GRPID);
break;
case Opt_resuid:
if (match_int(&args[0], &option))
return 0;
sbi->s_resuid = option;
break;
case Opt_resgid:
if (match_int(&args[0], &option))
return 0;
sbi->s_resgid = option;
break;
case Opt_sb:
/* handled by get_sb_block() instead of here */
/* *sb_block = match_int(&args[0]); */
break;
case Opt_err_panic:
[PATCH] ext2: errors behaviour fix Current error behaviour for ext2 and ext3 filesystems does not fully correspond to the documentation and should be fixed. According to man 8 mount, ext2 and ext3 file systems allow to set one of 3 different on-errors behaviours: ---- start of quote man 8 mount ---- errors=continue / errors=remount-ro / errors=panic Define the behaviour when an error is encountered. (Either ignore errors and just mark the file system erroneous and continue, or remount the file system read-only, or panic and halt the system.) The default is set in the filesystem superblock, and can be changed using tune2fs(8). ---- end of quote ---- However EXT3_ERRORS_CONTINUE is not read from the superblock, and thus ERRORS_CONT is not saved on the sbi->s_mount_opt. It leads to the incorrect handle of errors on ext3. Then we've checked corresponding code in ext2 and discovered that it is buggy as well: - EXT2_ERRORS_CONTINUE is not read from the superblock (the same); - parse_option() does not clean the alternative values and thus something like (ERRORS_CONT|ERRORS_RO) can be set; - if options are omitted, parse_option() does not set any of these options. Therefore it is possible to set any combination of these options on the ext2: - none of them may be set: EXT2_ERRORS_CONTINUE on superblock / empty mount options; - any of them may be set using mount options; - 2 any options may be set: by using EXT2_ERRORS_RO/EXT2_ERRORS_PANIC on the superblock and other value in mount options; - and finally all three options may be set by adding third option in remount. Currently ext2 uses these values only in ext2_error() and it is not leading to any noticeable troubles. However somebody may be discouraged when he will try to workaround EXT2_ERRORS_PANIC on the superblock by using errors=continue in mount options. This patch: EXT2_ERRORS_CONTINUE should be read from the superblock as default value for error behaviour. parse_option() should clean the alternative options and should not change default value taken from the superblock. Signed-off-by: Vasily Averin <vvs@sw.ru> Acked-by: Kirill Korotaev <dev@openvz.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-11 01:21:50 -07:00
clear_opt (sbi->s_mount_opt, ERRORS_CONT);
clear_opt (sbi->s_mount_opt, ERRORS_RO);
set_opt (sbi->s_mount_opt, ERRORS_PANIC);
break;
case Opt_err_ro:
[PATCH] ext2: errors behaviour fix Current error behaviour for ext2 and ext3 filesystems does not fully correspond to the documentation and should be fixed. According to man 8 mount, ext2 and ext3 file systems allow to set one of 3 different on-errors behaviours: ---- start of quote man 8 mount ---- errors=continue / errors=remount-ro / errors=panic Define the behaviour when an error is encountered. (Either ignore errors and just mark the file system erroneous and continue, or remount the file system read-only, or panic and halt the system.) The default is set in the filesystem superblock, and can be changed using tune2fs(8). ---- end of quote ---- However EXT3_ERRORS_CONTINUE is not read from the superblock, and thus ERRORS_CONT is not saved on the sbi->s_mount_opt. It leads to the incorrect handle of errors on ext3. Then we've checked corresponding code in ext2 and discovered that it is buggy as well: - EXT2_ERRORS_CONTINUE is not read from the superblock (the same); - parse_option() does not clean the alternative values and thus something like (ERRORS_CONT|ERRORS_RO) can be set; - if options are omitted, parse_option() does not set any of these options. Therefore it is possible to set any combination of these options on the ext2: - none of them may be set: EXT2_ERRORS_CONTINUE on superblock / empty mount options; - any of them may be set using mount options; - 2 any options may be set: by using EXT2_ERRORS_RO/EXT2_ERRORS_PANIC on the superblock and other value in mount options; - and finally all three options may be set by adding third option in remount. Currently ext2 uses these values only in ext2_error() and it is not leading to any noticeable troubles. However somebody may be discouraged when he will try to workaround EXT2_ERRORS_PANIC on the superblock by using errors=continue in mount options. This patch: EXT2_ERRORS_CONTINUE should be read from the superblock as default value for error behaviour. parse_option() should clean the alternative options and should not change default value taken from the superblock. Signed-off-by: Vasily Averin <vvs@sw.ru> Acked-by: Kirill Korotaev <dev@openvz.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-11 01:21:50 -07:00
clear_opt (sbi->s_mount_opt, ERRORS_CONT);
clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
set_opt (sbi->s_mount_opt, ERRORS_RO);
break;
case Opt_err_cont:
[PATCH] ext2: errors behaviour fix Current error behaviour for ext2 and ext3 filesystems does not fully correspond to the documentation and should be fixed. According to man 8 mount, ext2 and ext3 file systems allow to set one of 3 different on-errors behaviours: ---- start of quote man 8 mount ---- errors=continue / errors=remount-ro / errors=panic Define the behaviour when an error is encountered. (Either ignore errors and just mark the file system erroneous and continue, or remount the file system read-only, or panic and halt the system.) The default is set in the filesystem superblock, and can be changed using tune2fs(8). ---- end of quote ---- However EXT3_ERRORS_CONTINUE is not read from the superblock, and thus ERRORS_CONT is not saved on the sbi->s_mount_opt. It leads to the incorrect handle of errors on ext3. Then we've checked corresponding code in ext2 and discovered that it is buggy as well: - EXT2_ERRORS_CONTINUE is not read from the superblock (the same); - parse_option() does not clean the alternative values and thus something like (ERRORS_CONT|ERRORS_RO) can be set; - if options are omitted, parse_option() does not set any of these options. Therefore it is possible to set any combination of these options on the ext2: - none of them may be set: EXT2_ERRORS_CONTINUE on superblock / empty mount options; - any of them may be set using mount options; - 2 any options may be set: by using EXT2_ERRORS_RO/EXT2_ERRORS_PANIC on the superblock and other value in mount options; - and finally all three options may be set by adding third option in remount. Currently ext2 uses these values only in ext2_error() and it is not leading to any noticeable troubles. However somebody may be discouraged when he will try to workaround EXT2_ERRORS_PANIC on the superblock by using errors=continue in mount options. This patch: EXT2_ERRORS_CONTINUE should be read from the superblock as default value for error behaviour. parse_option() should clean the alternative options and should not change default value taken from the superblock. Signed-off-by: Vasily Averin <vvs@sw.ru> Acked-by: Kirill Korotaev <dev@openvz.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-11 01:21:50 -07:00
clear_opt (sbi->s_mount_opt, ERRORS_RO);
clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
set_opt (sbi->s_mount_opt, ERRORS_CONT);
break;
case Opt_nouid32:
set_opt (sbi->s_mount_opt, NO_UID32);
break;
case Opt_nocheck:
clear_opt (sbi->s_mount_opt, CHECK);
break;
case Opt_debug:
set_opt (sbi->s_mount_opt, DEBUG);
break;
case Opt_oldalloc:
set_opt (sbi->s_mount_opt, OLDALLOC);
break;
case Opt_orlov:
clear_opt (sbi->s_mount_opt, OLDALLOC);
break;
case Opt_nobh:
set_opt (sbi->s_mount_opt, NOBH);
break;
#ifdef CONFIG_EXT2_FS_XATTR
case Opt_user_xattr:
set_opt (sbi->s_mount_opt, XATTR_USER);
break;
case Opt_nouser_xattr:
clear_opt (sbi->s_mount_opt, XATTR_USER);
break;
#else
case Opt_user_xattr:
case Opt_nouser_xattr:
printk("EXT2 (no)user_xattr options not supported\n");
break;
#endif
#ifdef CONFIG_EXT2_FS_POSIX_ACL
case Opt_acl:
set_opt(sbi->s_mount_opt, POSIX_ACL);
break;
case Opt_noacl:
clear_opt(sbi->s_mount_opt, POSIX_ACL);
break;
#else
case Opt_acl:
case Opt_noacl:
printk("EXT2 (no)acl options not supported\n");
break;
#endif
case Opt_xip:
#ifdef CONFIG_EXT2_FS_XIP
set_opt (sbi->s_mount_opt, XIP);
#else
printk("EXT2 xip option not supported\n");
#endif
break;
[PATCH] disk quotas fail when /etc/mtab is symlinked to /proc/mounts If /etc/mtab is a regular file all of the mount options (of a file system) are written to /etc/mtab by the mount command. The quota tools look there for the quota strings for their operation. If, however, /etc/mtab is a symlink to /proc/mounts (a "good thing" in some environments) the tools don't write anything - they assume the kernel will take care of things. While the quota options are sent down to the kernel via the mount system call and the file system codes handle them properly unfortunately there is no code to echo the quota strings into /proc/mounts and the quota tools fail in the symlink case. The attached patchs modify the EXT[2|3] and JFS codes to add the necessary hooks. The show_options function of each file system in these patches currently deal with only those things that seemed related to quotas; especially in the EXT3 case more can be done (later?). Jan Kara also noted the difficulty in moving these changes above the FS codes responding similarly to myself to Andrew's comment about possible VFS migration. Issue summary: - FS codes have to process the entire string of options anyway. - Only FS codes that use quotas must have a show_options function (for quotas to work properly) however quotas are only used in a small number of FS. - Since most of the quota using FS support other options these FS codes should have the a show_options function to show those options - and the quota echoing becomes virtually negligible. Based on feedback I have modified my patches from the original: JFS a missing patch has been restored to the posting EXT[2|3] and JFS always use the show_options function - Each FS has at least one FS specific option displayed - QUOTA output is under a CONFIG_QUOTA ifdef - a follow-on patch will add a multitude of options for each FS EXT[2|3] and JFS "quota" is treated as "usrquota" EXT3 journalled data check for journalled quota removed EXT[2|3] mount when quota specified but not compiled in - no changes from my original patch. I tested the patch and the codes warn but - still mount. With all due respection I believe the comments otherwise were a - misread of the patch. Please reread/test and comment. XFS patch removed - the XFS team already made the necessary changes EXT3 mixing old and new quotas are handled differently (not purely exclusive) - if old and new quotas for the same type are used together the old type is silently depricated for compatability (e.g. usrquota and usrjquota) - mixing of old and new quotas is an error (e.g. usrjquota and grpquota) Signed-off-by: Mark Bellon <mbellon@mvista.com> Acked-by: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Jan Kara <jack@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-06 15:16:54 -07:00
#if defined(CONFIG_QUOTA)
case Opt_quota:
case Opt_usrquota:
set_opt(sbi->s_mount_opt, USRQUOTA);
break;
case Opt_grpquota:
set_opt(sbi->s_mount_opt, GRPQUOTA);
break;
#else
case Opt_quota:
case Opt_usrquota:
case Opt_grpquota:
printk(KERN_ERR
"EXT2-fs: quota operations not supported.\n");
break;
#endif
case Opt_ignore:
break;
default:
return 0;
}
}
return 1;
}
static int ext2_setup_super (struct super_block * sb,
struct ext2_super_block * es,
int read_only)
{
int res = 0;
struct ext2_sb_info *sbi = EXT2_SB(sb);
if (le32_to_cpu(es->s_rev_level) > EXT2_MAX_SUPP_REV) {
printk ("EXT2-fs warning: revision level too high, "
"forcing read-only mode\n");
res = MS_RDONLY;
}
if (read_only)
return res;
if (!(sbi->s_mount_state & EXT2_VALID_FS))
printk ("EXT2-fs warning: mounting unchecked fs, "
"running e2fsck is recommended\n");
else if ((sbi->s_mount_state & EXT2_ERROR_FS))
printk ("EXT2-fs warning: mounting fs with errors, "
"running e2fsck is recommended\n");
else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
le16_to_cpu(es->s_mnt_count) >=
(unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
printk ("EXT2-fs warning: maximal mount count reached, "
"running e2fsck is recommended\n");
else if (le32_to_cpu(es->s_checkinterval) &&
(le32_to_cpu(es->s_lastcheck) + le32_to_cpu(es->s_checkinterval) <= get_seconds()))
printk ("EXT2-fs warning: checktime reached, "
"running e2fsck is recommended\n");
if (!le16_to_cpu(es->s_max_mnt_count))
es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT);
es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1);
ext2_write_super(sb);
if (test_opt (sb, DEBUG))
printk ("[EXT II FS %s, %s, bs=%lu, fs=%lu, gc=%lu, "
"bpg=%lu, ipg=%lu, mo=%04lx]\n",
EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize,
sbi->s_frag_size,
sbi->s_groups_count,
EXT2_BLOCKS_PER_GROUP(sb),
EXT2_INODES_PER_GROUP(sb),
sbi->s_mount_opt);
return res;
}
static int ext2_check_descriptors (struct super_block * sb)
{
int i;
int desc_block = 0;
struct ext2_sb_info *sbi = EXT2_SB(sb);
unsigned long first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
unsigned long last_block;
struct ext2_group_desc * gdp = NULL;
ext2_debug ("Checking group descriptors");
for (i = 0; i < sbi->s_groups_count; i++)
{
if (i == sbi->s_groups_count - 1)
last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
else
last_block = first_block +
(EXT2_BLOCKS_PER_GROUP(sb) - 1);
if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0)
gdp = (struct ext2_group_desc *) sbi->s_group_desc[desc_block++]->b_data;
if (le32_to_cpu(gdp->bg_block_bitmap) < first_block ||
le32_to_cpu(gdp->bg_block_bitmap) > last_block)
{
ext2_error (sb, "ext2_check_descriptors",
"Block bitmap for group %d"
" not in group (block %lu)!",
i, (unsigned long) le32_to_cpu(gdp->bg_block_bitmap));
return 0;
}
if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block ||
le32_to_cpu(gdp->bg_inode_bitmap) > last_block)
{
ext2_error (sb, "ext2_check_descriptors",
"Inode bitmap for group %d"
" not in group (block %lu)!",
i, (unsigned long) le32_to_cpu(gdp->bg_inode_bitmap));
return 0;
}
if (le32_to_cpu(gdp->bg_inode_table) < first_block ||
le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >
last_block)
{
ext2_error (sb, "ext2_check_descriptors",
"Inode table for group %d"
" not in group (block %lu)!",
i, (unsigned long) le32_to_cpu(gdp->bg_inode_table));
return 0;
}
first_block += EXT2_BLOCKS_PER_GROUP(sb);
gdp++;
}
return 1;
}
#define log2(n) ffz(~(n))
/*
* Maximal file size. There is a direct, and {,double-,triple-}indirect
* block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks.
* We need to be 1 filesystem block less than the 2^32 sector limit.
*/
static loff_t ext2_max_size(int bits)
{
loff_t res = EXT2_NDIR_BLOCKS;
/* This constant is calculated to be the largest file size for a
* dense, 4k-blocksize file such that the total number of
* sectors in the file, including data and all indirect blocks,
* does not exceed 2^32. */
const loff_t upper_limit = 0x1ff7fffd000LL;
res += 1LL << (bits-2);
res += 1LL << (2*(bits-2));
res += 1LL << (3*(bits-2));
res <<= bits;
if (res > upper_limit)
res = upper_limit;
return res;
}
static unsigned long descriptor_loc(struct super_block *sb,
unsigned long logic_sb_block,
int nr)
{
struct ext2_sb_info *sbi = EXT2_SB(sb);
unsigned long bg, first_data_block, first_meta_bg;
int has_super = 0;
first_data_block = le32_to_cpu(sbi->s_es->s_first_data_block);
first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
if (!EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_META_BG) ||
nr < first_meta_bg)
return (logic_sb_block + nr + 1);
bg = sbi->s_desc_per_block * nr;
if (ext2_bg_has_super(sb, bg))
has_super = 1;
return (first_data_block + has_super + (bg * sbi->s_blocks_per_group));
}
static int ext2_fill_super(struct super_block *sb, void *data, int silent)
{
struct buffer_head * bh;
struct ext2_sb_info * sbi;
struct ext2_super_block * es;
struct inode *root;
unsigned long block;
unsigned long sb_block = get_sb_block(&data);
unsigned long logic_sb_block;
unsigned long offset = 0;
unsigned long def_mount_opts;
int blocksize = BLOCK_SIZE;
int db_count;
int i, j;
__le32 features;
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
sb->s_fs_info = sbi;
/*
* See what the current blocksize for the device is, and
* use that as the blocksize. Otherwise (or if the blocksize
* is smaller than the default) use the default.
* This is important for devices that have a hardware
* sectorsize that is larger than the default.
*/
blocksize = sb_min_blocksize(sb, BLOCK_SIZE);
if (!blocksize) {
printk ("EXT2-fs: unable to set blocksize\n");
goto failed_sbi;
}
/*
* If the superblock doesn't start on a hardware sector boundary,
* calculate the offset.
*/
if (blocksize != BLOCK_SIZE) {
logic_sb_block = (sb_block*BLOCK_SIZE) / blocksize;
offset = (sb_block*BLOCK_SIZE) % blocksize;
} else {
logic_sb_block = sb_block;
}
if (!(bh = sb_bread(sb, logic_sb_block))) {
printk ("EXT2-fs: unable to read superblock\n");
goto failed_sbi;
}
/*
* Note: s_es must be initialized as soon as possible because
* some ext2 macro-instructions depend on its value
*/
es = (struct ext2_super_block *) (((char *)bh->b_data) + offset);
sbi->s_es = es;
sb->s_magic = le16_to_cpu(es->s_magic);
if (sb->s_magic != EXT2_SUPER_MAGIC)
goto cantfind_ext2;
/* Set defaults before we parse the mount options */
def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
if (def_mount_opts & EXT2_DEFM_DEBUG)
set_opt(sbi->s_mount_opt, DEBUG);
if (def_mount_opts & EXT2_DEFM_BSDGROUPS)
set_opt(sbi->s_mount_opt, GRPID);
if (def_mount_opts & EXT2_DEFM_UID16)
set_opt(sbi->s_mount_opt, NO_UID32);
if (def_mount_opts & EXT2_DEFM_XATTR_USER)
set_opt(sbi->s_mount_opt, XATTR_USER);
if (def_mount_opts & EXT2_DEFM_ACL)
set_opt(sbi->s_mount_opt, POSIX_ACL);
if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_PANIC)
set_opt(sbi->s_mount_opt, ERRORS_PANIC);
else if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_RO)
set_opt(sbi->s_mount_opt, ERRORS_RO);
[PATCH] ext2: errors behaviour fix Current error behaviour for ext2 and ext3 filesystems does not fully correspond to the documentation and should be fixed. According to man 8 mount, ext2 and ext3 file systems allow to set one of 3 different on-errors behaviours: ---- start of quote man 8 mount ---- errors=continue / errors=remount-ro / errors=panic Define the behaviour when an error is encountered. (Either ignore errors and just mark the file system erroneous and continue, or remount the file system read-only, or panic and halt the system.) The default is set in the filesystem superblock, and can be changed using tune2fs(8). ---- end of quote ---- However EXT3_ERRORS_CONTINUE is not read from the superblock, and thus ERRORS_CONT is not saved on the sbi->s_mount_opt. It leads to the incorrect handle of errors on ext3. Then we've checked corresponding code in ext2 and discovered that it is buggy as well: - EXT2_ERRORS_CONTINUE is not read from the superblock (the same); - parse_option() does not clean the alternative values and thus something like (ERRORS_CONT|ERRORS_RO) can be set; - if options are omitted, parse_option() does not set any of these options. Therefore it is possible to set any combination of these options on the ext2: - none of them may be set: EXT2_ERRORS_CONTINUE on superblock / empty mount options; - any of them may be set using mount options; - 2 any options may be set: by using EXT2_ERRORS_RO/EXT2_ERRORS_PANIC on the superblock and other value in mount options; - and finally all three options may be set by adding third option in remount. Currently ext2 uses these values only in ext2_error() and it is not leading to any noticeable troubles. However somebody may be discouraged when he will try to workaround EXT2_ERRORS_PANIC on the superblock by using errors=continue in mount options. This patch: EXT2_ERRORS_CONTINUE should be read from the superblock as default value for error behaviour. parse_option() should clean the alternative options and should not change default value taken from the superblock. Signed-off-by: Vasily Averin <vvs@sw.ru> Acked-by: Kirill Korotaev <dev@openvz.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-11 01:21:50 -07:00
else
set_opt(sbi->s_mount_opt, ERRORS_CONT);
sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
if (!parse_options ((char *) data, sbi))
goto failed_mount;
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
MS_POSIXACL : 0);
ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset
EXT2_MOUNT_XIP if not */
if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV &&
(EXT2_HAS_COMPAT_FEATURE(sb, ~0U) ||
EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
EXT2_HAS_INCOMPAT_FEATURE(sb, ~0U)))
printk("EXT2-fs warning: feature flags set on rev 0 fs, "
"running e2fsck is recommended\n");
/*
* Check feature flags regardless of the revision level, since we
* previously didn't change the revision level when setting the flags,
* so there is a chance incompat flags are set on a rev 0 filesystem.
*/
features = EXT2_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP);
if (features) {
printk("EXT2-fs: %s: couldn't mount because of "
"unsupported optional features (%x).\n",
sb->s_id, le32_to_cpu(features));
goto failed_mount;
}
if (!(sb->s_flags & MS_RDONLY) &&
(features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){
printk("EXT2-fs: %s: couldn't mount RDWR because of "
"unsupported optional features (%x).\n",
sb->s_id, le32_to_cpu(features));
goto failed_mount;
}
blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
if ((ext2_use_xip(sb)) && ((blocksize != PAGE_SIZE) ||
(sb->s_blocksize != blocksize))) {
if (!silent)
printk("XIP: Unsupported blocksize\n");
goto failed_mount;
}
/* If the blocksize doesn't match, re-read the thing.. */
if (sb->s_blocksize != blocksize) {
brelse(bh);
if (!sb_set_blocksize(sb, blocksize)) {
printk(KERN_ERR "EXT2-fs: blocksize too small for device.\n");
goto failed_sbi;
}
logic_sb_block = (sb_block*BLOCK_SIZE) / blocksize;
offset = (sb_block*BLOCK_SIZE) % blocksize;
bh = sb_bread(sb, logic_sb_block);
if(!bh) {
printk("EXT2-fs: Couldn't read superblock on "
"2nd try.\n");
goto failed_sbi;
}
es = (struct ext2_super_block *) (((char *)bh->b_data) + offset);
sbi->s_es = es;
if (es->s_magic != cpu_to_le16(EXT2_SUPER_MAGIC)) {
printk ("EXT2-fs: Magic mismatch, very weird !\n");
goto failed_mount;
}
}
sb->s_maxbytes = ext2_max_size(sb->s_blocksize_bits);
if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV) {
sbi->s_inode_size = EXT2_GOOD_OLD_INODE_SIZE;
sbi->s_first_ino = EXT2_GOOD_OLD_FIRST_INO;
} else {
sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
if ((sbi->s_inode_size < EXT2_GOOD_OLD_INODE_SIZE) ||
(sbi->s_inode_size & (sbi->s_inode_size - 1)) ||
(sbi->s_inode_size > blocksize)) {
printk ("EXT2-fs: unsupported inode size: %d\n",
sbi->s_inode_size);
goto failed_mount;
}
}
sbi->s_frag_size = EXT2_MIN_FRAG_SIZE <<
le32_to_cpu(es->s_log_frag_size);
if (sbi->s_frag_size == 0)
goto cantfind_ext2;
sbi->s_frags_per_block = sb->s_blocksize / sbi->s_frag_size;
sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
if (EXT2_INODE_SIZE(sb) == 0)
goto cantfind_ext2;
sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb);
if (sbi->s_inodes_per_block == 0 || sbi->s_inodes_per_group == 0)
goto cantfind_ext2;
sbi->s_itb_per_group = sbi->s_inodes_per_group /
sbi->s_inodes_per_block;
sbi->s_desc_per_block = sb->s_blocksize /
sizeof (struct ext2_group_desc);
sbi->s_sbh = bh;
sbi->s_mount_state = le16_to_cpu(es->s_state);
sbi->s_addr_per_block_bits =
log2 (EXT2_ADDR_PER_BLOCK(sb));
sbi->s_desc_per_block_bits =
log2 (EXT2_DESC_PER_BLOCK(sb));
if (sb->s_magic != EXT2_SUPER_MAGIC)
goto cantfind_ext2;
if (sb->s_blocksize != bh->b_size) {
if (!silent)
printk ("VFS: Unsupported blocksize on dev "
"%s.\n", sb->s_id);
goto failed_mount;
}
if (sb->s_blocksize != sbi->s_frag_size) {
printk ("EXT2-fs: fragsize %lu != blocksize %lu (not supported yet)\n",
sbi->s_frag_size, sb->s_blocksize);
goto failed_mount;
}
if (sbi->s_blocks_per_group > sb->s_blocksize * 8) {
printk ("EXT2-fs: #blocks per group too big: %lu\n",
sbi->s_blocks_per_group);
goto failed_mount;
}
if (sbi->s_frags_per_group > sb->s_blocksize * 8) {
printk ("EXT2-fs: #fragments per group too big: %lu\n",
sbi->s_frags_per_group);
goto failed_mount;
}
if (sbi->s_inodes_per_group > sb->s_blocksize * 8) {
printk ("EXT2-fs: #inodes per group too big: %lu\n",
sbi->s_inodes_per_group);
goto failed_mount;
}
if (EXT2_BLOCKS_PER_GROUP(sb) == 0)
goto cantfind_ext2;
sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
le32_to_cpu(es->s_first_data_block) - 1)
/ EXT2_BLOCKS_PER_GROUP(sb)) + 1;
db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) /
EXT2_DESC_PER_BLOCK(sb);
sbi->s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL);
if (sbi->s_group_desc == NULL) {
printk ("EXT2-fs: not enough memory\n");
goto failed_mount;
}
bgl_lock_init(&sbi->s_blockgroup_lock);
sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts),
GFP_KERNEL);
if (!sbi->s_debts) {
printk ("EXT2-fs: not enough memory\n");
goto failed_mount_group_desc;
}
memset(sbi->s_debts, 0, sbi->s_groups_count * sizeof(*sbi->s_debts));
for (i = 0; i < db_count; i++) {
block = descriptor_loc(sb, logic_sb_block, i);
sbi->s_group_desc[i] = sb_bread(sb, block);
if (!sbi->s_group_desc[i]) {
for (j = 0; j < i; j++)
brelse (sbi->s_group_desc[j]);
printk ("EXT2-fs: unable to read group descriptors\n");
goto failed_mount_group_desc;
}
}
if (!ext2_check_descriptors (sb)) {
printk ("EXT2-fs: group descriptors corrupted!\n");
goto failed_mount2;
}
sbi->s_gdb_count = db_count;
get_random_bytes(&sbi->s_next_generation, sizeof(u32));
spin_lock_init(&sbi->s_next_gen_lock);
percpu_counter_init(&sbi->s_freeblocks_counter,
ext2_count_free_blocks(sb));
percpu_counter_init(&sbi->s_freeinodes_counter,
ext2_count_free_inodes(sb));
percpu_counter_init(&sbi->s_dirs_counter,
ext2_count_dirs(sb));
/*
* set up enough so that it can read an inode
*/
sb->s_op = &ext2_sops;
sb->s_export_op = &ext2_export_ops;
sb->s_xattr = ext2_xattr_handlers;
root = iget(sb, EXT2_ROOT_INO);
sb->s_root = d_alloc_root(root);
if (!sb->s_root) {
iput(root);
printk(KERN_ERR "EXT2-fs: get root inode failed\n");
goto failed_mount3;
}
if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
dput(sb->s_root);
sb->s_root = NULL;
printk(KERN_ERR "EXT2-fs: corrupt root inode, run e2fsck\n");
goto failed_mount3;
}
if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL))
ext2_warning(sb, __FUNCTION__,
"mounting ext3 filesystem as ext2");
ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY);
return 0;
cantfind_ext2:
if (!silent)
printk("VFS: Can't find an ext2 filesystem on dev %s.\n",
sb->s_id);
goto failed_mount;
failed_mount3:
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
failed_mount2:
for (i = 0; i < db_count; i++)
brelse(sbi->s_group_desc[i]);
failed_mount_group_desc:
kfree(sbi->s_group_desc);
kfree(sbi->s_debts);
failed_mount:
brelse(bh);
failed_sbi:
sb->s_fs_info = NULL;
kfree(sbi);
return -EINVAL;
}
static void ext2_commit_super (struct super_block * sb,
struct ext2_super_block * es)
{
es->s_wtime = cpu_to_le32(get_seconds());
mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
sb->s_dirt = 0;
}
static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es)
{
es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
es->s_wtime = cpu_to_le32(get_seconds());
mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
sync_dirty_buffer(EXT2_SB(sb)->s_sbh);
sb->s_dirt = 0;
}
/*
* In the second extended file system, it is not necessary to
* write the super block since we use a mapping of the
* disk super block in a buffer.
*
* However, this function is still used to set the fs valid
* flags to 0. We need to set this flag to 0 since the fs
* may have been checked while mounted and e2fsck may have
* set s_state to EXT2_VALID_FS after some corrections.
*/
void ext2_write_super (struct super_block * sb)
{
struct ext2_super_block * es;
lock_kernel();
if (!(sb->s_flags & MS_RDONLY)) {
es = EXT2_SB(sb)->s_es;
if (le16_to_cpu(es->s_state) & EXT2_VALID_FS) {
ext2_debug ("setting valid to 0\n");
es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) &
~EXT2_VALID_FS);
es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
es->s_mtime = cpu_to_le32(get_seconds());
ext2_sync_super(sb, es);
} else
ext2_commit_super (sb, es);
}
sb->s_dirt = 0;
unlock_kernel();
}
static int ext2_remount (struct super_block * sb, int * flags, char * data)
{
struct ext2_sb_info * sbi = EXT2_SB(sb);
struct ext2_super_block * es;
unsigned long old_mount_opt = sbi->s_mount_opt;
struct ext2_mount_options old_opts;
unsigned long old_sb_flags;
int err;
/* Store the old options */
old_sb_flags = sb->s_flags;
old_opts.s_mount_opt = sbi->s_mount_opt;
old_opts.s_resuid = sbi->s_resuid;
old_opts.s_resgid = sbi->s_resgid;
/*
* Allow the "check" option to be passed as a remount option.
*/
if (!parse_options (data, sbi)) {
err = -EINVAL;
goto restore_opts;
}
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
es = sbi->s_es;
if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
(old_mount_opt & EXT2_MOUNT_XIP)) &&
invalidate_inodes(sb))
ext2_warning(sb, __FUNCTION__, "busy inodes while remounting "\
"xip remain in cache (no functional problem)");
if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
return 0;
if (*flags & MS_RDONLY) {
if (le16_to_cpu(es->s_state) & EXT2_VALID_FS ||
!(sbi->s_mount_state & EXT2_VALID_FS))
return 0;
/*
* OK, we are remounting a valid rw partition rdonly, so set
* the rdonly flag and then mark the partition as valid again.
*/
es->s_state = cpu_to_le16(sbi->s_mount_state);
es->s_mtime = cpu_to_le32(get_seconds());
} else {
__le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb,
~EXT2_FEATURE_RO_COMPAT_SUPP);
if (ret) {
printk("EXT2-fs: %s: couldn't remount RDWR because of "
"unsupported optional features (%x).\n",
sb->s_id, le32_to_cpu(ret));
err = -EROFS;
goto restore_opts;
}
/*
* Mounting a RDONLY partition read-write, so reread and
* store the current valid flag. (It may have been changed
* by e2fsck since we originally mounted the partition.)
*/
sbi->s_mount_state = le16_to_cpu(es->s_state);
if (!ext2_setup_super (sb, es, 0))
sb->s_flags &= ~MS_RDONLY;
}
ext2_sync_super(sb, es);
return 0;
restore_opts:
sbi->s_mount_opt = old_opts.s_mount_opt;
sbi->s_resuid = old_opts.s_resuid;
sbi->s_resgid = old_opts.s_resgid;
sb->s_flags = old_sb_flags;
return err;
}
static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
{
struct super_block *sb = dentry->d_sb;
struct ext2_sb_info *sbi = EXT2_SB(sb);
struct ext2_super_block *es = sbi->s_es;
unsigned long overhead;
int i;
u64 fsid;
if (test_opt (sb, MINIX_DF))
overhead = 0;
else {
/*
* Compute the overhead (FS structures)
*/
/*
* All of the blocks before first_data_block are
* overhead
*/
overhead = le32_to_cpu(es->s_first_data_block);
/*
* Add the overhead attributed to the superblock and
* block group descriptors. If the sparse superblocks
* feature is turned on, then not all groups have this.
*/
for (i = 0; i < sbi->s_groups_count; i++)
overhead += ext2_bg_has_super(sb, i) +
ext2_bg_num_gdb(sb, i);
/*
* Every block group has an inode bitmap, a block
* bitmap, and an inode table.
*/
overhead += (sbi->s_groups_count *
(2 + sbi->s_itb_per_group));
}
buf->f_type = EXT2_SUPER_MAGIC;
buf->f_bsize = sb->s_blocksize;
buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead;
buf->f_bfree = ext2_count_free_blocks(sb);
buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
buf->f_bavail = 0;
buf->f_files = le32_to_cpu(es->s_inodes_count);
buf->f_ffree = ext2_count_free_inodes(sb);
buf->f_namelen = EXT2_NAME_LEN;
fsid = le64_to_cpup((void *)es->s_uuid) ^
le64_to_cpup((void *)es->s_uuid + sizeof(u64));
buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
return 0;
}
[PATCH] VFS: Permit filesystem to override root dentry on mount Extend the get_sb() filesystem operation to take an extra argument that permits the VFS to pass in the target vfsmount that defines the mountpoint. The filesystem is then required to manually set the superblock and root dentry pointers. For most filesystems, this should be done with simple_set_mnt() which will set the superblock pointer and then set the root dentry to the superblock's s_root (as per the old default behaviour). The get_sb() op now returns an integer as there's now no need to return the superblock pointer. This patch permits a superblock to be implicitly shared amongst several mount points, such as can be done with NFS to avoid potential inode aliasing. In such a case, simple_set_mnt() would not be called, and instead the mnt_root and mnt_sb would be set directly. The patch also makes the following changes: (*) the get_sb_*() convenience functions in the core kernel now take a vfsmount pointer argument and return an integer, so most filesystems have to change very little. (*) If one of the convenience function is not used, then get_sb() should normally call simple_set_mnt() to instantiate the vfsmount. This will always return 0, and so can be tail-called from get_sb(). (*) generic_shutdown_super() now calls shrink_dcache_sb() to clean up the dcache upon superblock destruction rather than shrink_dcache_anon(). This is required because the superblock may now have multiple trees that aren't actually bound to s_root, but that still need to be cleaned up. The currently called functions assume that the whole tree is rooted at s_root, and that anonymous dentries are not the roots of trees which results in dentries being left unculled. However, with the way NFS superblock sharing are currently set to be implemented, these assumptions are violated: the root of the filesystem is simply a dummy dentry and inode (the real inode for '/' may well be inaccessible), and all the vfsmounts are rooted on anonymous[*] dentries with child trees. [*] Anonymous until discovered from another tree. (*) The documentation has been adjusted, including the additional bit of changing ext2_* into foo_* in the documentation. [akpm@osdl.org: convert ipath_fs, do other stuff] Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Al Viro <viro@zeniv.linux.org.uk> Cc: Nathan Scott <nathans@sgi.com> Cc: Roland Dreier <rolandd@cisco.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-23 02:02:57 -07:00
static int ext2_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
[PATCH] VFS: Permit filesystem to override root dentry on mount Extend the get_sb() filesystem operation to take an extra argument that permits the VFS to pass in the target vfsmount that defines the mountpoint. The filesystem is then required to manually set the superblock and root dentry pointers. For most filesystems, this should be done with simple_set_mnt() which will set the superblock pointer and then set the root dentry to the superblock's s_root (as per the old default behaviour). The get_sb() op now returns an integer as there's now no need to return the superblock pointer. This patch permits a superblock to be implicitly shared amongst several mount points, such as can be done with NFS to avoid potential inode aliasing. In such a case, simple_set_mnt() would not be called, and instead the mnt_root and mnt_sb would be set directly. The patch also makes the following changes: (*) the get_sb_*() convenience functions in the core kernel now take a vfsmount pointer argument and return an integer, so most filesystems have to change very little. (*) If one of the convenience function is not used, then get_sb() should normally call simple_set_mnt() to instantiate the vfsmount. This will always return 0, and so can be tail-called from get_sb(). (*) generic_shutdown_super() now calls shrink_dcache_sb() to clean up the dcache upon superblock destruction rather than shrink_dcache_anon(). This is required because the superblock may now have multiple trees that aren't actually bound to s_root, but that still need to be cleaned up. The currently called functions assume that the whole tree is rooted at s_root, and that anonymous dentries are not the roots of trees which results in dentries being left unculled. However, with the way NFS superblock sharing are currently set to be implemented, these assumptions are violated: the root of the filesystem is simply a dummy dentry and inode (the real inode for '/' may well be inaccessible), and all the vfsmounts are rooted on anonymous[*] dentries with child trees. [*] Anonymous until discovered from another tree. (*) The documentation has been adjusted, including the additional bit of changing ext2_* into foo_* in the documentation. [akpm@osdl.org: convert ipath_fs, do other stuff] Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Al Viro <viro@zeniv.linux.org.uk> Cc: Nathan Scott <nathans@sgi.com> Cc: Roland Dreier <rolandd@cisco.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-23 02:02:57 -07:00
return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super, mnt);
}
#ifdef CONFIG_QUOTA
/* Read data from quotafile - avoid pagecache and such because we cannot afford
* acquiring the locks... As quota files are never truncated and quota code
* itself serializes the operations (and noone else should touch the files)
* we don't have to be afraid of races */
static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data,
size_t len, loff_t off)
{
struct inode *inode = sb_dqopt(sb)->files[type];
sector_t blk = off >> EXT2_BLOCK_SIZE_BITS(sb);
int err = 0;
int offset = off & (sb->s_blocksize - 1);
int tocopy;
size_t toread;
struct buffer_head tmp_bh;
struct buffer_head *bh;
loff_t i_size = i_size_read(inode);
if (off > i_size)
return 0;
if (off+len > i_size)
len = i_size-off;
toread = len;
while (toread > 0) {
tocopy = sb->s_blocksize - offset < toread ?
sb->s_blocksize - offset : toread;
tmp_bh.b_state = 0;
err = ext2_get_block(inode, blk, &tmp_bh, 0);
if (err)
return err;
if (!buffer_mapped(&tmp_bh)) /* A hole? */
memset(data, 0, tocopy);
else {
bh = sb_bread(sb, tmp_bh.b_blocknr);
if (!bh)
return -EIO;
memcpy(data, bh->b_data+offset, tocopy);
brelse(bh);
}
offset = 0;
toread -= tocopy;
data += tocopy;
blk++;
}
return len;
}
/* Write to quotafile */
static ssize_t ext2_quota_write(struct super_block *sb, int type,
const char *data, size_t len, loff_t off)
{
struct inode *inode = sb_dqopt(sb)->files[type];
sector_t blk = off >> EXT2_BLOCK_SIZE_BITS(sb);
int err = 0;
int offset = off & (sb->s_blocksize - 1);
int tocopy;
size_t towrite = len;
struct buffer_head tmp_bh;
struct buffer_head *bh;
2006-07-03 00:25:20 -07:00
mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
while (towrite > 0) {
tocopy = sb->s_blocksize - offset < towrite ?
sb->s_blocksize - offset : towrite;
tmp_bh.b_state = 0;
err = ext2_get_block(inode, blk, &tmp_bh, 1);
if (err)
goto out;
if (offset || tocopy != EXT2_BLOCK_SIZE(sb))
bh = sb_bread(sb, tmp_bh.b_blocknr);
else
bh = sb_getblk(sb, tmp_bh.b_blocknr);
if (!bh) {
err = -EIO;
goto out;
}
lock_buffer(bh);
memcpy(bh->b_data+offset, data, tocopy);
flush_dcache_page(bh->b_page);
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
unlock_buffer(bh);
brelse(bh);
offset = 0;
towrite -= tocopy;
data += tocopy;
blk++;
}
out:
if (len == towrite)
return err;
if (inode->i_size < off+len-towrite)
i_size_write(inode, off+len-towrite);
inode->i_version++;
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
mutex_unlock(&inode->i_mutex);
return len - towrite;
}
#endif
static struct file_system_type ext2_fs_type = {
.owner = THIS_MODULE,
.name = "ext2",
.get_sb = ext2_get_sb,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
static int __init init_ext2_fs(void)
{
int err = init_ext2_xattr();
if (err)
return err;
err = init_inodecache();
if (err)
goto out1;
err = register_filesystem(&ext2_fs_type);
if (err)
goto out;
return 0;
out:
destroy_inodecache();
out1:
exit_ext2_xattr();
return err;
}
static void __exit exit_ext2_fs(void)
{
unregister_filesystem(&ext2_fs_type);
destroy_inodecache();
exit_ext2_xattr();
}
module_init(init_ext2_fs)
module_exit(exit_ext2_fs)