vfs-6.11.inode
-----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZpEG2wAKCRCRxhvAZXjc ooW/AQDzyY+xNGt4OPMvlyFUHd5RcyiLsMhYrkKc3FaIFjesVgD+PFW5PPW12c0V Z4VHg9w1HDDuUn4XvELs7OXZpek7RgU= =eDC8 -----END PGP SIGNATURE----- Merge tag 'vfs-6.11.inode' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs Pull vfs inode / dentry updates from Christian Brauner: "This contains smaller performance improvements to inodes and dentries: inode: - Add RCU-based inode lookup variants. They avoid one inode hash lock acquire in the common case thereby significantly reducing contention. We already support RCU-based operations but didn't take advantage of them during inode insertion. Callers of iget_locked() get the improvement without any code changes. Callers that need a custom callback can switch to iget5_locked_rcu(), as btrfs did. With 20 threads each walking a dedicated 1000 dirs * 1000 files directory tree to stat(2) on a 32 core + 24GB ram vm: before: 3.54s user 892.30s system 1966% cpu 45.549 total after: 3.28s user 738.66s system 1955% cpu 37.932 total (-16.7%) Long-term we should pick up the effort to introduce more fine-grained locking and possibly improve on the currently used hash implementation. - Start zeroing i_state in inode_init_always() instead of doing it in individual filesystems. This allows us to remove an unneeded lock acquire in new_inode() and not burden individual filesystems with this. dcache: - Move d_lockref out of the area used by RCU lookup to avoid cacheline ping-pong because the embedded name is sharing a cacheline with d_lockref. 
- Fix dentry size on 32bit with CONFIG_SMP=y so it does actually end up with 128 bytes in total" * tag 'vfs-6.11.inode' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: fs: fix dentry size vfs: move d_lockref out of the area used by RCU lookup bcachefs: remove now spurious i_state initialization xfs: remove now spurious i_state initialization in xfs_inode_alloc vfs: partially sanitize i_state zeroing on inode creation xfs: preserve i_state around inode_init_always in xfs_reinit_inode btrfs: use iget5_locked_rcu vfs: add rcu-based find_inode variants for iget ops
This commit is contained in:
commit
2aae1d67fd
@ -244,7 +244,6 @@ static struct bch_inode_info *__bch2_new_inode(struct bch_fs *c)
|
||||
inode->ei_flags = 0;
|
||||
mutex_init(&inode->ei_quota_lock);
|
||||
memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush));
|
||||
inode->v.i_state = 0;
|
||||
|
||||
if (unlikely(inode_init_always(c->vfs_sb, &inode->v))) {
|
||||
kmem_cache_free(bch2_inode_cache, inode);
|
||||
|
@ -5587,7 +5587,7 @@ static struct inode *btrfs_iget_locked(struct super_block *s, u64 ino,
|
||||
args.ino = ino;
|
||||
args.root = root;
|
||||
|
||||
inode = iget5_locked(s, hashval, btrfs_find_actor,
|
||||
inode = iget5_locked_rcu(s, hashval, btrfs_find_actor,
|
||||
btrfs_init_locked_inode,
|
||||
(void *)&args);
|
||||
return inode;
|
||||
|
108
fs/inode.c
108
fs/inode.c
@ -162,6 +162,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
|
||||
inode->i_sb = sb;
|
||||
inode->i_blkbits = sb->s_blocksize_bits;
|
||||
inode->i_flags = 0;
|
||||
inode->i_state = 0;
|
||||
atomic64_set(&inode->i_sequence, 0);
|
||||
atomic_set(&inode->i_count, 1);
|
||||
inode->i_op = &empty_iops;
|
||||
@ -231,6 +232,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
|
||||
|
||||
if (unlikely(security_inode_alloc(inode)))
|
||||
return -ENOMEM;
|
||||
|
||||
this_cpu_inc(nr_inodes);
|
||||
|
||||
return 0;
|
||||
@ -886,36 +888,45 @@ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
|
||||
return freed;
|
||||
}
|
||||
|
||||
static void __wait_on_freeing_inode(struct inode *inode);
|
||||
static void __wait_on_freeing_inode(struct inode *inode, bool locked);
|
||||
/*
|
||||
* Called with the inode lock held.
|
||||
*/
|
||||
static struct inode *find_inode(struct super_block *sb,
|
||||
struct hlist_head *head,
|
||||
int (*test)(struct inode *, void *),
|
||||
void *data)
|
||||
void *data, bool locked)
|
||||
{
|
||||
struct inode *inode = NULL;
|
||||
|
||||
if (locked)
|
||||
lockdep_assert_held(&inode_hash_lock);
|
||||
else
|
||||
lockdep_assert_not_held(&inode_hash_lock);
|
||||
|
||||
rcu_read_lock();
|
||||
repeat:
|
||||
hlist_for_each_entry(inode, head, i_hash) {
|
||||
hlist_for_each_entry_rcu(inode, head, i_hash) {
|
||||
if (inode->i_sb != sb)
|
||||
continue;
|
||||
if (!test(inode, data))
|
||||
continue;
|
||||
spin_lock(&inode->i_lock);
|
||||
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
|
||||
__wait_on_freeing_inode(inode);
|
||||
__wait_on_freeing_inode(inode, locked);
|
||||
goto repeat;
|
||||
}
|
||||
if (unlikely(inode->i_state & I_CREATING)) {
|
||||
spin_unlock(&inode->i_lock);
|
||||
rcu_read_unlock();
|
||||
return ERR_PTR(-ESTALE);
|
||||
}
|
||||
__iget(inode);
|
||||
spin_unlock(&inode->i_lock);
|
||||
rcu_read_unlock();
|
||||
return inode;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -924,29 +935,39 @@ repeat:
|
||||
* iget_locked for details.
|
||||
*/
|
||||
static struct inode *find_inode_fast(struct super_block *sb,
|
||||
struct hlist_head *head, unsigned long ino)
|
||||
struct hlist_head *head, unsigned long ino,
|
||||
bool locked)
|
||||
{
|
||||
struct inode *inode = NULL;
|
||||
|
||||
if (locked)
|
||||
lockdep_assert_held(&inode_hash_lock);
|
||||
else
|
||||
lockdep_assert_not_held(&inode_hash_lock);
|
||||
|
||||
rcu_read_lock();
|
||||
repeat:
|
||||
hlist_for_each_entry(inode, head, i_hash) {
|
||||
hlist_for_each_entry_rcu(inode, head, i_hash) {
|
||||
if (inode->i_ino != ino)
|
||||
continue;
|
||||
if (inode->i_sb != sb)
|
||||
continue;
|
||||
spin_lock(&inode->i_lock);
|
||||
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
|
||||
__wait_on_freeing_inode(inode);
|
||||
__wait_on_freeing_inode(inode, locked);
|
||||
goto repeat;
|
||||
}
|
||||
if (unlikely(inode->i_state & I_CREATING)) {
|
||||
spin_unlock(&inode->i_lock);
|
||||
rcu_read_unlock();
|
||||
return ERR_PTR(-ESTALE);
|
||||
}
|
||||
__iget(inode);
|
||||
spin_unlock(&inode->i_lock);
|
||||
rcu_read_unlock();
|
||||
return inode;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -1004,14 +1025,7 @@ EXPORT_SYMBOL(get_next_ino);
|
||||
*/
|
||||
struct inode *new_inode_pseudo(struct super_block *sb)
|
||||
{
|
||||
struct inode *inode = alloc_inode(sb);
|
||||
|
||||
if (inode) {
|
||||
spin_lock(&inode->i_lock);
|
||||
inode->i_state = 0;
|
||||
spin_unlock(&inode->i_lock);
|
||||
}
|
||||
return inode;
|
||||
return alloc_inode(sb);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1161,7 +1175,7 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
|
||||
|
||||
again:
|
||||
spin_lock(&inode_hash_lock);
|
||||
old = find_inode(inode->i_sb, head, test, data);
|
||||
old = find_inode(inode->i_sb, head, test, data, true);
|
||||
if (unlikely(old)) {
|
||||
/*
|
||||
* Uhhuh, somebody else created the same inode under us.
|
||||
@ -1235,7 +1249,6 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
|
||||
struct inode *new = alloc_inode(sb);
|
||||
|
||||
if (new) {
|
||||
new->i_state = 0;
|
||||
inode = inode_insert5(new, hashval, test, set, data);
|
||||
if (unlikely(inode != new))
|
||||
destroy_inode(new);
|
||||
@ -1245,6 +1258,47 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
|
||||
}
|
||||
EXPORT_SYMBOL(iget5_locked);
|
||||
|
||||
/**
|
||||
* iget5_locked_rcu - obtain an inode from a mounted file system
|
||||
* @sb: super block of file system
|
||||
* @hashval: hash value (usually inode number) to get
|
||||
* @test: callback used for comparisons between inodes
|
||||
* @set: callback used to initialize a new struct inode
|
||||
* @data: opaque data pointer to pass to @test and @set
|
||||
*
|
||||
* This is equivalent to iget5_locked, except the @test callback must
|
||||
* tolerate the inode not being stable, including being mid-teardown.
|
||||
*/
|
||||
struct inode *iget5_locked_rcu(struct super_block *sb, unsigned long hashval,
|
||||
int (*test)(struct inode *, void *),
|
||||
int (*set)(struct inode *, void *), void *data)
|
||||
{
|
||||
struct hlist_head *head = inode_hashtable + hash(sb, hashval);
|
||||
struct inode *inode, *new;
|
||||
|
||||
again:
|
||||
inode = find_inode(sb, head, test, data, false);
|
||||
if (inode) {
|
||||
if (IS_ERR(inode))
|
||||
return NULL;
|
||||
wait_on_inode(inode);
|
||||
if (unlikely(inode_unhashed(inode))) {
|
||||
iput(inode);
|
||||
goto again;
|
||||
}
|
||||
return inode;
|
||||
}
|
||||
|
||||
new = alloc_inode(sb);
|
||||
if (new) {
|
||||
inode = inode_insert5(new, hashval, test, set, data);
|
||||
if (unlikely(inode != new))
|
||||
destroy_inode(new);
|
||||
}
|
||||
return inode;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iget5_locked_rcu);
|
||||
|
||||
/**
|
||||
* iget_locked - obtain an inode from a mounted file system
|
||||
* @sb: super block of file system
|
||||
@ -1263,9 +1317,7 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
|
||||
struct hlist_head *head = inode_hashtable + hash(sb, ino);
|
||||
struct inode *inode;
|
||||
again:
|
||||
spin_lock(&inode_hash_lock);
|
||||
inode = find_inode_fast(sb, head, ino);
|
||||
spin_unlock(&inode_hash_lock);
|
||||
inode = find_inode_fast(sb, head, ino, false);
|
||||
if (inode) {
|
||||
if (IS_ERR(inode))
|
||||
return NULL;
|
||||
@ -1283,7 +1335,7 @@ again:
|
||||
|
||||
spin_lock(&inode_hash_lock);
|
||||
/* We released the lock, so.. */
|
||||
old = find_inode_fast(sb, head, ino);
|
||||
old = find_inode_fast(sb, head, ino, true);
|
||||
if (!old) {
|
||||
inode->i_ino = ino;
|
||||
spin_lock(&inode->i_lock);
|
||||
@ -1419,7 +1471,7 @@ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
|
||||
struct inode *inode;
|
||||
|
||||
spin_lock(&inode_hash_lock);
|
||||
inode = find_inode(sb, head, test, data);
|
||||
inode = find_inode(sb, head, test, data, true);
|
||||
spin_unlock(&inode_hash_lock);
|
||||
|
||||
return IS_ERR(inode) ? NULL : inode;
|
||||
@ -1474,7 +1526,7 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
|
||||
struct inode *inode;
|
||||
again:
|
||||
spin_lock(&inode_hash_lock);
|
||||
inode = find_inode_fast(sb, head, ino);
|
||||
inode = find_inode_fast(sb, head, ino, true);
|
||||
spin_unlock(&inode_hash_lock);
|
||||
|
||||
if (inode) {
|
||||
@ -2235,17 +2287,21 @@ EXPORT_SYMBOL(inode_needs_sync);
|
||||
* wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
|
||||
* will DTRT.
|
||||
*/
|
||||
static void __wait_on_freeing_inode(struct inode *inode)
|
||||
static void __wait_on_freeing_inode(struct inode *inode, bool locked)
|
||||
{
|
||||
wait_queue_head_t *wq;
|
||||
DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
|
||||
wq = bit_waitqueue(&inode->i_state, __I_NEW);
|
||||
prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
|
||||
spin_unlock(&inode->i_lock);
|
||||
spin_unlock(&inode_hash_lock);
|
||||
rcu_read_unlock();
|
||||
if (locked)
|
||||
spin_unlock(&inode_hash_lock);
|
||||
schedule();
|
||||
finish_wait(wq, &wait.wq_entry);
|
||||
spin_lock(&inode_hash_lock);
|
||||
if (locked)
|
||||
spin_lock(&inode_hash_lock);
|
||||
rcu_read_lock();
|
||||
}
|
||||
|
||||
static __initdata unsigned long ihash_entries;
|
||||
|
@ -86,9 +86,8 @@ xfs_inode_alloc(
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* VFS doesn't initialise i_mode or i_state! */
|
||||
/* VFS doesn't initialise i_mode! */
|
||||
VFS_I(ip)->i_mode = 0;
|
||||
VFS_I(ip)->i_state = 0;
|
||||
mapping_set_large_folios(VFS_I(ip)->i_mapping);
|
||||
|
||||
XFS_STATS_INC(mp, vn_active);
|
||||
@ -314,6 +313,7 @@ xfs_reinit_inode(
|
||||
dev_t dev = inode->i_rdev;
|
||||
kuid_t uid = inode->i_uid;
|
||||
kgid_t gid = inode->i_gid;
|
||||
unsigned long state = inode->i_state;
|
||||
|
||||
error = inode_init_always(mp->m_super, inode);
|
||||
|
||||
@ -324,6 +324,7 @@ xfs_reinit_inode(
|
||||
inode->i_rdev = dev;
|
||||
inode->i_uid = uid;
|
||||
inode->i_gid = gid;
|
||||
inode->i_state = state;
|
||||
mapping_set_large_folios(inode->i_mapping);
|
||||
return error;
|
||||
}
|
||||
|
@ -71,7 +71,7 @@ extern const struct qstr dotdot_name;
|
||||
# define DNAME_INLINE_LEN 40 /* 192 bytes */
|
||||
#else
|
||||
# ifdef CONFIG_SMP
|
||||
# define DNAME_INLINE_LEN 40 /* 128 bytes */
|
||||
# define DNAME_INLINE_LEN 36 /* 128 bytes */
|
||||
# else
|
||||
# define DNAME_INLINE_LEN 44 /* 128 bytes */
|
||||
# endif
|
||||
@ -89,13 +89,18 @@ struct dentry {
|
||||
struct inode *d_inode; /* Where the name belongs to - NULL is
|
||||
* negative */
|
||||
unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
|
||||
/* --- cacheline 1 boundary (64 bytes) was 32 bytes ago --- */
|
||||
|
||||
/* Ref lookup also touches following */
|
||||
struct lockref d_lockref; /* per-dentry lock and refcount */
|
||||
const struct dentry_operations *d_op;
|
||||
struct super_block *d_sb; /* The root of the dentry tree */
|
||||
unsigned long d_time; /* used by d_revalidate */
|
||||
void *d_fsdata; /* fs-specific data */
|
||||
/* --- cacheline 2 boundary (128 bytes) --- */
|
||||
struct lockref d_lockref; /* per-dentry lock and refcount
|
||||
* keep separate from RCU lookup area if
|
||||
* possible!
|
||||
*/
|
||||
|
||||
union {
|
||||
struct list_head d_lru; /* LRU list */
|
||||
|
@ -3047,7 +3047,12 @@ extern struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
|
||||
int (*test)(struct inode *, void *),
|
||||
int (*set)(struct inode *, void *),
|
||||
void *data);
|
||||
extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *);
|
||||
struct inode *iget5_locked(struct super_block *, unsigned long,
|
||||
int (*test)(struct inode *, void *),
|
||||
int (*set)(struct inode *, void *), void *);
|
||||
struct inode *iget5_locked_rcu(struct super_block *, unsigned long,
|
||||
int (*test)(struct inode *, void *),
|
||||
int (*set)(struct inode *, void *), void *);
|
||||
extern struct inode * iget_locked(struct super_block *, unsigned long);
|
||||
extern struct inode *find_inode_nowait(struct super_block *,
|
||||
unsigned long,
|
||||
|
Loading…
Reference in New Issue
Block a user