3d2b158262
Ext4 does not support data journalling with delayed allocation enabled. We even do not allow to mount the file system with delayed allocation and data journalling enabled, however it can be set via FS_IOC_SETFLAGS so we can hit the inode with EXT4_INODE_JOURNAL_DATA set even on file system mounted with delayed allocation (default) and that's where problem arises. The easies way to reproduce this problem is with the following set of commands: mkfs.ext4 /dev/sdd mount /dev/sdd /mnt/test1 dd if=/dev/zero of=/mnt/test1/file bs=1M count=4 chattr +j /mnt/test1/file dd if=/dev/zero of=/mnt/test1/file bs=1M count=4 conv=notrunc chattr -j /mnt/test1/file Additionally it can be reproduced quite reliably with xfstests 272 and 269. In fact the above reproducer is a part of test 272. To fix this we should ignore the EXT4_INODE_JOURNAL_DATA inode flag if the file system is mounted with delayed allocation. This can be easily done by fixing ext4_should_*_data() functions do ignore data journal flag when delalloc is set (suggested by Ted). We also have to set the appropriate address space operations for the inode (again, ignoring data journal flag if delalloc enabled). Additionally this commit introduces ext4_inode_journal_mode() function because ext4_should_*_data() has already had a lot of common code and this change is putting it all into one function so it is easier to read. Successfully tested with xfstests in following configurations: delalloc + data=ordered delalloc + data=writeback data=journal nodelalloc + data=ordered nodelalloc + data=writeback nodelalloc + data=journal Signed-off-by: Lukas Czerner <lczerner@redhat.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Cc: stable@vger.kernel.org
328 lines
11 KiB
C
328 lines
11 KiB
C
/*
|
|
* ext4_jbd2.h
|
|
*
|
|
* Written by Stephen C. Tweedie <sct@redhat.com>, 1999
|
|
*
|
|
* Copyright 1998--1999 Red Hat corp --- All Rights Reserved
|
|
*
|
|
* This file is part of the Linux kernel and is made available under
|
|
* the terms of the GNU General Public License, version 2, or at your
|
|
* option, any later version, incorporated herein by reference.
|
|
*
|
|
* Ext4-specific journaling extensions.
|
|
*/
|
|
|
|
#ifndef _EXT4_JBD2_H
|
|
#define _EXT4_JBD2_H
|
|
|
|
#include <linux/fs.h>
|
|
#include <linux/jbd2.h>
|
|
#include "ext4.h"
|
|
|
|
#define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal)
|
|
|
|
/* Define the number of blocks we need to account to a transaction to
|
|
* modify one block of data.
|
|
*
|
|
* We may have to touch one inode, one bitmap buffer, up to three
|
|
* indirection blocks, the group and superblock summaries, and the data
|
|
* block to complete the transaction.
|
|
*
|
|
* For extents-enabled fs we may have to allocate and modify up to
|
|
* 5 levels of tree + root which are stored in the inode. */
|
|
|
|
#define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \
|
|
(EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \
|
|
? 27U : 8U)
|
|
|
|
/* Extended attribute operations touch at most two data buffers,
|
|
* two bitmap buffers, and two group summaries, in addition to the inode
|
|
* and the superblock, which are already accounted for. */
|
|
|
|
#define EXT4_XATTR_TRANS_BLOCKS 6U
|
|
|
|
/* Define the minimum size for a transaction which modifies data. This
|
|
* needs to take into account the fact that we may end up modifying two
|
|
* quota files too (one for the group, one for the user quota). The
|
|
* superblock only gets updated once, of course, so don't bother
|
|
* counting that again for the quota updates. */
|
|
|
|
#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \
|
|
EXT4_XATTR_TRANS_BLOCKS - 2 + \
|
|
EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
|
|
|
|
/*
|
|
* Define the number of metadata blocks we need to account to modify data.
|
|
*
|
|
* This include super block, inode block, quota blocks and xattr blocks
|
|
*/
|
|
#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \
|
|
EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
|
|
|
|
/* Delete operations potentially hit one directory's namespace plus an
|
|
* entire inode, plus arbitrary amounts of bitmap/indirection data. Be
|
|
* generous. We can grow the delete transaction later if necessary. */
|
|
|
|
#define EXT4_DELETE_TRANS_BLOCKS(sb) (2 * EXT4_DATA_TRANS_BLOCKS(sb) + 64)
|
|
|
|
/* Define an arbitrary limit for the amount of data we will anticipate
|
|
* writing to any given transaction. For unbounded transactions such as
|
|
* write(2) and truncate(2) we can write more than this, but we always
|
|
* start off at the maximum transaction size and grow the transaction
|
|
* optimistically as we go. */
|
|
|
|
#define EXT4_MAX_TRANS_DATA 64U
|
|
|
|
/* We break up a large truncate or write transaction once the handle's
|
|
* buffer credits gets this low, we need either to extend the
|
|
* transaction or to start a new one. Reserve enough space here for
|
|
* inode, bitmap, superblock, group and indirection updates for at least
|
|
* one block, plus two quota updates. Quota allocations are not
|
|
* needed. */
|
|
|
|
#define EXT4_RESERVE_TRANS_BLOCKS 12U
|
|
|
|
#define EXT4_INDEX_EXTRA_TRANS_BLOCKS 8
|
|
|
|
#ifdef CONFIG_QUOTA
|
|
/* Amount of blocks needed for quota update - we know that the structure was
|
|
* allocated so we need to update only data block */
|
|
#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0)
|
|
/* Amount of blocks needed for quota insert/delete - we do some block writes
|
|
* but inode, sb and group updates are done only once */
|
|
#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
|
|
(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
|
|
|
|
#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
|
|
(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
|
|
#else
|
|
#define EXT4_QUOTA_TRANS_BLOCKS(sb) 0
|
|
#define EXT4_QUOTA_INIT_BLOCKS(sb) 0
|
|
#define EXT4_QUOTA_DEL_BLOCKS(sb) 0
|
|
#endif
|
|
#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
|
|
#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
|
|
#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
|
|
|
|
int
|
|
ext4_mark_iloc_dirty(handle_t *handle,
|
|
struct inode *inode,
|
|
struct ext4_iloc *iloc);
|
|
|
|
/*
|
|
* On success, We end up with an outstanding reference count against
|
|
* iloc->bh. This _must_ be cleaned up later.
|
|
*/
|
|
|
|
int ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
|
|
struct ext4_iloc *iloc);
|
|
|
|
int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
|
|
|
|
/*
|
|
* Wrapper functions with which ext4 calls into JBD.
|
|
*/
|
|
void ext4_journal_abort_handle(const char *caller, unsigned int line,
|
|
const char *err_fn,
|
|
struct buffer_head *bh, handle_t *handle, int err);
|
|
|
|
int __ext4_journal_get_write_access(const char *where, unsigned int line,
|
|
handle_t *handle, struct buffer_head *bh);
|
|
|
|
int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
|
|
int is_metadata, struct inode *inode,
|
|
struct buffer_head *bh, ext4_fsblk_t blocknr);
|
|
|
|
int __ext4_journal_get_create_access(const char *where, unsigned int line,
|
|
handle_t *handle, struct buffer_head *bh);
|
|
|
|
int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
|
|
handle_t *handle, struct inode *inode,
|
|
struct buffer_head *bh);
|
|
|
|
int __ext4_handle_dirty_super(const char *where, unsigned int line,
|
|
handle_t *handle, struct super_block *sb);
|
|
|
|
#define ext4_journal_get_write_access(handle, bh) \
|
|
__ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh))
|
|
#define ext4_forget(handle, is_metadata, inode, bh, block_nr) \
|
|
__ext4_forget(__func__, __LINE__, (handle), (is_metadata), (inode), \
|
|
(bh), (block_nr))
|
|
#define ext4_journal_get_create_access(handle, bh) \
|
|
__ext4_journal_get_create_access(__func__, __LINE__, (handle), (bh))
|
|
#define ext4_handle_dirty_metadata(handle, inode, bh) \
|
|
__ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \
|
|
(bh))
|
|
#define ext4_handle_dirty_super(handle, sb) \
|
|
__ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))
|
|
|
|
handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
|
|
int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
|
|
|
|
#define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096)
|
|
|
|
/* Note: Do not use this for NULL handles. This is only to determine if
|
|
* a properly allocated handle is using a journal or not. */
|
|
static inline int ext4_handle_valid(handle_t *handle)
|
|
{
|
|
if ((unsigned long)handle < EXT4_NOJOURNAL_MAX_REF_COUNT)
|
|
return 0;
|
|
return 1;
|
|
}
|
|
|
|
static inline void ext4_handle_sync(handle_t *handle)
|
|
{
|
|
if (ext4_handle_valid(handle))
|
|
handle->h_sync = 1;
|
|
}
|
|
|
|
static inline void ext4_handle_release_buffer(handle_t *handle,
|
|
struct buffer_head *bh)
|
|
{
|
|
if (ext4_handle_valid(handle))
|
|
jbd2_journal_release_buffer(handle, bh);
|
|
}
|
|
|
|
static inline int ext4_handle_is_aborted(handle_t *handle)
|
|
{
|
|
if (ext4_handle_valid(handle))
|
|
return is_handle_aborted(handle);
|
|
return 0;
|
|
}
|
|
|
|
static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
|
|
{
|
|
if (ext4_handle_valid(handle) && handle->h_buffer_credits < needed)
|
|
return 0;
|
|
return 1;
|
|
}
|
|
|
|
static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
|
|
{
|
|
return ext4_journal_start_sb(inode->i_sb, nblocks);
|
|
}
|
|
|
|
#define ext4_journal_stop(handle) \
|
|
__ext4_journal_stop(__func__, __LINE__, (handle))
|
|
|
|
static inline handle_t *ext4_journal_current_handle(void)
|
|
{
|
|
return journal_current_handle();
|
|
}
|
|
|
|
static inline int ext4_journal_extend(handle_t *handle, int nblocks)
|
|
{
|
|
if (ext4_handle_valid(handle))
|
|
return jbd2_journal_extend(handle, nblocks);
|
|
return 0;
|
|
}
|
|
|
|
static inline int ext4_journal_restart(handle_t *handle, int nblocks)
|
|
{
|
|
if (ext4_handle_valid(handle))
|
|
return jbd2_journal_restart(handle, nblocks);
|
|
return 0;
|
|
}
|
|
|
|
static inline int ext4_journal_blocks_per_page(struct inode *inode)
|
|
{
|
|
if (EXT4_JOURNAL(inode) != NULL)
|
|
return jbd2_journal_blocks_per_page(inode);
|
|
return 0;
|
|
}
|
|
|
|
static inline int ext4_journal_force_commit(journal_t *journal)
|
|
{
|
|
if (journal)
|
|
return jbd2_journal_force_commit(journal);
|
|
return 0;
|
|
}
|
|
|
|
static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
|
|
{
|
|
if (ext4_handle_valid(handle))
|
|
return jbd2_journal_file_inode(handle, EXT4_I(inode)->jinode);
|
|
return 0;
|
|
}
|
|
|
|
static inline void ext4_update_inode_fsync_trans(handle_t *handle,
|
|
struct inode *inode,
|
|
int datasync)
|
|
{
|
|
struct ext4_inode_info *ei = EXT4_I(inode);
|
|
|
|
if (ext4_handle_valid(handle)) {
|
|
ei->i_sync_tid = handle->h_transaction->t_tid;
|
|
if (datasync)
|
|
ei->i_datasync_tid = handle->h_transaction->t_tid;
|
|
}
|
|
}
|
|
|
|
/* super.c */
|
|
int ext4_force_commit(struct super_block *sb);
|
|
|
|
/*
|
|
* Ext4 inode journal modes
|
|
*/
|
|
#define EXT4_INODE_JOURNAL_DATA_MODE 0x01 /* journal data mode */
|
|
#define EXT4_INODE_ORDERED_DATA_MODE 0x02 /* ordered data mode */
|
|
#define EXT4_INODE_WRITEBACK_DATA_MODE 0x04 /* writeback data mode */
|
|
|
|
static inline int ext4_inode_journal_mode(struct inode *inode)
|
|
{
|
|
if (EXT4_JOURNAL(inode) == NULL)
|
|
return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */
|
|
/* We do not support data journalling with delayed allocation */
|
|
if (!S_ISREG(inode->i_mode) ||
|
|
test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
|
|
return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */
|
|
if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
|
|
!test_opt(inode->i_sb, DELALLOC))
|
|
return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */
|
|
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
|
|
return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */
|
|
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
|
|
return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */
|
|
else
|
|
BUG();
|
|
}
|
|
|
|
static inline int ext4_should_journal_data(struct inode *inode)
|
|
{
|
|
return ext4_inode_journal_mode(inode) & EXT4_INODE_JOURNAL_DATA_MODE;
|
|
}
|
|
|
|
static inline int ext4_should_order_data(struct inode *inode)
|
|
{
|
|
return ext4_inode_journal_mode(inode) & EXT4_INODE_ORDERED_DATA_MODE;
|
|
}
|
|
|
|
static inline int ext4_should_writeback_data(struct inode *inode)
|
|
{
|
|
return ext4_inode_journal_mode(inode) & EXT4_INODE_WRITEBACK_DATA_MODE;
|
|
}
|
|
|
|
/*
|
|
* This function controls whether or not we should try to go down the
|
|
* dioread_nolock code paths, which makes it safe to avoid taking
|
|
* i_mutex for direct I/O reads. This only works for extent-based
|
|
* files, and it doesn't work if data journaling is enabled, since the
|
|
* dioread_nolock code uses b_private to pass information back to the
|
|
* I/O completion handler, and this conflicts with the jbd's use of
|
|
* b_private.
|
|
*/
|
|
static inline int ext4_should_dioread_nolock(struct inode *inode)
|
|
{
|
|
if (!test_opt(inode->i_sb, DIOREAD_NOLOCK))
|
|
return 0;
|
|
if (!S_ISREG(inode->i_mode))
|
|
return 0;
|
|
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
|
|
return 0;
|
|
if (ext4_should_journal_data(inode))
|
|
return 0;
|
|
return 1;
|
|
}
|
|
|
|
#endif /* _EXT4_JBD2_H */
|