1

iomap: fix iomap_dio_zero() for fs bs > system page size

iomap_dio_zero() pads a fs block with zeroes when the direct IO size is
smaller than the fs block size. It implicitly assumes that the fs block
size does not exceed the page size, which is true for most filesystems at
the moment.

If the block size > page size, this will send the contents of the pages
following the zero page (as len > PAGE_SIZE) to the underlying block device,
causing FS corruption.

iomap is a generic infrastructure and it should not make any assumptions
about the fs block size and the page size of the system.

Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
Link: https://lore.kernel.org/r/20240822135018.1931258-7-kernel@pankajraghav.com
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Daniel Gomez <da.gomez@samsung.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Pankaj Raghav 2024-08-22 15:50:14 +02:00 committed by Christian Brauner
parent 743a2753a0
commit 10553a9165
2 changed files with 41 additions and 8 deletions

View File

@ -2007,10 +2007,10 @@ iomap_writepages(struct address_space *mapping, struct writeback_control *wbc,
} }
EXPORT_SYMBOL_GPL(iomap_writepages); EXPORT_SYMBOL_GPL(iomap_writepages);
static int __init iomap_init(void) static int __init iomap_buffered_init(void)
{ {
return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE), return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
offsetof(struct iomap_ioend, io_bio), offsetof(struct iomap_ioend, io_bio),
BIOSET_NEED_BVECS); BIOSET_NEED_BVECS);
} }
fs_initcall(iomap_init); fs_initcall(iomap_buffered_init);

View File

@ -11,6 +11,7 @@
#include <linux/iomap.h> #include <linux/iomap.h>
#include <linux/backing-dev.h> #include <linux/backing-dev.h>
#include <linux/uio.h> #include <linux/uio.h>
#include <linux/set_memory.h>
#include <linux/task_io_accounting_ops.h> #include <linux/task_io_accounting_ops.h>
#include "trace.h" #include "trace.h"
@ -27,6 +28,13 @@
#define IOMAP_DIO_WRITE (1U << 30) #define IOMAP_DIO_WRITE (1U << 30)
#define IOMAP_DIO_DIRTY (1U << 31) #define IOMAP_DIO_DIRTY (1U << 31)
/*
 * Used for sub block zeroing in iomap_dio_zero().
 *
 * zero_page is allocated zero-filled at IOMAP_ZERO_PAGE_ORDER by
 * iomap_dio_init(). iomap_dio_zero() adds it to the bio it submits and
 * rejects any request longer than IOMAP_ZERO_PAGE_SIZE with -EINVAL.
 */
#define IOMAP_ZERO_PAGE_SIZE (SZ_64K)
#define IOMAP_ZERO_PAGE_ORDER (get_order(IOMAP_ZERO_PAGE_SIZE))
static struct page *zero_page;
struct iomap_dio { struct iomap_dio {
struct kiocb *iocb; struct kiocb *iocb;
const struct iomap_dio_ops *dops; const struct iomap_dio_ops *dops;
@ -232,13 +240,20 @@ release_bio:
} }
EXPORT_SYMBOL_GPL(iomap_dio_bio_end_io); EXPORT_SYMBOL_GPL(iomap_dio_bio_end_io);
static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio, static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
loff_t pos, unsigned len) loff_t pos, unsigned len)
{ {
struct inode *inode = file_inode(dio->iocb->ki_filp); struct inode *inode = file_inode(dio->iocb->ki_filp);
struct page *page = ZERO_PAGE(0);
struct bio *bio; struct bio *bio;
if (!len)
return 0;
/*
* Max block size supported is 64k
*/
if (WARN_ON_ONCE(len > IOMAP_ZERO_PAGE_SIZE))
return -EINVAL;
bio = iomap_dio_alloc_bio(iter, dio, 1, REQ_OP_WRITE | REQ_SYNC | REQ_IDLE); bio = iomap_dio_alloc_bio(iter, dio, 1, REQ_OP_WRITE | REQ_SYNC | REQ_IDLE);
fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits, fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits,
GFP_KERNEL); GFP_KERNEL);
@ -246,8 +261,9 @@ static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
bio->bi_private = dio; bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io; bio->bi_end_io = iomap_dio_bio_end_io;
__bio_add_page(bio, page, len, 0); __bio_add_page(bio, zero_page, len, 0);
iomap_dio_submit_bio(iter, dio, bio, pos); iomap_dio_submit_bio(iter, dio, bio, pos);
return 0;
} }
/* /*
@ -356,8 +372,10 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
if (need_zeroout) { if (need_zeroout) {
/* zero out from the start of the block to the write offset */ /* zero out from the start of the block to the write offset */
pad = pos & (fs_block_size - 1); pad = pos & (fs_block_size - 1);
if (pad)
iomap_dio_zero(iter, dio, pos - pad, pad); ret = iomap_dio_zero(iter, dio, pos - pad, pad);
if (ret)
goto out;
} }
/* /*
@ -431,7 +449,8 @@ zero_tail:
/* zero out from the end of the write to the end of the block */ /* zero out from the end of the write to the end of the block */
pad = pos & (fs_block_size - 1); pad = pos & (fs_block_size - 1);
if (pad) if (pad)
iomap_dio_zero(iter, dio, pos, fs_block_size - pad); ret = iomap_dio_zero(iter, dio, pos,
fs_block_size - pad);
} }
out: out:
/* Undo iter limitation to current extent */ /* Undo iter limitation to current extent */
@ -753,3 +772,17 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
return iomap_dio_complete(dio); return iomap_dio_complete(dio);
} }
EXPORT_SYMBOL_GPL(iomap_dio_rw); EXPORT_SYMBOL_GPL(iomap_dio_rw);
/*
 * Set up the zero-filled buffer that iomap_dio_zero() adds to its bios.
 *
 * Allocates 2^IOMAP_ZERO_PAGE_ORDER zeroed pages, publishes them through
 * zero_page and then asks for the range to be mapped read-only.  The result
 * of set_memory_ro() is not checked, so a failure there does not fail the
 * initcall.
 *
 * Returns 0 on success or -ENOMEM if the pages cannot be allocated.
 */
static int __init iomap_dio_init(void)
{
	int order = IOMAP_ZERO_PAGE_ORDER;
	struct page *pages;
	unsigned long addr;

	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
	if (pages == NULL)
		return -ENOMEM;

	zero_page = pages;
	addr = (unsigned long)page_address(pages);
	set_memory_ro(addr, 1U << order);

	return 0;
}
fs_initcall(iomap_dio_init);