erofs: handle overlapped pclusters out of crafted images properly
syzbot reported a task hang issue due to a deadlock case where it is
waiting for the folio lock of a cached folio that will be used for
cache I/Os.

After looking into the crafted fuzzed image, I found it's formed with
several overlapped big pclusters as below:

 Ext:   logical offset | length :    physical offset    | length
   0:       0..  16384 |  16384 :     151552..    167936 |  16384
   1:   16384..  32768 |  16384 :     155648..    172032 |  16384
   2:   32768..  49152 |  16384 :  537223168.. 537239552 |  16384
 ...

Here, extents 0 and 1 are physically overlapped, although that is
entirely _impossible_ for normal filesystem images generated by mkfs.

First, managed folios containing compressed data will be marked as
up-to-date and then unlocked immediately (unlike in-place folios) when
compressed I/Os are complete.  If physical blocks are not submitted in
incremental order, there should be separate BIOs to avoid dependency
issues.  However, the current code mis-arranges z_erofs_fill_bio_vec()
and BIO submission, which causes unexpected BIO waits.

Second, managed folios will be connected to their own pclusters for
efficient inter-queries.  However, this is somewhat hard to implement
easily if overlapped big pclusters exist.  Again, these only appear in
fuzzed images, so let's simply fall back to temporary short-lived pages
for correctness.

Additionally, this justifies that referenced managed folios cannot be
truncated for now, and it reverts part of commit 2080ca1ed3 ("erofs:
tidy up `struct z_erofs_bvec`") for simplicity, although there
shouldn't be any difference.

Reported-by: syzbot+4fc98ed414ae63d1ada2@syzkaller.appspotmail.com
Reported-by: syzbot+de04e06b28cfecf2281c@syzkaller.appspotmail.com
Reported-by: syzbot+c8c8238b394be4a1087d@syzkaller.appspotmail.com
Tested-by: syzbot+4fc98ed414ae63d1ada2@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/r/0000000000002fda01061e334873@google.com
Fixes: 8e6c8fa9f2 ("erofs: enable big pcluster feature")
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20240910070847.3356592-1-hsiangkao@linux.alibaba.com
commit 9e2f9d34dd
parent 3fc3e45fcd
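To make the overlap above concrete: extent 0 occupies physical bytes
151552..167936, while extent 1 starts at 155648, inside that range.
Below is a minimal standalone sketch of the half-open interval check
(illustration only, not kernel code; `struct ext` and `phys_overlap`
are names made up for this example):

    #include <stdbool.h>
    #include <stdio.h>

    struct ext { unsigned long long pa, len; };  /* physical start/length */

    /* [pa, pa+len) intervals overlap iff each starts before the other ends */
    static bool phys_overlap(struct ext a, struct ext b)
    {
            return a.pa < b.pa + b.len && b.pa < a.pa + a.len;
    }

    int main(void)
    {
            struct ext e0 = { 151552, 16384 };  /* extent 0: 151552..167936 */
            struct ext e1 = { 155648, 16384 };  /* extent 1: 155648..172032 */

            printf("overlap: %d\n", phys_overlap(e0, e1));  /* prints 1 */
            return 0;
    }

mkfs.erofs always lays pclusters out disjointly, so this predicate can
only fire on crafted images.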
@@ -1428,6 +1428,7 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
 	struct z_erofs_bvec zbv;
 	struct address_space *mapping;
 	struct folio *folio;
+	struct page *page;
 	int bs = i_blocksize(f->inode);
 
 	/* Except for inplace folios, the entire folio can be used for I/Os */
@@ -1450,7 +1451,6 @@ repeat:
 	 * file-backed folios will be used instead.
 	 */
 	if (folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) {
-		folio->private = 0;
 		tocache = true;
 		goto out_tocache;
 	}
@@ -1468,7 +1468,7 @@ repeat:
 	}
 
 	folio_lock(folio);
-	if (folio->mapping == mc) {
+	if (likely(folio->mapping == mc)) {
 		/*
 		 * The cached folio is still in managed cache but without
 		 * a valid `->private` pcluster hint.  Let's reconnect them.
@@ -1478,41 +1478,44 @@ repeat:
 			/* compressed_bvecs[] already takes a ref before */
 			folio_put(folio);
 		}
-
-		/* no need to submit if it is already up-to-date */
-		if (folio_test_uptodate(folio)) {
-			folio_unlock(folio);
-			bvec->bv_page = NULL;
+		if (likely(folio->private == pcl)) {
+			/* don't submit cache I/Os again if already uptodate */
+			if (folio_test_uptodate(folio)) {
+				folio_unlock(folio);
+				bvec->bv_page = NULL;
+			}
+			return;
 		}
-		return;
+		/*
+		 * Already linked with another pcluster, which only appears in
+		 * crafted images by fuzzers for now.  But handle this anyway.
+		 */
+		tocache = false;	/* use temporary short-lived pages */
+	} else {
+		DBG_BUGON(1); /* referenced managed folios can't be truncated */
+		tocache = true;
 	}
-
-	/*
-	 * It has been truncated, so it's unsafe to reuse this one. Let's
-	 * allocate a new page for compressed data.
-	 */
-	DBG_BUGON(folio->mapping);
-	tocache = true;
 	folio_unlock(folio);
 	folio_put(folio);
 out_allocfolio:
-	zbv.page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
+	page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
 	spin_lock(&pcl->obj.lockref.lock);
-	if (pcl->compressed_bvecs[nr].page) {
-		erofs_pagepool_add(&f->pagepool, zbv.page);
+	if (unlikely(pcl->compressed_bvecs[nr].page != zbv.page)) {
+		erofs_pagepool_add(&f->pagepool, page);
 		spin_unlock(&pcl->obj.lockref.lock);
 		cond_resched();
 		goto repeat;
 	}
-	bvec->bv_page = pcl->compressed_bvecs[nr].page = zbv.page;
-	folio = page_folio(zbv.page);
-	/* first mark it as a temporary shortlived folio (now 1 ref) */
-	folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
+	bvec->bv_page = pcl->compressed_bvecs[nr].page = page;
+	folio = page_folio(page);
 	spin_unlock(&pcl->obj.lockref.lock);
 out_tocache:
 	if (!tocache || bs != PAGE_SIZE ||
-	    filemap_add_folio(mc, folio, pcl->obj.index + nr, gfp))
+	    filemap_add_folio(mc, folio, pcl->obj.index + nr, gfp)) {
+		/* turn into a temporary shortlived folio (1 ref) */
+		folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
+		return;
+	}
 	folio_attach_private(folio, pcl);
 	/* drop a refcount added by allocpage (then 2 refs in total here) */
 	folio_put(folio);
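Stitched together from the hunk above for readability (an abridged view
of the post-patch logic, not a literal copy of the function), the
cached-folio handling in z_erofs_fill_bio_vec() becomes:

    folio_lock(folio);
    if (likely(folio->mapping == mc)) {            /* still in managed cache */
            if (likely(folio->private == pcl)) {   /* owned by this pcluster */
                    if (folio_test_uptodate(folio)) {
                            folio_unlock(folio);
                            bvec->bv_page = NULL;  /* no cache I/O to resubmit */
                    }
                    return;
            }
            /* owned by an overlapping pcluster (crafted images only) */
            tocache = false;        /* fall back to short-lived pages */
    } else {
            DBG_BUGON(1);   /* referenced managed folios can't be truncated */
            tocache = true;
    }

The key design choice is the `tocache = false` branch: rather than
teaching the managed cache to track one folio across two pclusters,
the second pcluster simply gives up on caching and decompresses through
temporary short-lived pages.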
@@ -1647,13 +1650,10 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
 		cur = mdev.m_pa;
 		end = cur + pcl->pclustersize;
 		do {
-			z_erofs_fill_bio_vec(&bvec, f, pcl, i++, mc);
-			if (!bvec.bv_page)
-				continue;
-
+			bvec.bv_page = NULL;
 			if (bio && (cur != last_pa ||
 				    bio->bi_bdev != mdev.m_bdev)) {
-io_retry:
+drain_io:
 				if (!erofs_is_fscache_mode(sb))
 					submit_bio(bio);
 				else
@@ -1666,6 +1666,15 @@ io_retry:
 				bio = NULL;
 			}
 
+			if (!bvec.bv_page) {
+				z_erofs_fill_bio_vec(&bvec, f, pcl, i++, mc);
+				if (!bvec.bv_page)
+					continue;
+				if (cur + bvec.bv_len > end)
+					bvec.bv_len = end - cur;
+				DBG_BUGON(bvec.bv_len < sb->s_blocksize);
+			}
+
 			if (unlikely(PageWorkingset(bvec.bv_page)) &&
 			    !memstall) {
 				psi_memstall_enter(&pflags);
@@ -1685,13 +1694,9 @@ io_retry:
 				++nr_bios;
 			}
 
-			if (cur + bvec.bv_len > end)
-				bvec.bv_len = end - cur;
-			DBG_BUGON(bvec.bv_len < sb->s_blocksize);
 			if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len,
 					  bvec.bv_offset))
-				goto io_retry;
+				goto drain_io;
 			last_pa = cur + bvec.bv_len;
 			bypass = false;
 		} while ((cur += bvec.bv_len) < end);
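Why the reordering matters: z_erofs_fill_bio_vec() may take the folio
lock of a managed folio, and that folio is only unlocked once the
compressed I/O covering it completes.  With overlapped pclusters, the
old loop could therefore lock a folio that the still-unsubmitted `bio`
was supposed to bring up to date, hanging forever.  Stitched together
from the hunks above (abridged; the fscache branch, BIO allocation and
psi accounting are elided), the post-patch loop drains any pending BIO
before grabbing the next folio:

    do {
            bvec.bv_page = NULL;
            if (bio && (cur != last_pa || bio->bi_bdev != mdev.m_bdev)) {
    drain_io:
                    submit_bio(bio);        /* drain the pending BIO first... */
                    bio = NULL;
            }

            if (!bvec.bv_page) {    /* ...then pick (and maybe lock) the next folio */
                    z_erofs_fill_bio_vec(&bvec, f, pcl, i++, mc);
                    if (!bvec.bv_page)
                            continue;
                    if (cur + bvec.bv_len > end)
                            bvec.bv_len = end - cur;
            }
            /* ... allocate a new bio here if needed ... */
            if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len, bvec.bv_offset))
                    goto drain_io;  /* BIO full: submit it, then retry this bvec */
            last_pa = cur + bvec.bv_len;
    } while ((cur += bvec.bv_len) < end);

Because `bvec.bv_page` survives the `goto drain_io` round trip, an
already-filled bvec is not refilled after the retry, which is exactly
what the new `if (!bvec.bv_page)` guard encodes.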