erofs: handle overlapped pclusters out of crafted images properly
syzbot reported a task hang issue due to a deadlock case where it is waiting for the folio lock of a cached folio that will be used for cache I/Os. After looking into the crafted fuzzed image, I found it's formed with several overlapped big pclusters as below: Ext: logical offset | length : physical offset | length 0: 0.. 16384 | 16384 : 151552.. 167936 | 16384 1: 16384.. 32768 | 16384 : 155648.. 172032 | 16384 2: 32768.. 49152 | 16384 : 537223168.. 537239552 | 16384 ... Here, extent 0/1 are physically overlapped although it's entirely _impossible_ for normal filesystem images generated by mkfs. First, managed folios containing compressed data will be marked as up-to-date and then unlocked immediately (unlike in-place folios) when compressed I/Os are complete. If physical blocks are not submitted in the incremental order, there should be separate BIOs to avoid dependency issues. However, the current code mis-arranges z_erofs_fill_bio_vec() and BIO submission which causes unexpected BIO waits. Second, managed folios will be connected to their own pclusters for efficient inter-queries. However, this is somewhat hard to implement easily if overlapped big pclusters exist. Again, these only appear in fuzzed images so let's simply fall back to temporary short-lived pages for correctness. Additionally, it justifies that referenced managed folios cannot be truncated for now and reverts part of commit2080ca1ed3
("erofs: tidy up `struct z_erofs_bvec`") for simplicity although it shouldn't be any difference. Reported-by: syzbot+4fc98ed414ae63d1ada2@syzkaller.appspotmail.com Reported-by: syzbot+de04e06b28cfecf2281c@syzkaller.appspotmail.com Reported-by: syzbot+c8c8238b394be4a1087d@syzkaller.appspotmail.com Tested-by: syzbot+4fc98ed414ae63d1ada2@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/0000000000002fda01061e334873@google.com Fixes:8e6c8fa9f2
("erofs: enable big pcluster feature") Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com> Link: https://lore.kernel.org/r/20240910070847.3356592-1-hsiangkao@linux.alibaba.com
This commit is contained in:
parent
3fc3e45fcd
commit
9e2f9d34dd
@ -1428,6 +1428,7 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
|
|||||||
struct z_erofs_bvec zbv;
|
struct z_erofs_bvec zbv;
|
||||||
struct address_space *mapping;
|
struct address_space *mapping;
|
||||||
struct folio *folio;
|
struct folio *folio;
|
||||||
|
struct page *page;
|
||||||
int bs = i_blocksize(f->inode);
|
int bs = i_blocksize(f->inode);
|
||||||
|
|
||||||
/* Except for inplace folios, the entire folio can be used for I/Os */
|
/* Except for inplace folios, the entire folio can be used for I/Os */
|
||||||
@ -1450,7 +1451,6 @@ repeat:
|
|||||||
* file-backed folios will be used instead.
|
* file-backed folios will be used instead.
|
||||||
*/
|
*/
|
||||||
if (folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) {
|
if (folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) {
|
||||||
folio->private = 0;
|
|
||||||
tocache = true;
|
tocache = true;
|
||||||
goto out_tocache;
|
goto out_tocache;
|
||||||
}
|
}
|
||||||
@ -1468,7 +1468,7 @@ repeat:
|
|||||||
}
|
}
|
||||||
|
|
||||||
folio_lock(folio);
|
folio_lock(folio);
|
||||||
if (folio->mapping == mc) {
|
if (likely(folio->mapping == mc)) {
|
||||||
/*
|
/*
|
||||||
* The cached folio is still in managed cache but without
|
* The cached folio is still in managed cache but without
|
||||||
* a valid `->private` pcluster hint. Let's reconnect them.
|
* a valid `->private` pcluster hint. Let's reconnect them.
|
||||||
@ -1478,41 +1478,44 @@ repeat:
|
|||||||
/* compressed_bvecs[] already takes a ref before */
|
/* compressed_bvecs[] already takes a ref before */
|
||||||
folio_put(folio);
|
folio_put(folio);
|
||||||
}
|
}
|
||||||
|
if (likely(folio->private == pcl)) {
|
||||||
/* no need to submit if it is already up-to-date */
|
/* don't submit cache I/Os again if already uptodate */
|
||||||
if (folio_test_uptodate(folio)) {
|
if (folio_test_uptodate(folio)) {
|
||||||
folio_unlock(folio);
|
folio_unlock(folio);
|
||||||
bvec->bv_page = NULL;
|
bvec->bv_page = NULL;
|
||||||
|
}
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
return;
|
/*
|
||||||
|
* Already linked with another pcluster, which only appears in
|
||||||
|
* crafted images by fuzzers for now. But handle this anyway.
|
||||||
|
*/
|
||||||
|
tocache = false; /* use temporary short-lived pages */
|
||||||
|
} else {
|
||||||
|
DBG_BUGON(1); /* referenced managed folios can't be truncated */
|
||||||
|
tocache = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* It has been truncated, so it's unsafe to reuse this one. Let's
|
|
||||||
* allocate a new page for compressed data.
|
|
||||||
*/
|
|
||||||
DBG_BUGON(folio->mapping);
|
|
||||||
tocache = true;
|
|
||||||
folio_unlock(folio);
|
folio_unlock(folio);
|
||||||
folio_put(folio);
|
folio_put(folio);
|
||||||
out_allocfolio:
|
out_allocfolio:
|
||||||
zbv.page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
|
page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
|
||||||
spin_lock(&pcl->obj.lockref.lock);
|
spin_lock(&pcl->obj.lockref.lock);
|
||||||
if (pcl->compressed_bvecs[nr].page) {
|
if (unlikely(pcl->compressed_bvecs[nr].page != zbv.page)) {
|
||||||
erofs_pagepool_add(&f->pagepool, zbv.page);
|
erofs_pagepool_add(&f->pagepool, page);
|
||||||
spin_unlock(&pcl->obj.lockref.lock);
|
spin_unlock(&pcl->obj.lockref.lock);
|
||||||
cond_resched();
|
cond_resched();
|
||||||
goto repeat;
|
goto repeat;
|
||||||
}
|
}
|
||||||
bvec->bv_page = pcl->compressed_bvecs[nr].page = zbv.page;
|
bvec->bv_page = pcl->compressed_bvecs[nr].page = page;
|
||||||
folio = page_folio(zbv.page);
|
folio = page_folio(page);
|
||||||
/* first mark it as a temporary shortlived folio (now 1 ref) */
|
|
||||||
folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
|
|
||||||
spin_unlock(&pcl->obj.lockref.lock);
|
spin_unlock(&pcl->obj.lockref.lock);
|
||||||
out_tocache:
|
out_tocache:
|
||||||
if (!tocache || bs != PAGE_SIZE ||
|
if (!tocache || bs != PAGE_SIZE ||
|
||||||
filemap_add_folio(mc, folio, pcl->obj.index + nr, gfp))
|
filemap_add_folio(mc, folio, pcl->obj.index + nr, gfp)) {
|
||||||
|
/* turn into a temporary shortlived folio (1 ref) */
|
||||||
|
folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
folio_attach_private(folio, pcl);
|
folio_attach_private(folio, pcl);
|
||||||
/* drop a refcount added by allocpage (then 2 refs in total here) */
|
/* drop a refcount added by allocpage (then 2 refs in total here) */
|
||||||
folio_put(folio);
|
folio_put(folio);
|
||||||
@ -1647,13 +1650,10 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
|
|||||||
cur = mdev.m_pa;
|
cur = mdev.m_pa;
|
||||||
end = cur + pcl->pclustersize;
|
end = cur + pcl->pclustersize;
|
||||||
do {
|
do {
|
||||||
z_erofs_fill_bio_vec(&bvec, f, pcl, i++, mc);
|
bvec.bv_page = NULL;
|
||||||
if (!bvec.bv_page)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (bio && (cur != last_pa ||
|
if (bio && (cur != last_pa ||
|
||||||
bio->bi_bdev != mdev.m_bdev)) {
|
bio->bi_bdev != mdev.m_bdev)) {
|
||||||
io_retry:
|
drain_io:
|
||||||
if (!erofs_is_fscache_mode(sb))
|
if (!erofs_is_fscache_mode(sb))
|
||||||
submit_bio(bio);
|
submit_bio(bio);
|
||||||
else
|
else
|
||||||
@ -1666,6 +1666,15 @@ io_retry:
|
|||||||
bio = NULL;
|
bio = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!bvec.bv_page) {
|
||||||
|
z_erofs_fill_bio_vec(&bvec, f, pcl, i++, mc);
|
||||||
|
if (!bvec.bv_page)
|
||||||
|
continue;
|
||||||
|
if (cur + bvec.bv_len > end)
|
||||||
|
bvec.bv_len = end - cur;
|
||||||
|
DBG_BUGON(bvec.bv_len < sb->s_blocksize);
|
||||||
|
}
|
||||||
|
|
||||||
if (unlikely(PageWorkingset(bvec.bv_page)) &&
|
if (unlikely(PageWorkingset(bvec.bv_page)) &&
|
||||||
!memstall) {
|
!memstall) {
|
||||||
psi_memstall_enter(&pflags);
|
psi_memstall_enter(&pflags);
|
||||||
@ -1685,13 +1694,9 @@ io_retry:
|
|||||||
++nr_bios;
|
++nr_bios;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cur + bvec.bv_len > end)
|
|
||||||
bvec.bv_len = end - cur;
|
|
||||||
DBG_BUGON(bvec.bv_len < sb->s_blocksize);
|
|
||||||
if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len,
|
if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len,
|
||||||
bvec.bv_offset))
|
bvec.bv_offset))
|
||||||
goto io_retry;
|
goto drain_io;
|
||||||
|
|
||||||
last_pa = cur + bvec.bv_len;
|
last_pa = cur + bvec.bv_len;
|
||||||
bypass = false;
|
bypass = false;
|
||||||
} while ((cur += bvec.bv_len) < end);
|
} while ((cur += bvec.bv_len) < end);
|
||||||
|
Loading…
Reference in New Issue
Block a user