mm: zswap: fix global shrinker memcg iteration
Patch series "mm: zswap: fixes for global shrinker", v5.
This series addresses issues that left the zswap global shrinker unable to
shrink stored pages. With this series, the shrinker more reliably continues
to shrink pages until it reaches the accept threshold, giving much higher
writeback when the zswap pool limit is hit.
This patch (of 2):
This patch fixes an issue where the zswap global shrinker stopped
iterating through the memcg tree.
The problem was that shrink_worker() would restart iterating the memcg tree
from the tree root, considering an offline memcg as a failure, and abort
shrinking after encountering the same offline memcg 16 times even if there
was only one offline memcg. After this change, an offline memcg in the
tree is no longer considered a failure. This allows the shrinker to
continue shrinking the other online memcgs regardless of whether an
offline memcg exists, giving higher zswap writeback activity.
To avoid holding a reference to an offline memcg encountered during the
memcg tree walk, shrink_worker() must continue iterating, which releases the
offline memcg and ensures that the next memcg stored in the cursor is online.
The offline memcg cleaner has also been changed to avoid the same issue.
Previously, when the memcg following the offlined memcg was also offline, the
reference stored in the iteration cursor was held until the next
shrink_worker() run. The cleaner now keeps advancing the cursor until it
reaches an online memcg (or the end of the tree), releasing each offline
memcg along the way.
[yosryahmed@google.com: make critical section more obvious, unify comments]
Link: https://lkml.kernel.org/r/CAJD7tkaScz+SbB90Q1d5mMD70UfM2a-J2zhXDT9sePR7Qap45Q@mail.gmail.com
Link: https://lkml.kernel.org/r/20240731004918.33182-1-flintglass@gmail.com
Link: https://lkml.kernel.org/r/20240731004918.33182-2-flintglass@gmail.com
Fixes: a65b0e7607 ("zswap: make shrinking memcg-aware")
Signed-off-by: Takero Funaki <flintglass@gmail.com>
Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Acked-by: Yosry Ahmed <yosryahmed@google.com>
Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev>
Reviewed-by: Nhat Pham <nphamcs@gmail.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
1d3440305e
commit
c5519e0a9b
76
mm/zswap.c
76
mm/zswap.c
@@ -765,12 +765,25 @@ void zswap_folio_swapin(struct folio *folio)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This function should be called when a memcg is being offlined.
|
||||||
|
*
|
||||||
|
* Since the global shrinker shrink_worker() may hold a reference
|
||||||
|
* of the memcg, we must check and release the reference in
|
||||||
|
* zswap_next_shrink.
|
||||||
|
*
|
||||||
|
* shrink_worker() must handle the case where this function releases
|
||||||
|
* the reference of memcg being shrunk.
|
||||||
|
*/
|
||||||
void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
|
void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
|
||||||
{
|
{
|
||||||
/* lock out zswap shrinker walking memcg tree */
|
/* lock out zswap shrinker walking memcg tree */
|
||||||
spin_lock(&zswap_shrink_lock);
|
spin_lock(&zswap_shrink_lock);
|
||||||
if (zswap_next_shrink == memcg)
|
if (zswap_next_shrink == memcg) {
|
||||||
|
do {
|
||||||
zswap_next_shrink = mem_cgroup_iter(NULL, zswap_next_shrink, NULL);
|
zswap_next_shrink = mem_cgroup_iter(NULL, zswap_next_shrink, NULL);
|
||||||
|
} while (zswap_next_shrink && !mem_cgroup_online(zswap_next_shrink));
|
||||||
|
}
|
||||||
spin_unlock(&zswap_shrink_lock);
|
spin_unlock(&zswap_shrink_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1304,44 +1317,49 @@ static void shrink_worker(struct work_struct *w)
|
|||||||
/* Reclaim down to the accept threshold */
|
/* Reclaim down to the accept threshold */
|
||||||
thr = zswap_accept_thr_pages();
|
thr = zswap_accept_thr_pages();
|
||||||
|
|
||||||
/* global reclaim will select cgroup in a round-robin fashion. */
|
|
||||||
do {
|
|
||||||
spin_lock(&zswap_shrink_lock);
|
|
||||||
zswap_next_shrink = mem_cgroup_iter(NULL, zswap_next_shrink, NULL);
|
|
||||||
memcg = zswap_next_shrink;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We need to retry if we have gone through a full round trip, or if we
|
* Global reclaim will select cgroup in a round-robin fashion.
|
||||||
* got an offline memcg (or else we risk undoing the effect of the
|
|
||||||
* zswap memcg offlining cleanup callback). This is not catastrophic
|
|
||||||
* per se, but it will keep the now offlined memcg hostage for a while.
|
|
||||||
*
|
*
|
||||||
* Note that if we got an online memcg, we will keep the extra
|
* We save iteration cursor memcg into zswap_next_shrink,
|
||||||
* reference in case the original reference obtained by mem_cgroup_iter
|
* which can be modified by the offline memcg cleaner
|
||||||
* is dropped by the zswap memcg offlining callback, ensuring that the
|
* zswap_memcg_offline_cleanup().
|
||||||
* memcg is not killed when we are reclaiming.
|
*
|
||||||
|
* Since the offline cleaner is called only once, we cannot leave an
|
||||||
|
* offline memcg reference in zswap_next_shrink.
|
||||||
|
* We can rely on the cleaner only if we get online memcg under lock.
|
||||||
|
*
|
||||||
|
* If we get an offline memcg, we cannot determine if the cleaner has
|
||||||
|
* already been called or will be called later. We must put back the
|
||||||
|
* reference before returning from this function. Otherwise, the
|
||||||
|
* offline memcg left in zswap_next_shrink will hold the reference
|
||||||
|
* until the next run of shrink_worker().
|
||||||
*/
|
*/
|
||||||
|
do {
|
||||||
|
/*
|
||||||
|
* Start shrinking from the next memcg after zswap_next_shrink.
|
||||||
|
* When the offline cleaner has already advanced the cursor,
|
||||||
|
* advancing the cursor here overlooks one memcg, but this
|
||||||
|
* should be negligibly rare.
|
||||||
|
*
|
||||||
|
* If we get an online memcg, keep the extra reference in case
|
||||||
|
* the original one obtained by mem_cgroup_iter() is dropped by
|
||||||
|
* zswap_memcg_offline_cleanup() while we are shrinking the
|
||||||
|
* memcg.
|
||||||
|
*/
|
||||||
|
spin_lock(&zswap_shrink_lock);
|
||||||
|
do {
|
||||||
|
memcg = mem_cgroup_iter(NULL, zswap_next_shrink, NULL);
|
||||||
|
zswap_next_shrink = memcg;
|
||||||
|
} while (memcg && !mem_cgroup_tryget_online(memcg));
|
||||||
|
spin_unlock(&zswap_shrink_lock);
|
||||||
|
|
||||||
if (!memcg) {
|
if (!memcg) {
|
||||||
spin_unlock(&zswap_shrink_lock);
|
|
||||||
if (++failures == MAX_RECLAIM_RETRIES)
|
if (++failures == MAX_RECLAIM_RETRIES)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
goto resched;
|
goto resched;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!mem_cgroup_tryget_online(memcg)) {
|
|
||||||
/* drop the reference from mem_cgroup_iter() */
|
|
||||||
mem_cgroup_iter_break(NULL, memcg);
|
|
||||||
zswap_next_shrink = NULL;
|
|
||||||
spin_unlock(&zswap_shrink_lock);
|
|
||||||
|
|
||||||
if (++failures == MAX_RECLAIM_RETRIES)
|
|
||||||
break;
|
|
||||||
|
|
||||||
goto resched;
|
|
||||||
}
|
|
||||||
spin_unlock(&zswap_shrink_lock);
|
|
||||||
|
|
||||||
ret = shrink_memcg(memcg);
|
ret = shrink_memcg(memcg);
|
||||||
/* drop the extra reference */
|
/* drop the extra reference */
|
||||||
mem_cgroup_put(memcg);
|
mem_cgroup_put(memcg);
|
||||||
|
Loading…
Reference in New Issue
Block a user