netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test
Linkui Xiao reported that there's a race condition when ipset swap and destroy is called, which can lead to crash in add/del/test element operations. Swap then destroy are usual operations to replace a set with another one in a production system. The issue can in some cases be reproduced with the script: ipset create hash_ip1 hash:net family inet hashsize 1024 maxelem 1048576 ipset add hash_ip1 172.20.0.0/16 ipset add hash_ip1 192.168.0.0/16 iptables -A INPUT -m set --match-set hash_ip1 src -j ACCEPT while [ 1 ] do # ... Ongoing traffic... ipset create hash_ip2 hash:net family inet hashsize 1024 maxelem 1048576 ipset add hash_ip2 172.20.0.0/16 ipset swap hash_ip1 hash_ip2 ipset destroy hash_ip2 sleep 0.05 done In the race case the possible order of the operations are CPU0 CPU1 ip_set_test ipset swap hash_ip1 hash_ip2 ipset destroy hash_ip2 hash_net_kadt Swap replaces hash_ip1 with hash_ip2 and then destroy removes hash_ip2 which is the original hash_ip1. ip_set_test was called on hash_ip1 and because destroy removed it, hash_net_kadt crashes. The fix is to force ip_set_swap() to wait for all readers to finish accessing the old set pointers by calling synchronize_rcu(). The first version of the patch was written by Linkui Xiao <xiaolinkui@kylinos.cn>. v2: synchronize_rcu() is moved into ip_set_swap() in order not to burden ip_set_destroy() unnecessarily when all sets are destroyed. v3: Florian Westphal pointed out that all netfilter hooks run with rcu_read_lock() held and em_ipset.c wraps the entire ip_set_test() in rcu read lock/unlock pair. So there's no need to extend the rcu read locked area in ipset itself. Closes: https://lore.kernel.org/all/69e7963b-e7f8-3ad0-210-7b86eebf7f78@netfilter.org/ Reported by: Linkui Xiao <xiaolinkui@kylinos.cn> Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
This commit is contained in:
parent
a7d5a955bf
commit
28628fa952
@ -61,6 +61,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
|
||||
ip_set_dereference((inst)->ip_set_list)[id]
|
||||
#define ip_set_ref_netlink(inst,id) \
|
||||
rcu_dereference_raw((inst)->ip_set_list)[id]
|
||||
#define ip_set_dereference_nfnl(p) \
|
||||
rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
|
||||
|
||||
/* The set types are implemented in modules and registered set types
|
||||
* can be found in ip_set_type_list. Adding/deleting types is
|
||||
@ -708,15 +710,10 @@ __ip_set_put_netlink(struct ip_set *set)
|
||||
static struct ip_set *
|
||||
ip_set_rcu_get(struct net *net, ip_set_id_t index)
|
||||
{
|
||||
struct ip_set *set;
|
||||
struct ip_set_net *inst = ip_set_pernet(net);
|
||||
|
||||
rcu_read_lock();
|
||||
/* ip_set_list itself needs to be protected */
|
||||
set = rcu_dereference(inst->ip_set_list)[index];
|
||||
rcu_read_unlock();
|
||||
|
||||
return set;
|
||||
/* ip_set_list and the set pointer need to be protected */
|
||||
return ip_set_dereference_nfnl(inst->ip_set_list)[index];
|
||||
}
|
||||
|
||||
static inline void
|
||||
@ -1397,6 +1394,9 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
|
||||
ip_set(inst, to_id) = from;
|
||||
write_unlock_bh(&ip_set_ref_lock);
|
||||
|
||||
/* Make sure all readers of the old set pointers are completed. */
|
||||
synchronize_rcu();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user