1
linux/include/net/net_namespace.h
Eric W. Biederman b9f75f45a6 netns: Don't receive new packets in a dead network namespace.
Alexey Dobriyan <adobriyan@gmail.com> writes:
> Subject: ICMP sockets destruction vs ICMP packets oops

> After icmp_sk_exit() nuked ICMP sockets, we get an interrupt.
> icmp_reply() wants ICMP socket.
>
> Steps to reproduce:
>
> 	launch shell in new netns
> 	move real NIC to netns
> 	setup routing
> 	ping -i 0
> 	exit from shell
>
> BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
> IP: [<ffffffff803fce17>] icmp_sk+0x17/0x30
> PGD 17f3cd067 PUD 17f3ce067 PMD 0 
> Oops: 0000 [1] PREEMPT SMP DEBUG_PAGEALLOC
> CPU 0 
> Modules linked in: usblp usbcore
> Pid: 0, comm: swapper Not tainted 2.6.26-rc6-netns-ct #4
> RIP: 0010:[<ffffffff803fce17>]  [<ffffffff803fce17>] icmp_sk+0x17/0x30
> RSP: 0018:ffffffff8057fc30  EFLAGS: 00010286
> RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff81017c7db900
> RDX: 0000000000000034 RSI: ffff81017c7db900 RDI: ffff81017dc41800
> RBP: ffffffff8057fc40 R08: 0000000000000001 R09: 000000000000a815
> R10: 0000000000000000 R11: 0000000000000001 R12: ffffffff8057fd28
> R13: ffffffff8057fd00 R14: ffff81017c7db938 R15: ffff81017dc41800
> FS:  0000000000000000(0000) GS:ffffffff80525000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
> CR2: 0000000000000000 CR3: 000000017fcda000 CR4: 00000000000006e0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> Process swapper (pid: 0, threadinfo ffffffff8053a000, task ffffffff804fa4a0)
> Stack:  0000000000000000 ffff81017c7db900 ffffffff8057fcf0 ffffffff803fcfe4
>  ffffffff804faa38 0000000000000246 0000000000005a40 0000000000000246
>  000000000001ffff ffff81017dd68dc0 0000000000005a40 0000000055342436
> Call Trace:
>  <IRQ>  [<ffffffff803fcfe4>] icmp_reply+0x44/0x1e0
>  [<ffffffff803d3a0a>] ? ip_route_input+0x23a/0x1360
>  [<ffffffff803fd645>] icmp_echo+0x65/0x70
>  [<ffffffff803fd300>] icmp_rcv+0x180/0x1b0
>  [<ffffffff803d6d84>] ip_local_deliver+0xf4/0x1f0
>  [<ffffffff803d71bb>] ip_rcv+0x33b/0x650
>  [<ffffffff803bb16a>] netif_receive_skb+0x27a/0x340
>  [<ffffffff803be57d>] process_backlog+0x9d/0x100
>  [<ffffffff803bdd4d>] net_rx_action+0x18d/0x250
>  [<ffffffff80237be5>] __do_softirq+0x75/0x100
>  [<ffffffff8020c97c>] call_softirq+0x1c/0x30
>  [<ffffffff8020f085>] do_softirq+0x65/0xa0
>  [<ffffffff80237af7>] irq_exit+0x97/0xa0
>  [<ffffffff8020f198>] do_IRQ+0xa8/0x130
>  [<ffffffff80212ee0>] ? mwait_idle+0x0/0x60
>  [<ffffffff8020bc46>] ret_from_intr+0x0/0xf
>  <EOI>  [<ffffffff80212f2c>] ? mwait_idle+0x4c/0x60
>  [<ffffffff80212f23>] ? mwait_idle+0x43/0x60
>  [<ffffffff8020a217>] ? cpu_idle+0x57/0xa0
>  [<ffffffff8040f380>] ? rest_init+0x70/0x80
> Code: 10 5b 41 5c 41 5d 41 5e c9 c3 66 2e 0f 1f 84 00 00 00 00 00 55 48 89 e5 53
> 48 83 ec 08 48 8b 9f 78 01 00 00 e8 2b c7 f1 ff 89 c0 <48> 8b 04 c3 48 83 c4 08
> 5b c9 c3 66 66 66 66 66 2e 0f 1f 84 00
> RIP  [<ffffffff803fce17>] icmp_sk+0x17/0x30
>  RSP <ffffffff8057fc30>
> CR2: 0000000000000000
> ---[ end trace ea161157b76b33e8 ]---
> Kernel panic - not syncing: Aiee, killing interrupt handler!

Receiving packets while we are cleaning up a network namespace is a
racy proposition. It is possible when the packet arrives that we have
removed some but not all of the state we need to fully process it.  We
have the choice of either playing wack-a-mole with the cleanup routines
or simply dropping packets when we don't have a network namespace to
handle them.

Since the check looks inexpensive in netif_receive_skb let's just
drop the incoming packets.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2008-06-20 22:16:51 -07:00

220 lines
4.6 KiB
C

/*
* Operations on the network namespace
*/
#ifndef __NET_NET_NAMESPACE_H
#define __NET_NET_NAMESPACE_H
#include <asm/atomic.h>
#include <linux/workqueue.h>
#include <linux/list.h>
#include <net/netns/core.h>
#include <net/netns/unix.h>
#include <net/netns/packet.h>
#include <net/netns/ipv4.h>
#include <net/netns/ipv6.h>
#include <net/netns/dccp.h>
#include <net/netns/x_tables.h>
struct proc_dir_entry;
struct net_device;
struct sock;
struct ctl_table_header;
struct net_generic;
struct net {
atomic_t count; /* To decided when the network
* namespace should be freed.
*/
#ifdef NETNS_REFCNT_DEBUG
atomic_t use_count; /* To track references we
* destroy on demand
*/
#endif
struct list_head list; /* list of network namespaces */
struct work_struct work; /* work struct for freeing */
struct proc_dir_entry *proc_net;
struct proc_dir_entry *proc_net_stat;
struct list_head sysctl_table_headers;
struct net_device *loopback_dev; /* The loopback */
struct list_head dev_base_head;
struct hlist_head *dev_name_head;
struct hlist_head *dev_index_head;
/* core fib_rules */
struct list_head rules_ops;
spinlock_t rules_mod_lock;
struct sock *rtnl; /* rtnetlink socket */
struct netns_core core;
struct netns_packet packet;
struct netns_unix unx;
struct netns_ipv4 ipv4;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
struct netns_ipv6 ipv6;
#endif
#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
struct netns_dccp dccp;
#endif
#ifdef CONFIG_NETFILTER
struct netns_xt xt;
#endif
struct net_generic *gen;
};
#include <linux/seq_file_net.h>
/* Init's network namespace */
extern struct net init_net;
#ifdef CONFIG_NET
#define INIT_NET_NS(net_ns) .net_ns = &init_net,
extern struct net *copy_net_ns(unsigned long flags, struct net *net_ns);
#else /* CONFIG_NET */
#define INIT_NET_NS(net_ns)
static inline struct net *copy_net_ns(unsigned long flags, struct net *net_ns)
{
/* There is nothing to copy so this is a noop */
return net_ns;
}
#endif /* CONFIG_NET */
extern struct list_head net_namespace_list;
#ifdef CONFIG_NET_NS
extern void __put_net(struct net *net);
static inline int net_alive(struct net *net)
{
return net && atomic_read(&net->count);
}
static inline struct net *get_net(struct net *net)
{
atomic_inc(&net->count);
return net;
}
static inline struct net *maybe_get_net(struct net *net)
{
/* Used when we know struct net exists but we
* aren't guaranteed a previous reference count
* exists. If the reference count is zero this
* function fails and returns NULL.
*/
if (!atomic_inc_not_zero(&net->count))
net = NULL;
return net;
}
static inline void put_net(struct net *net)
{
if (atomic_dec_and_test(&net->count))
__put_net(net);
}
static inline
int net_eq(const struct net *net1, const struct net *net2)
{
return net1 == net2;
}
#else
static inline int net_alive(struct net *net)
{
return 1;
}
static inline struct net *get_net(struct net *net)
{
return net;
}
static inline void put_net(struct net *net)
{
}
static inline struct net *maybe_get_net(struct net *net)
{
return net;
}
static inline
int net_eq(const struct net *net1, const struct net *net2)
{
return 1;
}
#endif
#ifdef NETNS_REFCNT_DEBUG
static inline struct net *hold_net(struct net *net)
{
if (net)
atomic_inc(&net->use_count);
return net;
}
static inline void release_net(struct net *net)
{
if (net)
atomic_dec(&net->use_count);
}
#else
static inline struct net *hold_net(struct net *net)
{
return net;
}
static inline void release_net(struct net *net)
{
}
#endif
#define for_each_net(VAR) \
list_for_each_entry(VAR, &net_namespace_list, list)
#ifdef CONFIG_NET_NS
#define __net_init
#define __net_exit
#define __net_initdata
#else
#define __net_init __init
#define __net_exit __exit_refok
#define __net_initdata __initdata
#endif
struct pernet_operations {
struct list_head list;
int (*init)(struct net *net);
void (*exit)(struct net *net);
};
extern int register_pernet_subsys(struct pernet_operations *);
extern void unregister_pernet_subsys(struct pernet_operations *);
extern int register_pernet_device(struct pernet_operations *);
extern void unregister_pernet_device(struct pernet_operations *);
extern int register_pernet_gen_device(int *id, struct pernet_operations *);
extern void unregister_pernet_gen_device(int id, struct pernet_operations *);
struct ctl_path;
struct ctl_table;
struct ctl_table_header;
extern struct ctl_table_header *register_net_sysctl_table(struct net *net,
const struct ctl_path *path, struct ctl_table *table);
extern void unregister_net_sysctl_table(struct ctl_table_header *header);
#endif /* __NET_NET_NAMESPACE_H */