0fa7fa98db
Change since v1:
* Fixed inuse counters access spotted by Eric
In patch eea68e2f
(packet: Report socket mclist info via diag module) I've
introduced a "scheduling in atomic" problem in the packet diag module -- the
socket list is traversed under rcu_read_lock(), while the sk mclist access
performed under it requires the rtnl lock (i.e. a mutex) to be taken.
[152363.820563] BUG: scheduling while atomic: crtools/12517/0x10000002
[152363.820573] 4 locks held by crtools/12517:
[152363.820581] #0: (sock_diag_mutex){+.+.+.}, at: [<ffffffff81a2dcb5>] sock_diag_rcv+0x1f/0x3e
[152363.820613] #1: (sock_diag_table_mutex){+.+.+.}, at: [<ffffffff81a2de70>] sock_diag_rcv_msg+0xdb/0x11a
[152363.820644] #2: (nlk->cb_mutex){+.+.+.}, at: [<ffffffff81a67d01>] netlink_dump+0x23/0x1ab
[152363.820693] #3: (rcu_read_lock){.+.+..}, at: [<ffffffff81b6a049>] packet_diag_dump+0x0/0x1af
A similar problem was then re-introduced by further packet diag patches (fanout
mutex and pg_vec mutex for rings) :(
Apart from being terribly sorry for the above, I propose to change the packet
sk list protection from spinlock to mutex. This lock currently protects two
modifications:
* sklist
* prot inuse counters
The sklist modifications can be just reprotected with mutex since they already
occur in a sleeping context. The inuse counter modifications are trickier -- the
__this_cpu_* operations are used inside, thus requiring the caller to handle the
potential context issues itself. Since packet sockets' counters are modified in only
two places (packet_create and packet_release), we only need to protect the context
from being preempted. BH disabling is not required in this case.
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
243 lines
5.6 KiB
C
243 lines
5.6 KiB
C
#include <linux/module.h>
|
|
#include <linux/sock_diag.h>
|
|
#include <linux/net.h>
|
|
#include <linux/netdevice.h>
|
|
#include <linux/packet_diag.h>
|
|
#include <net/net_namespace.h>
|
|
#include <net/sock.h>
|
|
|
|
#include "internal.h"
|
|
|
|
static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb)
|
|
{
|
|
struct packet_diag_info pinfo;
|
|
|
|
pinfo.pdi_index = po->ifindex;
|
|
pinfo.pdi_version = po->tp_version;
|
|
pinfo.pdi_reserve = po->tp_reserve;
|
|
pinfo.pdi_copy_thresh = po->copy_thresh;
|
|
pinfo.pdi_tstamp = po->tp_tstamp;
|
|
|
|
pinfo.pdi_flags = 0;
|
|
if (po->running)
|
|
pinfo.pdi_flags |= PDI_RUNNING;
|
|
if (po->auxdata)
|
|
pinfo.pdi_flags |= PDI_AUXDATA;
|
|
if (po->origdev)
|
|
pinfo.pdi_flags |= PDI_ORIGDEV;
|
|
if (po->has_vnet_hdr)
|
|
pinfo.pdi_flags |= PDI_VNETHDR;
|
|
if (po->tp_loss)
|
|
pinfo.pdi_flags |= PDI_LOSS;
|
|
|
|
return nla_put(nlskb, PACKET_DIAG_INFO, sizeof(pinfo), &pinfo);
|
|
}
|
|
|
|
static int pdiag_put_mclist(const struct packet_sock *po, struct sk_buff *nlskb)
|
|
{
|
|
struct nlattr *mca;
|
|
struct packet_mclist *ml;
|
|
|
|
mca = nla_nest_start(nlskb, PACKET_DIAG_MCLIST);
|
|
if (!mca)
|
|
return -EMSGSIZE;
|
|
|
|
rtnl_lock();
|
|
for (ml = po->mclist; ml; ml = ml->next) {
|
|
struct packet_diag_mclist *dml;
|
|
|
|
dml = nla_reserve_nohdr(nlskb, sizeof(*dml));
|
|
if (!dml) {
|
|
rtnl_unlock();
|
|
nla_nest_cancel(nlskb, mca);
|
|
return -EMSGSIZE;
|
|
}
|
|
|
|
dml->pdmc_index = ml->ifindex;
|
|
dml->pdmc_type = ml->type;
|
|
dml->pdmc_alen = ml->alen;
|
|
dml->pdmc_count = ml->count;
|
|
BUILD_BUG_ON(sizeof(dml->pdmc_addr) != sizeof(ml->addr));
|
|
memcpy(dml->pdmc_addr, ml->addr, sizeof(ml->addr));
|
|
}
|
|
|
|
rtnl_unlock();
|
|
nla_nest_end(nlskb, mca);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int pdiag_put_ring(struct packet_ring_buffer *ring, int ver, int nl_type,
|
|
struct sk_buff *nlskb)
|
|
{
|
|
struct packet_diag_ring pdr;
|
|
|
|
if (!ring->pg_vec || ((ver > TPACKET_V2) &&
|
|
(nl_type == PACKET_DIAG_TX_RING)))
|
|
return 0;
|
|
|
|
pdr.pdr_block_size = ring->pg_vec_pages << PAGE_SHIFT;
|
|
pdr.pdr_block_nr = ring->pg_vec_len;
|
|
pdr.pdr_frame_size = ring->frame_size;
|
|
pdr.pdr_frame_nr = ring->frame_max + 1;
|
|
|
|
if (ver > TPACKET_V2) {
|
|
pdr.pdr_retire_tmo = ring->prb_bdqc.retire_blk_tov;
|
|
pdr.pdr_sizeof_priv = ring->prb_bdqc.blk_sizeof_priv;
|
|
pdr.pdr_features = ring->prb_bdqc.feature_req_word;
|
|
} else {
|
|
pdr.pdr_retire_tmo = 0;
|
|
pdr.pdr_sizeof_priv = 0;
|
|
pdr.pdr_features = 0;
|
|
}
|
|
|
|
return nla_put(nlskb, nl_type, sizeof(pdr), &pdr);
|
|
}
|
|
|
|
static int pdiag_put_rings_cfg(struct packet_sock *po, struct sk_buff *skb)
|
|
{
|
|
int ret;
|
|
|
|
mutex_lock(&po->pg_vec_lock);
|
|
ret = pdiag_put_ring(&po->rx_ring, po->tp_version,
|
|
PACKET_DIAG_RX_RING, skb);
|
|
if (!ret)
|
|
ret = pdiag_put_ring(&po->tx_ring, po->tp_version,
|
|
PACKET_DIAG_TX_RING, skb);
|
|
mutex_unlock(&po->pg_vec_lock);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb)
|
|
{
|
|
int ret = 0;
|
|
|
|
mutex_lock(&fanout_mutex);
|
|
if (po->fanout) {
|
|
u32 val;
|
|
|
|
val = (u32)po->fanout->id | ((u32)po->fanout->type << 16);
|
|
ret = nla_put_u32(nlskb, PACKET_DIAG_FANOUT, val);
|
|
}
|
|
mutex_unlock(&fanout_mutex);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Build one SOCK_DIAG_BY_FAMILY netlink message describing @sk in @skb.
 *
 * The fixed packet_diag_msg header is always written; the optional
 * attributes (info, mclist, ring config, fanout) are added only when the
 * matching PACKET_SHOW_* bit is set in req->pdiag_show.
 *
 * Returns the result of nlmsg_end() on success. If the message header does
 * not fit, or any requested attribute helper returns non-zero, the message
 * is cancelled (when it had been started) and -EMSGSIZE is returned.
 */
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req,
		u32 pid, u32 seq, u32 flags, int sk_ino)
{
	struct packet_sock *po = pkt_sk(sk);
	const u32 show = req->pdiag_show;
	struct packet_diag_msg *msg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*msg), flags);
	if (!nlh)
		return -EMSGSIZE;

	msg = nlmsg_data(nlh);
	msg->pdiag_family = AF_PACKET;
	msg->pdiag_type = sk->sk_type;
	msg->pdiag_num = ntohs(po->num);
	msg->pdiag_ino = sk_ino;
	sock_diag_save_cookie(sk, msg->pdiag_cookie);

	/* Each helper is only invoked when its section was requested. */
	if ((show & PACKET_SHOW_INFO) && pdiag_put_info(po, skb))
		goto cancel;

	if ((show & PACKET_SHOW_MCLIST) && pdiag_put_mclist(po, skb))
		goto cancel;

	if ((show & PACKET_SHOW_RING_CFG) && pdiag_put_rings_cfg(po, skb))
		goto cancel;

	if ((show & PACKET_SHOW_FANOUT) && pdiag_put_fanout(po, skb))
		goto cancel;

	return nlmsg_end(skb, nlh);

cancel:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
|
|
|
|
static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
|
|
{
|
|
int num = 0, s_num = cb->args[0];
|
|
struct packet_diag_req *req;
|
|
struct net *net;
|
|
struct sock *sk;
|
|
struct hlist_node *node;
|
|
|
|
net = sock_net(skb->sk);
|
|
req = nlmsg_data(cb->nlh);
|
|
|
|
mutex_lock(&net->packet.sklist_lock);
|
|
sk_for_each(sk, node, &net->packet.sklist) {
|
|
if (!net_eq(sock_net(sk), net))
|
|
continue;
|
|
if (num < s_num)
|
|
goto next;
|
|
|
|
if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).pid,
|
|
cb->nlh->nlmsg_seq, NLM_F_MULTI,
|
|
sock_i_ino(sk)) < 0)
|
|
goto done;
|
|
next:
|
|
num++;
|
|
}
|
|
done:
|
|
mutex_unlock(&net->packet.sklist_lock);
|
|
cb->args[0] = num;
|
|
|
|
return skb->len;
|
|
}
|
|
|
|
static int packet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
|
|
{
|
|
int hdrlen = sizeof(struct packet_diag_req);
|
|
struct net *net = sock_net(skb->sk);
|
|
struct packet_diag_req *req;
|
|
|
|
if (nlmsg_len(h) < hdrlen)
|
|
return -EINVAL;
|
|
|
|
req = nlmsg_data(h);
|
|
/* Make it possible to support protocol filtering later */
|
|
if (req->sdiag_protocol)
|
|
return -EINVAL;
|
|
|
|
if (h->nlmsg_flags & NLM_F_DUMP) {
|
|
struct netlink_dump_control c = {
|
|
.dump = packet_diag_dump,
|
|
};
|
|
return netlink_dump_start(net->diag_nlsk, skb, h, &c);
|
|
} else
|
|
return -EOPNOTSUPP;
|
|
}
|
|
|
|
static const struct sock_diag_handler packet_diag_handler = {
|
|
.family = AF_PACKET,
|
|
.dump = packet_diag_handler_dump,
|
|
};
|
|
|
|
/*
 * Module init: register the AF_PACKET handler with sock_diag and return
 * the registration result.
 */
static int __init packet_diag_init(void)
{
	int err = sock_diag_register(&packet_diag_handler);

	return err;
}
|
|
|
|
/* Module exit: unregister the AF_PACKET handler from sock_diag. */
static void __exit packet_diag_exit(void)
{
	sock_diag_unregister(&packet_diag_handler);
}
|
|
|
|
module_init(packet_diag_init);
|
|
module_exit(packet_diag_exit);
|
|
MODULE_LICENSE("GPL");
|
|
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 17 /* AF_PACKET */);
|