0a14842f5a
In order to speedup packet filtering, here is an implementation of a JIT compiler for x86_64 It is disabled by default, and must be enabled by the admin. echo 1 >/proc/sys/net/core/bpf_jit_enable It uses module_alloc() and module_free() to get memory in the 2GB text kernel range since we call helpers functions from the generated code. EAX : BPF A accumulator EBX : BPF X accumulator RDI : pointer to skb (first argument given to JIT function) RBP : frame pointer (even if CONFIG_FRAME_POINTER=n) r9d : skb->len - skb->data_len (headlen) r8 : skb->data To get a trace of generated code, use : echo 2 >/proc/sys/net/core/bpf_jit_enable Example of generated code : # tcpdump -p -n -s 0 -i eth1 host 192.168.20.0/24 flen=18 proglen=147 pass=3 image=ffffffffa00b5000 JIT code: ffffffffa00b5000: 55 48 89 e5 48 83 ec 60 48 89 5d f8 44 8b 4f 60 JIT code: ffffffffa00b5010: 44 2b 4f 64 4c 8b 87 b8 00 00 00 be 0c 00 00 00 JIT code: ffffffffa00b5020: e8 24 7b f7 e0 3d 00 08 00 00 75 28 be 1a 00 00 JIT code: ffffffffa00b5030: 00 e8 fe 7a f7 e0 24 00 3d 00 14 a8 c0 74 49 be JIT code: ffffffffa00b5040: 1e 00 00 00 e8 eb 7a f7 e0 24 00 3d 00 14 a8 c0 JIT code: ffffffffa00b5050: 74 36 eb 3b 3d 06 08 00 00 74 07 3d 35 80 00 00 JIT code: ffffffffa00b5060: 75 2d be 1c 00 00 00 e8 c8 7a f7 e0 24 00 3d 00 JIT code: ffffffffa00b5070: 14 a8 c0 74 13 be 26 00 00 00 e8 b5 7a f7 e0 24 JIT code: ffffffffa00b5080: 00 3d 00 14 a8 c0 75 07 b8 ff ff 00 00 eb 02 31 JIT code: ffffffffa00b5090: c0 c9 c3 BPF program is 144 bytes long, so native program is almost same size ;) (000) ldh [12] (001) jeq #0x800 jt 2 jf 8 (002) ld [26] (003) and #0xffffff00 (004) jeq #0xc0a81400 jt 16 jf 5 (005) ld [30] (006) and #0xffffff00 (007) jeq #0xc0a81400 jt 16 jf 17 (008) jeq #0x806 jt 10 jf 9 (009) jeq #0x8035 jt 10 jf 17 (010) ld [28] (011) and #0xffffff00 (012) jeq #0xc0a81400 jt 16 jf 13 (013) ld [38] (014) and #0xffffff00 (015) jeq #0xc0a81400 jt 16 jf 17 (016) ret #65535 (017) ret #0 Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Cc: Arnaldo Carvalho de Melo <acme@infradead.org> Cc: Ben Hutchings <bhutchings@solarflare.com> Cc: Hagen Paul Pfeifer <hagen@jauu.net> Signed-off-by: David S. Miller <davem@davemloft.net>
259 lines
5.4 KiB
C
259 lines
5.4 KiB
C
/* -*- linux-c -*-
|
|
* sysctl_net_core.c: sysctl interface to net core subsystem.
|
|
*
|
|
* Begun April 1, 1996, Mike Shaver.
|
|
* Added /proc/sys/net/core directory entry (empty =) ). [MS]
|
|
*/
|
|
|
|
#include <linux/mm.h>
|
|
#include <linux/sysctl.h>
|
|
#include <linux/module.h>
|
|
#include <linux/socket.h>
|
|
#include <linux/netdevice.h>
|
|
#include <linux/ratelimit.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/init.h>
|
|
#include <linux/slab.h>
|
|
|
|
#include <net/ip.h>
|
|
#include <net/sock.h>
|
|
|
|
#ifdef CONFIG_RPS
|
|
static int rps_sock_flow_sysctl(ctl_table *table, int write,
|
|
void __user *buffer, size_t *lenp, loff_t *ppos)
|
|
{
|
|
unsigned int orig_size, size;
|
|
int ret, i;
|
|
ctl_table tmp = {
|
|
.data = &size,
|
|
.maxlen = sizeof(size),
|
|
.mode = table->mode
|
|
};
|
|
struct rps_sock_flow_table *orig_sock_table, *sock_table;
|
|
static DEFINE_MUTEX(sock_flow_mutex);
|
|
|
|
mutex_lock(&sock_flow_mutex);
|
|
|
|
orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
|
|
lockdep_is_held(&sock_flow_mutex));
|
|
size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;
|
|
|
|
ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
|
|
|
|
if (write) {
|
|
if (size) {
|
|
if (size > 1<<30) {
|
|
/* Enforce limit to prevent overflow */
|
|
mutex_unlock(&sock_flow_mutex);
|
|
return -EINVAL;
|
|
}
|
|
size = roundup_pow_of_two(size);
|
|
if (size != orig_size) {
|
|
sock_table =
|
|
vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
|
|
if (!sock_table) {
|
|
mutex_unlock(&sock_flow_mutex);
|
|
return -ENOMEM;
|
|
}
|
|
|
|
sock_table->mask = size - 1;
|
|
} else
|
|
sock_table = orig_sock_table;
|
|
|
|
for (i = 0; i < size; i++)
|
|
sock_table->ents[i] = RPS_NO_CPU;
|
|
} else
|
|
sock_table = NULL;
|
|
|
|
if (sock_table != orig_sock_table) {
|
|
rcu_assign_pointer(rps_sock_flow_table, sock_table);
|
|
synchronize_rcu();
|
|
vfree(orig_sock_table);
|
|
}
|
|
}
|
|
|
|
mutex_unlock(&sock_flow_mutex);
|
|
|
|
return ret;
|
|
}
|
|
#endif /* CONFIG_RPS */
|
|
|
|
static struct ctl_table net_core_table[] = {
|
|
#ifdef CONFIG_NET
|
|
{
|
|
.procname = "wmem_max",
|
|
.data = &sysctl_wmem_max,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec
|
|
},
|
|
{
|
|
.procname = "rmem_max",
|
|
.data = &sysctl_rmem_max,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec
|
|
},
|
|
{
|
|
.procname = "wmem_default",
|
|
.data = &sysctl_wmem_default,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec
|
|
},
|
|
{
|
|
.procname = "rmem_default",
|
|
.data = &sysctl_rmem_default,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec
|
|
},
|
|
{
|
|
.procname = "dev_weight",
|
|
.data = &weight_p,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec
|
|
},
|
|
{
|
|
.procname = "netdev_max_backlog",
|
|
.data = &netdev_max_backlog,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec
|
|
},
|
|
#ifdef CONFIG_BPF_JIT
|
|
{
|
|
.procname = "bpf_jit_enable",
|
|
.data = &bpf_jit_enable,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec
|
|
},
|
|
#endif
|
|
{
|
|
.procname = "netdev_tstamp_prequeue",
|
|
.data = &netdev_tstamp_prequeue,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec
|
|
},
|
|
{
|
|
.procname = "message_cost",
|
|
.data = &net_ratelimit_state.interval,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec_jiffies,
|
|
},
|
|
{
|
|
.procname = "message_burst",
|
|
.data = &net_ratelimit_state.burst,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec,
|
|
},
|
|
{
|
|
.procname = "optmem_max",
|
|
.data = &sysctl_optmem_max,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec
|
|
},
|
|
#ifdef CONFIG_RPS
|
|
{
|
|
.procname = "rps_sock_flow_entries",
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = rps_sock_flow_sysctl
|
|
},
|
|
#endif
|
|
#endif /* CONFIG_NET */
|
|
{
|
|
.procname = "netdev_budget",
|
|
.data = &netdev_budget,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec
|
|
},
|
|
{
|
|
.procname = "warnings",
|
|
.data = &net_msg_warn,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec
|
|
},
|
|
{ }
|
|
};
|
|
|
|
static struct ctl_table netns_core_table[] = {
|
|
{
|
|
.procname = "somaxconn",
|
|
.data = &init_net.core.sysctl_somaxconn,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec
|
|
},
|
|
{ }
|
|
};
|
|
|
|
__net_initdata struct ctl_path net_core_path[] = {
|
|
{ .procname = "net", },
|
|
{ .procname = "core", },
|
|
{ },
|
|
};
|
|
|
|
static __net_init int sysctl_core_net_init(struct net *net)
|
|
{
|
|
struct ctl_table *tbl;
|
|
|
|
net->core.sysctl_somaxconn = SOMAXCONN;
|
|
|
|
tbl = netns_core_table;
|
|
if (!net_eq(net, &init_net)) {
|
|
tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
|
|
if (tbl == NULL)
|
|
goto err_dup;
|
|
|
|
tbl[0].data = &net->core.sysctl_somaxconn;
|
|
}
|
|
|
|
net->core.sysctl_hdr = register_net_sysctl_table(net,
|
|
net_core_path, tbl);
|
|
if (net->core.sysctl_hdr == NULL)
|
|
goto err_reg;
|
|
|
|
return 0;
|
|
|
|
err_reg:
|
|
if (tbl != netns_core_table)
|
|
kfree(tbl);
|
|
err_dup:
|
|
return -ENOMEM;
|
|
}
|
|
|
|
static __net_exit void sysctl_core_net_exit(struct net *net)
|
|
{
|
|
struct ctl_table *tbl;
|
|
|
|
tbl = net->core.sysctl_hdr->ctl_table_arg;
|
|
unregister_net_sysctl_table(net->core.sysctl_hdr);
|
|
BUG_ON(tbl == netns_core_table);
|
|
kfree(tbl);
|
|
}
|
|
|
|
static __net_initdata struct pernet_operations sysctl_core_ops = {
|
|
.init = sysctl_core_net_init,
|
|
.exit = sysctl_core_net_exit,
|
|
};
|
|
|
|
static __init int sysctl_core_init(void)
|
|
{
|
|
static struct ctl_table empty[1];
|
|
|
|
register_sysctl_paths(net_core_path, empty);
|
|
register_net_sysctl_rotable(net_core_path, net_core_table);
|
|
return register_pernet_subsys(&sysctl_core_ops);
|
|
}
|
|
|
|
fs_initcall(sysctl_core_init);
|