From 61b590b9ee4221173ad6990a1150c5c9db73564e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 23 Oct 2015 12:43:18 +0200
Subject: [PATCH 01/83] netfilter: ingress: don't use nf_hook_list_active

nf_hook_list_active() always returns true once at least one device has
NF_INGRESS hook enabled.

Thus, don't use this function. Instead, inverse the test and use the static
key to elide list_empty test if no NF_INGRESS hooks are active.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ingress.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
index 187feabe557c..ba7ce8805fe3 100644
--- a/include/linux/netfilter_ingress.h
+++ b/include/linux/netfilter_ingress.h
@@ -5,10 +5,13 @@
 #include <linux/netdevice.h>
 
 #ifdef CONFIG_NETFILTER_INGRESS
-static inline int nf_hook_ingress_active(struct sk_buff *skb)
+static inline bool nf_hook_ingress_active(const struct sk_buff *skb)
 {
-	return nf_hook_list_active(&skb->dev->nf_hooks_ingress,
-				   NFPROTO_NETDEV, NF_NETDEV_INGRESS);
+#ifdef HAVE_JUMP_LABEL
+	if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS]))
+		return false;
+#endif
+	return !list_empty(&skb->dev->nf_hooks_ingress);
 }
 
 static inline int nf_hook_ingress(struct sk_buff *skb)

From b4865988eab598e56e6e628b9b32441acd142b28 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 6 Nov 2015 18:35:57 +0100
Subject: [PATCH 02/83] netfilter: ingress: fix wrong input interface on hook

The input and output interfaces in nf_hook_state_init() are flipped.
This fixes iif matching on nftables.

Reported-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ingress.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
index ba7ce8805fe3..5fcd375ef175 100644
--- a/include/linux/netfilter_ingress.h
+++ b/include/linux/netfilter_ingress.h
@@ -19,8 +19,8 @@ static inline int nf_hook_ingress(struct sk_buff *skb)
 	struct nf_hook_state state;
 
 	nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress,
-			   NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, NULL,
-			   skb->dev, NULL, dev_net(skb->dev), NULL);
+			   NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV,
+			   skb->dev, NULL, NULL, dev_net(skb->dev), NULL);
 	return nf_hook_slow(skb, &state);
 }
 

From 95ad1f4a9358dff1dcf84bf5c9cc84caa9215f7f Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sat, 7 Nov 2015 11:21:47 +0100
Subject: [PATCH 03/83] netfilter: ipset: Fix extension alignment

The data extensions in ipset lacked the proper memory alignment and
thus could lead to kernel crash on several architectures. Therefore
the structures have been reorganized and alignment attributes added
where needed. The patch was tested on armv7h by Gerhard Wiesinger and
on x86_64, sparc64 by Jozsef Kadlecsik.

Reported-by: Gerhard Wiesinger <lists@wiesinger.com>
Tested-by: Gerhard Wiesinger <lists@wiesinger.com>
Tested-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h    |  2 +-
 net/netfilter/ipset/ip_set_bitmap_gen.h   | 17 +++---
 net/netfilter/ipset/ip_set_bitmap_ip.c    | 14 ++---
 net/netfilter/ipset/ip_set_bitmap_ipmac.c | 64 ++++++++++-------------
 net/netfilter/ipset/ip_set_bitmap_port.c  | 18 +++----
 net/netfilter/ipset/ip_set_core.c         | 14 ++---
 net/netfilter/ipset/ip_set_hash_gen.h     | 11 ++--
 net/netfilter/ipset/ip_set_list_set.c     |  5 +-
 8 files changed, 65 insertions(+), 80 deletions(-)

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 48bb01edcf30..0e1f433cc4b7 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -421,7 +421,7 @@ extern void ip_set_free(void *members);
 extern int ip_set_get_ipaddr4(struct nlattr *nla,  __be32 *ipaddr);
 extern int ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr);
 extern size_t ip_set_elem_len(struct ip_set *set, struct nlattr *tb[],
-			      size_t len);
+			      size_t len, size_t align);
 extern int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 				 struct ip_set_ext *ext);
 
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index d05e759ed0fa..b0bc475f641e 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -33,7 +33,7 @@
 #define mtype_gc		IPSET_TOKEN(MTYPE, _gc)
 #define mtype			MTYPE
 
-#define get_ext(set, map, id)	((map)->extensions + (set)->dsize * (id))
+#define get_ext(set, map, id)	((map)->extensions + ((set)->dsize * (id)))
 
 static void
 mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
@@ -67,12 +67,9 @@ mtype_destroy(struct ip_set *set)
 		del_timer_sync(&map->gc);
 
 	ip_set_free(map->members);
-	if (set->dsize) {
-		if (set->extensions & IPSET_EXT_DESTROY)
-			mtype_ext_cleanup(set);
-		ip_set_free(map->extensions);
-	}
-	kfree(map);
+	if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
+		mtype_ext_cleanup(set);
+	ip_set_free(map);
 
 	set->data = NULL;
 }
@@ -92,16 +89,14 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 {
 	const struct mtype *map = set->data;
 	struct nlattr *nested;
+	size_t memsize = sizeof(*map) + map->memsize;
 
 	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 	if (!nested)
 		goto nla_put_failure;
 	if (mtype_do_head(skb, map) ||
 	    nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
-	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
-			  htonl(sizeof(*map) +
-				map->memsize +
-				set->dsize * map->elements)))
+	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
 		goto nla_put_failure;
 	if (unlikely(ip_set_put_flags(skb, set)))
 		goto nla_put_failure;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 64a564334418..4783efff0bde 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -41,7 +41,6 @@ MODULE_ALIAS("ip_set_bitmap:ip");
 /* Type structure */
 struct bitmap_ip {
 	void *members;		/* the set members */
-	void *extensions;	/* data extensions */
 	u32 first_ip;		/* host byte order, included in range */
 	u32 last_ip;		/* host byte order, included in range */
 	u32 elements;		/* number of max elements in the set */
@@ -49,6 +48,8 @@ struct bitmap_ip {
 	size_t memsize;		/* members size */
 	u8 netmask;		/* subnet netmask */
 	struct timer_list gc;	/* garbage collection */
+	unsigned char extensions[0]	/* data extensions */
+		__aligned(__alignof__(u64));
 };
 
 /* ADT structure for generic function args */
@@ -224,13 +225,6 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
 	map->members = ip_set_alloc(map->memsize);
 	if (!map->members)
 		return false;
-	if (set->dsize) {
-		map->extensions = ip_set_alloc(set->dsize * elements);
-		if (!map->extensions) {
-			kfree(map->members);
-			return false;
-		}
-	}
 	map->first_ip = first_ip;
 	map->last_ip = last_ip;
 	map->elements = elements;
@@ -316,13 +310,13 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
 	pr_debug("hosts %u, elements %llu\n",
 		 hosts, (unsigned long long)elements);
 
-	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	set->dsize = ip_set_elem_len(set, tb, 0, 0);
+	map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
 	if (!map)
 		return -ENOMEM;
 
 	map->memsize = bitmap_bytes(0, elements - 1);
 	set->variant = &bitmap_ip;
-	set->dsize = ip_set_elem_len(set, tb, 0);
 	if (!init_map_ip(set, map, first_ip, last_ip,
 			 elements, hosts, netmask)) {
 		kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 1430535118fb..29dde208381d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -47,24 +47,26 @@ enum {
 /* Type structure */
 struct bitmap_ipmac {
 	void *members;		/* the set members */
-	void *extensions;	/* MAC + data extensions */
 	u32 first_ip;		/* host byte order, included in range */
 	u32 last_ip;		/* host byte order, included in range */
 	u32 elements;		/* number of max elements in the set */
 	size_t memsize;		/* members size */
 	struct timer_list gc;	/* garbage collector */
+	unsigned char extensions[0]	/* MAC + data extensions */
+		__aligned(__alignof__(u64));
 };
 
 /* ADT structure for generic function args */
 struct bitmap_ipmac_adt_elem {
+	unsigned char ether[ETH_ALEN] __aligned(2);
 	u16 id;
-	unsigned char *ether;
+	u16 add_mac;
 };
 
 struct bitmap_ipmac_elem {
 	unsigned char ether[ETH_ALEN];
 	unsigned char filled;
-} __attribute__ ((aligned));
+} __aligned(__alignof__(u64));
 
 static inline u32
 ip_to_id(const struct bitmap_ipmac *m, u32 ip)
@@ -72,11 +74,11 @@ ip_to_id(const struct bitmap_ipmac *m, u32 ip)
 	return ip - m->first_ip;
 }
 
-static inline struct bitmap_ipmac_elem *
-get_elem(void *extensions, u16 id, size_t dsize)
-{
-	return (struct bitmap_ipmac_elem *)(extensions + id * dsize);
-}
+#define get_elem(extensions, id, dsize)		\
+	(struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
+
+#define get_const_elem(extensions, id, dsize)	\
+	(const struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
 
 /* Common functions */
 
@@ -88,10 +90,9 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
 
 	if (!test_bit(e->id, map->members))
 		return 0;
-	elem = get_elem(map->extensions, e->id, dsize);
-	if (elem->filled == MAC_FILLED)
-		return !e->ether ||
-		       ether_addr_equal(e->ether, elem->ether);
+	elem = get_const_elem(map->extensions, e->id, dsize);
+	if (e->add_mac && elem->filled == MAC_FILLED)
+		return ether_addr_equal(e->ether, elem->ether);
 	/* Trigger kernel to fill out the ethernet address */
 	return -EAGAIN;
 }
@@ -103,7 +104,7 @@ bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
 
 	if (!test_bit(id, map->members))
 		return 0;
-	elem = get_elem(map->extensions, id, dsize);
+	elem = get_const_elem(map->extensions, id, dsize);
 	/* Timer not started for the incomplete elements */
 	return elem->filled == MAC_FILLED;
 }
@@ -133,7 +134,7 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
 		 * and we can reuse it later when MAC is filled out,
 		 * possibly by the kernel
 		 */
-		if (e->ether)
+		if (e->add_mac)
 			ip_set_timeout_set(timeout, t);
 		else
 			*timeout = t;
@@ -150,7 +151,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
 	elem = get_elem(map->extensions, e->id, dsize);
 	if (test_bit(e->id, map->members)) {
 		if (elem->filled == MAC_FILLED) {
-			if (e->ether &&
+			if (e->add_mac &&
 			    (flags & IPSET_FLAG_EXIST) &&
 			    !ether_addr_equal(e->ether, elem->ether)) {
 				/* memcpy isn't atomic */
@@ -159,7 +160,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
 				ether_addr_copy(elem->ether, e->ether);
 			}
 			return IPSET_ADD_FAILED;
-		} else if (!e->ether)
+		} else if (!e->add_mac)
 			/* Already added without ethernet address */
 			return IPSET_ADD_FAILED;
 		/* Fill the MAC address and trigger the timer activation */
@@ -168,7 +169,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
 		ether_addr_copy(elem->ether, e->ether);
 		elem->filled = MAC_FILLED;
 		return IPSET_ADD_START_STORED_TIMEOUT;
-	} else if (e->ether) {
+	} else if (e->add_mac) {
 		/* We can store MAC too */
 		ether_addr_copy(elem->ether, e->ether);
 		elem->filled = MAC_FILLED;
@@ -191,7 +192,7 @@ bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
 		     u32 id, size_t dsize)
 {
 	const struct bitmap_ipmac_elem *elem =
-		get_elem(map->extensions, id, dsize);
+		get_const_elem(map->extensions, id, dsize);
 
 	return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
 			       htonl(map->first_ip + id)) ||
@@ -213,7 +214,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
 {
 	struct bitmap_ipmac *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct bitmap_ipmac_adt_elem e = { .id = 0 };
+	struct bitmap_ipmac_adt_elem e = { .id = 0, .add_mac = 1 };
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	u32 ip;
 
@@ -231,7 +232,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
 		return -EINVAL;
 
 	e.id = ip_to_id(map, ip);
-	e.ether = eth_hdr(skb)->h_source;
+	memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
 
 	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
 }
@@ -265,11 +266,10 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
 		return -IPSET_ERR_BITMAP_RANGE;
 
 	e.id = ip_to_id(map, ip);
-	if (tb[IPSET_ATTR_ETHER])
-		e.ether = nla_data(tb[IPSET_ATTR_ETHER]);
-	else
-		e.ether = NULL;
-
+	if (tb[IPSET_ATTR_ETHER]) {
+		memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+		e.add_mac = 1;
+	}
 	ret = adtfn(set, &e, &ext, &ext, flags);
 
 	return ip_set_eexist(ret, flags) ? 0 : ret;
@@ -300,13 +300,6 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
 	map->members = ip_set_alloc(map->memsize);
 	if (!map->members)
 		return false;
-	if (set->dsize) {
-		map->extensions = ip_set_alloc(set->dsize * elements);
-		if (!map->extensions) {
-			kfree(map->members);
-			return false;
-		}
-	}
 	map->first_ip = first_ip;
 	map->last_ip = last_ip;
 	map->elements = elements;
@@ -361,14 +354,15 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
 	if (elements > IPSET_BITMAP_MAX_RANGE + 1)
 		return -IPSET_ERR_BITMAP_RANGE_SIZE;
 
-	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	set->dsize = ip_set_elem_len(set, tb,
+				     sizeof(struct bitmap_ipmac_elem),
+				     __alignof__(struct bitmap_ipmac_elem));
+	map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
 	if (!map)
 		return -ENOMEM;
 
 	map->memsize = bitmap_bytes(0, elements - 1);
 	set->variant = &bitmap_ipmac;
-	set->dsize = ip_set_elem_len(set, tb,
-				     sizeof(struct bitmap_ipmac_elem));
 	if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
 		kfree(map);
 		return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 5338ccd5da46..7f0c733358a4 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -35,12 +35,13 @@ MODULE_ALIAS("ip_set_bitmap:port");
 /* Type structure */
 struct bitmap_port {
 	void *members;		/* the set members */
-	void *extensions;	/* data extensions */
 	u16 first_port;		/* host byte order, included in range */
 	u16 last_port;		/* host byte order, included in range */
 	u32 elements;		/* number of max elements in the set */
 	size_t memsize;		/* members size */
 	struct timer_list gc;	/* garbage collection */
+	unsigned char extensions[0]	/* data extensions */
+		__aligned(__alignof__(u64));
 };
 
 /* ADT structure for generic function args */
@@ -209,13 +210,6 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
 	map->members = ip_set_alloc(map->memsize);
 	if (!map->members)
 		return false;
-	if (set->dsize) {
-		map->extensions = ip_set_alloc(set->dsize * map->elements);
-		if (!map->extensions) {
-			kfree(map->members);
-			return false;
-		}
-	}
 	map->first_port = first_port;
 	map->last_port = last_port;
 	set->timeout = IPSET_NO_TIMEOUT;
@@ -232,6 +226,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
 {
 	struct bitmap_port *map;
 	u16 first_port, last_port;
+	u32 elements;
 
 	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
 		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) ||
@@ -248,14 +243,15 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
 		last_port = tmp;
 	}
 
-	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	elements = last_port - first_port + 1;
+	set->dsize = ip_set_elem_len(set, tb, 0, 0);
+	map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
 	if (!map)
 		return -ENOMEM;
 
-	map->elements = last_port - first_port + 1;
+	map->elements = elements;
 	map->memsize = bitmap_bytes(0, map->elements);
 	set->variant = &bitmap_port;
-	set->dsize = ip_set_elem_len(set, tb, 0);
 	if (!init_map_port(set, map, first_port, last_port)) {
 		kfree(map);
 		return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 69ab9c2634e1..54f3d7cb23e6 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -364,25 +364,27 @@ add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
 }
 
 size_t
-ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
+ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len,
+		size_t align)
 {
 	enum ip_set_ext_id id;
-	size_t offset = len;
 	u32 cadt_flags = 0;
 
 	if (tb[IPSET_ATTR_CADT_FLAGS])
 		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
 	if (cadt_flags & IPSET_FLAG_WITH_FORCEADD)
 		set->flags |= IPSET_CREATE_FLAG_FORCEADD;
+	if (!align)
+		align = 1;
 	for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
 		if (!add_extension(id, cadt_flags, tb))
 			continue;
-		offset = ALIGN(offset, ip_set_extensions[id].align);
-		set->offset[id] = offset;
+		len = ALIGN(len, ip_set_extensions[id].align);
+		set->offset[id] = len;
 		set->extensions |= ip_set_extensions[id].type;
-		offset += ip_set_extensions[id].len;
+		len += ip_set_extensions[id].len;
 	}
-	return offset;
+	return ALIGN(len, align);
 }
 EXPORT_SYMBOL_GPL(ip_set_elem_len);
 
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 691b54fcaf2a..4ff22194ce55 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -72,8 +72,9 @@ struct hbucket {
 	DECLARE_BITMAP(used, AHASH_MAX_TUNED);
 	u8 size;		/* size of the array */
 	u8 pos;			/* position of the first free entry */
-	unsigned char value[0];	/* the array of the values */
-} __attribute__ ((aligned));
+	unsigned char value[0]	/* the array of the values */
+		__aligned(__alignof__(u64));
+};
 
 /* The hash table: the table size stored here in order to make resizing easy */
 struct htable {
@@ -1323,12 +1324,14 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
 #endif
 		set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
 		set->dsize = ip_set_elem_len(set, tb,
-				sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)));
+			sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)),
+			__alignof__(struct IPSET_TOKEN(HTYPE, 4_elem)));
 #ifndef IP_SET_PROTO_UNDEF
 	} else {
 		set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
 		set->dsize = ip_set_elem_len(set, tb,
-				sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)));
+			sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)),
+			__alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
 	}
 #endif
 	if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 5a30ce6e8c90..bbede95c9f68 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -31,7 +31,7 @@ struct set_elem {
 	struct rcu_head rcu;
 	struct list_head list;
 	ip_set_id_t id;
-};
+} __aligned(__alignof__(u64));
 
 struct set_adt_elem {
 	ip_set_id_t id;
@@ -618,7 +618,8 @@ list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
 		size = IP_SET_LIST_MIN_SIZE;
 
 	set->variant = &set_variant;
-	set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem));
+	set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem),
+				     __alignof__(struct set_elem));
 	if (!init_list_set(net, set, size))
 		return -ENOMEM;
 	if (tb[IPSET_ATTR_TIMEOUT]) {

From e9dfdc052d018268926ab769d5b7598226713d5a Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sat, 7 Nov 2015 11:23:34 +0100
Subject: [PATCH 04/83] netfilter: ipset: Fix hash:* type expiration

Incorrect index was used when the data blob was shrinked at expiration,
which could lead to falsely expired entries and memory leak when
the comment extension was used too.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 net/netfilter/ipset/ip_set_hash_gen.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 4ff22194ce55..fa4f6374bb73 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -523,7 +523,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
 					continue;
 				data = ahash_data(n, j, dsize);
 				memcpy(tmp->value + d * dsize, data, dsize);
-				set_bit(j, tmp->used);
+				set_bit(d, tmp->used);
 				d++;
 			}
 			tmp->pos = d;

From 0aae24eb409fc429f54ca3809f904f1b91e295e0 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sat, 7 Nov 2015 11:24:51 +0100
Subject: [PATCH 05/83] netfilter: ipset: Fix hash type expire: release empty
 hash bucket block

When all entries are expired/all slots are empty, release the bucket.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 net/netfilter/ipset/ip_set_hash_gen.h | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index fa4f6374bb73..e5336ab36d67 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -476,7 +476,7 @@ static void
 mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
 {
 	struct htable *t;
-	struct hbucket *n;
+	struct hbucket *n, *tmp;
 	struct mtype_elem *data;
 	u32 i, j, d;
 #ifdef IP_SET_HASH_WITH_NETS
@@ -511,9 +511,14 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
 			}
 		}
 		if (d >= AHASH_INIT_SIZE) {
-			struct hbucket *tmp = kzalloc(sizeof(*tmp) +
-					(n->size - AHASH_INIT_SIZE) * dsize,
-					GFP_ATOMIC);
+			if (d >= n->size) {
+				rcu_assign_pointer(hbucket(t, i), NULL);
+				kfree_rcu(n, rcu);
+				continue;
+			}
+			tmp = kzalloc(sizeof(*tmp) +
+				      (n->size - AHASH_INIT_SIZE) * dsize,
+				      GFP_ATOMIC);
 			if (!tmp)
 				/* Still try to delete expired elements */
 				continue;

From c255cb2ed3c7960b2c68f45de1dc0ac2197c8f78 Mon Sep 17 00:00:00 2001
From: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Date: Thu, 22 Oct 2015 11:17:03 +1300
Subject: [PATCH 06/83] netfilter: Fix removal of GRE expectation entries
 created by PPTP

The uninitialized tuple structure caused incorrect hash calculation
and the lookup failed.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=106441
Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_nat_pptp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 657d2307f031..b3ca21b2ba9b 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -45,7 +45,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
 	struct net *net = nf_ct_net(ct);
 	const struct nf_conn *master = ct->master;
 	struct nf_conntrack_expect *other_exp;
-	struct nf_conntrack_tuple t;
+	struct nf_conntrack_tuple t = {};
 	const struct nf_ct_pptp_master *ct_pptp_info;
 	const struct nf_nat_pptp *nat_pptp_info;
 	struct nf_nat_range range;

From c872a2d9e3627829591736ddd8e8710a0afb2f95 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 10 Nov 2015 13:08:15 +0100
Subject: [PATCH 07/83] netfilter: nfnetlink_log: work around uninitialized
 variable warning

After a recent (correct) change, gcc started warning about the use
of the 'flags' variable in nfulnl_recv_config()

net/netfilter/nfnetlink_log.c: In function 'nfulnl_recv_config':
net/netfilter/nfnetlink_log.c:320:14: warning: 'flags' may be used uninitialized in this function [-Wmaybe-uninitialized]
net/netfilter/nfnetlink_log.c:828:6: note: 'flags' was declared here

The warning first shows up in ARM s3c2410_defconfig with gcc-4.3 or
higher (including 5.2.1, which is the latest version I checked) I
tried working around it by rearranging the code but had no success
with that.

As a last resort, this initializes the variable to zero, which shuts
up the warning, but means that we don't get a warning if the code
is ever changed in a way that actually causes the variable to be
used without first being written.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 8cbc870829ec ("netfilter: nfnetlink_log: validate dependencies to avoid breaking atomicity")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_log.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 06eb48fceb42..740cce4685ac 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -825,7 +825,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 	struct net *net = sock_net(ctnl);
 	struct nfnl_log_net *log = nfnl_log_pernet(net);
 	int ret = 0;
-	u16 flags;
+	u16 flags = 0;
 
 	if (nfula[NFULA_CFG_CMD]) {
 		u_int8_t pf = nfmsg->nfgen_family;

From 74ec4d55c4d243330d93fc52e23e37d2e76548ba Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 10 Nov 2015 13:22:15 +0100
Subject: [PATCH 08/83] netfilter: fix xt_TEE and xt_TPROXY dependencies

Kconfig is too smart for its own good: a Kconfig line that states

	select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES

means that if IP6_NF_IPTABLES is set to 'm', then NF_DEFRAG_IPV6 will
also be set to 'm', regardless of the state of the symbol from which
it is selected. When the xt_TEE driver is built-in and nothing else
forces NF_DEFRAG_IPV6 to be built-in, this causes a link-time error:

net/built-in.o: In function `tee_tg6':
net/netfilter/xt_TEE.c:46: undefined reference to `nf_dup_ipv6'

This works around that behavior by changing the dependency to
'if IP6_NF_IPTABLES != n', which is interpreted as boolean expression
rather than a tristate and causes the NF_DEFRAG_IPV6 symbol to
be built-in as well.

The bug only occurs once in thousands of 'randconfig' builds and
does not really impact real users. From inspecting the other
surrounding Kconfig symbols, I am guessing that NETFILTER_XT_TARGET_TPROXY
and NETFILTER_XT_MATCH_SOCKET have the same issue. If not, this
change should still be harmless.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/Kconfig | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e22349ea7256..4692782b5280 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -869,7 +869,7 @@ config NETFILTER_XT_TARGET_TEE
 	depends on IPV6 || IPV6=n
 	depends on !NF_CONNTRACK || NF_CONNTRACK
 	select NF_DUP_IPV4
-	select NF_DUP_IPV6 if IP6_NF_IPTABLES
+	select NF_DUP_IPV6 if IP6_NF_IPTABLES != n
 	---help---
 	This option adds a "TEE" target with which a packet can be cloned and
 	this clone be rerouted to another nexthop.
@@ -882,7 +882,7 @@ config NETFILTER_XT_TARGET_TPROXY
 	depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
 	depends on IP_NF_MANGLE
 	select NF_DEFRAG_IPV4
-	select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
+	select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
 	help
 	  This option adds a `TPROXY' target, which is somewhat similar to
 	  REDIRECT.  It can only be used in the mangle table and is useful
@@ -1375,7 +1375,7 @@ config NETFILTER_XT_MATCH_SOCKET
 	depends on IPV6 || IPV6=n
 	depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
 	select NF_DEFRAG_IPV4
-	select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
+	select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
 	help
 	  This option adds a `socket' match, which can be used to match
 	  packets for which a TCP or UDP socket lookup finds a valid socket.

From aabc92bbe3cfe4c545f8ccdaaeeea012a46f0abf Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 10 Nov 2015 14:31:18 +0100
Subject: [PATCH 09/83] net: add __netdev_alloc_pcpu_stats() to indicate gfp
 flags

nf_tables may create percpu counters from the packet path through its
dynamic set instantiation infrastructure, so we need a way to allocate
this through GFP_ATOMIC.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2c00772bd136..e9d0c8a75380 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2068,20 +2068,23 @@ struct pcpu_sw_netstats {
 	struct u64_stats_sync   syncp;
 };
 
-#define netdev_alloc_pcpu_stats(type)				\
-({								\
-	typeof(type) __percpu *pcpu_stats = alloc_percpu(type); \
-	if (pcpu_stats)	{					\
-		int __cpu;					\
-		for_each_possible_cpu(__cpu) {			\
-			typeof(type) *stat;			\
-			stat = per_cpu_ptr(pcpu_stats, __cpu);	\
-			u64_stats_init(&stat->syncp);		\
-		}						\
-	}							\
-	pcpu_stats;						\
+#define __netdev_alloc_pcpu_stats(type, gfp)				\
+({									\
+	typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\
+	if (pcpu_stats)	{						\
+		int __cpu;						\
+		for_each_possible_cpu(__cpu) {				\
+			typeof(type) *stat;				\
+			stat = per_cpu_ptr(pcpu_stats, __cpu);		\
+			u64_stats_init(&stat->syncp);			\
+		}							\
+	}								\
+	pcpu_stats;							\
 })
 
+#define netdev_alloc_pcpu_stats(type)					\
+	__netdev_alloc_pcpu_stats(type, GFP_KERNEL);
+
 #include <linux/notifier.h>
 
 /* netdevice notifier chain. Please remember to update the rtnetlink

From 086f332167d64b645d37405854f049b9ad7371ab Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 10 Nov 2015 13:39:42 +0100
Subject: [PATCH 10/83] netfilter: nf_tables: add clone interface to expression
 operations

With the conversion of the counter expressions to make it percpu, we
need to clone the percpu memory area, otherwise we crash when using
counters from flow tables.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 16 ++++++++--
 net/netfilter/nft_counter.c       | 49 ++++++++++++++++++++++++++-----
 net/netfilter/nft_dynset.c        |  5 ++--
 3 files changed, 58 insertions(+), 12 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index c9149cc0a02d..4bd7508bedc9 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -618,6 +618,8 @@ struct nft_expr_ops {
 	void				(*eval)(const struct nft_expr *expr,
 						struct nft_regs *regs,
 						const struct nft_pktinfo *pkt);
+	int				(*clone)(struct nft_expr *dst,
+						 const struct nft_expr *src);
 	unsigned int			size;
 
 	int				(*init)(const struct nft_ctx *ctx,
@@ -660,10 +662,20 @@ void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr);
 int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
 		  const struct nft_expr *expr);
 
-static inline void nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
+static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
 {
+	int err;
+
 	__module_get(src->ops->type->owner);
-	memcpy(dst, src, src->ops->size);
+	if (src->ops->clone) {
+		dst->ops = src->ops;
+		err = src->ops->clone(dst, src);
+		if (err < 0)
+			return err;
+	} else {
+		memcpy(dst, src, src->ops->size);
+	}
+	return 0;
 }
 
 /**
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 1067fb4c1ffa..c7808fc19719 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -47,27 +47,34 @@ static void nft_counter_eval(const struct nft_expr *expr,
 	local_bh_enable();
 }
 
-static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static void nft_counter_fetch(const struct nft_counter_percpu __percpu *counter,
+			      struct nft_counter *total)
 {
-	struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
-	struct nft_counter_percpu *cpu_stats;
-	struct nft_counter total;
+	const struct nft_counter_percpu *cpu_stats;
 	u64 bytes, packets;
 	unsigned int seq;
 	int cpu;
 
-	memset(&total, 0, sizeof(total));
+	memset(total, 0, sizeof(*total));
 	for_each_possible_cpu(cpu) {
-		cpu_stats = per_cpu_ptr(priv->counter, cpu);
+		cpu_stats = per_cpu_ptr(counter, cpu);
 		do {
 			seq	= u64_stats_fetch_begin_irq(&cpu_stats->syncp);
 			bytes	= cpu_stats->counter.bytes;
 			packets	= cpu_stats->counter.packets;
 		} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
 
-		total.packets += packets;
-		total.bytes += bytes;
+		total->packets += packets;
+		total->bytes += bytes;
 	}
+}
+
+static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+	struct nft_counter total;
+
+	nft_counter_fetch(priv->counter, &total);
 
 	if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)) ||
 	    nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets)))
@@ -118,6 +125,31 @@ static void nft_counter_destroy(const struct nft_ctx *ctx,
 	free_percpu(priv->counter);
 }
 
+static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+	struct nft_counter_percpu_priv *priv = nft_expr_priv(src);
+	struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
+	struct nft_counter_percpu __percpu *cpu_stats;
+	struct nft_counter_percpu *this_cpu;
+	struct nft_counter total;
+
+	nft_counter_fetch(priv->counter, &total);
+
+	cpu_stats = __netdev_alloc_pcpu_stats(struct nft_counter_percpu,
+					      GFP_ATOMIC);
+	if (cpu_stats == NULL)
+		return ENOMEM;
+
+	preempt_disable();
+	this_cpu = this_cpu_ptr(cpu_stats);
+	this_cpu->counter.packets = total.packets;
+	this_cpu->counter.bytes = total.bytes;
+	preempt_enable();
+
+	priv_clone->counter = cpu_stats;
+	return 0;
+}
+
 static struct nft_expr_type nft_counter_type;
 static const struct nft_expr_ops nft_counter_ops = {
 	.type		= &nft_counter_type,
@@ -126,6 +158,7 @@ static const struct nft_expr_ops nft_counter_ops = {
 	.init		= nft_counter_init,
 	.destroy	= nft_counter_destroy,
 	.dump		= nft_counter_dump,
+	.clone		= nft_counter_clone,
 };
 
 static struct nft_expr_type nft_counter_type __read_mostly = {
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 513a8ef60a59..9dec3bd1b63c 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -50,8 +50,9 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
 	}
 
 	ext = nft_set_elem_ext(set, elem);
-	if (priv->expr != NULL)
-		nft_expr_clone(nft_set_ext_expr(ext), priv->expr);
+	if (priv->expr != NULL &&
+	    nft_expr_clone(nft_set_ext_expr(ext), priv->expr) < 0)
+		return NULL;
 
 	return elem;
 }

From 39174291d8e8acfd1113214a943263aaa03c57c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?fran=C3=A7ois=20romieu?= <romieu@fr.zoreil.com>
Date: Wed, 11 Nov 2015 23:35:18 +0100
Subject: [PATCH 11/83] r8169: fix kasan reported skb use-after-free.

Signed-off-by: Francois Romieu <romieu@fr.zoreil.com>
Reported-by: Dave Jones <davej@codemonkey.org.uk>
Fixes: d7d2d89d4b0af ("r8169: Add software counter for multicast packages")
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Corinna Vinschen <vinschen@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index b4f21232019a..79ef799f88ab 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -7429,15 +7429,15 @@ process_pkt:
 
 			rtl8169_rx_vlan_tag(desc, skb);
 
+			if (skb->pkt_type == PACKET_MULTICAST)
+				dev->stats.multicast++;
+
 			napi_gro_receive(&tp->napi, skb);
 
 			u64_stats_update_begin(&tp->rx_stats.syncp);
 			tp->rx_stats.packets++;
 			tp->rx_stats.bytes += pkt_size;
 			u64_stats_update_end(&tp->rx_stats.syncp);
-
-			if (skb->pkt_type == PACKET_MULTICAST)
-				dev->stats.multicast++;
 		}
 release_descriptor:
 		desc->opts2 = 0;

From 49e4a2293035b420e807e739999d59c8ec1488e9 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 12 Nov 2015 22:03:40 +0100
Subject: [PATCH 12/83] stmmac: avoid ipq806x constant overflow warning

Building dwmac-ipq806x on a 64-bit architecture produces a harmless
warning from gcc:

stmmac/dwmac-ipq806x.c: In function 'ipq806x_gmac_probe':
include/linux/bitops.h:6:19: warning: overflow in implicit constant conversion [-Woverflow]
  val = QSGMII_PHY_CDR_EN |
stmmac/dwmac-ipq806x.c:333:8: note: in expansion of macro 'QSGMII_PHY_CDR_EN'
 #define QSGMII_PHY_CDR_EN   BIT(0)
 #define BIT(nr)   (1UL << (nr))

This is a result of the type conversion rules in C, when we take the
logical OR of multiple different types. In particular, we have
and unsigned long

	QSGMII_PHY_CDR_EN == BIT(0) == (1ul << 0) == 0x0000000000000001ul

and a signed int

	0xC << QSGMII_PHY_TX_DRV_AMP_OFFSET == 0xc0000000

which together gives a signed long value

	0xffffffffc0000001l

and when this is passed into a function that takes an unsigned int type,
gcc warns about the signed overflow and the loss of the upper 32-bits that
are all ones.

This patch adds 'ul' type modifiers to the literal numbers passed in
here, so now the expression remains an 'unsigned long' with the upper
bits all zero, and that avoids the signed overflow and the warning.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: b1c17215d718 ("stmmac: add ipq806x glue layer")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
index 9d89bdbf029f..82de68b1a452 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
@@ -337,11 +337,11 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
 			     QSGMII_PHY_RX_SIGNAL_DETECT_EN |
 			     QSGMII_PHY_TX_DRIVER_EN |
 			     QSGMII_PHY_QSGMII_EN |
-			     0x4 << QSGMII_PHY_PHASE_LOOP_GAIN_OFFSET |
-			     0x3 << QSGMII_PHY_RX_DC_BIAS_OFFSET |
-			     0x1 << QSGMII_PHY_RX_INPUT_EQU_OFFSET |
-			     0x2 << QSGMII_PHY_CDR_PI_SLEW_OFFSET |
-			     0xC << QSGMII_PHY_TX_DRV_AMP_OFFSET);
+			     0x4ul << QSGMII_PHY_PHASE_LOOP_GAIN_OFFSET |
+			     0x3ul << QSGMII_PHY_RX_DC_BIAS_OFFSET |
+			     0x1ul << QSGMII_PHY_RX_INPUT_EQU_OFFSET |
+			     0x2ul << QSGMII_PHY_CDR_PI_SLEW_OFFSET |
+			     0xCul << QSGMII_PHY_TX_DRV_AMP_OFFSET);
 	}
 
 	plat_dat->has_gmac = true;

From 73ed5d25dce0354ea381d6dc93005c3085fae03d Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Tue, 10 Nov 2015 16:23:15 +0100
Subject: [PATCH 13/83] af-unix: fix use-after-free with concurrent readers
 while splicing

During splicing an af-unix socket to a pipe we have to drop all
af-unix socket locks. While doing so we allow another reader to enter
unix_stream_read_generic which can read, copy and finally free another
skb. If exactly this skb is just in process of being spliced we get a
use-after-free report by kasan.

First, we must make sure to not have a free while the skb is used during
the splice operation. We simply increment its use counter before unlocking
the reader lock.

Stream sockets have the nice characteristic that we don't care about
zero length writes and they never reach the peer socket's queue. That
said, we can take the UNIXCB.consumed field as the indicator if the
skb was already freed from the socket's receive queue. If the skb was
fully consumed after we locked the reader side again we know it has been
dropped by a second reader. We indicate a short read to user space and
abort the current splice operation.

This bug has been found with syzkaller
(http://github.com/google/syzkaller) by Dmitry Vyukov.

Fixes: 2b514574f7e8 ("net: af_unix: implement splice for stream af_unix sockets")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index aaa0b58d6aba..12b886f07982 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -441,6 +441,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
 		if (state == TCP_LISTEN)
 			unix_release_sock(skb->sk, 1);
 		/* passed fds are erased in the kfree_skb hook	      */
+		UNIXCB(skb).consumed = skb->len;
 		kfree_skb(skb);
 	}
 
@@ -2072,6 +2073,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
 
 	do {
 		int chunk;
+		bool drop_skb;
 		struct sk_buff *skb, *last;
 
 		unix_state_lock(sk);
@@ -2152,7 +2154,11 @@ unlock:
 		}
 
 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
+		skb_get(skb);
 		chunk = state->recv_actor(skb, skip, chunk, state);
+		drop_skb = !unix_skb_len(skb);
+		/* skb is only safe to use if !drop_skb */
+		consume_skb(skb);
 		if (chunk < 0) {
 			if (copied == 0)
 				copied = -EFAULT;
@@ -2161,6 +2167,18 @@ unlock:
 		copied += chunk;
 		size -= chunk;
 
+		if (drop_skb) {
+			/* the skb was touched by a concurrent reader;
+			 * we should not expect anything from this skb
+			 * anymore and assume it invalid - we can be
+			 * sure it was dropped from the socket queue
+			 *
+			 * let's report a short read
+			 */
+			err = 0;
+			break;
+		}
+
 		/* Mark read part of skb as used */
 		if (!(flags & MSG_PEEK)) {
 			UNIXCB(skb).consumed += chunk;

From 9001d94dfd558f175bede9f847276f691cd9c164 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 11 Nov 2015 15:22:40 +0000
Subject: [PATCH 14/83] fjes: fix inconsistent indenting

minor change, indenting is one tab out.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Taku Izumi <izumi.taku@jp.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fjes/fjes_hw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/fjes/fjes_hw.c b/drivers/net/fjes/fjes_hw.c
index bb8b5304d851..b103adb8d62e 100644
--- a/drivers/net/fjes/fjes_hw.c
+++ b/drivers/net/fjes/fjes_hw.c
@@ -599,7 +599,7 @@ int fjes_hw_unregister_buff_addr(struct fjes_hw *hw, int dest_epid)
 		FJES_CMD_REQ_RES_CODE_BUSY) &&
 	       (timeout > 0)) {
 		msleep(200 + hw->my_epid * 20);
-			timeout -= (200 + hw->my_epid * 20);
+		timeout -= (200 + hw->my_epid * 20);
 
 		res_buf->unshare_buffer.length = 0;
 		res_buf->unshare_buffer.code = 0;

From 0d3f6d297bfb7af24d0508460fdb3d1ec4903fa3 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 11 Nov 2015 11:51:06 -0800
Subject: [PATCH 15/83] ipv6: Avoid creating RTF_CACHE from a rt that is not
 managed by fib6 tree

The original bug report:
https://bugzilla.redhat.com/show_bug.cgi?id=1272571

The setup has a IPv4 GRE tunnel running in a IPSec.  The bug
happens when ndisc starts sending router solicitation at the gre
interface.  The simplified oops stack is like:

__lock_acquire+0x1b2/0x1c30
lock_acquire+0xb9/0x140
_raw_write_lock_bh+0x3f/0x50
__ip6_ins_rt+0x2e/0x60
ip6_ins_rt+0x49/0x50
~~~~~~~~
__ip6_rt_update_pmtu.part.54+0x145/0x250
ip6_rt_update_pmtu+0x2e/0x40
~~~~~~~~
ip_tunnel_xmit+0x1f1/0xf40
__gre_xmit+0x7a/0x90
ipgre_xmit+0x15a/0x220
dev_hard_start_xmit+0x2bd/0x480
__dev_queue_xmit+0x696/0x730
dev_queue_xmit+0x10/0x20
neigh_direct_output+0x11/0x20
ip6_finish_output2+0x21f/0x770
ip6_finish_output+0xa7/0x1d0
ip6_output+0x56/0x190
~~~~~~~~
ndisc_send_skb+0x1d9/0x400
ndisc_send_rs+0x88/0xc0
~~~~~~~~

The rt passed to ip6_rt_update_pmtu() is created by
icmp6_dst_alloc() and it is not managed by the fib6 tree,
so its rt6i_table == NULL.  When __ip6_rt_update_pmtu() creates
a RTF_CACHE clone, the newly created clone also has rt6i_table == NULL
and it causes the ip6_ins_rt() oops.

During pmtu update, we only want to create a RTF_CACHE clone
from a rt which is currently managed (or owned) by the
fib6 tree.  It means either rt->rt6i_node != NULL or
rt is a RTF_PCPU clone.

It is worth to note that rt6i_table may not be NULL even it is
not (yet) managed by the fib6 tree (e.g. addrconf_dst_alloc()).
Hence, rt6i_node is a better check instead of rt6i_table.

Fixes: 45e4fd26683c ("ipv6: Only create RTF_CACHE routes after encountering pmtu")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Reported-by: Chris Siebenmann <cks-rhbugzilla@cs.toronto.edu>
Cc: Chris Siebenmann <cks-rhbugzilla@cs.toronto.edu>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c8bc9b4ac328..74907c58a5c7 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1322,6 +1322,12 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
 }
 
+static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
+{
+	return !(rt->rt6i_flags & RTF_CACHE) &&
+		(rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
+}
+
 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
 				 const struct ipv6hdr *iph, u32 mtu)
 {
@@ -1335,7 +1341,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
 	if (mtu >= dst_mtu(dst))
 		return;
 
-	if (rt6->rt6i_flags & RTF_CACHE) {
+	if (!rt6_cache_allowed_for_pmtu(rt6)) {
 		rt6_do_update_pmtu(rt6, mtu);
 	} else {
 		const struct in6_addr *daddr, *saddr;

From 5973fb1e245086071bf71994c8b54d99526ded03 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 11 Nov 2015 11:51:07 -0800
Subject: [PATCH 16/83] ipv6: Check expire on DST_NOCACHE route

Since the expires of the DST_NOCACHE rt can be set during
the ip6_rt_update_pmtu(), we also need to consider the expires
value when doing ip6_dst_check().

This patches creates __rt6_check_expired() to only
check the expire value (if one exists) of the current rt.

In rt6_dst_from_check(), it adds __rt6_check_expired() as
one of the condition check.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 74907c58a5c7..3754cf9287a0 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -404,6 +404,14 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 	}
 }
 
+static bool __rt6_check_expired(const struct rt6_info *rt)
+{
+	if (rt->rt6i_flags & RTF_EXPIRES)
+		return time_after(jiffies, rt->dst.expires);
+	else
+		return false;
+}
+
 static bool rt6_check_expired(const struct rt6_info *rt)
 {
 	if (rt->rt6i_flags & RTF_EXPIRES) {
@@ -1252,7 +1260,8 @@ static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
 
 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
 {
-	if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
+	if (!__rt6_check_expired(rt) &&
+	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
 		return &rt->dst;
 	else

From 02bcf4e082e4dc634409a6a6cb7def8806d6e5e6 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 11 Nov 2015 11:51:08 -0800
Subject: [PATCH 17/83] ipv6: Check rt->dst.from for the DST_NOCACHE route

All DST_NOCACHE rt6_info used to have rt->dst.from set to
its parent.

After commit 8e3d5be73681 ("ipv6: Avoid double dst_free"),
DST_NOCACHE is also set to rt6_info which does not have
a parent (i.e. rt->dst.from is NULL).

This patch catches the rt->dst.from == NULL case.

Fixes: 8e3d5be73681 ("ipv6: Avoid double dst_free")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h | 3 ++-
 net/ipv6/route.c      | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index aaf9700fc9e5..fb961a576abe 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -167,7 +167,8 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
 
 static inline u32 rt6_get_cookie(const struct rt6_info *rt)
 {
-	if (rt->rt6i_flags & RTF_PCPU || unlikely(rt->dst.flags & DST_NOCACHE))
+	if (rt->rt6i_flags & RTF_PCPU ||
+	    (unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from))
 		rt = (struct rt6_info *)(rt->dst.from);
 
 	return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3754cf9287a0..6f01fe122abd 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1281,7 +1281,8 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 
 	rt6_dst_from_metrics_check(rt);
 
-	if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
+	if (rt->rt6i_flags & RTF_PCPU ||
+	    (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
 		return rt6_dst_from_check(rt, cookie);
 	else
 		return rt6_check(rt, cookie);

From d7475de58575c904818efa369c82e88c6648ce2e Mon Sep 17 00:00:00 2001
From: Kamal Mostafa <kamal@canonical.com>
Date: Wed, 11 Nov 2015 14:24:27 -0800
Subject: [PATCH 18/83] tools/net: Use include/uapi with __EXPORTED_HEADERS__

Use the local uapi headers to keep in sync with "recently" added #define's
(e.g. SKF_AD_VLAN_TPID).  Refactored CFLAGS, and bpf_asm doesn't need -I.

Fixes: 3f356385e8a4 ("filter: bpf_asm: add minimal bpf asm tool")
Signed-off-by: Kamal Mostafa <kamal@canonical.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/net/Makefile | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tools/net/Makefile b/tools/net/Makefile
index ee577ea03ba5..ddf888010652 100644
--- a/tools/net/Makefile
+++ b/tools/net/Makefile
@@ -4,6 +4,9 @@ CC = gcc
 LEX = flex
 YACC = bison
 
+CFLAGS += -Wall -O2
+CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
+
 %.yacc.c: %.y
 	$(YACC) -o $@ -d $<
 
@@ -12,15 +15,13 @@ YACC = bison
 
 all : bpf_jit_disasm bpf_dbg bpf_asm
 
-bpf_jit_disasm : CFLAGS = -Wall -O2 -DPACKAGE='bpf_jit_disasm'
+bpf_jit_disasm : CFLAGS += -DPACKAGE='bpf_jit_disasm'
 bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl
 bpf_jit_disasm : bpf_jit_disasm.o
 
-bpf_dbg : CFLAGS = -Wall -O2
 bpf_dbg : LDLIBS = -lreadline
 bpf_dbg : bpf_dbg.o
 
-bpf_asm : CFLAGS = -Wall -O2 -I.
 bpf_asm : LDLIBS =
 bpf_asm : bpf_asm.o bpf_exp.yacc.o bpf_exp.lex.o
 bpf_exp.lex.o : bpf_exp.yacc.c

From efdfa2f7848f64517008136fb41f53c4a1faf93a Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 11 Nov 2015 23:25:40 +0100
Subject: [PATCH 19/83] packet: do skb_probe_transport_header when we actually
 have data

In tpacket_fill_skb() commit c1aad275b029 ("packet: set transport
header before doing xmit") and later on 40893fd0fd4e ("net: switch
to use skb_probe_transport_header()") was probing for a transport
header on the skb from a ring buffer slot, but at a time, where
the skb has _not even_ been filled with data yet. So that call into
the flow dissector is pretty useless. Lets do it after we've set
up the skb frags.

Fixes: c1aad275b029 ("packet: set transport header before doing xmit")
Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index af399cac5205..80c36c0867d3 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2368,8 +2368,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	skb_reserve(skb, hlen);
 	skb_reset_network_header(skb);
 
-	if (!packet_use_direct_xmit(po))
-		skb_probe_transport_header(skb, 0);
 	if (unlikely(po->tp_tx_has_off)) {
 		int off_min, off_max, off;
 		off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
@@ -2449,6 +2447,9 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		len = ((to_write > len_max) ? len_max : to_write);
 	}
 
+	if (!packet_use_direct_xmit(po))
+		skb_probe_transport_header(skb, 0);
+
 	return tp_len;
 }
 

From 8fd6c80d9dd938ca338c70698533a7e304752846 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 11 Nov 2015 23:25:41 +0100
Subject: [PATCH 20/83] packet: always probe for transport header

We concluded that the skb_probe_transport_header() should better be
called unconditionally. Avoiding the call into the flow dissector has
also not really much to do with the direct xmit mode.

While it seems that only virtio_net code makes use of GSO from non
RX/TX ring packet socket paths, we should probe for a transport header
nevertheless before they hit devices.

Reference: http://thread.gmane.org/gmane.linux.network/386173/
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 80c36c0867d3..bdecf17a15bb 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2447,8 +2447,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		len = ((to_write > len_max) ? len_max : to_write);
 	}
 
-	if (!packet_use_direct_xmit(po))
-		skb_probe_transport_header(skb, 0);
+	skb_probe_transport_header(skb, 0);
 
 	return tp_len;
 }
@@ -2808,8 +2807,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 		len += vnet_hdr_len;
 	}
 
-	if (!packet_use_direct_xmit(po))
-		skb_probe_transport_header(skb, reserve);
+	skb_probe_transport_header(skb, reserve);
+
 	if (unlikely(extra_len == 4))
 		skb->no_fcs = 1;
 

From 3c70c132488794e2489ab045559b0ce0afcf17de Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 11 Nov 2015 23:25:42 +0100
Subject: [PATCH 21/83] packet: only allow extra vlan len on ethernet devices

Packet sockets can be used by various net devices and are not
really restricted to ARPHRD_ETHER device types. However, when
currently checking for the extra 4 bytes that can be transmitted
in VLAN case, our assumption is that we generally probe on
ARPHRD_ETHER devices. Therefore, before looking into Ethernet
header, check the device type first.

This also fixes the issue where non-ARPHRD_ETHER devices could
have no dev->hard_header_len in TX_RING SOCK_RAW case, and thus
the check would test unfilled linear part of the skb (instead
of non-linear).

Fixes: 57f89bfa2140 ("network: Allow af_packet to transmit +4 bytes for VLAN packets.")
Fixes: 52f1454f629f ("packet: allow to transmit +4 byte in TX_RING slot for VLAN case")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 60 ++++++++++++++++++------------------------
 1 file changed, 25 insertions(+), 35 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index bdecf17a15bb..8795b0fb1ed1 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1741,6 +1741,20 @@ static void fanout_release(struct sock *sk)
 		kfree_rcu(po->rollover, rcu);
 }
 
+static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
+					  struct sk_buff *skb)
+{
+	/* Earlier code assumed this would be a VLAN pkt, double-check
+	 * this now that we have the actual packet in hand. We can only
+	 * do this check on Ethernet devices.
+	 */
+	if (unlikely(dev->type != ARPHRD_ETHER))
+		return false;
+
+	skb_reset_mac_header(skb);
+	return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q));
+}
+
 static const struct proto_ops packet_ops;
 
 static const struct proto_ops packet_ops_spkt;
@@ -1902,18 +1916,10 @@ retry:
 		goto retry;
 	}
 
-	if (len > (dev->mtu + dev->hard_header_len + extra_len)) {
-		/* Earlier code assumed this would be a VLAN pkt,
-		 * double-check this now that we have the actual
-		 * packet in hand.
-		 */
-		struct ethhdr *ehdr;
-		skb_reset_mac_header(skb);
-		ehdr = eth_hdr(skb);
-		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
-			err = -EMSGSIZE;
-			goto out_unlock;
-		}
+	if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
+	    !packet_extra_vlan_len_allowed(dev, skb)) {
+		err = -EMSGSIZE;
+		goto out_unlock;
 	}
 
 	skb->protocol = proto;
@@ -2525,18 +2531,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
 					  addr, hlen);
 		if (likely(tp_len >= 0) &&
-		    tp_len > dev->mtu + dev->hard_header_len) {
-			struct ethhdr *ehdr;
-			/* Earlier code assumed this would be a VLAN pkt,
-			 * double-check this now that we have the actual
-			 * packet in hand.
-			 */
+		    tp_len > dev->mtu + dev->hard_header_len &&
+		    !packet_extra_vlan_len_allowed(dev, skb))
+			tp_len = -EMSGSIZE;
 
-			skb_reset_mac_header(skb);
-			ehdr = eth_hdr(skb);
-			if (ehdr->h_proto != htons(ETH_P_8021Q))
-				tp_len = -EMSGSIZE;
-		}
 		if (unlikely(tp_len < 0)) {
 			if (po->tp_loss) {
 				__packet_set_status(po, ph,
@@ -2765,18 +2763,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 
 	sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
 
-	if (!gso_type && (len > dev->mtu + reserve + extra_len)) {
-		/* Earlier code assumed this would be a VLAN pkt,
-		 * double-check this now that we have the actual
-		 * packet in hand.
-		 */
-		struct ethhdr *ehdr;
-		skb_reset_mac_header(skb);
-		ehdr = eth_hdr(skb);
-		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
-			err = -EMSGSIZE;
-			goto out_free;
-		}
+	if (!gso_type && (len > dev->mtu + reserve + extra_len) &&
+	    !packet_extra_vlan_len_allowed(dev, skb)) {
+		err = -EMSGSIZE;
+		goto out_free;
 	}
 
 	skb->protocol = proto;

From c72219b75fde768efccf7666342282fab7f9e4e7 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 11 Nov 2015 23:25:43 +0100
Subject: [PATCH 22/83] packet: infer protocol from ethernet header if unset

In case no struct sockaddr_ll has been passed to packet
socket's sendmsg() when doing a TX_RING flush run, then
skb->protocol is set to po->num instead, which is the protocol
passed via socket(2)/bind(2).

Applications only xmitting can go the path of allocating the
socket as socket(PF_PACKET, <mode>, 0) and do a bind(2) on the
TX_RING with sll_protocol of 0. That way, register_prot_hook()
is neither called on creation nor on bind time, which saves
cycles when there's no interest in capturing anyway.

That leaves us however with po->num 0 instead and therefore
the TX_RING flush run sets skb->protocol to 0 as well. Eric
reported that this leads to problems when using tools like
trafgen over bonding device. I.e. the bonding's hash function
could invoke the kernel's flow dissector, which depends on
skb->protocol being properly set. In the current situation, all
the traffic is then directed to a single slave.

Fix it up by inferring skb->protocol from the Ethernet header
when not set and we have ARPHRD_ETHER device type. This is only
done in case of SOCK_RAW and where we have a dev->hard_header_len
length. In case of ARPHRD_ETHER devices, this is guaranteed to
cover ETH_HLEN, and therefore being accessed on the skb after
the skb_store_bits().

Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8795b0fb1ed1..0066da2b8e44 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2338,6 +2338,15 @@ static bool ll_header_truncated(const struct net_device *dev, int len)
 	return false;
 }
 
+static void tpacket_set_protocol(const struct net_device *dev,
+				 struct sk_buff *skb)
+{
+	if (dev->type == ARPHRD_ETHER) {
+		skb_reset_mac_header(skb);
+		skb->protocol = eth_hdr(skb)->h_proto;
+	}
+}
+
 static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		void *frame, struct net_device *dev, int size_max,
 		__be16 proto, unsigned char *addr, int hlen)
@@ -2419,6 +2428,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 				dev->hard_header_len);
 		if (unlikely(err))
 			return err;
+		if (!skb->protocol)
+			tpacket_set_protocol(dev, skb);
 
 		data += dev->hard_header_len;
 		to_write -= dev->hard_header_len;

From 5cfb4c8d05b4409c4044cb9c05b19705c1d9818b Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 11 Nov 2015 23:25:44 +0100
Subject: [PATCH 23/83] packet: fix tpacket_snd max frame len

Since it's introduction in commit 69e3c75f4d54 ("net: TX_RING and
packet mmap"), TX_RING could be used from SOCK_DGRAM and SOCK_RAW
side. When used with SOCK_DGRAM only, the size_max > dev->mtu +
reserve check should have reserve as 0, but currently, this is
unconditionally set (in it's original form as dev->hard_header_len).

I think this is not correct since tpacket_fill_skb() would then
take dev->mtu and dev->hard_header_len into account for SOCK_DGRAM,
the extra VLAN_HLEN could be possible in both cases. Presumably, the
reserve code was copied from packet_snd(), but later on missed the
check. Make it similar as we have it in packet_snd().

Fixes: 69e3c75f4d54 ("net: TX_RING and packet mmap")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 0066da2b8e44..242bce1cf0f3 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2510,12 +2510,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	if (unlikely(!(dev->flags & IFF_UP)))
 		goto out_put;
 
-	reserve = dev->hard_header_len + VLAN_HLEN;
+	if (po->sk.sk_socket->type == SOCK_RAW)
+		reserve = dev->hard_header_len;
 	size_max = po->tx_ring.frame_size
 		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
 
-	if (size_max > dev->mtu + reserve)
-		size_max = dev->mtu + reserve;
+	if (size_max > dev->mtu + reserve + VLAN_HLEN)
+		size_max = dev->mtu + reserve + VLAN_HLEN;
 
 	do {
 		ph = packet_current_frame(po, &po->tx_ring,
@@ -2542,7 +2543,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
 					  addr, hlen);
 		if (likely(tp_len >= 0) &&
-		    tp_len > dev->mtu + dev->hard_header_len &&
+		    tp_len > dev->mtu + reserve &&
 		    !packet_extra_vlan_len_allowed(dev, skb))
 			tp_len = -EMSGSIZE;
 

From ed5a377d87dc4c87fb3e1f7f698cba38cd893103 Mon Sep 17 00:00:00 2001
From: lucien <lucien.xin@gmail.com>
Date: Thu, 12 Nov 2015 13:07:07 +0800
Subject: [PATCH 24/83] sctp: translate host order to network order when
 setting a hmacid

now sctp auth cannot work well when setting a hmacid manually, which
is caused by that we didn't use the network order for hmacid, so fix
it by adding the transformation in sctp_auth_ep_set_hmacs.

even we set hmacid with the network order in userspace, it still
can't work, because of this condition in sctp_auth_ep_set_hmacs():

		if (id > SCTP_AUTH_HMAC_ID_MAX)
			return -EOPNOTSUPP;

so this wasn't working before and thus it won't break compatibility.

Fixes: 65b07e5d0d09 ("[SCTP]: API updates to suport SCTP-AUTH extensions.")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/auth.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 4f15b7d730e1..1543e39f47c3 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -809,8 +809,8 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep,
 	if (!has_sha1)
 		return -EINVAL;
 
-	memcpy(ep->auth_hmacs_list->hmac_ids, &hmacs->shmac_idents[0],
-		hmacs->shmac_num_idents * sizeof(__u16));
+	for (i = 0; i < hmacs->shmac_num_idents; i++)
+		ep->auth_hmacs_list->hmac_ids[i] = htons(hmacs->shmac_idents[i]);
 	ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) +
 				hmacs->shmac_num_idents * sizeof(__u16));
 	return 0;

From 5883d9c6d7e680bcdc7a8a9ed2509cd10dd98206 Mon Sep 17 00:00:00 2001
From: Pavel Fedin <p.fedin@samsung.com>
Date: Thu, 12 Nov 2015 14:55:18 +0300
Subject: [PATCH 25/83] net: thunder: Fix crash upon shutdown after failed
 probe

If device probe fails, driver remains bound to the PCI device. However,
driver data has been reset to NULL. This causes crash upon dereferencing
it in nicvf_remove()

Signed-off-by: Pavel Fedin <p.fedin@samsung.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/thunder/nicvf_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index a9377727c11c..372c39e5bcbd 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -1600,6 +1600,9 @@ static void nicvf_remove(struct pci_dev *pdev)
 
 static void nicvf_shutdown(struct pci_dev *pdev)
 {
+	if (!pci_get_drvdata(pdev))
+		return;
+
 	nicvf_remove(pdev);
 }
 

From 00fd38d938db3f1ab1c486549afc450cb7e751b1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 12 Nov 2015 08:43:18 -0800
Subject: [PATCH 26/83] tcp: ensure proper barriers in lockless contexts

Some functions access TCP sockets without holding a lock and
might output non consistent data, depending on compiler and or
architecture.

tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ...

Introduce sk_state_load() and sk_state_store() to fix the issues,
and more clearly document where this lack of locking is happening.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h              | 25 +++++++++++++++++++++++++
 net/ipv4/inet_connection_sock.c |  4 ++--
 net/ipv4/tcp.c                  | 21 +++++++++++----------
 net/ipv4/tcp_diag.c             |  2 +-
 net/ipv4/tcp_ipv4.c             | 14 ++++++++------
 net/ipv6/tcp_ipv6.c             | 19 +++++++++++++++----
 6 files changed, 62 insertions(+), 23 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index bbf7c2cf15b4..7f89e4ba18d1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2226,6 +2226,31 @@ static inline bool sk_listener(const struct sock *sk)
 	return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
 }
 
+/**
+ * sk_state_load - read sk->sk_state for lockless contexts
+ * @sk: socket pointer
+ *
+ * Paired with sk_state_store(). Used in places we do not hold socket lock :
+ * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ...
+ */
+static inline int sk_state_load(const struct sock *sk)
+{
+	return smp_load_acquire(&sk->sk_state);
+}
+
+/**
+ * sk_state_store - update sk->sk_state
+ * @sk: socket pointer
+ * @newstate: new state
+ *
+ * Paired with sk_state_load(). Should be used in contexts where
+ * state change might impact lockless readers.
+ */
+static inline void sk_state_store(struct sock *sk, int newstate)
+{
+	smp_store_release(&sk->sk_state, newstate);
+}
+
 void sock_enable_timestamp(struct sock *sk, int flag);
 int sock_get_timestamp(struct sock *, struct timeval __user *);
 int sock_get_timestampns(struct sock *, struct timespec __user *);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1feb15f23de8..46b9c887bede 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -563,7 +563,7 @@ static void reqsk_timer_handler(unsigned long data)
 	int max_retries, thresh;
 	u8 defer_accept;
 
-	if (sk_listener->sk_state != TCP_LISTEN)
+	if (sk_state_load(sk_listener) != TCP_LISTEN)
 		goto drop;
 
 	max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
@@ -749,7 +749,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
 	 * It is OK, because this socket enters to hash table only
 	 * after validation is complete.
 	 */
-	sk->sk_state = TCP_LISTEN;
+	sk_state_store(sk, TCP_LISTEN);
 	if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
 		inet->inet_sport = htons(inet->inet_num);
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0cfa7c0c1e80..c1728771cf89 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -451,11 +451,14 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	unsigned int mask;
 	struct sock *sk = sock->sk;
 	const struct tcp_sock *tp = tcp_sk(sk);
+	int state;
 
 	sock_rps_record_flow(sk);
 
 	sock_poll_wait(file, sk_sleep(sk), wait);
-	if (sk->sk_state == TCP_LISTEN)
+
+	state = sk_state_load(sk);
+	if (state == TCP_LISTEN)
 		return inet_csk_listen_poll(sk);
 
 	/* Socket is not locked. We are protected from async events
@@ -492,14 +495,14 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent
 	 * blocking on fresh not-connected or disconnected socket. --ANK
 	 */
-	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE)
+	if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
 		mask |= POLLHUP;
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		mask |= POLLIN | POLLRDNORM | POLLRDHUP;
 
 	/* Connected or passive Fast Open socket? */
-	if (sk->sk_state != TCP_SYN_SENT &&
-	    (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk)) {
+	if (state != TCP_SYN_SENT &&
+	    (state != TCP_SYN_RECV || tp->fastopen_rsk)) {
 		int target = sock_rcvlowat(sk, 0, INT_MAX);
 
 		if (tp->urg_seq == tp->copied_seq &&
@@ -507,9 +510,6 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 		    tp->urg_data)
 			target++;
 
-		/* Potential race condition. If read of tp below will
-		 * escape above sk->sk_state, we can be illegally awaken
-		 * in SYN_* states. */
 		if (tp->rcv_nxt - tp->copied_seq >= target)
 			mask |= POLLIN | POLLRDNORM;
 
@@ -1934,7 +1934,7 @@ void tcp_set_state(struct sock *sk, int state)
 	/* Change state AFTER socket is unhashed to avoid closed
 	 * socket sitting in hash tables.
 	 */
-	sk->sk_state = state;
+	sk_state_store(sk, state);
 
 #ifdef STATE_TRACE
 	SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
@@ -2644,7 +2644,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	if (sk->sk_type != SOCK_STREAM)
 		return;
 
-	info->tcpi_state = sk->sk_state;
+	info->tcpi_state = sk_state_load(sk);
+
 	info->tcpi_ca_state = icsk->icsk_ca_state;
 	info->tcpi_retransmits = icsk->icsk_retransmits;
 	info->tcpi_probes = icsk->icsk_probes_out;
@@ -2672,7 +2673,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_snd_mss = tp->mss_cache;
 	info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
 
-	if (sk->sk_state == TCP_LISTEN) {
+	if (info->tcpi_state == TCP_LISTEN) {
 		info->tcpi_unacked = sk->sk_ack_backlog;
 		info->tcpi_sacked = sk->sk_max_ack_backlog;
 	} else {
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 479f34946177..b31604086edd 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -21,7 +21,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 {
 	struct tcp_info *info = _info;
 
-	if (sk->sk_state == TCP_LISTEN) {
+	if (sk_state_load(sk) == TCP_LISTEN) {
 		r->idiag_rqueue = sk->sk_ack_backlog;
 		r->idiag_wqueue = sk->sk_max_ack_backlog;
 	} else if (sk->sk_type == SOCK_STREAM) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 950e28c0cdf2..ba09016d1bfd 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2158,6 +2158,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
 	__u16 destp = ntohs(inet->inet_dport);
 	__u16 srcp = ntohs(inet->inet_sport);
 	int rx_queue;
+	int state;
 
 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
 	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
@@ -2175,17 +2176,18 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
 		timer_expires = jiffies;
 	}
 
-	if (sk->sk_state == TCP_LISTEN)
+	state = sk_state_load(sk);
+	if (state == TCP_LISTEN)
 		rx_queue = sk->sk_ack_backlog;
 	else
-		/*
-		 * because we dont lock socket, we might find a transient negative value
+		/* Because we don't lock the socket,
+		 * we might find a transient negative value.
 		 */
 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
 
 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
 			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
-		i, src, srcp, dest, destp, sk->sk_state,
+		i, src, srcp, dest, destp, state,
 		tp->write_seq - tp->snd_una,
 		rx_queue,
 		timer_active,
@@ -2199,8 +2201,8 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
 		jiffies_to_clock_t(icsk->icsk_ack.ato),
 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
 		tp->snd_cwnd,
-		sk->sk_state == TCP_LISTEN ?
-		    (fastopenq ? fastopenq->max_qlen : 0) :
+		state == TCP_LISTEN ?
+		    fastopenq->max_qlen :
 		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
 }
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5baa8e754e41..c5429a636f1a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1690,6 +1690,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 	const struct tcp_sock *tp = tcp_sk(sp);
 	const struct inet_connection_sock *icsk = inet_csk(sp);
 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
+	int rx_queue;
+	int state;
 
 	dest  = &sp->sk_v6_daddr;
 	src   = &sp->sk_v6_rcv_saddr;
@@ -1710,6 +1712,15 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 		timer_expires = jiffies;
 	}
 
+	state = sk_state_load(sp);
+	if (state == TCP_LISTEN)
+		rx_queue = sp->sk_ack_backlog;
+	else
+		/* Because we don't lock the socket,
+		 * we might find a transient negative value.
+		 */
+		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
+
 	seq_printf(seq,
 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
@@ -1718,9 +1729,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
 		   dest->s6_addr32[0], dest->s6_addr32[1],
 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
-		   sp->sk_state,
-		   tp->write_seq-tp->snd_una,
-		   (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
+		   state,
+		   tp->write_seq - tp->snd_una,
+		   rx_queue,
 		   timer_active,
 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
 		   icsk->icsk_retransmits,
@@ -1732,7 +1743,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
 		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
 		   tp->snd_cwnd,
-		   sp->sk_state == TCP_LISTEN ?
+		   state == TCP_LISTEN ?
 			fastopenq->max_qlen :
 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
 		   );

From 340c78e5906264c42f9415005c7ba0a4efcfe735 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 12 Nov 2015 09:14:12 -0800
Subject: [PATCH 27/83] ipvs: use skb_to_full_sk() helper

SYNACK packets might be attached to request sockets.

Use skb_to_full_sk() helper to avoid illegal accesses to
inet_sk(skb->sk)

Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets instead of listener")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
Acked-by: Julian Anastasov <ja@ssi.bg>
Acked-by: Simon Horman <horms@verge.net.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/ipvs/ip_vs_core.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 1e24fff53e4b..f57b4dcdb233 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1176,6 +1176,7 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
 	struct ip_vs_protocol *pp;
 	struct ip_vs_proto_data *pd;
 	struct ip_vs_conn *cp;
+	struct sock *sk;
 
 	EnterFunction(11);
 
@@ -1183,13 +1184,12 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
 	if (skb->ipvs_property)
 		return NF_ACCEPT;
 
+	sk = skb_to_full_sk(skb);
 	/* Bad... Do not break raw sockets */
-	if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
+	if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
 		     af == AF_INET)) {
-		struct sock *sk = skb->sk;
-		struct inet_sock *inet = inet_sk(skb->sk);
 
-		if (inet && sk->sk_family == PF_INET && inet->nodefrag)
+		if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
 			return NF_ACCEPT;
 	}
 
@@ -1681,6 +1681,7 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
 	struct ip_vs_conn *cp;
 	int ret, pkts;
 	int conn_reuse_mode;
+	struct sock *sk;
 
 	/* Already marked as IPVS request or reply? */
 	if (skb->ipvs_property)
@@ -1708,12 +1709,11 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
 	ip_vs_fill_iph_skb(af, skb, false, &iph);
 
 	/* Bad... Do not break raw sockets */
-	if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
+	sk = skb_to_full_sk(skb);
+	if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
 		     af == AF_INET)) {
-		struct sock *sk = skb->sk;
-		struct inet_sock *inet = inet_sk(skb->sk);
 
-		if (inet && sk->sk_family == PF_INET && inet->nodefrag)
+		if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
 			return NF_ACCEPT;
 	}
 

From ba6c4c094470dc83a7275000bac2fbd46bd5ab69 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 12 Nov 2015 19:35:25 +0200
Subject: [PATCH 28/83] net/mlx5e: Fix inline header size calculation

mlx5e_get_inline_hdr_size didn't take into account the vlan insertion
into the inline WQE segment.
This could lead to max inline violation in cases where
skb_headlen(skb) + VLAN_HLEN >= sq->max_inline.

Fixes: 3ea4891db8d0 ("net/mlx5e: Fix LSO vlan insertion")
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Achiad Shochat <achiad@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index cd8f85a251d7..f687ebf20d9c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -118,8 +118,15 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq,
 	 */
 #define MLX5E_MIN_INLINE ETH_HLEN
 
-	if (bf && (skb_headlen(skb) <= sq->max_inline))
-		return skb_headlen(skb);
+	if (bf) {
+		u16 ihs = skb_headlen(skb);
+
+		if (skb_vlan_tag_present(skb))
+			ihs += VLAN_HLEN;
+
+		if (ihs <= sq->max_inline)
+			return skb_headlen(skb);
+	}
 
 	return MLX5E_MIN_INLINE;
 }

From 66189961e986e53ae39822898fc2ce88f44c61bb Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Thu, 12 Nov 2015 19:35:26 +0200
Subject: [PATCH 29/83] net/mlx5e: Added self loopback prevention

Prevent outgoing multicast frames from looping back to the RX queue.

By introducing new HW capability self_lb_en_modifiable, which indicates
the support to modify self_lb_en bit in modify_tir command.

When this capability is set we can prevent TIRs from sending back
loopback multicast traffic to their own RQs, by "refreshing TIRs" with
modify_tir command, on every time new channels (SQs/RQs) are created at
device open.
This is needed since TIRs are static and only allocated once on driver
load, and the loopback decision is under their responsibility.

Fixes issues of the kind:
"IPv6: eth2: IPv6 duplicate address fe80::e61d:2dff:fe5c:f2e9 detected!"
The issue is seen since the IPv6 solicitations multicast messages are
loopedback and the network stack thinks they are coming from another host.

Fixes: 5c50368f3831 ("net/mlx5e: Light-weight netdev open/stop")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 48 +++++++++++++++++++
 include/linux/mlx5/mlx5_ifc.h                 | 24 ++++++----
 2 files changed, 62 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5fc4d2d78cdf..df001754bcd1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1332,6 +1332,42 @@ static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt)
 	return err;
 }
 
+static int mlx5e_refresh_tir_self_loopback_enable(struct mlx5_core_dev *mdev,
+						  u32 tirn)
+{
+	void *in;
+	int inlen;
+	int err;
+
+	inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+	in = mlx5_vzalloc(inlen);
+	if (!in)
+		return -ENOMEM;
+
+	MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
+
+	err = mlx5_core_modify_tir(mdev, tirn, in, inlen);
+
+	kvfree(in);
+
+	return err;
+}
+
+static int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5e_priv *priv)
+{
+	int err;
+	int i;
+
+	for (i = 0; i < MLX5E_NUM_TT; i++) {
+		err = mlx5e_refresh_tir_self_loopback_enable(priv->mdev,
+							     priv->tirn[i]);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int mlx5e_set_dev_port_mtu(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -1376,6 +1412,13 @@ int mlx5e_open_locked(struct net_device *netdev)
 		goto err_clear_state_opened_flag;
 	}
 
+	err = mlx5e_refresh_tirs_self_loopback_enable(priv);
+	if (err) {
+		netdev_err(netdev, "%s: mlx5e_refresh_tirs_self_loopback_enable failed, %d\n",
+			   __func__, err);
+		goto err_close_channels;
+	}
+
 	mlx5e_update_carrier(priv);
 	mlx5e_redirect_rqts(priv);
 
@@ -1383,6 +1426,8 @@ int mlx5e_open_locked(struct net_device *netdev)
 
 	return 0;
 
+err_close_channels:
+	mlx5e_close_channels(priv);
 err_clear_state_opened_flag:
 	clear_bit(MLX5E_STATE_OPENED, &priv->state);
 	return err;
@@ -1909,6 +1954,9 @@ static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
 			       "Not creating net device, some required device capabilities are missing\n");
 		return -ENOTSUPP;
 	}
+	if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable))
+		mlx5_core_warn(mdev, "Self loop back prevention is not supported\n");
+
 	return 0;
 }
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index dd2097455a2e..1565324eb620 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -453,26 +453,28 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
 	u8         lro_cap[0x1];
 	u8         lro_psh_flag[0x1];
 	u8         lro_time_stamp[0x1];
-	u8         reserved_0[0x6];
+	u8         reserved_0[0x3];
+	u8         self_lb_en_modifiable[0x1];
+	u8         reserved_1[0x2];
 	u8         max_lso_cap[0x5];
-	u8         reserved_1[0x4];
+	u8         reserved_2[0x4];
 	u8         rss_ind_tbl_cap[0x4];
-	u8         reserved_2[0x3];
+	u8         reserved_3[0x3];
 	u8         tunnel_lso_const_out_ip_id[0x1];
-	u8         reserved_3[0x2];
+	u8         reserved_4[0x2];
 	u8         tunnel_statless_gre[0x1];
 	u8         tunnel_stateless_vxlan[0x1];
 
-	u8         reserved_4[0x20];
+	u8         reserved_5[0x20];
 
-	u8         reserved_5[0x10];
+	u8         reserved_6[0x10];
 	u8         lro_min_mss_size[0x10];
 
-	u8         reserved_6[0x120];
+	u8         reserved_7[0x120];
 
 	u8         lro_timer_supported_periods[4][0x20];
 
-	u8         reserved_7[0x600];
+	u8         reserved_8[0x600];
 };
 
 struct mlx5_ifc_roce_cap_bits {
@@ -4051,9 +4053,11 @@ struct mlx5_ifc_modify_tis_in_bits {
 };
 
 struct mlx5_ifc_modify_tir_bitmask_bits {
-	u8	   reserved[0x20];
+	u8	   reserved_0[0x20];
 
-	u8         reserved1[0x1f];
+	u8         reserved_1[0x1b];
+	u8         self_lb_en[0x1];
+	u8         reserved_2[0x3];
 	u8         lro[0x1];
 };
 

From 50a9eea694ab8e0779069e0a4e0b12e145521468 Mon Sep 17 00:00:00 2001
From: Doron Tsur <doront@mellanox.com>
Date: Thu, 12 Nov 2015 19:35:27 +0200
Subject: [PATCH 30/83] net/mlx5e: Max mtu comparison fix

On change mtu the driver compares between hardware queried mtu and
software requested mtu. We need to compare between software
representation of the queried mtu and the requested mtu.

Fixes: facc9699f0fe ('net/mlx5e: Fix HW MTU settings')
Signed-off-by: Doron Tsur <doront@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index df001754bcd1..1e52db32c73d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1901,6 +1901,8 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu)
 
 	mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
 
+	max_mtu = MLX5E_HW2SW_MTU(max_mtu);
+
 	if (new_mtu > max_mtu) {
 		netdev_err(netdev,
 			   "%s: Bad MTU (%d) > (%d) Max\n",

From d4e28cbd24c8cb004960ddb8b22124953f6c220c Mon Sep 17 00:00:00 2001
From: Achiad Shochat <achiad@mellanox.com>
Date: Thu, 12 Nov 2015 19:35:28 +0200
Subject: [PATCH 31/83] net/mlx5e: Use the right DMA free function on TX path

On xmit path we use skb_frag_dma_map() which is using dma_map_page(),
while upon completion we dma-unmap the skb fragments using
dma_unmap_single() rather than dma_unmap_page().

To fix this, we now save the dma map type on xmit path and use this
info to call the right dma unmap method upon TX completion.

Signed-off-by: Achiad Shochat <achiad@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  | 10 ++-
 .../net/ethernet/mellanox/mlx5/core/en_tx.c   | 69 ++++++++++---------
 2 files changed, 45 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index f2ae62dd8c09..22e72bf1ae48 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -334,9 +334,15 @@ struct mlx5e_tx_skb_cb {
 
 #define MLX5E_TX_SKB_CB(__skb) ((struct mlx5e_tx_skb_cb *)__skb->cb)
 
+enum mlx5e_dma_map_type {
+	MLX5E_DMA_MAP_SINGLE,
+	MLX5E_DMA_MAP_PAGE
+};
+
 struct mlx5e_sq_dma {
-	dma_addr_t addr;
-	u32        size;
+	dma_addr_t              addr;
+	u32                     size;
+	enum mlx5e_dma_map_type type;
 };
 
 enum {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index f687ebf20d9c..1341b1d3c421 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -61,41 +61,49 @@ void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw)
 	}
 }
 
-static void mlx5e_dma_pop_last_pushed(struct mlx5e_sq *sq, dma_addr_t *addr,
-				      u32 *size)
+static inline void mlx5e_tx_dma_unmap(struct device *pdev,
+				      struct mlx5e_sq_dma *dma)
 {
-	sq->dma_fifo_pc--;
-	*addr = sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].addr;
-	*size = sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].size;
+	switch (dma->type) {
+	case MLX5E_DMA_MAP_SINGLE:
+		dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
+		break;
+	case MLX5E_DMA_MAP_PAGE:
+		dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
+		break;
+	default:
+		WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n");
+	}
+}
+
+static inline void mlx5e_dma_push(struct mlx5e_sq *sq,
+				  dma_addr_t addr,
+				  u32 size,
+				  enum mlx5e_dma_map_type map_type)
+{
+	sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].addr = addr;
+	sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].size = size;
+	sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].type = map_type;
+	sq->dma_fifo_pc++;
+}
+
+static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_sq *sq, u32 i)
+{
+	return &sq->dma_fifo[i & sq->dma_fifo_mask];
 }
 
 static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, struct sk_buff *skb)
 {
-	dma_addr_t addr;
-	u32 size;
 	int i;
 
 	for (i = 0; i < MLX5E_TX_SKB_CB(skb)->num_dma; i++) {
-		mlx5e_dma_pop_last_pushed(sq, &addr, &size);
-		dma_unmap_single(sq->pdev, addr, size, DMA_TO_DEVICE);
+		struct mlx5e_sq_dma *last_pushed_dma =
+			mlx5e_dma_get(sq, --sq->dma_fifo_pc);
+
+		mlx5e_tx_dma_unmap(sq->pdev, last_pushed_dma);
 	}
 }
 
-static inline void mlx5e_dma_push(struct mlx5e_sq *sq, dma_addr_t addr,
-				  u32 size)
-{
-	sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].addr = addr;
-	sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].size = size;
-	sq->dma_fifo_pc++;
-}
-
-static inline void mlx5e_dma_get(struct mlx5e_sq *sq, u32 i, dma_addr_t *addr,
-				 u32 *size)
-{
-	*addr = sq->dma_fifo[i & sq->dma_fifo_mask].addr;
-	*size = sq->dma_fifo[i & sq->dma_fifo_mask].size;
-}
-
 u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
 		       void *accel_priv, select_queue_fallback_t fallback)
 {
@@ -225,7 +233,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
 		dseg->lkey       = sq->mkey_be;
 		dseg->byte_count = cpu_to_be32(headlen);
 
-		mlx5e_dma_push(sq, dma_addr, headlen);
+		mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE);
 		MLX5E_TX_SKB_CB(skb)->num_dma++;
 
 		dseg++;
@@ -244,7 +252,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
 		dseg->lkey       = sq->mkey_be;
 		dseg->byte_count = cpu_to_be32(fsz);
 
-		mlx5e_dma_push(sq, dma_addr, fsz);
+		mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE);
 		MLX5E_TX_SKB_CB(skb)->num_dma++;
 
 		dseg++;
@@ -360,13 +368,10 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq)
 			}
 
 			for (j = 0; j < MLX5E_TX_SKB_CB(skb)->num_dma; j++) {
-				dma_addr_t addr;
-				u32 size;
+				struct mlx5e_sq_dma *dma =
+					mlx5e_dma_get(sq, dma_fifo_cc++);
 
-				mlx5e_dma_get(sq, dma_fifo_cc, &addr, &size);
-				dma_fifo_cc++;
-				dma_unmap_single(sq->pdev, addr, size,
-						 DMA_TO_DEVICE);
+				mlx5e_tx_dma_unmap(sq->pdev, dma);
 			}
 
 			npkts++;

From f5adbfee72282bb1f456d52b04adacd4fe6ac502 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Thu, 12 Nov 2015 19:35:29 +0200
Subject: [PATCH 32/83] net/mlx4_core: Fix sleeping while holding spinlock at
 rem_slave_counters

When cleaning slave's counter resources, we hold a spinlock that
protects the slave's counters list. As part of the clean, we call
__mlx4_clear_if_stat which calls mlx4_alloc_cmd_mailbox which is a
sleepable function.

In order to fix this issue, hold the spinlock, and copy all counter
indices into a temporary array, and release the spinlock. Afterwards,
iterate over this array and free every counter. Repeat this scenario
until the original list is empty (a new counter might have been added
while releasing the counters from the temporary array).

Fixes: b72ca7e96acf ("net/mlx4_core: Reset counters data when freed")
Reported-by: Moni Shoua <monis@mellanox.com>
Tested-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlx4/resource_tracker.c | 39 +++++++++++++------
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 9813d34f3e5b..6fec3e993d02 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -4952,26 +4952,41 @@ static void rem_slave_counters(struct mlx4_dev *dev, int slave)
 	struct res_counter *counter;
 	struct res_counter *tmp;
 	int err;
-	int index;
+	int *counters_arr = NULL;
+	int i, j;
 
 	err = move_all_busy(dev, slave, RES_COUNTER);
 	if (err)
 		mlx4_warn(dev, "rem_slave_counters: Could not move all counters - too busy for slave %d\n",
 			  slave);
 
-	spin_lock_irq(mlx4_tlock(dev));
-	list_for_each_entry_safe(counter, tmp, counter_list, com.list) {
-		if (counter->com.owner == slave) {
-			index = counter->com.res_id;
-			rb_erase(&counter->com.node,
-				 &tracker->res_tree[RES_COUNTER]);
-			list_del(&counter->com.list);
-			kfree(counter);
-			__mlx4_counter_free(dev, index);
+	counters_arr = kmalloc_array(dev->caps.max_counters,
+				     sizeof(*counters_arr), GFP_KERNEL);
+	if (!counters_arr)
+		return;
+
+	do {
+		i = 0;
+		j = 0;
+		spin_lock_irq(mlx4_tlock(dev));
+		list_for_each_entry_safe(counter, tmp, counter_list, com.list) {
+			if (counter->com.owner == slave) {
+				counters_arr[i++] = counter->com.res_id;
+				rb_erase(&counter->com.node,
+					 &tracker->res_tree[RES_COUNTER]);
+				list_del(&counter->com.list);
+				kfree(counter);
+			}
+		}
+		spin_unlock_irq(mlx4_tlock(dev));
+
+		while (j < i) {
+			__mlx4_counter_free(dev, counters_arr[j++]);
 			mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0);
 		}
-	}
-	spin_unlock_irq(mlx4_tlock(dev));
+	} while (i);
+
+	kfree(counters_arr);
 }
 
 static void rem_slave_xrcdns(struct mlx4_dev *dev, int slave)

From d49c2197fd70c37d57982804465268440a33183a Mon Sep 17 00:00:00 2001
From: Noa Osherovich <noaos@mellanox.com>
Date: Thu, 12 Nov 2015 19:35:30 +0200
Subject: [PATCH 33/83] net/mlx4_core: Avoid returning success in case of an
 error flow

The err variable wasn't set with the correct error value in some cases.

Fixes: 47605df95398 ('mlx4: Modify proxy/tunnel QP mechanism [..]')
Signed-off-by: Noa Osherovich <noaos@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/main.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 85f1b1e7e505..31c491e02e69 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -892,9 +892,10 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 		dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn;
 		dev->caps.port_mask[i] = dev->caps.port_type[i];
 		dev->caps.phys_port_id[i] = func_cap.phys_port_id;
-		if (mlx4_get_slave_pkey_gid_tbl_len(dev, i,
-						    &dev->caps.gid_table_len[i],
-						    &dev->caps.pkey_table_len[i]))
+		err = mlx4_get_slave_pkey_gid_tbl_len(dev, i,
+						      &dev->caps.gid_table_len[i],
+						      &dev->caps.pkey_table_len[i]);
+		if (err)
 			goto err_mem;
 	}
 
@@ -906,6 +907,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 			 dev->caps.uar_page_size * dev->caps.num_uars,
 			 (unsigned long long)
 			 pci_resource_len(dev->persist->pdev, 2));
+		err = -ENOMEM;
 		goto err_mem;
 	}
 

From dc1e7eb846ae6677426c2dcdfe5ea323cf9036e9 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Tue, 10 Nov 2015 16:51:14 +0100
Subject: [PATCH 34/83] net: dsa: mv88e6060: remove poll_link callback

As of mv88e6xxx remove the poll_link callback since the link
state change polling is now handled by the phylib.

Tested on a mv88e6060 B0 device with a TI DM816X SoC.

Suggested-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6060.c | 49 -------------------------------------
 1 file changed, 49 deletions(-)

diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c
index 9093577755f6..6885ef5678bc 100644
--- a/drivers/net/dsa/mv88e6060.c
+++ b/drivers/net/dsa/mv88e6060.c
@@ -225,54 +225,6 @@ mv88e6060_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val)
 	return reg_write(ds, addr, regnum, val);
 }
 
-static void mv88e6060_poll_link(struct dsa_switch *ds)
-{
-	int i;
-
-	for (i = 0; i < DSA_MAX_PORTS; i++) {
-		struct net_device *dev;
-		int uninitialized_var(port_status);
-		int link;
-		int speed;
-		int duplex;
-		int fc;
-
-		dev = ds->ports[i];
-		if (dev == NULL)
-			continue;
-
-		link = 0;
-		if (dev->flags & IFF_UP) {
-			port_status = reg_read(ds, REG_PORT(i), 0x00);
-			if (port_status < 0)
-				continue;
-
-			link = !!(port_status & 0x1000);
-		}
-
-		if (!link) {
-			if (netif_carrier_ok(dev)) {
-				netdev_info(dev, "link down\n");
-				netif_carrier_off(dev);
-			}
-			continue;
-		}
-
-		speed = (port_status & 0x0100) ? 100 : 10;
-		duplex = (port_status & 0x0200) ? 1 : 0;
-		fc = ((port_status & 0xc000) == 0xc000) ? 1 : 0;
-
-		if (!netif_carrier_ok(dev)) {
-			netdev_info(dev,
-				    "link up, %d Mb/s, %s duplex, flow control %sabled\n",
-				    speed,
-				    duplex ? "full" : "half",
-				    fc ? "en" : "dis");
-			netif_carrier_on(dev);
-		}
-	}
-}
-
 static struct dsa_switch_driver mv88e6060_switch_driver = {
 	.tag_protocol	= DSA_TAG_PROTO_TRAILER,
 	.probe		= mv88e6060_probe,
@@ -280,7 +232,6 @@ static struct dsa_switch_driver mv88e6060_switch_driver = {
 	.set_addr	= mv88e6060_set_addr,
 	.phy_read	= mv88e6060_phy_read,
 	.phy_write	= mv88e6060_phy_write,
-	.poll_link	= mv88e6060_poll_link,
 };
 
 static int __init mv88e6060_init(void)

From c37909b4bb24320a59f697439bc77c700ca354fe Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Tue, 10 Nov 2015 16:51:19 +0100
Subject: [PATCH 35/83] net: dsa: mv88e6060: use the correct InitReady bit

According to the mv88e6060 datasheet, the InitReady bit position
is 11 and the polarity is inverted.
Use the bit correctly to detect the end of initialization.

Acked-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Acked-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6060.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c
index 6885ef5678bc..eff5e18079a7 100644
--- a/drivers/net/dsa/mv88e6060.c
+++ b/drivers/net/dsa/mv88e6060.c
@@ -102,7 +102,7 @@ static int mv88e6060_switch_reset(struct dsa_switch *ds)
 	timeout = jiffies + 1 * HZ;
 	while (time_before(jiffies, timeout)) {
 		ret = REG_READ(REG_GLOBAL, 0x00);
-		if ((ret & 0x8000) == 0x0000)
+		if (ret & 0x800)
 			break;
 
 		usleep_range(1000, 2000);

From b73c774e08cf1e8877b000585e8598ecee890c7f Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Tue, 10 Nov 2015 16:51:24 +0100
Subject: [PATCH 36/83] net: dsa: mv88e6060: use the correct MaxFrameSize bit

According to the mv88e6060 datasheet, the MaxFrameSize bit position
is 10 instead of 11 which is reserved.
Use the bit correctly to setup max frame size to 1536.

Acked-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6060.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c
index eff5e18079a7..10647ade2f6d 100644
--- a/drivers/net/dsa/mv88e6060.c
+++ b/drivers/net/dsa/mv88e6060.c
@@ -119,7 +119,7 @@ static int mv88e6060_setup_global(struct dsa_switch *ds)
 	 * set the maximum frame size to 1536 bytes, and mask all
 	 * interrupt sources.
 	 */
-	REG_WRITE(REG_GLOBAL, 0x04, 0x0800);
+	REG_WRITE(REG_GLOBAL, 0x04, 0x400);
 
 	/* Enable automatic address learning, set the address
 	 * database size to 1024 entries, and set the default aging

From 83ea0f4cb344089b6f240d3793d8c522a8501037 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Tue, 10 Nov 2015 16:51:32 +0100
Subject: [PATCH 37/83] net: dsa: mv88e6060: use the correct bit shift for mac0

According to the mv88e6060 datasheet, the first mac byte must
be at position 9 instead of 8 since the bit 8 is used to select
if the mac address must differ for each port for Pause frames.
Use the correct shift and set the same mac address for all port.

Acked-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6060.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c
index 10647ade2f6d..cd08079e84ad 100644
--- a/drivers/net/dsa/mv88e6060.c
+++ b/drivers/net/dsa/mv88e6060.c
@@ -188,7 +188,8 @@ static int mv88e6060_setup(struct dsa_switch *ds)
 
 static int mv88e6060_set_addr(struct dsa_switch *ds, u8 *addr)
 {
-	REG_WRITE(REG_GLOBAL, 0x01, (addr[0] << 8) | addr[1]);
+	/* Use the same MAC Address as FD Pause frames for all ports */
+	REG_WRITE(REG_GLOBAL, 0x01, (addr[0] << 9) | addr[1]);
 	REG_WRITE(REG_GLOBAL, 0x02, (addr[2] << 8) | addr[3]);
 	REG_WRITE(REG_GLOBAL, 0x03, (addr[4] << 8) | addr[5]);
 

From f7e3931181595cd15e22e199d1bbabb0468d5a93 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Tue, 10 Nov 2015 16:51:42 +0100
Subject: [PATCH 38/83] net: dsa: mv88e6060: add register defines header file

To align with the mv88e6xxx code, add a similar header file
with all the register defines.
The file is based on the mv88e6xxx header for coherency.

Acked-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Acked-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6060.h | 111 ++++++++++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)
 create mode 100644 drivers/net/dsa/mv88e6060.h

diff --git a/drivers/net/dsa/mv88e6060.h b/drivers/net/dsa/mv88e6060.h
new file mode 100644
index 000000000000..cc9b2ed4aff4
--- /dev/null
+++ b/drivers/net/dsa/mv88e6060.h
@@ -0,0 +1,111 @@
+/*
+ * drivers/net/dsa/mv88e6060.h - Marvell 88e6060 switch chip support
+ * Copyright (c) 2015 Neil Armstrong
+ *
+ * Based on mv88e6xxx.h
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __MV88E6060_H
+#define __MV88E6060_H
+
+#define MV88E6060_PORTS	6
+
+#define REG_PORT(p)		(0x8 + (p))
+#define PORT_STATUS		0x00
+#define PORT_STATUS_PAUSE_EN	BIT(15)
+#define PORT_STATUS_MY_PAUSE	BIT(14)
+#define PORT_STATUS_FC		(PORT_STATUS_MY_PAUSE | PORT_STATUS_PAUSE_EN)
+#define PORT_STATUS_RESOLVED	BIT(13)
+#define PORT_STATUS_LINK	BIT(12)
+#define PORT_STATUS_PORTMODE	BIT(11)
+#define PORT_STATUS_PHYMODE	BIT(10)
+#define PORT_STATUS_DUPLEX	BIT(9)
+#define PORT_STATUS_SPEED	BIT(8)
+#define PORT_SWITCH_ID		0x03
+#define PORT_SWITCH_ID_6060	0x0600
+#define PORT_SWITCH_ID_6060_MASK	0xfff0
+#define PORT_SWITCH_ID_6060_R1	0x0601
+#define PORT_SWITCH_ID_6060_R2	0x0602
+#define PORT_CONTROL		0x04
+#define PORT_CONTROL_FORCE_FLOW_CTRL	BIT(15)
+#define PORT_CONTROL_TRAILER	BIT(14)
+#define PORT_CONTROL_HEADER	BIT(11)
+#define PORT_CONTROL_INGRESS_MODE	BIT(8)
+#define PORT_CONTROL_VLAN_TUNNEL	BIT(7)
+#define PORT_CONTROL_STATE_MASK	0x03
+#define PORT_CONTROL_STATE_DISABLED	0x00
+#define PORT_CONTROL_STATE_BLOCKING	0x01
+#define PORT_CONTROL_STATE_LEARNING	0x02
+#define PORT_CONTROL_STATE_FORWARDING	0x03
+#define PORT_VLAN_MAP		0x06
+#define PORT_VLAN_MAP_DBNUM_SHIFT	12
+#define PORT_VLAN_MAP_TABLE_MASK	0x1f
+#define PORT_ASSOC_VECTOR	0x0b
+#define PORT_ASSOC_VECTOR_MONITOR	BIT(15)
+#define PORT_ASSOC_VECTOR_PAV_MASK	0x1f
+#define PORT_RX_CNTR		0x10
+#define PORT_TX_CNTR		0x11
+
+#define REG_GLOBAL		0x0f
+#define GLOBAL_STATUS		0x00
+#define GLOBAL_STATUS_SW_MODE_MASK	(0x3 << 12)
+#define GLOBAL_STATUS_SW_MODE_0	(0x0 << 12)
+#define GLOBAL_STATUS_SW_MODE_1	(0x1 << 12)
+#define GLOBAL_STATUS_SW_MODE_2	(0x2 << 12)
+#define GLOBAL_STATUS_SW_MODE_3	(0x3 << 12)
+#define GLOBAL_STATUS_INIT_READY	BIT(11)
+#define GLOBAL_STATUS_ATU_FULL		BIT(3)
+#define GLOBAL_STATUS_ATU_DONE		BIT(2)
+#define GLOBAL_STATUS_PHY_INT	BIT(1)
+#define GLOBAL_STATUS_EEINT	BIT(0)
+#define GLOBAL_MAC_01		0x01
+#define GLOBAL_MAC_01_DIFF_ADDR	BIT(8)
+#define GLOBAL_MAC_23		0x02
+#define GLOBAL_MAC_45		0x03
+#define GLOBAL_CONTROL		0x04
+#define GLOBAL_CONTROL_DISCARD_EXCESS	BIT(13)
+#define GLOBAL_CONTROL_MAX_FRAME_1536	BIT(10)
+#define GLOBAL_CONTROL_RELOAD_EEPROM	BIT(9)
+#define GLOBAL_CONTROL_CTRMODE		BIT(8)
+#define GLOBAL_CONTROL_ATU_FULL_EN	BIT(3)
+#define GLOBAL_CONTROL_ATU_DONE_EN	BIT(2)
+#define GLOBAL_CONTROL_PHYINT_EN	BIT(1)
+#define GLOBAL_CONTROL_EEPROM_DONE_EN	BIT(0)
+#define GLOBAL_ATU_CONTROL	0x0a
+#define GLOBAL_ATU_CONTROL_SWRESET	BIT(15)
+#define GLOBAL_ATU_CONTROL_LEARNDIS	BIT(14)
+#define GLOBAL_ATU_CONTROL_ATUSIZE_256	(0x0 << 12)
+#define GLOBAL_ATU_CONTROL_ATUSIZE_512	(0x1 << 12)
+#define GLOBAL_ATU_CONTROL_ATUSIZE_1024	(0x2 << 12)
+#define GLOBAL_ATU_CONTROL_ATE_AGE_SHIFT	4
+#define GLOBAL_ATU_CONTROL_ATE_AGE_MASK	(0xff << 4)
+#define GLOBAL_ATU_CONTROL_ATE_AGE_5MIN	(0x13 << 4)
+#define GLOBAL_ATU_OP		0x0b
+#define GLOBAL_ATU_OP_BUSY	BIT(15)
+#define GLOBAL_ATU_OP_NOP		(0 << 12)
+#define GLOBAL_ATU_OP_FLUSH_ALL	((1 << 12) | GLOBAL_ATU_OP_BUSY)
+#define GLOBAL_ATU_OP_FLUSH_UNLOCKED	((2 << 12) | GLOBAL_ATU_OP_BUSY)
+#define GLOBAL_ATU_OP_LOAD_DB		((3 << 12) | GLOBAL_ATU_OP_BUSY)
+#define GLOBAL_ATU_OP_GET_NEXT_DB	((4 << 12) | GLOBAL_ATU_OP_BUSY)
+#define GLOBAL_ATU_OP_FLUSH_DB		((5 << 12) | GLOBAL_ATU_OP_BUSY)
+#define GLOBAL_ATU_OP_FLUSH_UNLOCKED_DB ((6 << 12) | GLOBAL_ATU_OP_BUSY)
+#define GLOBAL_ATU_DATA		0x0c
+#define GLOBAL_ATU_DATA_PORT_VECTOR_MASK	0x3f0
+#define GLOBAL_ATU_DATA_PORT_VECTOR_SHIFT	4
+#define GLOBAL_ATU_DATA_STATE_MASK		0x0f
+#define GLOBAL_ATU_DATA_STATE_UNUSED		0x00
+#define GLOBAL_ATU_DATA_STATE_UC_STATIC		0x0e
+#define GLOBAL_ATU_DATA_STATE_UC_LOCKED		0x0f
+#define GLOBAL_ATU_DATA_STATE_MC_STATIC		0x07
+#define GLOBAL_ATU_DATA_STATE_MC_LOCKED		0x0e
+#define GLOBAL_ATU_MAC_01	0x0d
+#define GLOBAL_ATU_MAC_23	0x0e
+#define GLOBAL_ATU_MAC_45	0x0f
+
+#endif

From 6a4b2980d18164a09734c2069ddb4ad4a3b69c71 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Tue, 10 Nov 2015 16:51:36 +0100
Subject: [PATCH 39/83] net: dsa: mv88e6060: replace magic values with register
 defines

To align with the mv88e6xxx code, use the register defines to
access all the register addresses and bit fields.

Acked-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6060.c | 64 +++++++++++++++++++++----------------
 1 file changed, 37 insertions(+), 27 deletions(-)

diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c
index cd08079e84ad..0527f485c3dc 100644
--- a/drivers/net/dsa/mv88e6060.c
+++ b/drivers/net/dsa/mv88e6060.c
@@ -15,9 +15,7 @@
 #include <linux/netdevice.h>
 #include <linux/phy.h>
 #include <net/dsa.h>
-
-#define REG_PORT(p)		(8 + (p))
-#define REG_GLOBAL		0x0f
+#include "mv88e6060.h"
 
 static int reg_read(struct dsa_switch *ds, int addr, int reg)
 {
@@ -67,13 +65,14 @@ static char *mv88e6060_probe(struct device *host_dev, int sw_addr)
 	if (bus == NULL)
 		return NULL;
 
-	ret = mdiobus_read(bus, sw_addr + REG_PORT(0), 0x03);
+	ret = mdiobus_read(bus, sw_addr + REG_PORT(0), PORT_SWITCH_ID);
 	if (ret >= 0) {
-		if (ret == 0x0600)
+		if (ret == PORT_SWITCH_ID_6060)
 			return "Marvell 88E6060 (A0)";
-		if (ret == 0x0601 || ret == 0x0602)
+		if (ret == PORT_SWITCH_ID_6060_R1 ||
+		    ret == PORT_SWITCH_ID_6060_R2)
 			return "Marvell 88E6060 (B0)";
-		if ((ret & 0xfff0) == 0x0600)
+		if ((ret & PORT_SWITCH_ID_6060_MASK) == PORT_SWITCH_ID_6060)
 			return "Marvell 88E6060";
 	}
 
@@ -87,22 +86,26 @@ static int mv88e6060_switch_reset(struct dsa_switch *ds)
 	unsigned long timeout;
 
 	/* Set all ports to the disabled state. */
-	for (i = 0; i < 6; i++) {
-		ret = REG_READ(REG_PORT(i), 0x04);
-		REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc);
+	for (i = 0; i < MV88E6060_PORTS; i++) {
+		ret = REG_READ(REG_PORT(i), PORT_CONTROL);
+		REG_WRITE(REG_PORT(i), PORT_CONTROL,
+			  ret & ~PORT_CONTROL_STATE_MASK);
 	}
 
 	/* Wait for transmit queues to drain. */
 	usleep_range(2000, 4000);
 
 	/* Reset the switch. */
-	REG_WRITE(REG_GLOBAL, 0x0a, 0xa130);
+	REG_WRITE(REG_GLOBAL, GLOBAL_ATU_CONTROL,
+		  GLOBAL_ATU_CONTROL_SWRESET |
+		  GLOBAL_ATU_CONTROL_ATUSIZE_1024 |
+		  GLOBAL_ATU_CONTROL_ATE_AGE_5MIN);
 
 	/* Wait up to one second for reset to complete. */
 	timeout = jiffies + 1 * HZ;
 	while (time_before(jiffies, timeout)) {
-		ret = REG_READ(REG_GLOBAL, 0x00);
-		if (ret & 0x800)
+		ret = REG_READ(REG_GLOBAL, GLOBAL_STATUS);
+		if (ret & GLOBAL_STATUS_INIT_READY)
 			break;
 
 		usleep_range(1000, 2000);
@@ -119,13 +122,15 @@ static int mv88e6060_setup_global(struct dsa_switch *ds)
 	 * set the maximum frame size to 1536 bytes, and mask all
 	 * interrupt sources.
 	 */
-	REG_WRITE(REG_GLOBAL, 0x04, 0x400);
+	REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, GLOBAL_CONTROL_MAX_FRAME_1536);
 
 	/* Enable automatic address learning, set the address
 	 * database size to 1024 entries, and set the default aging
 	 * time to 5 minutes.
 	 */
-	REG_WRITE(REG_GLOBAL, 0x0a, 0x2130);
+	REG_WRITE(REG_GLOBAL, GLOBAL_ATU_CONTROL,
+		  GLOBAL_ATU_CONTROL_ATUSIZE_1024 |
+		  GLOBAL_ATU_CONTROL_ATE_AGE_5MIN);
 
 	return 0;
 }
@@ -139,25 +144,30 @@ static int mv88e6060_setup_port(struct dsa_switch *ds, int p)
 	 * state to Forwarding.  Additionally, if this is the CPU
 	 * port, enable Ingress and Egress Trailer tagging mode.
 	 */
-	REG_WRITE(addr, 0x04, dsa_is_cpu_port(ds, p) ?  0x4103 : 0x0003);
+	REG_WRITE(addr, PORT_CONTROL,
+		  dsa_is_cpu_port(ds, p) ?
+			PORT_CONTROL_TRAILER |
+			PORT_CONTROL_INGRESS_MODE |
+			PORT_CONTROL_STATE_FORWARDING :
+			PORT_CONTROL_STATE_FORWARDING);
 
 	/* Port based VLAN map: give each port its own address
 	 * database, allow the CPU port to talk to each of the 'real'
 	 * ports, and allow each of the 'real' ports to only talk to
 	 * the CPU port.
 	 */
-	REG_WRITE(addr, 0x06,
-			((p & 0xf) << 12) |
-			 (dsa_is_cpu_port(ds, p) ?
-				ds->phys_port_mask :
-				(1 << ds->dst->cpu_port)));
+	REG_WRITE(addr, PORT_VLAN_MAP,
+		  ((p & 0xf) << PORT_VLAN_MAP_DBNUM_SHIFT) |
+		   (dsa_is_cpu_port(ds, p) ?
+			ds->phys_port_mask :
+			BIT(ds->dst->cpu_port)));
 
 	/* Port Association Vector: when learning source addresses
 	 * of packets, add the address to the address database using
 	 * a port bitmap that has only the bit for this port set and
 	 * the other bits clear.
 	 */
-	REG_WRITE(addr, 0x0b, 1 << p);
+	REG_WRITE(addr, PORT_ASSOC_VECTOR, BIT(p));
 
 	return 0;
 }
@@ -177,7 +187,7 @@ static int mv88e6060_setup(struct dsa_switch *ds)
 	if (ret < 0)
 		return ret;
 
-	for (i = 0; i < 6; i++) {
+	for (i = 0; i < MV88E6060_PORTS; i++) {
 		ret = mv88e6060_setup_port(ds, i);
 		if (ret < 0)
 			return ret;
@@ -189,16 +199,16 @@ static int mv88e6060_setup(struct dsa_switch *ds)
 static int mv88e6060_set_addr(struct dsa_switch *ds, u8 *addr)
 {
 	/* Use the same MAC Address as FD Pause frames for all ports */
-	REG_WRITE(REG_GLOBAL, 0x01, (addr[0] << 9) | addr[1]);
-	REG_WRITE(REG_GLOBAL, 0x02, (addr[2] << 8) | addr[3]);
-	REG_WRITE(REG_GLOBAL, 0x03, (addr[4] << 8) | addr[5]);
+	REG_WRITE(REG_GLOBAL, GLOBAL_MAC_01, (addr[0] << 9) | addr[1]);
+	REG_WRITE(REG_GLOBAL, GLOBAL_MAC_23, (addr[2] << 8) | addr[3]);
+	REG_WRITE(REG_GLOBAL, GLOBAL_MAC_45, (addr[4] << 8) | addr[5]);
 
 	return 0;
 }
 
 static int mv88e6060_port_to_phy_addr(int port)
 {
-	if (port >= 0 && port <= 5)
+	if (port >= 0 && port < MV88E6060_PORTS)
 		return port;
 	return -1;
 }

From b4fe85f9c9146f60457e9512fb6055e69e6a7a65 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 12 Nov 2015 17:35:58 +0100
Subject: [PATCH 40/83] ip_tunnel: disable preemption when updating per-cpu
 tstats

Drivers like vxlan use the recently introduced
udp_tunnel_xmit_skb/udp_tunnel6_xmit_skb APIs. udp_tunnel6_xmit_skb
makes use of ip6tunnel_xmit, and ip6tunnel_xmit, after sending the
packet, updates the struct stats using the usual
u64_stats_update_begin/end calls on this_cpu_ptr(dev->tstats).
udp_tunnel_xmit_skb makes use of iptunnel_xmit, which doesn't touch
tstats, so drivers like vxlan, immediately after, call
iptunnel_xmit_stats, which does the same thing - calls
u64_stats_update_begin/end on this_cpu_ptr(dev->tstats).

While vxlan is probably fine (I don't know?), calling a similar function
from, say, an unbound workqueue, on a fully preemptable kernel causes
real issues:

[  188.434537] BUG: using smp_processor_id() in preemptible [00000000] code: kworker/u8:0/6
[  188.435579] caller is debug_smp_processor_id+0x17/0x20
[  188.435583] CPU: 0 PID: 6 Comm: kworker/u8:0 Not tainted 4.2.6 #2
[  188.435607] Call Trace:
[  188.435611]  [<ffffffff8234e936>] dump_stack+0x4f/0x7b
[  188.435615]  [<ffffffff81915f3d>] check_preemption_disabled+0x19d/0x1c0
[  188.435619]  [<ffffffff81915f77>] debug_smp_processor_id+0x17/0x20

The solution would be to protect the whole
this_cpu_ptr(dev->tstats)/u64_stats_update_begin/end blocks with
disabling preemption and then reenabling it.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_tunnel.h | 3 ++-
 include/net/ip_tunnels.h | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index aaee6fa02cf1..ff788b665277 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -90,11 +90,12 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
 	err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb);
 
 	if (net_xmit_eval(err) == 0) {
-		struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
+		struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats);
 		u64_stats_update_begin(&tstats->syncp);
 		tstats->tx_bytes += pkt_len;
 		tstats->tx_packets++;
 		u64_stats_update_end(&tstats->syncp);
+		put_cpu_ptr(tstats);
 	} else {
 		stats->tx_errors++;
 		stats->tx_aborted_errors++;
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index f6dafec9102c..62a750a6a8f8 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -287,12 +287,13 @@ static inline void iptunnel_xmit_stats(int err,
 				       struct pcpu_sw_netstats __percpu *stats)
 {
 	if (err > 0) {
-		struct pcpu_sw_netstats *tstats = this_cpu_ptr(stats);
+		struct pcpu_sw_netstats *tstats = get_cpu_ptr(stats);
 
 		u64_stats_update_begin(&tstats->syncp);
 		tstats->tx_bytes += err;
 		tstats->tx_packets++;
 		u64_stats_update_end(&tstats->syncp);
+		put_cpu_ptr(tstats);
 	} else if (err < 0) {
 		err_stats->tx_errors++;
 		err_stats->tx_aborted_errors++;

From 0eae5982a3571dc15a9e7684c02e8774ba2fb1c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A5ns=20Rullg=C3=A5rd?= <mans@mansr.com>
Date: Thu, 12 Nov 2015 17:40:20 +0000
Subject: [PATCH 41/83] net: phy: at803x: support interrupt on 8030 and 8035

Commit 77a993942 "phy/at8031: enable at8031 to work on interrupt mode"
added interrupt support for the 8031 PHY but left out the other two
chips supported by this driver.

This patch sets the .ack_interrupt and .config_intr functions for the
8030 and 8035 drivers as well.

Signed-off-by: Mans Rullgard <mans@mansr.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/at803x.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index fabf11d32d27..2d020a3ec0b5 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -308,6 +308,8 @@ static struct phy_driver at803x_driver[] = {
 	.flags			= PHY_HAS_INTERRUPT,
 	.config_aneg		= genphy_config_aneg,
 	.read_status		= genphy_read_status,
+	.ack_interrupt		= at803x_ack_interrupt,
+	.config_intr		= at803x_config_intr,
 	.driver			= {
 		.owner = THIS_MODULE,
 	},
@@ -327,6 +329,8 @@ static struct phy_driver at803x_driver[] = {
 	.flags			= PHY_HAS_INTERRUPT,
 	.config_aneg		= genphy_config_aneg,
 	.read_status		= genphy_read_status,
+	.ack_interrupt		= at803x_ack_interrupt,
+	.config_intr		= at803x_config_intr,
 	.driver			= {
 		.owner = THIS_MODULE,
 	},

From 7729b053814ac91af340f5055970afc87d7fee21 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A5ns=20Rullg=C3=A5rd?= <mans@mansr.com>
Date: Thu, 12 Nov 2015 18:41:12 +0000
Subject: [PATCH 42/83] net: phy: vitesse: add support for VSC8601

This adds support for the Vitesse VSC8601 PHY. Generic functions are
used for everything except interrupt handling.

Signed-off-by: Mans Rullgard <mans@mansr.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/vitesse.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c
index 76cad712ddb2..dd295dbaa074 100644
--- a/drivers/net/phy/vitesse.c
+++ b/drivers/net/phy/vitesse.c
@@ -66,6 +66,7 @@
 #define PHY_ID_VSC8244			0x000fc6c0
 #define PHY_ID_VSC8514			0x00070670
 #define PHY_ID_VSC8574			0x000704a0
+#define PHY_ID_VSC8601			0x00070420
 #define PHY_ID_VSC8662			0x00070660
 #define PHY_ID_VSC8221			0x000fc550
 #define PHY_ID_VSC8211			0x000fc4b0
@@ -133,7 +134,8 @@ static int vsc82xx_config_intr(struct phy_device *phydev)
 			(phydev->drv->phy_id == PHY_ID_VSC8234 ||
 			 phydev->drv->phy_id == PHY_ID_VSC8244 ||
 			 phydev->drv->phy_id == PHY_ID_VSC8514 ||
-			 phydev->drv->phy_id == PHY_ID_VSC8574) ?
+			 phydev->drv->phy_id == PHY_ID_VSC8574 ||
+			 phydev->drv->phy_id == PHY_ID_VSC8601) ?
 				MII_VSC8244_IMASK_MASK :
 				MII_VSC8221_IMASK_MASK);
 	else {
@@ -271,6 +273,18 @@ static struct phy_driver vsc82xx_driver[] = {
 	.ack_interrupt  = &vsc824x_ack_interrupt,
 	.config_intr    = &vsc82xx_config_intr,
 	.driver         = { .owner = THIS_MODULE,},
+}, {
+	.phy_id         = PHY_ID_VSC8601,
+	.name           = "Vitesse VSC8601",
+	.phy_id_mask    = 0x000ffff0,
+	.features       = PHY_GBIT_FEATURES,
+	.flags          = PHY_HAS_INTERRUPT,
+	.config_init    = &genphy_config_init,
+	.config_aneg    = &genphy_config_aneg,
+	.read_status    = &genphy_read_status,
+	.ack_interrupt  = &vsc824x_ack_interrupt,
+	.config_intr    = &vsc82xx_config_intr,
+	.driver         = { .owner = THIS_MODULE,},
 }, {
 	.phy_id         = PHY_ID_VSC8662,
 	.name           = "Vitesse VSC8662",

From 0fcd593b943bfcc21ad84d3321422401de071d8a Mon Sep 17 00:00:00 2001
From: Yang Shi <yang.shi@linaro.org>
Date: Thu, 12 Nov 2015 13:57:00 -0800
Subject: [PATCH 43/83] arm64: bpf: fix JIT frame pointer setup

BPF fp should point to the top of the BPF prog stack. The original
implementation made it point to the bottom incorrectly.
Move A64_SP to fp before reserve BPF prog stack space.

CC: Zi Shen Lim <zlim.lnx@gmail.com>
CC: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Yang Shi <yang.shi@linaro.org>
Reviewed-by: Zi Shen Lim <zlim.lnx@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm64/net/bpf_jit_comp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index a44e5293c6f5..ac8b548ce493 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -161,12 +161,12 @@ static void build_prologue(struct jit_ctx *ctx)
 	if (ctx->tmp_used)
 		emit(A64_PUSH(tmp1, tmp2, A64_SP), ctx);
 
-	/* Set up BPF stack */
-	emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx);
-
 	/* Set up frame pointer */
 	emit(A64_MOV(1, fp, A64_SP), ctx);
 
+	/* Set up BPF stack */
+	emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx);
+
 	/* Clear registers A and X */
 	emit_a64_mov_i64(ra, 0, ctx);
 	emit_a64_mov_i64(rx, 0, ctx);

From 30b50aa612018bd92f5a85534cc2668423e8c7e8 Mon Sep 17 00:00:00 2001
From: Yang Shi <yang.shi@linaro.org>
Date: Thu, 12 Nov 2015 14:07:46 -0800
Subject: [PATCH 44/83] bpf: samples: exclude asm/sysreg.h for arm64

commit 338d4f49d6f7114a017d294ccf7374df4f998edc
("arm64: kernel: Add support for Privileged Access Never") includes sysreg.h
into futex.h and uaccess.h. But, the inline assembly used by asm/sysreg.h is
incompatible with llvm so it will cause BPF samples build failure for ARM64.
Since sysreg.h is useless for BPF samples, just exclude it from Makefile via
defining __ASM_SYSREG_H.

Signed-off-by: Yang Shi <yang.shi@linaro.org>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/bpf/Makefile | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 79b4596b5f9a..edd638b5825f 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -67,10 +67,13 @@ HOSTLOADLIBES_lathist += -lelf
 # point this to your LLVM backend with bpf support
 LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
 
+# asm/sysreg.h inline assmbly used by it is incompatible with llvm.
+# But, ehere is not easy way to fix it, so just exclude it since it is
+# useless for BPF samples.
 $(obj)/%.o: $(src)/%.c
 	clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
-		-D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
+		-D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
 		-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
 	clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
-		-D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
+		-D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
 		-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=asm -o $@.s

From ebaef649c26b44ff28114b452fd067a270ca7f02 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 14 Nov 2015 01:26:53 +0100
Subject: [PATCH 45/83] bpf, arm: start flushing icache range from header

During review I noticed that the icache range we're flushing should
start at header already and not at ctx.image.

Reason is that after 55309dd3d4cd ("net: bpf: arm: address randomize
and write protect JIT code"), we also want to make sure to flush the
random-sized trap in front of the start of the actual program (analogous
to x86). No operational differences from user side.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Nicolas Schichan <nschichan@freebox.fr>
Cc: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm/net/bpf_jit_32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 2f4b14cfddb4..591f9db3bf40 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1061,7 +1061,7 @@ void bpf_jit_compile(struct bpf_prog *fp)
 	}
 	build_epilogue(&ctx);
 
-	flush_icache_range((u32)ctx.target, (u32)(ctx.target + ctx.idx));
+	flush_icache_range((u32)header, (u32)(ctx.target + ctx.idx));
 
 #if __LINUX_ARM_ARCH__ < 7
 	if (ctx.imm_count)

From c3d4c682c240595c3637c552dc1afa985bc6d382 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 14 Nov 2015 01:16:18 +0100
Subject: [PATCH 46/83] bpf, arm64: start flushing icache range from header

While recently going over ARM64's BPF code, I noticed that the icache
range we're flushing should start at header already and not at ctx.image.

Reason is that after b569c1c622c5 ("net: bpf: arm64: address randomize
and write protect JIT code"), we also want to make sure to flush the
random-sized trap in front of the start of the actual program (analogous
to x86). No operational differences from user side.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Zi Shen Lim <zlim.lnx@gmail.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm64/net/bpf_jit_comp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index ac8b548ce493..64a8bc12f4a2 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -740,7 +740,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
 	if (bpf_jit_enable > 1)
 		bpf_jit_dump(prog->len, image_size, 2, ctx.image);
 
-	bpf_flush_icache(ctx.image, ctx.image + ctx.idx);
+	bpf_flush_icache(header, ctx.image + ctx.idx);
 
 	set_memory_ro((unsigned long)header, header->pages);
 	prog->bpf_func = (void *)ctx.image;

From cd998ecd2f031cfdb88436ea12f7c6d0b09c7a80 Mon Sep 17 00:00:00 2001
From: Pavel Fedin <p.fedin@samsung.com>
Date: Fri, 13 Nov 2015 09:46:59 +0300
Subject: [PATCH 47/83] net: smsc911x: Reset PHY during initialization

On certain hardware after software reboot the chip may get stuck and fail
to reinitialize during reset. This can be fixed by ensuring that PHY is
reset too.

Old PHY resetting method required operational MDIO interface, therefore
the chip should have been already set up. In order to be able to function
during probe, it is changed to use PMT_CTRL register.

The problem could be observed on SMDK5410 board.

Signed-off-by: Pavel Fedin <p.fedin@samsung.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/smsc/smsc911x.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index c860c9007e49..219a99b7a631 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -809,22 +809,17 @@ static int smsc911x_phy_check_loopbackpkt(struct smsc911x_data *pdata)
 
 static int smsc911x_phy_reset(struct smsc911x_data *pdata)
 {
-	struct phy_device *phy_dev = pdata->phy_dev;
 	unsigned int temp;
 	unsigned int i = 100000;
 
-	BUG_ON(!phy_dev);
-	BUG_ON(!phy_dev->bus);
-
-	SMSC_TRACE(pdata, hw, "Performing PHY BCR Reset");
-	smsc911x_mii_write(phy_dev->bus, phy_dev->addr, MII_BMCR, BMCR_RESET);
+	temp = smsc911x_reg_read(pdata, PMT_CTRL);
+	smsc911x_reg_write(pdata, PMT_CTRL, temp | PMT_CTRL_PHY_RST_);
 	do {
 		msleep(1);
-		temp = smsc911x_mii_read(phy_dev->bus, phy_dev->addr,
-			MII_BMCR);
-	} while ((i--) && (temp & BMCR_RESET));
+		temp = smsc911x_reg_read(pdata, PMT_CTRL);
+	} while ((i--) && (temp & PMT_CTRL_PHY_RST_));
 
-	if (temp & BMCR_RESET) {
+	if (unlikely(temp & PMT_CTRL_PHY_RST_)) {
 		SMSC_WARN(pdata, hw, "PHY reset failed to complete");
 		return -EIO;
 	}
@@ -2296,7 +2291,7 @@ static int smsc911x_init(struct net_device *dev)
 	}
 
 	/* Reset the LAN911x */
-	if (smsc911x_soft_reset(pdata))
+	if (smsc911x_phy_reset(pdata) || smsc911x_soft_reset(pdata))
 		return -ENODEV;
 
 	dev->flags |= IFF_MULTICAST;

From 2452cb0c6503f05ab371b26c3216c5fba54d4476 Mon Sep 17 00:00:00 2001
From: Masaru Nagai <masaru.nagai.vx@renesas.com>
Date: Fri, 13 Nov 2015 19:24:49 +0900
Subject: [PATCH 48/83] ravb: Fix int mask value overwritten issue

When RX/TX interrupt for Network Control queue and Best Effort queue
is issued at the same time, the interrupt mask of Network Control
queue will be reset when the mask of Best Effort queue is set.
This patch fixes this problem.

Signed-off-by: Masaru Nagai <masaru.nagai.vx@renesas.com>
Signed-off-by: Yoshihiro Kaneko <ykaneko0929@gmail.com>
Acked-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/ravb_main.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index aa7b2083cb53..7180e26f5c67 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -733,8 +733,10 @@ static irqreturn_t ravb_interrupt(int irq, void *dev_id)
 			    ((tis  & tic)  & BIT(q))) {
 				if (napi_schedule_prep(&priv->napi[q])) {
 					/* Mask RX and TX interrupts */
-					ravb_write(ndev, ric0 & ~BIT(q), RIC0);
-					ravb_write(ndev, tic  & ~BIT(q), TIC);
+					ric0 &= ~BIT(q);
+					tic &= ~BIT(q);
+					ravb_write(ndev, ric0, RIC0);
+					ravb_write(ndev, tic, TIC);
 					__napi_schedule(&priv->napi[q]);
 				} else {
 					netdev_warn(ndev,

From 4114ec905de2f02b91703c484bba9b62dc3bbb87 Mon Sep 17 00:00:00 2001
From: Ivan Vecera <ivecera@redhat.com>
Date: Fri, 13 Nov 2015 11:36:57 +0100
Subject: [PATCH 49/83] be2net: remove unused local rsstable array

Remove rsstable array and its initialization from be_set_rss_hash_opts().
The array became unused after "e255787 be2net: Support for configurable
RSS hash key". The initial RSS table is now filled and stored for later
usage during Rx queue creation.

Signed-off-by: Ivan Vecera <ivecera@redhat.com>
Acked-by: Sathya Perla <sathya.perla@avagotech.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/emulex/benet/be_ethtool.c | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index f4cb8e425853..7246eac5148a 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -1062,9 +1062,7 @@ static int be_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
 static int be_set_rss_hash_opts(struct be_adapter *adapter,
 				struct ethtool_rxnfc *cmd)
 {
-	struct be_rx_obj *rxo;
-	int status = 0, i, j;
-	u8 rsstable[128];
+	int status;
 	u32 rss_flags = adapter->rss_info.rss_flags;
 
 	if (cmd->data != L3_RSS_FLAGS &&
@@ -1113,17 +1111,7 @@ static int be_set_rss_hash_opts(struct be_adapter *adapter,
 	}
 
 	if (rss_flags == adapter->rss_info.rss_flags)
-		return status;
-
-	if (be_multi_rxq(adapter)) {
-		for (j = 0; j < 128; j += adapter->num_rss_qs) {
-			for_all_rss_queues(adapter, rxo, i) {
-				if ((j + i) >= 128)
-					break;
-				rsstable[j + i] = rxo->rss_id;
-			}
-		}
-	}
+		return 0;
 
 	status = be_cmd_rss_config(adapter, adapter->rss_info.rsstable,
 				   rss_flags, 128, adapter->rss_info.rss_hkey);

From d5d309815bc377ec8235df380fda6d1befbc57cc Mon Sep 17 00:00:00 2001
From: Ivan Vecera <ivecera@redhat.com>
Date: Fri, 13 Nov 2015 11:36:58 +0100
Subject: [PATCH 50/83] be2net: replace hardcoded values with existing define

Signed-off-by: Ivan Vecera <ivecera@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/emulex/benet/be_ethtool.c | 3 ++-
 drivers/net/ethernet/emulex/benet/be_main.c    | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index 7246eac5148a..734f655c99c1 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -1114,7 +1114,8 @@ static int be_set_rss_hash_opts(struct be_adapter *adapter,
 		return 0;
 
 	status = be_cmd_rss_config(adapter, adapter->rss_info.rsstable,
-				   rss_flags, 128, adapter->rss_info.rss_hkey);
+				   rss_flags, RSS_INDIR_TABLE_LEN,
+				   adapter->rss_info.rss_hkey);
 	if (!status)
 		adapter->rss_info.rss_flags = rss_flags;
 
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index eb48a977f8da..b6ad02909d6b 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -3518,7 +3518,7 @@ static int be_rx_qs_create(struct be_adapter *adapter)
 
 	netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
 	rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
-			       128, rss_key);
+			       RSS_INDIR_TABLE_LEN, rss_key);
 	if (rc) {
 		rss->rss_flags = RSS_ENABLE_NONE;
 		return rc;

From bbe14f54297467ddb23b7d1db564a2468c6ae151 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 13 Nov 2015 13:06:12 +0200
Subject: [PATCH 51/83] switchdev: bridge: Check return code is not EOPNOTSUPP

When NET_SWITCHDEV=n, switchdev_port_attr_set simply returns EOPNOTSUPP.
In this case we should not emit errors and warnings to the kernel log.

Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Fixes: 0bc05d585d38 ("switchdev: allow caller to explicitly request
attr_set as deferred")
Fixes: 6ac311ae8bfb ("Adding switchdev ageing notification on port
bridged")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_stp.c    | 2 +-
 net/bridge/br_stp_if.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index f7e8dee64fc8..5f3f64553179 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -48,7 +48,7 @@ void br_set_state(struct net_bridge_port *p, unsigned int state)
 
 	p->state = state;
 	err = switchdev_port_attr_set(p->dev, &attr);
-	if (err)
+	if (err && err != -EOPNOTSUPP)
 		br_warn(p->br, "error setting offload STP state on port %u(%s)\n",
 				(unsigned int) p->port_no, p->dev->name);
 }
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index fa53d7a89f48..5396ff08af32 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -50,7 +50,7 @@ void br_init_port(struct net_bridge_port *p)
 	p->config_pending = 0;
 
 	err = switchdev_port_attr_set(p->dev, &attr);
-	if (err)
+	if (err && err != -EOPNOTSUPP)
 		netdev_err(p->dev, "failed to set HW ageing time\n");
 }
 

From 5f8dc33e8ee7e59bee3bc6dc2088807a384b285a Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Fri, 13 Nov 2015 14:54:01 +0100
Subject: [PATCH 52/83] net: fix feature changes on devices without
 ndo_set_features

When __netdev_update_features() was updated to ensure some features are
disabled on new lower devices, an error was introduced for devices which
don't have the ndo_set_features() method set. Before we'll just set the
new features, but now we return an error and don't set them. Fix this by
returning the old behaviour and setting err to 0 when ndo_set_features
is not present.

Fixes: e7868a85e1b2 ("net/core: ensure features get disabled on new lower devs")
CC: Jarod Wilson <jarod@redhat.com>
CC: Jiri Pirko <jiri@resnulli.us>
CC: Ido Schimmel <idosch@mellanox.com>
CC: Sander Eikelenboom <linux@eikelenboom.it>
CC: Andy Gospodarek <gospo@cumulusnetworks.com>
CC: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Reviewed-by: Jarod Wilson <jarod@redhat.com>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>
Tested-by: Dave Young <dyoung@redhat.com>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/core/dev.c b/net/core/dev.c
index ab9b8d0d115e..4a1d198dbbff 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6426,6 +6426,8 @@ int __netdev_update_features(struct net_device *dev)
 
 	if (dev->netdev_ops->ndo_set_features)
 		err = dev->netdev_ops->ndo_set_features(dev, features);
+	else
+		err = 0;
 
 	if (unlikely(err < 0)) {
 		netdev_err(dev,

From 00ee5927177792a6e139d50b6b7564d35705556a Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Fri, 13 Nov 2015 15:20:24 +0100
Subject: [PATCH 53/83] net: fix __netdev_update_features return on
 ndo_set_features failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If ndo_set_features fails __netdev_update_features() will return -1 but
this is wrong because it is expected to return 0 if no features were
changed (see netdev_update_features()), which will cause a netdev
notifier to be called without any actual changes. Fix this by returning
0 if ndo_set_features fails.

Fixes: 6cb6a27c45ce ("net: Call netdev_features_change() from netdev_update_features()")
CC: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 4a1d198dbbff..1974aee005a6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6433,7 +6433,7 @@ int __netdev_update_features(struct net_device *dev)
 		netdev_err(dev,
 			"set_features() failed (%d); wanted %pNF, left %pNF\n",
 			err, &features, &dev->features);
-		return -1;
+		return 0;
 	}
 
 sync_lower:

From 0b88393cdf6b1322522849e61f7a3328f4fd3843 Mon Sep 17 00:00:00 2001
From: Daniele Palmas <dnlplm@gmail.com>
Date: Fri, 13 Nov 2015 18:01:21 +0100
Subject: [PATCH 54/83] net: usb: cdc_ether: add Dell DW5580 as a mobile
 broadband adapter

Since Dell DW5580 is a 3G modem, this patch adds the device as a
mobile broadband adapter

Signed-off-by: Daniele Palmas <dnlplm@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ether.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index c78d3cb1b464..3da70bf9936a 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -695,6 +695,11 @@ static const struct usb_device_id	products[] = {
 	USB_VENDOR_AND_INTERFACE_INFO(0x1bc7, USB_CLASS_COMM,
 			USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
 	.driver_info = (kernel_ulong_t) &wwan_info,
+}, {
+	/* Dell DW5580 modules */
+	USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, 0x81ba, USB_CLASS_COMM,
+			USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
+	.driver_info = (kernel_ulong_t)&wwan_info,
 }, {
 	USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET,
 			USB_CDC_PROTO_NONE),

From d37d5ec861b0d937c0bdd70f0138e2750aacccdb Mon Sep 17 00:00:00 2001
From: Shrikrishna Khare <skhare@vmware.com>
Date: Fri, 13 Nov 2015 15:42:10 -0800
Subject: [PATCH 55/83] Driver: Vmxnet3: Fix use of mfTableLen for big endian
 architectures

Signed-off-by: Shrikrishna Khare <skhare@vmware.com>
Reported-by: Masao Uebayashi <uebayasi@gmail.com>
Signed-off-by: Bhavesh Davda <bhavesh@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vmxnet3/vmxnet3_drv.c | 7 ++++---
 drivers/net/vmxnet3/vmxnet3_int.h | 4 ++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 46f4caddccbe..899ea4288197 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -2157,12 +2157,13 @@ vmxnet3_set_mc(struct net_device *netdev)
 		if (!netdev_mc_empty(netdev)) {
 			new_table = vmxnet3_copy_mc(netdev);
 			if (new_table) {
-				rxConf->mfTableLen = cpu_to_le16(
-					netdev_mc_count(netdev) * ETH_ALEN);
+				size_t sz = netdev_mc_count(netdev) * ETH_ALEN;
+
+				rxConf->mfTableLen = cpu_to_le16(sz);
 				new_table_pa = dma_map_single(
 							&adapter->pdev->dev,
 							new_table,
-							rxConf->mfTableLen,
+							sz,
 							PCI_DMA_TODEVICE);
 			}
 
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index 3f859a55c035..4c58c83dc225 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -69,10 +69,10 @@
 /*
  * Version numbers
  */
-#define VMXNET3_DRIVER_VERSION_STRING   "1.4.3.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING   "1.4.4.0-k"
 
 /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM      0x01040300
+#define VMXNET3_DRIVER_VERSION_NUM      0x01040400
 
 #if defined(CONFIG_PCI_MSI)
 	/* RSS only makes sense if MSI-X is supported. */

From 166e23623e7482070aa124ad805f600672377019 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia.lawall@lip6.fr>
Date: Sat, 14 Nov 2015 11:06:53 +0100
Subject: [PATCH 56/83] net: cavium: liquidio: constify pci_error_handlers
 structures

This pci_error_handlers structure is never modified, like all the other
pci_error_handlers structures, so declare it as const.

Done with the help of Coccinelle.

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/lio_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index f683d97d7614..b89504405b72 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -560,7 +560,7 @@ static int liquidio_resume(struct pci_dev *pdev)
 #endif
 
 /* For PCI-E Advanced Error Recovery (AER) Interface */
-static struct pci_error_handlers liquidio_err_handler = {
+static const struct pci_error_handlers liquidio_err_handler = {
 	.error_detected = liquidio_pcie_error_detected,
 	.mmio_enabled	= liquidio_pcie_mmio_enabled,
 	.slot_reset	= liquidio_pcie_slot_reset,

From c300366b6b978fcb84f8eeb6205e5980cc0c40c3 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia.lawall@lip6.fr>
Date: Sat, 14 Nov 2015 11:06:57 +0100
Subject: [PATCH 57/83] sfc: constify pci_error_handlers structures

This pci_error_handlers structure is never modified, like all the other
pci_error_handlers structures, so declare it as const.

Done with the help of Coccinelle.

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/efx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index d288f1c928de..a3c42a376741 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -3422,7 +3422,7 @@ out:
  * with our request for slot reset the mmio_enabled callback will never be
  * called, and the link_reset callback is not used by AER or EEH mechanisms.
  */
-static struct pci_error_handlers efx_err_handlers = {
+static const struct pci_error_handlers efx_err_handlers = {
 	.error_detected = efx_io_error_detected,
 	.slot_reset	= efx_io_slot_reset,
 	.resume		= efx_io_resume,

From 027ac58e3c757277dd6cb8975d1b69c27853aa76 Mon Sep 17 00:00:00 2001
From: Ben Cartwright-Cox <ben@benjojo.co.uk>
Date: Sat, 14 Nov 2015 15:13:58 +0000
Subject: [PATCH 58/83] raw: increment correct SNMP counters for ICMP messages

Sending ICMP packets with raw sockets ends up in the SNMP counters
logging the type as the first byte of the IPv4 header rather than
the ICMP header. This is fixed by adding the IP Header Length to
the casting into a icmphdr struct.

Signed-off-by: Ben Cartwright-Cox <ben@benjojo.co.uk>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/raw.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 8c0d0bdc2a7c..63e5be0abd86 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -406,10 +406,12 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 			ip_select_ident(net, skb, NULL);
 
 		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+		skb->transport_header += iphlen;
+		if (iph->protocol == IPPROTO_ICMP &&
+		    length >= iphlen + sizeof(struct icmphdr))
+			icmp_out_count(net, ((struct icmphdr *)
+				skb_transport_header(skb))->type);
 	}
-	if (iph->protocol == IPPROTO_ICMP)
-		icmp_out_count(net, ((struct icmphdr *)
-			skb_transport_header(skb))->type);
 
 	err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
 		      net, sk, skb, NULL, rt->dst.dev,

From d60cf616ec39141b3483304fde69acf0dcec80eb Mon Sep 17 00:00:00 2001
From: Masaru Nagai <masaru.nagai.vx@renesas.com>
Date: Sun, 15 Nov 2015 21:34:42 +0900
Subject: [PATCH 59/83] ravb: remove unhandle int cause

This driver does not handle the AVB-DMAC Receive FIFO Warning interrupt
now, so the interrupt should not be enabled.

Signed-off-by: Masaru Nagai <masaru.nagai.vx@renesas.com>
Signed-off-by: Yoshihiro Kaneko <ykaneko0929@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/ravb_main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 7180e26f5c67..ee8d1ec61fab 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -408,8 +408,6 @@ static int ravb_dmac_init(struct net_device *ndev)
 	/* Interrupt enable: */
 	/* Frame receive */
 	ravb_write(ndev, RIC0_FRE0 | RIC0_FRE1, RIC0);
-	/* Receive FIFO full warning */
-	ravb_write(ndev, RIC1_RFWE, RIC1);
 	/* Receive FIFO full error, descriptor empty */
 	ravb_write(ndev, RIC2_QFE0 | RIC2_QFE1 | RIC2_RFFE, RIC2);
 	/* Frame transmitted, timestamp FIFO updated */

From ab6d7846cf80affc43b9d412fed5e25dfcf4f35d Mon Sep 17 00:00:00 2001
From: Yuval Mintz <Yuval.Mintz@qlogic.com>
Date: Sun, 15 Nov 2015 15:02:16 +0200
Subject: [PATCH 60/83] bnx2x: Fix VLANs null-pointer for 57710, 57711

Commit 05cc5a39ddb7 "bnx2x: add vlan filtering offload" introduced
a regression in regard for vlans for 57710, 57711 adapters -
Loading 8021q module on a machine with such an adapter would cause
a null pointer dereference, as the driver mistakenly publishes it
has capabilities for vlan CTAG filtering.

Reported-by: Otto Sabart <osabart@redhat.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@qlogic.com>
Signed-off-by: Ariel Elior <Ariel.Elior@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index f1d62d5dbaff..c9b036789184 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -13207,7 +13207,7 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev,
 
 	/* VF with OLD Hypervisor or old PF do not support filtering */
 	if (IS_PF(bp)) {
-		if (CHIP_IS_E1x(bp))
+		if (chip_is_e1x)
 			bp->accept_any_vlan = true;
 		else
 			dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;

From 24cb7055a3066634a0f3fa0cd6a4780652905d35 Mon Sep 17 00:00:00 2001
From: Dragos Tatulea <dragos@endocode.com>
Date: Mon, 16 Nov 2015 10:52:48 +0100
Subject: [PATCH 61/83] net: switchdev: fix return code of fdb_dump stub

rtnl_fdb_dump always expects an index to be returned by the ndo_fdb_dump op,
but when CONFIG_NET_SWITCHDEV is off, it returns an error.

Fix that by returning the given unmodified idx.

A similar fix was 0890cf6cb6ab ("switchdev: fix return value of
switchdev_port_fdb_dump in case of error") but for the CONFIG_NET_SWITCHDEV=y
case.

Fixes: 45d4122ca7cd ("switchdev: add support for fdb add/del/dump via switchdev_port_obj ops.")
Signed-off-by: Dragos Tatulea <dragos@endocode.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index bc865e244efe..1d22ce9f352e 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -323,7 +323,7 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb,
 					  struct net_device *filter_dev,
 					  int idx)
 {
-	return -EOPNOTSUPP;
+       return idx;
 }
 
 static inline void switchdev_port_fwd_mark_set(struct net_device *dev,

From 8844f97238ca6c1ca92a5d6c69f53efd361a266f Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 16 Nov 2015 16:25:56 +0100
Subject: [PATCH 62/83] af_unix: don't append consumed skbs to sk_receive_queue

In case multiple writes to a unix stream socket race we could end up in a
situation where we pre-allocate a new skb for use in unix_stream_sendpage
but have to free it again in the locked section because another skb
has been appended meanwhile, which we must use. Accidentally we didn't
clear the pointer after consuming it and so we touched freed memory
while appending it to the sk_receive_queue. So, clear the pointer after
consuming the skb.

This bug has been found with syzkaller
(http://github.com/google/syzkaller) by Dmitry Vyukov.

Fixes: 869e7c62486e ("net: af_unix: implement stream sendpage support")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 12b886f07982..a8352db5c5b5 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1800,6 +1800,7 @@ alloc_skb:
 		 * this - does no harm
 		 */
 		consume_skb(newskb);
+		newskb = NULL;
 	}
 
 	if (skb_append_pagefrags(skb, page, offset, size)) {

From 88ad4175b201ae24be5e9b7752cf33c1306b64e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Mon, 16 Nov 2015 19:16:40 +0100
Subject: [PATCH 63/83] net/core: use netdev name in warning if no parent
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A recent flaw in the netdev feature setting resulted in warnings
like this one from VLAN interfaces:

 WARNING: CPU: 1 PID: 4975 at net/core/dev.c:2419 skb_warn_bad_offload+0xbc/0xcb()
 : caps=(0x00000000001b5820, 0x00000000001b5829) len=2782 data_len=0 gso_size=1348 gso_type=16 ip_summed=3

The ":" is supposed to be preceded by a driver name, but in this
case it is an empty string since the device has no parent.

There are many types of network devices without a parent. The
anonymous warnings for these devices can be hard to debug.  Log
the network device name instead in these cases to assist further
debugging.

This is mostly similar to how __netdev_printk() handles orphan
devices.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 1974aee005a6..5dbc86ea6b58 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2403,17 +2403,20 @@ static void skb_warn_bad_offload(const struct sk_buff *skb)
 {
 	static const netdev_features_t null_features = 0;
 	struct net_device *dev = skb->dev;
-	const char *driver = "";
+	const char *name = "";
 
 	if (!net_ratelimit())
 		return;
 
-	if (dev && dev->dev.parent)
-		driver = dev_driver_string(dev->dev.parent);
-
+	if (dev) {
+		if (dev->dev.parent)
+			name = dev_driver_string(dev->dev.parent);
+		else
+			name = netdev_name(dev);
+	}
 	WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
 	     "gso_type=%d ip_summed=%d\n",
-	     driver, dev ? &dev->features : &null_features,
+	     name, dev ? &dev->features : &null_features,
 	     skb->sk ? &skb->sk->sk_route_caps : &null_features,
 	     skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
 	     skb_shinfo(skb)->gso_type, skb->ip_summed);

From 7750130d93decff06120df0d8ea024ff8a038a21 Mon Sep 17 00:00:00 2001
From: Pavel Fedin <p.fedin@samsung.com>
Date: Mon, 16 Nov 2015 17:51:34 +0300
Subject: [PATCH 64/83] net: thunder: Check for driver data in nicvf_remove()

In some cases the crash is caused by nicvf_remove() being called from
outside. For example, if we try to feed the device to vfio after the
probe has failed for some reason. So, move the check to better place.

Signed-off-by: Pavel Fedin <p.fedin@samsung.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/thunder/nicvf_main.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 372c39e5bcbd..7f709cbdcd87 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -1583,8 +1583,14 @@ err_disable_device:
 static void nicvf_remove(struct pci_dev *pdev)
 {
 	struct net_device *netdev = pci_get_drvdata(pdev);
-	struct nicvf *nic = netdev_priv(netdev);
-	struct net_device *pnetdev = nic->pnicvf->netdev;
+	struct nicvf *nic;
+	struct net_device *pnetdev;
+
+	if (!netdev)
+		return;
+
+	nic = netdev_priv(netdev);
+	pnetdev = nic->pnicvf->netdev;
 
 	/* Check if this Qset is assigned to different VF.
 	 * If yes, clean primary and all secondary Qsets.
@@ -1600,9 +1606,6 @@ static void nicvf_remove(struct pci_dev *pdev)
 
 static void nicvf_shutdown(struct pci_dev *pdev)
 {
-	if (!pci_get_drvdata(pdev))
-		return;
-
 	nicvf_remove(pdev);
 }
 

From 41033f029e393a64e81966cbe34d66c6cf8a2e7e Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Mon, 16 Nov 2015 13:09:10 -0500
Subject: [PATCH 65/83] snmp: Remove duplicate OUTMCAST stat increment

the OUTMCAST stat is double incremented, getting bumped once in the mcast code
itself, and again in the common ip output path.  Remove the mcast bump, as its
not needed

Validated by the reporter, with good results

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Reported-by: Claus Jensen <claus.jensen@microsemi.com>
CC: Claus Jensen <claus.jensen@microsemi.com>
CC: David Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/mcast.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 124338a39e29..5ee56d0a8699 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1651,7 +1651,6 @@ out:
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
-		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
 	} else {
 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 	}
@@ -2015,7 +2014,6 @@ out:
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS(net, idev, type);
 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
-		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, full_len);
 	} else
 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 

From c3f45d322cbd379c46466cc2ecab7e2d719b22ed Mon Sep 17 00:00:00 2001
From: Ondrej Zary <linux@rainbow-software.org>
Date: Sun, 15 Nov 2015 22:36:11 +0100
Subject: [PATCH 66/83] dl2k: Add support for IP1000A-based cards

Add support for IP1000A chips to dl2k driver.
IP1000A chip looks like a TC9020 with integrated PHY.

This allows IP1000A chips to work reliably because the ipg driver is
buggy - it loses packets under load and then completely stops
transmitting data.

Tested with Asus NX1101 v2.0 at 10, 100 and 1000Mbps:
vendor=0x13f0 device=0x1023 (rev 0x41)
subsystem vendor=0x1043 device=0x8180

MAC address registers access needed to be changed from 8-bit to 16-bit
because 8-bit does not work on IP1000A. 8-bit access is not even
allowed in the TC9020 datasheet (although it worked). 16-bit access
works on both.

Tested that it does not break D-Link DGE-550T (DL-2000 chip, probably
a rebranded TC9020):
vendor=0x1186 device=0x4000 (rev 0x0c)
subsystem vendor=0x1186 device=0x4000

Signed-off-by: Ondrej Zary <linux@rainbow-software.org>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/dlink/Kconfig |  5 +--
 drivers/net/ethernet/dlink/dl2k.c  | 55 ++++++++++++++++++++++++++++--
 drivers/net/ethernet/dlink/dl2k.h  | 15 +++++++-
 3 files changed, 69 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/dlink/Kconfig b/drivers/net/ethernet/dlink/Kconfig
index f6e858d0b9d4..ebdc83247bb6 100644
--- a/drivers/net/ethernet/dlink/Kconfig
+++ b/drivers/net/ethernet/dlink/Kconfig
@@ -17,15 +17,16 @@ config NET_VENDOR_DLINK
 if NET_VENDOR_DLINK
 
 config DL2K
-	tristate "DL2000/TC902x-based Gigabit Ethernet support"
+	tristate "DL2000/TC902x/IP1000A-based Gigabit Ethernet support"
 	depends on PCI
 	select CRC32
 	---help---
-	  This driver supports DL2000/TC902x-based Gigabit ethernet cards,
+	  This driver supports DL2000/TC902x/IP1000A-based Gigabit ethernet cards,
 	  which includes
 	  D-Link DGE-550T Gigabit Ethernet Adapter.
 	  D-Link DL2000-based Gigabit Ethernet Adapter.
 	  Sundance/Tamarack TC902x Gigabit Ethernet Adapter.
+	  ICPlus IP1000A-based cards
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called dl2k.
diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c
index cf0a5fcdaaaf..ccca4799c27b 100644
--- a/drivers/net/ethernet/dlink/dl2k.c
+++ b/drivers/net/ethernet/dlink/dl2k.c
@@ -253,6 +253,19 @@ rio_probe1 (struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		goto err_out_unmap_rx;
 
+	if (np->chip_id == CHIP_IP1000A &&
+	    (np->pdev->revision == 0x40 || np->pdev->revision == 0x41)) {
+		/* PHY magic taken from ipg driver, undocumented registers */
+		mii_write(dev, np->phy_addr, 31, 0x0001);
+		mii_write(dev, np->phy_addr, 27, 0x01e0);
+		mii_write(dev, np->phy_addr, 31, 0x0002);
+		mii_write(dev, np->phy_addr, 27, 0xeb8e);
+		mii_write(dev, np->phy_addr, 31, 0x0000);
+		mii_write(dev, np->phy_addr, 30, 0x005e);
+		/* advertise 1000BASE-T half & full duplex, prefer MASTER */
+		mii_write(dev, np->phy_addr, MII_CTRL1000, 0x0700);
+	}
+
 	/* Fiber device? */
 	np->phy_media = (dr16(ASICCtrl) & PhyMedia) ? 1 : 0;
 	np->link_status = 0;
@@ -361,6 +374,11 @@ parse_eeprom (struct net_device *dev)
 	for (i = 0; i < 6; i++)
 		dev->dev_addr[i] = psrom->mac_addr[i];
 
+	if (np->chip_id == CHIP_IP1000A) {
+		np->led_mode = psrom->led_mode;
+		return 0;
+	}
+
 	if (np->pdev->vendor != PCI_VENDOR_ID_DLINK) {
 		return 0;
 	}
@@ -406,6 +424,28 @@ parse_eeprom (struct net_device *dev)
 	return 0;
 }
 
+static void rio_set_led_mode(struct net_device *dev)
+{
+	struct netdev_private *np = netdev_priv(dev);
+	void __iomem *ioaddr = np->ioaddr;
+	u32 mode;
+
+	if (np->chip_id != CHIP_IP1000A)
+		return;
+
+	mode = dr32(ASICCtrl);
+	mode &= ~(IPG_AC_LED_MODE_BIT_1 | IPG_AC_LED_MODE | IPG_AC_LED_SPEED);
+
+	if (np->led_mode & 0x01)
+		mode |= IPG_AC_LED_MODE;
+	if (np->led_mode & 0x02)
+		mode |= IPG_AC_LED_MODE_BIT_1;
+	if (np->led_mode & 0x08)
+		mode |= IPG_AC_LED_SPEED;
+
+	dw32(ASICCtrl, mode);
+}
+
 static int
 rio_open (struct net_device *dev)
 {
@@ -424,6 +464,8 @@ rio_open (struct net_device *dev)
 	     GlobalReset | DMAReset | FIFOReset | NetworkReset | HostReset);
 	mdelay(10);
 
+	rio_set_led_mode(dev);
+
 	/* DebugCtrl bit 4, 5, 9 must set */
 	dw32(DebugCtrl, dr32(DebugCtrl) | 0x0230);
 
@@ -433,9 +475,13 @@ rio_open (struct net_device *dev)
 
 	alloc_list (dev);
 
-	/* Get station address */
-	for (i = 0; i < 6; i++)
-		dw8(StationAddr0 + i, dev->dev_addr[i]);
+	/* Set station address */
+	/* 16 or 32-bit access is required by TC9020 datasheet but 8-bit works
+	 * too. However, it doesn't work on IP1000A so we use 16-bit access.
+	 */
+	for (i = 0; i < 3; i++)
+		dw16(StationAddr0 + 2 * i,
+		     cpu_to_le16(((u16 *)dev->dev_addr)[i]));
 
 	set_multicast (dev);
 	if (np->coalesce) {
@@ -780,6 +826,7 @@ tx_error (struct net_device *dev, int tx_status)
 				break;
 			mdelay (1);
 		}
+		rio_set_led_mode(dev);
 		rio_free_tx (dev, 1);
 		/* Reset TFDListPtr */
 		dw32(TFDListPtr0, np->tx_ring_dma +
@@ -799,6 +846,7 @@ tx_error (struct net_device *dev, int tx_status)
 				break;
 			mdelay (1);
 		}
+		rio_set_led_mode(dev);
 		/* Let TxStartThresh stay default value */
 	}
 	/* Maximum Collisions */
@@ -965,6 +1013,7 @@ rio_error (struct net_device *dev, int int_status)
 			dev->name, int_status);
 		dw16(ASICCtrl + 2, GlobalReset | HostReset);
 		mdelay (500);
+		rio_set_led_mode(dev);
 	}
 }
 
diff --git a/drivers/net/ethernet/dlink/dl2k.h b/drivers/net/ethernet/dlink/dl2k.h
index 23c07b007069..8f4f61262d5c 100644
--- a/drivers/net/ethernet/dlink/dl2k.h
+++ b/drivers/net/ethernet/dlink/dl2k.h
@@ -211,6 +211,10 @@ enum ASICCtrl_HiWord_bits {
 	ResetBusy = 0x0400,
 };
 
+#define IPG_AC_LED_MODE		BIT(14)
+#define IPG_AC_LED_SPEED	BIT(27)
+#define IPG_AC_LED_MODE_BIT_1	BIT(29)
+
 /* Transmit Frame Control bits */
 enum TFC_bits {
 	DwordAlign = 0x00000000,
@@ -332,7 +336,10 @@ typedef struct t_SROM {
 	u16 asic_ctrl;		/* 0x02 */
 	u16 sub_vendor_id;	/* 0x04 */
 	u16 sub_system_id;	/* 0x06 */
-	u16 reserved1[12];	/* 0x08-0x1f */
+	u16 pci_base_1;		/* 0x08 (IP1000A only) */
+	u16 pci_base_2;		/* 0x0a (IP1000A only) */
+	u16 led_mode;		/* 0x0c (IP1000A only) */
+	u16 reserved1[9];	/* 0x0e-0x1f */
 	u8 mac_addr[6];		/* 0x20-0x25 */
 	u8 reserved2[10];	/* 0x26-0x2f */
 	u8 sib[204];		/* 0x30-0xfb */
@@ -397,6 +404,7 @@ struct netdev_private {
 	u16 advertising;	/* NWay media advertisement */
 	u16 negotiate;		/* Negotiated media */
 	int phy_addr;		/* PHY addresses. */
+	u16 led_mode;		/* LED mode read from EEPROM (IP1000A only) */
 };
 
 /* The station address location in the EEPROM. */
@@ -407,10 +415,15 @@ struct netdev_private {
         class_mask              of the class are honored during the comparison.
         driver_data             Data private to the driver.
 */
+#define CHIP_IP1000A	1
 
 static const struct pci_device_id rio_pci_tbl[] = {
 	{0x1186, 0x4000, PCI_ANY_ID, PCI_ANY_ID, },
 	{0x13f0, 0x1021, PCI_ANY_ID, PCI_ANY_ID, },
+	{ PCI_VDEVICE(SUNDANCE,	0x1023), CHIP_IP1000A },
+	{ PCI_VDEVICE(SUNDANCE,	0x2021), CHIP_IP1000A },
+	{ PCI_VDEVICE(DLINK,	0x9021), CHIP_IP1000A },
+	{ PCI_VDEVICE(DLINK,	0x4020), CHIP_IP1000A },
 	{ }
 };
 MODULE_DEVICE_TABLE (pci, rio_pci_tbl);

From f1a454a37618b819f2528ccd234f77a02b3a6016 Mon Sep 17 00:00:00 2001
From: Ondrej Zary <linux@rainbow-software.org>
Date: Sun, 15 Nov 2015 22:36:12 +0100
Subject: [PATCH 67/83] ipg: Remove ipg driver

Now that IP1000A chips are supported by dl2k driver, the buggy ipg
driver can be removed.

Signed-off-by: Ondrej Zary <linux@rainbow-software.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                          |    7 -
 drivers/net/ethernet/Kconfig         |    1 -
 drivers/net/ethernet/Makefile        |    1 -
 drivers/net/ethernet/icplus/Kconfig  |   13 -
 drivers/net/ethernet/icplus/Makefile |    5 -
 drivers/net/ethernet/icplus/ipg.c    | 2300 --------------------------
 drivers/net/ethernet/icplus/ipg.h    |  748 ---------
 7 files changed, 3075 deletions(-)
 delete mode 100644 drivers/net/ethernet/icplus/Kconfig
 delete mode 100644 drivers/net/ethernet/icplus/Makefile
 delete mode 100644 drivers/net/ethernet/icplus/ipg.c
 delete mode 100644 drivers/net/ethernet/icplus/ipg.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 35fe7ae0492e..45320675a460 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5709,13 +5709,6 @@ M:	Juanjo Ciarlante <jjciarla@raiz.uncu.edu.ar>
 S:	Maintained
 F:	net/ipv4/netfilter/ipt_MASQUERADE.c
 
-IP1000A 10/100/1000 GIGABIT ETHERNET DRIVER
-M:	Francois Romieu <romieu@fr.zoreil.com>
-M:	Sorbica Shieh <sorbica@icplus.com.tw>
-L:	netdev@vger.kernel.org
-S:	Maintained
-F:	drivers/net/ethernet/icplus/ipg.*
-
 IPATH DRIVER
 M:	Mike Marciniszyn <infinipath@intel.com>
 L:	linux-rdma@vger.kernel.org
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 05aa7597dab9..955d06b9cdba 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -78,7 +78,6 @@ source "drivers/net/ethernet/ibm/Kconfig"
 source "drivers/net/ethernet/intel/Kconfig"
 source "drivers/net/ethernet/i825xx/Kconfig"
 source "drivers/net/ethernet/xscale/Kconfig"
-source "drivers/net/ethernet/icplus/Kconfig"
 
 config JME
 	tristate "JMicron(R) PCI-Express Gigabit Ethernet support"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index ddfc808110a1..4a2ee98738f0 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -41,7 +41,6 @@ obj-$(CONFIG_NET_VENDOR_IBM) += ibm/
 obj-$(CONFIG_NET_VENDOR_INTEL) += intel/
 obj-$(CONFIG_NET_VENDOR_I825XX) += i825xx/
 obj-$(CONFIG_NET_VENDOR_XSCALE) += xscale/
-obj-$(CONFIG_IP1000) += icplus/
 obj-$(CONFIG_JME) += jme.o
 obj-$(CONFIG_KORINA) += korina.o
 obj-$(CONFIG_LANTIQ_ETOP) += lantiq_etop.o
diff --git a/drivers/net/ethernet/icplus/Kconfig b/drivers/net/ethernet/icplus/Kconfig
deleted file mode 100644
index 14a66e9d2e26..000000000000
--- a/drivers/net/ethernet/icplus/Kconfig
+++ /dev/null
@@ -1,13 +0,0 @@
-#
-# IC Plus device configuration
-#
-
-config IP1000
-	tristate "IP1000 Gigabit Ethernet support"
-	depends on PCI
-	select MII
-	---help---
-	  This driver supports IP1000 gigabit Ethernet cards.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called ipg.  This is recommended.
diff --git a/drivers/net/ethernet/icplus/Makefile b/drivers/net/ethernet/icplus/Makefile
deleted file mode 100644
index 5bc87c1f36aa..000000000000
--- a/drivers/net/ethernet/icplus/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for the IC Plus device drivers
-#
-
-obj-$(CONFIG_IP1000) += ipg.o
diff --git a/drivers/net/ethernet/icplus/ipg.c b/drivers/net/ethernet/icplus/ipg.c
deleted file mode 100644
index c3b6af83f070..000000000000
--- a/drivers/net/ethernet/icplus/ipg.c
+++ /dev/null
@@ -1,2300 +0,0 @@
-/*
- * ipg.c: Device Driver for the IP1000 Gigabit Ethernet Adapter
- *
- * Copyright (C) 2003, 2007  IC Plus Corp
- *
- * Original Author:
- *
- *   Craig Rich
- *   Sundance Technology, Inc.
- *   www.sundanceti.com
- *   craig_rich@sundanceti.com
- *
- * Current Maintainer:
- *
- *   Sorbica Shieh.
- *   http://www.icplus.com.tw
- *   sorbica@icplus.com.tw
- *
- *   Jesse Huang
- *   http://www.icplus.com.tw
- *   jesse@icplus.com.tw
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/crc32.h>
-#include <linux/ethtool.h>
-#include <linux/interrupt.h>
-#include <linux/gfp.h>
-#include <linux/mii.h>
-#include <linux/mutex.h>
-
-#include <asm/div64.h>
-
-#define IPG_RX_RING_BYTES	(sizeof(struct ipg_rx) * IPG_RFDLIST_LENGTH)
-#define IPG_TX_RING_BYTES	(sizeof(struct ipg_tx) * IPG_TFDLIST_LENGTH)
-#define IPG_RESET_MASK \
-	(IPG_AC_GLOBAL_RESET | IPG_AC_RX_RESET | IPG_AC_TX_RESET | \
-	 IPG_AC_DMA | IPG_AC_FIFO | IPG_AC_NETWORK | IPG_AC_HOST | \
-	 IPG_AC_AUTO_INIT)
-
-#define ipg_w32(val32, reg)	iowrite32((val32), ioaddr + (reg))
-#define ipg_w16(val16, reg)	iowrite16((val16), ioaddr + (reg))
-#define ipg_w8(val8, reg)	iowrite8((val8), ioaddr + (reg))
-
-#define ipg_r32(reg)		ioread32(ioaddr + (reg))
-#define ipg_r16(reg)		ioread16(ioaddr + (reg))
-#define ipg_r8(reg)		ioread8(ioaddr + (reg))
-
-enum {
-	netdev_io_size = 128
-};
-
-#include "ipg.h"
-#define DRV_NAME	"ipg"
-
-MODULE_AUTHOR("IC Plus Corp. 2003");
-MODULE_DESCRIPTION("IC Plus IP1000 Gigabit Ethernet Adapter Linux Driver");
-MODULE_LICENSE("GPL");
-
-/*
- * Defaults
- */
-#define IPG_MAX_RXFRAME_SIZE	0x0600
-#define IPG_RXFRAG_SIZE		0x0600
-#define IPG_RXSUPPORT_SIZE	0x0600
-#define IPG_IS_JUMBO		false
-
-/*
- * Variable record -- index by leading revision/length
- * Revision/Length(=N*4), Address1, Data1, Address2, Data2,...,AddressN,DataN
- */
-static const unsigned short DefaultPhyParam[] = {
-	/* 11/12/03 IP1000A v1-3 rev=0x40 */
-	/*--------------------------------------------------------------------------
-	(0x4000|(15*4)), 31, 0x0001, 27, 0x01e0, 31, 0x0002, 22, 0x85bd, 24, 0xfff2,
-				 27, 0x0c10, 28, 0x0c10, 29, 0x2c10, 31, 0x0003, 23, 0x92f6,
-				 31, 0x0000, 23, 0x003d, 30, 0x00de, 20, 0x20e7,  9, 0x0700,
-	  --------------------------------------------------------------------------*/
-	/* 12/17/03 IP1000A v1-4 rev=0x40 */
-	(0x4000 | (07 * 4)), 31, 0x0001, 27, 0x01e0, 31, 0x0002, 27, 0xeb8e, 31,
-	    0x0000,
-	30, 0x005e, 9, 0x0700,
-	/* 01/09/04 IP1000A v1-5 rev=0x41 */
-	(0x4100 | (07 * 4)), 31, 0x0001, 27, 0x01e0, 31, 0x0002, 27, 0xeb8e, 31,
-	    0x0000,
-	30, 0x005e, 9, 0x0700,
-	0x0000
-};
-
-static const char * const ipg_brand_name[] = {
-	"IC PLUS IP1000 1000/100/10 based NIC",
-	"Sundance Technology ST2021 based NIC",
-	"Tamarack Microelectronics TC9020/9021 based NIC",
-	"D-Link NIC IP1000A"
-};
-
-static const struct pci_device_id ipg_pci_tbl[] = {
-	{ PCI_VDEVICE(SUNDANCE,	0x1023), 0 },
-	{ PCI_VDEVICE(SUNDANCE,	0x2021), 1 },
-	{ PCI_VDEVICE(DLINK,	0x9021), 2 },
-	{ PCI_VDEVICE(DLINK,	0x4020), 3 },
-	{ 0, }
-};
-
-MODULE_DEVICE_TABLE(pci, ipg_pci_tbl);
-
-static inline void __iomem *ipg_ioaddr(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	return sp->ioaddr;
-}
-
-#ifdef IPG_DEBUG
-static void ipg_dump_rfdlist(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	unsigned int i;
-	u32 offset;
-
-	IPG_DEBUG_MSG("_dump_rfdlist\n");
-
-	netdev_info(dev, "rx_current = %02x\n", sp->rx_current);
-	netdev_info(dev, "rx_dirty   = %02x\n", sp->rx_dirty);
-	netdev_info(dev, "RFDList start address = %016lx\n",
-		    (unsigned long)sp->rxd_map);
-	netdev_info(dev, "RFDListPtr register   = %08x%08x\n",
-		    ipg_r32(IPG_RFDLISTPTR1), ipg_r32(IPG_RFDLISTPTR0));
-
-	for (i = 0; i < IPG_RFDLIST_LENGTH; i++) {
-		offset = (u32) &sp->rxd[i].next_desc - (u32) sp->rxd;
-		netdev_info(dev, "%02x %04x RFDNextPtr = %016lx\n",
-			    i, offset, (unsigned long)sp->rxd[i].next_desc);
-		offset = (u32) &sp->rxd[i].rfs - (u32) sp->rxd;
-		netdev_info(dev, "%02x %04x RFS        = %016lx\n",
-			    i, offset, (unsigned long)sp->rxd[i].rfs);
-		offset = (u32) &sp->rxd[i].frag_info - (u32) sp->rxd;
-		netdev_info(dev, "%02x %04x frag_info   = %016lx\n",
-			    i, offset, (unsigned long)sp->rxd[i].frag_info);
-	}
-}
-
-static void ipg_dump_tfdlist(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	unsigned int i;
-	u32 offset;
-
-	IPG_DEBUG_MSG("_dump_tfdlist\n");
-
-	netdev_info(dev, "tx_current         = %02x\n", sp->tx_current);
-	netdev_info(dev, "tx_dirty = %02x\n", sp->tx_dirty);
-	netdev_info(dev, "TFDList start address = %016lx\n",
-		    (unsigned long) sp->txd_map);
-	netdev_info(dev, "TFDListPtr register   = %08x%08x\n",
-		    ipg_r32(IPG_TFDLISTPTR1), ipg_r32(IPG_TFDLISTPTR0));
-
-	for (i = 0; i < IPG_TFDLIST_LENGTH; i++) {
-		offset = (u32) &sp->txd[i].next_desc - (u32) sp->txd;
-		netdev_info(dev, "%02x %04x TFDNextPtr = %016lx\n",
-			    i, offset, (unsigned long)sp->txd[i].next_desc);
-
-		offset = (u32) &sp->txd[i].tfc - (u32) sp->txd;
-		netdev_info(dev, "%02x %04x TFC        = %016lx\n",
-			    i, offset, (unsigned long) sp->txd[i].tfc);
-		offset = (u32) &sp->txd[i].frag_info - (u32) sp->txd;
-		netdev_info(dev, "%02x %04x frag_info   = %016lx\n",
-			    i, offset, (unsigned long) sp->txd[i].frag_info);
-	}
-}
-#endif
-
-static void ipg_write_phy_ctl(void __iomem *ioaddr, u8 data)
-{
-	ipg_w8(IPG_PC_RSVD_MASK & data, PHY_CTRL);
-	ndelay(IPG_PC_PHYCTRLWAIT_NS);
-}
-
-static void ipg_drive_phy_ctl_low_high(void __iomem *ioaddr, u8 data)
-{
-	ipg_write_phy_ctl(ioaddr, IPG_PC_MGMTCLK_LO | data);
-	ipg_write_phy_ctl(ioaddr, IPG_PC_MGMTCLK_HI | data);
-}
-
-static void send_three_state(void __iomem *ioaddr, u8 phyctrlpolarity)
-{
-	phyctrlpolarity |= (IPG_PC_MGMTDATA & 0) | IPG_PC_MGMTDIR;
-
-	ipg_drive_phy_ctl_low_high(ioaddr, phyctrlpolarity);
-}
-
-static void send_end(void __iomem *ioaddr, u8 phyctrlpolarity)
-{
-	ipg_w8((IPG_PC_MGMTCLK_LO | (IPG_PC_MGMTDATA & 0) | IPG_PC_MGMTDIR |
-		phyctrlpolarity) & IPG_PC_RSVD_MASK, PHY_CTRL);
-}
-
-static u16 read_phy_bit(void __iomem *ioaddr, u8 phyctrlpolarity)
-{
-	u16 bit_data;
-
-	ipg_write_phy_ctl(ioaddr, IPG_PC_MGMTCLK_LO | phyctrlpolarity);
-
-	bit_data = ((ipg_r8(PHY_CTRL) & IPG_PC_MGMTDATA) >> 1) & 1;
-
-	ipg_write_phy_ctl(ioaddr, IPG_PC_MGMTCLK_HI | phyctrlpolarity);
-
-	return bit_data;
-}
-
-/*
- * Read a register from the Physical Layer device located
- * on the IPG NIC, using the IPG PHYCTRL register.
- */
-static int mdio_read(struct net_device *dev, int phy_id, int phy_reg)
-{
-	void __iomem *ioaddr = ipg_ioaddr(dev);
-	/*
-	 * The GMII mangement frame structure for a read is as follows:
-	 *
-	 * |Preamble|st|op|phyad|regad|ta|      data      |idle|
-	 * |< 32 1s>|01|10|AAAAA|RRRRR|z0|DDDDDDDDDDDDDDDD|z   |
-	 *
-	 * <32 1s> = 32 consecutive logic 1 values
-	 * A = bit of Physical Layer device address (MSB first)
-	 * R = bit of register address (MSB first)
-	 * z = High impedance state
-	 * D = bit of read data (MSB first)
-	 *
-	 * Transmission order is 'Preamble' field first, bits transmitted
-	 * left to right (first to last).
-	 */
-	struct {
-		u32 field;
-		unsigned int len;
-	} p[] = {
-		{ GMII_PREAMBLE,	32 },	/* Preamble */
-		{ GMII_ST,		2  },	/* ST */
-		{ GMII_READ,		2  },	/* OP */
-		{ phy_id,		5  },	/* PHYAD */
-		{ phy_reg,		5  },	/* REGAD */
-		{ 0x0000,		2  },	/* TA */
-		{ 0x0000,		16 },	/* DATA */
-		{ 0x0000,		1  }	/* IDLE */
-	};
-	unsigned int i, j;
-	u8 polarity, data;
-
-	polarity  = ipg_r8(PHY_CTRL);
-	polarity &= (IPG_PC_DUPLEX_POLARITY | IPG_PC_LINK_POLARITY);
-
-	/* Create the Preamble, ST, OP, PHYAD, and REGAD field. */
-	for (j = 0; j < 5; j++) {
-		for (i = 0; i < p[j].len; i++) {
-			/* For each variable length field, the MSB must be
-			 * transmitted first. Rotate through the field bits,
-			 * starting with the MSB, and move each bit into the
-			 * the 1st (2^1) bit position (this is the bit position
-			 * corresponding to the MgmtData bit of the PhyCtrl
-			 * register for the IPG).
-			 *
-			 * Example: ST = 01;
-			 *
-			 *          First write a '0' to bit 1 of the PhyCtrl
-			 *          register, then write a '1' to bit 1 of the
-			 *          PhyCtrl register.
-			 *
-			 * To do this, right shift the MSB of ST by the value:
-			 * [field length - 1 - #ST bits already written]
-			 * then left shift this result by 1.
-			 */
-			data  = (p[j].field >> (p[j].len - 1 - i)) << 1;
-			data &= IPG_PC_MGMTDATA;
-			data |= polarity | IPG_PC_MGMTDIR;
-
-			ipg_drive_phy_ctl_low_high(ioaddr, data);
-		}
-	}
-
-	send_three_state(ioaddr, polarity);
-
-	read_phy_bit(ioaddr, polarity);
-
-	/*
-	 * For a read cycle, the bits for the next two fields (TA and
-	 * DATA) are driven by the PHY (the IPG reads these bits).
-	 */
-	for (i = 0; i < p[6].len; i++) {
-		p[6].field |=
-		    (read_phy_bit(ioaddr, polarity) << (p[6].len - 1 - i));
-	}
-
-	send_three_state(ioaddr, polarity);
-	send_three_state(ioaddr, polarity);
-	send_three_state(ioaddr, polarity);
-	send_end(ioaddr, polarity);
-
-	/* Return the value of the DATA field. */
-	return p[6].field;
-}
-
-/*
- * Write to a register from the Physical Layer device located
- * on the IPG NIC, using the IPG PHYCTRL register.
- */
-static void mdio_write(struct net_device *dev, int phy_id, int phy_reg, int val)
-{
-	void __iomem *ioaddr = ipg_ioaddr(dev);
-	/*
-	 * The GMII mangement frame structure for a read is as follows:
-	 *
-	 * |Preamble|st|op|phyad|regad|ta|      data      |idle|
-	 * |< 32 1s>|01|10|AAAAA|RRRRR|z0|DDDDDDDDDDDDDDDD|z   |
-	 *
-	 * <32 1s> = 32 consecutive logic 1 values
-	 * A = bit of Physical Layer device address (MSB first)
-	 * R = bit of register address (MSB first)
-	 * z = High impedance state
-	 * D = bit of write data (MSB first)
-	 *
-	 * Transmission order is 'Preamble' field first, bits transmitted
-	 * left to right (first to last).
-	 */
-	struct {
-		u32 field;
-		unsigned int len;
-	} p[] = {
-		{ GMII_PREAMBLE,	32 },	/* Preamble */
-		{ GMII_ST,		2  },	/* ST */
-		{ GMII_WRITE,		2  },	/* OP */
-		{ phy_id,		5  },	/* PHYAD */
-		{ phy_reg,		5  },	/* REGAD */
-		{ 0x0002,		2  },	/* TA */
-		{ val & 0xffff,		16 },	/* DATA */
-		{ 0x0000,		1  }	/* IDLE */
-	};
-	unsigned int i, j;
-	u8 polarity, data;
-
-	polarity  = ipg_r8(PHY_CTRL);
-	polarity &= (IPG_PC_DUPLEX_POLARITY | IPG_PC_LINK_POLARITY);
-
-	/* Create the Preamble, ST, OP, PHYAD, and REGAD field. */
-	for (j = 0; j < 7; j++) {
-		for (i = 0; i < p[j].len; i++) {
-			/* For each variable length field, the MSB must be
-			 * transmitted first. Rotate through the field bits,
-			 * starting with the MSB, and move each bit into the
-			 * the 1st (2^1) bit position (this is the bit position
-			 * corresponding to the MgmtData bit of the PhyCtrl
-			 * register for the IPG).
-			 *
-			 * Example: ST = 01;
-			 *
-			 *          First write a '0' to bit 1 of the PhyCtrl
-			 *          register, then write a '1' to bit 1 of the
-			 *          PhyCtrl register.
-			 *
-			 * To do this, right shift the MSB of ST by the value:
-			 * [field length - 1 - #ST bits already written]
-			 * then left shift this result by 1.
-			 */
-			data  = (p[j].field >> (p[j].len - 1 - i)) << 1;
-			data &= IPG_PC_MGMTDATA;
-			data |= polarity | IPG_PC_MGMTDIR;
-
-			ipg_drive_phy_ctl_low_high(ioaddr, data);
-		}
-	}
-
-	/* The last cycle is a tri-state, so read from the PHY. */
-	ipg_write_phy_ctl(ioaddr, IPG_PC_MGMTCLK_LO | polarity);
-	ipg_r8(PHY_CTRL);
-	ipg_write_phy_ctl(ioaddr, IPG_PC_MGMTCLK_HI | polarity);
-}
-
-static void ipg_set_led_mode(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	u32 mode;
-
-	mode = ipg_r32(ASIC_CTRL);
-	mode &= ~(IPG_AC_LED_MODE_BIT_1 | IPG_AC_LED_MODE | IPG_AC_LED_SPEED);
-
-	if ((sp->led_mode & 0x03) > 1)
-		mode |= IPG_AC_LED_MODE_BIT_1;	/* Write Asic Control Bit 29 */
-
-	if ((sp->led_mode & 0x01) == 1)
-		mode |= IPG_AC_LED_MODE;	/* Write Asic Control Bit 14 */
-
-	if ((sp->led_mode & 0x08) == 8)
-		mode |= IPG_AC_LED_SPEED;	/* Write Asic Control Bit 27 */
-
-	ipg_w32(mode, ASIC_CTRL);
-}
-
-static void ipg_set_phy_set(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	int physet;
-
-	physet = ipg_r8(PHY_SET);
-	physet &= ~(IPG_PS_MEM_LENB9B | IPG_PS_MEM_LEN9 | IPG_PS_NON_COMPDET);
-	physet |= ((sp->led_mode & 0x70) >> 4);
-	ipg_w8(physet, PHY_SET);
-}
-
-static int ipg_reset(struct net_device *dev, u32 resetflags)
-{
-	/* Assert functional resets via the IPG AsicCtrl
-	 * register as specified by the 'resetflags' input
-	 * parameter.
-	 */
-	void __iomem *ioaddr = ipg_ioaddr(dev);
-	unsigned int timeout_count = 0;
-
-	IPG_DEBUG_MSG("_reset\n");
-
-	ipg_w32(ipg_r32(ASIC_CTRL) | resetflags, ASIC_CTRL);
-
-	/* Delay added to account for problem with 10Mbps reset. */
-	mdelay(IPG_AC_RESETWAIT);
-
-	while (IPG_AC_RESET_BUSY & ipg_r32(ASIC_CTRL)) {
-		mdelay(IPG_AC_RESETWAIT);
-		if (++timeout_count > IPG_AC_RESET_TIMEOUT)
-			return -ETIME;
-	}
-	/* Set LED Mode in Asic Control */
-	ipg_set_led_mode(dev);
-
-	/* Set PHYSet Register Value */
-	ipg_set_phy_set(dev);
-	return 0;
-}
-
-/* Find the GMII PHY address. */
-static int ipg_find_phyaddr(struct net_device *dev)
-{
-	unsigned int phyaddr, i;
-
-	for (i = 0; i < 32; i++) {
-		u32 status;
-
-		/* Search for the correct PHY address among 32 possible. */
-		phyaddr = (IPG_NIC_PHY_ADDRESS + i) % 32;
-
-		/* 10/22/03 Grace change verify from GMII_PHY_STATUS to
-		   GMII_PHY_ID1
-		 */
-
-		status = mdio_read(dev, phyaddr, MII_BMSR);
-
-		if ((status != 0xFFFF) && (status != 0))
-			return phyaddr;
-	}
-
-	return 0x1f;
-}
-
-/*
- * Configure IPG based on result of IEEE 802.3 PHY
- * auto-negotiation.
- */
-static int ipg_config_autoneg(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	unsigned int txflowcontrol;
-	unsigned int rxflowcontrol;
-	unsigned int fullduplex;
-	u32 mac_ctrl_val;
-	u32 asicctrl;
-	u8 phyctrl;
-	const char *speed;
-	const char *duplex;
-	const char *tx_desc;
-	const char *rx_desc;
-
-	IPG_DEBUG_MSG("_config_autoneg\n");
-
-	asicctrl = ipg_r32(ASIC_CTRL);
-	phyctrl = ipg_r8(PHY_CTRL);
-	mac_ctrl_val = ipg_r32(MAC_CTRL);
-
-	/* Set flags for use in resolving auto-negotiation, assuming
-	 * non-1000Mbps, half duplex, no flow control.
-	 */
-	fullduplex = 0;
-	txflowcontrol = 0;
-	rxflowcontrol = 0;
-
-	/* To accommodate a problem in 10Mbps operation,
-	 * set a global flag if PHY running in 10Mbps mode.
-	 */
-	sp->tenmbpsmode = 0;
-
-	/* Determine actual speed of operation. */
-	switch (phyctrl & IPG_PC_LINK_SPEED) {
-	case IPG_PC_LINK_SPEED_10MBPS:
-		speed = "10Mbps";
-		sp->tenmbpsmode = 1;
-		break;
-	case IPG_PC_LINK_SPEED_100MBPS:
-		speed = "100Mbps";
-		break;
-	case IPG_PC_LINK_SPEED_1000MBPS:
-		speed = "1000Mbps";
-		break;
-	default:
-		speed = "undefined!";
-		return 0;
-	}
-
-	netdev_info(dev, "Link speed = %s\n", speed);
-	if (sp->tenmbpsmode == 1)
-		netdev_info(dev, "10Mbps operational mode enabled\n");
-
-	if (phyctrl & IPG_PC_DUPLEX_STATUS) {
-		fullduplex = 1;
-		txflowcontrol = 1;
-		rxflowcontrol = 1;
-	}
-
-	/* Configure full duplex, and flow control. */
-	if (fullduplex == 1) {
-
-		/* Configure IPG for full duplex operation. */
-
-		duplex = "full";
-
-		mac_ctrl_val |= IPG_MC_DUPLEX_SELECT_FD;
-
-		if (txflowcontrol == 1) {
-			tx_desc  = "";
-			mac_ctrl_val |= IPG_MC_TX_FLOW_CONTROL_ENABLE;
-		} else {
-			tx_desc = "no ";
-			mac_ctrl_val &= ~IPG_MC_TX_FLOW_CONTROL_ENABLE;
-		}
-
-		if (rxflowcontrol == 1) {
-			rx_desc = "";
-			mac_ctrl_val |= IPG_MC_RX_FLOW_CONTROL_ENABLE;
-		} else {
-			rx_desc = "no ";
-			mac_ctrl_val &= ~IPG_MC_RX_FLOW_CONTROL_ENABLE;
-		}
-	} else {
-		duplex = "half";
-		tx_desc = "no ";
-		rx_desc = "no ";
-		mac_ctrl_val &= (~IPG_MC_DUPLEX_SELECT_FD &
-				 ~IPG_MC_TX_FLOW_CONTROL_ENABLE &
-				 ~IPG_MC_RX_FLOW_CONTROL_ENABLE);
-	}
-
-	netdev_info(dev, "setting %s duplex, %sTX, %sRX flow control\n",
-		    duplex, tx_desc, rx_desc);
-	ipg_w32(mac_ctrl_val, MAC_CTRL);
-
-	return 0;
-}
-
-/* Determine and configure multicast operation and set
- * receive mode for IPG.
- */
-static void ipg_nic_set_multicast_list(struct net_device *dev)
-{
-	void __iomem *ioaddr = ipg_ioaddr(dev);
-	struct netdev_hw_addr *ha;
-	unsigned int hashindex;
-	u32 hashtable[2];
-	u8 receivemode;
-
-	IPG_DEBUG_MSG("_nic_set_multicast_list\n");
-
-	receivemode = IPG_RM_RECEIVEUNICAST | IPG_RM_RECEIVEBROADCAST;
-
-	if (dev->flags & IFF_PROMISC) {
-		/* NIC to be configured in promiscuous mode. */
-		receivemode = IPG_RM_RECEIVEALLFRAMES;
-	} else if ((dev->flags & IFF_ALLMULTI) ||
-		   ((dev->flags & IFF_MULTICAST) &&
-		    (netdev_mc_count(dev) > IPG_MULTICAST_HASHTABLE_SIZE))) {
-		/* NIC to be configured to receive all multicast
-		 * frames. */
-		receivemode |= IPG_RM_RECEIVEMULTICAST;
-	} else if ((dev->flags & IFF_MULTICAST) && !netdev_mc_empty(dev)) {
-		/* NIC to be configured to receive selected
-		 * multicast addresses. */
-		receivemode |= IPG_RM_RECEIVEMULTICASTHASH;
-	}
-
-	/* Calculate the bits to set for the 64 bit, IPG HASHTABLE.
-	 * The IPG applies a cyclic-redundancy-check (the same CRC
-	 * used to calculate the frame data FCS) to the destination
-	 * address all incoming multicast frames whose destination
-	 * address has the multicast bit set. The least significant
-	 * 6 bits of the CRC result are used as an addressing index
-	 * into the hash table. If the value of the bit addressed by
-	 * this index is a 1, the frame is passed to the host system.
-	 */
-
-	/* Clear hashtable. */
-	hashtable[0] = 0x00000000;
-	hashtable[1] = 0x00000000;
-
-	/* Cycle through all multicast addresses to filter. */
-	netdev_for_each_mc_addr(ha, dev) {
-		/* Calculate CRC result for each multicast address. */
-		hashindex = crc32_le(0xffffffff, ha->addr,
-				     ETH_ALEN);
-
-		/* Use only the least significant 6 bits. */
-		hashindex = hashindex & 0x3F;
-
-		/* Within "hashtable", set bit number "hashindex"
-		 * to a logic 1.
-		 */
-		set_bit(hashindex, (void *)hashtable);
-	}
-
-	/* Write the value of the hashtable, to the 4, 16 bit
-	 * HASHTABLE IPG registers.
-	 */
-	ipg_w32(hashtable[0], HASHTABLE_0);
-	ipg_w32(hashtable[1], HASHTABLE_1);
-
-	ipg_w8(IPG_RM_RSVD_MASK & receivemode, RECEIVE_MODE);
-
-	IPG_DEBUG_MSG("ReceiveMode = %x\n", ipg_r8(RECEIVE_MODE));
-}
-
-static int ipg_io_config(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = ipg_ioaddr(dev);
-	u32 origmacctrl;
-	u32 restoremacctrl;
-
-	IPG_DEBUG_MSG("_io_config\n");
-
-	origmacctrl = ipg_r32(MAC_CTRL);
-
-	restoremacctrl = origmacctrl | IPG_MC_STATISTICS_ENABLE;
-
-	/* Based on compilation option, determine if FCS is to be
-	 * stripped on receive frames by IPG.
-	 */
-	if (!IPG_STRIP_FCS_ON_RX)
-		restoremacctrl |= IPG_MC_RCV_FCS;
-
-	/* Determine if transmitter and/or receiver are
-	 * enabled so we may restore MACCTRL correctly.
-	 */
-	if (origmacctrl & IPG_MC_TX_ENABLED)
-		restoremacctrl |= IPG_MC_TX_ENABLE;
-
-	if (origmacctrl & IPG_MC_RX_ENABLED)
-		restoremacctrl |= IPG_MC_RX_ENABLE;
-
-	/* Transmitter and receiver must be disabled before setting
-	 * IFSSelect.
-	 */
-	ipg_w32((origmacctrl & (IPG_MC_RX_DISABLE | IPG_MC_TX_DISABLE)) &
-		IPG_MC_RSVD_MASK, MAC_CTRL);
-
-	/* Now that transmitter and receiver are disabled, write
-	 * to IFSSelect.
-	 */
-	ipg_w32((origmacctrl & IPG_MC_IFS_96BIT) & IPG_MC_RSVD_MASK, MAC_CTRL);
-
-	/* Set RECEIVEMODE register. */
-	ipg_nic_set_multicast_list(dev);
-
-	ipg_w16(sp->max_rxframe_size, MAX_FRAME_SIZE);
-
-	ipg_w8(IPG_RXDMAPOLLPERIOD_VALUE,   RX_DMA_POLL_PERIOD);
-	ipg_w8(IPG_RXDMAURGENTTHRESH_VALUE, RX_DMA_URGENT_THRESH);
-	ipg_w8(IPG_RXDMABURSTTHRESH_VALUE,  RX_DMA_BURST_THRESH);
-	ipg_w8(IPG_TXDMAPOLLPERIOD_VALUE,   TX_DMA_POLL_PERIOD);
-	ipg_w8(IPG_TXDMAURGENTTHRESH_VALUE, TX_DMA_URGENT_THRESH);
-	ipg_w8(IPG_TXDMABURSTTHRESH_VALUE,  TX_DMA_BURST_THRESH);
-	ipg_w16((IPG_IE_HOST_ERROR | IPG_IE_TX_DMA_COMPLETE |
-		 IPG_IE_TX_COMPLETE | IPG_IE_INT_REQUESTED |
-		 IPG_IE_UPDATE_STATS | IPG_IE_LINK_EVENT |
-		 IPG_IE_RX_DMA_COMPLETE | IPG_IE_RX_DMA_PRIORITY), INT_ENABLE);
-	ipg_w16(IPG_FLOWONTHRESH_VALUE,  FLOW_ON_THRESH);
-	ipg_w16(IPG_FLOWOFFTHRESH_VALUE, FLOW_OFF_THRESH);
-
-	/* IPG multi-frag frame bug workaround.
-	 * Per silicon revision B3 eratta.
-	 */
-	ipg_w16(ipg_r16(DEBUG_CTRL) | 0x0200, DEBUG_CTRL);
-
-	/* IPG TX poll now bug workaround.
-	 * Per silicon revision B3 eratta.
-	 */
-	ipg_w16(ipg_r16(DEBUG_CTRL) | 0x0010, DEBUG_CTRL);
-
-	/* IPG RX poll now bug workaround.
-	 * Per silicon revision B3 eratta.
-	 */
-	ipg_w16(ipg_r16(DEBUG_CTRL) | 0x0020, DEBUG_CTRL);
-
-	/* Now restore MACCTRL to original setting. */
-	ipg_w32(IPG_MC_RSVD_MASK & restoremacctrl, MAC_CTRL);
-
-	/* Disable unused RMON statistics. */
-	ipg_w32(IPG_RZ_ALL, RMON_STATISTICS_MASK);
-
-	/* Disable unused MIB statistics. */
-	ipg_w32(IPG_SM_MACCONTROLFRAMESXMTD | IPG_SM_MACCONTROLFRAMESRCVD |
-		IPG_SM_BCSTOCTETXMTOK_BCSTFRAMESXMTDOK | IPG_SM_TXJUMBOFRAMES |
-		IPG_SM_MCSTOCTETXMTOK_MCSTFRAMESXMTDOK | IPG_SM_RXJUMBOFRAMES |
-		IPG_SM_BCSTOCTETRCVDOK_BCSTFRAMESRCVDOK |
-		IPG_SM_UDPCHECKSUMERRORS | IPG_SM_TCPCHECKSUMERRORS |
-		IPG_SM_IPCHECKSUMERRORS, STATISTICS_MASK);
-
-	return 0;
-}
-
-/*
- * Create a receive buffer within system memory and update
- * NIC private structure appropriately.
- */
-static int ipg_get_rxbuff(struct net_device *dev, int entry)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	struct ipg_rx *rxfd = sp->rxd + entry;
-	struct sk_buff *skb;
-	u64 rxfragsize;
-
-	IPG_DEBUG_MSG("_get_rxbuff\n");
-
-	skb = netdev_alloc_skb_ip_align(dev, sp->rxsupport_size);
-	if (!skb) {
-		sp->rx_buff[entry] = NULL;
-		return -ENOMEM;
-	}
-
-	/* Save the address of the sk_buff structure. */
-	sp->rx_buff[entry] = skb;
-
-	rxfd->frag_info = cpu_to_le64(pci_map_single(sp->pdev, skb->data,
-		sp->rx_buf_sz, PCI_DMA_FROMDEVICE));
-
-	/* Set the RFD fragment length. */
-	rxfragsize = sp->rxfrag_size;
-	rxfd->frag_info |= cpu_to_le64((rxfragsize << 48) & IPG_RFI_FRAGLEN);
-
-	return 0;
-}
-
-static int init_rfdlist(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	unsigned int i;
-
-	IPG_DEBUG_MSG("_init_rfdlist\n");
-
-	for (i = 0; i < IPG_RFDLIST_LENGTH; i++) {
-		struct ipg_rx *rxfd = sp->rxd + i;
-
-		if (sp->rx_buff[i]) {
-			pci_unmap_single(sp->pdev,
-				le64_to_cpu(rxfd->frag_info) & ~IPG_RFI_FRAGLEN,
-				sp->rx_buf_sz, PCI_DMA_FROMDEVICE);
-			dev_kfree_skb_irq(sp->rx_buff[i]);
-			sp->rx_buff[i] = NULL;
-		}
-
-		/* Clear out the RFS field. */
-		rxfd->rfs = 0x0000000000000000;
-
-		if (ipg_get_rxbuff(dev, i) < 0) {
-			/*
-			 * A receive buffer was not ready, break the
-			 * RFD list here.
-			 */
-			IPG_DEBUG_MSG("Cannot allocate Rx buffer\n");
-
-			/* Just in case we cannot allocate a single RFD.
-			 * Should not occur.
-			 */
-			if (i == 0) {
-				netdev_err(dev, "No memory available for RFD list\n");
-				return -ENOMEM;
-			}
-		}
-
-		rxfd->next_desc = cpu_to_le64(sp->rxd_map +
-			sizeof(struct ipg_rx)*(i + 1));
-	}
-	sp->rxd[i - 1].next_desc = cpu_to_le64(sp->rxd_map);
-
-	sp->rx_current = 0;
-	sp->rx_dirty = 0;
-
-	/* Write the location of the RFDList to the IPG. */
-	ipg_w32((u32) sp->rxd_map, RFD_LIST_PTR_0);
-	ipg_w32(0x00000000, RFD_LIST_PTR_1);
-
-	return 0;
-}
-
-static void init_tfdlist(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	unsigned int i;
-
-	IPG_DEBUG_MSG("_init_tfdlist\n");
-
-	for (i = 0; i < IPG_TFDLIST_LENGTH; i++) {
-		struct ipg_tx *txfd = sp->txd + i;
-
-		txfd->tfc = cpu_to_le64(IPG_TFC_TFDDONE);
-
-		if (sp->tx_buff[i]) {
-			dev_kfree_skb_irq(sp->tx_buff[i]);
-			sp->tx_buff[i] = NULL;
-		}
-
-		txfd->next_desc = cpu_to_le64(sp->txd_map +
-			sizeof(struct ipg_tx)*(i + 1));
-	}
-	sp->txd[i - 1].next_desc = cpu_to_le64(sp->txd_map);
-
-	sp->tx_current = 0;
-	sp->tx_dirty = 0;
-
-	/* Write the location of the TFDList to the IPG. */
-	IPG_DDEBUG_MSG("Starting TFDListPtr = %08x\n",
-		       (u32) sp->txd_map);
-	ipg_w32((u32) sp->txd_map, TFD_LIST_PTR_0);
-	ipg_w32(0x00000000, TFD_LIST_PTR_1);
-
-	sp->reset_current_tfd = 1;
-}
-
-/*
- * Free all transmit buffers which have already been transferred
- * via DMA to the IPG.
- */
-static void ipg_nic_txfree(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	unsigned int released, pending, dirty;
-
-	IPG_DEBUG_MSG("_nic_txfree\n");
-
-	pending = sp->tx_current - sp->tx_dirty;
-	dirty = sp->tx_dirty % IPG_TFDLIST_LENGTH;
-
-	for (released = 0; released < pending; released++) {
-		struct sk_buff *skb = sp->tx_buff[dirty];
-		struct ipg_tx *txfd = sp->txd + dirty;
-
-		IPG_DEBUG_MSG("TFC = %016lx\n", (unsigned long) txfd->tfc);
-
-		/* Look at each TFD's TFC field beginning
-		 * at the last freed TFD up to the current TFD.
-		 * If the TFDDone bit is set, free the associated
-		 * buffer.
-		 */
-		if (!(txfd->tfc & cpu_to_le64(IPG_TFC_TFDDONE)))
-                        break;
-
-		/* Free the transmit buffer. */
-		if (skb) {
-			pci_unmap_single(sp->pdev,
-				le64_to_cpu(txfd->frag_info) & ~IPG_TFI_FRAGLEN,
-				skb->len, PCI_DMA_TODEVICE);
-
-			dev_kfree_skb_irq(skb);
-
-			sp->tx_buff[dirty] = NULL;
-		}
-		dirty = (dirty + 1) % IPG_TFDLIST_LENGTH;
-	}
-
-	sp->tx_dirty += released;
-
-	if (netif_queue_stopped(dev) &&
-	    (sp->tx_current != (sp->tx_dirty + IPG_TFDLIST_LENGTH))) {
-		netif_wake_queue(dev);
-	}
-}
-
-static void ipg_tx_timeout(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-
-	ipg_reset(dev, IPG_AC_TX_RESET | IPG_AC_DMA | IPG_AC_NETWORK |
-		  IPG_AC_FIFO);
-
-	spin_lock_irq(&sp->lock);
-
-	/* Re-configure after DMA reset. */
-	if (ipg_io_config(dev) < 0)
-		netdev_info(dev, "Error during re-configuration\n");
-
-	init_tfdlist(dev);
-
-	spin_unlock_irq(&sp->lock);
-
-	ipg_w32((ipg_r32(MAC_CTRL) | IPG_MC_TX_ENABLE) & IPG_MC_RSVD_MASK,
-		MAC_CTRL);
-}
-
-/*
- * For TxComplete interrupts, free all transmit
- * buffers which have already been transferred via DMA
- * to the IPG.
- */
-static void ipg_nic_txcleanup(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	unsigned int i;
-
-	IPG_DEBUG_MSG("_nic_txcleanup\n");
-
-	for (i = 0; i < IPG_TFDLIST_LENGTH; i++) {
-		/* Reading the TXSTATUS register clears the
-		 * TX_COMPLETE interrupt.
-		 */
-		u32 txstatusdword = ipg_r32(TX_STATUS);
-
-		IPG_DEBUG_MSG("TxStatus = %08x\n", txstatusdword);
-
-		/* Check for Transmit errors. Error bits only valid if
-		 * TX_COMPLETE bit in the TXSTATUS register is a 1.
-		 */
-		if (!(txstatusdword & IPG_TS_TX_COMPLETE))
-			break;
-
-		/* If in 10Mbps mode, indicate transmit is ready. */
-		if (sp->tenmbpsmode) {
-			netif_wake_queue(dev);
-		}
-
-		/* Transmit error, increment stat counters. */
-		if (txstatusdword & IPG_TS_TX_ERROR) {
-			IPG_DEBUG_MSG("Transmit error\n");
-			sp->stats.tx_errors++;
-		}
-
-		/* Late collision, re-enable transmitter. */
-		if (txstatusdword & IPG_TS_LATE_COLLISION) {
-			IPG_DEBUG_MSG("Late collision on transmit\n");
-			ipg_w32((ipg_r32(MAC_CTRL) | IPG_MC_TX_ENABLE) &
-				IPG_MC_RSVD_MASK, MAC_CTRL);
-		}
-
-		/* Maximum collisions, re-enable transmitter. */
-		if (txstatusdword & IPG_TS_TX_MAX_COLL) {
-			IPG_DEBUG_MSG("Maximum collisions on transmit\n");
-			ipg_w32((ipg_r32(MAC_CTRL) | IPG_MC_TX_ENABLE) &
-				IPG_MC_RSVD_MASK, MAC_CTRL);
-		}
-
-		/* Transmit underrun, reset and re-enable
-		 * transmitter.
-		 */
-		if (txstatusdword & IPG_TS_TX_UNDERRUN) {
-			IPG_DEBUG_MSG("Transmitter underrun\n");
-			sp->stats.tx_fifo_errors++;
-			ipg_reset(dev, IPG_AC_TX_RESET | IPG_AC_DMA |
-				  IPG_AC_NETWORK | IPG_AC_FIFO);
-
-			/* Re-configure after DMA reset. */
-			if (ipg_io_config(dev) < 0) {
-				netdev_info(dev, "Error during re-configuration\n");
-			}
-			init_tfdlist(dev);
-
-			ipg_w32((ipg_r32(MAC_CTRL) | IPG_MC_TX_ENABLE) &
-				IPG_MC_RSVD_MASK, MAC_CTRL);
-		}
-	}
-
-	ipg_nic_txfree(dev);
-}
-
-/* Provides statistical information about the IPG NIC. */
-static struct net_device_stats *ipg_nic_get_stats(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	u16 temp1;
-	u16 temp2;
-
-	IPG_DEBUG_MSG("_nic_get_stats\n");
-
-	/* Check to see if the NIC has been initialized via nic_open,
-	 * before trying to read statistic registers.
-	 */
-	if (!netif_running(dev))
-		return &sp->stats;
-
-	sp->stats.rx_packets += ipg_r32(IPG_FRAMESRCVDOK);
-	sp->stats.tx_packets += ipg_r32(IPG_FRAMESXMTDOK);
-	sp->stats.rx_bytes += ipg_r32(IPG_OCTETRCVOK);
-	sp->stats.tx_bytes += ipg_r32(IPG_OCTETXMTOK);
-	temp1 = ipg_r16(IPG_FRAMESLOSTRXERRORS);
-	sp->stats.rx_errors += temp1;
-	sp->stats.rx_missed_errors += temp1;
-	temp1 = ipg_r32(IPG_SINGLECOLFRAMES) + ipg_r32(IPG_MULTICOLFRAMES) +
-		ipg_r32(IPG_LATECOLLISIONS);
-	temp2 = ipg_r16(IPG_CARRIERSENSEERRORS);
-	sp->stats.collisions += temp1;
-	sp->stats.tx_dropped += ipg_r16(IPG_FRAMESABORTXSCOLLS);
-	sp->stats.tx_errors += ipg_r16(IPG_FRAMESWEXDEFERRAL) +
-		ipg_r32(IPG_FRAMESWDEFERREDXMT) + temp1 + temp2;
-	sp->stats.multicast += ipg_r32(IPG_MCSTOCTETRCVDOK);
-
-	/* detailed tx_errors */
-	sp->stats.tx_carrier_errors += temp2;
-
-	/* detailed rx_errors */
-	sp->stats.rx_length_errors += ipg_r16(IPG_INRANGELENGTHERRORS) +
-		ipg_r16(IPG_FRAMETOOLONGERRORS);
-	sp->stats.rx_crc_errors += ipg_r16(IPG_FRAMECHECKSEQERRORS);
-
-	/* Unutilized IPG statistic registers. */
-	ipg_r32(IPG_MCSTFRAMESRCVDOK);
-
-	return &sp->stats;
-}
-
-/* Restore used receive buffers. */
-static int ipg_nic_rxrestore(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	const unsigned int curr = sp->rx_current;
-	unsigned int dirty = sp->rx_dirty;
-
-	IPG_DEBUG_MSG("_nic_rxrestore\n");
-
-	for (dirty = sp->rx_dirty; curr - dirty > 0; dirty++) {
-		unsigned int entry = dirty % IPG_RFDLIST_LENGTH;
-
-		/* rx_copybreak may poke hole here and there. */
-		if (sp->rx_buff[entry])
-			continue;
-
-		/* Generate a new receive buffer to replace the
-		 * current buffer (which will be released by the
-		 * Linux system).
-		 */
-		if (ipg_get_rxbuff(dev, entry) < 0) {
-			IPG_DEBUG_MSG("Cannot allocate new Rx buffer\n");
-
-			break;
-		}
-
-		/* Reset the RFS field. */
-		sp->rxd[entry].rfs = 0x0000000000000000;
-	}
-	sp->rx_dirty = dirty;
-
-	return 0;
-}
-
-/* use jumboindex and jumbosize to control jumbo frame status
- * initial status is jumboindex=-1 and jumbosize=0
- * 1. jumboindex = -1 and jumbosize=0 : previous jumbo frame has been done.
- * 2. jumboindex != -1 and jumbosize != 0 : jumbo frame is not over size and receiving
- * 3. jumboindex = -1 and jumbosize != 0 : jumbo frame is over size, already dump
- *               previous receiving and need to continue dumping the current one
- */
-enum {
-	NORMAL_PACKET,
-	ERROR_PACKET
-};
-
-enum {
-	FRAME_NO_START_NO_END	= 0,
-	FRAME_WITH_START		= 1,
-	FRAME_WITH_END		= 10,
-	FRAME_WITH_START_WITH_END = 11
-};
-
-static void ipg_nic_rx_free_skb(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	unsigned int entry = sp->rx_current % IPG_RFDLIST_LENGTH;
-
-	if (sp->rx_buff[entry]) {
-		struct ipg_rx *rxfd = sp->rxd + entry;
-
-		pci_unmap_single(sp->pdev,
-			le64_to_cpu(rxfd->frag_info) & ~IPG_RFI_FRAGLEN,
-			sp->rx_buf_sz, PCI_DMA_FROMDEVICE);
-		dev_kfree_skb_irq(sp->rx_buff[entry]);
-		sp->rx_buff[entry] = NULL;
-	}
-}
-
-static int ipg_nic_rx_check_frame_type(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	struct ipg_rx *rxfd = sp->rxd + (sp->rx_current % IPG_RFDLIST_LENGTH);
-	int type = FRAME_NO_START_NO_END;
-
-	if (le64_to_cpu(rxfd->rfs) & IPG_RFS_FRAMESTART)
-		type += FRAME_WITH_START;
-	if (le64_to_cpu(rxfd->rfs) & IPG_RFS_FRAMEEND)
-		type += FRAME_WITH_END;
-	return type;
-}
-
-static int ipg_nic_rx_check_error(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	unsigned int entry = sp->rx_current % IPG_RFDLIST_LENGTH;
-	struct ipg_rx *rxfd = sp->rxd + entry;
-
-	if (IPG_DROP_ON_RX_ETH_ERRORS && (le64_to_cpu(rxfd->rfs) &
-	     (IPG_RFS_RXFIFOOVERRUN | IPG_RFS_RXRUNTFRAME |
-	      IPG_RFS_RXALIGNMENTERROR | IPG_RFS_RXFCSERROR |
-	      IPG_RFS_RXOVERSIZEDFRAME | IPG_RFS_RXLENGTHERROR))) {
-		IPG_DEBUG_MSG("Rx error, RFS = %016lx\n",
-			      (unsigned long) rxfd->rfs);
-
-		/* Increment general receive error statistic. */
-		sp->stats.rx_errors++;
-
-		/* Increment detailed receive error statistics. */
-		if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXFIFOOVERRUN) {
-			IPG_DEBUG_MSG("RX FIFO overrun occurred\n");
-
-			sp->stats.rx_fifo_errors++;
-		}
-
-		if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXRUNTFRAME) {
-			IPG_DEBUG_MSG("RX runt occurred\n");
-			sp->stats.rx_length_errors++;
-		}
-
-		/* Do nothing for IPG_RFS_RXOVERSIZEDFRAME,
-		 * error count handled by a IPG statistic register.
-		 */
-
-		if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXALIGNMENTERROR) {
-			IPG_DEBUG_MSG("RX alignment error occurred\n");
-			sp->stats.rx_frame_errors++;
-		}
-
-		/* Do nothing for IPG_RFS_RXFCSERROR, error count
-		 * handled by a IPG statistic register.
-		 */
-
-		/* Free the memory associated with the RX
-		 * buffer since it is erroneous and we will
-		 * not pass it to higher layer processes.
-		 */
-		if (sp->rx_buff[entry]) {
-			pci_unmap_single(sp->pdev,
-				le64_to_cpu(rxfd->frag_info) & ~IPG_RFI_FRAGLEN,
-				sp->rx_buf_sz, PCI_DMA_FROMDEVICE);
-
-			dev_kfree_skb_irq(sp->rx_buff[entry]);
-			sp->rx_buff[entry] = NULL;
-		}
-		return ERROR_PACKET;
-	}
-	return NORMAL_PACKET;
-}
-
-static void ipg_nic_rx_with_start_and_end(struct net_device *dev,
-					  struct ipg_nic_private *sp,
-					  struct ipg_rx *rxfd, unsigned entry)
-{
-	struct ipg_jumbo *jumbo = &sp->jumbo;
-	struct sk_buff *skb;
-	int framelen;
-
-	if (jumbo->found_start) {
-		dev_kfree_skb_irq(jumbo->skb);
-		jumbo->found_start = 0;
-		jumbo->current_size = 0;
-		jumbo->skb = NULL;
-	}
-
-	/* 1: found error, 0 no error */
-	if (ipg_nic_rx_check_error(dev) != NORMAL_PACKET)
-		return;
-
-	skb = sp->rx_buff[entry];
-	if (!skb)
-		return;
-
-	/* accept this frame and send to upper layer */
-	framelen = le64_to_cpu(rxfd->rfs) & IPG_RFS_RXFRAMELEN;
-	if (framelen > sp->rxfrag_size)
-		framelen = sp->rxfrag_size;
-
-	skb_put(skb, framelen);
-	skb->protocol = eth_type_trans(skb, dev);
-	skb_checksum_none_assert(skb);
-	netif_rx(skb);
-	sp->rx_buff[entry] = NULL;
-}
-
-static void ipg_nic_rx_with_start(struct net_device *dev,
-				  struct ipg_nic_private *sp,
-				  struct ipg_rx *rxfd, unsigned entry)
-{
-	struct ipg_jumbo *jumbo = &sp->jumbo;
-	struct pci_dev *pdev = sp->pdev;
-	struct sk_buff *skb;
-
-	/* 1: found error, 0 no error */
-	if (ipg_nic_rx_check_error(dev) != NORMAL_PACKET)
-		return;
-
-	/* accept this frame and send to upper layer */
-	skb = sp->rx_buff[entry];
-	if (!skb)
-		return;
-
-	if (jumbo->found_start)
-		dev_kfree_skb_irq(jumbo->skb);
-
-	pci_unmap_single(pdev, le64_to_cpu(rxfd->frag_info) & ~IPG_RFI_FRAGLEN,
-			 sp->rx_buf_sz, PCI_DMA_FROMDEVICE);
-
-	skb_put(skb, sp->rxfrag_size);
-
-	jumbo->found_start = 1;
-	jumbo->current_size = sp->rxfrag_size;
-	jumbo->skb = skb;
-
-	sp->rx_buff[entry] = NULL;
-}
-
-static void ipg_nic_rx_with_end(struct net_device *dev,
-				struct ipg_nic_private *sp,
-				struct ipg_rx *rxfd, unsigned entry)
-{
-	struct ipg_jumbo *jumbo = &sp->jumbo;
-
-	/* 1: found error, 0 no error */
-	if (ipg_nic_rx_check_error(dev) == NORMAL_PACKET) {
-		struct sk_buff *skb = sp->rx_buff[entry];
-
-		if (!skb)
-			return;
-
-		if (jumbo->found_start) {
-			int framelen, endframelen;
-
-			framelen = le64_to_cpu(rxfd->rfs) & IPG_RFS_RXFRAMELEN;
-
-			endframelen = framelen - jumbo->current_size;
-			if (framelen > sp->rxsupport_size)
-				dev_kfree_skb_irq(jumbo->skb);
-			else {
-				memcpy(skb_put(jumbo->skb, endframelen),
-				       skb->data, endframelen);
-
-				jumbo->skb->protocol =
-				    eth_type_trans(jumbo->skb, dev);
-
-				skb_checksum_none_assert(jumbo->skb);
-				netif_rx(jumbo->skb);
-			}
-		}
-
-		jumbo->found_start = 0;
-		jumbo->current_size = 0;
-		jumbo->skb = NULL;
-
-		ipg_nic_rx_free_skb(dev);
-	} else {
-		dev_kfree_skb_irq(jumbo->skb);
-		jumbo->found_start = 0;
-		jumbo->current_size = 0;
-		jumbo->skb = NULL;
-	}
-}
-
-static void ipg_nic_rx_no_start_no_end(struct net_device *dev,
-				       struct ipg_nic_private *sp,
-				       struct ipg_rx *rxfd, unsigned entry)
-{
-	struct ipg_jumbo *jumbo = &sp->jumbo;
-
-	/* 1: found error, 0 no error */
-	if (ipg_nic_rx_check_error(dev) == NORMAL_PACKET) {
-		struct sk_buff *skb = sp->rx_buff[entry];
-
-		if (skb) {
-			if (jumbo->found_start) {
-				jumbo->current_size += sp->rxfrag_size;
-				if (jumbo->current_size <= sp->rxsupport_size) {
-					memcpy(skb_put(jumbo->skb,
-						       sp->rxfrag_size),
-					       skb->data, sp->rxfrag_size);
-				}
-			}
-			ipg_nic_rx_free_skb(dev);
-		}
-	} else {
-		dev_kfree_skb_irq(jumbo->skb);
-		jumbo->found_start = 0;
-		jumbo->current_size = 0;
-		jumbo->skb = NULL;
-	}
-}
-
-static int ipg_nic_rx_jumbo(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	unsigned int curr = sp->rx_current;
-	void __iomem *ioaddr = sp->ioaddr;
-	unsigned int i;
-
-	IPG_DEBUG_MSG("_nic_rx\n");
-
-	for (i = 0; i < IPG_MAXRFDPROCESS_COUNT; i++, curr++) {
-		unsigned int entry = curr % IPG_RFDLIST_LENGTH;
-		struct ipg_rx *rxfd = sp->rxd + entry;
-
-		if (!(rxfd->rfs & cpu_to_le64(IPG_RFS_RFDDONE)))
-			break;
-
-		switch (ipg_nic_rx_check_frame_type(dev)) {
-		case FRAME_WITH_START_WITH_END:
-			ipg_nic_rx_with_start_and_end(dev, sp, rxfd, entry);
-			break;
-		case FRAME_WITH_START:
-			ipg_nic_rx_with_start(dev, sp, rxfd, entry);
-			break;
-		case FRAME_WITH_END:
-			ipg_nic_rx_with_end(dev, sp, rxfd, entry);
-			break;
-		case FRAME_NO_START_NO_END:
-			ipg_nic_rx_no_start_no_end(dev, sp, rxfd, entry);
-			break;
-		}
-	}
-
-	sp->rx_current = curr;
-
-	if (i == IPG_MAXRFDPROCESS_COUNT) {
-		/* There are more RFDs to process, however the
-		 * allocated amount of RFD processing time has
-		 * expired. Assert Interrupt Requested to make
-		 * sure we come back to process the remaining RFDs.
-		 */
-		ipg_w32(ipg_r32(ASIC_CTRL) | IPG_AC_INT_REQUEST, ASIC_CTRL);
-	}
-
-	ipg_nic_rxrestore(dev);
-
-	return 0;
-}
-
-static int ipg_nic_rx(struct net_device *dev)
-{
-	/* Transfer received Ethernet frames to higher network layers. */
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	unsigned int curr = sp->rx_current;
-	void __iomem *ioaddr = sp->ioaddr;
-	struct ipg_rx *rxfd;
-	unsigned int i;
-
-	IPG_DEBUG_MSG("_nic_rx\n");
-
-#define __RFS_MASK \
-	cpu_to_le64(IPG_RFS_RFDDONE | IPG_RFS_FRAMESTART | IPG_RFS_FRAMEEND)
-
-	for (i = 0; i < IPG_MAXRFDPROCESS_COUNT; i++, curr++) {
-		unsigned int entry = curr % IPG_RFDLIST_LENGTH;
-		struct sk_buff *skb = sp->rx_buff[entry];
-		unsigned int framelen;
-
-		rxfd = sp->rxd + entry;
-
-		if (((rxfd->rfs & __RFS_MASK) != __RFS_MASK) || !skb)
-			break;
-
-		/* Get received frame length. */
-		framelen = le64_to_cpu(rxfd->rfs) & IPG_RFS_RXFRAMELEN;
-
-		/* Check for jumbo frame arrival with too small
-		 * RXFRAG_SIZE.
-		 */
-		if (framelen > sp->rxfrag_size) {
-			IPG_DEBUG_MSG
-			    ("RFS FrameLen > allocated fragment size\n");
-
-			framelen = sp->rxfrag_size;
-		}
-
-		if ((IPG_DROP_ON_RX_ETH_ERRORS && (le64_to_cpu(rxfd->rfs) &
-		       (IPG_RFS_RXFIFOOVERRUN | IPG_RFS_RXRUNTFRAME |
-			IPG_RFS_RXALIGNMENTERROR | IPG_RFS_RXFCSERROR |
-			IPG_RFS_RXOVERSIZEDFRAME | IPG_RFS_RXLENGTHERROR)))) {
-
-			IPG_DEBUG_MSG("Rx error, RFS = %016lx\n",
-				      (unsigned long int) rxfd->rfs);
-
-			/* Increment general receive error statistic. */
-			sp->stats.rx_errors++;
-
-			/* Increment detailed receive error statistics. */
-			if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXFIFOOVERRUN) {
-				IPG_DEBUG_MSG("RX FIFO overrun occurred\n");
-				sp->stats.rx_fifo_errors++;
-			}
-
-			if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXRUNTFRAME) {
-				IPG_DEBUG_MSG("RX runt occurred\n");
-				sp->stats.rx_length_errors++;
-			}
-
-			if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXOVERSIZEDFRAME) ;
-			/* Do nothing, error count handled by a IPG
-			 * statistic register.
-			 */
-
-			if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXALIGNMENTERROR) {
-				IPG_DEBUG_MSG("RX alignment error occurred\n");
-				sp->stats.rx_frame_errors++;
-			}
-
-			if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXFCSERROR) ;
-			/* Do nothing, error count handled by a IPG
-			 * statistic register.
-			 */
-
-			/* Free the memory associated with the RX
-			 * buffer since it is erroneous and we will
-			 * not pass it to higher layer processes.
-			 */
-			if (skb) {
-				__le64 info = rxfd->frag_info;
-
-				pci_unmap_single(sp->pdev,
-					le64_to_cpu(info) & ~IPG_RFI_FRAGLEN,
-					sp->rx_buf_sz, PCI_DMA_FROMDEVICE);
-
-				dev_kfree_skb_irq(skb);
-			}
-		} else {
-
-			/* Adjust the new buffer length to accommodate the size
-			 * of the received frame.
-			 */
-			skb_put(skb, framelen);
-
-			/* Set the buffer's protocol field to Ethernet. */
-			skb->protocol = eth_type_trans(skb, dev);
-
-			/* The IPG encountered an error with (or
-			 * there were no) IP/TCP/UDP checksums.
-			 * This may or may not indicate an invalid
-			 * IP/TCP/UDP frame was received. Let the
-			 * upper layer decide.
-			 */
-			skb_checksum_none_assert(skb);
-
-			/* Hand off frame for higher layer processing.
-			 * The function netif_rx() releases the sk_buff
-			 * when processing completes.
-			 */
-			netif_rx(skb);
-		}
-
-		/* Assure RX buffer is not reused by IPG. */
-		sp->rx_buff[entry] = NULL;
-	}
-
-	/*
-	 * If there are more RFDs to process and the allocated amount of RFD
-	 * processing time has expired, assert Interrupt Requested to make
-	 * sure we come back to process the remaining RFDs.
-	 */
-	if (i == IPG_MAXRFDPROCESS_COUNT)
-		ipg_w32(ipg_r32(ASIC_CTRL) | IPG_AC_INT_REQUEST, ASIC_CTRL);
-
-#ifdef IPG_DEBUG
-	/* Check if the RFD list contained no receive frame data. */
-	if (!i)
-		sp->EmptyRFDListCount++;
-#endif
-	while ((le64_to_cpu(rxfd->rfs) & IPG_RFS_RFDDONE) &&
-	       !((le64_to_cpu(rxfd->rfs) & IPG_RFS_FRAMESTART) &&
-		 (le64_to_cpu(rxfd->rfs) & IPG_RFS_FRAMEEND))) {
-		unsigned int entry = curr++ % IPG_RFDLIST_LENGTH;
-
-		rxfd = sp->rxd + entry;
-
-		IPG_DEBUG_MSG("Frame requires multiple RFDs\n");
-
-		/* An unexpected event, additional code needed to handle
-		 * properly. So for the time being, just disregard the
-		 * frame.
-		 */
-
-		/* Free the memory associated with the RX
-		 * buffer since it is erroneous and we will
-		 * not pass it to higher layer processes.
-		 */
-		if (sp->rx_buff[entry]) {
-			pci_unmap_single(sp->pdev,
-				le64_to_cpu(rxfd->frag_info) & ~IPG_RFI_FRAGLEN,
-				sp->rx_buf_sz, PCI_DMA_FROMDEVICE);
-			dev_kfree_skb_irq(sp->rx_buff[entry]);
-		}
-
-		/* Assure RX buffer is not reused by IPG. */
-		sp->rx_buff[entry] = NULL;
-	}
-
-	sp->rx_current = curr;
-
-	/* Check to see if there are a minimum number of used
-	 * RFDs before restoring any (should improve performance.)
-	 */
-	if ((curr - sp->rx_dirty) >= IPG_MINUSEDRFDSTOFREE)
-		ipg_nic_rxrestore(dev);
-
-	return 0;
-}
-
-static void ipg_reset_after_host_error(struct work_struct *work)
-{
-	struct ipg_nic_private *sp =
-		container_of(work, struct ipg_nic_private, task.work);
-	struct net_device *dev = sp->dev;
-
-	/*
-	 * Acknowledge HostError interrupt by resetting
-	 * IPG DMA and HOST.
-	 */
-	ipg_reset(dev, IPG_AC_GLOBAL_RESET | IPG_AC_HOST | IPG_AC_DMA);
-
-	init_rfdlist(dev);
-	init_tfdlist(dev);
-
-	if (ipg_io_config(dev) < 0) {
-		netdev_info(dev, "Cannot recover from PCI error\n");
-		schedule_delayed_work(&sp->task, HZ);
-	}
-}
-
-static irqreturn_t ipg_interrupt_handler(int irq, void *dev_inst)
-{
-	struct net_device *dev = dev_inst;
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	unsigned int handled = 0;
-	u16 status;
-
-	IPG_DEBUG_MSG("_interrupt_handler\n");
-
-	if (sp->is_jumbo)
-		ipg_nic_rxrestore(dev);
-
-	spin_lock(&sp->lock);
-
-	/* Get interrupt source information, and acknowledge
-	 * some (i.e. TxDMAComplete, RxDMAComplete, RxEarly,
-	 * IntRequested, MacControlFrame, LinkEvent) interrupts
-	 * if issued. Also, all IPG interrupts are disabled by
-	 * reading IntStatusAck.
-	 */
-	status = ipg_r16(INT_STATUS_ACK);
-
-	IPG_DEBUG_MSG("IntStatusAck = %04x\n", status);
-
-	/* Shared IRQ of remove event. */
-	if (!(status & IPG_IS_RSVD_MASK))
-		goto out_enable;
-
-	handled = 1;
-
-	if (unlikely(!netif_running(dev)))
-		goto out_unlock;
-
-	/* If RFDListEnd interrupt, restore all used RFDs. */
-	if (status & IPG_IS_RFD_LIST_END) {
-		IPG_DEBUG_MSG("RFDListEnd Interrupt\n");
-
-		/* The RFD list end indicates an RFD was encountered
-		 * with a 0 NextPtr, or with an RFDDone bit set to 1
-		 * (indicating the RFD is not read for use by the
-		 * IPG.) Try to restore all RFDs.
-		 */
-		ipg_nic_rxrestore(dev);
-
-#ifdef IPG_DEBUG
-		/* Increment the RFDlistendCount counter. */
-		sp->RFDlistendCount++;
-#endif
-	}
-
-	/* If RFDListEnd, RxDMAPriority, RxDMAComplete, or
-	 * IntRequested interrupt, process received frames. */
-	if ((status & IPG_IS_RX_DMA_PRIORITY) ||
-	    (status & IPG_IS_RFD_LIST_END) ||
-	    (status & IPG_IS_RX_DMA_COMPLETE) ||
-	    (status & IPG_IS_INT_REQUESTED)) {
-#ifdef IPG_DEBUG
-		/* Increment the RFD list checked counter if interrupted
-		 * only to check the RFD list. */
-		if (status & (~(IPG_IS_RX_DMA_PRIORITY | IPG_IS_RFD_LIST_END |
-				IPG_IS_RX_DMA_COMPLETE | IPG_IS_INT_REQUESTED) &
-			       (IPG_IS_HOST_ERROR | IPG_IS_TX_DMA_COMPLETE |
-				IPG_IS_LINK_EVENT | IPG_IS_TX_COMPLETE |
-				IPG_IS_UPDATE_STATS)))
-			sp->RFDListCheckedCount++;
-#endif
-
-		if (sp->is_jumbo)
-			ipg_nic_rx_jumbo(dev);
-		else
-			ipg_nic_rx(dev);
-	}
-
-	/* If TxDMAComplete interrupt, free used TFDs. */
-	if (status & IPG_IS_TX_DMA_COMPLETE)
-		ipg_nic_txfree(dev);
-
-	/* TxComplete interrupts indicate one of numerous actions.
-	 * Determine what action to take based on TXSTATUS register.
-	 */
-	if (status & IPG_IS_TX_COMPLETE)
-		ipg_nic_txcleanup(dev);
-
-	/* If UpdateStats interrupt, update Linux Ethernet statistics */
-	if (status & IPG_IS_UPDATE_STATS)
-		ipg_nic_get_stats(dev);
-
-	/* If HostError interrupt, reset IPG. */
-	if (status & IPG_IS_HOST_ERROR) {
-		IPG_DDEBUG_MSG("HostError Interrupt\n");
-
-		schedule_delayed_work(&sp->task, 0);
-	}
-
-	/* If LinkEvent interrupt, resolve autonegotiation. */
-	if (status & IPG_IS_LINK_EVENT) {
-		if (ipg_config_autoneg(dev) < 0)
-			netdev_info(dev, "Auto-negotiation error\n");
-	}
-
-	/* If MACCtrlFrame interrupt, do nothing. */
-	if (status & IPG_IS_MAC_CTRL_FRAME)
-		IPG_DEBUG_MSG("MACCtrlFrame interrupt\n");
-
-	/* If RxComplete interrupt, do nothing. */
-	if (status & IPG_IS_RX_COMPLETE)
-		IPG_DEBUG_MSG("RxComplete interrupt\n");
-
-	/* If RxEarly interrupt, do nothing. */
-	if (status & IPG_IS_RX_EARLY)
-		IPG_DEBUG_MSG("RxEarly interrupt\n");
-
-out_enable:
-	/* Re-enable IPG interrupts. */
-	ipg_w16(IPG_IE_TX_DMA_COMPLETE | IPG_IE_RX_DMA_COMPLETE |
-		IPG_IE_HOST_ERROR | IPG_IE_INT_REQUESTED | IPG_IE_TX_COMPLETE |
-		IPG_IE_LINK_EVENT | IPG_IE_UPDATE_STATS, INT_ENABLE);
-out_unlock:
-	spin_unlock(&sp->lock);
-
-	return IRQ_RETVAL(handled);
-}
-
-static void ipg_rx_clear(struct ipg_nic_private *sp)
-{
-	unsigned int i;
-
-	for (i = 0; i < IPG_RFDLIST_LENGTH; i++) {
-		if (sp->rx_buff[i]) {
-			struct ipg_rx *rxfd = sp->rxd + i;
-
-			dev_kfree_skb_irq(sp->rx_buff[i]);
-			sp->rx_buff[i] = NULL;
-			pci_unmap_single(sp->pdev,
-				le64_to_cpu(rxfd->frag_info) & ~IPG_RFI_FRAGLEN,
-				sp->rx_buf_sz, PCI_DMA_FROMDEVICE);
-		}
-	}
-}
-
-static void ipg_tx_clear(struct ipg_nic_private *sp)
-{
-	unsigned int i;
-
-	for (i = 0; i < IPG_TFDLIST_LENGTH; i++) {
-		if (sp->tx_buff[i]) {
-			struct ipg_tx *txfd = sp->txd + i;
-
-			pci_unmap_single(sp->pdev,
-				le64_to_cpu(txfd->frag_info) & ~IPG_TFI_FRAGLEN,
-				sp->tx_buff[i]->len, PCI_DMA_TODEVICE);
-
-			dev_kfree_skb_irq(sp->tx_buff[i]);
-
-			sp->tx_buff[i] = NULL;
-		}
-	}
-}
-
-static int ipg_nic_open(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	struct pci_dev *pdev = sp->pdev;
-	int rc;
-
-	IPG_DEBUG_MSG("_nic_open\n");
-
-	sp->rx_buf_sz = sp->rxsupport_size;
-
-	/* Check for interrupt line conflicts, and request interrupt
-	 * line for IPG.
-	 *
-	 * IMPORTANT: Disable IPG interrupts prior to registering
-	 *            IRQ.
-	 */
-	ipg_w16(0x0000, INT_ENABLE);
-
-	/* Register the interrupt line to be used by the IPG within
-	 * the Linux system.
-	 */
-	rc = request_irq(pdev->irq, ipg_interrupt_handler, IRQF_SHARED,
-			 dev->name, dev);
-	if (rc < 0) {
-		netdev_info(dev, "Error when requesting interrupt\n");
-		goto out;
-	}
-
-	dev->irq = pdev->irq;
-
-	rc = -ENOMEM;
-
-	sp->rxd = dma_alloc_coherent(&pdev->dev, IPG_RX_RING_BYTES,
-				     &sp->rxd_map, GFP_KERNEL);
-	if (!sp->rxd)
-		goto err_free_irq_0;
-
-	sp->txd = dma_alloc_coherent(&pdev->dev, IPG_TX_RING_BYTES,
-				     &sp->txd_map, GFP_KERNEL);
-	if (!sp->txd)
-		goto err_free_rx_1;
-
-	rc = init_rfdlist(dev);
-	if (rc < 0) {
-		netdev_info(dev, "Error during configuration\n");
-		goto err_free_tx_2;
-	}
-
-	init_tfdlist(dev);
-
-	rc = ipg_io_config(dev);
-	if (rc < 0) {
-		netdev_info(dev, "Error during configuration\n");
-		goto err_release_tfdlist_3;
-	}
-
-	/* Resolve autonegotiation. */
-	if (ipg_config_autoneg(dev) < 0)
-		netdev_info(dev, "Auto-negotiation error\n");
-
-	/* initialize JUMBO Frame control variable */
-	sp->jumbo.found_start = 0;
-	sp->jumbo.current_size = 0;
-	sp->jumbo.skb = NULL;
-
-	/* Enable transmit and receive operation of the IPG. */
-	ipg_w32((ipg_r32(MAC_CTRL) | IPG_MC_RX_ENABLE | IPG_MC_TX_ENABLE) &
-		 IPG_MC_RSVD_MASK, MAC_CTRL);
-
-	netif_start_queue(dev);
-out:
-	return rc;
-
-err_release_tfdlist_3:
-	ipg_tx_clear(sp);
-	ipg_rx_clear(sp);
-err_free_tx_2:
-	dma_free_coherent(&pdev->dev, IPG_TX_RING_BYTES, sp->txd, sp->txd_map);
-err_free_rx_1:
-	dma_free_coherent(&pdev->dev, IPG_RX_RING_BYTES, sp->rxd, sp->rxd_map);
-err_free_irq_0:
-	free_irq(pdev->irq, dev);
-	goto out;
-}
-
-static int ipg_nic_stop(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	struct pci_dev *pdev = sp->pdev;
-
-	IPG_DEBUG_MSG("_nic_stop\n");
-
-	netif_stop_queue(dev);
-
-	IPG_DUMPTFDLIST(dev);
-
-	do {
-		(void) ipg_r16(INT_STATUS_ACK);
-
-		ipg_reset(dev, IPG_AC_GLOBAL_RESET | IPG_AC_HOST | IPG_AC_DMA);
-
-		synchronize_irq(pdev->irq);
-	} while (ipg_r16(INT_ENABLE) & IPG_IE_RSVD_MASK);
-
-	ipg_rx_clear(sp);
-
-	ipg_tx_clear(sp);
-
-	pci_free_consistent(pdev, IPG_RX_RING_BYTES, sp->rxd, sp->rxd_map);
-	pci_free_consistent(pdev, IPG_TX_RING_BYTES, sp->txd, sp->txd_map);
-
-	free_irq(pdev->irq, dev);
-
-	return 0;
-}
-
-static netdev_tx_t ipg_nic_hard_start_xmit(struct sk_buff *skb,
-					   struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	unsigned int entry = sp->tx_current % IPG_TFDLIST_LENGTH;
-	unsigned long flags;
-	struct ipg_tx *txfd;
-
-	IPG_DDEBUG_MSG("_nic_hard_start_xmit\n");
-
-	/* If in 10Mbps mode, stop the transmit queue so
-	 * no more transmit frames are accepted.
-	 */
-	if (sp->tenmbpsmode)
-		netif_stop_queue(dev);
-
-	if (sp->reset_current_tfd) {
-		sp->reset_current_tfd = 0;
-		entry = 0;
-	}
-
-	txfd = sp->txd + entry;
-
-	sp->tx_buff[entry] = skb;
-
-	/* Clear all TFC fields, except TFDDONE. */
-	txfd->tfc = cpu_to_le64(IPG_TFC_TFDDONE);
-
-	/* Specify the TFC field within the TFD. */
-	txfd->tfc |= cpu_to_le64(IPG_TFC_WORDALIGNDISABLED |
-		(IPG_TFC_FRAMEID & sp->tx_current) |
-		(IPG_TFC_FRAGCOUNT & (1 << 24)));
-	/*
-	 * 16--17 (WordAlign) <- 3 (disable),
-	 * 0--15 (FrameId) <- sp->tx_current,
-	 * 24--27 (FragCount) <- 1
-	 */
-
-	/* Request TxComplete interrupts at an interval defined
-	 * by the constant IPG_FRAMESBETWEENTXCOMPLETES.
-	 * Request TxComplete interrupt for every frame
-	 * if in 10Mbps mode to accommodate problem with 10Mbps
-	 * processing.
-	 */
-	if (sp->tenmbpsmode)
-		txfd->tfc |= cpu_to_le64(IPG_TFC_TXINDICATE);
-	txfd->tfc |= cpu_to_le64(IPG_TFC_TXDMAINDICATE);
-	/* Based on compilation option, determine if FCS is to be
-	 * appended to transmit frame by IPG.
-	 */
-	if (!(IPG_APPEND_FCS_ON_TX))
-		txfd->tfc |= cpu_to_le64(IPG_TFC_FCSAPPENDDISABLE);
-
-	/* Based on compilation option, determine if IP, TCP and/or
-	 * UDP checksums are to be added to transmit frame by IPG.
-	 */
-	if (IPG_ADD_IPCHECKSUM_ON_TX)
-		txfd->tfc |= cpu_to_le64(IPG_TFC_IPCHECKSUMENABLE);
-
-	if (IPG_ADD_TCPCHECKSUM_ON_TX)
-		txfd->tfc |= cpu_to_le64(IPG_TFC_TCPCHECKSUMENABLE);
-
-	if (IPG_ADD_UDPCHECKSUM_ON_TX)
-		txfd->tfc |= cpu_to_le64(IPG_TFC_UDPCHECKSUMENABLE);
-
-	/* Based on compilation option, determine if VLAN tag info is to be
-	 * inserted into transmit frame by IPG.
-	 */
-	if (IPG_INSERT_MANUAL_VLAN_TAG) {
-		txfd->tfc |= cpu_to_le64(IPG_TFC_VLANTAGINSERT |
-			((u64) IPG_MANUAL_VLAN_VID << 32) |
-			((u64) IPG_MANUAL_VLAN_CFI << 44) |
-			((u64) IPG_MANUAL_VLAN_USERPRIORITY << 45));
-	}
-
-	/* The fragment start location within system memory is defined
-	 * by the sk_buff structure's data field. The physical address
-	 * of this location within the system's virtual memory space
-	 * is determined using the IPG_HOST2BUS_MAP function.
-	 */
-	txfd->frag_info = cpu_to_le64(pci_map_single(sp->pdev, skb->data,
-		skb->len, PCI_DMA_TODEVICE));
-
-	/* The length of the fragment within system memory is defined by
-	 * the sk_buff structure's len field.
-	 */
-	txfd->frag_info |= cpu_to_le64(IPG_TFI_FRAGLEN &
-		((u64) (skb->len & 0xffff) << 48));
-
-	/* Clear the TFDDone bit last to indicate the TFD is ready
-	 * for transfer to the IPG.
-	 */
-	txfd->tfc &= cpu_to_le64(~IPG_TFC_TFDDONE);
-
-	spin_lock_irqsave(&sp->lock, flags);
-
-	sp->tx_current++;
-
-	mmiowb();
-
-	ipg_w32(IPG_DC_TX_DMA_POLL_NOW, DMA_CTRL);
-
-	if (sp->tx_current == (sp->tx_dirty + IPG_TFDLIST_LENGTH))
-		netif_stop_queue(dev);
-
-	spin_unlock_irqrestore(&sp->lock, flags);
-
-	return NETDEV_TX_OK;
-}
-
-static void ipg_set_phy_default_param(unsigned char rev,
-				      struct net_device *dev, int phy_address)
-{
-	unsigned short length;
-	unsigned char revision;
-	const unsigned short *phy_param;
-	unsigned short address, value;
-
-	phy_param = &DefaultPhyParam[0];
-	length = *phy_param & 0x00FF;
-	revision = (unsigned char)((*phy_param) >> 8);
-	phy_param++;
-	while (length != 0) {
-		if (rev == revision) {
-			while (length > 1) {
-				address = *phy_param;
-				value = *(phy_param + 1);
-				phy_param += 2;
-				mdio_write(dev, phy_address, address, value);
-				length -= 4;
-			}
-			break;
-		} else {
-			phy_param += length / 2;
-			length = *phy_param & 0x00FF;
-			revision = (unsigned char)((*phy_param) >> 8);
-			phy_param++;
-		}
-	}
-}
-
-static int read_eeprom(struct net_device *dev, int eep_addr)
-{
-	void __iomem *ioaddr = ipg_ioaddr(dev);
-	unsigned int i;
-	int ret = 0;
-	u16 value;
-
-	value = IPG_EC_EEPROM_READOPCODE | (eep_addr & 0xff);
-	ipg_w16(value, EEPROM_CTRL);
-
-	for (i = 0; i < 1000; i++) {
-		u16 data;
-
-		mdelay(10);
-		data = ipg_r16(EEPROM_CTRL);
-		if (!(data & IPG_EC_EEPROM_BUSY)) {
-			ret = ipg_r16(EEPROM_DATA);
-			break;
-		}
-	}
-	return ret;
-}
-
-static void ipg_init_mii(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	struct mii_if_info *mii_if = &sp->mii_if;
-	int phyaddr;
-
-	mii_if->dev          = dev;
-	mii_if->mdio_read    = mdio_read;
-	mii_if->mdio_write   = mdio_write;
-	mii_if->phy_id_mask  = 0x1f;
-	mii_if->reg_num_mask = 0x1f;
-
-	mii_if->phy_id = phyaddr = ipg_find_phyaddr(dev);
-
-	if (phyaddr != 0x1f) {
-		u16 mii_phyctrl, mii_1000cr;
-
-		mii_1000cr  = mdio_read(dev, phyaddr, MII_CTRL1000);
-		mii_1000cr |= ADVERTISE_1000FULL | ADVERTISE_1000HALF |
-			GMII_PHY_1000BASETCONTROL_PreferMaster;
-		mdio_write(dev, phyaddr, MII_CTRL1000, mii_1000cr);
-
-		mii_phyctrl = mdio_read(dev, phyaddr, MII_BMCR);
-
-		/* Set default phyparam */
-		ipg_set_phy_default_param(sp->pdev->revision, dev, phyaddr);
-
-		/* Reset PHY */
-		mii_phyctrl |= BMCR_RESET | BMCR_ANRESTART;
-		mdio_write(dev, phyaddr, MII_BMCR, mii_phyctrl);
-
-	}
-}
-
-static int ipg_hw_init(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	unsigned int i;
-	int rc;
-
-	/* Read/Write and Reset EEPROM Value */
-	/* Read LED Mode Configuration from EEPROM */
-	sp->led_mode = read_eeprom(dev, 6);
-
-	/* Reset all functions within the IPG. Do not assert
-	 * RST_OUT as not compatible with some PHYs.
-	 */
-	rc = ipg_reset(dev, IPG_RESET_MASK);
-	if (rc < 0)
-		goto out;
-
-	ipg_init_mii(dev);
-
-	/* Read MAC Address from EEPROM */
-	for (i = 0; i < 3; i++)
-		sp->station_addr[i] = read_eeprom(dev, 16 + i);
-
-	for (i = 0; i < 3; i++)
-		ipg_w16(sp->station_addr[i], STATION_ADDRESS_0 + 2*i);
-
-	/* Set station address in ethernet_device structure. */
-	dev->dev_addr[0] =  ipg_r16(STATION_ADDRESS_0) & 0x00ff;
-	dev->dev_addr[1] = (ipg_r16(STATION_ADDRESS_0) & 0xff00) >> 8;
-	dev->dev_addr[2] =  ipg_r16(STATION_ADDRESS_1) & 0x00ff;
-	dev->dev_addr[3] = (ipg_r16(STATION_ADDRESS_1) & 0xff00) >> 8;
-	dev->dev_addr[4] =  ipg_r16(STATION_ADDRESS_2) & 0x00ff;
-	dev->dev_addr[5] = (ipg_r16(STATION_ADDRESS_2) & 0xff00) >> 8;
-out:
-	return rc;
-}
-
-static int ipg_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	int rc;
-
-	mutex_lock(&sp->mii_mutex);
-	rc = generic_mii_ioctl(&sp->mii_if, if_mii(ifr), cmd, NULL);
-	mutex_unlock(&sp->mii_mutex);
-
-	return rc;
-}
-
-static int ipg_nic_change_mtu(struct net_device *dev, int new_mtu)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	int err;
-
-	/* Function to accommodate changes to Maximum Transfer Unit
-	 * (or MTU) of IPG NIC. Cannot use default function since
-	 * the default will not allow for MTU > 1500 bytes.
-	 */
-
-	IPG_DEBUG_MSG("_nic_change_mtu\n");
-
-	/*
-	 * Check that the new MTU value is between 68 (14 byte header, 46 byte
-	 * payload, 4 byte FCS) and 10 KB, which is the largest supported MTU.
-	 */
-	if (new_mtu < 68 || new_mtu > 10240)
-		return -EINVAL;
-
-	err = ipg_nic_stop(dev);
-	if (err)
-		return err;
-
-	dev->mtu = new_mtu;
-
-	sp->max_rxframe_size = new_mtu;
-
-	sp->rxfrag_size = new_mtu;
-	if (sp->rxfrag_size > 4088)
-		sp->rxfrag_size = 4088;
-
-	sp->rxsupport_size = sp->max_rxframe_size;
-
-	if (new_mtu > 0x0600)
-		sp->is_jumbo = true;
-	else
-		sp->is_jumbo = false;
-
-	return ipg_nic_open(dev);
-}
-
-static int ipg_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	int rc;
-
-	mutex_lock(&sp->mii_mutex);
-	rc = mii_ethtool_gset(&sp->mii_if, cmd);
-	mutex_unlock(&sp->mii_mutex);
-
-	return rc;
-}
-
-static int ipg_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	int rc;
-
-	mutex_lock(&sp->mii_mutex);
-	rc = mii_ethtool_sset(&sp->mii_if, cmd);
-	mutex_unlock(&sp->mii_mutex);
-
-	return rc;
-}
-
-static int ipg_nway_reset(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	int rc;
-
-	mutex_lock(&sp->mii_mutex);
-	rc = mii_nway_restart(&sp->mii_if);
-	mutex_unlock(&sp->mii_mutex);
-
-	return rc;
-}
-
-static const struct ethtool_ops ipg_ethtool_ops = {
-	.get_settings = ipg_get_settings,
-	.set_settings = ipg_set_settings,
-	.nway_reset   = ipg_nway_reset,
-};
-
-static void ipg_remove(struct pci_dev *pdev)
-{
-	struct net_device *dev = pci_get_drvdata(pdev);
-	struct ipg_nic_private *sp = netdev_priv(dev);
-
-	IPG_DEBUG_MSG("_remove\n");
-
-	/* Un-register Ethernet device. */
-	unregister_netdev(dev);
-
-	pci_iounmap(pdev, sp->ioaddr);
-
-	pci_release_regions(pdev);
-
-	free_netdev(dev);
-	pci_disable_device(pdev);
-}
-
-static const struct net_device_ops ipg_netdev_ops = {
-	.ndo_open		= ipg_nic_open,
-	.ndo_stop		= ipg_nic_stop,
-	.ndo_start_xmit		= ipg_nic_hard_start_xmit,
-	.ndo_get_stats		= ipg_nic_get_stats,
-	.ndo_set_rx_mode	= ipg_nic_set_multicast_list,
-	.ndo_do_ioctl		= ipg_ioctl,
-	.ndo_tx_timeout 	= ipg_tx_timeout,
-	.ndo_change_mtu 	= ipg_nic_change_mtu,
-	.ndo_set_mac_address	= eth_mac_addr,
-	.ndo_validate_addr	= eth_validate_addr,
-};
-
-static int ipg_probe(struct pci_dev *pdev, const struct pci_device_id *id)
-{
-	unsigned int i = id->driver_data;
-	struct ipg_nic_private *sp;
-	struct net_device *dev;
-	void __iomem *ioaddr;
-	int rc;
-
-	rc = pci_enable_device(pdev);
-	if (rc < 0)
-		goto out;
-
-	pr_info("%s: %s\n", pci_name(pdev), ipg_brand_name[i]);
-
-	pci_set_master(pdev);
-
-	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
-	if (rc < 0) {
-		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-		if (rc < 0) {
-			pr_err("%s: DMA config failed\n", pci_name(pdev));
-			goto err_disable_0;
-		}
-	}
-
-	/*
-	 * Initialize net device.
-	 */
-	dev = alloc_etherdev(sizeof(struct ipg_nic_private));
-	if (!dev) {
-		rc = -ENOMEM;
-		goto err_disable_0;
-	}
-
-	sp = netdev_priv(dev);
-	spin_lock_init(&sp->lock);
-	mutex_init(&sp->mii_mutex);
-
-	sp->is_jumbo = IPG_IS_JUMBO;
-	sp->rxfrag_size = IPG_RXFRAG_SIZE;
-	sp->rxsupport_size = IPG_RXSUPPORT_SIZE;
-	sp->max_rxframe_size = IPG_MAX_RXFRAME_SIZE;
-
-	/* Declare IPG NIC functions for Ethernet device methods.
-	 */
-	dev->netdev_ops = &ipg_netdev_ops;
-	SET_NETDEV_DEV(dev, &pdev->dev);
-	dev->ethtool_ops = &ipg_ethtool_ops;
-
-	rc = pci_request_regions(pdev, DRV_NAME);
-	if (rc)
-		goto err_free_dev_1;
-
-	ioaddr = pci_iomap(pdev, 1, pci_resource_len(pdev, 1));
-	if (!ioaddr) {
-		pr_err("%s: cannot map MMIO\n", pci_name(pdev));
-		rc = -EIO;
-		goto err_release_regions_2;
-	}
-
-	/* Save the pointer to the PCI device information. */
-	sp->ioaddr = ioaddr;
-	sp->pdev = pdev;
-	sp->dev = dev;
-
-	INIT_DELAYED_WORK(&sp->task, ipg_reset_after_host_error);
-
-	pci_set_drvdata(pdev, dev);
-
-	rc = ipg_hw_init(dev);
-	if (rc < 0)
-		goto err_unmap_3;
-
-	rc = register_netdev(dev);
-	if (rc < 0)
-		goto err_unmap_3;
-
-	netdev_info(dev, "Ethernet device registered\n");
-out:
-	return rc;
-
-err_unmap_3:
-	pci_iounmap(pdev, ioaddr);
-err_release_regions_2:
-	pci_release_regions(pdev);
-err_free_dev_1:
-	free_netdev(dev);
-err_disable_0:
-	pci_disable_device(pdev);
-	goto out;
-}
-
-static struct pci_driver ipg_pci_driver = {
-	.name		= IPG_DRIVER_NAME,
-	.id_table	= ipg_pci_tbl,
-	.probe		= ipg_probe,
-	.remove		= ipg_remove,
-};
-
-module_pci_driver(ipg_pci_driver);
diff --git a/drivers/net/ethernet/icplus/ipg.h b/drivers/net/ethernet/icplus/ipg.h
deleted file mode 100644
index de606281f97b..000000000000
--- a/drivers/net/ethernet/icplus/ipg.h
+++ /dev/null
@@ -1,748 +0,0 @@
-/*
- * Include file for Gigabit Ethernet device driver for Network
- * Interface Cards (NICs) utilizing the Tamarack Microelectronics
- * Inc. IPG Gigabit or Triple Speed Ethernet Media Access
- * Controller.
- */
-#ifndef __LINUX_IPG_H
-#define __LINUX_IPG_H
-
-#include <linux/module.h>
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/ioport.h>
-#include <linux/errno.h>
-#include <asm/io.h>
-#include <linux/delay.h>
-#include <linux/types.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <asm/bitops.h>
-
-/*
- *	Constants
- */
-
-/* GMII based PHY IDs */
-#define		NS				0x2000
-#define		MARVELL				0x0141
-#define		ICPLUS_PHY			0x243
-
-/* NIC Physical Layer Device MII register fields. */
-#define         MII_PHY_SELECTOR_IEEE8023       0x0001
-#define         MII_PHY_TECHABILITYFIELD        0x1FE0
-
-/* GMII_PHY_1000 need to set to prefer master */
-#define         GMII_PHY_1000BASETCONTROL_PreferMaster 0x0400
-
-/* NIC Physical Layer Device GMII constants. */
-#define         GMII_PREAMBLE                    0xFFFFFFFF
-#define         GMII_ST                          0x1
-#define         GMII_READ                        0x2
-#define         GMII_WRITE                       0x1
-#define         GMII_TA_READ_MASK                0x1
-#define         GMII_TA_WRITE                    0x2
-
-/* I/O register offsets. */
-enum ipg_regs {
-	DMA_CTRL		= 0x00,
-	RX_DMA_STATUS		= 0x08, /* Unused + reserved */
-	TFD_LIST_PTR_0		= 0x10,
-	TFD_LIST_PTR_1		= 0x14,
-	TX_DMA_BURST_THRESH	= 0x18,
-	TX_DMA_URGENT_THRESH	= 0x19,
-	TX_DMA_POLL_PERIOD	= 0x1a,
-	RFD_LIST_PTR_0		= 0x1c,
-	RFD_LIST_PTR_1		= 0x20,
-	RX_DMA_BURST_THRESH	= 0x24,
-	RX_DMA_URGENT_THRESH	= 0x25,
-	RX_DMA_POLL_PERIOD	= 0x26,
-	DEBUG_CTRL		= 0x2c,
-	ASIC_CTRL		= 0x30,
-	FIFO_CTRL		= 0x38, /* Unused */
-	FLOW_OFF_THRESH		= 0x3c,
-	FLOW_ON_THRESH		= 0x3e,
-	EEPROM_DATA		= 0x48,
-	EEPROM_CTRL		= 0x4a,
-	EXPROM_ADDR		= 0x4c, /* Unused */
-	EXPROM_DATA		= 0x50, /* Unused */
-	WAKE_EVENT		= 0x51, /* Unused */
-	COUNTDOWN		= 0x54, /* Unused */
-	INT_STATUS_ACK		= 0x5a,
-	INT_ENABLE		= 0x5c,
-	INT_STATUS		= 0x5e, /* Unused */
-	TX_STATUS		= 0x60,
-	MAC_CTRL		= 0x6c,
-	VLAN_TAG		= 0x70, /* Unused */
-	PHY_SET			= 0x75,
-	PHY_CTRL		= 0x76,
-	STATION_ADDRESS_0	= 0x78,
-	STATION_ADDRESS_1	= 0x7a,
-	STATION_ADDRESS_2	= 0x7c,
-	MAX_FRAME_SIZE		= 0x86,
-	RECEIVE_MODE		= 0x88,
-	HASHTABLE_0		= 0x8c,
-	HASHTABLE_1		= 0x90,
-	RMON_STATISTICS_MASK	= 0x98,
-	STATISTICS_MASK		= 0x9c,
-	RX_JUMBO_FRAMES		= 0xbc, /* Unused */
-	TCP_CHECKSUM_ERRORS	= 0xc0, /* Unused */
-	IP_CHECKSUM_ERRORS	= 0xc2, /* Unused */
-	UDP_CHECKSUM_ERRORS	= 0xc4, /* Unused */
-	TX_JUMBO_FRAMES		= 0xf4  /* Unused */
-};
-
-/* Ethernet MIB statistic register offsets. */
-#define	IPG_OCTETRCVOK			0xA8
-#define	IPG_MCSTOCTETRCVDOK		0xAC
-#define	IPG_BCSTOCTETRCVOK		0xB0
-#define	IPG_FRAMESRCVDOK		0xB4
-#define	IPG_MCSTFRAMESRCVDOK		0xB8
-#define	IPG_BCSTFRAMESRCVDOK		0xBE
-#define	IPG_MACCONTROLFRAMESRCVD	0xC6
-#define	IPG_FRAMETOOLONGERRORS		0xC8
-#define	IPG_INRANGELENGTHERRORS		0xCA
-#define	IPG_FRAMECHECKSEQERRORS		0xCC
-#define	IPG_FRAMESLOSTRXERRORS		0xCE
-#define	IPG_OCTETXMTOK			0xD0
-#define	IPG_MCSTOCTETXMTOK		0xD4
-#define	IPG_BCSTOCTETXMTOK		0xD8
-#define	IPG_FRAMESXMTDOK		0xDC
-#define	IPG_MCSTFRAMESXMTDOK		0xE0
-#define	IPG_FRAMESWDEFERREDXMT		0xE4
-#define	IPG_LATECOLLISIONS		0xE8
-#define	IPG_MULTICOLFRAMES		0xEC
-#define	IPG_SINGLECOLFRAMES		0xF0
-#define	IPG_BCSTFRAMESXMTDOK		0xF6
-#define	IPG_CARRIERSENSEERRORS		0xF8
-#define	IPG_MACCONTROLFRAMESXMTDOK	0xFA
-#define	IPG_FRAMESABORTXSCOLLS		0xFC
-#define	IPG_FRAMESWEXDEFERRAL		0xFE
-
-/* RMON statistic register offsets. */
-#define	IPG_ETHERSTATSCOLLISIONS			0x100
-#define	IPG_ETHERSTATSOCTETSTRANSMIT			0x104
-#define	IPG_ETHERSTATSPKTSTRANSMIT			0x108
-#define	IPG_ETHERSTATSPKTS64OCTESTSTRANSMIT		0x10C
-#define	IPG_ETHERSTATSPKTS65TO127OCTESTSTRANSMIT	0x110
-#define	IPG_ETHERSTATSPKTS128TO255OCTESTSTRANSMIT	0x114
-#define	IPG_ETHERSTATSPKTS256TO511OCTESTSTRANSMIT	0x118
-#define	IPG_ETHERSTATSPKTS512TO1023OCTESTSTRANSMIT	0x11C
-#define	IPG_ETHERSTATSPKTS1024TO1518OCTESTSTRANSMIT	0x120
-#define	IPG_ETHERSTATSCRCALIGNERRORS			0x124
-#define	IPG_ETHERSTATSUNDERSIZEPKTS			0x128
-#define	IPG_ETHERSTATSFRAGMENTS				0x12C
-#define	IPG_ETHERSTATSJABBERS				0x130
-#define	IPG_ETHERSTATSOCTETS				0x134
-#define	IPG_ETHERSTATSPKTS				0x138
-#define	IPG_ETHERSTATSPKTS64OCTESTS			0x13C
-#define	IPG_ETHERSTATSPKTS65TO127OCTESTS		0x140
-#define	IPG_ETHERSTATSPKTS128TO255OCTESTS		0x144
-#define	IPG_ETHERSTATSPKTS256TO511OCTESTS		0x148
-#define	IPG_ETHERSTATSPKTS512TO1023OCTESTS		0x14C
-#define	IPG_ETHERSTATSPKTS1024TO1518OCTESTS		0x150
-
-/* RMON statistic register equivalents. */
-#define	IPG_ETHERSTATSMULTICASTPKTSTRANSMIT		0xE0
-#define	IPG_ETHERSTATSBROADCASTPKTSTRANSMIT		0xF6
-#define	IPG_ETHERSTATSMULTICASTPKTS			0xB8
-#define	IPG_ETHERSTATSBROADCASTPKTS			0xBE
-#define	IPG_ETHERSTATSOVERSIZEPKTS			0xC8
-#define	IPG_ETHERSTATSDROPEVENTS			0xCE
-
-/* Serial EEPROM offsets */
-#define	IPG_EEPROM_CONFIGPARAM		0x00
-#define	IPG_EEPROM_ASICCTRL		0x01
-#define	IPG_EEPROM_SUBSYSTEMVENDORID	0x02
-#define	IPG_EEPROM_SUBSYSTEMID		0x03
-#define	IPG_EEPROM_STATIONADDRESS0	0x10
-#define	IPG_EEPROM_STATIONADDRESS1	0x11
-#define	IPG_EEPROM_STATIONADDRESS2	0x12
-
-/* Register & data structure bit masks */
-
-/* PCI register masks. */
-
-/* IOBaseAddress */
-#define         IPG_PIB_RSVD_MASK		0xFFFFFE01
-#define         IPG_PIB_IOBASEADDRESS		0xFFFFFF00
-#define         IPG_PIB_IOBASEADDRIND		0x00000001
-
-/* MemBaseAddress */
-#define         IPG_PMB_RSVD_MASK		0xFFFFFE07
-#define         IPG_PMB_MEMBASEADDRIND		0x00000001
-#define         IPG_PMB_MEMMAPTYPE		0x00000006
-#define         IPG_PMB_MEMMAPTYPE0		0x00000002
-#define         IPG_PMB_MEMMAPTYPE1		0x00000004
-#define         IPG_PMB_MEMBASEADDRESS		0xFFFFFE00
-
-/* ConfigStatus */
-#define IPG_CS_RSVD_MASK                0xFFB0
-#define IPG_CS_CAPABILITIES             0x0010
-#define IPG_CS_66MHZCAPABLE             0x0020
-#define IPG_CS_FASTBACK2BACK            0x0080
-#define IPG_CS_DATAPARITYREPORTED       0x0100
-#define IPG_CS_DEVSELTIMING             0x0600
-#define IPG_CS_SIGNALEDTARGETABORT      0x0800
-#define IPG_CS_RECEIVEDTARGETABORT      0x1000
-#define IPG_CS_RECEIVEDMASTERABORT      0x2000
-#define IPG_CS_SIGNALEDSYSTEMERROR      0x4000
-#define IPG_CS_DETECTEDPARITYERROR      0x8000
-
-/* TFD data structure masks. */
-
-/* TFDList, TFC */
-#define	IPG_TFC_RSVD_MASK			0x0000FFFF9FFFFFFFULL
-#define	IPG_TFC_FRAMEID				0x000000000000FFFFULL
-#define	IPG_TFC_WORDALIGN			0x0000000000030000ULL
-#define	IPG_TFC_WORDALIGNTODWORD		0x0000000000000000ULL
-#define	IPG_TFC_WORDALIGNTOWORD			0x0000000000020000ULL
-#define	IPG_TFC_WORDALIGNDISABLED		0x0000000000030000ULL
-#define	IPG_TFC_TCPCHECKSUMENABLE		0x0000000000040000ULL
-#define	IPG_TFC_UDPCHECKSUMENABLE		0x0000000000080000ULL
-#define	IPG_TFC_IPCHECKSUMENABLE		0x0000000000100000ULL
-#define	IPG_TFC_FCSAPPENDDISABLE		0x0000000000200000ULL
-#define	IPG_TFC_TXINDICATE			0x0000000000400000ULL
-#define	IPG_TFC_TXDMAINDICATE			0x0000000000800000ULL
-#define	IPG_TFC_FRAGCOUNT			0x000000000F000000ULL
-#define	IPG_TFC_VLANTAGINSERT			0x0000000010000000ULL
-#define	IPG_TFC_TFDDONE				0x0000000080000000ULL
-#define	IPG_TFC_VID				0x00000FFF00000000ULL
-#define	IPG_TFC_CFI				0x0000100000000000ULL
-#define	IPG_TFC_USERPRIORITY			0x0000E00000000000ULL
-
-/* TFDList, FragInfo */
-#define	IPG_TFI_RSVD_MASK			0xFFFF00FFFFFFFFFFULL
-#define	IPG_TFI_FRAGADDR			0x000000FFFFFFFFFFULL
-#define	IPG_TFI_FRAGLEN				0xFFFF000000000000ULL
-
-/* RFD data structure masks. */
-
-/* RFDList, RFS */
-#define	IPG_RFS_RSVD_MASK			0x0000FFFFFFFFFFFFULL
-#define	IPG_RFS_RXFRAMELEN			0x000000000000FFFFULL
-#define	IPG_RFS_RXFIFOOVERRUN			0x0000000000010000ULL
-#define	IPG_RFS_RXRUNTFRAME			0x0000000000020000ULL
-#define	IPG_RFS_RXALIGNMENTERROR		0x0000000000040000ULL
-#define	IPG_RFS_RXFCSERROR			0x0000000000080000ULL
-#define	IPG_RFS_RXOVERSIZEDFRAME		0x0000000000100000ULL
-#define	IPG_RFS_RXLENGTHERROR			0x0000000000200000ULL
-#define	IPG_RFS_VLANDETECTED			0x0000000000400000ULL
-#define	IPG_RFS_TCPDETECTED			0x0000000000800000ULL
-#define	IPG_RFS_TCPERROR			0x0000000001000000ULL
-#define	IPG_RFS_UDPDETECTED			0x0000000002000000ULL
-#define	IPG_RFS_UDPERROR			0x0000000004000000ULL
-#define	IPG_RFS_IPDETECTED			0x0000000008000000ULL
-#define	IPG_RFS_IPERROR				0x0000000010000000ULL
-#define	IPG_RFS_FRAMESTART			0x0000000020000000ULL
-#define	IPG_RFS_FRAMEEND			0x0000000040000000ULL
-#define	IPG_RFS_RFDDONE				0x0000000080000000ULL
-#define	IPG_RFS_TCI				0x0000FFFF00000000ULL
-
-/* RFDList, FragInfo */
-#define	IPG_RFI_RSVD_MASK			0xFFFF00FFFFFFFFFFULL
-#define	IPG_RFI_FRAGADDR			0x000000FFFFFFFFFFULL
-#define	IPG_RFI_FRAGLEN				0xFFFF000000000000ULL
-
-/* I/O Register masks. */
-
-/* RMON Statistics Mask */
-#define	IPG_RZ_ALL					0x0FFFFFFF
-
-/* Statistics Mask */
-#define	IPG_SM_ALL					0x0FFFFFFF
-#define	IPG_SM_OCTETRCVOK_FRAMESRCVDOK			0x00000001
-#define	IPG_SM_MCSTOCTETRCVDOK_MCSTFRAMESRCVDOK		0x00000002
-#define	IPG_SM_BCSTOCTETRCVDOK_BCSTFRAMESRCVDOK		0x00000004
-#define	IPG_SM_RXJUMBOFRAMES				0x00000008
-#define	IPG_SM_TCPCHECKSUMERRORS			0x00000010
-#define	IPG_SM_IPCHECKSUMERRORS				0x00000020
-#define	IPG_SM_UDPCHECKSUMERRORS			0x00000040
-#define	IPG_SM_MACCONTROLFRAMESRCVD			0x00000080
-#define	IPG_SM_FRAMESTOOLONGERRORS			0x00000100
-#define	IPG_SM_INRANGELENGTHERRORS			0x00000200
-#define	IPG_SM_FRAMECHECKSEQERRORS			0x00000400
-#define	IPG_SM_FRAMESLOSTRXERRORS			0x00000800
-#define	IPG_SM_OCTETXMTOK_FRAMESXMTOK			0x00001000
-#define	IPG_SM_MCSTOCTETXMTOK_MCSTFRAMESXMTDOK		0x00002000
-#define	IPG_SM_BCSTOCTETXMTOK_BCSTFRAMESXMTDOK		0x00004000
-#define	IPG_SM_FRAMESWDEFERREDXMT			0x00008000
-#define	IPG_SM_LATECOLLISIONS				0x00010000
-#define	IPG_SM_MULTICOLFRAMES				0x00020000
-#define	IPG_SM_SINGLECOLFRAMES				0x00040000
-#define	IPG_SM_TXJUMBOFRAMES				0x00080000
-#define	IPG_SM_CARRIERSENSEERRORS			0x00100000
-#define	IPG_SM_MACCONTROLFRAMESXMTD			0x00200000
-#define	IPG_SM_FRAMESABORTXSCOLLS			0x00400000
-#define	IPG_SM_FRAMESWEXDEFERAL				0x00800000
-
-/* Countdown */
-#define	IPG_CD_RSVD_MASK		0x0700FFFF
-#define	IPG_CD_COUNT			0x0000FFFF
-#define	IPG_CD_COUNTDOWNSPEED		0x01000000
-#define	IPG_CD_COUNTDOWNMODE		0x02000000
-#define	IPG_CD_COUNTINTENABLED		0x04000000
-
-/* TxDMABurstThresh */
-#define IPG_TB_RSVD_MASK                0xFF
-
-/* TxDMAUrgentThresh */
-#define IPG_TU_RSVD_MASK                0xFF
-
-/* TxDMAPollPeriod */
-#define IPG_TP_RSVD_MASK                0xFF
-
-/* RxDMAUrgentThresh */
-#define IPG_RU_RSVD_MASK                0xFF
-
-/* RxDMAPollPeriod */
-#define IPG_RP_RSVD_MASK                0xFF
-
-/* ReceiveMode */
-#define IPG_RM_RSVD_MASK                0x3F
-#define IPG_RM_RECEIVEUNICAST           0x01
-#define IPG_RM_RECEIVEMULTICAST         0x02
-#define IPG_RM_RECEIVEBROADCAST         0x04
-#define IPG_RM_RECEIVEALLFRAMES         0x08
-#define IPG_RM_RECEIVEMULTICASTHASH     0x10
-#define IPG_RM_RECEIVEIPMULTICAST       0x20
-
-/* PhySet */
-#define IPG_PS_MEM_LENB9B               0x01
-#define IPG_PS_MEM_LEN9                 0x02
-#define IPG_PS_NON_COMPDET              0x04
-
-/* PhyCtrl */
-#define IPG_PC_RSVD_MASK                0xFF
-#define IPG_PC_MGMTCLK_LO               0x00
-#define IPG_PC_MGMTCLK_HI               0x01
-#define IPG_PC_MGMTCLK                  0x01
-#define IPG_PC_MGMTDATA                 0x02
-#define IPG_PC_MGMTDIR                  0x04
-#define IPG_PC_DUPLEX_POLARITY          0x08
-#define IPG_PC_DUPLEX_STATUS            0x10
-#define IPG_PC_LINK_POLARITY            0x20
-#define IPG_PC_LINK_SPEED               0xC0
-#define IPG_PC_LINK_SPEED_10MBPS        0x40
-#define IPG_PC_LINK_SPEED_100MBPS       0x80
-#define IPG_PC_LINK_SPEED_1000MBPS      0xC0
-
-/* DMACtrl */
-#define IPG_DC_RSVD_MASK                0xC07D9818
-#define IPG_DC_RX_DMA_COMPLETE          0x00000008
-#define IPG_DC_RX_DMA_POLL_NOW          0x00000010
-#define IPG_DC_TX_DMA_COMPLETE          0x00000800
-#define IPG_DC_TX_DMA_POLL_NOW          0x00001000
-#define IPG_DC_TX_DMA_IN_PROG           0x00008000
-#define IPG_DC_RX_EARLY_DISABLE         0x00010000
-#define IPG_DC_MWI_DISABLE              0x00040000
-#define IPG_DC_TX_WRITE_BACK_DISABLE    0x00080000
-#define IPG_DC_TX_BURST_LIMIT           0x00700000
-#define IPG_DC_TARGET_ABORT             0x40000000
-#define IPG_DC_MASTER_ABORT             0x80000000
-
-/* ASICCtrl */
-#define IPG_AC_RSVD_MASK                0x07FFEFF2
-#define IPG_AC_EXP_ROM_SIZE             0x00000002
-#define IPG_AC_PHY_SPEED10              0x00000010
-#define IPG_AC_PHY_SPEED100             0x00000020
-#define IPG_AC_PHY_SPEED1000            0x00000040
-#define IPG_AC_PHY_MEDIA                0x00000080
-#define IPG_AC_FORCED_CFG               0x00000700
-#define IPG_AC_D3RESETDISABLE           0x00000800
-#define IPG_AC_SPEED_UP_MODE            0x00002000
-#define IPG_AC_LED_MODE                 0x00004000
-#define IPG_AC_RST_OUT_POLARITY         0x00008000
-#define IPG_AC_GLOBAL_RESET             0x00010000
-#define IPG_AC_RX_RESET                 0x00020000
-#define IPG_AC_TX_RESET                 0x00040000
-#define IPG_AC_DMA                      0x00080000
-#define IPG_AC_FIFO                     0x00100000
-#define IPG_AC_NETWORK                  0x00200000
-#define IPG_AC_HOST                     0x00400000
-#define IPG_AC_AUTO_INIT                0x00800000
-#define IPG_AC_RST_OUT                  0x01000000
-#define IPG_AC_INT_REQUEST              0x02000000
-#define IPG_AC_RESET_BUSY               0x04000000
-#define IPG_AC_LED_SPEED                0x08000000
-#define IPG_AC_LED_MODE_BIT_1           0x20000000
-
-/* EepromCtrl */
-#define IPG_EC_RSVD_MASK                0x83FF
-#define IPG_EC_EEPROM_ADDR              0x00FF
-#define IPG_EC_EEPROM_OPCODE            0x0300
-#define IPG_EC_EEPROM_SUBCOMMAD         0x0000
-#define IPG_EC_EEPROM_WRITEOPCODE       0x0100
-#define IPG_EC_EEPROM_READOPCODE        0x0200
-#define IPG_EC_EEPROM_ERASEOPCODE       0x0300
-#define IPG_EC_EEPROM_BUSY              0x8000
-
-/* FIFOCtrl */
-#define IPG_FC_RSVD_MASK                0xC001
-#define IPG_FC_RAM_TEST_MODE            0x0001
-#define IPG_FC_TRANSMITTING             0x4000
-#define IPG_FC_RECEIVING                0x8000
-
-/* TxStatus */
-#define IPG_TS_RSVD_MASK                0xFFFF00DD
-#define IPG_TS_TX_ERROR                 0x00000001
-#define IPG_TS_LATE_COLLISION           0x00000004
-#define IPG_TS_TX_MAX_COLL              0x00000008
-#define IPG_TS_TX_UNDERRUN              0x00000010
-#define IPG_TS_TX_IND_REQD              0x00000040
-#define IPG_TS_TX_COMPLETE              0x00000080
-#define IPG_TS_TX_FRAMEID               0xFFFF0000
-
-/* WakeEvent */
-#define IPG_WE_WAKE_PKT_ENABLE          0x01
-#define IPG_WE_MAGIC_PKT_ENABLE         0x02
-#define IPG_WE_LINK_EVT_ENABLE          0x04
-#define IPG_WE_WAKE_POLARITY            0x08
-#define IPG_WE_WAKE_PKT_EVT             0x10
-#define IPG_WE_MAGIC_PKT_EVT            0x20
-#define IPG_WE_LINK_EVT                 0x40
-#define IPG_WE_WOL_ENABLE               0x80
-
-/* IntEnable */
-#define IPG_IE_RSVD_MASK                0x1FFE
-#define IPG_IE_HOST_ERROR               0x0002
-#define IPG_IE_TX_COMPLETE              0x0004
-#define IPG_IE_MAC_CTRL_FRAME           0x0008
-#define IPG_IE_RX_COMPLETE              0x0010
-#define IPG_IE_RX_EARLY                 0x0020
-#define IPG_IE_INT_REQUESTED            0x0040
-#define IPG_IE_UPDATE_STATS             0x0080
-#define IPG_IE_LINK_EVENT               0x0100
-#define IPG_IE_TX_DMA_COMPLETE          0x0200
-#define IPG_IE_RX_DMA_COMPLETE          0x0400
-#define IPG_IE_RFD_LIST_END             0x0800
-#define IPG_IE_RX_DMA_PRIORITY          0x1000
-
-/* IntStatus */
-#define IPG_IS_RSVD_MASK                0x1FFF
-#define IPG_IS_INTERRUPT_STATUS         0x0001
-#define IPG_IS_HOST_ERROR               0x0002
-#define IPG_IS_TX_COMPLETE              0x0004
-#define IPG_IS_MAC_CTRL_FRAME           0x0008
-#define IPG_IS_RX_COMPLETE              0x0010
-#define IPG_IS_RX_EARLY                 0x0020
-#define IPG_IS_INT_REQUESTED            0x0040
-#define IPG_IS_UPDATE_STATS             0x0080
-#define IPG_IS_LINK_EVENT               0x0100
-#define IPG_IS_TX_DMA_COMPLETE          0x0200
-#define IPG_IS_RX_DMA_COMPLETE          0x0400
-#define IPG_IS_RFD_LIST_END             0x0800
-#define IPG_IS_RX_DMA_PRIORITY          0x1000
-
-/* MACCtrl */
-#define IPG_MC_RSVD_MASK                0x7FE33FA3
-#define IPG_MC_IFS_SELECT               0x00000003
-#define IPG_MC_IFS_4352BIT              0x00000003
-#define IPG_MC_IFS_1792BIT              0x00000002
-#define IPG_MC_IFS_1024BIT              0x00000001
-#define IPG_MC_IFS_96BIT                0x00000000
-#define IPG_MC_DUPLEX_SELECT            0x00000020
-#define IPG_MC_DUPLEX_SELECT_FD         0x00000020
-#define IPG_MC_DUPLEX_SELECT_HD         0x00000000
-#define IPG_MC_TX_FLOW_CONTROL_ENABLE   0x00000080
-#define IPG_MC_RX_FLOW_CONTROL_ENABLE   0x00000100
-#define IPG_MC_RCV_FCS                  0x00000200
-#define IPG_MC_FIFO_LOOPBACK            0x00000400
-#define IPG_MC_MAC_LOOPBACK             0x00000800
-#define IPG_MC_AUTO_VLAN_TAGGING        0x00001000
-#define IPG_MC_AUTO_VLAN_UNTAGGING      0x00002000
-#define IPG_MC_COLLISION_DETECT         0x00010000
-#define IPG_MC_CARRIER_SENSE            0x00020000
-#define IPG_MC_STATISTICS_ENABLE        0x00200000
-#define IPG_MC_STATISTICS_DISABLE       0x00400000
-#define IPG_MC_STATISTICS_ENABLED       0x00800000
-#define IPG_MC_TX_ENABLE                0x01000000
-#define IPG_MC_TX_DISABLE               0x02000000
-#define IPG_MC_TX_ENABLED               0x04000000
-#define IPG_MC_RX_ENABLE                0x08000000
-#define IPG_MC_RX_DISABLE               0x10000000
-#define IPG_MC_RX_ENABLED               0x20000000
-#define IPG_MC_PAUSED                   0x40000000
-
-/*
- *	Tune
- */
-
-/* Assign IPG_APPEND_FCS_ON_TX > 0 for auto FCS append on TX. */
-#define         IPG_APPEND_FCS_ON_TX         1
-
-/* Assign IPG_APPEND_FCS_ON_TX > 0 for auto FCS strip on RX. */
-#define         IPG_STRIP_FCS_ON_RX          1
-
-/* Assign IPG_DROP_ON_RX_ETH_ERRORS > 0 to drop RX frames with
- * Ethernet errors.
- */
-#define         IPG_DROP_ON_RX_ETH_ERRORS    1
-
-/* Assign IPG_INSERT_MANUAL_VLAN_TAG > 0 to insert VLAN tags manually
- * (via TFC).
- */
-#define		IPG_INSERT_MANUAL_VLAN_TAG   0
-
-/* Assign IPG_ADD_IPCHECKSUM_ON_TX > 0 for auto IP checksum on TX. */
-#define         IPG_ADD_IPCHECKSUM_ON_TX     0
-
-/* Assign IPG_ADD_TCPCHECKSUM_ON_TX > 0 for auto TCP checksum on TX.
- * DO NOT USE FOR SILICON REVISIONS B3 AND EARLIER.
- */
-#define         IPG_ADD_TCPCHECKSUM_ON_TX    0
-
-/* Assign IPG_ADD_UDPCHECKSUM_ON_TX > 0 for auto UDP checksum on TX.
- * DO NOT USE FOR SILICON REVISIONS B3 AND EARLIER.
- */
-#define         IPG_ADD_UDPCHECKSUM_ON_TX    0
-
-/* If inserting VLAN tags manually, assign the IPG_MANUAL_VLAN_xx
- * constants as desired.
- */
-#define		IPG_MANUAL_VLAN_VID		0xABC
-#define		IPG_MANUAL_VLAN_CFI		0x1
-#define		IPG_MANUAL_VLAN_USERPRIORITY 0x5
-
-#define         IPG_IO_REG_RANGE		0xFF
-#define         IPG_MEM_REG_RANGE		0x154
-#define         IPG_DRIVER_NAME		"Sundance Technology IPG Triple-Speed Ethernet"
-#define         IPG_NIC_PHY_ADDRESS          0x01
-#define		IPG_DMALIST_ALIGN_PAD	0x07
-#define		IPG_MULTICAST_HASHTABLE_SIZE	0x40
-
-/* Number of milliseconds to wait after issuing a software reset.
- * 0x05 <= IPG_AC_RESETWAIT to account for proper 10Mbps operation.
- */
-#define         IPG_AC_RESETWAIT             0x05
-
-/* Number of IPG_AC_RESETWAIT timeperiods before declaring timeout. */
-#define         IPG_AC_RESET_TIMEOUT         0x0A
-
-/* Minimum number of nanoseconds used to toggle MDC clock during
- * MII/GMII register access.
- */
-#define		IPG_PC_PHYCTRLWAIT_NS		200
-
-#define		IPG_TFDLIST_LENGTH		0x100
-
-/* Number of frames between TxDMAComplete interrupt.
- * 0 < IPG_FRAMESBETWEENTXDMACOMPLETES <= IPG_TFDLIST_LENGTH
- */
-#define		IPG_FRAMESBETWEENTXDMACOMPLETES 0x1
-
-#define		IPG_RFDLIST_LENGTH		0x100
-
-/* Maximum number of RFDs to process per interrupt.
- * 1 < IPG_MAXRFDPROCESS_COUNT < IPG_RFDLIST_LENGTH
- */
-#define		IPG_MAXRFDPROCESS_COUNT	0x80
-
-/* Minimum margin between last freed RFD, and current RFD.
- * 1 < IPG_MINUSEDRFDSTOFREE < IPG_RFDLIST_LENGTH
- */
-#define		IPG_MINUSEDRFDSTOFREE	0x80
-
-/* specify the jumbo frame maximum size
- * per unit is 0x600 (the rx_buffer size that one RFD can carry)
- */
-#define     MAX_JUMBOSIZE	        0x8	/* max is 12K */
-
-/* Key register values loaded at driver start up. */
-
-/* TXDMAPollPeriod is specified in 320ns increments.
- *
- * Value	Time
- * ---------------------
- * 0x00-0x01	320ns
- * 0x03		~1us
- * 0x1F		~10us
- * 0xFF		~82us
- */
-#define		IPG_TXDMAPOLLPERIOD_VALUE	0x26
-
-/* TxDMAUrgentThresh specifies the minimum amount of
- * data in the transmit FIFO before asserting an
- * urgent transmit DMA request.
- *
- * Value	Min TxFIFO occupied space before urgent TX request
- * ---------------------------------------------------------------
- * 0x00-0x04	128 bytes (1024 bits)
- * 0x27		1248 bytes (~10000 bits)
- * 0x30		1536 bytes (12288 bits)
- * 0xFF		8192 bytes (65535 bits)
- */
-#define		IPG_TXDMAURGENTTHRESH_VALUE	0x04
-
-/* TxDMABurstThresh specifies the minimum amount of
- * free space in the transmit FIFO before asserting an
- * transmit DMA request.
- *
- * Value	Min TxFIFO free space before TX request
- * ----------------------------------------------------
- * 0x00-0x08	256 bytes
- * 0x30		1536 bytes
- * 0xFF		8192 bytes
- */
-#define		IPG_TXDMABURSTTHRESH_VALUE	0x30
-
-/* RXDMAPollPeriod is specified in 320ns increments.
- *
- * Value	Time
- * ---------------------
- * 0x00-0x01	320ns
- * 0x03		~1us
- * 0x1F		~10us
- * 0xFF		~82us
- */
-#define		IPG_RXDMAPOLLPERIOD_VALUE	0x01
-
-/* RxDMAUrgentThresh specifies the minimum amount of
- * free space within the receive FIFO before asserting
- * a urgent receive DMA request.
- *
- * Value	Min RxFIFO free space before urgent RX request
- * ---------------------------------------------------------------
- * 0x00-0x04	128 bytes (1024 bits)
- * 0x27		1248 bytes (~10000 bits)
- * 0x30		1536 bytes (12288 bits)
- * 0xFF		8192 bytes (65535 bits)
- */
-#define		IPG_RXDMAURGENTTHRESH_VALUE	0x30
-
-/* RxDMABurstThresh specifies the minimum amount of
- * occupied space within the receive FIFO before asserting
- * a receive DMA request.
- *
- * Value	Min TxFIFO free space before TX request
- * ----------------------------------------------------
- * 0x00-0x08	256 bytes
- * 0x30		1536 bytes
- * 0xFF		8192 bytes
- */
-#define		IPG_RXDMABURSTTHRESH_VALUE	0x30
-
-/* FlowOnThresh specifies the maximum amount of occupied
- * space in the receive FIFO before a PAUSE frame with
- * maximum pause time transmitted.
- *
- * Value	Max RxFIFO occupied space before PAUSE
- * ---------------------------------------------------
- * 0x0000	0 bytes
- * 0x0740	29,696 bytes
- * 0x07FF	32,752 bytes
- */
-#define		IPG_FLOWONTHRESH_VALUE	0x0740
-
-/* FlowOffThresh specifies the minimum amount of occupied
- * space in the receive FIFO before a PAUSE frame with
- * zero pause time is transmitted.
- *
- * Value	Max RxFIFO occupied space before PAUSE
- * ---------------------------------------------------
- * 0x0000	0 bytes
- * 0x00BF	3056 bytes
- * 0x07FF	32,752 bytes
- */
-#define		IPG_FLOWOFFTHRESH_VALUE	0x00BF
-
-/*
- * Miscellaneous macros.
- */
-
-/* Macros for printing debug statements. */
-#ifdef IPG_DEBUG
-#  define IPG_DEBUG_MSG(fmt, args...)			\
-do {							\
-	if (0)						\
-		printk(KERN_DEBUG "IPG: " fmt, ##args);	\
-} while (0)
-#  define IPG_DDEBUG_MSG(fmt, args...)			\
-	printk(KERN_DEBUG "IPG: " fmt, ##args)
-#  define IPG_DUMPRFDLIST(args) ipg_dump_rfdlist(args)
-#  define IPG_DUMPTFDLIST(args) ipg_dump_tfdlist(args)
-#else
-#  define IPG_DEBUG_MSG(fmt, args...)			\
-do {							\
-	if (0)						\
-		printk(KERN_DEBUG "IPG: " fmt, ##args);	\
-} while (0)
-#  define IPG_DDEBUG_MSG(fmt, args...)			\
-do {							\
-	if (0)						\
-		printk(KERN_DEBUG "IPG: " fmt, ##args);	\
-} while (0)
-#  define IPG_DUMPRFDLIST(args)
-#  define IPG_DUMPTFDLIST(args)
-#endif
-
-/*
- * End miscellaneous macros.
- */
-
-/* Transmit Frame Descriptor. The IPG supports 15 fragments,
- * however Linux requires only a single fragment. Note, each
- * TFD field is 64 bits wide.
- */
-struct ipg_tx {
-	__le64 next_desc;
-	__le64 tfc;
-	__le64 frag_info;
-};
-
-/* Receive Frame Descriptor. Note, each RFD field is 64 bits wide.
- */
-struct ipg_rx {
-	__le64 next_desc;
-	__le64 rfs;
-	__le64 frag_info;
-};
-
-struct ipg_jumbo {
-	int found_start;
-	int current_size;
-	struct sk_buff *skb;
-};
-
-/* Structure of IPG NIC specific data. */
-struct ipg_nic_private {
-	void __iomem *ioaddr;
-	struct ipg_tx *txd;
-	struct ipg_rx *rxd;
-	dma_addr_t txd_map;
-	dma_addr_t rxd_map;
-	struct sk_buff *tx_buff[IPG_TFDLIST_LENGTH];
-	struct sk_buff *rx_buff[IPG_RFDLIST_LENGTH];
-	unsigned int tx_current;
-	unsigned int tx_dirty;
-	unsigned int rx_current;
-	unsigned int rx_dirty;
-	bool is_jumbo;
-	struct ipg_jumbo jumbo;
-	unsigned long rxfrag_size;
-	unsigned long rxsupport_size;
-	unsigned long max_rxframe_size;
-	unsigned int rx_buf_sz;
-	struct pci_dev *pdev;
-	struct net_device *dev;
-	struct net_device_stats stats;
-	spinlock_t lock;
-	int tenmbpsmode;
-
-	u16 led_mode;
-	u16 station_addr[3];	/* Station Address in EEPROM Reg 0x10..0x12 */
-
-	struct mutex		mii_mutex;
-	struct mii_if_info	mii_if;
-	int reset_current_tfd;
-#ifdef IPG_DEBUG
-	int RFDlistendCount;
-	int RFDListCheckedCount;
-	int EmptyRFDListCount;
-#endif
-	struct delayed_work task;
-};
-
-#endif				/* __LINUX_IPG_H */

From 6c606fa32c4272894248c1abbc15257fc04250be Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
Date: Mon, 16 Nov 2015 14:36:32 +0200
Subject: [PATCH 68/83] via-velocity: unconditionally drop frames with bad l2
 length
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

By default the driver allowed incorrect frames to be received. What is
worse the code does not handle very short frames correctly. The FCS
length is unconditionally subtracted, and the underflow can cause
skb_put to be called with large number after implicit cast to unsigned.
And indeed, an skb_over_panic() was observed with via-velocity.

This removes the module parameter as it does not work in it's
current state, and should be implemented via NETIF_F_RXALL if needed.

Suggested-by: Francois Romieu <romieu@fr.zoreil.com>
Signed-off-by: Timo Teräs <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/via/via-velocity.c | 24 +++---------------------
 1 file changed, 3 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c
index ae68afd50a15..f38696ceee74 100644
--- a/drivers/net/ethernet/via/via-velocity.c
+++ b/drivers/net/ethernet/via/via-velocity.c
@@ -345,13 +345,6 @@ VELOCITY_PARAM(flow_control, "Enable flow control ability");
 */
 VELOCITY_PARAM(speed_duplex, "Setting the speed and duplex mode");
 
-#define VAL_PKT_LEN_DEF     0
-/* ValPktLen[] is used for setting the checksum offload ability of NIC.
-   0: Receive frame with invalid layer 2 length (Default)
-   1: Drop frame with invalid layer 2 length
-*/
-VELOCITY_PARAM(ValPktLen, "Receiving or Drop invalid 802.3 frame");
-
 #define WOL_OPT_DEF     0
 #define WOL_OPT_MIN     0
 #define WOL_OPT_MAX     7
@@ -494,7 +487,6 @@ static void velocity_get_options(struct velocity_opt *opts, int index,
 
 	velocity_set_int_opt(&opts->flow_cntl, flow_control[index], FLOW_CNTL_MIN, FLOW_CNTL_MAX, FLOW_CNTL_DEF, "flow_control", devname);
 	velocity_set_bool_opt(&opts->flags, IP_byte_align[index], IP_ALIG_DEF, VELOCITY_FLAGS_IP_ALIGN, "IP_byte_align", devname);
-	velocity_set_bool_opt(&opts->flags, ValPktLen[index], VAL_PKT_LEN_DEF, VELOCITY_FLAGS_VAL_PKT_LEN, "ValPktLen", devname);
 	velocity_set_int_opt((int *) &opts->spd_dpx, speed_duplex[index], MED_LNK_MIN, MED_LNK_MAX, MED_LNK_DEF, "Media link mode", devname);
 	velocity_set_int_opt(&opts->wol_opts, wol_opts[index], WOL_OPT_MIN, WOL_OPT_MAX, WOL_OPT_DEF, "Wake On Lan options", devname);
 	opts->numrx = (opts->numrx & ~3);
@@ -2055,8 +2047,9 @@ static int velocity_receive_frame(struct velocity_info *vptr, int idx)
 	int pkt_len = le16_to_cpu(rd->rdesc0.len) & 0x3fff;
 	struct sk_buff *skb;
 
-	if (rd->rdesc0.RSR & (RSR_STP | RSR_EDP)) {
-		VELOCITY_PRT(MSG_LEVEL_VERBOSE, KERN_ERR " %s : the received frame spans multiple RDs.\n", vptr->netdev->name);
+	if (unlikely(rd->rdesc0.RSR & (RSR_STP | RSR_EDP | RSR_RL))) {
+		if (rd->rdesc0.RSR & (RSR_STP | RSR_EDP))
+			VELOCITY_PRT(MSG_LEVEL_VERBOSE, KERN_ERR " %s : the received frame spans multiple RDs.\n", vptr->netdev->name);
 		stats->rx_length_errors++;
 		return -EINVAL;
 	}
@@ -2069,17 +2062,6 @@ static int velocity_receive_frame(struct velocity_info *vptr, int idx)
 	dma_sync_single_for_cpu(vptr->dev, rd_info->skb_dma,
 				    vptr->rx.buf_sz, DMA_FROM_DEVICE);
 
-	/*
-	 *	Drop frame not meeting IEEE 802.3
-	 */
-
-	if (vptr->flags & VELOCITY_FLAGS_VAL_PKT_LEN) {
-		if (rd->rdesc0.RSR & RSR_RL) {
-			stats->rx_length_errors++;
-			return -EINVAL;
-		}
-	}
-
 	velocity_rx_csum(rd, skb);
 
 	if (velocity_rx_copy(&skb, pkt_len, vptr) < 0) {

From a6e18ff111701b4ff6947605bfbe9594ec42a6e8 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Mon, 16 Nov 2015 15:43:44 -0500
Subject: [PATCH 69/83] vlan: Fix untag operations of stacked vlans with
 REORDER_HEADER off

When we have multiple stacked vlan devices all of which have
turned off REORDER_HEADER flag, the untag operation does not
locate the ethernet addresses correctly for nested vlans.
The reason is that in case of REORDER_HEADER flag being off,
the outer vlan headers are put back and the mac_len is adjusted
to account for the presense of the header.  Then, the subsequent
untag operation, for the next level vlan, always use VLAN_ETH_HLEN
to locate the begining of the ethernet header and that ends up
being a multiple of 4 bytes short of the actuall beginning
of the mac header (the multiple depending on the how many vlan
encapsulations ethere are).

As a reslult, if there are multiple levles of vlan devices
with REODER_HEADER being off, the recevied packets end up
being dropped.

To solve this, we use skb->mac_len as the offset.  The value
is always set on receive path and starts out as a ETH_HLEN.
The value is also updated when the vlan header manupations occur
so we know it will be correct.

Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index aa41e6dd6429..152b9c70e252 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4268,7 +4268,8 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
 		return NULL;
 	}
 
-	memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
+	memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len,
+		2 * ETH_ALEN);
 	skb->mac_header += VLAN_HLEN;
 	return skb;
 }

From 28f9ee22bcdd84726dbf6267d0b58f254166b900 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Mon, 16 Nov 2015 15:43:45 -0500
Subject: [PATCH 70/83] vlan: Do not put vlan headers back on bridge and
 macvlan ports

When a vlan is configured with REORDER_HEADER set to 0, the vlan
header is put back into the packet and makes it appear that
the vlan header is still there even after it's been processed.
This posses a problem for bridge and macvlan ports.  The packets
passed to those device may be forwarded and at the time of the
forward, vlan headers end up being unexpectedly present.

With the patch, we make sure that we do not put the vlan header
back (when REORDER_HEADER is 0) if a bridge or macvlan has
been configured on top of the vlan device.

Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 5 +++++
 net/8021q/vlan_core.c     | 4 +++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cc221b967687..67bfac1abfc1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3857,6 +3857,11 @@ static inline bool netif_is_bridge_master(const struct net_device *dev)
 	return dev->priv_flags & IFF_EBRIDGE;
 }
 
+static inline bool netif_is_bridge_port(const struct net_device *dev)
+{
+	return dev->priv_flags & IFF_BRIDGE_PORT;
+}
+
 static inline bool netif_is_ovs_master(const struct net_device *dev)
 {
 	return dev->priv_flags & IFF_OPENVSWITCH;
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 496b27588493..e2ed69850489 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -30,7 +30,9 @@ bool vlan_do_receive(struct sk_buff **skbp)
 			skb->pkt_type = PACKET_HOST;
 	}
 
-	if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) {
+	if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR) &&
+	    !netif_is_macvlan_port(vlan_dev) &&
+	    !netif_is_bridge_port(vlan_dev)) {
 		unsigned int offset = skb->data - skb_mac_header(skb);
 
 		/*

From cf554ada0be7077906aa9a17faf151ff66e3cb8e Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Mon, 16 Nov 2015 22:34:26 +0100
Subject: [PATCH 71/83] ipvlan: fix leak in ipvlan_rcv_frame

Pass a **skb to ipvlan_rcv_frame so that if skb_share_check returns a
new skb, we actually use it during further processing.

It's safe to ignore the new skb in the ipvlan_xmit_* functions, because
they call ipvlan_rcv_frame with local == true, so that dev_forward_skb
is called and always takes ownership of the skb.

Fixes: 2ad7bf363841 ("ipvlan: Initial check-in of the IPVLAN driver.")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipvlan/ipvlan_core.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index d50887e3df6d..a9148e46dad0 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -254,7 +254,7 @@ acct:
 	}
 }
 
-static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff *skb,
+static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb,
 			    bool local)
 {
 	struct ipvl_dev *ipvlan = addr->master;
@@ -262,6 +262,7 @@ static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff *skb,
 	unsigned int len;
 	rx_handler_result_t ret = RX_HANDLER_CONSUMED;
 	bool success = false;
+	struct sk_buff *skb = *pskb;
 
 	len = skb->len + ETH_HLEN;
 	if (unlikely(!(dev->flags & IFF_UP))) {
@@ -273,6 +274,7 @@ static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff *skb,
 	if (!skb)
 		goto out;
 
+	*pskb = skb;
 	skb->dev = dev;
 	skb->pkt_type = PACKET_HOST;
 
@@ -486,7 +488,7 @@ static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
 
 	addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
 	if (addr)
-		return ipvlan_rcv_frame(addr, skb, true);
+		return ipvlan_rcv_frame(addr, &skb, true);
 
 out:
 	skb->dev = ipvlan->phy_dev;
@@ -506,7 +508,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 		if (lyr3h) {
 			addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
 			if (addr)
-				return ipvlan_rcv_frame(addr, skb, true);
+				return ipvlan_rcv_frame(addr, &skb, true);
 		}
 		skb = skb_share_check(skb, GFP_ATOMIC);
 		if (!skb)
@@ -589,7 +591,7 @@ static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb,
 
 	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
 	if (addr)
-		ret = ipvlan_rcv_frame(addr, skb, false);
+		ret = ipvlan_rcv_frame(addr, pskb, false);
 
 out:
 	return ret;
@@ -626,7 +628,7 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
 
 		addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
 		if (addr)
-			ret = ipvlan_rcv_frame(addr, skb, false);
+			ret = ipvlan_rcv_frame(addr, pskb, false);
 	}
 
 	return ret;

From a534dc529853c69e94994aa47c1d80a03ce2c11d Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Mon, 16 Nov 2015 22:44:53 +0100
Subject: [PATCH 72/83] ipvlan: fix use after free of skb

ipvlan_handle_frame is a rx_handler, and when it returns a value other
than RX_HANDLER_CONSUMED (here, NET_RX_DROP aka RX_HANDLER_ANOTHER),
__netif_receive_skb_core expects that the skb still exists and will
process it further, but we just freed it.

Fixes: 2ad7bf363841 ("ipvlan: Initial check-in of the IPVLAN driver.")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipvlan/ipvlan_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index a9148e46dad0..8c48bb2a94ea 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -653,5 +653,5 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
 	WARN_ONCE(true, "ipvlan_handle_frame() called for mode = [%hx]\n",
 			  port->mode);
 	kfree_skb(skb);
-	return NET_RX_DROP;
+	return RX_HANDLER_CONSUMED;
 }

From e639b8d8a7a728f0b05ef2df6cb6b45dc3d4e556 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Mon, 16 Nov 2015 22:54:20 +0100
Subject: [PATCH 73/83] macvlan: fix leak in macvlan_handle_frame

Reset pskb in macvlan_handle_frame in case skb_share_check returned a
clone.

Fixes: 8a4eb5734e8d ("net: introduce rx_handler results and logic around that")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 86f6c6292c27..06c8bfeaccd6 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -415,6 +415,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
 		skb = ip_check_defrag(dev_net(skb->dev), skb, IP_DEFRAG_MACVLAN);
 		if (!skb)
 			return RX_HANDLER_CONSUMED;
+		*pskb = skb;
 		eth = eth_hdr(skb);
 		macvlan_forward_source(skb, port, eth->h_source);
 		src = macvlan_hash_lookup(port, eth->h_source);
@@ -456,6 +457,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
 		goto out;
 	}
 
+	*pskb = skb;
 	skb->dev = dev;
 	skb->pkt_type = PACKET_HOST;
 

From ec0738db8d7a9bd6653d9f9d81b201a519144d63 Mon Sep 17 00:00:00 2001
From: Yang Shi <yang.shi@linaro.org>
Date: Mon, 16 Nov 2015 14:35:35 -0800
Subject: [PATCH 74/83] arm64: bpf: make BPF prologue and epilogue align with
 ARM64 AAPCS

Save and restore FP/LR in BPF prog prologue and epilogue, save SP to FP
in prologue in order to get the correct stack backtrace.

However, ARM64 JIT used FP (x29) as eBPF fp register, FP is subjected to
change during function call so it may cause the BPF prog stack base address
change too.

Use x25 to replace FP as BPF stack base register (fp). Since x25 is callee
saved register, so it will keep intact during function call.
It is initialized in BPF prog prologue when BPF prog is started to run
everytime. Save and restore x25/x26 in BPF prologue and epilogue to keep
them intact for the outside of BPF. Actually, x26 is unnecessary, but SP
requires 16 bytes alignment.

So, the BPF stack layout looks like:

                                 high
         original A64_SP =>   0:+-----+ BPF prologue
                                |FP/LR|
         current A64_FP =>  -16:+-----+
                                | ... | callee saved registers
                                +-----+
                                |     | x25/x26
         BPF fp register => -80:+-----+
                                |     |
                                | ... | BPF prog stack
                                |     |
                                |     |
         current A64_SP =>      +-----+
                                |     |
                                | ... | Function call stack
                                |     |
                                +-----+
                                  low

CC: Zi Shen Lim <zlim.lnx@gmail.com>
CC: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Yang Shi <yang.shi@linaro.org>
Acked-by: Zi Shen Lim <zlim.lnx@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm64/net/bpf_jit_comp.c | 44 +++++++++++++++++++++++++++++++----
 1 file changed, 39 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 64a8bc12f4a2..a6905b8d12d3 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -50,7 +50,7 @@ static const int bpf2a64[] = {
 	[BPF_REG_8] = A64_R(21),
 	[BPF_REG_9] = A64_R(22),
 	/* read-only frame pointer to access stack */
-	[BPF_REG_FP] = A64_FP,
+	[BPF_REG_FP] = A64_R(25),
 	/* temporary register for internal BPF JIT */
 	[TMP_REG_1] = A64_R(23),
 	[TMP_REG_2] = A64_R(24),
@@ -155,16 +155,47 @@ static void build_prologue(struct jit_ctx *ctx)
 	stack_size += 4; /* extra for skb_copy_bits buffer */
 	stack_size = STACK_ALIGN(stack_size);
 
+	/*
+	 * BPF prog stack layout
+	 *
+	 *                         high
+	 * original A64_SP =>   0:+-----+ BPF prologue
+	 *                        |FP/LR|
+	 * current A64_FP =>  -16:+-----+
+	 *                        | ... | callee saved registers
+	 *                        +-----+
+	 *                        |     | x25/x26
+	 * BPF fp register => -80:+-----+
+	 *                        |     |
+	 *                        | ... | BPF prog stack
+	 *                        |     |
+	 *                        |     |
+	 * current A64_SP =>      +-----+
+	 *                        |     |
+	 *                        | ... | Function call stack
+	 *                        |     |
+	 *                        +-----+
+	 *                          low
+	 *
+	 */
+
+	/* Save FP and LR registers to stay align with ARM64 AAPCS */
+	emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
+	emit(A64_MOV(1, A64_FP, A64_SP), ctx);
+
 	/* Save callee-saved register */
 	emit(A64_PUSH(r6, r7, A64_SP), ctx);
 	emit(A64_PUSH(r8, r9, A64_SP), ctx);
 	if (ctx->tmp_used)
 		emit(A64_PUSH(tmp1, tmp2, A64_SP), ctx);
 
-	/* Set up frame pointer */
+	/* Save fp (x25) and x26. SP requires 16 bytes alignment */
+	emit(A64_PUSH(fp, A64_R(26), A64_SP), ctx);
+
+	/* Set up BPF prog stack base register (x25) */
 	emit(A64_MOV(1, fp, A64_SP), ctx);
 
-	/* Set up BPF stack */
+	/* Set up function call stack */
 	emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx);
 
 	/* Clear registers A and X */
@@ -190,14 +221,17 @@ static void build_epilogue(struct jit_ctx *ctx)
 	/* We're done with BPF stack */
 	emit(A64_ADD_I(1, A64_SP, A64_SP, stack_size), ctx);
 
+	/* Restore fs (x25) and x26 */
+	emit(A64_POP(fp, A64_R(26), A64_SP), ctx);
+
 	/* Restore callee-saved register */
 	if (ctx->tmp_used)
 		emit(A64_POP(tmp1, tmp2, A64_SP), ctx);
 	emit(A64_POP(r8, r9, A64_SP), ctx);
 	emit(A64_POP(r6, r7, A64_SP), ctx);
 
-	/* Restore frame pointer */
-	emit(A64_MOV(1, fp, A64_SP), ctx);
+	/* Restore FP/LR registers */
+	emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
 
 	/* Set return value */
 	emit(A64_MOV(1, A64_R(0), r0), ctx);

From 819ec8e1f349f73bdf65bf33a364538e59007a9a Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Mon, 16 Nov 2015 23:34:41 +0100
Subject: [PATCH 75/83] phy: marvell: Add support for 88E1540 PHY

The 88E1540 can be found embedded in the Marvell 88E6352 switch.  It
is compatible with the 88E1510, so add support for it, using the
88E1510 specific functions.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell.c   | 16 ++++++++++++++++
 include/linux/marvell_phy.h |  1 +
 2 files changed, 17 insertions(+)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 5de8d5827536..0240552b50f3 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -1153,6 +1153,21 @@ static struct phy_driver marvell_drivers[] = {
 		.suspend = &genphy_suspend,
 		.driver = { .owner = THIS_MODULE },
 	},
+	{
+		.phy_id = MARVELL_PHY_ID_88E1540,
+		.phy_id_mask = MARVELL_PHY_ID_MASK,
+		.name = "Marvell 88E1540",
+		.features = PHY_GBIT_FEATURES,
+		.flags = PHY_HAS_INTERRUPT,
+		.config_aneg = &m88e1510_config_aneg,
+		.read_status = &marvell_read_status,
+		.ack_interrupt = &marvell_ack_interrupt,
+		.config_intr = &marvell_config_intr,
+		.did_interrupt = &m88e1121_did_interrupt,
+		.resume = &genphy_resume,
+		.suspend = &genphy_suspend,
+		.driver = { .owner = THIS_MODULE },
+	},
 	{
 		.phy_id = MARVELL_PHY_ID_88E3016,
 		.phy_id_mask = MARVELL_PHY_ID_MASK,
@@ -1186,6 +1201,7 @@ static struct mdio_device_id __maybe_unused marvell_tbl[] = {
 	{ MARVELL_PHY_ID_88E1318S, MARVELL_PHY_ID_MASK },
 	{ MARVELL_PHY_ID_88E1116R, MARVELL_PHY_ID_MASK },
 	{ MARVELL_PHY_ID_88E1510, MARVELL_PHY_ID_MASK },
+	{ MARVELL_PHY_ID_88E1540, MARVELL_PHY_ID_MASK },
 	{ MARVELL_PHY_ID_88E3016, MARVELL_PHY_ID_MASK },
 	{ }
 };
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index e6982ac3200d..a57f0dfb6db7 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -16,6 +16,7 @@
 #define MARVELL_PHY_ID_88E1318S		0x01410e90
 #define MARVELL_PHY_ID_88E1116R		0x01410e40
 #define MARVELL_PHY_ID_88E1510		0x01410dd0
+#define MARVELL_PHY_ID_88E1540		0x01410eb0
 #define MARVELL_PHY_ID_88E3016		0x01410e60
 
 /* struct phy_device dev_flags definitions */

From 321beec5047af83db90c88114b7e664b156f49fe Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Mon, 16 Nov 2015 23:36:46 +0100
Subject: [PATCH 76/83] net: phy: Use interrupts when available in NOLINK state

The NOLINK state will poll the phy once a second to see if the link
has come up. If the phy has an interrupt line, this polling can be
skipped, since the phy should interrupt when the link returns.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index adb48abafc87..48ce6ef400fe 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -863,6 +863,9 @@ void phy_state_machine(struct work_struct *work)
 			needs_aneg = true;
 		break;
 	case PHY_NOLINK:
+		if (phy_interrupt_is_valid(phydev))
+			break;
+
 		err = phy_read_status(phydev);
 		if (err)
 			break;

From 4194b4914a824c21bcad9305d6180825f4dc1f5e Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Tue, 17 Nov 2015 10:38:36 +0100
Subject: [PATCH 77/83] packet: Don't check frames_per_block against negative
 values

rb->frames_per_block is an unsigned int, thus can never be negative.

Also fix spacing in the calculation of frames_per_block.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 242bce1cf0f3..533981d49290 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4121,8 +4121,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
 			goto out;
 
-		rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
-		if (unlikely(rb->frames_per_block <= 0))
+		rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
+		if (unlikely(rb->frames_per_block == 0))
 			goto out;
 		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
 					req->tp_frame_nr))

From 90836b67e2171f48be696bdd3c4006d1e6f031fc Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Tue, 17 Nov 2015 10:40:21 +0100
Subject: [PATCH 78/83] packet: Use PAGE_ALIGNED macro

Use PAGE_ALIGNED(...) instead of open-coding it.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 533981d49290..1cf928fb573e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4109,7 +4109,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 		err = -EINVAL;
 		if (unlikely((int)req->tp_block_size <= 0))
 			goto out;
-		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
+		if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
 			goto out;
 		if (po->tp_version >= TPACKET_V3 &&
 		    (int)(req->tp_block_size -

From 19125c1a4fd873421698876f34a06dbe18d25f17 Mon Sep 17 00:00:00 2001
From: Martin Zhang <martinbj2008@gmail.com>
Date: Tue, 17 Nov 2015 20:49:30 +0800
Subject: [PATCH 79/83] net: use skb_clone to avoid alloc_pages failure.

1. new skb only need dst and ip address(v4 or v6).
2. skb_copy may need high order pages, which is very rare on long running server.

Signed-off-by: Junwei Zhang <linggao.zjw@alibaba-inc.com>
Signed-off-by: Martin Zhang <martinbj2008@gmail.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 1aa8437ed6c4..e6af42da28d9 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -857,7 +857,7 @@ static void neigh_probe(struct neighbour *neigh)
 	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
 	/* keep skb alive even if arp_queue overflows */
 	if (skb)
-		skb = skb_copy(skb, GFP_ATOMIC);
+		skb = skb_clone(skb, GFP_ATOMIC);
 	write_unlock(&neigh->lock);
 	neigh->ops->solicit(neigh, skb);
 	atomic_inc(&neigh->probes);

From b22b941b2c253a20e1d000c671594c4f3f0a3858 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Tue, 17 Nov 2015 14:16:52 +0100
Subject: [PATCH 80/83] rtnetlink: fix frame size warning in rtnl_fill_ifinfo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following warning:

  CC      net/core/rtnetlink.o
net/core/rtnetlink.c: In function ‘rtnl_fill_ifinfo’:
net/core/rtnetlink.c:1308:1: warning: the frame size of 2864 bytes is larger than 2048 bytes [-Wframe-larger-than=]
 }
 ^
by splitting up the huge rtnl_fill_ifinfo into some smaller ones, so we
don't have the huge frame allocations at the same time.

Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 274 ++++++++++++++++++++++++-------------------
 1 file changed, 152 insertions(+), 122 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 504bd17b7456..34ba7a08876d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1045,15 +1045,156 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
+static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb,
+					      struct net_device *dev)
+{
+	const struct rtnl_link_stats64 *stats;
+	struct rtnl_link_stats64 temp;
+	struct nlattr *attr;
+
+	stats = dev_get_stats(dev, &temp);
+
+	attr = nla_reserve(skb, IFLA_STATS,
+			   sizeof(struct rtnl_link_stats));
+	if (!attr)
+		return -EMSGSIZE;
+
+	copy_rtnl_link_stats(nla_data(attr), stats);
+
+	attr = nla_reserve(skb, IFLA_STATS64,
+			   sizeof(struct rtnl_link_stats64));
+	if (!attr)
+		return -EMSGSIZE;
+
+	copy_rtnl_link_stats64(nla_data(attr), stats);
+
+	return 0;
+}
+
+static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
+					       struct net_device *dev,
+					       int vfs_num,
+					       struct nlattr *vfinfo)
+{
+	struct ifla_vf_rss_query_en vf_rss_query_en;
+	struct ifla_vf_link_state vf_linkstate;
+	struct ifla_vf_spoofchk vf_spoofchk;
+	struct ifla_vf_tx_rate vf_tx_rate;
+	struct ifla_vf_stats vf_stats;
+	struct ifla_vf_trust vf_trust;
+	struct ifla_vf_vlan vf_vlan;
+	struct ifla_vf_rate vf_rate;
+	struct nlattr *vf, *vfstats;
+	struct ifla_vf_mac vf_mac;
+	struct ifla_vf_info ivi;
+
+	/* Not all SR-IOV capable drivers support the
+	 * spoofcheck and "RSS query enable" query.  Preset to
+	 * -1 so the user space tool can detect that the driver
+	 * didn't report anything.
+	 */
+	ivi.spoofchk = -1;
+	ivi.rss_query_en = -1;
+	ivi.trusted = -1;
+	memset(ivi.mac, 0, sizeof(ivi.mac));
+	/* The default value for VF link state is "auto"
+	 * IFLA_VF_LINK_STATE_AUTO which equals zero
+	 */
+	ivi.linkstate = 0;
+	if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi))
+		return 0;
+
+	vf_mac.vf =
+		vf_vlan.vf =
+		vf_rate.vf =
+		vf_tx_rate.vf =
+		vf_spoofchk.vf =
+		vf_linkstate.vf =
+		vf_rss_query_en.vf =
+		vf_trust.vf = ivi.vf;
+
+	memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
+	vf_vlan.vlan = ivi.vlan;
+	vf_vlan.qos = ivi.qos;
+	vf_tx_rate.rate = ivi.max_tx_rate;
+	vf_rate.min_tx_rate = ivi.min_tx_rate;
+	vf_rate.max_tx_rate = ivi.max_tx_rate;
+	vf_spoofchk.setting = ivi.spoofchk;
+	vf_linkstate.link_state = ivi.linkstate;
+	vf_rss_query_en.setting = ivi.rss_query_en;
+	vf_trust.setting = ivi.trusted;
+	vf = nla_nest_start(skb, IFLA_VF_INFO);
+	if (!vf) {
+		nla_nest_cancel(skb, vfinfo);
+		return -EMSGSIZE;
+	}
+	if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
+	    nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
+	    nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
+		    &vf_rate) ||
+	    nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
+		    &vf_tx_rate) ||
+	    nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
+		    &vf_spoofchk) ||
+	    nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
+		    &vf_linkstate) ||
+	    nla_put(skb, IFLA_VF_RSS_QUERY_EN,
+		    sizeof(vf_rss_query_en),
+		    &vf_rss_query_en) ||
+	    nla_put(skb, IFLA_VF_TRUST,
+		    sizeof(vf_trust), &vf_trust))
+		return -EMSGSIZE;
+	memset(&vf_stats, 0, sizeof(vf_stats));
+	if (dev->netdev_ops->ndo_get_vf_stats)
+		dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num,
+						&vf_stats);
+	vfstats = nla_nest_start(skb, IFLA_VF_STATS);
+	if (!vfstats) {
+		nla_nest_cancel(skb, vf);
+		nla_nest_cancel(skb, vfinfo);
+		return -EMSGSIZE;
+	}
+	if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS,
+			vf_stats.rx_packets) ||
+	    nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS,
+			vf_stats.tx_packets) ||
+	    nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES,
+			vf_stats.rx_bytes) ||
+	    nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES,
+			vf_stats.tx_bytes) ||
+	    nla_put_u64(skb, IFLA_VF_STATS_BROADCAST,
+			vf_stats.broadcast) ||
+	    nla_put_u64(skb, IFLA_VF_STATS_MULTICAST,
+			vf_stats.multicast))
+		return -EMSGSIZE;
+	nla_nest_end(skb, vfstats);
+	nla_nest_end(skb, vf);
+	return 0;
+}
+
+static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
+{
+	struct rtnl_link_ifmap map = {
+		.mem_start   = dev->mem_start,
+		.mem_end     = dev->mem_end,
+		.base_addr   = dev->base_addr,
+		.irq         = dev->irq,
+		.dma         = dev->dma,
+		.port        = dev->if_port,
+	};
+	if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
 static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			    int type, u32 pid, u32 seq, u32 change,
 			    unsigned int flags, u32 ext_filter_mask)
 {
 	struct ifinfomsg *ifm;
 	struct nlmsghdr *nlh;
-	struct rtnl_link_stats64 temp;
-	const struct rtnl_link_stats64 *stats;
-	struct nlattr *attr, *af_spec;
+	struct nlattr *af_spec;
 	struct rtnl_af_ops *af_ops;
 	struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
 
@@ -1096,18 +1237,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	    nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
 		goto nla_put_failure;
 
-	if (1) {
-		struct rtnl_link_ifmap map = {
-			.mem_start   = dev->mem_start,
-			.mem_end     = dev->mem_end,
-			.base_addr   = dev->base_addr,
-			.irq         = dev->irq,
-			.dma         = dev->dma,
-			.port        = dev->if_port,
-		};
-		if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
-			goto nla_put_failure;
-	}
+	if (rtnl_fill_link_ifmap(skb, dev))
+		goto nla_put_failure;
 
 	if (dev->addr_len) {
 		if (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) ||
@@ -1124,128 +1255,27 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	if (rtnl_phys_switch_id_fill(skb, dev))
 		goto nla_put_failure;
 
-	attr = nla_reserve(skb, IFLA_STATS,
-			sizeof(struct rtnl_link_stats));
-	if (attr == NULL)
+	if (rtnl_fill_stats(skb, dev))
 		goto nla_put_failure;
 
-	stats = dev_get_stats(dev, &temp);
-	copy_rtnl_link_stats(nla_data(attr), stats);
-
-	attr = nla_reserve(skb, IFLA_STATS64,
-			sizeof(struct rtnl_link_stats64));
-	if (attr == NULL)
-		goto nla_put_failure;
-	copy_rtnl_link_stats64(nla_data(attr), stats);
-
 	if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) &&
 	    nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)))
 		goto nla_put_failure;
 
-	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent
-	    && (ext_filter_mask & RTEXT_FILTER_VF)) {
+	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent &&
+	    ext_filter_mask & RTEXT_FILTER_VF) {
 		int i;
-
-		struct nlattr *vfinfo, *vf, *vfstats;
+		struct nlattr *vfinfo;
 		int num_vfs = dev_num_vf(dev->dev.parent);
 
 		vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
 		if (!vfinfo)
 			goto nla_put_failure;
 		for (i = 0; i < num_vfs; i++) {
-			struct ifla_vf_info ivi;
-			struct ifla_vf_mac vf_mac;
-			struct ifla_vf_vlan vf_vlan;
-			struct ifla_vf_rate vf_rate;
-			struct ifla_vf_tx_rate vf_tx_rate;
-			struct ifla_vf_spoofchk vf_spoofchk;
-			struct ifla_vf_link_state vf_linkstate;
-			struct ifla_vf_rss_query_en vf_rss_query_en;
-			struct ifla_vf_stats vf_stats;
-			struct ifla_vf_trust vf_trust;
-
-			/*
-			 * Not all SR-IOV capable drivers support the
-			 * spoofcheck and "RSS query enable" query.  Preset to
-			 * -1 so the user space tool can detect that the driver
-			 * didn't report anything.
-			 */
-			ivi.spoofchk = -1;
-			ivi.rss_query_en = -1;
-			ivi.trusted = -1;
-			memset(ivi.mac, 0, sizeof(ivi.mac));
-			/* The default value for VF link state is "auto"
-			 * IFLA_VF_LINK_STATE_AUTO which equals zero
-			 */
-			ivi.linkstate = 0;
-			if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
-				break;
-			vf_mac.vf =
-				vf_vlan.vf =
-				vf_rate.vf =
-				vf_tx_rate.vf =
-				vf_spoofchk.vf =
-				vf_linkstate.vf =
-				vf_rss_query_en.vf =
-				vf_trust.vf = ivi.vf;
-
-			memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
-			vf_vlan.vlan = ivi.vlan;
-			vf_vlan.qos = ivi.qos;
-			vf_tx_rate.rate = ivi.max_tx_rate;
-			vf_rate.min_tx_rate = ivi.min_tx_rate;
-			vf_rate.max_tx_rate = ivi.max_tx_rate;
-			vf_spoofchk.setting = ivi.spoofchk;
-			vf_linkstate.link_state = ivi.linkstate;
-			vf_rss_query_en.setting = ivi.rss_query_en;
-			vf_trust.setting = ivi.trusted;
-			vf = nla_nest_start(skb, IFLA_VF_INFO);
-			if (!vf) {
-				nla_nest_cancel(skb, vfinfo);
+			if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
 				goto nla_put_failure;
-			}
-			if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
-			    nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
-			    nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
-				    &vf_rate) ||
-			    nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
-				    &vf_tx_rate) ||
-			    nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
-				    &vf_spoofchk) ||
-			    nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
-				    &vf_linkstate) ||
-			    nla_put(skb, IFLA_VF_RSS_QUERY_EN,
-				    sizeof(vf_rss_query_en),
-				    &vf_rss_query_en) ||
-			    nla_put(skb, IFLA_VF_TRUST,
-				    sizeof(vf_trust), &vf_trust))
-				goto nla_put_failure;
-			memset(&vf_stats, 0, sizeof(vf_stats));
-			if (dev->netdev_ops->ndo_get_vf_stats)
-				dev->netdev_ops->ndo_get_vf_stats(dev, i,
-								  &vf_stats);
-			vfstats = nla_nest_start(skb, IFLA_VF_STATS);
-			if (!vfstats) {
-				nla_nest_cancel(skb, vf);
-				nla_nest_cancel(skb, vfinfo);
-				goto nla_put_failure;
-			}
-			if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS,
-					vf_stats.rx_packets) ||
-			    nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS,
-					vf_stats.tx_packets) ||
-			    nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES,
-					vf_stats.rx_bytes) ||
-			    nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES,
-					vf_stats.tx_bytes) ||
-			    nla_put_u64(skb, IFLA_VF_STATS_BROADCAST,
-					vf_stats.broadcast) ||
-			    nla_put_u64(skb, IFLA_VF_STATS_MULTICAST,
-					vf_stats.multicast))
-				goto nla_put_failure;
-			nla_nest_end(skb, vfstats);
-			nla_nest_end(skb, vf);
 		}
+
 		nla_nest_end(skb, vfinfo);
 	}
 

From a3a116e04cc6a94d595ead4e956ab1bc1d2f4746 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Tue, 17 Nov 2015 15:10:59 +0100
Subject: [PATCH 81/83] af_unix: take receive queue lock while appending new
 skb

While possibly in future we don't necessarily need to use
sk_buff_head.lock this is a rather larger change, as it affects the
af_unix fd garbage collector, diag and socket cleanups. This is too much
for a stable patch.

For the time being grab sk_buff_head.lock without disabling bh and irqs,
so don't use locked skb_queue_tail.

Fixes: 869e7c62486e ("net: af_unix: implement stream sendpage support")
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Reported-by: Eric Dumazet <edumazet@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index a8352db5c5b5..955ec152cb71 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1813,8 +1813,11 @@ alloc_skb:
 	skb->truesize += size;
 	atomic_add(size, &sk->sk_wmem_alloc);
 
-	if (newskb)
+	if (newskb) {
+		spin_lock(&other->sk_receive_queue.lock);
 		__skb_queue_tail(&other->sk_receive_queue, newskb);
+		spin_unlock(&other->sk_receive_queue.lock);
+	}
 
 	unix_state_unlock(other);
 	mutex_unlock(&unix_sk(other)->readlock);

From 17b85d29e82cc3c874a497a8bc5764d6a2b043e2 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Tue, 17 Nov 2015 15:49:06 +0100
Subject: [PATCH 82/83] net/core: revert "net: fix __netdev_update_features
 return.." and add comment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 00ee59271777 ("net: fix __netdev_update_features return
on ndo_set_features failure")
and adds a comment explaining why it's okay to return a value other than
0 upon error. Some drivers might actually change flags and return an
error so it's better to fire a spurious notification rather than miss
these.

CC: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 5dbc86ea6b58..ae00b894e675 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6436,7 +6436,10 @@ int __netdev_update_features(struct net_device *dev)
 		netdev_err(dev,
 			"set_features() failed (%d); wanted %pNF, left %pNF\n",
 			err, &features, &dev->features);
-		return 0;
+		/* return non-0 since some features might have changed and
+		 * it's better to fire a spurious notification than miss it
+		 */
+		return -1;
 	}
 
 sync_lower:

From e7523a497d48a9921983a80670f7a02dc4639d41 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 17 Nov 2015 18:25:07 +0200
Subject: [PATCH 83/83] MAINTAINERS: Update Mellanox's Eth NIC driver entries

Eugenia (Jenny) Emantayev is replacing Amir Vadai as the
mlx4 Ethernet driver maintainer.

Saeed Mahameed is assigned to maintain mlx5 Eth functionality.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 45320675a460..902cdab54de0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6914,13 +6914,21 @@ F:	drivers/scsi/megaraid.*
 F:	drivers/scsi/megaraid/
 
 MELLANOX ETHERNET DRIVER (mlx4_en)
-M:	Amir Vadai <amirv@mellanox.com>
+M: 	Eugenia Emantayev <eugenia@mellanox.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 W:	http://www.mellanox.com
 Q:	http://patchwork.ozlabs.org/project/netdev/list/
 F:	drivers/net/ethernet/mellanox/mlx4/en_*
 
+MELLANOX ETHERNET DRIVER (mlx5e)
+M:	Saeed Mahameed <saeedm@mellanox.com>
+L:	netdev@vger.kernel.org
+S:	Supported
+W:	http://www.mellanox.com
+Q:	http://patchwork.ozlabs.org/project/netdev/list/
+F:	drivers/net/ethernet/mellanox/mlx5/core/en_*
+
 MELLANOX ETHERNET SWITCH DRIVERS
 M:	Jiri Pirko <jiri@mellanox.com>
 M:	Ido Schimmel <idosch@mellanox.com>