From 55378a238e04b39cc82957d91d16499704ea719b Mon Sep 17 00:00:00 2001
From: Mohamad Haj Yahia <mohamad@mellanox.com>
Date: Thu, 30 Mar 2017 17:00:25 +0300
Subject: [PATCH 01/41] net/mlx5: Fix driver load bad flow when having fw
 initializing timeout

If FW is stuck in initializing state we will skip the driver load, but
current error handling flow doesn't clean previously allocated command
interface resources.

Fixes: e3297246c2c8 ('net/mlx5_core: Wait for FW readiness on startup')
Signed-off-by: Mohamad Haj Yahia <mohamad@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 60154a175bd3..0ad66324247f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1029,7 +1029,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	if (err) {
 		dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n",
 			FW_INIT_TIMEOUT_MILI);
-		goto out_err;
+		goto err_cmd_cleanup;
 	}
 
 	err = mlx5_core_enable_hca(dev, 0);

From c415f704c8276bc686abcb0497bf2606038ca73c Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Thu, 30 Mar 2017 15:56:10 +0200
Subject: [PATCH 02/41] net/mlx5: E-Switch, Correctly deal with inline mode on
 ConnectX-5

On ConnectX5 the wqe inline mode is "none" and hence the FW
reports MLX5_CAP_INLINE_MODE_NOT_REQUIRED.

Fix our devlink callbacks to deal with that on get and set.

Also fix the tc flow parsing code not to fail anything when
inline isn't required.

Fixes: bffaa916588e ('net/mlx5: E-Switch, Add control for inline mode')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_tc.c   |  3 +-
 .../mellanox/mlx5/core/eswitch_offloads.c     | 36 ++++++++++++-------
 2 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index fade7233dac5..b7c99c38a7c4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -639,7 +639,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
 
 	if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH) &&
 	    rep->vport != FDB_UPLINK_VPORT) {
-		if (min_inline > esw->offloads.inline_mode) {
+		if (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
+		    esw->offloads.inline_mode < min_inline) {
 			netdev_warn(priv->netdev,
 				    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
 				    min_inline, esw->offloads.inline_mode);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 307ec6c5fd3b..d111cebca9f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -911,8 +911,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
 	int num_vports = esw->enabled_vports;
-	int err;
-	int vport;
+	int err, vport;
 	u8 mlx5_mode;
 
 	if (!MLX5_CAP_GEN(dev, vport_group_manager))
@@ -921,9 +920,17 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
 	if (esw->mode == SRIOV_NONE)
 		return -EOPNOTSUPP;
 
-	if (MLX5_CAP_ETH(dev, wqe_inline_mode) !=
-	    MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
+	switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
+	case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
+		if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE)
+			return 0;
+		/* fall through */
+	case MLX5_CAP_INLINE_MODE_L2:
+		esw_warn(dev, "Inline mode can't be set\n");
 		return -EOPNOTSUPP;
+	case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+		break;
+	}
 
 	if (esw->offloads.num_flows > 0) {
 		esw_warn(dev, "Can't set inline mode when flows are configured\n");
@@ -966,18 +973,14 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
 	if (esw->mode == SRIOV_NONE)
 		return -EOPNOTSUPP;
 
-	if (MLX5_CAP_ETH(dev, wqe_inline_mode) !=
-	    MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
-		return -EOPNOTSUPP;
-
 	return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
 }
 
 int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
 {
+	u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
 	struct mlx5_core_dev *dev = esw->dev;
 	int vport;
-	u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
 
 	if (!MLX5_CAP_GEN(dev, vport_group_manager))
 		return -EOPNOTSUPP;
@@ -985,10 +988,18 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
 	if (esw->mode == SRIOV_NONE)
 		return -EOPNOTSUPP;
 
-	if (MLX5_CAP_ETH(dev, wqe_inline_mode) !=
-	    MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
-		return -EOPNOTSUPP;
+	switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
+	case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
+		mlx5_mode = MLX5_INLINE_MODE_NONE;
+		goto out;
+	case MLX5_CAP_INLINE_MODE_L2:
+		mlx5_mode = MLX5_INLINE_MODE_L2;
+		goto out;
+	case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+		goto query_vports;
+	}
 
+query_vports:
 	for (vport = 1; vport <= nvfs; vport++) {
 		mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode);
 		if (vport > 1 && prev_mlx5_mode != mlx5_mode)
@@ -996,6 +1007,7 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
 		prev_mlx5_mode = mlx5_mode;
 	}
 
+out:
 	*mode = mlx5_mode;
 	return 0;
 }

From 32f3671f6951836cfe9d84e1a6554908b85c61d7 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Thu, 6 Apr 2017 19:20:21 +0300
Subject: [PATCH 03/41] net/mlx5e: Make sure the FW max encap size is enough
 for ipv4 tunnels

Otherwise the code that fills the ipv4 encapsulation headers could be writing
beyond the allocated headers buffer.

Fixes: a54e20b4fcae ('net/mlx5e: Add basic TC tunnel set action for SRIOV offloads')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_tc.c   | 42 ++++++++++---------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index b7c99c38a7c4..fc7c1d30461c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -786,16 +786,15 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
 	return 0;
 }
 
-static int gen_vxlan_header_ipv4(struct net_device *out_dev,
-				 char buf[],
-				 unsigned char h_dest[ETH_ALEN],
-				 int ttl,
-				 __be32 daddr,
-				 __be32 saddr,
-				 __be16 udp_dst_port,
-				 __be32 vx_vni)
+static void gen_vxlan_header_ipv4(struct net_device *out_dev,
+				  char buf[], int encap_size,
+				  unsigned char h_dest[ETH_ALEN],
+				  int ttl,
+				  __be32 daddr,
+				  __be32 saddr,
+				  __be16 udp_dst_port,
+				  __be32 vx_vni)
 {
-	int encap_size = VXLAN_HLEN + sizeof(struct iphdr) + ETH_HLEN;
 	struct ethhdr *eth = (struct ethhdr *)buf;
 	struct iphdr  *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
 	struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr));
@@ -818,8 +817,6 @@ static int gen_vxlan_header_ipv4(struct net_device *out_dev,
 	udp->dest = udp_dst_port;
 	vxh->vx_flags = VXLAN_HF_VNI;
 	vxh->vx_vni = vxlan_vni_field(vx_vni);
-
-	return encap_size;
 }
 
 static int gen_vxlan_header_ipv6(struct net_device *out_dev,
@@ -863,13 +860,20 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
 					  struct net_device **out_dev)
 {
 	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+	int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN;
 	struct ip_tunnel_key *tun_key = &e->tun_info.key;
-	int encap_size, ttl, err;
 	struct neighbour *n = NULL;
 	struct flowi4 fl4 = {};
 	char *encap_header;
+	int ttl, err;
 
-	encap_header = kzalloc(max_encap_size, GFP_KERNEL);
+	if (max_encap_size < ipv4_encap_size) {
+		mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+			       ipv4_encap_size, max_encap_size);
+		return -EOPNOTSUPP;
+	}
+
+	encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
 	if (!encap_header)
 		return -ENOMEM;
 
@@ -904,11 +908,11 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
 
 	switch (e->tunnel_type) {
 	case MLX5_HEADER_TYPE_VXLAN:
-		encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header,
-						   e->h_dest, ttl,
-						   fl4.daddr,
-						   fl4.saddr, tun_key->tp_dst,
-						   tunnel_id_to_key32(tun_key->tun_id));
+		gen_vxlan_header_ipv4(*out_dev, encap_header,
+				      ipv4_encap_size, e->h_dest, ttl,
+				      fl4.daddr,
+				      fl4.saddr, tun_key->tp_dst,
+				      tunnel_id_to_key32(tun_key->tun_id));
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -916,7 +920,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
 	}
 
 	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
-			       encap_size, encap_header, &e->encap_id);
+			       ipv4_encap_size, encap_header, &e->encap_id);
 out:
 	if (err && n)
 		neigh_release(n);

From 225aabaffe9ab42b2dd8770359442f019083275e Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Thu, 6 Apr 2017 19:28:30 +0300
Subject: [PATCH 04/41] net/mlx5e: Make sure the FW max encap size is enough
 for ipv6 tunnels

Otherwise the code that fills the ipv6 encapsulation headers could be writing
beyond the allocated headers buffer.

Fixes: ce99f6b97fcd ('net/mlx5e: Support SRIOV TC encapsulation offloads for IPv6 tunnels')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_tc.c   | 42 ++++++++++---------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index fc7c1d30461c..5436866798f4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -819,16 +819,15 @@ static void gen_vxlan_header_ipv4(struct net_device *out_dev,
 	vxh->vx_vni = vxlan_vni_field(vx_vni);
 }
 
-static int gen_vxlan_header_ipv6(struct net_device *out_dev,
-				 char buf[],
-				 unsigned char h_dest[ETH_ALEN],
-				 int ttl,
-				 struct in6_addr *daddr,
-				 struct in6_addr *saddr,
-				 __be16 udp_dst_port,
-				 __be32 vx_vni)
+static void gen_vxlan_header_ipv6(struct net_device *out_dev,
+				  char buf[], int encap_size,
+				  unsigned char h_dest[ETH_ALEN],
+				  int ttl,
+				  struct in6_addr *daddr,
+				  struct in6_addr *saddr,
+				  __be16 udp_dst_port,
+				  __be32 vx_vni)
 {
-	int encap_size = VXLAN_HLEN + sizeof(struct ipv6hdr) + ETH_HLEN;
 	struct ethhdr *eth = (struct ethhdr *)buf;
 	struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
 	struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr));
@@ -850,8 +849,6 @@ static int gen_vxlan_header_ipv6(struct net_device *out_dev,
 	udp->dest = udp_dst_port;
 	vxh->vx_flags = VXLAN_HF_VNI;
 	vxh->vx_vni = vxlan_vni_field(vx_vni);
-
-	return encap_size;
 }
 
 static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
@@ -935,13 +932,20 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
 
 {
 	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+	int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN;
 	struct ip_tunnel_key *tun_key = &e->tun_info.key;
-	int encap_size, err, ttl = 0;
 	struct neighbour *n = NULL;
 	struct flowi6 fl6 = {};
 	char *encap_header;
+	int err, ttl = 0;
 
-	encap_header = kzalloc(max_encap_size, GFP_KERNEL);
+	if (max_encap_size < ipv6_encap_size) {
+		mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+			       ipv6_encap_size, max_encap_size);
+		return -EOPNOTSUPP;
+	}
+
+	encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
 	if (!encap_header)
 		return -ENOMEM;
 
@@ -977,11 +981,11 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
 
 	switch (e->tunnel_type) {
 	case MLX5_HEADER_TYPE_VXLAN:
-		encap_size = gen_vxlan_header_ipv6(*out_dev, encap_header,
-						   e->h_dest, ttl,
-						   &fl6.daddr,
-						   &fl6.saddr, tun_key->tp_dst,
-						   tunnel_id_to_key32(tun_key->tun_id));
+		gen_vxlan_header_ipv6(*out_dev, encap_header,
+				      ipv6_encap_size, e->h_dest, ttl,
+				      &fl6.daddr,
+				      &fl6.saddr, tun_key->tp_dst,
+				      tunnel_id_to_key32(tun_key->tun_id));
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -989,7 +993,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
 	}
 
 	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
-			       encap_size, encap_header, &e->encap_id);
+			       ipv6_encap_size, encap_header, &e->encap_id);
 out:
 	if (err && n)
 		neigh_release(n);

From 5ae85b0edaa597b063ee9d8f48b830519a6e0c0f Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Thu, 6 Apr 2017 10:49:12 +0300
Subject: [PATCH 05/41] net/mlx5: Fix UAR memory leak

When UAR is released, we deallocate the device resource, but
don't unmmap the UAR mapping memory.
Fix the leak by unmapping this memory.

Fixes: a6d51b68611e9 ('net/mlx5: Introduce blue flame register allocator)
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/uar.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
index 2e6b0f290ddc..222b25908d01 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -87,6 +87,7 @@ static void up_rel_func(struct kref *kref)
 	struct mlx5_uars_page *up = container_of(kref, struct mlx5_uars_page, ref_count);
 
 	list_del(&up->list);
+	iounmap(up->map);
 	if (mlx5_cmd_free_uar(up->mdev, up->index))
 		mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index);
 	kfree(up->reg_bitmap);

From cbad8cddb6ed7ef3a5f0a9a70f1711d4d7fb9a8f Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.com>
Date: Wed, 22 Mar 2017 11:44:14 +0200
Subject: [PATCH 06/41] net/mlx5e: Fix small packet threshold

RX packet headers are meant to be contained in SKB linear part,
and chose a threshold of 128.
It turns out this is not enough, i.e. for IPv6 packet over VxLAN.
In this case, UDP/IPv4 needs 42 bytes, GENEVE header is 8 bytes,
and 86 bytes for TCP/IPv6. In total 136 bytes that is more than
current 128 bytes. In this case expand header flow is reached.
The warning in skb_try_coalesce() caused by a wrong truesize
was already fixed here:
commit 158f323b9868 ("net: adjust skb->truesize in pskb_expand_head()").
Still, we prefer to totally avoid the expand header flow for performance reasons.
Tested regular TCP_STREAM with iperf for 1 and 8 streams, no degradation was found.

Fixes: 461017cb006a ("net/mlx5e: Support RX multi-packet WQE (Striding RQ)")
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index dc52053128bc..3d9490cd2db1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -90,7 +90,7 @@
 #define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) - 1 <= U16_MAX)
 
 #define MLX5_UMR_ALIGN				(2048)
-#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD	(128)
+#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD	(256)
 
 #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ                 (64 * 1024)
 #define MLX5E_DEFAULT_LRO_TIMEOUT                       32

From 5e82c9e4ed60beba83f46a1a5a8307b99a23e982 Mon Sep 17 00:00:00 2001
From: Ilan Tayari <ilant@mellanox.com>
Date: Thu, 2 Mar 2017 15:49:45 +0200
Subject: [PATCH 07/41] net/mlx5e: Fix ETHTOOL_GRXCLSRLALL handling

Handler for ETHTOOL_GRXCLSRLALL must set info->data to the size
of the table, regardless of the amount of entries in it.
Existing code does not do that, and this breaks all usage of ethtool -N
or -n without explicit location, with this error:
rmgr: Invalid RX class rules table size: Success

Set info->data to the table size.

Tested:
ethtool -n ens8
ethtool -N ens8 flow-type ip4 src-ip 1.1.1.1 dst-ip 2.2.2.2 action 1
ethtool -N ens8 flow-type ip4 src-ip 1.1.1.1 dst-ip 2.2.2.2 action 1 loc 55
ethtool -n ens8
ethtool -N ens8 delete 1023
ethtool -N ens8 delete 55

Fixes: f913a72aa008 ("net/mlx5e: Add support to get ethtool flow rules")
Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index d55fff0ba388..26fc77e80f7b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -564,6 +564,7 @@ int mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv, struct ethtool_rxnfc *i
 	int idx = 0;
 	int err = 0;
 
+	info->data = MAX_NUM_OF_ETHTOOL_RULES;
 	while ((!err || err == -ENOENT) && idx < info->rule_cnt) {
 		err = mlx5e_ethtool_get_flow(priv, info, location);
 		if (!err)

From 1510d728639e183b60bd1285b09a49c393923ba5 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 20 Apr 2017 21:40:12 -0700
Subject: [PATCH 08/41] net/mlx5e: Fix race in mlx5e_sw_stats and
 mlx5e_vport_stats

We have observed a sudden spike in rx/tx_packets and rx/tx_bytes
reported under /proc/net/dev.  There is a race in mlx5e_update_stats()
and some of the get-stats functions (the one that we hit is the
mlx5e_get_stats() which is called by ndo_get_stats64()).

In particular, the very first thing mlx5e_update_sw_counters()
does is 'memset(s, 0, sizeof(*s))'.  For example, if mlx5e_get_stats()
is unlucky at one point, rx_bytes and rx_packets could be 0.  One second
later, a normal (and much bigger than 0) value will be reported.

This patch is to use a 'struct mlx5e_sw_stats temp' to avoid
a direct memset zero on priv->stats.sw.

mlx5e_update_vport_counters() has a similar race.  Hence, addressed
together.  However, memset zero is removed instead because
it is not needed.

I am lucky enough to catch this 0-reset in rx multicast:
eth0: 41457665   76804   70    0    0    70          0     47085 15586634   87502    3    0    0     0       3          0
eth0: 41459860   76815   70    0    0    70          0     47094 15588376   87516    3    0    0     0       3          0
eth0: 41460577   76822   70    0    0    70          0         0 15589083   87521    3    0    0     0       3          0
eth0: 41463293   76838   70    0    0    70          0     47108 15595872   87538    3    0    0     0       3          0
eth0: 41463379   76839   70    0    0    70          0     47116 15596138   87539    3    0    0     0       3          0

v2: Remove memset zero from mlx5e_update_vport_counters()
v1: Use temp and memcpy

Fixes: 9218b44dcc05 ("net/mlx5e: Statistics handling refactoring")
Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Suggested-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 66c133757a5e..15cc7b469d2e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -174,7 +174,7 @@ unlock:
 
 static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
 {
-	struct mlx5e_sw_stats *s = &priv->stats.sw;
+	struct mlx5e_sw_stats temp, *s = &temp;
 	struct mlx5e_rq_stats *rq_stats;
 	struct mlx5e_sq_stats *sq_stats;
 	u64 tx_offload_none = 0;
@@ -229,6 +229,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
 	s->link_down_events_phy = MLX5_GET(ppcnt_reg,
 				priv->stats.pport.phy_counters,
 				counter_set.phys_layer_cntrs.link_down_events);
+	memcpy(&priv->stats.sw, s, sizeof(*s));
 }
 
 static void mlx5e_update_vport_counters(struct mlx5e_priv *priv)
@@ -243,7 +244,6 @@ static void mlx5e_update_vport_counters(struct mlx5e_priv *priv)
 	MLX5_SET(query_vport_counter_in, in, op_mod, 0);
 	MLX5_SET(query_vport_counter_in, in, other_vport, 0);
 
-	memset(out, 0, outlen);
 	mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
 }
 

From 11faa7b0359aaf7efd406b7a6a077fda2b037d8e Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 21 Apr 2017 13:49:37 +0300
Subject: [PATCH 09/41] net: tc35815: move free after the dereference

We dereference "skb" to get "skb->len" so we should probably do that
step before freeing the skb.

Fixes: eea221ce4880 ("tc35815 driver update (take 2)")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/toshiba/tc35815.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c
index a45f98fa4aa7..3dadee1080b9 100644
--- a/drivers/net/ethernet/toshiba/tc35815.c
+++ b/drivers/net/ethernet/toshiba/tc35815.c
@@ -1017,8 +1017,8 @@ tc35815_free_queues(struct net_device *dev)
 			BUG_ON(lp->tx_skbs[i].skb != skb);
 #endif
 			if (skb) {
-				dev_kfree_skb(skb);
 				pci_unmap_single(lp->pci_dev, lp->tx_skbs[i].skb_dma, skb->len, PCI_DMA_TODEVICE);
+				dev_kfree_skb(skb);
 				lp->tx_skbs[i].skb = NULL;
 				lp->tx_skbs[i].skb_dma = 0;
 			}

From b7c8487cb3d99509220092fe77a2464dff43f015 Mon Sep 17 00:00:00 2001
From: Robert Shearman <rshearma@brocade.com>
Date: Fri, 21 Apr 2017 21:34:59 +0100
Subject: [PATCH 10/41] ipv4: Avoid caching l3mdev dst on mismatched local
 route

David reported that doing the following:

    ip li add red type vrf table 10
    ip link set dev eth1 vrf red
    ip addr add 127.0.0.1/8 dev red
    ip link set dev eth1 up
    ip li set red up
    ping -c1 -w1 -I red 127.0.0.1
    ip li del red

when either policy routing IP rules are present or the local table
lookup ip rule is before the l3mdev lookup results in a hang with
these messages:

    unregister_netdevice: waiting for red to become free. Usage count = 1

The problem is caused by caching the dst used for sending the packet
out of the specified interface on a local route with a different
nexthop interface. Thus the dst could stay around until the route in
the table the lookup was done is deleted which may be never.

Address the problem by not forcing output device to be the l3mdev in
the flow's output interface if the lookup didn't use the l3mdev. This
then results in the dst using the right device according to the route.

Changes in v2:
 - make the dev_out passed in by __ip_route_output_key_hash correct
   instead of checking the nh dev if FLOWI_FLAG_SKIP_NH_OIF is set as
   suggested by David.

Fixes: 5f02ce24c2696 ("net: l3mdev: Allow the l3mdev to be a loopback")
Reported-by: David Ahern <dsa@cumulusnetworks.com>
Suggested-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: Robert Shearman <rshearma@brocade.com>
Acked-by: David Ahern <dsa@cumulusnetworks.com>
Tested-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index acd69cfe2951..d9724889ff09 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2359,7 +2359,8 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
 		}
 
 		/* L3 master device is the loopback for that domain */
-		dev_out = l3mdev_master_dev_rcu(dev_out) ? : net->loopback_dev;
+		dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(res)) ? :
+			net->loopback_dev;
 		fl4->flowi4_oif = dev_out->ifindex;
 		flags |= RTCF_LOCAL;
 		goto make_route;

From 4d6fa57b4dab0d77f4d8e9d9c73d1e63f6fe8fee Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 21 Apr 2017 23:14:48 +0200
Subject: [PATCH 11/41] macsec: avoid heap overflow in skb_to_sgvec

While this may appear as a humdrum one line change, it's actually quite
important. An sk_buff stores data in three places:

1. A linear chunk of allocated memory in skb->data. This is the easiest
   one to work with, but it precludes using scatterdata since the memory
   must be linear.
2. The array skb_shinfo(skb)->frags, which is of maximum length
   MAX_SKB_FRAGS. This is nice for scattergather, since these fragments
   can point to different pages.
3. skb_shinfo(skb)->frag_list, which is a pointer to another sk_buff,
   which in turn can have data in either (1) or (2).

The first two are rather easy to deal with, since they're of a fixed
maximum length, while the third one is not, since there can be
potentially limitless chains of fragments. Fortunately dealing with
frag_list is opt-in for drivers, so drivers don't actually have to deal
with this mess. For whatever reason, macsec decided it wanted pain, and
so it explicitly specified NETIF_F_FRAGLIST.

Because dealing with (1), (2), and (3) is insane, most users of sk_buff
doing any sort of crypto or paging operation calls a convenient function
called skb_to_sgvec (which happens to be recursive if (3) is in use!).
This takes a sk_buff as input, and writes into its output pointer an
array of scattergather list items. Sometimes people like to declare a
fixed size scattergather list on the stack; othertimes people like to
allocate a fixed size scattergather list on the heap. However, if you're
doing it in a fixed-size fashion, you really shouldn't be using
NETIF_F_FRAGLIST too (unless you're also ensuring the sk_buff and its
frag_list children arent't shared and then you check the number of
fragments in total required.)

Macsec specifically does this:

        size += sizeof(struct scatterlist) * (MAX_SKB_FRAGS + 1);
        tmp = kmalloc(size, GFP_ATOMIC);
        *sg = (struct scatterlist *)(tmp + sg_offset);
	...
        sg_init_table(sg, MAX_SKB_FRAGS + 1);
        skb_to_sgvec(skb, sg, 0, skb->len);

Specifying MAX_SKB_FRAGS + 1 is the right answer usually, but not if you're
using NETIF_F_FRAGLIST, in which case the call to skb_to_sgvec will
overflow the heap, and disaster ensues.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: stable@vger.kernel.org
Cc: security@kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macsec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index ff0a5ed3ca80..dbab05afcdbe 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -2716,7 +2716,7 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
 }
 
 #define MACSEC_FEATURES \
-	(NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST)
+	(NETIF_F_SG | NETIF_F_HIGHDMA)
 static struct lock_class_key macsec_netdev_addr_lock_key;
 
 static int macsec_dev_init(struct net_device *dev)

From b40c5f4fde22fb98eff205b3aece05b471c24eed Mon Sep 17 00:00:00 2001
From: Ansis Atteka <aatteka@ovn.org>
Date: Fri, 21 Apr 2017 15:23:05 -0700
Subject: [PATCH 12/41] udp: disable inner UDP checksum offloads in IPsec case

Otherwise, UDP checksum offloads could corrupt ESP packets by attempting
to calculate UDP checksum when this inner UDP packet is already protected
by IPsec.

One way to reproduce this bug is to have a VM with virtio_net driver (UFO
set to ON in the guest VM); and then encapsulate all guest's Ethernet
frames in Geneve; and then further encrypt Geneve with IPsec.  In this
case following symptoms are observed:
1. If using ixgbe NIC, then it will complain with following error message:
   ixgbe 0000:01:00.1: partial checksum but l4 proto=32!
2. Receiving IPsec stack will drop all the corrupted ESP packets and
   increase XfrmInStateProtoError counter in /proc/net/xfrm_stat.
3. iperf UDP test from the VM with packet sizes above MTU will not work at
   all.
4. iperf TCP test from the VM will get ridiculously low performance because.

Signed-off-by: Ansis Atteka <aatteka@ovn.org>
Co-authored-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp_offload.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index b2be1d9757ef..781250151d40 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -29,6 +29,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
 	u16 mac_len = skb->mac_len;
 	int udp_offset, outer_hlen;
 	__wsum partial;
+	bool need_ipsec;
 
 	if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
 		goto out;
@@ -62,8 +63,10 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
 
 	ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
 
+	need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb));
 	/* Try to offload checksum if possible */
 	offload_csum = !!(need_csum &&
+			  !need_ipsec &&
 			  (skb->dev->features &
 			   (is_ipv6 ? (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) :
 				      (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM))));

From 9199cb7677b388b42e3d95c755090dfc5ab2b11a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 22 Apr 2017 13:46:56 +0300
Subject: [PATCH 13/41] ravb: Double free on error in ravb_start_xmit()

If skb_put_padto() fails then it frees the skb.  I shifted that code
up a bit to make my error handling a little simpler.

Fixes: a0d2f20650e8 ("Renesas Ethernet AVB PTP clock driver")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/ravb_main.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 8cfc4a54f2dc..3cd7989c007d 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1516,11 +1516,12 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 		spin_unlock_irqrestore(&priv->lock, flags);
 		return NETDEV_TX_BUSY;
 	}
-	entry = priv->cur_tx[q] % (priv->num_tx_ring[q] * NUM_TX_DESC);
-	priv->tx_skb[q][entry / NUM_TX_DESC] = skb;
 
 	if (skb_put_padto(skb, ETH_ZLEN))
-		goto drop;
+		goto exit;
+
+	entry = priv->cur_tx[q] % (priv->num_tx_ring[q] * NUM_TX_DESC);
+	priv->tx_skb[q][entry / NUM_TX_DESC] = skb;
 
 	buffer = PTR_ALIGN(priv->tx_align[q], DPTR_ALIGN) +
 		 entry / NUM_TX_DESC * DPTR_ALIGN;

From fc1f8f4f310ac65b1337e2d7ba52ae4ff2b7c849 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Sat, 22 Apr 2017 09:10:13 -0700
Subject: [PATCH 14/41] net: ipv6: send unsolicited NA if enabled for all
 interfaces

When arp_notify is set to 1 for either a specific interface or for 'all'
interfaces, gratuitous arp requests are sent. Since ndisc_notify is the
ipv6 equivalent to arp_notify, it should follow the same semantics.
Commit 4a6e3c5def13 ("net: ipv6: send unsolicited NA on admin up") sends
the NA on admin up. The final piece is checking devconf_all->ndisc_notify
in addition to the per device setting. Add it.

Fixes: 5cb04436eef6 ("ipv6: add knob to send unsolicited ND on link-layer address change")
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ndisc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 7ebac630d3c6..cb1766724a4c 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1749,7 +1749,8 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
 		idev = in6_dev_get(dev);
 		if (!idev)
 			break;
-		if (idev->cnf.ndisc_notify)
+		if (idev->cnf.ndisc_notify ||
+		    net->ipv6.devconf_all->ndisc_notify)
 			ndisc_send_unsol_na(dev);
 		in6_dev_put(idev);
 		break;

From a424f0de61638cbb5047e0a888c54da9cf471f90 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 24 Apr 2017 14:27:21 -0700
Subject: [PATCH 15/41] net: dsa: b53: Include IMP/CPU port in dumb forwarding
 mode

Since Broadcom tags are not enabled in b53 (DSA_PROTO_TAG_NONE), we need
to make sure that the IMP/CPU port is included in the forwarding
decision.

Without this change, switching between non-management ports would work,
but not between management ports and non-management ports thus breaking
the default state in which DSA switch are brought up.

Fixes: 967dd82ffc52 ("net: dsa: b53: Add support for Broadcom RoboSwitch")
Reported-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_common.c | 10 ++++++++++
 drivers/net/dsa/b53/b53_regs.h   |  4 ++++
 2 files changed, 14 insertions(+)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 8cf4801994e8..ca7f3b005a29 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -326,6 +326,7 @@ static void b53_get_vlan_entry(struct b53_device *dev, u16 vid,
 
 static void b53_set_forwarding(struct b53_device *dev, int enable)
 {
+	struct dsa_switch *ds = dev->ds;
 	u8 mgmt;
 
 	b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, &mgmt);
@@ -336,6 +337,15 @@ static void b53_set_forwarding(struct b53_device *dev, int enable)
 		mgmt &= ~SM_SW_FWD_EN;
 
 	b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, mgmt);
+
+	/* Include IMP port in dumb forwarding mode when no tagging protocol is
+	 * set
+	 */
+	if (ds->ops->get_tag_protocol(ds) == DSA_TAG_PROTO_NONE) {
+		b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, &mgmt);
+		mgmt |= B53_MII_DUMB_FWDG_EN;
+		b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, mgmt);
+	}
 }
 
 static void b53_enable_vlan(struct b53_device *dev, bool enable)
diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h
index 9fd24c418fa4..f2a060e7a637 100644
--- a/drivers/net/dsa/b53/b53_regs.h
+++ b/drivers/net/dsa/b53/b53_regs.h
@@ -104,6 +104,10 @@
 #define  B53_UC_FWD_EN			BIT(6)
 #define  B53_MC_FWD_EN			BIT(7)
 
+/* Switch control (8 bit) */
+#define B53_SWITCH_CTRL			0x22
+#define  B53_MII_DUMB_FWDG_EN		BIT(6)
+
 /* (16 bit) */
 #define B53_UC_FLOOD_MASK		0x32
 #define B53_MC_FLOOD_MASK		0x34

From 3fb22b0534e412569dd67dec625b4a051c7c2d7e Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 24 Apr 2017 14:27:22 -0700
Subject: [PATCH 16/41] net: dsa: b53: Implement software reset for 58xx
 devices

Implement the correct software reset sequence for 58xx devices by
setting all 3 reset bits and polling for the SW_RST bit to clear itself
without a given timeout. We cannot use is58xx() here because that would
also include the 7445/7278 Starfighter 2 which have their own driver
doing the reset earlier on due to the HW specific integration.

Fixes: 991a36bb4645 ("net: dsa: b53: Add support for BCM585xx/586xx/88312 integrated switch")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_common.c | 25 ++++++++++++++++++++++++-
 drivers/net/dsa/b53/b53_regs.h   |  1 +
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index ca7f3b005a29..b66ee18cbe49 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -608,7 +608,8 @@ static void b53_switch_reset_gpio(struct b53_device *dev)
 
 static int b53_switch_reset(struct b53_device *dev)
 {
-	u8 mgmt;
+	unsigned int timeout = 1000;
+	u8 mgmt, reg;
 
 	b53_switch_reset_gpio(dev);
 
@@ -617,6 +618,28 @@ static int b53_switch_reset(struct b53_device *dev)
 		b53_write8(dev, B53_CTRL_PAGE, B53_SOFTRESET, 0x00);
 	}
 
+	/* This is specific to 58xx devices here, do not use is58xx() which
+	 * covers the larger Starfigther 2 family, including 7445/7278 which
+	 * still use this driver as a library and need to perform the reset
+	 * earlier.
+	 */
+	if (dev->chip_id == BCM58XX_DEVICE_ID) {
+		b53_read8(dev, B53_CTRL_PAGE, B53_SOFTRESET, &reg);
+		reg |= SW_RST | EN_SW_RST | EN_CH_RST;
+		b53_write8(dev, B53_CTRL_PAGE, B53_SOFTRESET, reg);
+
+		do {
+			b53_read8(dev, B53_CTRL_PAGE, B53_SOFTRESET, &reg);
+			if (!(reg & SW_RST))
+				break;
+
+			usleep_range(1000, 2000);
+		} while (timeout-- > 0);
+
+		if (timeout == 0)
+			return -ETIMEDOUT;
+	}
+
 	b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, &mgmt);
 
 	if (!(mgmt & SM_SW_FWD_EN)) {
diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h
index f2a060e7a637..e5c86d44667a 100644
--- a/drivers/net/dsa/b53/b53_regs.h
+++ b/drivers/net/dsa/b53/b53_regs.h
@@ -143,6 +143,7 @@
 /* Software reset register (8 bit) */
 #define B53_SOFTRESET			0x79
 #define   SW_RST			BIT(7)
+#define   EN_CH_RST			BIT(6)
 #define   EN_SW_RST			BIT(4)
 
 /* Fast Aging Control register (8 bit) */

From bfcda65c9ba57004533c12673347503f250c3290 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 24 Apr 2017 14:27:23 -0700
Subject: [PATCH 17/41] net: dsa: b53: Fix CPU port for 58xx devices

The 58xx devices (Northstar Plus) do actually have their CPU port wired
at port 8, it was unfortunately set to port 5 (B53_CPU_PORT_25) which is
incorrect, since that is the second possible management port.

Fixes: 991a36bb4645 ("net: dsa: b53: Add support for BCM585xx/586xx/88312 integrated switch")
Reported-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index b66ee18cbe49..fa0eece21eef 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1764,7 +1764,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.vlans	= 4096,
 		.enabled_ports = 0x1ff,
 		.arl_entries = 4,
-		.cpu_port = B53_CPU_PORT_25,
+		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
 		.jumbo_pm_reg = B53_JUMBO_PORT_MASK,

From ea8b65b596d78969629562f9728f76cbf565fbec Mon Sep 17 00:00:00 2001
From: Stephane Grosjean <s.grosjean@peak-system.com>
Date: Mon, 27 Mar 2017 14:36:10 +0200
Subject: [PATCH 18/41] can: usb: Add support of PCAN-Chip USB stamp module

This patch adds the support of the PCAN-Chip USB, a stamp module for
customer hardware designs, which communicates via USB 2.0 with the
hardware. The integrated CAN controller supports the protocols CAN 2.0 A/B
as well as CAN FD. The physical CAN connection is determined by external
wiring. The Stamp module with its single-sided mounting and plated
half-holes is suitable for automatic assembly.

Note that the chip is equipped with the same logic than the PCAN-USB FD.

Signed-off-by: Stephane Grosjean <s.grosjean@peak-system.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/Kconfig                  |  1 +
 drivers/net/can/usb/peak_usb/pcan_usb_core.c |  2 +
 drivers/net/can/usb/peak_usb/pcan_usb_core.h |  2 +
 drivers/net/can/usb/peak_usb/pcan_usb_fd.c   | 72 ++++++++++++++++++++
 4 files changed, 77 insertions(+)

diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig
index 8483a40e7e9e..3f8adc366af4 100644
--- a/drivers/net/can/usb/Kconfig
+++ b/drivers/net/can/usb/Kconfig
@@ -72,6 +72,7 @@ config CAN_PEAK_USB
 	  PCAN-USB Pro         dual CAN 2.0b channels USB adapter
 	  PCAN-USB FD          single CAN-FD channel USB adapter
 	  PCAN-USB Pro FD      dual CAN-FD channels USB adapter
+	  PCAN-Chip USB        CAN-FD to USB stamp module
 
 	  (see also http://www.peak-system.com).
 
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
index 0b0302af3bd2..57913dbbae0a 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
@@ -39,6 +39,7 @@ static struct usb_device_id peak_usb_table[] = {
 	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBPRO_PRODUCT_ID)},
 	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBFD_PRODUCT_ID)},
 	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBPROFD_PRODUCT_ID)},
+	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBCHIP_PRODUCT_ID)},
 	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBX6_PRODUCT_ID)},
 	{} /* Terminating entry */
 };
@@ -51,6 +52,7 @@ static const struct peak_usb_adapter *const peak_usb_adapters_list[] = {
 	&pcan_usb_pro,
 	&pcan_usb_fd,
 	&pcan_usb_pro_fd,
+	&pcan_usb_chip,
 	&pcan_usb_x6,
 };
 
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.h b/drivers/net/can/usb/peak_usb/pcan_usb_core.h
index 3cbfb069893d..c01316cac354 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.h
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.h
@@ -27,6 +27,7 @@
 #define PCAN_USBPRO_PRODUCT_ID		0x000d
 #define PCAN_USBPROFD_PRODUCT_ID	0x0011
 #define PCAN_USBFD_PRODUCT_ID		0x0012
+#define PCAN_USBCHIP_PRODUCT_ID		0x0013
 #define PCAN_USBX6_PRODUCT_ID		0x0014
 
 #define PCAN_USB_DRIVER_NAME		"peak_usb"
@@ -90,6 +91,7 @@ struct peak_usb_adapter {
 extern const struct peak_usb_adapter pcan_usb;
 extern const struct peak_usb_adapter pcan_usb_pro;
 extern const struct peak_usb_adapter pcan_usb_fd;
+extern const struct peak_usb_adapter pcan_usb_chip;
 extern const struct peak_usb_adapter pcan_usb_pro_fd;
 extern const struct peak_usb_adapter pcan_usb_x6;
 
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
index 304732550f0a..528d3bb4917f 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
@@ -1061,6 +1061,78 @@ const struct peak_usb_adapter pcan_usb_fd = {
 	.do_get_berr_counter = pcan_usb_fd_get_berr_counter,
 };
 
+/* describes the PCAN-CHIP USB */
+static const struct can_bittiming_const pcan_usb_chip_const = {
+	.name = "pcan_chip_usb",
+	.tseg1_min = 1,
+	.tseg1_max = (1 << PUCAN_TSLOW_TSGEG1_BITS),
+	.tseg2_min = 1,
+	.tseg2_max = (1 << PUCAN_TSLOW_TSGEG2_BITS),
+	.sjw_max = (1 << PUCAN_TSLOW_SJW_BITS),
+	.brp_min = 1,
+	.brp_max = (1 << PUCAN_TSLOW_BRP_BITS),
+	.brp_inc = 1,
+};
+
+static const struct can_bittiming_const pcan_usb_chip_data_const = {
+	.name = "pcan_chip_usb",
+	.tseg1_min = 1,
+	.tseg1_max = (1 << PUCAN_TFAST_TSGEG1_BITS),
+	.tseg2_min = 1,
+	.tseg2_max = (1 << PUCAN_TFAST_TSGEG2_BITS),
+	.sjw_max = (1 << PUCAN_TFAST_SJW_BITS),
+	.brp_min = 1,
+	.brp_max = (1 << PUCAN_TFAST_BRP_BITS),
+	.brp_inc = 1,
+};
+
+const struct peak_usb_adapter pcan_usb_chip = {
+	.name = "PCAN-Chip USB",
+	.device_id = PCAN_USBCHIP_PRODUCT_ID,
+	.ctrl_count = PCAN_USBFD_CHANNEL_COUNT,
+	.ctrlmode_supported = CAN_CTRLMODE_FD |
+		CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY,
+	.clock = {
+		.freq = PCAN_UFD_CRYSTAL_HZ,
+	},
+	.bittiming_const = &pcan_usb_chip_const,
+	.data_bittiming_const = &pcan_usb_chip_data_const,
+
+	/* size of device private data */
+	.sizeof_dev_private = sizeof(struct pcan_usb_fd_device),
+
+	/* timestamps usage */
+	.ts_used_bits = 32,
+	.ts_period = 1000000, /* calibration period in ts. */
+	.us_per_ts_scale = 1, /* us = (ts * scale) >> shift */
+	.us_per_ts_shift = 0,
+
+	/* give here messages in/out endpoints */
+	.ep_msg_in = PCAN_USBPRO_EP_MSGIN,
+	.ep_msg_out = {PCAN_USBPRO_EP_MSGOUT_0},
+
+	/* size of rx/tx usb buffers */
+	.rx_buffer_size = PCAN_UFD_RX_BUFFER_SIZE,
+	.tx_buffer_size = PCAN_UFD_TX_BUFFER_SIZE,
+
+	/* device callbacks */
+	.intf_probe = pcan_usb_pro_probe,	/* same as PCAN-USB Pro */
+	.dev_init = pcan_usb_fd_init,
+
+	.dev_exit = pcan_usb_fd_exit,
+	.dev_free = pcan_usb_fd_free,
+	.dev_set_bus = pcan_usb_fd_set_bus,
+	.dev_set_bittiming = pcan_usb_fd_set_bittiming_slow,
+	.dev_set_data_bittiming = pcan_usb_fd_set_bittiming_fast,
+	.dev_decode_buf = pcan_usb_fd_decode_buf,
+	.dev_start = pcan_usb_fd_start,
+	.dev_stop = pcan_usb_fd_stop,
+	.dev_restart_async = pcan_usb_fd_restart_async,
+	.dev_encode_msg = pcan_usb_fd_encode_msg,
+
+	.do_get_berr_counter = pcan_usb_fd_get_berr_counter,
+};
+
 /* describes the PCAN-USB Pro FD adapter */
 static const struct can_bittiming_const pcan_usb_pro_fd_const = {
 	.name = "pcan_usb_pro_fd",

From 71b611562f45e8798d519a38d0143d5eafd6eb35 Mon Sep 17 00:00:00 2001
From: Stephane Grosjean <s.grosjean@peak-system.com>
Date: Mon, 27 Mar 2017 14:36:11 +0200
Subject: [PATCH 19/41] can: usb: Kconfig: Add PCAN-USB X6 device in help text

This patch adds a text line in the help section of the CAN_PEAK_USB
config item describing the support of the PCAN-USB X6 adapter, which is
already included in the Kernel since 4.9.

Signed-off-by: Stephane Grosjean <s.grosjean@peak-system.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig
index 3f8adc366af4..5f9e0e6301d0 100644
--- a/drivers/net/can/usb/Kconfig
+++ b/drivers/net/can/usb/Kconfig
@@ -73,6 +73,7 @@ config CAN_PEAK_USB
 	  PCAN-USB FD          single CAN-FD channel USB adapter
 	  PCAN-USB Pro FD      dual CAN-FD channels USB adapter
 	  PCAN-Chip USB        CAN-FD to USB stamp module
+	  PCAN-USB X6          6 CAN-FD channels USB adapter
 
 	  (see also http://www.peak-system.com).
 

From b05c73bd1e3ec60357580eb042ee932a5ed754d5 Mon Sep 17 00:00:00 2001
From: Maksim Salau <maksim.salau@gmail.com>
Date: Sun, 23 Apr 2017 20:31:40 +0300
Subject: [PATCH 20/41] net: can: usb: gs_usb: Fix buffer on stack

Allocate buffers on HEAP instead of STACK for local structures
that are to be sent using usb_control_msg().

Signed-off-by: Maksim Salau <maksim.salau@gmail.com>
Cc: linux-stable <stable@vger.kernel.org> # >= v4.8
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/gs_usb.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 300349fe8dc0..eecee7f8dfb7 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -739,13 +739,18 @@ static const struct net_device_ops gs_usb_netdev_ops = {
 static int gs_usb_set_identify(struct net_device *netdev, bool do_identify)
 {
 	struct gs_can *dev = netdev_priv(netdev);
-	struct gs_identify_mode imode;
+	struct gs_identify_mode *imode;
 	int rc;
 
+	imode = kmalloc(sizeof(*imode), GFP_KERNEL);
+
+	if (!imode)
+		return -ENOMEM;
+
 	if (do_identify)
-		imode.mode = GS_CAN_IDENTIFY_ON;
+		imode->mode = GS_CAN_IDENTIFY_ON;
 	else
-		imode.mode = GS_CAN_IDENTIFY_OFF;
+		imode->mode = GS_CAN_IDENTIFY_OFF;
 
 	rc = usb_control_msg(interface_to_usbdev(dev->iface),
 			     usb_sndctrlpipe(interface_to_usbdev(dev->iface),
@@ -755,10 +760,12 @@ static int gs_usb_set_identify(struct net_device *netdev, bool do_identify)
 			     USB_RECIP_INTERFACE,
 			     dev->channel,
 			     0,
-			     &imode,
-			     sizeof(imode),
+			     imode,
+			     sizeof(*imode),
 			     100);
 
+	kfree(imode);
+
 	return (rc > 0) ? 0 : rc;
 }
 

From 6f2aee0c0de65013333bbc26fe50c9c7b09a37f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Roman=20Spycha=C5=82a?= <roed@onet.eu>
Date: Thu, 20 Apr 2017 12:04:10 +0200
Subject: [PATCH 21/41] usb: plusb: Add support for PL-27A1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds support for the PL-27A1 by adding the appropriate
USB ID's. This chip is used in the goobay Active USB 3.0 Data Link
and Unitek Y-3501 cables.

Signed-off-by: Roman Spychała <roed@onet.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/Kconfig |  2 +-
 drivers/net/usb/plusb.c | 15 +++++++++++++--
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig
index 3dd490f53e48..f28bd74ac275 100644
--- a/drivers/net/usb/Kconfig
+++ b/drivers/net/usb/Kconfig
@@ -369,7 +369,7 @@ config USB_NET_NET1080
 	  optionally with LEDs that indicate traffic
 
 config USB_NET_PLUSB
-	tristate "Prolific PL-2301/2302/25A1 based cables"
+	tristate "Prolific PL-2301/2302/25A1/27A1 based cables"
 	# if the handshake/init/reset problems, from original 'plusb',
 	# are ever resolved ... then remove "experimental"
 	depends on USB_USBNET
diff --git a/drivers/net/usb/plusb.c b/drivers/net/usb/plusb.c
index 22e1a9a99a7d..6fe59373cba9 100644
--- a/drivers/net/usb/plusb.c
+++ b/drivers/net/usb/plusb.c
@@ -102,7 +102,7 @@ static int pl_reset(struct usbnet *dev)
 }
 
 static const struct driver_info	prolific_info = {
-	.description =	"Prolific PL-2301/PL-2302/PL-25A1",
+	.description =	"Prolific PL-2301/PL-2302/PL-25A1/PL-27A1",
 	.flags =	FLAG_POINTTOPOINT | FLAG_NO_SETINT,
 		/* some PL-2302 versions seem to fail usb_set_interface() */
 	.reset =	pl_reset,
@@ -139,6 +139,17 @@ static const struct usb_device_id	products [] = {
 					 * Host-to-Host Cable
 					 */
 	.driver_info =  (unsigned long) &prolific_info,
+
+},
+
+/* super speed cables */
+{
+	USB_DEVICE(0x067b, 0x27a1),     /* PL-27A1, no eeprom
+					 * also: goobay Active USB 3.0
+					 * Data Link,
+					 * Unitek Y-3501
+					 */
+	.driver_info =  (unsigned long) &prolific_info,
 },
 
 	{ },		// END
@@ -158,5 +169,5 @@ static struct usb_driver plusb_driver = {
 module_usb_driver(plusb_driver);
 
 MODULE_AUTHOR("David Brownell");
-MODULE_DESCRIPTION("Prolific PL-2301/2302/25A1 USB Host to Host Link Driver");
+MODULE_DESCRIPTION("Prolific PL-2301/2302/25A1/27A1 USB Host to Host Link Driver");
 MODULE_LICENSE("GPL");

From f6478218e6edc2a587b8f132f66373baa7b2497c Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 20 Apr 2017 20:55:12 +0800
Subject: [PATCH 22/41] macvlan: Fix device ref leak when purging bc_queue

When a parent macvlan device is destroyed we end up purging its
broadcast queue without dropping the device reference count on
the packet source device.  This causes the source device to linger.

This patch drops that reference count.

Fixes: 260916dfb48c ("macvlan: Fix potential use-after free for...")
Reported-by: Joe Ghalam <Joe.Ghalam@dell.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 9261722960a7..b34eaaae03fd 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1139,6 +1139,7 @@ static int macvlan_port_create(struct net_device *dev)
 static void macvlan_port_destroy(struct net_device *dev)
 {
 	struct macvlan_port *port = macvlan_port_get_rtnl(dev);
+	struct sk_buff *skb;
 
 	dev->priv_flags &= ~IFF_MACVLAN_PORT;
 	netdev_rx_handler_unregister(dev);
@@ -1147,7 +1148,15 @@ static void macvlan_port_destroy(struct net_device *dev)
 	 * but we need to cancel it and purge left skbs if any.
 	 */
 	cancel_work_sync(&port->bc_work);
-	__skb_queue_purge(&port->bc_queue);
+
+	while ((skb = __skb_dequeue(&port->bc_queue))) {
+		const struct macvlan_dev *src = MACVLAN_SKB_CB(skb)->src;
+
+		if (src)
+			dev_put(src->dev);
+
+		kfree_skb(skb);
+	}
 
 	kfree(port);
 }

From a53d26eb888ab1c41779c443daf1af948c641e0b Mon Sep 17 00:00:00 2001
From: Bert Kenward <bkenward@solarflare.com>
Date: Tue, 25 Apr 2017 13:44:54 +0100
Subject: [PATCH 23/41] sfc: tx ring can only have 2048 entries for all EF10
 NICs

Fixes: dd248f1bc65b ("sfc: Add PCI ID for Solarflare 8000 series 10/40G NIC")
Reported-by: Patrick Talbert <ptalbert@redhat.com>
Signed-off-by: Bert Kenward <bkenward@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/efx.h         | 5 ++++-
 drivers/net/ethernet/sfc/workarounds.h | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index ee14662415c5..a0c52e328102 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -74,7 +74,10 @@ void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue);
 #define EFX_RXQ_MIN_ENT		128U
 #define EFX_TXQ_MIN_ENT(efx)	(2 * efx_tx_max_skb_descs(efx))
 
-#define EFX_TXQ_MAX_ENT(efx)	(EFX_WORKAROUND_35388(efx) ? \
+/* All EF10 architecture NICs steal one bit of the DMAQ size for various
+ * other purposes when counting TxQ entries, so we halve the queue size.
+ */
+#define EFX_TXQ_MAX_ENT(efx)	(EFX_WORKAROUND_EF10(efx) ? \
 				 EFX_MAX_DMAQ_SIZE / 2 : EFX_MAX_DMAQ_SIZE)
 
 static inline bool efx_rss_enabled(struct efx_nic *efx)
diff --git a/drivers/net/ethernet/sfc/workarounds.h b/drivers/net/ethernet/sfc/workarounds.h
index 103f827a1623..c67fa18b8121 100644
--- a/drivers/net/ethernet/sfc/workarounds.h
+++ b/drivers/net/ethernet/sfc/workarounds.h
@@ -16,6 +16,7 @@
  */
 
 #define EFX_WORKAROUND_SIENA(efx) (efx_nic_rev(efx) == EFX_REV_SIENA_A0)
+#define EFX_WORKAROUND_EF10(efx) (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
 #define EFX_WORKAROUND_10G(efx) 1
 
 /* Bit-bashed I2C reads cause performance drop */

From 72ec0bc64b9a5d8e0efcb717abfc757746b101b7 Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Mon, 24 Apr 2017 18:29:16 +0800
Subject: [PATCH 24/41] team: fix memory leaks

In functions team_nl_send_port_list_get() and
team_nl_send_options_get(), pointer skb keeps the return value of
nlmsg_new(). When the call to genlmsg_put() fails, the memory is not
freed(). This will result in memory leak bugs.

Fixes: 9b00cf2d1024 ("team: implement multipart netlink messages for options transfers")
Signed-off-by: Pan Bian <bianpan2016@163.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index f8c81f12d988..85c01247f2e3 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2361,8 +2361,10 @@ start_again:
 
 	hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI,
 			  TEAM_CMD_OPTIONS_GET);
-	if (!hdr)
+	if (!hdr) {
+		nlmsg_free(skb);
 		return -EMSGSIZE;
+	}
 
 	if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex))
 		goto nla_put_failure;
@@ -2634,8 +2636,10 @@ start_again:
 
 	hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI,
 			  TEAM_CMD_PORT_LIST_GET);
-	if (!hdr)
+	if (!hdr) {
+		nlmsg_free(skb);
 		return -EMSGSIZE;
+	}
 
 	if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex))
 		goto nla_put_failure;

From b7d6df57516f8e6a1c847b822ec2a62555455f88 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 24 Apr 2017 14:18:28 +0200
Subject: [PATCH 25/41] ipv6: move stub initialization after ipv6 setup
 completion

The ipv6 stub pointer is currently initialized before the ipv6
routing subsystem: a 3rd party can access and use such stub
before the routing data is ready.
Moreover, such pointer is not cleared in case of initialization
error, possibly leading to dangling pointers usage.

This change addresses the above moving the stub initialization
at the end of ipv6 init code.

Fixes: 5f81bd2e5d80 ("ipv6: export a stub for IPv6 symbols used by vxlan")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a9a9553ee63d..e82e59f22dfc 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -933,8 +933,6 @@ static int __init inet6_init(void)
 	if (err)
 		goto igmp_fail;
 
-	ipv6_stub = &ipv6_stub_impl;
-
 	err = ipv6_netfilter_init();
 	if (err)
 		goto netfilter_fail;
@@ -1010,6 +1008,10 @@ static int __init inet6_init(void)
 	if (err)
 		goto sysctl_fail;
 #endif
+
+	/* ensure that ipv6 stubs are visible only after ipv6 is ready */
+	wmb();
+	ipv6_stub = &ipv6_stub_impl;
 out:
 	return err;
 

From 3364d61c92ecca7a8da990659c4b0ae1fcf0fcfb Mon Sep 17 00:00:00 2001
From: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Date: Mon, 24 Apr 2017 15:00:42 +0200
Subject: [PATCH 26/41] tipc: fix socket flow control accounting error at
 tipc_send_stream

Until now in tipc_send_stream(), we return -1 when the socket
encounters link congestion even if the socket had successfully
sent partial data. This is incorrect as the application resends
the same the partial data leading to data corruption at
receiver's end.

In this commit, we return the partially sent bytes as the return
value at link congestion.

Fixes: 10724cc7bb78 ("tipc: redesign connection-level flow control")
Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 7130e73bd42c..b28e94f1c739 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1083,7 +1083,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
 		}
 	} while (sent < dlen && !rc);
 
-	return rc ? rc : sent;
+	return sent ? sent : rc;
 }
 
 /**

From 05ff8378975a9d5fdde19104b62163d2902926fb Mon Sep 17 00:00:00 2001
From: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Date: Mon, 24 Apr 2017 15:00:43 +0200
Subject: [PATCH 27/41] tipc: fix socket flow control accounting error at
 tipc_recv_stream

Until now in tipc_recv_stream(), we update the received
unacknowledged bytes based on a stack variable and not based on the
actual message size.
If the user buffer passed at tipc_recv_stream() is smaller than the
received skb, the size variable in stack differs from the actual
message size in the skb. This leads to a flow control accounting
error causing permanent congestion.

In this commit, we fix this accounting error by always using the
size of the incoming message.

Fixes: 10724cc7bb78 ("tipc: redesign connection-level flow control")
Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b28e94f1c739..566906795c8c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1484,7 +1484,7 @@ restart:
 	if (unlikely(flags & MSG_PEEK))
 		goto exit;
 
-	tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
+	tsk->rcv_unacked += tsk_inc(tsk, hlen + msg_data_sz(msg));
 	if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
 		tipc_sk_send_ack(tsk);
 	tsk_advance_rx_queue(sk);

From a23f6ce6d95900ca83b44b6fa691afe6c7d4b941 Mon Sep 17 00:00:00 2001
From: Andreas Kemnade <andreas@kemnade.info>
Date: Mon, 24 Apr 2017 21:18:39 +0200
Subject: [PATCH 28/41] net: hso: fix module unloading

keep tty driver until usb driver is unregistered
rmmod hso
produces traces like this without that:

[40261.645904] usb 2-2: new high-speed USB device number 2 using ehci-omap
[40261.854644] usb 2-2: New USB device found, idVendor=0af0, idProduct=8800
[40261.862609] usb 2-2: New USB device strings: Mfr=3, Product=2, SerialNumber=0
[40261.872772] usb 2-2: Product: Globetrotter HSUPA Modem
[40261.880279] usb 2-2: Manufacturer: Option N.V.
[40262.021270] hso 2-2:1.5: Not our interface
[40265.556945] hso: unloaded
[40265.559875] usbcore: deregistering interface driver hso
[40265.595947] Unable to handle kernel NULL pointer dereference at virtual address 00000033
[40265.604522] pgd = ecb14000
[40265.611877] [00000033] *pgd=00000000
[40265.617034] Internal error: Oops: 5 [#1] PREEMPT SMP ARM
[40265.622650] Modules linked in: hso(-) bnep bluetooth ipv6 arc4 twl4030_madc_hwmon wl18xx wlcore mac80211 cfg80211 snd_soc_simple_card snd_soc_simple_card_utils snd_soc_omap_twl4030 snd_soc_gtm601 generic_adc_battery extcon_gpio omap3_isp videobuf2_dma_contig videobuf2_memops wlcore_sdio videobuf2_v4l2 videobuf2_core ov9650 bmp280_i2c v4l2_common bmp280 bmg160_i2c bmg160_core at24 nvmem_core videodev bmc150_accel_i2c bmc150_magn_i2c media bmc150_accel_core tsc2007 bmc150_magn leds_tca6507 bno055 snd_soc_omap_mcbsp industrialio_triggered_buffer snd_soc_omap kfifo_buf snd_pcm_dmaengine gpio_twl4030 snd_soc_twl4030 twl4030_vibra twl4030_madc wwan_on_off ehci_omap pwm_bl pwm_omap_dmtimer panel_tpo_td028ttec1 encoder_opa362 connector_analog_tv omapdrm drm_kms_helper cfbfillrect syscopyarea cfbimgblt sysfillrect
[40265.698211]  sysimgblt fb_sys_fops cfbcopyarea drm omapdss usb_f_ecm g_ether usb_f_rndis u_ether libcomposite configfs omap2430 phy_twl4030_usb musb_hdrc twl4030_charger industrialio w2sg0004 twl4030_pwrbutton bq27xxx_battery w1_bq27000 omap_hdq [last unloaded: hso]
[40265.723175] CPU: 0 PID: 2701 Comm: rmmod Not tainted 4.11.0-rc6-letux+ #6
[40265.730346] Hardware name: Generic OMAP36xx (Flattened Device Tree)
[40265.736938] task: ecb81100 task.stack: ecb82000
[40265.741729] PC is at cdev_del+0xc/0x2c
[40265.745666] LR is at tty_unregister_device+0x40/0x50
[40265.750915] pc : [<c027472c>]    lr : [<c04b3ecc>]    psr: 600b0113
sp : ecb83ea8  ip : eca4f898  fp : 00000000
[40265.763000] r10: 00000000  r9 : 00000000  r8 : 00000001
[40265.768493] r7 : eca4f800  r6 : 00000003  r5 : 00000000  r4 : ffffffff
[40265.775360] r3 : c1458d54  r2 : 00000000  r1 : 00000004  r0 : ffffffff
[40265.782257] Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
[40265.789764] Control: 10c5387d  Table: acb14019  DAC: 00000051
[40265.795806] Process rmmod (pid: 2701, stack limit = 0xecb82218)
[40265.802062] Stack: (0xecb83ea8 to 0xecb84000)
[40265.806640] 3ea0:                   ec9e8100 c04b3ecc bf737378 ed5b7c00 00000003 bf7327ec
[40265.815277] 3ec0: eca4f800 00000000 ec9fd800 eca4f800 bf737070 bf7328bc eca4f820 c05a9a04
[40265.823883] 3ee0: eca4f820 00000000 00000001 eca4f820 ec9fd870 bf737070 eca4f854 ec9fd8a4
[40265.832519] 3f00: ecb82000 00000000 00000000 c04e6960 eca4f820 bf737070 bf737048 00000081
[40265.841125] 3f20: c01071e4 c04e6a60 ecb81100 bf737070 bf737070 c04e5d94 bf737020 c05a8f88
[40265.849731] 3f40: bf737100 00000800 7f5fa254 00000081 c01071e4 c01c4afc 00000000 006f7368
[40265.858367] 3f60: ecb815f4 00000000 c0cac9c4 c01071e4 ecb82000 00000000 00000000 c01512f4
[40265.866973] 3f80: ed5b3200 c01071e4 7f5fa220 7f5fa220 bea78ec9 0010711c 7f5fa220 7f5fa220
[40265.875579] 3fa0: bea78ec9 c0107040 7f5fa220 7f5fa220 7f5fa254 00000800 dd35b800 dd35b800
[40265.884216] 3fc0: 7f5fa220 7f5fa220 bea78ec9 00000081 bea78dcc 00000000 bea78bd8 00000000
[40265.892822] 3fe0: b6f70521 bea78b6c 7f5dd613 b6f70526 80070030 7f5fa254 ffffffff ffffffff
[40265.901458] [<c027472c>] (cdev_del) from [<c04b3ecc>] (tty_unregister_device+0x40/0x50)
[40265.909942] [<c04b3ecc>] (tty_unregister_device) from [<bf7327ec>] (hso_free_interface+0x80/0x144 [hso])
[40265.919982] [<bf7327ec>] (hso_free_interface [hso]) from [<bf7328bc>] (hso_disconnect+0xc/0x18 [hso])
[40265.929718] [<bf7328bc>] (hso_disconnect [hso]) from [<c05a9a04>] (usb_unbind_interface+0x84/0x200)
[40265.939239] [<c05a9a04>] (usb_unbind_interface) from [<c04e6960>] (device_release_driver_internal+0x138/0x1cc)
[40265.949798] [<c04e6960>] (device_release_driver_internal) from [<c04e6a60>] (driver_detach+0x60/0x6c)
[40265.959503] [<c04e6a60>] (driver_detach) from [<c04e5d94>] (bus_remove_driver+0x64/0x8c)
[40265.968017] [<c04e5d94>] (bus_remove_driver) from [<c05a8f88>] (usb_deregister+0x5c/0xb8)
[40265.976654] [<c05a8f88>] (usb_deregister) from [<c01c4afc>] (SyS_delete_module+0x160/0x1dc)
[40265.985443] [<c01c4afc>] (SyS_delete_module) from [<c0107040>] (ret_fast_syscall+0x0/0x1c)
[40265.994171] Code: c1458d54 e59f3020 e92d4010 e1a04000 (e5941034)
[40266.016693] ---[ end trace 9d5ac43c7e41075c ]---

Signed-off-by: Andreas Kemnade <andreas@kemnade.info>
Reviewed-by: Johan Hovold <johan@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/hso.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 4f2e8141dbe2..93411a348f12 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -3279,9 +3279,9 @@ static void __exit hso_exit(void)
 	pr_info("unloaded\n");
 
 	tty_unregister_driver(tty_drv);
-	put_tty_driver(tty_drv);
 	/* deregister the usb driver */
 	usb_deregister(&hso_driver);
+	put_tty_driver(tty_drv);
 }
 
 /* Module definitions */

From fdfb70d275223b9d69d5d3abe1f88507da579139 Mon Sep 17 00:00:00 2001
From: stephen hemminger <stephen@networkplumber.org>
Date: Mon, 24 Apr 2017 18:33:38 -0700
Subject: [PATCH 29/41] netvsc: fix calculation of available send sections

My change (introduced in 4.11) to use find_first_clear_bit
incorrectly assumed that the size argument was words, not bits.
The effect was only a small limited number of the available send
sections were being actually used. This can cause performance loss
with some workloads.

Since map_words is now used only during initialization, it can
be on stack instead of in per-device data.

Fixes: b58a185801da ("netvsc: simplify get next send section")
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/hyperv_net.h | 1 -
 drivers/net/hyperv/netvsc.c     | 9 ++++-----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index f9f3dba7a588..db23cb36ae5c 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -751,7 +751,6 @@ struct netvsc_device {
 	u32 send_section_cnt;
 	u32 send_section_size;
 	unsigned long *send_section_map;
-	int map_words;
 
 	/* Used for NetVSP initialization protocol */
 	struct completion channel_init_wait;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 8dd0b8770328..15ef713d96c0 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -236,6 +236,7 @@ static int netvsc_init_buf(struct hv_device *device)
 	struct netvsc_device *net_device;
 	struct nvsp_message *init_packet;
 	struct net_device *ndev;
+	size_t map_words;
 	int node;
 
 	net_device = get_outbound_net_device(device);
@@ -401,11 +402,9 @@ static int netvsc_init_buf(struct hv_device *device)
 		   net_device->send_section_size, net_device->send_section_cnt);
 
 	/* Setup state for managing the send buffer. */
-	net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt,
-					     BITS_PER_LONG);
+	map_words = DIV_ROUND_UP(net_device->send_section_cnt, BITS_PER_LONG);
 
-	net_device->send_section_map = kcalloc(net_device->map_words,
-					       sizeof(ulong), GFP_KERNEL);
+	net_device->send_section_map = kcalloc(map_words, sizeof(ulong), GFP_KERNEL);
 	if (net_device->send_section_map == NULL) {
 		ret = -ENOMEM;
 		goto cleanup;
@@ -683,7 +682,7 @@ static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
 	unsigned long *map_addr = net_device->send_section_map;
 	unsigned int i;
 
-	for_each_clear_bit(i, map_addr, net_device->map_words) {
+	for_each_clear_bit(i, map_addr, net_device->send_section_cnt) {
 		if (sync_test_and_set_bit(i, map_addr) == 0)
 			return i;
 	}

From c8fcd133eabcbbf51a6ecfd12a975cace8877d5a Mon Sep 17 00:00:00 2001
From: "sudarsana.kalluru@cavium.com" <sudarsana.kalluru@cavium.com>
Date: Mon, 24 Apr 2017 20:59:10 -0700
Subject: [PATCH 30/41] qed: Fix error in the dcbx app meta data
 initialization.

DCBX app_data array is initialized with the incorrect values for
personality field. This would  prevent offloaded protocols from
honoring the PFC.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index a6e2bbe629bd..cfdadb658ade 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -64,11 +64,11 @@
 	((u32)(prio_tc_tbl >> ((7 - prio) * 4)) & 0x7)
 
 static const struct qed_dcbx_app_metadata qed_dcbx_app_update[] = {
-	{DCBX_PROTOCOL_ISCSI, "ISCSI", QED_PCI_DEFAULT},
-	{DCBX_PROTOCOL_FCOE, "FCOE", QED_PCI_DEFAULT},
-	{DCBX_PROTOCOL_ROCE, "ROCE", QED_PCI_DEFAULT},
-	{DCBX_PROTOCOL_ROCE_V2, "ROCE_V2", QED_PCI_DEFAULT},
-	{DCBX_PROTOCOL_ETH, "ETH", QED_PCI_ETH}
+	{DCBX_PROTOCOL_ISCSI, "ISCSI", QED_PCI_ISCSI},
+	{DCBX_PROTOCOL_FCOE, "FCOE", QED_PCI_FCOE},
+	{DCBX_PROTOCOL_ROCE, "ROCE", QED_PCI_ETH_ROCE},
+	{DCBX_PROTOCOL_ROCE_V2, "ROCE_V2", QED_PCI_ETH_ROCE},
+	{DCBX_PROTOCOL_ETH, "ETH", QED_PCI_ETH},
 };
 
 static bool qed_dcbx_app_ethtype(u32 app_info_bitmap)

From ec9c4215fef37da6668c4105f5ad3891aaa6527a Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Tue, 25 Apr 2017 15:56:50 +0200
Subject: [PATCH 31/41] ipv6: fix source routing

Commit a149e7c7ce81 ("ipv6: sr: add support for SRH injection through
setsockopt") introduced handling of IPV6_SRCRT_TYPE_4, but at the same
time restricted it to only IPV6_SRCRT_TYPE_0 and
IPV6_SRCRT_TYPE_4. Previously, ipv6_push_exthdr() and fl6_update_dst()
would also handle other values (ie STRICT and TYPE_2).

Restore previous source routing behavior, by handling IPV6_SRCRT_STRICT
and IPV6_SRCRT_TYPE_2 the same way as IPV6_SRCRT_TYPE_0 in
ipv6_push_exthdr() and fl6_update_dst().

Fixes: a149e7c7ce81 ("ipv6: sr: add support for SRH injection through setsockopt")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/exthdrs.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 25192a3b0cd7..d32e2110aff2 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -909,6 +909,8 @@ static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
 {
 	switch (opt->type) {
 	case IPV6_SRCRT_TYPE_0:
+	case IPV6_SRCRT_STRICT:
+	case IPV6_SRCRT_TYPE_2:
 		ipv6_push_rthdr0(skb, proto, opt, addr_p, saddr);
 		break;
 	case IPV6_SRCRT_TYPE_4:
@@ -1163,6 +1165,8 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
 
 	switch (opt->srcrt->type) {
 	case IPV6_SRCRT_TYPE_0:
+	case IPV6_SRCRT_STRICT:
+	case IPV6_SRCRT_TYPE_2:
 		fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr;
 		break;
 	case IPV6_SRCRT_TYPE_4:

From b1b9d366028ff580e6dd80b48a69c473361456f1 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 25 Apr 2017 22:58:37 +0800
Subject: [PATCH 32/41] bridge: move bridge multicast cleanup to ndo_uninit

During removing a bridge device, if the bridge is still up, a new mdb entry
still can be added in br_multicast_add_group() after all mdb entries are
removed in br_multicast_dev_del(). Like the path:

  mld_ifc_timer_expire ->
    mld_sendpack -> ...
      br_multicast_rcv ->
        br_multicast_add_group

The new mp's timer will be set up. If the timer expires after the bridge
is freed, it may cause use-after-free panic in br_multicast_group_expired.

BUG: unable to handle kernel NULL pointer dereference at 0000000000000048
IP: [<ffffffffa07ed2c8>] br_multicast_group_expired+0x28/0xb0 [bridge]
Call Trace:
 <IRQ>
 [<ffffffff81094536>] call_timer_fn+0x36/0x110
 [<ffffffffa07ed2a0>] ? br_mdb_free+0x30/0x30 [bridge]
 [<ffffffff81096967>] run_timer_softirq+0x237/0x340
 [<ffffffff8108dcbf>] __do_softirq+0xef/0x280
 [<ffffffff8169889c>] call_softirq+0x1c/0x30
 [<ffffffff8102c275>] do_softirq+0x65/0xa0
 [<ffffffff8108e055>] irq_exit+0x115/0x120
 [<ffffffff81699515>] smp_apic_timer_interrupt+0x45/0x60
 [<ffffffff81697a5d>] apic_timer_interrupt+0x6d/0x80

Nikolay also found it would cause a memory leak - the mdb hash is
reallocated and not freed due to the mdb rehash.

unreferenced object 0xffff8800540ba800 (size 2048):
  backtrace:
    [<ffffffff816e2287>] kmemleak_alloc+0x67/0xc0
    [<ffffffff81260bea>] __kmalloc+0x1ba/0x3e0
    [<ffffffffa05c60ee>] br_mdb_rehash+0x5e/0x340 [bridge]
    [<ffffffffa05c74af>] br_multicast_new_group+0x43f/0x6e0 [bridge]
    [<ffffffffa05c7aa3>] br_multicast_add_group+0x203/0x260 [bridge]
    [<ffffffffa05ca4b5>] br_multicast_rcv+0x945/0x11d0 [bridge]
    [<ffffffffa05b6b10>] br_dev_xmit+0x180/0x470 [bridge]
    [<ffffffff815c781b>] dev_hard_start_xmit+0xbb/0x3d0
    [<ffffffff815c8743>] __dev_queue_xmit+0xb13/0xc10
    [<ffffffff815c8850>] dev_queue_xmit+0x10/0x20
    [<ffffffffa02f8d7a>] ip6_finish_output2+0x5ca/0xac0 [ipv6]
    [<ffffffffa02fbfc6>] ip6_finish_output+0x126/0x2c0 [ipv6]
    [<ffffffffa02fc245>] ip6_output+0xe5/0x390 [ipv6]
    [<ffffffffa032b92c>] NF_HOOK.constprop.44+0x6c/0x240 [ipv6]
    [<ffffffffa032bd16>] mld_sendpack+0x216/0x3e0 [ipv6]
    [<ffffffffa032d5eb>] mld_ifc_timer_expire+0x18b/0x2b0 [ipv6]

This could happen when ip link remove a bridge or destroy a netns with a
bridge device inside.

With Nikolay's suggestion, this patch is to clean up bridge multicast in
ndo_uninit after bridge dev is shutdown, instead of br_dev_delete, so
that netif_running check in br_multicast_add_group can avoid this issue.

v1->v2:
  - fix this issue by moving br_multicast_dev_del to ndo_uninit, instead
    of calling dev_close in br_dev_delete.

(NOTE: Depends upon b6fe0440c637 ("bridge: implement missing ndo_uninit()"))

Fixes: e10177abf842 ("bridge: multicast: fix handling of temp and perm entries")
Reported-by: Jianwen Ji <jiji@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c | 1 +
 net/bridge/br_if.c     | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 90f49a194249..430b53e7d941 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -123,6 +123,7 @@ static void br_dev_uninit(struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
 
+	br_multicast_dev_del(br);
 	br_multicast_uninit_stats(br);
 	br_vlan_flush(br);
 	free_percpu(br->stats);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 56a2a72e7738..a8d0ed282a10 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -311,7 +311,6 @@ void br_dev_delete(struct net_device *dev, struct list_head *head)
 
 	br_fdb_delete_by_port(br, NULL, 0, 1);
 
-	br_multicast_dev_del(br);
 	cancel_delayed_work_sync(&br->gc_work);
 
 	br_sysfs_delbr(br->dev);

From 8048ced9beb21a52e3305f3332ae82020619f24e Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Tue, 25 Apr 2017 09:17:29 -0700
Subject: [PATCH 33/41] net: ipv6: regenerate host route if moved to gc list

Taking down the loopback device wreaks havoc on IPv6 routing. By
extension, taking down a VRF device wreaks havoc on its table.

Dmitry and Andrey both reported heap out-of-bounds reports in the IPv6
FIB code while running syzkaller fuzzer. The root cause is a dead dst
that is on the garbage list gets reinserted into the IPv6 FIB. While on
the gc (or perhaps when it gets added to the gc list) the dst->next is
set to an IPv4 dst. A subsequent walk of the ipv6 tables causes the
out-of-bounds access.

Andrey's reproducer was the key to getting to the bottom of this.

With IPv6, host routes for an address have the dst->dev set to the
loopback device. When the 'lo' device is taken down, rt6_ifdown initiates
a walk of the fib evicting routes with the 'lo' device which means all
host routes are removed. That process moves the dst which is attached to
an inet6_ifaddr to the gc list and marks it as dead.

The recent change to keep global IPv6 addresses added a new function,
fixup_permanent_addr, that is called on admin up. That function restarts
dad for an inet6_ifaddr and when it completes the host route attached
to it is inserted into the fib. Since the route was marked dead and
moved to the gc list, re-inserting the route causes the reported
out-of-bounds accesses. If the device with the address is taken down
or the address is removed, the WARN_ON in fib6_del is triggered.

All of those faults are fixed by regenerating the host route if the
existing one has been moved to the gc list, something that can be
determined by checking if the rt6i_ref counter is 0.

Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 80ce478c4851..0ea96c4d334d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3271,14 +3271,24 @@ static void addrconf_gre_config(struct net_device *dev)
 static int fixup_permanent_addr(struct inet6_dev *idev,
 				struct inet6_ifaddr *ifp)
 {
-	if (!ifp->rt) {
-		struct rt6_info *rt;
+	/* rt6i_ref == 0 means the host route was removed from the
+	 * FIB, for example, if 'lo' device is taken down. In that
+	 * case regenerate the host route.
+	 */
+	if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) {
+		struct rt6_info *rt, *prev;
 
 		rt = addrconf_dst_alloc(idev, &ifp->addr, false);
 		if (unlikely(IS_ERR(rt)))
 			return PTR_ERR(rt);
 
+		/* ifp->rt can be accessed outside of rtnl */
+		spin_lock(&ifp->lock);
+		prev = ifp->rt;
 		ifp->rt = rt;
+		spin_unlock(&ifp->lock);
+
+		ip6_rt_put(prev);
 	}
 
 	if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) {

From fd2c83b35752f0a8236b976978ad4658df14a59f Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Tue, 25 Apr 2017 18:51:46 +0200
Subject: [PATCH 34/41] net/packet: check length in getsockopt() called with
 PACKET_HDRLEN

In the case getsockopt() is called with PACKET_HDRLEN and optlen < 4
|val| remains uninitialized and the syscall may behave differently
depending on its value, and even copy garbage to userspace on certain
architectures. To fix this we now return -EINVAL if optlen is too small.

This bug has been detected with KMSAN.

Signed-off-by: Alexander Potapenko <glider@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8489beff5c25..ea81ccf3c7d6 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3836,6 +3836,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 	case PACKET_HDRLEN:
 		if (len > sizeof(int))
 			len = sizeof(int);
+		if (len < sizeof(int))
+			return -EINVAL;
 		if (copy_from_user(&val, optval, len))
 			return -EFAULT;
 		switch (val) {

From f555f34fdc586a56204cd16d9a7c104ec6cb6650 Mon Sep 17 00:00:00 2001
From: Alexander Kochetkov <al.kochet@gmail.com>
Date: Thu, 20 Apr 2017 14:00:04 +0300
Subject: [PATCH 35/41] net: phy: fix auto-negotiation stall due to unavailable
 interrupt

The Ethernet link on an interrupt driven PHY was not coming up if the Ethernet
cable was plugged before the Ethernet interface was brought up.

The patch trigger PHY state machine to update link state if PHY was requested to
do auto-negotiation and auto-negotiation complete flag already set.

During power-up cycle the PHY do auto-negotiation, generate interrupt and set
auto-negotiation complete flag. Interrupt is handled by PHY state machine but
doesn't update link state because PHY is in PHY_READY state. After some time
MAC bring up, start and request PHY to do auto-negotiation. If there are no new
settings to advertise genphy_config_aneg() doesn't start PHY auto-negotiation.
PHY continue to stay in auto-negotiation complete state and doesn't fire
interrupt. At the same time PHY state machine expect that PHY started
auto-negotiation and is waiting for interrupt from PHY and it won't get it.

Fixes: 321beec5047a ("net: phy: Use interrupts when available in NOLINK state")
Signed-off-by: Alexander Kochetkov <al.kochet@gmail.com>
Cc: stable <stable@vger.kernel.org> # v4.9+
Tested-by: Roger Quadros <rogerq@ti.com>
Tested-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c | 40 ++++++++++++++++++++++++++++++++++++----
 include/linux/phy.h   |  1 +
 2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index a2bfc82e95d7..97ff1278167b 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -591,16 +591,18 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd)
 EXPORT_SYMBOL(phy_mii_ioctl);
 
 /**
- * phy_start_aneg - start auto-negotiation for this PHY device
+ * phy_start_aneg_priv - start auto-negotiation for this PHY device
  * @phydev: the phy_device struct
+ * @sync: indicate whether we should wait for the workqueue cancelation
  *
  * Description: Sanitizes the settings (if we're not autonegotiating
  *   them), and then calls the driver's config_aneg function.
  *   If the PHYCONTROL Layer is operating, we change the state to
  *   reflect the beginning of Auto-negotiation or forcing.
  */
-int phy_start_aneg(struct phy_device *phydev)
+static int phy_start_aneg_priv(struct phy_device *phydev, bool sync)
 {
+	bool trigger = 0;
 	int err;
 
 	if (!phydev->drv)
@@ -628,10 +630,40 @@ int phy_start_aneg(struct phy_device *phydev)
 		}
 	}
 
+	/* Re-schedule a PHY state machine to check PHY status because
+	 * negotiation may already be done and aneg interrupt may not be
+	 * generated.
+	 */
+	if (phy_interrupt_is_valid(phydev) && (phydev->state == PHY_AN)) {
+		err = phy_aneg_done(phydev);
+		if (err > 0) {
+			trigger = true;
+			err = 0;
+		}
+	}
+
 out_unlock:
 	mutex_unlock(&phydev->lock);
+
+	if (trigger)
+		phy_trigger_machine(phydev, sync);
+
 	return err;
 }
+
+/**
+ * phy_start_aneg - start auto-negotiation for this PHY device
+ * @phydev: the phy_device struct
+ *
+ * Description: Sanitizes the settings (if we're not autonegotiating
+ *   them), and then calls the driver's config_aneg function.
+ *   If the PHYCONTROL Layer is operating, we change the state to
+ *   reflect the beginning of Auto-negotiation or forcing.
+ */
+int phy_start_aneg(struct phy_device *phydev)
+{
+	return phy_start_aneg_priv(phydev, true);
+}
 EXPORT_SYMBOL(phy_start_aneg);
 
 /**
@@ -659,7 +691,7 @@ void phy_start_machine(struct phy_device *phydev)
  *   state machine runs.
  */
 
-static void phy_trigger_machine(struct phy_device *phydev, bool sync)
+void phy_trigger_machine(struct phy_device *phydev, bool sync)
 {
 	if (sync)
 		cancel_delayed_work_sync(&phydev->state_queue);
@@ -1154,7 +1186,7 @@ void phy_state_machine(struct work_struct *work)
 	mutex_unlock(&phydev->lock);
 
 	if (needs_aneg)
-		err = phy_start_aneg(phydev);
+		err = phy_start_aneg_priv(phydev, false);
 	else if (do_suspend)
 		phy_suspend(phydev);
 
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 43a774873aa9..fb3857337151 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -852,6 +852,7 @@ void phy_change_work(struct work_struct *work);
 void phy_mac_interrupt(struct phy_device *phydev, int new_link);
 void phy_start_machine(struct phy_device *phydev);
 void phy_stop_machine(struct phy_device *phydev);
+void phy_trigger_machine(struct phy_device *phydev, bool sync);
 int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
 int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd);
 int phy_ethtool_ksettings_get(struct phy_device *phydev,

From b43bd72835a5f7adef81fe53fa1fbe7b0e43df8e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 26 Apr 2017 14:33:14 -0400
Subject: [PATCH 36/41] Revert "phy: micrel: Disable auto negotiation on
 startup"

This reverts commit 99f81afc139c6edd14d77a91ee91685a414a1c66.

It was papering over the real problem, which is fixed by commit
f555f34fdc58 ("net: phy: fix auto-negotiation stall due to unavailable
interrupt")

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 1326d99771c1..da5b39268370 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -297,17 +297,6 @@ static int kszphy_config_init(struct phy_device *phydev)
 	if (priv->led_mode >= 0)
 		kszphy_setup_led(phydev, type->led_mode_reg, priv->led_mode);
 
-	if (phy_interrupt_is_valid(phydev)) {
-		int ctl = phy_read(phydev, MII_BMCR);
-
-		if (ctl < 0)
-			return ctl;
-
-		ret = phy_write(phydev, MII_BMCR, ctl & ~BMCR_ANENABLE);
-		if (ret < 0)
-			return ret;
-	}
-
 	return 0;
 }
 

From 5294b83086cc1c35b4efeca03644cf9d12282e5b Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 25 Apr 2017 19:08:18 +0200
Subject: [PATCH 37/41] macsec: dynamically allocate space for sglist

We call skb_cow_data, which is good anyway to ensure we can actually
modify the skb as such (another error from prior). Now that we have the
number of fragments required, we can safely allocate exactly that amount
of memory.

Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver")
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Acked-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macsec.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index dbab05afcdbe..49ce4e9f4a0f 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -617,7 +617,8 @@ static void macsec_encrypt_done(struct crypto_async_request *base, int err)
 
 static struct aead_request *macsec_alloc_req(struct crypto_aead *tfm,
 					     unsigned char **iv,
-					     struct scatterlist **sg)
+					     struct scatterlist **sg,
+					     int num_frags)
 {
 	size_t size, iv_offset, sg_offset;
 	struct aead_request *req;
@@ -629,7 +630,7 @@ static struct aead_request *macsec_alloc_req(struct crypto_aead *tfm,
 
 	size = ALIGN(size, __alignof__(struct scatterlist));
 	sg_offset = size;
-	size += sizeof(struct scatterlist) * (MAX_SKB_FRAGS + 1);
+	size += sizeof(struct scatterlist) * num_frags;
 
 	tmp = kmalloc(size, GFP_ATOMIC);
 	if (!tmp)
@@ -649,6 +650,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
 {
 	int ret;
 	struct scatterlist *sg;
+	struct sk_buff *trailer;
 	unsigned char *iv;
 	struct ethhdr *eth;
 	struct macsec_eth_header *hh;
@@ -723,7 +725,14 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
 		return ERR_PTR(-EINVAL);
 	}
 
-	req = macsec_alloc_req(tx_sa->key.tfm, &iv, &sg);
+	ret = skb_cow_data(skb, 0, &trailer);
+	if (unlikely(ret < 0)) {
+		macsec_txsa_put(tx_sa);
+		kfree_skb(skb);
+		return ERR_PTR(ret);
+	}
+
+	req = macsec_alloc_req(tx_sa->key.tfm, &iv, &sg, ret);
 	if (!req) {
 		macsec_txsa_put(tx_sa);
 		kfree_skb(skb);
@@ -732,7 +741,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
 
 	macsec_fill_iv(iv, secy->sci, pn);
 
-	sg_init_table(sg, MAX_SKB_FRAGS + 1);
+	sg_init_table(sg, ret);
 	skb_to_sgvec(skb, sg, 0, skb->len);
 
 	if (tx_sc->encrypt) {
@@ -917,6 +926,7 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
 {
 	int ret;
 	struct scatterlist *sg;
+	struct sk_buff *trailer;
 	unsigned char *iv;
 	struct aead_request *req;
 	struct macsec_eth_header *hdr;
@@ -927,7 +937,12 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
 
-	req = macsec_alloc_req(rx_sa->key.tfm, &iv, &sg);
+	ret = skb_cow_data(skb, 0, &trailer);
+	if (unlikely(ret < 0)) {
+		kfree_skb(skb);
+		return ERR_PTR(ret);
+	}
+	req = macsec_alloc_req(rx_sa->key.tfm, &iv, &sg, ret);
 	if (!req) {
 		kfree_skb(skb);
 		return ERR_PTR(-ENOMEM);
@@ -936,7 +951,7 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
 	hdr = (struct macsec_eth_header *)skb->data;
 	macsec_fill_iv(iv, sci, ntohl(hdr->packet_number));
 
-	sg_init_table(sg, MAX_SKB_FRAGS + 1);
+	sg_init_table(sg, ret);
 	skb_to_sgvec(skb, sg, 0, skb->len);
 
 	if (hdr->tci_an & MACSEC_TCI_E) {
@@ -2716,7 +2731,7 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
 }
 
 #define MACSEC_FEATURES \
-	(NETIF_F_SG | NETIF_F_HIGHDMA)
+	(NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST)
 static struct lock_class_key macsec_netdev_addr_lock_key;
 
 static int macsec_dev_init(struct net_device *dev)

From 9899886d5e8ec5b343b1efe44f185a0e68dc6454 Mon Sep 17 00:00:00 2001
From: Myungho Jung <mhjungk@gmail.com>
Date: Tue, 25 Apr 2017 11:58:15 -0700
Subject: [PATCH 38/41] net: core: Prevent from dereferencing null pointer when
 releasing SKB

Added NULL check to make __dev_kfree_skb_irq consistent with kfree
family of functions.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=195289

Signed-off-by: Myungho Jung <mhjungk@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/core/dev.c b/net/core/dev.c
index 533a6d6f6092..9b5875388c23 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2450,6 +2450,9 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
 {
 	unsigned long flags;
 
+	if (unlikely(!skb))
+		return;
+
 	if (likely(atomic_read(&skb->users) == 1)) {
 		smp_rmb();
 		atomic_set(&skb->users, 0);

From 199ab00f3cdb6f154ea93fa76fd80192861a821d Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Tue, 25 Apr 2017 14:37:15 -0700
Subject: [PATCH 39/41] ipv6: check skb->protocol before lookup for nexthop

Andrey reported a out-of-bound access in ip6_tnl_xmit(), this
is because we use an ipv4 dst in ip6_tnl_xmit() and cast an IPv4
neigh key as an IPv6 address:

        neigh = dst_neigh_lookup(skb_dst(skb),
                                 &ipv6_hdr(skb)->daddr);
        if (!neigh)
                goto tx_err_link_failure;

        addr6 = (struct in6_addr *)&neigh->primary_key; // <=== HERE
        addr_type = ipv6_addr_type(addr6);

        if (addr_type == IPV6_ADDR_ANY)
                addr6 = &ipv6_hdr(skb)->daddr;

        memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));

Also the network header of the skb at this point should be still IPv4
for 4in6 tunnels, we shold not just use it as IPv6 header.

This patch fixes it by checking if skb->protocol is ETH_P_IPV6: if it
is, we are safe to do the nexthop lookup using skb_dst() and
ipv6_hdr(skb)->daddr; if not (aka IPv4), we have no clue about which
dest address we can pick here, we have to rely on callers to fill it
from tunnel config, so just fall to ip6_route_output() to make the
decision.

Fixes: ea3dc9601bda ("ip6_tunnel: Add support for wildcard tunnel endpoints.")
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Tested-by: Andrey Konovalov <andreyknvl@google.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 75fac933c209..a9692ec0cd6d 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1037,7 +1037,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 	struct ip6_tnl *t = netdev_priv(dev);
 	struct net *net = t->net;
 	struct net_device_stats *stats = &t->dev->stats;
-	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+	struct ipv6hdr *ipv6h;
 	struct ipv6_tel_txoption opt;
 	struct dst_entry *dst = NULL, *ndst = NULL;
 	struct net_device *tdev;
@@ -1057,26 +1057,28 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 
 	/* NBMA tunnel */
 	if (ipv6_addr_any(&t->parms.raddr)) {
-		struct in6_addr *addr6;
-		struct neighbour *neigh;
-		int addr_type;
+		if (skb->protocol == htons(ETH_P_IPV6)) {
+			struct in6_addr *addr6;
+			struct neighbour *neigh;
+			int addr_type;
 
-		if (!skb_dst(skb))
-			goto tx_err_link_failure;
+			if (!skb_dst(skb))
+				goto tx_err_link_failure;
 
-		neigh = dst_neigh_lookup(skb_dst(skb),
-					 &ipv6_hdr(skb)->daddr);
-		if (!neigh)
-			goto tx_err_link_failure;
+			neigh = dst_neigh_lookup(skb_dst(skb),
+						 &ipv6_hdr(skb)->daddr);
+			if (!neigh)
+				goto tx_err_link_failure;
 
-		addr6 = (struct in6_addr *)&neigh->primary_key;
-		addr_type = ipv6_addr_type(addr6);
+			addr6 = (struct in6_addr *)&neigh->primary_key;
+			addr_type = ipv6_addr_type(addr6);
 
-		if (addr_type == IPV6_ADDR_ANY)
-			addr6 = &ipv6_hdr(skb)->daddr;
+			if (addr_type == IPV6_ADDR_ANY)
+				addr6 = &ipv6_hdr(skb)->daddr;
 
-		memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
-		neigh_release(neigh);
+			memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
+			neigh_release(neigh);
+		}
 	} else if (!(t->parms.flags &
 		     (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
 		/* enable the cache only only if the routing decision does

From c1201444075009507a6818de6518e2822b9a87c8 Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Tue, 25 Apr 2017 17:38:02 -0700
Subject: [PATCH 40/41] tcp: memset ca_priv data to 0 properly

Always zero out ca_priv data in tcp_assign_congestion_control() so that
ca_priv data is cleared out during socket creation.
Also always zero out ca_priv data in tcp_reinit_congestion_control() so
that when cc algorithm is changed, ca_priv data is cleared out as well.
We should still zero out ca_priv data even in TCP_CLOSE state because
user could call connect() on AF_UNSPEC to disconnect the socket and
leave it in TCP_CLOSE state and later call setsockopt() to switch cc
algorithm on this socket.

Fixes: 2b0a8c9ee ("tcp: add CDG congestion control")
Reported-by: Andrey Konovalov  <andreyknvl@google.com>
Signed-off-by: Wei Wang <weiwan@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cong.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 79c4817abc94..6e3c512054a6 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -168,12 +168,8 @@ void tcp_assign_congestion_control(struct sock *sk)
 	}
 out:
 	rcu_read_unlock();
+	memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
 
-	/* Clear out private data before diag gets it and
-	 * the ca has not been initialized.
-	 */
-	if (ca->get_info)
-		memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
 	if (ca->flags & TCP_CONG_NEEDS_ECN)
 		INET_ECN_xmit(sk);
 	else
@@ -200,11 +196,10 @@ static void tcp_reinit_congestion_control(struct sock *sk,
 	tcp_cleanup_congestion_control(sk);
 	icsk->icsk_ca_ops = ca;
 	icsk->icsk_ca_setsockopt = 1;
+	memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
 
-	if (sk->sk_state != TCP_CLOSE) {
-		memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
+	if (sk->sk_state != TCP_CLOSE)
 		tcp_init_congestion_control(sk);
-	}
 }
 
 /* Manage refcounts on socket close. */

From 105f5528b9bbaa08b526d3405a5bcd2ff0c953c8 Mon Sep 17 00:00:00 2001
From: Jamie Bainbridge <jbainbri@redhat.com>
Date: Wed, 26 Apr 2017 10:43:27 +1000
Subject: [PATCH 41/41] ipv6: check raw payload size correctly in ioctl

In situations where an skb is paged, the transport header pointer and
tail pointer can be the same because the skb contents are in frags.

This results in ioctl(SIOCINQ/FIONREAD) incorrectly returning a
length of 0 when the length to receive is actually greater than zero.

skb->len is already correctly set in ip6_input_finish() with
pskb_pull(), so use skb->len as it always returns the correct result
for both linear and paged data.

Signed-off-by: Jamie Bainbridge <jbainbri@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/raw.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index f174e76e6505..0da6a12b5472 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1178,8 +1178,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
 		spin_lock_bh(&sk->sk_receive_queue.lock);
 		skb = skb_peek(&sk->sk_receive_queue);
 		if (skb)
-			amount = skb_tail_pointer(skb) -
-				skb_transport_header(skb);
+			amount = skb->len;
 		spin_unlock_bh(&sk->sk_receive_queue.lock);
 		return put_user(amount, (int __user *)arg);
 	}