From 55378a238e04b39cc82957d91d16499704ea719b Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Thu, 30 Mar 2017 17:00:25 +0300 Subject: [PATCH 01/41] net/mlx5: Fix driver load bad flow when having fw initializing timeout If FW is stuck in initializing state we will skip the driver load, but current error handling flow doesn't clean previously allocated command interface resources. Fixes: e3297246c2c8 ('net/mlx5_core: Wait for FW readiness on startup') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 60154a175bd3..0ad66324247f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1029,7 +1029,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, if (err) { dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n", FW_INIT_TIMEOUT_MILI); - goto out_err; + goto err_cmd_cleanup; } err = mlx5_core_enable_hca(dev, 0); From c415f704c8276bc686abcb0497bf2606038ca73c Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 30 Mar 2017 15:56:10 +0200 Subject: [PATCH 02/41] net/mlx5: E-Switch, Correctly deal with inline mode on ConnectX-5 On ConnectX5 the wqe inline mode is "none" and hence the FW reports MLX5_CAP_INLINE_MODE_NOT_REQUIRED. Fix our devlink callbacks to deal with that on get and set. Also fix the tc flow parsing code not to fail anything when inline isn't required. Fixes: bffaa916588e ('net/mlx5: E-Switch, Add control for inline mode') Signed-off-by: Or Gerlitz Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 3 +- .../mellanox/mlx5/core/eswitch_offloads.c | 36 ++++++++++++------- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index fade7233dac5..b7c99c38a7c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -639,7 +639,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv, if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH) && rep->vport != FDB_UPLINK_VPORT) { - if (min_inline > esw->offloads.inline_mode) { + if (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && + esw->offloads.inline_mode < min_inline) { netdev_warn(priv->netdev, "Flow is not offloaded due to min inline setting, required %d actual %d\n", min_inline, esw->offloads.inline_mode); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 307ec6c5fd3b..d111cebca9f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -911,8 +911,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode) struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; int num_vports = esw->enabled_vports; - int err; - int vport; + int err, vport; u8 mlx5_mode; if (!MLX5_CAP_GEN(dev, vport_group_manager)) @@ -921,9 +920,17 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode) if (esw->mode == SRIOV_NONE) return -EOPNOTSUPP; - if (MLX5_CAP_ETH(dev, wqe_inline_mode) != - MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) + switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: + if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) + return 0; + /* fall through */ + case MLX5_CAP_INLINE_MODE_L2: + esw_warn(dev, "Inline mode can't be set\n"); return -EOPNOTSUPP; + case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + break; + } if (esw->offloads.num_flows > 0) { esw_warn(dev, "Can't set inline mode when flows are configured\n"); @@ -966,18 +973,14 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) if (esw->mode == SRIOV_NONE) return -EOPNOTSUPP; - if (MLX5_CAP_ETH(dev, wqe_inline_mode) != - MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) - return -EOPNOTSUPP; - return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); } int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) { + u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2; struct mlx5_core_dev *dev = esw->dev; int vport; - u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2; if (!MLX5_CAP_GEN(dev, vport_group_manager)) return -EOPNOTSUPP; @@ -985,10 +988,18 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) if (esw->mode == SRIOV_NONE) return -EOPNOTSUPP; - if (MLX5_CAP_ETH(dev, wqe_inline_mode) != - MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) - return -EOPNOTSUPP; + switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: + mlx5_mode = MLX5_INLINE_MODE_NONE; + goto out; + case MLX5_CAP_INLINE_MODE_L2: + mlx5_mode = MLX5_INLINE_MODE_L2; + goto out; + case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + goto query_vports; + } +query_vports: for (vport = 1; vport <= nvfs; vport++) { mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode); if (vport > 1 && prev_mlx5_mode != mlx5_mode) @@ -996,6 +1007,7 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) prev_mlx5_mode = mlx5_mode; } +out: *mode = mlx5_mode; return 0; } From 32f3671f6951836cfe9d84e1a6554908b85c61d7 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 6 Apr 2017 19:20:21 +0300 Subject: [PATCH 03/41] net/mlx5e: Make sure the FW max encap size is enough for ipv4 tunnels Otherwise the code that fills the ipv4 encapsulation headers could be writing beyond the allocated headers buffer. Fixes: a54e20b4fcae ('net/mlx5e: Add basic TC tunnel set action for SRIOV offloads') Signed-off-by: Or Gerlitz Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 42 ++++++++++--------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index b7c99c38a7c4..fc7c1d30461c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -786,16 +786,15 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, return 0; } -static int gen_vxlan_header_ipv4(struct net_device *out_dev, - char buf[], - unsigned char h_dest[ETH_ALEN], - int ttl, - __be32 daddr, - __be32 saddr, - __be16 udp_dst_port, - __be32 vx_vni) +static void gen_vxlan_header_ipv4(struct net_device *out_dev, + char buf[], int encap_size, + unsigned char h_dest[ETH_ALEN], + int ttl, + __be32 daddr, + __be32 saddr, + __be16 udp_dst_port, + __be32 vx_vni) { - int encap_size = VXLAN_HLEN + sizeof(struct iphdr) + ETH_HLEN; struct ethhdr *eth = (struct ethhdr *)buf; struct iphdr *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr)); struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr)); @@ -818,8 +817,6 @@ static int gen_vxlan_header_ipv4(struct net_device *out_dev, udp->dest = udp_dst_port; vxh->vx_flags = VXLAN_HF_VNI; vxh->vx_vni = vxlan_vni_field(vx_vni); - - return encap_size; } static int gen_vxlan_header_ipv6(struct net_device *out_dev, @@ -863,13 +860,20 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, struct net_device **out_dev) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN; struct ip_tunnel_key *tun_key = &e->tun_info.key; - int encap_size, ttl, err; struct neighbour *n = NULL; struct flowi4 fl4 = {}; char *encap_header; + int ttl, err; - encap_header = kzalloc(max_encap_size, GFP_KERNEL); + if (max_encap_size < ipv4_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv4_encap_size, max_encap_size); + return -EOPNOTSUPP; + } + + encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL); if (!encap_header) return -ENOMEM; @@ -904,11 +908,11 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, switch (e->tunnel_type) { case MLX5_HEADER_TYPE_VXLAN: - encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header, - e->h_dest, ttl, - fl4.daddr, - fl4.saddr, tun_key->tp_dst, - tunnel_id_to_key32(tun_key->tun_id)); + gen_vxlan_header_ipv4(*out_dev, encap_header, + ipv4_encap_size, e->h_dest, ttl, + fl4.daddr, + fl4.saddr, tun_key->tp_dst, + tunnel_id_to_key32(tun_key->tun_id)); break; default: err = -EOPNOTSUPP; @@ -916,7 +920,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, } err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, - encap_size, encap_header, &e->encap_id); + ipv4_encap_size, encap_header, &e->encap_id); out: if (err && n) neigh_release(n); From 225aabaffe9ab42b2dd8770359442f019083275e Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 6 Apr 2017 19:28:30 +0300 Subject: [PATCH 04/41] net/mlx5e: Make sure the FW max encap size is enough for ipv6 tunnels Otherwise the code that fills the ipv6 encapsulation headers could be writing beyond the allocated headers buffer. Fixes: ce99f6b97fcd ('net/mlx5e: Support SRIOV TC encapsulation offloads for IPv6 tunnels') Signed-off-by: Or Gerlitz Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 42 ++++++++++--------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index fc7c1d30461c..5436866798f4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -819,16 +819,15 @@ static void gen_vxlan_header_ipv4(struct net_device *out_dev, vxh->vx_vni = vxlan_vni_field(vx_vni); } -static int gen_vxlan_header_ipv6(struct net_device *out_dev, - char buf[], - unsigned char h_dest[ETH_ALEN], - int ttl, - struct in6_addr *daddr, - struct in6_addr *saddr, - __be16 udp_dst_port, - __be32 vx_vni) +static void gen_vxlan_header_ipv6(struct net_device *out_dev, + char buf[], int encap_size, + unsigned char h_dest[ETH_ALEN], + int ttl, + struct in6_addr *daddr, + struct in6_addr *saddr, + __be16 udp_dst_port, + __be32 vx_vni) { - int encap_size = VXLAN_HLEN + sizeof(struct ipv6hdr) + ETH_HLEN; struct ethhdr *eth = (struct ethhdr *)buf; struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr)); struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr)); @@ -850,8 +849,6 @@ static int gen_vxlan_header_ipv6(struct net_device *out_dev, udp->dest = udp_dst_port; vxh->vx_flags = VXLAN_HF_VNI; vxh->vx_vni = vxlan_vni_field(vx_vni); - - return encap_size; } static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, @@ -935,13 +932,20 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN; struct ip_tunnel_key *tun_key = &e->tun_info.key; - int encap_size, err, ttl = 0; struct neighbour *n = NULL; struct flowi6 fl6 = {}; char *encap_header; + int err, ttl = 0; - encap_header = kzalloc(max_encap_size, GFP_KERNEL); + if (max_encap_size < ipv6_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv6_encap_size, max_encap_size); + return -EOPNOTSUPP; + } + + encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL); if (!encap_header) return -ENOMEM; @@ -977,11 +981,11 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, switch (e->tunnel_type) { case MLX5_HEADER_TYPE_VXLAN: - encap_size = gen_vxlan_header_ipv6(*out_dev, encap_header, - e->h_dest, ttl, - &fl6.daddr, - &fl6.saddr, tun_key->tp_dst, - tunnel_id_to_key32(tun_key->tun_id)); + gen_vxlan_header_ipv6(*out_dev, encap_header, + ipv6_encap_size, e->h_dest, ttl, + &fl6.daddr, + &fl6.saddr, tun_key->tp_dst, + tunnel_id_to_key32(tun_key->tun_id)); break; default: err = -EOPNOTSUPP; @@ -989,7 +993,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, } err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, - encap_size, encap_header, &e->encap_id); + ipv6_encap_size, encap_header, &e->encap_id); out: if (err && n) neigh_release(n); From 5ae85b0edaa597b063ee9d8f48b830519a6e0c0f Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 6 Apr 2017 10:49:12 +0300 Subject: [PATCH 05/41] net/mlx5: Fix UAR memory leak When UAR is released, we deallocate the device resource, but don't unmmap the UAR mapping memory. Fix the leak by unmapping this memory. Fixes: a6d51b68611e9 ('net/mlx5: Introduce blue flame register allocator) Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/uar.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index 2e6b0f290ddc..222b25908d01 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -87,6 +87,7 @@ static void up_rel_func(struct kref *kref) struct mlx5_uars_page *up = container_of(kref, struct mlx5_uars_page, ref_count); list_del(&up->list); + iounmap(up->map); if (mlx5_cmd_free_uar(up->mdev, up->index)) mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index); kfree(up->reg_bitmap); From cbad8cddb6ed7ef3a5f0a9a70f1711d4d7fb9a8f Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Wed, 22 Mar 2017 11:44:14 +0200 Subject: [PATCH 06/41] net/mlx5e: Fix small packet threshold RX packet headers are meant to be contained in SKB linear part, and chose a threshold of 128. It turns out this is not enough, i.e. for IPv6 packet over VxLAN. In this case, UDP/IPv4 needs 42 bytes, GENEVE header is 8 bytes, and 86 bytes for TCP/IPv6. In total 136 bytes that is more than current 128 bytes. In this case expand header flow is reached. The warning in skb_try_coalesce() caused by a wrong truesize was already fixed here: commit 158f323b9868 ("net: adjust skb->truesize in pskb_expand_head()"). Still, we prefer to totally avoid the expand header flow for performance reasons. Tested regular TCP_STREAM with iperf for 1 and 8 streams, no degradation was found. Fixes: 461017cb006a ("net/mlx5e: Support RX multi-packet WQE (Striding RQ)") Signed-off-by: Eugenia Emantayev Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index dc52053128bc..3d9490cd2db1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -90,7 +90,7 @@ #define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) - 1 <= U16_MAX) #define MLX5_UMR_ALIGN (2048) -#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD (128) +#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD (256) #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) #define MLX5E_DEFAULT_LRO_TIMEOUT 32 From 5e82c9e4ed60beba83f46a1a5a8307b99a23e982 Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Thu, 2 Mar 2017 15:49:45 +0200 Subject: [PATCH 07/41] net/mlx5e: Fix ETHTOOL_GRXCLSRLALL handling Handler for ETHTOOL_GRXCLSRLALL must set info->data to the size of the table, regardless of the amount of entries in it. Existing code does not do that, and this breaks all usage of ethtool -N or -n without explicit location, with this error: rmgr: Invalid RX class rules table size: Success Set info->data to the table size. Tested: ethtool -n ens8 ethtool -N ens8 flow-type ip4 src-ip 1.1.1.1 dst-ip 2.2.2.2 action 1 ethtool -N ens8 flow-type ip4 src-ip 1.1.1.1 dst-ip 2.2.2.2 action 1 loc 55 ethtool -n ens8 ethtool -N ens8 delete 1023 ethtool -N ens8 delete 55 Fixes: f913a72aa008 ("net/mlx5e: Add support to get ethtool flow rules") Signed-off-by: Ilan Tayari Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index d55fff0ba388..26fc77e80f7b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -564,6 +564,7 @@ int mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv, struct ethtool_rxnfc *i int idx = 0; int err = 0; + info->data = MAX_NUM_OF_ETHTOOL_RULES; while ((!err || err == -ENOENT) && idx < info->rule_cnt) { err = mlx5e_ethtool_get_flow(priv, info, location); if (!err) From 1510d728639e183b60bd1285b09a49c393923ba5 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Apr 2017 21:40:12 -0700 Subject: [PATCH 08/41] net/mlx5e: Fix race in mlx5e_sw_stats and mlx5e_vport_stats We have observed a sudden spike in rx/tx_packets and rx/tx_bytes reported under /proc/net/dev. There is a race in mlx5e_update_stats() and some of the get-stats functions (the one that we hit is the mlx5e_get_stats() which is called by ndo_get_stats64()). In particular, the very first thing mlx5e_update_sw_counters() does is 'memset(s, 0, sizeof(*s))'. For example, if mlx5e_get_stats() is unlucky at one point, rx_bytes and rx_packets could be 0. One second later, a normal (and much bigger than 0) value will be reported. This patch is to use a 'struct mlx5e_sw_stats temp' to avoid a direct memset zero on priv->stats.sw. mlx5e_update_vport_counters() has a similar race. Hence, addressed together. However, memset zero is removed instead because it is not needed. I am lucky enough to catch this 0-reset in rx multicast: eth0: 41457665 76804 70 0 0 70 0 47085 15586634 87502 3 0 0 0 3 0 eth0: 41459860 76815 70 0 0 70 0 47094 15588376 87516 3 0 0 0 3 0 eth0: 41460577 76822 70 0 0 70 0 0 15589083 87521 3 0 0 0 3 0 eth0: 41463293 76838 70 0 0 70 0 47108 15595872 87538 3 0 0 0 3 0 eth0: 41463379 76839 70 0 0 70 0 47116 15596138 87539 3 0 0 0 3 0 v2: Remove memset zero from mlx5e_update_vport_counters() v1: Use temp and memcpy Fixes: 9218b44dcc05 ("net/mlx5e: Statistics handling refactoring") Suggested-by: Eric Dumazet Suggested-by: Saeed Mahameed Signed-off-by: Martin KaFai Lau Acked-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 66c133757a5e..15cc7b469d2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -174,7 +174,7 @@ unlock: static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) { - struct mlx5e_sw_stats *s = &priv->stats.sw; + struct mlx5e_sw_stats temp, *s = &temp; struct mlx5e_rq_stats *rq_stats; struct mlx5e_sq_stats *sq_stats; u64 tx_offload_none = 0; @@ -229,6 +229,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->link_down_events_phy = MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters, counter_set.phys_layer_cntrs.link_down_events); + memcpy(&priv->stats.sw, s, sizeof(*s)); } static void mlx5e_update_vport_counters(struct mlx5e_priv *priv) @@ -243,7 +244,6 @@ static void mlx5e_update_vport_counters(struct mlx5e_priv *priv) MLX5_SET(query_vport_counter_in, in, op_mod, 0); MLX5_SET(query_vport_counter_in, in, other_vport, 0); - memset(out, 0, outlen); mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); } From 11faa7b0359aaf7efd406b7a6a077fda2b037d8e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 21 Apr 2017 13:49:37 +0300 Subject: [PATCH 09/41] net: tc35815: move free after the dereference We dereference "skb" to get "skb->len" so we should probably do that step before freeing the skb. Fixes: eea221ce4880 ("tc35815 driver update (take 2)") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/toshiba/tc35815.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index a45f98fa4aa7..3dadee1080b9 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -1017,8 +1017,8 @@ tc35815_free_queues(struct net_device *dev) BUG_ON(lp->tx_skbs[i].skb != skb); #endif if (skb) { - dev_kfree_skb(skb); pci_unmap_single(lp->pci_dev, lp->tx_skbs[i].skb_dma, skb->len, PCI_DMA_TODEVICE); + dev_kfree_skb(skb); lp->tx_skbs[i].skb = NULL; lp->tx_skbs[i].skb_dma = 0; } From b7c8487cb3d99509220092fe77a2464dff43f015 Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Fri, 21 Apr 2017 21:34:59 +0100 Subject: [PATCH 10/41] ipv4: Avoid caching l3mdev dst on mismatched local route David reported that doing the following: ip li add red type vrf table 10 ip link set dev eth1 vrf red ip addr add 127.0.0.1/8 dev red ip link set dev eth1 up ip li set red up ping -c1 -w1 -I red 127.0.0.1 ip li del red when either policy routing IP rules are present or the local table lookup ip rule is before the l3mdev lookup results in a hang with these messages: unregister_netdevice: waiting for red to become free. Usage count = 1 The problem is caused by caching the dst used for sending the packet out of the specified interface on a local route with a different nexthop interface. Thus the dst could stay around until the route in the table the lookup was done is deleted which may be never. Address the problem by not forcing output device to be the l3mdev in the flow's output interface if the lookup didn't use the l3mdev. This then results in the dst using the right device according to the route. Changes in v2: - make the dev_out passed in by __ip_route_output_key_hash correct instead of checking the nh dev if FLOWI_FLAG_SKIP_NH_OIF is set as suggested by David. Fixes: 5f02ce24c2696 ("net: l3mdev: Allow the l3mdev to be a loopback") Reported-by: David Ahern Suggested-by: David Ahern Signed-off-by: Robert Shearman Acked-by: David Ahern Tested-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index acd69cfe2951..d9724889ff09 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2359,7 +2359,8 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, } /* L3 master device is the loopback for that domain */ - dev_out = l3mdev_master_dev_rcu(dev_out) ? : net->loopback_dev; + dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(res)) ? : + net->loopback_dev; fl4->flowi4_oif = dev_out->ifindex; flags |= RTCF_LOCAL; goto make_route; From 4d6fa57b4dab0d77f4d8e9d9c73d1e63f6fe8fee Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 21 Apr 2017 23:14:48 +0200 Subject: [PATCH 11/41] macsec: avoid heap overflow in skb_to_sgvec While this may appear as a humdrum one line change, it's actually quite important. An sk_buff stores data in three places: 1. A linear chunk of allocated memory in skb->data. This is the easiest one to work with, but it precludes using scatterdata since the memory must be linear. 2. The array skb_shinfo(skb)->frags, which is of maximum length MAX_SKB_FRAGS. This is nice for scattergather, since these fragments can point to different pages. 3. skb_shinfo(skb)->frag_list, which is a pointer to another sk_buff, which in turn can have data in either (1) or (2). The first two are rather easy to deal with, since they're of a fixed maximum length, while the third one is not, since there can be potentially limitless chains of fragments. Fortunately dealing with frag_list is opt-in for drivers, so drivers don't actually have to deal with this mess. For whatever reason, macsec decided it wanted pain, and so it explicitly specified NETIF_F_FRAGLIST. Because dealing with (1), (2), and (3) is insane, most users of sk_buff doing any sort of crypto or paging operation calls a convenient function called skb_to_sgvec (which happens to be recursive if (3) is in use!). This takes a sk_buff as input, and writes into its output pointer an array of scattergather list items. Sometimes people like to declare a fixed size scattergather list on the stack; othertimes people like to allocate a fixed size scattergather list on the heap. However, if you're doing it in a fixed-size fashion, you really shouldn't be using NETIF_F_FRAGLIST too (unless you're also ensuring the sk_buff and its frag_list children arent't shared and then you check the number of fragments in total required.) Macsec specifically does this: size += sizeof(struct scatterlist) * (MAX_SKB_FRAGS + 1); tmp = kmalloc(size, GFP_ATOMIC); *sg = (struct scatterlist *)(tmp + sg_offset); ... sg_init_table(sg, MAX_SKB_FRAGS + 1); skb_to_sgvec(skb, sg, 0, skb->len); Specifying MAX_SKB_FRAGS + 1 is the right answer usually, but not if you're using NETIF_F_FRAGLIST, in which case the call to skb_to_sgvec will overflow the heap, and disaster ensues. Signed-off-by: Jason A. Donenfeld Cc: stable@vger.kernel.org Cc: security@kernel.org Signed-off-by: David S. Miller --- drivers/net/macsec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index ff0a5ed3ca80..dbab05afcdbe 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2716,7 +2716,7 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb, } #define MACSEC_FEATURES \ - (NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST) + (NETIF_F_SG | NETIF_F_HIGHDMA) static struct lock_class_key macsec_netdev_addr_lock_key; static int macsec_dev_init(struct net_device *dev) From b40c5f4fde22fb98eff205b3aece05b471c24eed Mon Sep 17 00:00:00 2001 From: Ansis Atteka Date: Fri, 21 Apr 2017 15:23:05 -0700 Subject: [PATCH 12/41] udp: disable inner UDP checksum offloads in IPsec case Otherwise, UDP checksum offloads could corrupt ESP packets by attempting to calculate UDP checksum when this inner UDP packet is already protected by IPsec. One way to reproduce this bug is to have a VM with virtio_net driver (UFO set to ON in the guest VM); and then encapsulate all guest's Ethernet frames in Geneve; and then further encrypt Geneve with IPsec. In this case following symptoms are observed: 1. If using ixgbe NIC, then it will complain with following error message: ixgbe 0000:01:00.1: partial checksum but l4 proto=32! 2. Receiving IPsec stack will drop all the corrupted ESP packets and increase XfrmInStateProtoError counter in /proc/net/xfrm_stat. 3. iperf UDP test from the VM with packet sizes above MTU will not work at all. 4. iperf TCP test from the VM will get ridiculously low performance because. Signed-off-by: Ansis Atteka Co-authored-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/udp_offload.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index b2be1d9757ef..781250151d40 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -29,6 +29,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, u16 mac_len = skb->mac_len; int udp_offset, outer_hlen; __wsum partial; + bool need_ipsec; if (unlikely(!pskb_may_pull(skb, tnl_hlen))) goto out; @@ -62,8 +63,10 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); + need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb)); /* Try to offload checksum if possible */ offload_csum = !!(need_csum && + !need_ipsec && (skb->dev->features & (is_ipv6 ? (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) : (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM)))); From 9199cb7677b388b42e3d95c755090dfc5ab2b11a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 22 Apr 2017 13:46:56 +0300 Subject: [PATCH 13/41] ravb: Double free on error in ravb_start_xmit() If skb_put_padto() fails then it frees the skb. I shifted that code up a bit to make my error handling a little simpler. Fixes: a0d2f20650e8 ("Renesas Ethernet AVB PTP clock driver") Signed-off-by: Dan Carpenter Acked-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 8cfc4a54f2dc..3cd7989c007d 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1516,11 +1516,12 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev) spin_unlock_irqrestore(&priv->lock, flags); return NETDEV_TX_BUSY; } - entry = priv->cur_tx[q] % (priv->num_tx_ring[q] * NUM_TX_DESC); - priv->tx_skb[q][entry / NUM_TX_DESC] = skb; if (skb_put_padto(skb, ETH_ZLEN)) - goto drop; + goto exit; + + entry = priv->cur_tx[q] % (priv->num_tx_ring[q] * NUM_TX_DESC); + priv->tx_skb[q][entry / NUM_TX_DESC] = skb; buffer = PTR_ALIGN(priv->tx_align[q], DPTR_ALIGN) + entry / NUM_TX_DESC * DPTR_ALIGN; From fc1f8f4f310ac65b1337e2d7ba52ae4ff2b7c849 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 22 Apr 2017 09:10:13 -0700 Subject: [PATCH 14/41] net: ipv6: send unsolicited NA if enabled for all interfaces When arp_notify is set to 1 for either a specific interface or for 'all' interfaces, gratuitous arp requests are sent. Since ndisc_notify is the ipv6 equivalent to arp_notify, it should follow the same semantics. Commit 4a6e3c5def13 ("net: ipv6: send unsolicited NA on admin up") sends the NA on admin up. The final piece is checking devconf_all->ndisc_notify in addition to the per device setting. Add it. Fixes: 5cb04436eef6 ("ipv6: add knob to send unsolicited ND on link-layer address change") Signed-off-by: David Ahern Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 7ebac630d3c6..cb1766724a4c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1749,7 +1749,8 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, idev = in6_dev_get(dev); if (!idev) break; - if (idev->cnf.ndisc_notify) + if (idev->cnf.ndisc_notify || + net->ipv6.devconf_all->ndisc_notify) ndisc_send_unsol_na(dev); in6_dev_put(idev); break; From a424f0de61638cbb5047e0a888c54da9cf471f90 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 24 Apr 2017 14:27:21 -0700 Subject: [PATCH 15/41] net: dsa: b53: Include IMP/CPU port in dumb forwarding mode Since Broadcom tags are not enabled in b53 (DSA_PROTO_TAG_NONE), we need to make sure that the IMP/CPU port is included in the forwarding decision. Without this change, switching between non-management ports would work, but not between management ports and non-management ports thus breaking the default state in which DSA switch are brought up. Fixes: 967dd82ffc52 ("net: dsa: b53: Add support for Broadcom RoboSwitch") Reported-by: Eric Anholt Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 10 ++++++++++ drivers/net/dsa/b53/b53_regs.h | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 8cf4801994e8..ca7f3b005a29 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -326,6 +326,7 @@ static void b53_get_vlan_entry(struct b53_device *dev, u16 vid, static void b53_set_forwarding(struct b53_device *dev, int enable) { + struct dsa_switch *ds = dev->ds; u8 mgmt; b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, &mgmt); @@ -336,6 +337,15 @@ static void b53_set_forwarding(struct b53_device *dev, int enable) mgmt &= ~SM_SW_FWD_EN; b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, mgmt); + + /* Include IMP port in dumb forwarding mode when no tagging protocol is + * set + */ + if (ds->ops->get_tag_protocol(ds) == DSA_TAG_PROTO_NONE) { + b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, &mgmt); + mgmt |= B53_MII_DUMB_FWDG_EN; + b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, mgmt); + } } static void b53_enable_vlan(struct b53_device *dev, bool enable) diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index 9fd24c418fa4..f2a060e7a637 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -104,6 +104,10 @@ #define B53_UC_FWD_EN BIT(6) #define B53_MC_FWD_EN BIT(7) +/* Switch control (8 bit) */ +#define B53_SWITCH_CTRL 0x22 +#define B53_MII_DUMB_FWDG_EN BIT(6) + /* (16 bit) */ #define B53_UC_FLOOD_MASK 0x32 #define B53_MC_FLOOD_MASK 0x34 From 3fb22b0534e412569dd67dec625b4a051c7c2d7e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 24 Apr 2017 14:27:22 -0700 Subject: [PATCH 16/41] net: dsa: b53: Implement software reset for 58xx devices Implement the correct software reset sequence for 58xx devices by setting all 3 reset bits and polling for the SW_RST bit to clear itself without a given timeout. We cannot use is58xx() here because that would also include the 7445/7278 Starfighter 2 which have their own driver doing the reset earlier on due to the HW specific integration. Fixes: 991a36bb4645 ("net: dsa: b53: Add support for BCM585xx/586xx/88312 integrated switch") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 25 ++++++++++++++++++++++++- drivers/net/dsa/b53/b53_regs.h | 1 + 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index ca7f3b005a29..b66ee18cbe49 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -608,7 +608,8 @@ static void b53_switch_reset_gpio(struct b53_device *dev) static int b53_switch_reset(struct b53_device *dev) { - u8 mgmt; + unsigned int timeout = 1000; + u8 mgmt, reg; b53_switch_reset_gpio(dev); @@ -617,6 +618,28 @@ static int b53_switch_reset(struct b53_device *dev) b53_write8(dev, B53_CTRL_PAGE, B53_SOFTRESET, 0x00); } + /* This is specific to 58xx devices here, do not use is58xx() which + * covers the larger Starfigther 2 family, including 7445/7278 which + * still use this driver as a library and need to perform the reset + * earlier. + */ + if (dev->chip_id == BCM58XX_DEVICE_ID) { + b53_read8(dev, B53_CTRL_PAGE, B53_SOFTRESET, ®); + reg |= SW_RST | EN_SW_RST | EN_CH_RST; + b53_write8(dev, B53_CTRL_PAGE, B53_SOFTRESET, reg); + + do { + b53_read8(dev, B53_CTRL_PAGE, B53_SOFTRESET, ®); + if (!(reg & SW_RST)) + break; + + usleep_range(1000, 2000); + } while (timeout-- > 0); + + if (timeout == 0) + return -ETIMEDOUT; + } + b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, &mgmt); if (!(mgmt & SM_SW_FWD_EN)) { diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index f2a060e7a637..e5c86d44667a 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -143,6 +143,7 @@ /* Software reset register (8 bit) */ #define B53_SOFTRESET 0x79 #define SW_RST BIT(7) +#define EN_CH_RST BIT(6) #define EN_SW_RST BIT(4) /* Fast Aging Control register (8 bit) */ From bfcda65c9ba57004533c12673347503f250c3290 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 24 Apr 2017 14:27:23 -0700 Subject: [PATCH 17/41] net: dsa: b53: Fix CPU port for 58xx devices The 58xx devices (Northstar Plus) do actually have their CPU port wired at port 8, it was unfortunately set to port 5 (B53_CPU_PORT_25) which is incorrect, since that is the second possible management port. Fixes: 991a36bb4645 ("net: dsa: b53: Add support for BCM585xx/586xx/88312 integrated switch") Reported-by: Eric Anholt Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index b66ee18cbe49..fa0eece21eef 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1764,7 +1764,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .vlans = 4096, .enabled_ports = 0x1ff, .arl_entries = 4, - .cpu_port = B53_CPU_PORT_25, + .cpu_port = B53_CPU_PORT, .vta_regs = B53_VTA_REGS, .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, From ea8b65b596d78969629562f9728f76cbf565fbec Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Mon, 27 Mar 2017 14:36:10 +0200 Subject: [PATCH 18/41] can: usb: Add support of PCAN-Chip USB stamp module This patch adds the support of the PCAN-Chip USB, a stamp module for customer hardware designs, which communicates via USB 2.0 with the hardware. The integrated CAN controller supports the protocols CAN 2.0 A/B as well as CAN FD. The physical CAN connection is determined by external wiring. The Stamp module with its single-sided mounting and plated half-holes is suitable for automatic assembly. Note that the chip is equipped with the same logic than the PCAN-USB FD. Signed-off-by: Stephane Grosjean Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/Kconfig | 1 + drivers/net/can/usb/peak_usb/pcan_usb_core.c | 2 + drivers/net/can/usb/peak_usb/pcan_usb_core.h | 2 + drivers/net/can/usb/peak_usb/pcan_usb_fd.c | 72 ++++++++++++++++++++ 4 files changed, 77 insertions(+) diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig index 8483a40e7e9e..3f8adc366af4 100644 --- a/drivers/net/can/usb/Kconfig +++ b/drivers/net/can/usb/Kconfig @@ -72,6 +72,7 @@ config CAN_PEAK_USB PCAN-USB Pro dual CAN 2.0b channels USB adapter PCAN-USB FD single CAN-FD channel USB adapter PCAN-USB Pro FD dual CAN-FD channels USB adapter + PCAN-Chip USB CAN-FD to USB stamp module (see also http://www.peak-system.com). diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index 0b0302af3bd2..57913dbbae0a 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -39,6 +39,7 @@ static struct usb_device_id peak_usb_table[] = { {USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBPRO_PRODUCT_ID)}, {USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBFD_PRODUCT_ID)}, {USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBPROFD_PRODUCT_ID)}, + {USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBCHIP_PRODUCT_ID)}, {USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBX6_PRODUCT_ID)}, {} /* Terminating entry */ }; @@ -51,6 +52,7 @@ static const struct peak_usb_adapter *const peak_usb_adapters_list[] = { &pcan_usb_pro, &pcan_usb_fd, &pcan_usb_pro_fd, + &pcan_usb_chip, &pcan_usb_x6, }; diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.h b/drivers/net/can/usb/peak_usb/pcan_usb_core.h index 3cbfb069893d..c01316cac354 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.h +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.h @@ -27,6 +27,7 @@ #define PCAN_USBPRO_PRODUCT_ID 0x000d #define PCAN_USBPROFD_PRODUCT_ID 0x0011 #define PCAN_USBFD_PRODUCT_ID 0x0012 +#define PCAN_USBCHIP_PRODUCT_ID 0x0013 #define PCAN_USBX6_PRODUCT_ID 0x0014 #define PCAN_USB_DRIVER_NAME "peak_usb" @@ -90,6 +91,7 @@ struct peak_usb_adapter { extern const struct peak_usb_adapter pcan_usb; extern const struct peak_usb_adapter pcan_usb_pro; extern const struct peak_usb_adapter pcan_usb_fd; +extern const struct peak_usb_adapter pcan_usb_chip; extern const struct peak_usb_adapter pcan_usb_pro_fd; extern const struct peak_usb_adapter pcan_usb_x6; diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index 304732550f0a..528d3bb4917f 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -1061,6 +1061,78 @@ const struct peak_usb_adapter pcan_usb_fd = { .do_get_berr_counter = pcan_usb_fd_get_berr_counter, }; +/* describes the PCAN-CHIP USB */ +static const struct can_bittiming_const pcan_usb_chip_const = { + .name = "pcan_chip_usb", + .tseg1_min = 1, + .tseg1_max = (1 << PUCAN_TSLOW_TSGEG1_BITS), + .tseg2_min = 1, + .tseg2_max = (1 << PUCAN_TSLOW_TSGEG2_BITS), + .sjw_max = (1 << PUCAN_TSLOW_SJW_BITS), + .brp_min = 1, + .brp_max = (1 << PUCAN_TSLOW_BRP_BITS), + .brp_inc = 1, +}; + +static const struct can_bittiming_const pcan_usb_chip_data_const = { + .name = "pcan_chip_usb", + .tseg1_min = 1, + .tseg1_max = (1 << PUCAN_TFAST_TSGEG1_BITS), + .tseg2_min = 1, + .tseg2_max = (1 << PUCAN_TFAST_TSGEG2_BITS), + .sjw_max = (1 << PUCAN_TFAST_SJW_BITS), + .brp_min = 1, + .brp_max = (1 << PUCAN_TFAST_BRP_BITS), + .brp_inc = 1, +}; + +const struct peak_usb_adapter pcan_usb_chip = { + .name = "PCAN-Chip USB", + .device_id = PCAN_USBCHIP_PRODUCT_ID, + .ctrl_count = PCAN_USBFD_CHANNEL_COUNT, + .ctrlmode_supported = CAN_CTRLMODE_FD | + CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY, + .clock = { + .freq = PCAN_UFD_CRYSTAL_HZ, + }, + .bittiming_const = &pcan_usb_chip_const, + .data_bittiming_const = &pcan_usb_chip_data_const, + + /* size of device private data */ + .sizeof_dev_private = sizeof(struct pcan_usb_fd_device), + + /* timestamps usage */ + .ts_used_bits = 32, + .ts_period = 1000000, /* calibration period in ts. */ + .us_per_ts_scale = 1, /* us = (ts * scale) >> shift */ + .us_per_ts_shift = 0, + + /* give here messages in/out endpoints */ + .ep_msg_in = PCAN_USBPRO_EP_MSGIN, + .ep_msg_out = {PCAN_USBPRO_EP_MSGOUT_0}, + + /* size of rx/tx usb buffers */ + .rx_buffer_size = PCAN_UFD_RX_BUFFER_SIZE, + .tx_buffer_size = PCAN_UFD_TX_BUFFER_SIZE, + + /* device callbacks */ + .intf_probe = pcan_usb_pro_probe, /* same as PCAN-USB Pro */ + .dev_init = pcan_usb_fd_init, + + .dev_exit = pcan_usb_fd_exit, + .dev_free = pcan_usb_fd_free, + .dev_set_bus = pcan_usb_fd_set_bus, + .dev_set_bittiming = pcan_usb_fd_set_bittiming_slow, + .dev_set_data_bittiming = pcan_usb_fd_set_bittiming_fast, + .dev_decode_buf = pcan_usb_fd_decode_buf, + .dev_start = pcan_usb_fd_start, + .dev_stop = pcan_usb_fd_stop, + .dev_restart_async = pcan_usb_fd_restart_async, + .dev_encode_msg = pcan_usb_fd_encode_msg, + + .do_get_berr_counter = pcan_usb_fd_get_berr_counter, +}; + /* describes the PCAN-USB Pro FD adapter */ static const struct can_bittiming_const pcan_usb_pro_fd_const = { .name = "pcan_usb_pro_fd", From 71b611562f45e8798d519a38d0143d5eafd6eb35 Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Mon, 27 Mar 2017 14:36:11 +0200 Subject: [PATCH 19/41] can: usb: Kconfig: Add PCAN-USB X6 device in help text This patch adds a text line in the help section of the CAN_PEAK_USB config item describing the support of the PCAN-USB X6 adapter, which is already included in the Kernel since 4.9. Signed-off-by: Stephane Grosjean Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig index 3f8adc366af4..5f9e0e6301d0 100644 --- a/drivers/net/can/usb/Kconfig +++ b/drivers/net/can/usb/Kconfig @@ -73,6 +73,7 @@ config CAN_PEAK_USB PCAN-USB FD single CAN-FD channel USB adapter PCAN-USB Pro FD dual CAN-FD channels USB adapter PCAN-Chip USB CAN-FD to USB stamp module + PCAN-USB X6 6 CAN-FD channels USB adapter (see also http://www.peak-system.com). From b05c73bd1e3ec60357580eb042ee932a5ed754d5 Mon Sep 17 00:00:00 2001 From: Maksim Salau Date: Sun, 23 Apr 2017 20:31:40 +0300 Subject: [PATCH 20/41] net: can: usb: gs_usb: Fix buffer on stack Allocate buffers on HEAP instead of STACK for local structures that are to be sent using usb_control_msg(). Signed-off-by: Maksim Salau Cc: linux-stable # >= v4.8 Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/gs_usb.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 300349fe8dc0..eecee7f8dfb7 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -739,13 +739,18 @@ static const struct net_device_ops gs_usb_netdev_ops = { static int gs_usb_set_identify(struct net_device *netdev, bool do_identify) { struct gs_can *dev = netdev_priv(netdev); - struct gs_identify_mode imode; + struct gs_identify_mode *imode; int rc; + imode = kmalloc(sizeof(*imode), GFP_KERNEL); + + if (!imode) + return -ENOMEM; + if (do_identify) - imode.mode = GS_CAN_IDENTIFY_ON; + imode->mode = GS_CAN_IDENTIFY_ON; else - imode.mode = GS_CAN_IDENTIFY_OFF; + imode->mode = GS_CAN_IDENTIFY_OFF; rc = usb_control_msg(interface_to_usbdev(dev->iface), usb_sndctrlpipe(interface_to_usbdev(dev->iface), @@ -755,10 +760,12 @@ static int gs_usb_set_identify(struct net_device *netdev, bool do_identify) USB_RECIP_INTERFACE, dev->channel, 0, - &imode, - sizeof(imode), + imode, + sizeof(*imode), 100); + kfree(imode); + return (rc > 0) ? 0 : rc; } From 6f2aee0c0de65013333bbc26fe50c9c7b09a37f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roman=20Spycha=C5=82a?= Date: Thu, 20 Apr 2017 12:04:10 +0200 Subject: [PATCH 21/41] usb: plusb: Add support for PL-27A1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds support for the PL-27A1 by adding the appropriate USB ID's. This chip is used in the goobay Active USB 3.0 Data Link and Unitek Y-3501 cables. Signed-off-by: Roman SpychaƂa Signed-off-by: David S. Miller --- drivers/net/usb/Kconfig | 2 +- drivers/net/usb/plusb.c | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 3dd490f53e48..f28bd74ac275 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -369,7 +369,7 @@ config USB_NET_NET1080 optionally with LEDs that indicate traffic config USB_NET_PLUSB - tristate "Prolific PL-2301/2302/25A1 based cables" + tristate "Prolific PL-2301/2302/25A1/27A1 based cables" # if the handshake/init/reset problems, from original 'plusb', # are ever resolved ... then remove "experimental" depends on USB_USBNET diff --git a/drivers/net/usb/plusb.c b/drivers/net/usb/plusb.c index 22e1a9a99a7d..6fe59373cba9 100644 --- a/drivers/net/usb/plusb.c +++ b/drivers/net/usb/plusb.c @@ -102,7 +102,7 @@ static int pl_reset(struct usbnet *dev) } static const struct driver_info prolific_info = { - .description = "Prolific PL-2301/PL-2302/PL-25A1", + .description = "Prolific PL-2301/PL-2302/PL-25A1/PL-27A1", .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT, /* some PL-2302 versions seem to fail usb_set_interface() */ .reset = pl_reset, @@ -139,6 +139,17 @@ static const struct usb_device_id products [] = { * Host-to-Host Cable */ .driver_info = (unsigned long) &prolific_info, + +}, + +/* super speed cables */ +{ + USB_DEVICE(0x067b, 0x27a1), /* PL-27A1, no eeprom + * also: goobay Active USB 3.0 + * Data Link, + * Unitek Y-3501 + */ + .driver_info = (unsigned long) &prolific_info, }, { }, // END @@ -158,5 +169,5 @@ static struct usb_driver plusb_driver = { module_usb_driver(plusb_driver); MODULE_AUTHOR("David Brownell"); -MODULE_DESCRIPTION("Prolific PL-2301/2302/25A1 USB Host to Host Link Driver"); +MODULE_DESCRIPTION("Prolific PL-2301/2302/25A1/27A1 USB Host to Host Link Driver"); MODULE_LICENSE("GPL"); From f6478218e6edc2a587b8f132f66373baa7b2497c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 20 Apr 2017 20:55:12 +0800 Subject: [PATCH 22/41] macvlan: Fix device ref leak when purging bc_queue When a parent macvlan device is destroyed we end up purging its broadcast queue without dropping the device reference count on the packet source device. This causes the source device to linger. This patch drops that reference count. Fixes: 260916dfb48c ("macvlan: Fix potential use-after free for...") Reported-by: Joe Ghalam Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 9261722960a7..b34eaaae03fd 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1139,6 +1139,7 @@ static int macvlan_port_create(struct net_device *dev) static void macvlan_port_destroy(struct net_device *dev) { struct macvlan_port *port = macvlan_port_get_rtnl(dev); + struct sk_buff *skb; dev->priv_flags &= ~IFF_MACVLAN_PORT; netdev_rx_handler_unregister(dev); @@ -1147,7 +1148,15 @@ static void macvlan_port_destroy(struct net_device *dev) * but we need to cancel it and purge left skbs if any. */ cancel_work_sync(&port->bc_work); - __skb_queue_purge(&port->bc_queue); + + while ((skb = __skb_dequeue(&port->bc_queue))) { + const struct macvlan_dev *src = MACVLAN_SKB_CB(skb)->src; + + if (src) + dev_put(src->dev); + + kfree_skb(skb); + } kfree(port); } From a53d26eb888ab1c41779c443daf1af948c641e0b Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Tue, 25 Apr 2017 13:44:54 +0100 Subject: [PATCH 23/41] sfc: tx ring can only have 2048 entries for all EF10 NICs Fixes: dd248f1bc65b ("sfc: Add PCI ID for Solarflare 8000 series 10/40G NIC") Reported-by: Patrick Talbert Signed-off-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx.h | 5 ++++- drivers/net/ethernet/sfc/workarounds.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index ee14662415c5..a0c52e328102 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -74,7 +74,10 @@ void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue); #define EFX_RXQ_MIN_ENT 128U #define EFX_TXQ_MIN_ENT(efx) (2 * efx_tx_max_skb_descs(efx)) -#define EFX_TXQ_MAX_ENT(efx) (EFX_WORKAROUND_35388(efx) ? \ +/* All EF10 architecture NICs steal one bit of the DMAQ size for various + * other purposes when counting TxQ entries, so we halve the queue size. + */ +#define EFX_TXQ_MAX_ENT(efx) (EFX_WORKAROUND_EF10(efx) ? \ EFX_MAX_DMAQ_SIZE / 2 : EFX_MAX_DMAQ_SIZE) static inline bool efx_rss_enabled(struct efx_nic *efx) diff --git a/drivers/net/ethernet/sfc/workarounds.h b/drivers/net/ethernet/sfc/workarounds.h index 103f827a1623..c67fa18b8121 100644 --- a/drivers/net/ethernet/sfc/workarounds.h +++ b/drivers/net/ethernet/sfc/workarounds.h @@ -16,6 +16,7 @@ */ #define EFX_WORKAROUND_SIENA(efx) (efx_nic_rev(efx) == EFX_REV_SIENA_A0) +#define EFX_WORKAROUND_EF10(efx) (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) #define EFX_WORKAROUND_10G(efx) 1 /* Bit-bashed I2C reads cause performance drop */ From 72ec0bc64b9a5d8e0efcb717abfc757746b101b7 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Mon, 24 Apr 2017 18:29:16 +0800 Subject: [PATCH 24/41] team: fix memory leaks In functions team_nl_send_port_list_get() and team_nl_send_options_get(), pointer skb keeps the return value of nlmsg_new(). When the call to genlmsg_put() fails, the memory is not freed(). This will result in memory leak bugs. Fixes: 9b00cf2d1024 ("team: implement multipart netlink messages for options transfers") Signed-off-by: Pan Bian Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/team.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index f8c81f12d988..85c01247f2e3 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2361,8 +2361,10 @@ start_again: hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI, TEAM_CMD_OPTIONS_GET); - if (!hdr) + if (!hdr) { + nlmsg_free(skb); return -EMSGSIZE; + } if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex)) goto nla_put_failure; @@ -2634,8 +2636,10 @@ start_again: hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI, TEAM_CMD_PORT_LIST_GET); - if (!hdr) + if (!hdr) { + nlmsg_free(skb); return -EMSGSIZE; + } if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex)) goto nla_put_failure; From b7d6df57516f8e6a1c847b822ec2a62555455f88 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 24 Apr 2017 14:18:28 +0200 Subject: [PATCH 25/41] ipv6: move stub initialization after ipv6 setup completion The ipv6 stub pointer is currently initialized before the ipv6 routing subsystem: a 3rd party can access and use such stub before the routing data is ready. Moreover, such pointer is not cleared in case of initialization error, possibly leading to dangling pointers usage. This change addresses the above moving the stub initialization at the end of ipv6 init code. Fixes: 5f81bd2e5d80 ("ipv6: export a stub for IPv6 symbols used by vxlan") Signed-off-by: Paolo Abeni Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a9a9553ee63d..e82e59f22dfc 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -933,8 +933,6 @@ static int __init inet6_init(void) if (err) goto igmp_fail; - ipv6_stub = &ipv6_stub_impl; - err = ipv6_netfilter_init(); if (err) goto netfilter_fail; @@ -1010,6 +1008,10 @@ static int __init inet6_init(void) if (err) goto sysctl_fail; #endif + + /* ensure that ipv6 stubs are visible only after ipv6 is ready */ + wmb(); + ipv6_stub = &ipv6_stub_impl; out: return err; From 3364d61c92ecca7a8da990659c4b0ae1fcf0fcfb Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Mon, 24 Apr 2017 15:00:42 +0200 Subject: [PATCH 26/41] tipc: fix socket flow control accounting error at tipc_send_stream Until now in tipc_send_stream(), we return -1 when the socket encounters link congestion even if the socket had successfully sent partial data. This is incorrect as the application resends the same the partial data leading to data corruption at receiver's end. In this commit, we return the partially sent bytes as the return value at link congestion. Fixes: 10724cc7bb78 ("tipc: redesign connection-level flow control") Signed-off-by: Parthasarathy Bhuvaragan Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 7130e73bd42c..b28e94f1c739 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1083,7 +1083,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) } } while (sent < dlen && !rc); - return rc ? rc : sent; + return sent ? sent : rc; } /** From 05ff8378975a9d5fdde19104b62163d2902926fb Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Mon, 24 Apr 2017 15:00:43 +0200 Subject: [PATCH 27/41] tipc: fix socket flow control accounting error at tipc_recv_stream Until now in tipc_recv_stream(), we update the received unacknowledged bytes based on a stack variable and not based on the actual message size. If the user buffer passed at tipc_recv_stream() is smaller than the received skb, the size variable in stack differs from the actual message size in the skb. This leads to a flow control accounting error causing permanent congestion. In this commit, we fix this accounting error by always using the size of the incoming message. Fixes: 10724cc7bb78 ("tipc: redesign connection-level flow control") Signed-off-by: Parthasarathy Bhuvaragan Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b28e94f1c739..566906795c8c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1484,7 +1484,7 @@ restart: if (unlikely(flags & MSG_PEEK)) goto exit; - tsk->rcv_unacked += tsk_inc(tsk, hlen + sz); + tsk->rcv_unacked += tsk_inc(tsk, hlen + msg_data_sz(msg)); if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4))) tipc_sk_send_ack(tsk); tsk_advance_rx_queue(sk); From a23f6ce6d95900ca83b44b6fa691afe6c7d4b941 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Mon, 24 Apr 2017 21:18:39 +0200 Subject: [PATCH 28/41] net: hso: fix module unloading keep tty driver until usb driver is unregistered rmmod hso produces traces like this without that: [40261.645904] usb 2-2: new high-speed USB device number 2 using ehci-omap [40261.854644] usb 2-2: New USB device found, idVendor=0af0, idProduct=8800 [40261.862609] usb 2-2: New USB device strings: Mfr=3, Product=2, SerialNumber=0 [40261.872772] usb 2-2: Product: Globetrotter HSUPA Modem [40261.880279] usb 2-2: Manufacturer: Option N.V. [40262.021270] hso 2-2:1.5: Not our interface [40265.556945] hso: unloaded [40265.559875] usbcore: deregistering interface driver hso [40265.595947] Unable to handle kernel NULL pointer dereference at virtual address 00000033 [40265.604522] pgd = ecb14000 [40265.611877] [00000033] *pgd=00000000 [40265.617034] Internal error: Oops: 5 [#1] PREEMPT SMP ARM [40265.622650] Modules linked in: hso(-) bnep bluetooth ipv6 arc4 twl4030_madc_hwmon wl18xx wlcore mac80211 cfg80211 snd_soc_simple_card snd_soc_simple_card_utils snd_soc_omap_twl4030 snd_soc_gtm601 generic_adc_battery extcon_gpio omap3_isp videobuf2_dma_contig videobuf2_memops wlcore_sdio videobuf2_v4l2 videobuf2_core ov9650 bmp280_i2c v4l2_common bmp280 bmg160_i2c bmg160_core at24 nvmem_core videodev bmc150_accel_i2c bmc150_magn_i2c media bmc150_accel_core tsc2007 bmc150_magn leds_tca6507 bno055 snd_soc_omap_mcbsp industrialio_triggered_buffer snd_soc_omap kfifo_buf snd_pcm_dmaengine gpio_twl4030 snd_soc_twl4030 twl4030_vibra twl4030_madc wwan_on_off ehci_omap pwm_bl pwm_omap_dmtimer panel_tpo_td028ttec1 encoder_opa362 connector_analog_tv omapdrm drm_kms_helper cfbfillrect syscopyarea cfbimgblt sysfillrect [40265.698211] sysimgblt fb_sys_fops cfbcopyarea drm omapdss usb_f_ecm g_ether usb_f_rndis u_ether libcomposite configfs omap2430 phy_twl4030_usb musb_hdrc twl4030_charger industrialio w2sg0004 twl4030_pwrbutton bq27xxx_battery w1_bq27000 omap_hdq [last unloaded: hso] [40265.723175] CPU: 0 PID: 2701 Comm: rmmod Not tainted 4.11.0-rc6-letux+ #6 [40265.730346] Hardware name: Generic OMAP36xx (Flattened Device Tree) [40265.736938] task: ecb81100 task.stack: ecb82000 [40265.741729] PC is at cdev_del+0xc/0x2c [40265.745666] LR is at tty_unregister_device+0x40/0x50 [40265.750915] pc : [] lr : [] psr: 600b0113 sp : ecb83ea8 ip : eca4f898 fp : 00000000 [40265.763000] r10: 00000000 r9 : 00000000 r8 : 00000001 [40265.768493] r7 : eca4f800 r6 : 00000003 r5 : 00000000 r4 : ffffffff [40265.775360] r3 : c1458d54 r2 : 00000000 r1 : 00000004 r0 : ffffffff [40265.782257] Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none [40265.789764] Control: 10c5387d Table: acb14019 DAC: 00000051 [40265.795806] Process rmmod (pid: 2701, stack limit = 0xecb82218) [40265.802062] Stack: (0xecb83ea8 to 0xecb84000) [40265.806640] 3ea0: ec9e8100 c04b3ecc bf737378 ed5b7c00 00000003 bf7327ec [40265.815277] 3ec0: eca4f800 00000000 ec9fd800 eca4f800 bf737070 bf7328bc eca4f820 c05a9a04 [40265.823883] 3ee0: eca4f820 00000000 00000001 eca4f820 ec9fd870 bf737070 eca4f854 ec9fd8a4 [40265.832519] 3f00: ecb82000 00000000 00000000 c04e6960 eca4f820 bf737070 bf737048 00000081 [40265.841125] 3f20: c01071e4 c04e6a60 ecb81100 bf737070 bf737070 c04e5d94 bf737020 c05a8f88 [40265.849731] 3f40: bf737100 00000800 7f5fa254 00000081 c01071e4 c01c4afc 00000000 006f7368 [40265.858367] 3f60: ecb815f4 00000000 c0cac9c4 c01071e4 ecb82000 00000000 00000000 c01512f4 [40265.866973] 3f80: ed5b3200 c01071e4 7f5fa220 7f5fa220 bea78ec9 0010711c 7f5fa220 7f5fa220 [40265.875579] 3fa0: bea78ec9 c0107040 7f5fa220 7f5fa220 7f5fa254 00000800 dd35b800 dd35b800 [40265.884216] 3fc0: 7f5fa220 7f5fa220 bea78ec9 00000081 bea78dcc 00000000 bea78bd8 00000000 [40265.892822] 3fe0: b6f70521 bea78b6c 7f5dd613 b6f70526 80070030 7f5fa254 ffffffff ffffffff [40265.901458] [] (cdev_del) from [] (tty_unregister_device+0x40/0x50) [40265.909942] [] (tty_unregister_device) from [] (hso_free_interface+0x80/0x144 [hso]) [40265.919982] [] (hso_free_interface [hso]) from [] (hso_disconnect+0xc/0x18 [hso]) [40265.929718] [] (hso_disconnect [hso]) from [] (usb_unbind_interface+0x84/0x200) [40265.939239] [] (usb_unbind_interface) from [] (device_release_driver_internal+0x138/0x1cc) [40265.949798] [] (device_release_driver_internal) from [] (driver_detach+0x60/0x6c) [40265.959503] [] (driver_detach) from [] (bus_remove_driver+0x64/0x8c) [40265.968017] [] (bus_remove_driver) from [] (usb_deregister+0x5c/0xb8) [40265.976654] [] (usb_deregister) from [] (SyS_delete_module+0x160/0x1dc) [40265.985443] [] (SyS_delete_module) from [] (ret_fast_syscall+0x0/0x1c) [40265.994171] Code: c1458d54 e59f3020 e92d4010 e1a04000 (e5941034) [40266.016693] ---[ end trace 9d5ac43c7e41075c ]--- Signed-off-by: Andreas Kemnade Reviewed-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/usb/hso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 4f2e8141dbe2..93411a348f12 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -3279,9 +3279,9 @@ static void __exit hso_exit(void) pr_info("unloaded\n"); tty_unregister_driver(tty_drv); - put_tty_driver(tty_drv); /* deregister the usb driver */ usb_deregister(&hso_driver); + put_tty_driver(tty_drv); } /* Module definitions */ From fdfb70d275223b9d69d5d3abe1f88507da579139 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 24 Apr 2017 18:33:38 -0700 Subject: [PATCH 29/41] netvsc: fix calculation of available send sections My change (introduced in 4.11) to use find_first_clear_bit incorrectly assumed that the size argument was words, not bits. The effect was only a small limited number of the available send sections were being actually used. This can cause performance loss with some workloads. Since map_words is now used only during initialization, it can be on stack instead of in per-device data. Fixes: b58a185801da ("netvsc: simplify get next send section") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 1 - drivers/net/hyperv/netvsc.c | 9 ++++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index f9f3dba7a588..db23cb36ae5c 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -751,7 +751,6 @@ struct netvsc_device { u32 send_section_cnt; u32 send_section_size; unsigned long *send_section_map; - int map_words; /* Used for NetVSP initialization protocol */ struct completion channel_init_wait; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 8dd0b8770328..15ef713d96c0 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -236,6 +236,7 @@ static int netvsc_init_buf(struct hv_device *device) struct netvsc_device *net_device; struct nvsp_message *init_packet; struct net_device *ndev; + size_t map_words; int node; net_device = get_outbound_net_device(device); @@ -401,11 +402,9 @@ static int netvsc_init_buf(struct hv_device *device) net_device->send_section_size, net_device->send_section_cnt); /* Setup state for managing the send buffer. */ - net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt, - BITS_PER_LONG); + map_words = DIV_ROUND_UP(net_device->send_section_cnt, BITS_PER_LONG); - net_device->send_section_map = kcalloc(net_device->map_words, - sizeof(ulong), GFP_KERNEL); + net_device->send_section_map = kcalloc(map_words, sizeof(ulong), GFP_KERNEL); if (net_device->send_section_map == NULL) { ret = -ENOMEM; goto cleanup; @@ -683,7 +682,7 @@ static u32 netvsc_get_next_send_section(struct netvsc_device *net_device) unsigned long *map_addr = net_device->send_section_map; unsigned int i; - for_each_clear_bit(i, map_addr, net_device->map_words) { + for_each_clear_bit(i, map_addr, net_device->send_section_cnt) { if (sync_test_and_set_bit(i, map_addr) == 0) return i; } From c8fcd133eabcbbf51a6ecfd12a975cace8877d5a Mon Sep 17 00:00:00 2001 From: "sudarsana.kalluru@cavium.com" Date: Mon, 24 Apr 2017 20:59:10 -0700 Subject: [PATCH 30/41] qed: Fix error in the dcbx app meta data initialization. DCBX app_data array is initialized with the incorrect values for personality field. This would prevent offloaded protocols from honoring the PFC. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index a6e2bbe629bd..cfdadb658ade 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -64,11 +64,11 @@ ((u32)(prio_tc_tbl >> ((7 - prio) * 4)) & 0x7) static const struct qed_dcbx_app_metadata qed_dcbx_app_update[] = { - {DCBX_PROTOCOL_ISCSI, "ISCSI", QED_PCI_DEFAULT}, - {DCBX_PROTOCOL_FCOE, "FCOE", QED_PCI_DEFAULT}, - {DCBX_PROTOCOL_ROCE, "ROCE", QED_PCI_DEFAULT}, - {DCBX_PROTOCOL_ROCE_V2, "ROCE_V2", QED_PCI_DEFAULT}, - {DCBX_PROTOCOL_ETH, "ETH", QED_PCI_ETH} + {DCBX_PROTOCOL_ISCSI, "ISCSI", QED_PCI_ISCSI}, + {DCBX_PROTOCOL_FCOE, "FCOE", QED_PCI_FCOE}, + {DCBX_PROTOCOL_ROCE, "ROCE", QED_PCI_ETH_ROCE}, + {DCBX_PROTOCOL_ROCE_V2, "ROCE_V2", QED_PCI_ETH_ROCE}, + {DCBX_PROTOCOL_ETH, "ETH", QED_PCI_ETH}, }; static bool qed_dcbx_app_ethtype(u32 app_info_bitmap) From ec9c4215fef37da6668c4105f5ad3891aaa6527a Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 25 Apr 2017 15:56:50 +0200 Subject: [PATCH 31/41] ipv6: fix source routing Commit a149e7c7ce81 ("ipv6: sr: add support for SRH injection through setsockopt") introduced handling of IPV6_SRCRT_TYPE_4, but at the same time restricted it to only IPV6_SRCRT_TYPE_0 and IPV6_SRCRT_TYPE_4. Previously, ipv6_push_exthdr() and fl6_update_dst() would also handle other values (ie STRICT and TYPE_2). Restore previous source routing behavior, by handling IPV6_SRCRT_STRICT and IPV6_SRCRT_TYPE_2 the same way as IPV6_SRCRT_TYPE_0 in ipv6_push_exthdr() and fl6_update_dst(). Fixes: a149e7c7ce81 ("ipv6: sr: add support for SRH injection through setsockopt") Signed-off-by: Sabrina Dubroca Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/exthdrs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 25192a3b0cd7..d32e2110aff2 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -909,6 +909,8 @@ static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto, { switch (opt->type) { case IPV6_SRCRT_TYPE_0: + case IPV6_SRCRT_STRICT: + case IPV6_SRCRT_TYPE_2: ipv6_push_rthdr0(skb, proto, opt, addr_p, saddr); break; case IPV6_SRCRT_TYPE_4: @@ -1163,6 +1165,8 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6, switch (opt->srcrt->type) { case IPV6_SRCRT_TYPE_0: + case IPV6_SRCRT_STRICT: + case IPV6_SRCRT_TYPE_2: fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr; break; case IPV6_SRCRT_TYPE_4: From b1b9d366028ff580e6dd80b48a69c473361456f1 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 25 Apr 2017 22:58:37 +0800 Subject: [PATCH 32/41] bridge: move bridge multicast cleanup to ndo_uninit During removing a bridge device, if the bridge is still up, a new mdb entry still can be added in br_multicast_add_group() after all mdb entries are removed in br_multicast_dev_del(). Like the path: mld_ifc_timer_expire -> mld_sendpack -> ... br_multicast_rcv -> br_multicast_add_group The new mp's timer will be set up. If the timer expires after the bridge is freed, it may cause use-after-free panic in br_multicast_group_expired. BUG: unable to handle kernel NULL pointer dereference at 0000000000000048 IP: [] br_multicast_group_expired+0x28/0xb0 [bridge] Call Trace: [] call_timer_fn+0x36/0x110 [] ? br_mdb_free+0x30/0x30 [bridge] [] run_timer_softirq+0x237/0x340 [] __do_softirq+0xef/0x280 [] call_softirq+0x1c/0x30 [] do_softirq+0x65/0xa0 [] irq_exit+0x115/0x120 [] smp_apic_timer_interrupt+0x45/0x60 [] apic_timer_interrupt+0x6d/0x80 Nikolay also found it would cause a memory leak - the mdb hash is reallocated and not freed due to the mdb rehash. unreferenced object 0xffff8800540ba800 (size 2048): backtrace: [] kmemleak_alloc+0x67/0xc0 [] __kmalloc+0x1ba/0x3e0 [] br_mdb_rehash+0x5e/0x340 [bridge] [] br_multicast_new_group+0x43f/0x6e0 [bridge] [] br_multicast_add_group+0x203/0x260 [bridge] [] br_multicast_rcv+0x945/0x11d0 [bridge] [] br_dev_xmit+0x180/0x470 [bridge] [] dev_hard_start_xmit+0xbb/0x3d0 [] __dev_queue_xmit+0xb13/0xc10 [] dev_queue_xmit+0x10/0x20 [] ip6_finish_output2+0x5ca/0xac0 [ipv6] [] ip6_finish_output+0x126/0x2c0 [ipv6] [] ip6_output+0xe5/0x390 [ipv6] [] NF_HOOK.constprop.44+0x6c/0x240 [ipv6] [] mld_sendpack+0x216/0x3e0 [ipv6] [] mld_ifc_timer_expire+0x18b/0x2b0 [ipv6] This could happen when ip link remove a bridge or destroy a netns with a bridge device inside. With Nikolay's suggestion, this patch is to clean up bridge multicast in ndo_uninit after bridge dev is shutdown, instead of br_dev_delete, so that netif_running check in br_multicast_add_group can avoid this issue. v1->v2: - fix this issue by moving br_multicast_dev_del to ndo_uninit, instead of calling dev_close in br_dev_delete. (NOTE: Depends upon b6fe0440c637 ("bridge: implement missing ndo_uninit()")) Fixes: e10177abf842 ("bridge: multicast: fix handling of temp and perm entries") Reported-by: Jianwen Ji Signed-off-by: Xin Long Reviewed-by: Stephen Hemminger Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_device.c | 1 + net/bridge/br_if.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 90f49a194249..430b53e7d941 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -123,6 +123,7 @@ static void br_dev_uninit(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); + br_multicast_dev_del(br); br_multicast_uninit_stats(br); br_vlan_flush(br); free_percpu(br->stats); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 56a2a72e7738..a8d0ed282a10 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -311,7 +311,6 @@ void br_dev_delete(struct net_device *dev, struct list_head *head) br_fdb_delete_by_port(br, NULL, 0, 1); - br_multicast_dev_del(br); cancel_delayed_work_sync(&br->gc_work); br_sysfs_delbr(br->dev); From 8048ced9beb21a52e3305f3332ae82020619f24e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 25 Apr 2017 09:17:29 -0700 Subject: [PATCH 33/41] net: ipv6: regenerate host route if moved to gc list Taking down the loopback device wreaks havoc on IPv6 routing. By extension, taking down a VRF device wreaks havoc on its table. Dmitry and Andrey both reported heap out-of-bounds reports in the IPv6 FIB code while running syzkaller fuzzer. The root cause is a dead dst that is on the garbage list gets reinserted into the IPv6 FIB. While on the gc (or perhaps when it gets added to the gc list) the dst->next is set to an IPv4 dst. A subsequent walk of the ipv6 tables causes the out-of-bounds access. Andrey's reproducer was the key to getting to the bottom of this. With IPv6, host routes for an address have the dst->dev set to the loopback device. When the 'lo' device is taken down, rt6_ifdown initiates a walk of the fib evicting routes with the 'lo' device which means all host routes are removed. That process moves the dst which is attached to an inet6_ifaddr to the gc list and marks it as dead. The recent change to keep global IPv6 addresses added a new function, fixup_permanent_addr, that is called on admin up. That function restarts dad for an inet6_ifaddr and when it completes the host route attached to it is inserted into the fib. Since the route was marked dead and moved to the gc list, re-inserting the route causes the reported out-of-bounds accesses. If the device with the address is taken down or the address is removed, the WARN_ON in fib6_del is triggered. All of those faults are fixed by regenerating the host route if the existing one has been moved to the gc list, something that can be determined by checking if the rt6i_ref counter is 0. Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional") Reported-by: Dmitry Vyukov Reported-by: Andrey Konovalov Signed-off-by: David Ahern Acked-by: Martin KaFai Lau Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 80ce478c4851..0ea96c4d334d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3271,14 +3271,24 @@ static void addrconf_gre_config(struct net_device *dev) static int fixup_permanent_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) { - if (!ifp->rt) { - struct rt6_info *rt; + /* rt6i_ref == 0 means the host route was removed from the + * FIB, for example, if 'lo' device is taken down. In that + * case regenerate the host route. + */ + if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) { + struct rt6_info *rt, *prev; rt = addrconf_dst_alloc(idev, &ifp->addr, false); if (unlikely(IS_ERR(rt))) return PTR_ERR(rt); + /* ifp->rt can be accessed outside of rtnl */ + spin_lock(&ifp->lock); + prev = ifp->rt; ifp->rt = rt; + spin_unlock(&ifp->lock); + + ip6_rt_put(prev); } if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) { From fd2c83b35752f0a8236b976978ad4658df14a59f Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Tue, 25 Apr 2017 18:51:46 +0200 Subject: [PATCH 34/41] net/packet: check length in getsockopt() called with PACKET_HDRLEN In the case getsockopt() is called with PACKET_HDRLEN and optlen < 4 |val| remains uninitialized and the syscall may behave differently depending on its value, and even copy garbage to userspace on certain architectures. To fix this we now return -EINVAL if optlen is too small. This bug has been detected with KMSAN. Signed-off-by: Alexander Potapenko Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8489beff5c25..ea81ccf3c7d6 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3836,6 +3836,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, case PACKET_HDRLEN: if (len > sizeof(int)) len = sizeof(int); + if (len < sizeof(int)) + return -EINVAL; if (copy_from_user(&val, optval, len)) return -EFAULT; switch (val) { From f555f34fdc586a56204cd16d9a7c104ec6cb6650 Mon Sep 17 00:00:00 2001 From: Alexander Kochetkov Date: Thu, 20 Apr 2017 14:00:04 +0300 Subject: [PATCH 35/41] net: phy: fix auto-negotiation stall due to unavailable interrupt The Ethernet link on an interrupt driven PHY was not coming up if the Ethernet cable was plugged before the Ethernet interface was brought up. The patch trigger PHY state machine to update link state if PHY was requested to do auto-negotiation and auto-negotiation complete flag already set. During power-up cycle the PHY do auto-negotiation, generate interrupt and set auto-negotiation complete flag. Interrupt is handled by PHY state machine but doesn't update link state because PHY is in PHY_READY state. After some time MAC bring up, start and request PHY to do auto-negotiation. If there are no new settings to advertise genphy_config_aneg() doesn't start PHY auto-negotiation. PHY continue to stay in auto-negotiation complete state and doesn't fire interrupt. At the same time PHY state machine expect that PHY started auto-negotiation and is waiting for interrupt from PHY and it won't get it. Fixes: 321beec5047a ("net: phy: Use interrupts when available in NOLINK state") Signed-off-by: Alexander Kochetkov Cc: stable # v4.9+ Tested-by: Roger Quadros Tested-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 40 ++++++++++++++++++++++++++++++++++++---- include/linux/phy.h | 1 + 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index a2bfc82e95d7..97ff1278167b 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -591,16 +591,18 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) EXPORT_SYMBOL(phy_mii_ioctl); /** - * phy_start_aneg - start auto-negotiation for this PHY device + * phy_start_aneg_priv - start auto-negotiation for this PHY device * @phydev: the phy_device struct + * @sync: indicate whether we should wait for the workqueue cancelation * * Description: Sanitizes the settings (if we're not autonegotiating * them), and then calls the driver's config_aneg function. * If the PHYCONTROL Layer is operating, we change the state to * reflect the beginning of Auto-negotiation or forcing. */ -int phy_start_aneg(struct phy_device *phydev) +static int phy_start_aneg_priv(struct phy_device *phydev, bool sync) { + bool trigger = 0; int err; if (!phydev->drv) @@ -628,10 +630,40 @@ int phy_start_aneg(struct phy_device *phydev) } } + /* Re-schedule a PHY state machine to check PHY status because + * negotiation may already be done and aneg interrupt may not be + * generated. + */ + if (phy_interrupt_is_valid(phydev) && (phydev->state == PHY_AN)) { + err = phy_aneg_done(phydev); + if (err > 0) { + trigger = true; + err = 0; + } + } + out_unlock: mutex_unlock(&phydev->lock); + + if (trigger) + phy_trigger_machine(phydev, sync); + return err; } + +/** + * phy_start_aneg - start auto-negotiation for this PHY device + * @phydev: the phy_device struct + * + * Description: Sanitizes the settings (if we're not autonegotiating + * them), and then calls the driver's config_aneg function. + * If the PHYCONTROL Layer is operating, we change the state to + * reflect the beginning of Auto-negotiation or forcing. + */ +int phy_start_aneg(struct phy_device *phydev) +{ + return phy_start_aneg_priv(phydev, true); +} EXPORT_SYMBOL(phy_start_aneg); /** @@ -659,7 +691,7 @@ void phy_start_machine(struct phy_device *phydev) * state machine runs. */ -static void phy_trigger_machine(struct phy_device *phydev, bool sync) +void phy_trigger_machine(struct phy_device *phydev, bool sync) { if (sync) cancel_delayed_work_sync(&phydev->state_queue); @@ -1154,7 +1186,7 @@ void phy_state_machine(struct work_struct *work) mutex_unlock(&phydev->lock); if (needs_aneg) - err = phy_start_aneg(phydev); + err = phy_start_aneg_priv(phydev, false); else if (do_suspend) phy_suspend(phydev); diff --git a/include/linux/phy.h b/include/linux/phy.h index 43a774873aa9..fb3857337151 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -852,6 +852,7 @@ void phy_change_work(struct work_struct *work); void phy_mac_interrupt(struct phy_device *phydev, int new_link); void phy_start_machine(struct phy_device *phydev); void phy_stop_machine(struct phy_device *phydev); +void phy_trigger_machine(struct phy_device *phydev, bool sync); int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_ethtool_ksettings_get(struct phy_device *phydev, From b43bd72835a5f7adef81fe53fa1fbe7b0e43df8e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 26 Apr 2017 14:33:14 -0400 Subject: [PATCH 36/41] Revert "phy: micrel: Disable auto negotiation on startup" This reverts commit 99f81afc139c6edd14d77a91ee91685a414a1c66. It was papering over the real problem, which is fixed by commit f555f34fdc58 ("net: phy: fix auto-negotiation stall due to unavailable interrupt") Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 1326d99771c1..da5b39268370 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -297,17 +297,6 @@ static int kszphy_config_init(struct phy_device *phydev) if (priv->led_mode >= 0) kszphy_setup_led(phydev, type->led_mode_reg, priv->led_mode); - if (phy_interrupt_is_valid(phydev)) { - int ctl = phy_read(phydev, MII_BMCR); - - if (ctl < 0) - return ctl; - - ret = phy_write(phydev, MII_BMCR, ctl & ~BMCR_ANENABLE); - if (ret < 0) - return ret; - } - return 0; } From 5294b83086cc1c35b4efeca03644cf9d12282e5b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 25 Apr 2017 19:08:18 +0200 Subject: [PATCH 37/41] macsec: dynamically allocate space for sglist We call skb_cow_data, which is good anyway to ensure we can actually modify the skb as such (another error from prior). Now that we have the number of fragments required, we can safely allocate exactly that amount of memory. Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Jason A. Donenfeld Acked-by: Sabrina Dubroca Signed-off-by: David S. Miller --- drivers/net/macsec.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index dbab05afcdbe..49ce4e9f4a0f 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -617,7 +617,8 @@ static void macsec_encrypt_done(struct crypto_async_request *base, int err) static struct aead_request *macsec_alloc_req(struct crypto_aead *tfm, unsigned char **iv, - struct scatterlist **sg) + struct scatterlist **sg, + int num_frags) { size_t size, iv_offset, sg_offset; struct aead_request *req; @@ -629,7 +630,7 @@ static struct aead_request *macsec_alloc_req(struct crypto_aead *tfm, size = ALIGN(size, __alignof__(struct scatterlist)); sg_offset = size; - size += sizeof(struct scatterlist) * (MAX_SKB_FRAGS + 1); + size += sizeof(struct scatterlist) * num_frags; tmp = kmalloc(size, GFP_ATOMIC); if (!tmp) @@ -649,6 +650,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, { int ret; struct scatterlist *sg; + struct sk_buff *trailer; unsigned char *iv; struct ethhdr *eth; struct macsec_eth_header *hh; @@ -723,7 +725,14 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, return ERR_PTR(-EINVAL); } - req = macsec_alloc_req(tx_sa->key.tfm, &iv, &sg); + ret = skb_cow_data(skb, 0, &trailer); + if (unlikely(ret < 0)) { + macsec_txsa_put(tx_sa); + kfree_skb(skb); + return ERR_PTR(ret); + } + + req = macsec_alloc_req(tx_sa->key.tfm, &iv, &sg, ret); if (!req) { macsec_txsa_put(tx_sa); kfree_skb(skb); @@ -732,7 +741,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, macsec_fill_iv(iv, secy->sci, pn); - sg_init_table(sg, MAX_SKB_FRAGS + 1); + sg_init_table(sg, ret); skb_to_sgvec(skb, sg, 0, skb->len); if (tx_sc->encrypt) { @@ -917,6 +926,7 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb, { int ret; struct scatterlist *sg; + struct sk_buff *trailer; unsigned char *iv; struct aead_request *req; struct macsec_eth_header *hdr; @@ -927,7 +937,12 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb, if (!skb) return ERR_PTR(-ENOMEM); - req = macsec_alloc_req(rx_sa->key.tfm, &iv, &sg); + ret = skb_cow_data(skb, 0, &trailer); + if (unlikely(ret < 0)) { + kfree_skb(skb); + return ERR_PTR(ret); + } + req = macsec_alloc_req(rx_sa->key.tfm, &iv, &sg, ret); if (!req) { kfree_skb(skb); return ERR_PTR(-ENOMEM); @@ -936,7 +951,7 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb, hdr = (struct macsec_eth_header *)skb->data; macsec_fill_iv(iv, sci, ntohl(hdr->packet_number)); - sg_init_table(sg, MAX_SKB_FRAGS + 1); + sg_init_table(sg, ret); skb_to_sgvec(skb, sg, 0, skb->len); if (hdr->tci_an & MACSEC_TCI_E) { @@ -2716,7 +2731,7 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb, } #define MACSEC_FEATURES \ - (NETIF_F_SG | NETIF_F_HIGHDMA) + (NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST) static struct lock_class_key macsec_netdev_addr_lock_key; static int macsec_dev_init(struct net_device *dev) From 9899886d5e8ec5b343b1efe44f185a0e68dc6454 Mon Sep 17 00:00:00 2001 From: Myungho Jung Date: Tue, 25 Apr 2017 11:58:15 -0700 Subject: [PATCH 38/41] net: core: Prevent from dereferencing null pointer when releasing SKB Added NULL check to make __dev_kfree_skb_irq consistent with kfree family of functions. Link: https://bugzilla.kernel.org/show_bug.cgi?id=195289 Signed-off-by: Myungho Jung Signed-off-by: David S. Miller --- net/core/dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 533a6d6f6092..9b5875388c23 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2450,6 +2450,9 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) { unsigned long flags; + if (unlikely(!skb)) + return; + if (likely(atomic_read(&skb->users) == 1)) { smp_rmb(); atomic_set(&skb->users, 0); From 199ab00f3cdb6f154ea93fa76fd80192861a821d Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 25 Apr 2017 14:37:15 -0700 Subject: [PATCH 39/41] ipv6: check skb->protocol before lookup for nexthop Andrey reported a out-of-bound access in ip6_tnl_xmit(), this is because we use an ipv4 dst in ip6_tnl_xmit() and cast an IPv4 neigh key as an IPv6 address: neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr); if (!neigh) goto tx_err_link_failure; addr6 = (struct in6_addr *)&neigh->primary_key; // <=== HERE addr_type = ipv6_addr_type(addr6); if (addr_type == IPV6_ADDR_ANY) addr6 = &ipv6_hdr(skb)->daddr; memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); Also the network header of the skb at this point should be still IPv4 for 4in6 tunnels, we shold not just use it as IPv6 header. This patch fixes it by checking if skb->protocol is ETH_P_IPV6: if it is, we are safe to do the nexthop lookup using skb_dst() and ipv6_hdr(skb)->daddr; if not (aka IPv4), we have no clue about which dest address we can pick here, we have to rely on callers to fill it from tunnel config, so just fall to ip6_route_output() to make the decision. Fixes: ea3dc9601bda ("ip6_tunnel: Add support for wildcard tunnel endpoints.") Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Cc: Steffen Klassert Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 75fac933c209..a9692ec0cd6d 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1037,7 +1037,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, struct ip6_tnl *t = netdev_priv(dev); struct net *net = t->net; struct net_device_stats *stats = &t->dev->stats; - struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct ipv6hdr *ipv6h; struct ipv6_tel_txoption opt; struct dst_entry *dst = NULL, *ndst = NULL; struct net_device *tdev; @@ -1057,26 +1057,28 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, /* NBMA tunnel */ if (ipv6_addr_any(&t->parms.raddr)) { - struct in6_addr *addr6; - struct neighbour *neigh; - int addr_type; + if (skb->protocol == htons(ETH_P_IPV6)) { + struct in6_addr *addr6; + struct neighbour *neigh; + int addr_type; - if (!skb_dst(skb)) - goto tx_err_link_failure; + if (!skb_dst(skb)) + goto tx_err_link_failure; - neigh = dst_neigh_lookup(skb_dst(skb), - &ipv6_hdr(skb)->daddr); - if (!neigh) - goto tx_err_link_failure; + neigh = dst_neigh_lookup(skb_dst(skb), + &ipv6_hdr(skb)->daddr); + if (!neigh) + goto tx_err_link_failure; - addr6 = (struct in6_addr *)&neigh->primary_key; - addr_type = ipv6_addr_type(addr6); + addr6 = (struct in6_addr *)&neigh->primary_key; + addr_type = ipv6_addr_type(addr6); - if (addr_type == IPV6_ADDR_ANY) - addr6 = &ipv6_hdr(skb)->daddr; + if (addr_type == IPV6_ADDR_ANY) + addr6 = &ipv6_hdr(skb)->daddr; - memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); - neigh_release(neigh); + memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); + neigh_release(neigh); + } } else if (!(t->parms.flags & (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) { /* enable the cache only only if the routing decision does From c1201444075009507a6818de6518e2822b9a87c8 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 25 Apr 2017 17:38:02 -0700 Subject: [PATCH 40/41] tcp: memset ca_priv data to 0 properly Always zero out ca_priv data in tcp_assign_congestion_control() so that ca_priv data is cleared out during socket creation. Also always zero out ca_priv data in tcp_reinit_congestion_control() so that when cc algorithm is changed, ca_priv data is cleared out as well. We should still zero out ca_priv data even in TCP_CLOSE state because user could call connect() on AF_UNSPEC to disconnect the socket and leave it in TCP_CLOSE state and later call setsockopt() to switch cc algorithm on this socket. Fixes: 2b0a8c9ee ("tcp: add CDG congestion control") Reported-by: Andrey Konovalov Signed-off-by: Wei Wang Acked-by: Eric Dumazet Acked-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_cong.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 79c4817abc94..6e3c512054a6 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -168,12 +168,8 @@ void tcp_assign_congestion_control(struct sock *sk) } out: rcu_read_unlock(); + memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); - /* Clear out private data before diag gets it and - * the ca has not been initialized. - */ - if (ca->get_info) - memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); if (ca->flags & TCP_CONG_NEEDS_ECN) INET_ECN_xmit(sk); else @@ -200,11 +196,10 @@ static void tcp_reinit_congestion_control(struct sock *sk, tcp_cleanup_congestion_control(sk); icsk->icsk_ca_ops = ca; icsk->icsk_ca_setsockopt = 1; + memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); - if (sk->sk_state != TCP_CLOSE) { - memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); + if (sk->sk_state != TCP_CLOSE) tcp_init_congestion_control(sk); - } } /* Manage refcounts on socket close. */ From 105f5528b9bbaa08b526d3405a5bcd2ff0c953c8 Mon Sep 17 00:00:00 2001 From: Jamie Bainbridge Date: Wed, 26 Apr 2017 10:43:27 +1000 Subject: [PATCH 41/41] ipv6: check raw payload size correctly in ioctl In situations where an skb is paged, the transport header pointer and tail pointer can be the same because the skb contents are in frags. This results in ioctl(SIOCINQ/FIONREAD) incorrectly returning a length of 0 when the length to receive is actually greater than zero. skb->len is already correctly set in ip6_input_finish() with pskb_pull(), so use skb->len as it always returns the correct result for both linear and paged data. Signed-off-by: Jamie Bainbridge Signed-off-by: David S. Miller --- net/ipv6/raw.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index f174e76e6505..0da6a12b5472 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1178,8 +1178,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb) - amount = skb_tail_pointer(skb) - - skb_transport_header(skb); + amount = skb->len; spin_unlock_bh(&sk->sk_receive_queue.lock); return put_user(amount, (int __user *)arg); }