From 661b8d1b0e3a745e25f05adef2ebd00d830eeea7 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Mon, 1 Oct 2018 14:51:33 +0200 Subject: [PATCH 1/5] net: add umem reference in netdev{_rx}_queue These references to the umem will be used to store information on what kind of AF_XDP umem that is bound to a queue id, if any. Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann --- include/linux/netdevice.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1cbbf77a685f..8318f79586c2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -609,6 +609,9 @@ struct netdev_queue { /* Subordinate device that the queue has been assigned to */ struct net_device *sb_dev; +#ifdef CONFIG_XDP_SOCKETS + struct xdp_umem *umem; +#endif /* * write-mostly part */ @@ -738,6 +741,9 @@ struct netdev_rx_queue { struct kobject kobj; struct net_device *dev; struct xdp_rxq_info xdp_rxq; +#ifdef CONFIG_XDP_SOCKETS + struct xdp_umem *umem; +#endif } ____cacheline_aligned_in_smp; /* From c9b47cc1fabca533fb8ebaeb417778036c1ce27d Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Mon, 1 Oct 2018 14:51:34 +0200 Subject: [PATCH 2/5] xsk: fix bug when trying to use both copy and zero-copy on one queue id Previously, the xsk code did not record which umem was bound to a specific queue id. This was not required if all drivers were zero-copy enabled as this had to be recorded in the driver anyway. So if a user tried to bind two umems to the same queue, the driver would say no. But if copy-mode was first enabled and then zero-copy mode (or the reverse order), we mistakenly enabled both of them on the same umem leading to buggy behavior. The main culprit for this is that we did not store the association of umem to queue id in the copy case and only relied on the driver reporting this. As this relation was not stored in the driver for copy mode (it does not rely on the AF_XDP NDOs), this obviously could not work. This patch fixes the problem by always recording the umem to queue id relationship in the netdev_queue and netdev_rx_queue structs. This way we always know what kind of umem has been bound to a queue id and can act appropriately at bind time. Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann --- net/xdp/xdp_umem.c | 92 ++++++++++++++++++++++++++++++++-------------- net/xdp/xdp_umem.h | 2 +- net/xdp/xsk.c | 7 ---- 3 files changed, 65 insertions(+), 36 deletions(-) diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 555427b3e0fe..4d6c6652f5d1 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -42,27 +42,47 @@ void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs) } } -int xdp_umem_query(struct net_device *dev, u16 queue_id) +/* The umem is stored both in the _rx struct and the _tx struct as we do + * not know if the device has more tx queues than rx, or the opposite. + * This might also change during run time. + */ +static void xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem, + u16 queue_id) { - struct netdev_bpf bpf; + if (queue_id < dev->real_num_rx_queues) + dev->_rx[queue_id].umem = umem; + if (queue_id < dev->real_num_tx_queues) + dev->_tx[queue_id].umem = umem; +} - ASSERT_RTNL(); +static struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, + u16 queue_id) +{ + if (queue_id < dev->real_num_rx_queues) + return dev->_rx[queue_id].umem; + if (queue_id < dev->real_num_tx_queues) + return dev->_tx[queue_id].umem; - memset(&bpf, 0, sizeof(bpf)); - bpf.command = XDP_QUERY_XSK_UMEM; - bpf.xsk.queue_id = queue_id; + return NULL; +} - if (!dev->netdev_ops->ndo_bpf) - return 0; - return dev->netdev_ops->ndo_bpf(dev, &bpf) ?: !!bpf.xsk.umem; +static void xdp_clear_umem_at_qid(struct net_device *dev, u16 queue_id) +{ + /* Zero out the entry independent on how many queues are configured + * at this point in time, as it might be used in the future. + */ + if (queue_id < dev->num_rx_queues) + dev->_rx[queue_id].umem = NULL; + if (queue_id < dev->num_tx_queues) + dev->_tx[queue_id].umem = NULL; } int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, - u32 queue_id, u16 flags) + u16 queue_id, u16 flags) { bool force_zc, force_copy; struct netdev_bpf bpf; - int err; + int err = 0; force_zc = flags & XDP_ZEROCOPY; force_copy = flags & XDP_COPY; @@ -70,17 +90,23 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, if (force_zc && force_copy) return -EINVAL; - if (force_copy) - return 0; - - if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit) - return force_zc ? -EOPNOTSUPP : 0; /* fail or fallback */ - rtnl_lock(); - err = xdp_umem_query(dev, queue_id); - if (err) { - err = err < 0 ? -EOPNOTSUPP : -EBUSY; - goto err_rtnl_unlock; + if (xdp_get_umem_from_qid(dev, queue_id)) { + err = -EBUSY; + goto out_rtnl_unlock; + } + + xdp_reg_umem_at_qid(dev, umem, queue_id); + umem->dev = dev; + umem->queue_id = queue_id; + if (force_copy) + /* For copy-mode, we are done. */ + goto out_rtnl_unlock; + + if (!dev->netdev_ops->ndo_bpf || + !dev->netdev_ops->ndo_xsk_async_xmit) { + err = -EOPNOTSUPP; + goto err_unreg_umem; } bpf.command = XDP_SETUP_XSK_UMEM; @@ -89,18 +115,20 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, err = dev->netdev_ops->ndo_bpf(dev, &bpf); if (err) - goto err_rtnl_unlock; + goto err_unreg_umem; rtnl_unlock(); dev_hold(dev); - umem->dev = dev; - umem->queue_id = queue_id; umem->zc = true; return 0; -err_rtnl_unlock: +err_unreg_umem: + xdp_clear_umem_at_qid(dev, queue_id); + if (!force_zc) + err = 0; /* fallback to copy mode */ +out_rtnl_unlock: rtnl_unlock(); - return force_zc ? err : 0; /* fail or fallback */ + return err; } static void xdp_umem_clear_dev(struct xdp_umem *umem) @@ -108,7 +136,7 @@ static void xdp_umem_clear_dev(struct xdp_umem *umem) struct netdev_bpf bpf; int err; - if (umem->dev) { + if (umem->zc) { bpf.command = XDP_SETUP_XSK_UMEM; bpf.xsk.umem = NULL; bpf.xsk.queue_id = umem->queue_id; @@ -119,9 +147,17 @@ static void xdp_umem_clear_dev(struct xdp_umem *umem) if (err) WARN(1, "failed to disable umem!\n"); + } + if (umem->dev) { + rtnl_lock(); + xdp_clear_umem_at_qid(umem->dev, umem->queue_id); + rtnl_unlock(); + } + + if (umem->zc) { dev_put(umem->dev); - umem->dev = NULL; + umem->zc = false; } } diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h index c8be1ad3eb88..27603227601b 100644 --- a/net/xdp/xdp_umem.h +++ b/net/xdp/xdp_umem.h @@ -9,7 +9,7 @@ #include int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, - u32 queue_id, u16 flags); + u16 queue_id, u16 flags); bool xdp_umem_validate_queues(struct xdp_umem *umem); void xdp_get_umem(struct xdp_umem *umem); void xdp_put_umem(struct xdp_umem *umem); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 5a432dfee4ee..caeddad15b7c 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -419,13 +419,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) } qid = sxdp->sxdp_queue_id; - - if ((xs->rx && qid >= dev->real_num_rx_queues) || - (xs->tx && qid >= dev->real_num_tx_queues)) { - err = -EINVAL; - goto out_unlock; - } - flags = sxdp->sxdp_flags; if (flags & XDP_SHARED_UMEM) { From b8c8a2e2e3758be93959587e6b19aadbb0ce7275 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 1 Oct 2018 14:51:35 +0200 Subject: [PATCH 3/5] ethtool: rename local variable max -> curr ethtool_set_channels() validates the config against driver's max settings. It retrieves the current config and stores it in a variable called max. This was okay when only max settings were accessed but we will soon want to access current settings as well, so calling the entire structure max makes the code less readable. While at it drop unnecessary parenthesis. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- net/core/ethtool.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 96afc55aa61e..9a648fb09594 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1655,7 +1655,7 @@ static noinline_for_stack int ethtool_get_channels(struct net_device *dev, static noinline_for_stack int ethtool_set_channels(struct net_device *dev, void __user *useraddr) { - struct ethtool_channels channels, max = { .cmd = ETHTOOL_GCHANNELS }; + struct ethtool_channels channels, curr = { .cmd = ETHTOOL_GCHANNELS }; u32 max_rx_in_use = 0; if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels) @@ -1664,13 +1664,13 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, if (copy_from_user(&channels, useraddr, sizeof(channels))) return -EFAULT; - dev->ethtool_ops->get_channels(dev, &max); + dev->ethtool_ops->get_channels(dev, &curr); /* ensure new counts are within the maximums */ - if ((channels.rx_count > max.max_rx) || - (channels.tx_count > max.max_tx) || - (channels.combined_count > max.max_combined) || - (channels.other_count > max.max_other)) + if (channels.rx_count > curr.max_rx || + channels.tx_count > curr.max_tx || + channels.combined_count > curr.max_combined || + channels.other_count > curr.max_other) return -EINVAL; /* ensure the new Rx count fits within the configured Rx flow From 1661d346628115c364e2b7d5b15a64ca3bd0dbd4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 1 Oct 2018 14:51:36 +0200 Subject: [PATCH 4/5] ethtool: don't allow disabling queues with umem installed We already check the RSS indirection table does not use queues which would be disabled by channel reconfiguration. Make sure user does not try to disable queues which have a UMEM and zero-copy AF_XDP socket installed. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- include/net/xdp_sock.h | 7 +++++++ net/core/ethtool.c | 11 +++++++++++ net/xdp/xdp_umem.c | 4 ++-- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 70a115bea4f4..13acb9803a6d 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -86,6 +86,7 @@ struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries); struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem, struct xdp_umem_fq_reuse *newq); void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq); +struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, u16 queue_id); static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr) { @@ -183,6 +184,12 @@ static inline void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq) { } +static inline struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, + u16 queue_id) +{ + return NULL; +} + static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr) { return NULL; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 9a648fb09594..5a788adeba0b 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -27,6 +27,7 @@ #include #include #include +#include /* * Some useful ethtool_ops methods that're device independent. @@ -1656,7 +1657,9 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, void __user *useraddr) { struct ethtool_channels channels, curr = { .cmd = ETHTOOL_GCHANNELS }; + u16 from_channel, to_channel; u32 max_rx_in_use = 0; + unsigned int i; if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels) return -EOPNOTSUPP; @@ -1680,6 +1683,14 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, (channels.combined_count + channels.rx_count) <= max_rx_in_use) return -EINVAL; + /* Disabling channels, query zero-copy AF_XDP sockets */ + from_channel = channels.combined_count + + min(channels.rx_count, channels.tx_count); + to_channel = curr.combined_count + max(curr.rx_count, curr.tx_count); + for (i = from_channel; i < to_channel; i++) + if (xdp_get_umem_from_qid(dev, i)) + return -EINVAL; + return dev->ethtool_ops->set_channels(dev, &channels); } diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 4d6c6652f5d1..773326f682b1 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -55,8 +55,8 @@ static void xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem, dev->_tx[queue_id].umem = umem; } -static struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, - u16 queue_id) +struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, + u16 queue_id) { if (queue_id < dev->real_num_rx_queues) return dev->_rx[queue_id].umem; From a41b4f3c58dddcb39b7072a97ac8cb9036ce53f6 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Mon, 1 Oct 2018 14:51:37 +0200 Subject: [PATCH 5/5] xsk: simplify xdp_clear_umem_at_qid implementation As we now do not allow ethtool to deactivate the queue id we are running an AF_XDP socket on, we can simplify the implementation of xdp_clear_umem_at_qid(). Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann --- net/xdp/xdp_umem.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 773326f682b1..c6007c58231c 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -68,12 +68,9 @@ struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, static void xdp_clear_umem_at_qid(struct net_device *dev, u16 queue_id) { - /* Zero out the entry independent on how many queues are configured - * at this point in time, as it might be used in the future. - */ - if (queue_id < dev->num_rx_queues) + if (queue_id < dev->real_num_rx_queues) dev->_rx[queue_id].umem = NULL; - if (queue_id < dev->num_tx_queues) + if (queue_id < dev->real_num_tx_queues) dev->_tx[queue_id].umem = NULL; }