diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 203353857535..2e36ec842372 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -186,6 +186,7 @@ struct xfrm_state { refcount_t refcnt; spinlock_t lock; + u32 pcpu_num; struct xfrm_id id; struct xfrm_selector sel; struct xfrm_mark mark; @@ -1660,7 +1661,7 @@ struct xfrmk_spdinfo { u32 spdhmcnt; }; -struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq); +struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num); int xfrm_state_delete(struct xfrm_state *x); int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync); int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid); @@ -1772,7 +1773,7 @@ int verify_spi_info(u8 proto, u32 min, u32 max, struct netlink_ext_ack *extack); int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi, struct netlink_ext_ack *extack); struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, - u8 mode, u32 reqid, u32 if_id, u8 proto, + u8 mode, u32 reqid, u32 if_id, u32 pcpu_num, u8 proto, const xfrm_address_t *daddr, const xfrm_address_t *saddr, int create, unsigned short family); diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 6a77328be114..1703b6fc868e 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -315,6 +315,7 @@ enum xfrm_attr_type_t { XFRMA_SET_MARK_MASK, /* __u32 */ XFRMA_IF_ID, /* __u32 */ XFRMA_MTIMER_THRESH, /* __u32 in seconds for input SA */ + XFRMA_SA_PCPU, /* __u32 */ __XFRMA_MAX #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */ @@ -430,6 +431,7 @@ struct xfrm_userpolicy_info { #define XFRM_POLICY_LOCALOK 1 /* Allow user to override global policy */ /* Automatically expand selector to include matching ICMP payloads. */ #define XFRM_POLICY_ICMP 2 +#define XFRM_POLICY_CPU_ACQUIRE 4 __u8 share; }; diff --git a/net/key/af_key.c b/net/key/af_key.c index f79fb99271ed..c56bb4f451e6 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1354,7 +1354,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_ } if (hdr->sadb_msg_seq) { - x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq); + x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq, UINT_MAX); if (x && !xfrm_addr_equal(&x->id.daddr, xdaddr, family)) { xfrm_state_put(x); x = NULL; @@ -1362,7 +1362,8 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_ } if (!x) - x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, proto, xdaddr, xsaddr, 1, family); + x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, UINT_MAX, + proto, xdaddr, xsaddr, 1, family); if (x == NULL) return -ENOENT; @@ -1417,7 +1418,7 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, const struct sadb if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0) return 0; - x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq); + x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq, UINT_MAX); if (x == NULL) return 0; diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index 655fe4ff8621..c6d93eed4e57 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -129,6 +129,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = { [XFRMA_SET_MARK_MASK] = { .type = NLA_U32 }, [XFRMA_IF_ID] = { .type = NLA_U32 }, [XFRMA_MTIMER_THRESH] = { .type = NLA_U32 }, + [XFRMA_SA_PCPU] = { .type = NLA_U32 }, }; static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb, @@ -277,9 +278,10 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src) case XFRMA_SET_MARK_MASK: case XFRMA_IF_ID: case XFRMA_MTIMER_THRESH: + case XFRMA_SA_PCPU: return xfrm_nla_cpy(dst, src, nla_len(src)); default: - BUILD_BUG_ON(XFRMA_MAX != XFRMA_MTIMER_THRESH); + BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_PCPU); pr_warn_once("unsupported nla_type %d\n", src->nla_type); return -EOPNOTSUPP; } @@ -434,7 +436,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla, int err; if (type > XFRMA_MAX) { - BUILD_BUG_ON(XFRMA_MAX != XFRMA_MTIMER_THRESH); + BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_PCPU); NL_SET_ERR_MSG(extack, "Bad attribute"); return -EOPNOTSUPP; } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 8a6e8656d014..f84ff3582b7c 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -679,6 +679,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net) x->lft.hard_packet_limit = XFRM_INF; x->replay_maxage = 0; x->replay_maxdiff = 0; + x->pcpu_num = UINT_MAX; spin_lock_init(&x->lock); } return x; @@ -1154,6 +1155,12 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, struct xfrm_state **best, int *acq_in_progress, int *error) { + /* We need the cpu id just as a lookup key, + * we don't require it to be stable. + */ + unsigned int pcpu_id = get_cpu(); + put_cpu(); + /* Resolution logic: * 1. There is a valid state with matching selector. Done. * 2. Valid state with inappropriate selector. Skip. @@ -1173,13 +1180,18 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, &fl->u.__fl_common)) return; + if (x->pcpu_num != UINT_MAX && x->pcpu_num != pcpu_id) + return; + if (!*best || + ((*best)->pcpu_num == UINT_MAX && x->pcpu_num == pcpu_id) || (*best)->km.dying > x->km.dying || ((*best)->km.dying == x->km.dying && (*best)->curlft.add_time < x->curlft.add_time)) *best = x; } else if (x->km.state == XFRM_STATE_ACQ) { - *acq_in_progress = 1; + if (!*best || x->pcpu_num == pcpu_id) + *acq_in_progress = 1; } else if (x->km.state == XFRM_STATE_ERROR || x->km.state == XFRM_STATE_EXPIRED) { if ((!x->sel.family || @@ -1208,6 +1220,13 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, unsigned short encap_family = tmpl->encap_family; unsigned int sequence; struct km_event c; + unsigned int pcpu_id; + + /* We need the cpu id just as a lookup key, + * we don't require it to be stable. + */ + pcpu_id = get_cpu(); + put_cpu(); to_put = NULL; @@ -1281,7 +1300,10 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, } found: - x = best; + if (!(pol->flags & XFRM_POLICY_CPU_ACQUIRE) || + (best && (best->pcpu_num == pcpu_id))) + x = best; + if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && (x0 = __xfrm_state_lookup_all(net, mark, daddr, @@ -1313,6 +1335,8 @@ found: xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family); memcpy(&x->mark, &pol->mark, sizeof(x->mark)); x->if_id = if_id; + if ((pol->flags & XFRM_POLICY_CPU_ACQUIRE) && best) + x->pcpu_num = pcpu_id; error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid); if (error) { @@ -1390,6 +1414,11 @@ found: x = NULL; error = -ESRCH; } + + /* Use the already installed 'fallback' while the CPU-specific + * SA acquire is handled*/ + if (best) + x = best; } out: if (x) { @@ -1521,12 +1550,14 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) unsigned int h; u32 mark = xnew->mark.v & xnew->mark.m; u32 if_id = xnew->if_id; + u32 cpu_id = xnew->pcpu_num; h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family); hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && x->if_id == if_id && + x->pcpu_num == cpu_id && (mark & x->mark.m) == x->mark.v && xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) && xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family)) @@ -1549,7 +1580,7 @@ EXPORT_SYMBOL(xfrm_state_insert); static struct xfrm_state *__find_acq_core(struct net *net, const struct xfrm_mark *m, unsigned short family, u8 mode, - u32 reqid, u32 if_id, u8 proto, + u32 reqid, u32 if_id, u32 pcpu_num, u8 proto, const xfrm_address_t *daddr, const xfrm_address_t *saddr, int create) @@ -1566,6 +1597,7 @@ static struct xfrm_state *__find_acq_core(struct net *net, x->id.spi != 0 || x->id.proto != proto || (mark & x->mark.m) != x->mark.v || + x->pcpu_num != pcpu_num || !xfrm_addr_equal(&x->id.daddr, daddr, family) || !xfrm_addr_equal(&x->props.saddr, saddr, family)) continue; @@ -1599,6 +1631,7 @@ static struct xfrm_state *__find_acq_core(struct net *net, break; } + x->pcpu_num = pcpu_num; x->km.state = XFRM_STATE_ACQ; x->id.proto = proto; x->props.family = family; @@ -1627,7 +1660,7 @@ static struct xfrm_state *__find_acq_core(struct net *net, return x; } -static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq); +static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num); int xfrm_state_add(struct xfrm_state *x) { @@ -1653,7 +1686,7 @@ int xfrm_state_add(struct xfrm_state *x) } if (use_spi && x->km.seq) { - x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq); + x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq, x->pcpu_num); if (x1 && ((x1->id.proto != x->id.proto) || !xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) { to_put = x1; @@ -1663,7 +1696,7 @@ int xfrm_state_add(struct xfrm_state *x) if (use_spi && !x1) x1 = __find_acq_core(net, &x->mark, family, x->props.mode, - x->props.reqid, x->if_id, x->id.proto, + x->props.reqid, x->if_id, x->pcpu_num, x->id.proto, &x->id.daddr, &x->props.saddr, 0); __xfrm_state_bump_genids(x); @@ -1788,6 +1821,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, x->props.flags = orig->props.flags; x->props.extra_flags = orig->props.extra_flags; + x->pcpu_num = orig->pcpu_num; x->if_id = orig->if_id; x->tfcpad = orig->tfcpad; x->replay_maxdiff = orig->replay_maxdiff; @@ -2043,13 +2077,14 @@ EXPORT_SYMBOL(xfrm_state_lookup_byaddr); struct xfrm_state * xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid, - u32 if_id, u8 proto, const xfrm_address_t *daddr, + u32 if_id, u32 pcpu_num, u8 proto, const xfrm_address_t *daddr, const xfrm_address_t *saddr, int create, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&net->xfrm.xfrm_state_lock); - x = __find_acq_core(net, mark, family, mode, reqid, if_id, proto, daddr, saddr, create); + x = __find_acq_core(net, mark, family, mode, reqid, if_id, pcpu_num, + proto, daddr, saddr, create); spin_unlock_bh(&net->xfrm.xfrm_state_lock); return x; @@ -2184,7 +2219,7 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, /* Silly enough, but I'm lazy to build resolution list */ -static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq) +static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num) { unsigned int h = xfrm_seq_hash(net, seq); struct xfrm_state *x; @@ -2192,6 +2227,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) { if (x->km.seq == seq && (mark & x->mark.m) == x->mark.v && + x->pcpu_num == pcpu_num && x->km.state == XFRM_STATE_ACQ) { xfrm_state_hold(x); return x; @@ -2201,12 +2237,12 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s return NULL; } -struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq) +struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num) { struct xfrm_state *x; spin_lock_bh(&net->xfrm.xfrm_state_lock); - x = __xfrm_find_acq_byseq(net, mark, seq); + x = __xfrm_find_acq_byseq(net, mark, seq, pcpu_num); spin_unlock_bh(&net->xfrm.xfrm_state_lock); return x; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 0f26500773d2..d5a67d7eed3b 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -738,6 +738,12 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, if (attrs[XFRMA_IF_ID]) x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + if (attrs[XFRMA_SA_PCPU]) { + x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]); + if (x->pcpu_num >= num_possible_cpus()) + goto error; + } + err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV], extack); if (err) goto error; @@ -1188,8 +1194,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x, if (ret) goto out; } - if (x->mapping_maxage) + if (x->mapping_maxage) { ret = nla_put_u32(skb, XFRMA_MTIMER_THRESH, x->mapping_maxage); + if (ret) + goto out; + } + if (x->pcpu_num != UINT_MAX) + ret = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num); out: return ret; } @@ -1585,6 +1596,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, u32 mark; struct xfrm_mark m; u32 if_id = 0; + u32 pcpu_num = UINT_MAX; p = nlmsg_data(nlh); err = verify_spi_info(p->info.id.proto, p->min, p->max, extack); @@ -1601,8 +1613,16 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, if (attrs[XFRMA_IF_ID]) if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + if (attrs[XFRMA_SA_PCPU]) { + pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]); + if (pcpu_num >= num_possible_cpus()) { + err = -EINVAL; + goto out_noput; + } + } + if (p->info.seq) { - x = xfrm_find_acq_byseq(net, mark, p->info.seq); + x = xfrm_find_acq_byseq(net, mark, p->info.seq, pcpu_num); if (x && !xfrm_addr_equal(&x->id.daddr, daddr, family)) { xfrm_state_put(x); x = NULL; @@ -1611,7 +1631,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, if (!x) x = xfrm_find_acq(net, &m, p->info.mode, p->info.reqid, - if_id, p->info.id.proto, daddr, + if_id, pcpu_num, p->info.id.proto, daddr, &p->info.saddr, 1, family); err = -ENOENT; @@ -2407,7 +2427,8 @@ static inline unsigned int xfrm_aevent_msgsize(struct xfrm_state *x) + nla_total_size_64bit(sizeof(struct xfrm_lifetime_cur)) + nla_total_size(sizeof(struct xfrm_mark)) + nla_total_size(4) /* XFRM_AE_RTHR */ - + nla_total_size(4); /* XFRM_AE_ETHR */ + + nla_total_size(4) /* XFRM_AE_ETHR */ + + nla_total_size(4); /* XFRMA_SA_PCPU */ } static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c) @@ -2463,6 +2484,8 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct err = xfrm_if_id_put(skb, x->if_id); if (err) goto out_cancel; + if (x->pcpu_num != UINT_MAX) + err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num); nlmsg_end(skb, nlh); return 0; @@ -2727,6 +2750,13 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, xfrm_mark_get(attrs, &mark); + if (attrs[XFRMA_SA_PCPU]) { + x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]); + err = -EINVAL; + if (x->pcpu_num >= num_possible_cpus()) + goto free_state; + } + err = verify_newpolicy_info(&ua->policy, extack); if (err) goto free_state; @@ -3054,6 +3084,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_SET_MARK_MASK] = { .type = NLA_U32 }, [XFRMA_IF_ID] = { .type = NLA_U32 }, [XFRMA_MTIMER_THRESH] = { .type = NLA_U32 }, + [XFRMA_SA_PCPU] = { .type = NLA_U32 }, }; EXPORT_SYMBOL_GPL(xfrma_policy); @@ -3195,7 +3226,8 @@ static void xfrm_netlink_rcv(struct sk_buff *skb) static inline unsigned int xfrm_expire_msgsize(void) { return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)) - + nla_total_size(sizeof(struct xfrm_mark)); + + nla_total_size(sizeof(struct xfrm_mark)) + + nla_total_size(4); /* XFRMA_SA_PCPU */ } static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c) @@ -3221,6 +3253,11 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct err = xfrm_if_id_put(skb, x->if_id); if (err) return err; + if (x->pcpu_num != UINT_MAX) { + err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num); + if (err) + return err; + } nlmsg_end(skb, nlh); return 0; @@ -3322,6 +3359,8 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x) } if (x->if_id) l += nla_total_size(sizeof(x->if_id)); + if (x->pcpu_num) + l += nla_total_size(sizeof(x->pcpu_num)); /* Must count x->lastused as it may become non-zero behind our back. */ l += nla_total_size_64bit(sizeof(u64)); @@ -3422,6 +3461,7 @@ static inline unsigned int xfrm_acquire_msgsize(struct xfrm_state *x, + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) + nla_total_size(sizeof(struct xfrm_mark)) + nla_total_size(xfrm_user_sec_ctx_size(x->security)) + + nla_total_size(4) /* XFRMA_SA_PCPU */ + userpolicy_type_attrsize(); } @@ -3458,6 +3498,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, err = xfrm_if_id_put(skb, xp->if_id); if (!err && xp->xdo.dev) err = copy_user_offload(&xp->xdo, skb); + if (!err && x->pcpu_num != UINT_MAX) + err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num); if (err) { nlmsg_cancel(skb, nlh); return err;