From d94a69cb2cfa77294921aae9afcfb866e723a2da Mon Sep 17 00:00:00 2001 From: Xin Xiong Date: Thu, 23 Dec 2021 10:48:12 +0800 Subject: [PATCH 1/4] netfilter: ipt_CLUSTERIP: fix refcount leak in clusterip_tg_check() The issue takes place in one error path of clusterip_tg_check(). When memcmp() returns nonzero, the function simply returns the error code, forgetting to decrease the reference count of a clusterip_config object, which is bumped earlier by clusterip_config_find_get(). This may cause a reference count leak. Fix this issue by decrementing the refcount of the object in that error path. Fixes: 06aa151ad1fc74 ("netfilter: ipt_CLUSTERIP: check MAC address when duplicate config is set") Signed-off-by: Xin Xiong Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 8fd1aba8af31..b518f20c9a24 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -520,8 +520,11 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) if (IS_ERR(config)) return PTR_ERR(config); } - } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) + } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) { + clusterip_config_entry_put(config); + clusterip_config_put(config); return -EINVAL; + } ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { From 1585f590a2e5dcae5833b05e030c47229299dd09 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 27 Dec 2021 11:52:53 +0800 Subject: [PATCH 2/4] selftests: netfilter: switch to socat for tests using -q option The nc command (nmap-ncat) that is distributed with Fedora/Red Hat does not have the -q option. This makes some tests fail with: nc: invalid option -- 'q' Let's switch to socat, which is far more dependable. 
Signed-off-by: Hangbin Liu Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- .../testing/selftests/netfilter/ipip-conntrack-mtu.sh | 9 +++++---- tools/testing/selftests/netfilter/nf_nat_edemux.sh | 10 +++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh index 4a6f5c3b3215..eb9553e4986b 100755 --- a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh +++ b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh @@ -41,7 +41,7 @@ checktool (){ checktool "iptables --version" "run test without iptables" checktool "ip -Version" "run test without ip tool" -checktool "which nc" "run test without nc (netcat)" +checktool "which socat" "run test without socat" checktool "ip netns add ${r_a}" "create net namespace" for n in ${r_b} ${r_w} ${c_a} ${c_b};do @@ -60,11 +60,12 @@ trap cleanup EXIT test_path() { msg="$1" - ip netns exec ${c_b} nc -n -w 3 -q 3 -u -l -p 5000 > ${rx} < /dev/null & + ip netns exec ${c_b} socat -t 3 - udp4-listen:5000,reuseaddr > ${rx} < /dev/null & sleep 1 for i in 1 2 3; do - head -c1400 /dev/zero | tr "\000" "a" | ip netns exec ${c_a} nc -n -w 1 -u 192.168.20.2 5000 + head -c1400 /dev/zero | tr "\000" "a" | \ + ip netns exec ${c_a} socat -t 1 -u STDIN UDP:192.168.20.2:5000 done wait @@ -189,7 +190,7 @@ ip netns exec ${r_w} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null #--------------------- #Now we send a 1400 bytes UDP packet from Client A to Client B: -# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | nc -u 192.168.20.2 5000 +# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | socat -u STDIN UDP:192.168.20.2:5000 test_path "without" # The IPv4 stack on Client A already knows the PMTU to Client B, so the diff --git a/tools/testing/selftests/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/netfilter/nf_nat_edemux.sh index cfee3b65be0f..1092bbcb1fba 100755 --- 
a/tools/testing/selftests/netfilter/nf_nat_edemux.sh +++ b/tools/testing/selftests/netfilter/nf_nat_edemux.sh @@ -76,23 +76,23 @@ ip netns exec $ns2 ip route add 10.96.0.1 via 192.168.1.1 sleep 1 # add a persistent connection from the other namespace -ip netns exec $ns2 nc -q 10 -w 10 192.168.1.1 5201 > /dev/null & +ip netns exec $ns2 socat -t 10 - TCP:192.168.1.1:5201 > /dev/null & sleep 1 # ip daddr:dport will be rewritten to 192.168.1.1 5201 # NAT must reallocate source port 10000 because # 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use -echo test | ip netns exec $ns2 nc -w 3 -q 3 10.96.0.1 443 >/dev/null +echo test | ip netns exec $ns2 socat -t 3 -u STDIN TCP:10.96.0.1:443 >/dev/null ret=$? kill $iperfs -# Check nc can connect to 10.96.0.1:443 (aka 192.168.1.1:5201). +# Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201). if [ $ret -eq 0 ]; then - echo "PASS: nc can connect via NAT'd address" + echo "PASS: socat can connect via NAT'd address" else - echo "FAIL: nc cannot connect via NAT'd address" + echo "FAIL: socat cannot connect via NAT'd address" exit 1 fi From 4e1860a3863707e8177329c006d10f9e37e097a8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 5 Jan 2022 16:09:57 +0100 Subject: [PATCH 3/4] netfilter: nft_payload: do not update layer 4 checksum when mangling fragments IP fragments do not come with the transport header, hence skip bogus layer 4 checksum updates. 
Fixes: 1814096980bb ("netfilter: nft_payload: layer 4 checksum adjustment for pseudoheader fields") Reported-and-tested-by: Steffen Weinreich Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_payload.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index bd689938a2e0..58e96a0fe0b4 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -546,6 +546,9 @@ static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt, struct sk_buff *skb, unsigned int *l4csum_offset) { + if (pkt->fragoff) + return -1; + switch (pkt->tprot) { case IPPROTO_TCP: *l4csum_offset = offsetof(struct tcphdr, check); From 23c54263efd7cb605e2f7af72717a2a951999217 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 5 Jan 2022 14:19:54 +0100 Subject: [PATCH 4/4] netfilter: nft_set_pipapo: allocate pcpu scratch maps on clone This is needed in case a new transaction is made that doesn't insert any new elements into an already existing set. Else, after second 'nft -f ruleset.txt', lookups in such a set will fail because ->lookup() encounters raw_cpu_ptr(m->scratch) == NULL. For the initial rule load, insertion of elements takes care of the allocation, but for rule reloads this isn't guaranteed: we might not have additions to the set. 
Fixes: 3c4287f62044a90e ("nf_tables: Add set type for arbitrary concatenation of ranges") Reported-by: etkaar Signed-off-by: Florian Westphal Reviewed-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index dce866d93fee..2c8051d8cca6 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1290,6 +1290,11 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) if (!new->scratch_aligned) goto out_scratch; #endif + for_each_possible_cpu(i) + *per_cpu_ptr(new->scratch, i) = NULL; + + if (pipapo_realloc_scratch(new, old->bsize_max)) + goto out_scratch_realloc; rcu_head_init(&new->rcu); @@ -1334,6 +1339,9 @@ out_lt: kvfree(dst->lt); dst--; } +out_scratch_realloc: + for_each_possible_cpu(i) + kfree(*per_cpu_ptr(new->scratch, i)); #ifdef NFT_PIPAPO_ALIGN free_percpu(new->scratch_aligned); #endif