From b32411f0459d2e069d9ab7b989d11bf4bf72524b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 30 Apr 2025 11:05:54 +0300 Subject: [PATCH 01/30] dm: add missing unlock on in dm_keyslot_evict() commit 650266ac4c7230c89bcd1307acf5c9c92cfa85e2 upstream. We need to call dm_put_live_table() even if dm_get_live_table() returns NULL. Fixes: 9355a9eb21a5 ("dm: support key eviction from keyslot managers of underlying devices") Cc: stable@vger.kernel.org # v5.12+ Signed-off-by: Dan Carpenter Signed-off-by: Mikulas Patocka Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-table.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 9cacc49f2cb0..3dc5bc3d29d6 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1183,7 +1183,7 @@ static int dm_keyslot_evict(struct blk_crypto_profile *profile, t = dm_get_live_table(md, &srcu_idx); if (!t) - return 0; + goto put_live_table; for (unsigned int i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i); @@ -1194,6 +1194,7 @@ static int dm_keyslot_evict(struct blk_crypto_profile *profile, (void *)key); } +put_live_table: dm_put_live_table(md, srcu_idx); return 0; } From 61e0fc3312309867e5a3495329dad0286d2a5703 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Tue, 29 Apr 2025 01:09:33 +0200 Subject: [PATCH 02/30] fs/erofs/fileio: call erofs_onlinefolio_split() after bio_add_folio() commit bbfe756dc3062c1e934f06e5ba39c239aa953b92 upstream. If bio_add_folio() fails (because it is full), erofs_fileio_scan_folio() needs to submit the I/O request via erofs_fileio_rq_submit() and allocate a new I/O request with an empty `struct bio`. Then it retries the bio_add_folio() call. However, at this point, erofs_onlinefolio_split() has already been called which increments `folio->private`; the retry will call erofs_onlinefolio_split() again, but there will never be a matching erofs_onlinefolio_end() call. This leaves the folio locked forever and all waiters will be stuck in folio_wait_bit_common(). This bug has been added by commit ce63cb62d794 ("erofs: support unencoded inodes for fileio"), but was practically unreachable because there was room for 256 folios in the `struct bio` - until commit 9f74ae8c9ac9 ("erofs: shorten bvecs[] for file-backed mounts") which reduced the array capacity to 16 folios. It was now trivial to trigger the bug by manually invoking readahead from userspace, e.g.: posix_fadvise(fd, 0, st.st_size, POSIX_FADV_WILLNEED); This should be fixed by invoking erofs_onlinefolio_split() only after bio_add_folio() has succeeded. This is safe: asynchronous completions invoking erofs_onlinefolio_end() will not unlock the folio because erofs_fileio_scan_folio() is still holding a reference to be released by erofs_onlinefolio_end() at the end. Fixes: ce63cb62d794 ("erofs: support unencoded inodes for fileio") Fixes: 9f74ae8c9ac9 ("erofs: shorten bvecs[] for file-backed mounts") Cc: stable@vger.kernel.org Signed-off-by: Max Kellermann Reviewed-by: Gao Xiang Tested-by: Hongbo Li Link: https://lore.kernel.org/r/20250428230933.3422273-1-max.kellermann@ionos.com Signed-off-by: Gao Xiang Signed-off-by: Greg Kroah-Hartman --- fs/erofs/fileio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c index 17aed5f6c549..12e709d93445 100644 --- a/fs/erofs/fileio.c +++ b/fs/erofs/fileio.c @@ -150,10 +150,10 @@ io_retry: io->rq->bio.bi_iter.bi_sector = io->dev.m_pa >> 9; attached = 0; } - if (!attached++) - erofs_onlinefolio_split(folio); if (!bio_add_folio(&io->rq->bio, folio, len, cur)) goto io_retry; + if (!attached++) + erofs_onlinefolio_split(folio); io->dev.m_pa += len; } cur += len; From ff7d691921c7a51ea8bb90fd8ce4a06247764e5e Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 17 Jan 2025 09:09:34 +1030 Subject: [PATCH 03/30] Revert "btrfs: canonicalize the device path before adding it" commit 8fb1dcbbcc1ffe6ed7cf3f0f96d2737491dd1fbf upstream. This reverts commit 7e06de7c83a746e58d4701e013182af133395188. Commit 7e06de7c83a7 ("btrfs: canonicalize the device path before adding it") tries to make btrfs to use "/dev/mapper/*" name first, then any filename inside "/dev/" as the device path. This is mostly fine when there is only the root namespace involved, but when multiple namespace are involved, things can easily go wrong for the d_path() usage. As d_path() returns a file path that is namespace dependent, the resulted string may not make any sense in another namespace. Furthermore, the "/dev/" prefix checks itself is not reliable, one can still make a valid initramfs without devtmpfs, and fill all needed device nodes manually. Overall the userspace has all its might to pass whatever device path for mount, and we are not going to win the war trying to cover every corner case. So just revert that commit, and do no extra d_path() based file path sanity check. CC: stable@vger.kernel.org # 6.12+ Link: https://lore.kernel.org/linux-fsdevel/20250115185608.GA2223535@zen.localdomain/ Reviewed-by: Boris Burkov Signed-off-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 91 +--------------------------------------------- 1 file changed, 1 insertion(+), 90 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 587ac07cd194..8e6501860001 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -732,82 +732,6 @@ const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb) return has_metadata_uuid ? sb->metadata_uuid : sb->fsid; } -/* - * We can have very weird soft links passed in. - * One example is "/proc/self/fd/", which can be a soft link to - * a block device. - * - * But it's never a good idea to use those weird names. - * Here we check if the path (not following symlinks) is a good one inside - * "/dev/". - */ -static bool is_good_dev_path(const char *dev_path) -{ - struct path path = { .mnt = NULL, .dentry = NULL }; - char *path_buf = NULL; - char *resolved_path; - bool is_good = false; - int ret; - - if (!dev_path) - goto out; - - path_buf = kmalloc(PATH_MAX, GFP_KERNEL); - if (!path_buf) - goto out; - - /* - * Do not follow soft link, just check if the original path is inside - * "/dev/". - */ - ret = kern_path(dev_path, 0, &path); - if (ret) - goto out; - resolved_path = d_path(&path, path_buf, PATH_MAX); - if (IS_ERR(resolved_path)) - goto out; - if (strncmp(resolved_path, "/dev/", strlen("/dev/"))) - goto out; - is_good = true; -out: - kfree(path_buf); - path_put(&path); - return is_good; -} - -static int get_canonical_dev_path(const char *dev_path, char *canonical) -{ - struct path path = { .mnt = NULL, .dentry = NULL }; - char *path_buf = NULL; - char *resolved_path; - int ret; - - if (!dev_path) { - ret = -EINVAL; - goto out; - } - - path_buf = kmalloc(PATH_MAX, GFP_KERNEL); - if (!path_buf) { - ret = -ENOMEM; - goto out; - } - - ret = kern_path(dev_path, LOOKUP_FOLLOW, &path); - if (ret) - goto out; - resolved_path = d_path(&path, path_buf, PATH_MAX); - if (IS_ERR(resolved_path)) { - ret = PTR_ERR(resolved_path); - goto out; - } - ret = strscpy(canonical, resolved_path, PATH_MAX); -out: - kfree(path_buf); - path_put(&path); - return ret; -} - static bool is_same_device(struct btrfs_device *device, const char *new_path) { struct path old = { .mnt = NULL, .dentry = NULL }; @@ -1495,23 +1419,12 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, bool new_device_added = false; struct btrfs_device *device = NULL; struct file *bdev_file; - char *canonical_path = NULL; u64 bytenr; dev_t devt; int ret; lockdep_assert_held(&uuid_mutex); - if (!is_good_dev_path(path)) { - canonical_path = kmalloc(PATH_MAX, GFP_KERNEL); - if (canonical_path) { - ret = get_canonical_dev_path(path, canonical_path); - if (ret < 0) { - kfree(canonical_path); - canonical_path = NULL; - } - } - } /* * Avoid an exclusive open here, as the systemd-udev may initiate the * device scan which may race with the user's mount or mkfs command, @@ -1556,8 +1469,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, goto free_disk_super; } - device = device_list_add(canonical_path ? : path, disk_super, - &new_device_added); + device = device_list_add(path, disk_super, &new_device_added); if (!IS_ERR(device) && new_device_added) btrfs_free_stale_devices(device->devt, device); @@ -1566,7 +1478,6 @@ free_disk_super: error_bdev_put: fput(bdev_file); - kfree(canonical_path); return device; } From fe3da1bfd8d409b17aec47efc2f44b8d050a6521 Mon Sep 17 00:00:00 2001 From: Wojciech Dubowik Date: Thu, 24 Apr 2025 11:59:14 +0200 Subject: [PATCH 04/30] arm64: dts: imx8mm-verdin: Link reg_usdhc2_vqmmc to usdhc2 commit 5591ce0069ddda97cdbbea596bed53e698f399c2 upstream. Define vqmmc regulator-gpio for usdhc2 with vin-supply coming from LDO5. Without this definition LDO5 will be powered down, disabling SD card after bootup. This has been introduced in commit f5aab0438ef1 ("regulator: pca9450: Fix enable register for LDO5"). Fixes: 6a57f224f734 ("arm64: dts: freescale: add initial support for verdin imx8m mini") Fixes: f5aab0438ef1 ("regulator: pca9450: Fix enable register for LDO5") Tested-by: Manuel Traut Reviewed-by: Philippe Schenker Tested-by: Francesco Dolcini Reviewed-by: Francesco Dolcini Cc: stable@vger.kernel.org Signed-off-by: Wojciech Dubowik Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- .../boot/dts/freescale/imx8mm-verdin.dtsi | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi index aee79a50d0e2..d9b13c87f93b 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi @@ -165,6 +165,19 @@ startup-delay-us = <20000>; }; + reg_usdhc2_vqmmc: regulator-usdhc2-vqmmc { + compatible = "regulator-gpio"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usdhc2_vsel>; + gpios = <&gpio1 4 GPIO_ACTIVE_HIGH>; + regulator-max-microvolt = <3300000>; + regulator-min-microvolt = <1800000>; + states = <1800000 0x1>, + <3300000 0x0>; + regulator-name = "PMIC_USDHC_VSELECT"; + vin-supply = <®_nvcc_sd>; + }; + reserved-memory { #address-cells = <2>; #size-cells = <2>; @@ -290,7 +303,7 @@ "SODIMM_19", "", "", - "", + "PMIC_USDHC_VSELECT", "", "", "", @@ -801,6 +814,7 @@ pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_cd>; pinctrl-3 = <&pinctrl_usdhc2_sleep>, <&pinctrl_usdhc2_cd_sleep>; vmmc-supply = <®_usdhc2_vmmc>; + vqmmc-supply = <®_usdhc2_vqmmc>; }; &wdog1 { @@ -1222,13 +1236,17 @@ ; /* SODIMM 76 */ }; + pinctrl_usdhc2_vsel: usdhc2vselgrp { + fsl,pins = + ; /* PMIC_USDHC_VSELECT */ + }; + /* * Note: Due to ERR050080 we use discrete external on-module resistors pulling-up to the * on-module +V3.3_1.8_SD (LDO5) rail and explicitly disable the internal pull-ups here. */ pinctrl_usdhc2: usdhc2grp { fsl,pins = - , , /* SODIMM 78 */ , /* SODIMM 74 */ , /* SODIMM 80 */ @@ -1239,7 +1257,6 @@ pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp { fsl,pins = - , , , , @@ -1250,7 +1267,6 @@ pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp { fsl,pins = - , , , , @@ -1262,7 +1278,6 @@ /* Avoid backfeeding with removed card power */ pinctrl_usdhc2_sleep: usdhc2slpgrp { fsl,pins = - , , , , From 14ee85b74807ab05c897094ab194bf3b50080d20 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 10 Mar 2025 17:58:00 +0000 Subject: [PATCH 05/30] firmware: arm_scmi: Fix timeout checks on polling path commit c23c03bf1faa1e76be1eba35bad6da6a2a7c95ee upstream. Polling mode transactions wait for a reply busy-looping without holding a spinlock, but currently the timeout checks are based only on elapsed time: as a result we could hit a false positive whenever our busy-looping thread is pre-empted and scheduled out for a time greater than the polling timeout. Change the checks at the end of the busy-loop to make sure that the polling wasn't indeed successful or an out-of-order reply caused the polling to be forcibly terminated. Fixes: 31d2f803c19c ("firmware: arm_scmi: Add sync_cmds_completed_on_ret transport flag") Reported-by: Huangjie Closes: https://lore.kernel.org/arm-scmi/20250123083323.2363749-1-jackhuang021@gmail.com/ Signed-off-by: Cristian Marussi Cc: stable@vger.kernel.org # 5.18.x Message-Id: <20250310175800.1444293-1-cristian.marussi@arm.com> Signed-off-by: Sudeep Holla Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/arm_scmi/driver.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index f8934d049d68..993615fa490e 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -1219,7 +1219,8 @@ static void xfer_put(const struct scmi_protocol_handle *ph, } static bool scmi_xfer_done_no_timeout(struct scmi_chan_info *cinfo, - struct scmi_xfer *xfer, ktime_t stop) + struct scmi_xfer *xfer, ktime_t stop, + bool *ooo) { struct scmi_info *info = handle_to_scmi_info(cinfo->handle); @@ -1228,7 +1229,7 @@ static bool scmi_xfer_done_no_timeout(struct scmi_chan_info *cinfo, * in case of out-of-order receptions of delayed responses */ return info->desc->ops->poll_done(cinfo, xfer) || - try_wait_for_completion(&xfer->done) || + (*ooo = try_wait_for_completion(&xfer->done)) || ktime_after(ktime_get(), stop); } @@ -1245,15 +1246,17 @@ static int scmi_wait_for_reply(struct device *dev, const struct scmi_desc *desc, * itself to support synchronous commands replies. */ if (!desc->sync_cmds_completed_on_ret) { + bool ooo = false; + /* * Poll on xfer using transport provided .poll_done(); * assumes no completion interrupt was available. */ ktime_t stop = ktime_add_ms(ktime_get(), timeout_ms); - spin_until_cond(scmi_xfer_done_no_timeout(cinfo, - xfer, stop)); - if (ktime_after(ktime_get(), stop)) { + spin_until_cond(scmi_xfer_done_no_timeout(cinfo, xfer, + stop, &ooo)); + if (!ooo && !info->desc->ops->poll_done(cinfo, xfer)) { dev_err(dev, "timed out in resp(caller: %pS) - polling\n", (void *)_RET_IP_); From e0a8e3ca07be66064056efa97c79eebfc0756a74 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 2 May 2025 16:13:46 +0200 Subject: [PATCH 06/30] can: mcan: m_can_class_unregister(): fix order of unregistration calls commit 0713a1b3276b98c7dafbeefef00d7bc3a9119a84 upstream. If a driver is removed, the driver framework invokes the driver's remove callback. A CAN driver's remove function calls unregister_candev(), which calls net_device_ops::ndo_stop further down in the call stack for interfaces which are in the "up" state. The removal of the module causes a warning, as can_rx_offload_del() deletes the NAPI, while it is still active, because the interface is still up. To fix the warning, first unregister the network interface, which calls net_device_ops::ndo_stop, which disables the NAPI, and then call can_rx_offload_del(). Fixes: 1be37d3b0414 ("can: m_can: fix periph RX path: use rx-offload to ensure skbs are sent from softirq context") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250502-can-rx-offload-del-v1-3-59a9b131589d@pengutronix.de Reviewed-by: Markus Schneider-Pargmann Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/m_can/m_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 97cd8bbf2e32..3c2c1db9866d 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2456,9 +2456,9 @@ EXPORT_SYMBOL_GPL(m_can_class_register); void m_can_class_unregister(struct m_can_classdev *cdev) { + unregister_candev(cdev->net); if (cdev->is_peripheral) can_rx_offload_del(&cdev->offload); - unregister_candev(cdev->net); } EXPORT_SYMBOL_GPL(m_can_class_unregister); From be54b750c333a9db7c3b3686846bb06b07b011fe Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 30 Apr 2025 15:26:18 +0200 Subject: [PATCH 07/30] s390/pci: Fix missing check for zpci_create_device() error return commit 42420c50c68f3e95e90de2479464f420602229fc upstream. The zpci_create_device() function returns an error pointer that needs to be checked before dereferencing it as a struct zpci_dev pointer. Add the missing check in __clp_add() where it was missed when adding the scan_list in the fixed commit. Simply not adding the device to the scan list results in the previous behavior. Cc: stable@vger.kernel.org Fixes: 0467cdde8c43 ("s390/pci: Sort PCI functions prior to creating virtual busses") Signed-off-by: Niklas Schnelle Reviewed-by: Gerd Bayer Signed-off-by: Heiko Carstens Signed-off-by: Greg Kroah-Hartman --- arch/s390/pci/pci_clp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 74dac6da03d5..8edd13237a17 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -422,6 +422,8 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) return; } zdev = zpci_create_device(entry->fid, entry->fh, entry->config_state); + if (IS_ERR(zdev)) + return; list_add_tail(&zdev->entry, scan_list); } From 9423f6da825172b8dc60d4688ed3d147291c3be9 Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Thu, 24 Apr 2025 18:01:42 +0530 Subject: [PATCH 08/30] wifi: cfg80211: fix out-of-bounds access during multi-link element defragmentation commit 023c1f2f0609218103cbcb48e0104b144d4a16dc upstream. Currently during the multi-link element defragmentation process, the multi-link element length added to the total IEs length when calculating the length of remaining IEs after the multi-link element in cfg80211_defrag_mle(). This could lead to out-of-bounds access if the multi-link element or its corresponding fragment elements are the last elements in the IEs buffer. To address this issue, correctly calculate the remaining IEs length by deducting the multi-link element end offset from total IEs end offset. Cc: stable@vger.kernel.org Fixes: 2481b5da9c6b ("wifi: cfg80211: handle BSS data contained in ML probe responses") Signed-off-by: Veerendranath Jakkam Link: https://patch.msgid.link/20250424-fix_mle_defragmentation_oob_access-v1-1-84412a1743fa@quicinc.com Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 18e132cdea72..f0dd1f448d4d 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2644,7 +2644,7 @@ cfg80211_defrag_mle(const struct element *mle, const u8 *ie, size_t ielen, /* Required length for first defragmentation */ buf_len = mle->datalen - 1; for_each_element(elem, mle->data + mle->datalen, - ielen - sizeof(*mle) + mle->datalen) { + ie + ielen - mle->data - mle->datalen) { if (elem->id != WLAN_EID_FRAGMENT) break; From afa5cdce062a246ec3b465082d87e52768771667 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 2 May 2025 16:40:31 -0600 Subject: [PATCH 09/30] vfio/pci: Align huge faults to order commit c1d9dac0db168198b6f63f460665256dedad9b6e upstream. The vfio-pci huge_fault handler doesn't make any attempt to insert a mapping containing the faulting address, it only inserts mappings if the faulting address and resulting pfn are aligned. This works in a lot of cases, particularly in conjunction with QEMU where DMA mappings linearly fault the mmap. However, there are configurations where we don't get that linear faulting and pages are faulted on-demand. The scenario reported in the bug below is such a case, where the physical address width of the CPU is greater than that of the IOMMU, resulting in a VM where guest firmware has mapped device MMIO beyond the address width of the IOMMU. In this configuration, the MMIO is faulted on demand and tracing indicates that occasionally the faults generate a VM_FAULT_OOM. Given the use case, this results in a "error: kvm run failed Bad address", killing the VM. The host is not under memory pressure in this test, therefore it's suspected that VM_FAULT_OOM is actually the result of a NULL return from __pte_offset_map_lock() in the get_locked_pte() path from insert_pfn(). This suggests a potential race inserting a pte concurrent to a pmd, and maybe indicates some deficiency in the mm layer properly handling such a case. Nevertheless, Peter noted the inconsistency of vfio-pci's huge_fault handler where our mapping granularity depends on the alignment of the faulting address relative to the order rather than aligning the faulting address to the order to more consistently insert huge mappings. This change not only uses the page tables more consistently and efficiently, but as any fault to an aligned page results in the same mapping, the race condition suspected in the VM_FAULT_OOM is avoided. Reported-by: Adolfo Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220057 Fixes: 09dfc8a5f2ce ("vfio/pci: Fallback huge faults for unaligned pfn") Cc: stable@vger.kernel.org Tested-by: Adolfo Co-developed-by: Peter Xu Signed-off-by: Peter Xu Link: https://lore.kernel.org/r/20250502224035.3183451-1-alex.williamson@redhat.com Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/pci/vfio_pci_core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 1a4ed5a357d3..c9eaba227636 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1658,14 +1658,14 @@ static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf, { struct vm_area_struct *vma = vmf->vma; struct vfio_pci_core_device *vdev = vma->vm_private_data; - unsigned long pfn, pgoff = vmf->pgoff - vma->vm_pgoff; + unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1); + unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT; + unsigned long pfn = vma_to_pfn(vma) + pgoff; vm_fault_t ret = VM_FAULT_SIGBUS; - pfn = vma_to_pfn(vma) + pgoff; - - if (order && (pfn & ((1 << order) - 1) || - vmf->address & ((PAGE_SIZE << order) - 1) || - vmf->address + (PAGE_SIZE << order) > vma->vm_end)) { + if (order && (addr < vma->vm_start || + addr + (PAGE_SIZE << order) > vma->vm_end || + pfn & ((1 << order) - 1))) { ret = VM_FAULT_FALLBACK; goto out; } From c488f8b53e156d6dcc0514ef0afa3a33376b8f9e Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 30 Apr 2025 15:26:19 +0200 Subject: [PATCH 10/30] s390/pci: Fix duplicate pci_dev_put() in disable_slot() when PF has child VFs commit 05a2538f2b48500cf4e8a0a0ce76623cc5bafcf1 upstream. With commit bcb5d6c76903 ("s390/pci: introduce lock to synchronize state of zpci_dev's") the code to ignore power off of a PF that has child VFs was changed from a direct return to a goto to the unlock and pci_dev_put() section. The change however left the existing pci_dev_put() untouched resulting in a doubple put. This can subsequently cause a use after free if the struct pci_dev is released in an unexpected state. Fix this by removing the extra pci_dev_put(). Cc: stable@vger.kernel.org Fixes: bcb5d6c76903 ("s390/pci: introduce lock to synchronize state of zpci_dev's") Signed-off-by: Niklas Schnelle Reviewed-by: Gerd Bayer Signed-off-by: Heiko Carstens Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/s390_pci_hpc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index 055518ee354d..e9e9aaa91770 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -59,7 +59,6 @@ static int disable_slot(struct hotplug_slot *hotplug_slot) pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); if (pdev && pci_num_vf(pdev)) { - pci_dev_put(pdev); rc = -EBUSY; goto out; } From 12ef60f1b99ca63af8a65dabbbdf63c030eb1232 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 2 May 2025 16:13:44 +0200 Subject: [PATCH 11/30] can: mcp251xfd: mcp251xfd_remove(): fix order of unregistration calls commit 84f5eb833f53ae192baed4cfb8d9eaab43481fc9 upstream. If a driver is removed, the driver framework invokes the driver's remove callback. A CAN driver's remove function calls unregister_candev(), which calls net_device_ops::ndo_stop further down in the call stack for interfaces which are in the "up" state. With the mcp251xfd driver the removal of the module causes the following warning: | WARNING: CPU: 0 PID: 352 at net/core/dev.c:7342 __netif_napi_del_locked+0xc8/0xd8 as can_rx_offload_del() deletes the NAPI, while it is still active, because the interface is still up. To fix the warning, first unregister the network interface, which calls net_device_ops::ndo_stop, which disables the NAPI, and then call can_rx_offload_del(). Fixes: 55e5b97f003e ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250502-can-rx-offload-del-v1-1-59a9b131589d@pengutronix.de Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 3bc56517fe7a..dd0b3fb42f1b 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2174,8 +2174,8 @@ static void mcp251xfd_remove(struct spi_device *spi) struct mcp251xfd_priv *priv = spi_get_drvdata(spi); struct net_device *ndev = priv->ndev; - can_rx_offload_del(&priv->offload); mcp251xfd_unregister(priv); + can_rx_offload_del(&priv->offload); spi->max_speed_hz = priv->spi_max_speed_hz_orig; free_candev(ndev); } From 1b7b1cbc6e730f3a49428c5d0f917fcc92a5e6f7 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 2 May 2025 16:13:45 +0200 Subject: [PATCH 12/30] can: rockchip_canfd: rkcanfd_remove(): fix order of unregistration calls commit 037ada7a3181300218e4fd78bef6a741cfa7f808 upstream. If a driver is removed, the driver framework invokes the driver's remove callback. A CAN driver's remove function calls unregister_candev(), which calls net_device_ops::ndo_stop further down in the call stack for interfaces which are in the "up" state. The removal of the module causes a warning, as can_rx_offload_del() deletes the NAPI, while it is still active, because the interface is still up. To fix the warning, first unregister the network interface, which calls net_device_ops::ndo_stop, which disables the NAPI, and then call can_rx_offload_del(). Fixes: ff60bfbaf67f ("can: rockchip_canfd: add driver for Rockchip CAN-FD controller") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250502-can-rx-offload-del-v1-2-59a9b131589d@pengutronix.de Reviewed-by: Markus Schneider-Pargmann Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/rockchip/rockchip_canfd-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/rockchip/rockchip_canfd-core.c b/drivers/net/can/rockchip/rockchip_canfd-core.c index ac514766d431..07510b6f9073 100644 --- a/drivers/net/can/rockchip/rockchip_canfd-core.c +++ b/drivers/net/can/rockchip/rockchip_canfd-core.c @@ -942,8 +942,8 @@ static void rkcanfd_remove(struct platform_device *pdev) struct rkcanfd_priv *priv = platform_get_drvdata(pdev); struct net_device *ndev = priv->ndev; - can_rx_offload_del(&priv->offload); rkcanfd_unregister(priv); + can_rx_offload_del(&priv->offload); free_candev(ndev); } From c57301e332cc413fe0a7294a90725f4e21e9549d Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 30 Apr 2025 11:18:28 +0900 Subject: [PATCH 13/30] ksmbd: prevent rename with empty string commit 53e3e5babc0963a92d856a5ec0ce92c59f54bc12 upstream. Client can send empty newname string to ksmbd server. It will cause a kernel oops from d_alloc. This patch return the error when attempting to rename a file or directory with an empty new name string. Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/smb2pdu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 85348705f255..f0760d786502 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -633,6 +633,11 @@ smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls) return name; } + if (*name == '\0') { + kfree(name); + return ERR_PTR(-EINVAL); + } + if (*name == '\\') { pr_err("not allow directory name included leading slash\n"); kfree(name); From d62ba16563a86aae052f96d270b3b6f78fca154c Mon Sep 17 00:00:00 2001 From: Norbert Szetei Date: Fri, 2 May 2025 08:21:58 +0900 Subject: [PATCH 14/30] ksmbd: prevent out-of-bounds stream writes by validating *pos commit 0ca6df4f40cf4c32487944aaf48319cb6c25accc upstream. ksmbd_vfs_stream_write() did not validate whether the write offset (*pos) was within the bounds of the existing stream data length (v_len). If *pos was greater than or equal to v_len, this could lead to an out-of-bounds memory write. This patch adds a check to ensure *pos is less than v_len before proceeding. If the condition fails, -EINVAL is returned. Cc: stable@vger.kernel.org Signed-off-by: Norbert Szetei Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/vfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index a7694aae0b94..e059316be36f 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -443,6 +443,13 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos, goto out; } + if (v_len <= *pos) { + pr_err("stream write position %lld is out of bounds (stream length: %zd)\n", + *pos, v_len); + err = -EINVAL; + goto out; + } + if (v_len < size) { wbuf = kvzalloc(size, KSMBD_DEFAULT_GFP); if (!wbuf) { From 9e9841e232b51171ddf3bc4ee517d5d28dc8cad6 Mon Sep 17 00:00:00 2001 From: Sean Heelan Date: Tue, 6 May 2025 22:04:52 +0900 Subject: [PATCH 15/30] ksmbd: Fix UAF in __close_file_table_ids commit 36991c1ccde2d5a521577c448ffe07fcccfe104d upstream. A use-after-free is possible if one thread destroys the file via __ksmbd_close_fd while another thread holds a reference to it. The existing checks on fp->refcount are not sufficient to prevent this. The fix takes ft->lock around the section which removes the file from the file table. This prevents two threads acquiring the same file pointer via __close_file_table_ids, as well as the other functions which retrieve a file from the IDR and which already use this same lock. Cc: stable@vger.kernel.org Signed-off-by: Sean Heelan Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/vfs_cache.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 1f8fa3468173..dfed6fce8904 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -661,21 +661,40 @@ __close_file_table_ids(struct ksmbd_file_table *ft, bool (*skip)(struct ksmbd_tree_connect *tcon, struct ksmbd_file *fp)) { - unsigned int id; - struct ksmbd_file *fp; - int num = 0; + struct ksmbd_file *fp; + unsigned int id = 0; + int num = 0; - idr_for_each_entry(ft->idr, fp, id) { - if (skip(tcon, fp)) + while (1) { + write_lock(&ft->lock); + fp = idr_get_next(ft->idr, &id); + if (!fp) { + write_unlock(&ft->lock); + break; + } + + if (skip(tcon, fp) || + !atomic_dec_and_test(&fp->refcount)) { + id++; + write_unlock(&ft->lock); continue; + } set_close_state_blocked_works(fp); + idr_remove(ft->idr, fp->volatile_id); + fp->volatile_id = KSMBD_NO_FID; + write_unlock(&ft->lock); + + down_write(&fp->f_ci->m_lock); + list_del_init(&fp->node); + up_write(&fp->f_ci->m_lock); - if (!atomic_dec_and_test(&fp->refcount)) - continue; __ksmbd_close_fd(ft, fp); + num++; + id++; } + return num; } From ec334aaab74705cc515205e1da3cb369fdfd93cd Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Tue, 6 May 2025 16:28:54 +0200 Subject: [PATCH 16/30] openvswitch: Fix unsafe attribute parsing in output_userspace() commit 6beb6835c1fbb3f676aebb51a5fee6b77fed9308 upstream. This patch replaces the manual Netlink attribute iteration in output_userspace() with nla_for_each_nested(), which ensures that only well-formed attributes are processed. Fixes: ccb1352e76cf ("net: Add Open vSwitch kernel components.") Signed-off-by: Eelco Chaudron Acked-by: Ilya Maximets Acked-by: Aaron Conole Link: https://patch.msgid.link/0bd65949df61591d9171c0dc13e42cea8941da10.1746541734.git.echaudro@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/actions.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 61fea7baae5d..2f22ca59586f 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -975,8 +975,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, upcall.cmd = OVS_PACKET_CMD_ACTION; upcall.mru = OVS_CB(skb)->mru; - for (a = nla_data(attr), rem = nla_len(attr); rem > 0; - a = nla_next(a, &rem)) { + nla_for_each_nested(a, attr, rem) { switch (nla_type(a)) { case OVS_USERSPACE_ATTR_USERDATA: upcall.userdata = a; From 2148d34371b06dac696c0497a98a6bf905a51650 Mon Sep 17 00:00:00 2001 From: Wang Zhaolong Date: Wed, 30 Apr 2025 11:16:23 +0800 Subject: [PATCH 17/30] ksmbd: fix memory leak in parse_lease_state() [ Upstream commit eb4447bcce915b43b691123118893fca4f372a8f ] The previous patch that added bounds check for create lease context introduced a memory leak. When the bounds check fails, the function returns NULL without freeing the previously allocated lease_ctx_info structure. This patch fixes the issue by adding kfree(lreq) before returning NULL in both boundary check cases. Fixes: bab703ed8472 ("ksmbd: add bounds check for create lease context") Signed-off-by: Wang Zhaolong Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/oplock.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 81a29857b1e3..03f606afad93 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1496,7 +1496,7 @@ struct lease_ctx_info *parse_lease_state(void *open_req) if (le16_to_cpu(cc->DataOffset) + le32_to_cpu(cc->DataLength) < sizeof(struct create_lease_v2) - 4) - return NULL; + goto err_out; memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); lreq->req_state = lc->lcontext.LeaseState; @@ -1512,7 +1512,7 @@ struct lease_ctx_info *parse_lease_state(void *open_req) if (le16_to_cpu(cc->DataOffset) + le32_to_cpu(cc->DataLength) < sizeof(struct create_lease)) - return NULL; + goto err_out; memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); lreq->req_state = lc->lcontext.LeaseState; @@ -1521,6 +1521,9 @@ struct lease_ctx_info *parse_lease_state(void *open_req) lreq->version = 1; } return lreq; +err_out: + kfree(lreq); + return NULL; } /** From 5c3b8f05756b17c30b8272b63d80fba6b0c7934f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 24 Apr 2025 17:07:01 +0200 Subject: [PATCH 18/30] s390/entry: Fix last breaking event handling in case of stack corruption [ Upstream commit ae952eea6f4a7e2193f8721a5366049946e012e7 ] In case of stack corruption stack_invalid() is called and the expectation is that register r10 contains the last breaking event address. This dependency is quite subtle and broke a couple of years ago without that anybody noticed. Fix this by getting rid of the dependency and read the last breaking event address from lowcore. Fixes: 56e62a737028 ("s390: convert to generic entry") Acked-by: Ilya Leoshkevich Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- arch/s390/kernel/entry.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index a7de838f8031..669d335c87ab 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -636,7 +636,8 @@ SYM_CODE_START(stack_overflow) stmg %r0,%r7,__PT_R0(%r11) stmg %r8,%r9,__PT_PSW(%r11) mvc __PT_R8(64,%r11),0(%r14) - stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2 + GET_LC %r2 + mvc __PT_ORIG_GPR2(8,%r11),__LC_PGM_LAST_BREAK(%r2) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs jg kernel_stack_overflow From 98cd7ed92753090a714f0802d4434314526fe61d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 28 Apr 2025 16:29:54 -0700 Subject: [PATCH 19/30] sch_htb: make htb_deactivate() idempotent [ Upstream commit 3769478610135e82b262640252d90f6efb05be71 ] Alan reported a NULL pointer dereference in htb_next_rb_node() after we made htb_qlen_notify() idempotent. It turns out in the following case it introduced some regression: htb_dequeue_tree(): |-> fq_codel_dequeue() |-> qdisc_tree_reduce_backlog() |-> htb_qlen_notify() |-> htb_deactivate() |-> htb_next_rb_node() |-> htb_deactivate() For htb_next_rb_node(), after calling the 1st htb_deactivate(), the clprio[prio]->ptr could be already set to NULL, which means htb_next_rb_node() is vulnerable here. For htb_deactivate(), although we checked qlen before calling it, in case of qlen==0 after qdisc_tree_reduce_backlog(), we may call it again which triggers the warning inside. To fix the issues here, we need to: 1) Make htb_deactivate() idempotent, that is, simply return if we already call it before. 2) Make htb_next_rb_node() safe against ptr==NULL. Many thanks to Alan for testing and for the reproducer. Fixes: 5ba8b837b522 ("sch_htb: make htb_qlen_notify() idempotent") Reported-by: Alan J. Wylie Signed-off-by: Cong Wang Link: https://patch.msgid.link/20250428232955.1740419-2-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/sch_htb.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 12cccc84d58a..b2494d24a542 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -348,7 +348,8 @@ static void htb_add_to_wait_tree(struct htb_sched *q, */ static inline void htb_next_rb_node(struct rb_node **n) { - *n = rb_next(*n); + if (*n) + *n = rb_next(*n); } /** @@ -609,8 +610,8 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) */ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) { - WARN_ON(!cl->prio_activity); - + if (!cl->prio_activity) + return; htb_deactivate_prios(q, cl); cl->prio_activity = 0; } @@ -1485,8 +1486,6 @@ static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg) { struct htb_class *cl = (struct htb_class *)arg; - if (!cl->prio_activity) - return; htb_deactivate(qdisc_priv(sch), cl); } @@ -1740,8 +1739,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg, if (cl->parent) cl->parent->children--; - if (cl->prio_activity) - htb_deactivate(q, cl); + htb_deactivate(q, cl); if (cl->cmode != HTB_CAN_SEND) htb_safe_rb_erase(&cl->pq_node, @@ -1949,8 +1947,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* turn parent into inner node */ qdisc_purge_queue(parent->leaf.q); parent_qdisc = parent->leaf.q; - if (parent->prio_activity) - htb_deactivate(q, parent); + htb_deactivate(q, parent); /* remove from evt list because of level change */ if (parent->cmode != HTB_CAN_SEND) { From edd53ee790f3885bba157e99c79405b6aa1b78e5 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Tue, 12 Nov 2024 09:29:24 +0800 Subject: [PATCH 20/30] virtio_net: xsk: bind/unbind xsk for tx [ Upstream commit 21a4e3ce6dc7b0a3bc882ebe1cb921a40235ddb0 ] This patch implement the logic of bind/unbind xsk pool to sq and rq. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Link: https://patch.msgid.link/20241112012928.102478-10-xuanzhuo@linux.alibaba.com Signed-off-by: Jakub Kicinski Stable-dep-of: 4397684a292a ("virtio-net: free xsk_buffs on error in virtnet_xsk_pool_enable()") Signed-off-by: Sasha Levin --- drivers/net/virtio_net.c | 53 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 60027b439021..476c8a9cc494 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -298,6 +298,10 @@ struct send_queue { /* Record whether sq is in reset state. */ bool reset; + + struct xsk_buff_pool *xsk_pool; + + dma_addr_t xsk_hdr_dma_addr; }; /* Internal representation of a receive virtqueue */ @@ -501,6 +505,8 @@ struct virtio_net_common_hdr { }; }; +static struct virtio_net_common_hdr xsk_hdr; + static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf); static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq); static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, @@ -5556,6 +5562,29 @@ unreg: return err; } +static int virtnet_sq_bind_xsk_pool(struct virtnet_info *vi, + struct send_queue *sq, + struct xsk_buff_pool *pool) +{ + int err, qindex; + + qindex = sq - vi->sq; + + virtnet_tx_pause(vi, sq); + + err = virtqueue_reset(sq->vq, virtnet_sq_free_unused_buf); + if (err) { + netdev_err(vi->dev, "reset tx fail: tx queue index: %d err: %d\n", qindex, err); + pool = NULL; + } + + sq->xsk_pool = pool; + + virtnet_tx_resume(vi, sq); + + return err; +} + static int virtnet_xsk_pool_enable(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid) @@ -5564,6 +5593,7 @@ static int virtnet_xsk_pool_enable(struct net_device *dev, struct receive_queue *rq; struct device *dma_dev; struct send_queue *sq; + dma_addr_t hdr_dma; int err, size; if (vi->hdr_len > xsk_pool_get_headroom(pool)) @@ -5601,6 +5631,11 @@ static int virtnet_xsk_pool_enable(struct net_device *dev, if (!rq->xsk_buffs) return -ENOMEM; + hdr_dma = virtqueue_dma_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len, + DMA_TO_DEVICE, 0); + if (virtqueue_dma_mapping_error(sq->vq, hdr_dma)) + return -ENOMEM; + err = xsk_pool_dma_map(pool, dma_dev, 0); if (err) goto err_xsk_map; @@ -5609,11 +5644,24 @@ static int virtnet_xsk_pool_enable(struct net_device *dev, if (err) goto err_rq; + err = virtnet_sq_bind_xsk_pool(vi, sq, pool); + if (err) + goto err_sq; + + /* Now, we do not support tx offload(such as tx csum), so all the tx + * virtnet hdr is zero. So all the tx packets can share a single hdr. + */ + sq->xsk_hdr_dma_addr = hdr_dma; + return 0; +err_sq: + virtnet_rq_bind_xsk_pool(vi, rq, NULL); err_rq: xsk_pool_dma_unmap(pool, 0); err_xsk_map: + virtqueue_dma_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len, + DMA_TO_DEVICE, 0); return err; } @@ -5622,19 +5670,24 @@ static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid) struct virtnet_info *vi = netdev_priv(dev); struct xsk_buff_pool *pool; struct receive_queue *rq; + struct send_queue *sq; int err; if (qid >= vi->curr_queue_pairs) return -EINVAL; + sq = &vi->sq[qid]; rq = &vi->rq[qid]; pool = rq->xsk_pool; err = virtnet_rq_bind_xsk_pool(vi, rq, NULL); + err |= virtnet_sq_bind_xsk_pool(vi, sq, NULL); xsk_pool_dma_unmap(pool, 0); + virtqueue_dma_unmap_single_attrs(sq->vq, sq->xsk_hdr_dma_addr, + vi->hdr_len, DMA_TO_DEVICE, 0); kvfree(rq->xsk_buffs); return err; From 94a6f6c204abb2b2dcd2ce287536cc924469cfb5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 30 Apr 2025 09:38:36 -0700 Subject: [PATCH 21/30] virtio-net: free xsk_buffs on error in virtnet_xsk_pool_enable() [ Upstream commit 4397684a292a71fbc1e815c3e283f7490ddce5ae ] The selftests added to our CI by Bui Quang Minh recently reveals that there is a mem leak on the error path of virtnet_xsk_pool_enable(): unreferenced object 0xffff88800a68a000 (size 2048): comm "xdp_helper", pid 318, jiffies 4294692778 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 0): __kvmalloc_node_noprof+0x402/0x570 virtnet_xsk_pool_enable+0x293/0x6a0 (drivers/net/virtio_net.c:5882) xp_assign_dev+0x369/0x670 (net/xdp/xsk_buff_pool.c:226) xsk_bind+0x6a5/0x1ae0 __sys_bind+0x15e/0x230 __x64_sys_bind+0x72/0xb0 do_syscall_64+0xc1/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f Acked-by: Jason Wang Fixes: e9f3962441c0 ("virtio_net: xsk: rx: support fill with xsk buffer") Link: https://patch.msgid.link/20250430163836.3029761-1-kuba@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/virtio_net.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 476c8a9cc494..9493b1134875 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -5633,8 +5633,10 @@ static int virtnet_xsk_pool_enable(struct net_device *dev, hdr_dma = virtqueue_dma_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len, DMA_TO_DEVICE, 0); - if (virtqueue_dma_mapping_error(sq->vq, hdr_dma)) - return -ENOMEM; + if (virtqueue_dma_mapping_error(sq->vq, hdr_dma)) { + err = -ENOMEM; + goto err_free_buffs; + } err = xsk_pool_dma_map(pool, dma_dev, 0); if (err) @@ -5662,6 +5664,8 @@ err_rq: err_xsk_map: virtqueue_dma_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len, DMA_TO_DEVICE, 0); +err_free_buffs: + kvfree(rq->xsk_buffs); return err; } From 35be4c0cdf46a8ecf16733ec78c920213c7442b1 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 3 May 2025 00:57:52 +0200 Subject: [PATCH 22/30] gre: Fix again IPv6 link-local address generation. [ Upstream commit 3e6a0243ff002ddbd7ee18a8974ae61d2e6ed00d ] Use addrconf_addr_gen() to generate IPv6 link-local addresses on GRE devices in most cases and fall back to using add_v4_addrs() only in case the GRE configuration is incompatible with addrconf_addr_gen(). GRE used to use addrconf_addr_gen() until commit e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") restricted this use to gretap and ip6gretap devices, and created add_v4_addrs() (borrowed from SIT) for non-Ethernet GRE ones. The original problem came when commit 9af28511be10 ("addrconf: refuse isatap eui64 for INADDR_ANY") made __ipv6_isatap_ifid() fail when its addr parameter was 0. The commit says that this would create an invalid address, however, I couldn't find any RFC saying that the generated interface identifier would be wrong. Anyway, since gre over IPv4 devices pass their local tunnel address to __ipv6_isatap_ifid(), that commit broke their IPv6 link-local address generation when the local address was unspecified. Then commit e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") tried to fix that case by defining add_v4_addrs() and calling it to generate the IPv6 link-local address instead of using addrconf_addr_gen() (apart for gretap and ip6gretap devices, which would still use the regular addrconf_addr_gen(), since they have a MAC address). That broke several use cases because add_v4_addrs() isn't properly integrated into the rest of IPv6 Neighbor Discovery code. Several of these shortcomings have been fixed over time, but add_v4_addrs() remains broken on several aspects. In particular, it doesn't send any Router Sollicitations, so the SLAAC process doesn't start until the interface receives a Router Advertisement. Also, add_v4_addrs() mostly ignores the address generation mode of the interface (/proc/sys/net/ipv6/conf/*/addr_gen_mode), thus breaking the IN6_ADDR_GEN_MODE_RANDOM and IN6_ADDR_GEN_MODE_STABLE_PRIVACY cases. Fix the situation by using add_v4_addrs() only in the specific scenario where the normal method would fail. That is, for interfaces that have all of the following characteristics: * run over IPv4, * transport IP packets directly, not Ethernet (that is, not gretap interfaces), * tunnel endpoint is INADDR_ANY (that is, 0), * device address generation mode is EUI64. In all other cases, revert back to the regular addrconf_addr_gen(). Also, remove the special case for ip6gre interfaces in add_v4_addrs(), since ip6gre devices now always use addrconf_addr_gen() instead. Note: This patch was originally applied as commit 183185a18ff9 ("gre: Fix IPv6 link-local address generation."). However, it was then reverted by commit fc486c2d060f ("Revert "gre: Fix IPv6 link-local address generation."") because it uncovered another bug that ended up breaking net/forwarding/ip6gre_custom_multipath_hash.sh. That other bug has now been fixed by commit 4d0ab3a6885e ("ipv6: Start path selection from the first nexthop"). Therefore we can now revive this GRE patch (no changes since original commit 183185a18ff9 ("gre: Fix IPv6 link-local address generation."). Fixes: e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/a88cc5c4811af36007645d610c95102dccb360a6.1746225214.git.gnault@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv6/addrconf.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f5d49162f798..16ba3bb12fc4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3237,16 +3237,13 @@ static void add_v4_addrs(struct inet6_dev *idev) struct in6_addr addr; struct net_device *dev; struct net *net = dev_net(idev->dev); - int scope, plen, offset = 0; + int scope, plen; u32 pflags = 0; ASSERT_RTNL(); memset(&addr, 0, sizeof(struct in6_addr)); - /* in case of IP6GRE the dev_addr is an IPv6 and therefore we use only the last 4 bytes */ - if (idev->dev->addr_len == sizeof(struct in6_addr)) - offset = sizeof(struct in6_addr) - 4; - memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4); + memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) { scope = IPV6_ADDR_COMPATv4; @@ -3557,7 +3554,13 @@ static void addrconf_gre_config(struct net_device *dev) return; } - if (dev->type == ARPHRD_ETHER) { + /* Generate the IPv6 link-local address using addrconf_addr_gen(), + * unless we have an IPv4 GRE device not bound to an IP address and + * which is in EUI64 mode (as __ipv6_isatap_ifid() would fail in this + * case). Such devices fall back to add_v4_addrs() instead. + */ + if (!(dev->type == ARPHRD_IPGRE && *(__be32 *)dev->dev_addr == 0 && + idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)) { addrconf_addr_gen(idev, true); return; } From aac9d5fa537b0c19f6d3af3c8a124028510bb647 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Mon, 5 May 2025 02:07:32 +0100 Subject: [PATCH 23/30] net: ethernet: mtk_eth_soc: reset all TX queues on DMA free [ Upstream commit 4db6c75124d871fbabf8243f947d34cc7e0697fc ] The purpose of resetting the TX queue is to reset the byte and packet count as well as to clear the software flow control XOFF bit. MediaTek developers pointed out that netdev_reset_queue would only resets queue 0 of the network device. Queues that are not reset may cause unexpected issues. Packets may stop being sent after reset and "transmit timeout" log may be displayed. Import fix from MediaTek's SDK to resolve this issue. Link: https://git01.mediatek.com/plugins/gitiles/openwrt/feeds/mtk-openwrt-feeds/+/319c0d9905579a46dc448579f892f364f1f84818 Fixes: f63959c7eec31 ("net: ethernet: mtk_eth_soc: implement multi-queue support for per-port queues") Signed-off-by: Daniel Golle Link: https://patch.msgid.link/c9ff9adceac4f152239a0f65c397f13547639175.1746406763.git.daniel@makrotopia.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index c5d5b9ff8bc4..d50017012ca1 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -3140,11 +3140,19 @@ static int mtk_dma_init(struct mtk_eth *eth) static void mtk_dma_free(struct mtk_eth *eth) { const struct mtk_soc_data *soc = eth->soc; - int i; + int i, j, txqs = 1; + + if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) + txqs = MTK_QDMA_NUM_QUEUES; + + for (i = 0; i < MTK_MAX_DEVS; i++) { + if (!eth->netdev[i]) + continue; + + for (j = 0; j < txqs; j++) + netdev_tx_reset_subqueue(eth->netdev[i], j); + } - for (i = 0; i < MTK_MAX_DEVS; i++) - if (eth->netdev[i]) - netdev_reset_queue(eth->netdev[i]); if (!MTK_HAS_CAPS(soc->caps, MTK_SRAM) && eth->scratch_ring) { dma_free_coherent(eth->dma_dev, MTK_QDMA_RING_SIZE * soc->tx.desc_size, From 79a6945e3de54339888fd3febcf4aef63dc735b8 Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Mon, 5 May 2025 02:07:58 +0100 Subject: [PATCH 24/30] net: ethernet: mtk_eth_soc: do not reset PSE when setting FE [ Upstream commit e8716b5b0dff1b3d523b4a83fd5e94d57b887c5c ] Remove redundant PSE reset. When setting FE register there is no need to reset PSE, doing so may cause FE to work abnormal. Link: https://git01.mediatek.com/plugins/gitiles/openwrt/feeds/mtk-openwrt-feeds/+/3a5223473e086a4b54a2b9a44df7d9ddcc2bc75a Fixes: dee4dd10c79aa ("net: ethernet: mtk_eth_soc: ppe: add support for multiple PPEs") Signed-off-by: Frank Wunderlich Link: https://patch.msgid.link/18f0ac7d83f82defa3342c11ef0d1362f6b81e88.1746406763.git.daniel@makrotopia.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index d50017012ca1..0a13f7c4684e 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -3427,9 +3427,6 @@ static int mtk_open(struct net_device *dev) } mtk_gdm_config(eth, target_mac->id, gdm_config); } - /* Reset and enable PSE */ - mtk_w32(eth, RST_GL_PSE, MTK_RST_GL); - mtk_w32(eth, 0, MTK_RST_GL); napi_enable(ð->tx_napi); napi_enable(ð->rx_napi); From 2ecce25ea296f328d79070ee36229a15aeeb7aca Mon Sep 17 00:00:00 2001 From: Antonios Salios Date: Fri, 25 Apr 2025 13:17:45 +0200 Subject: [PATCH 25/30] can: m_can: m_can_class_allocate_dev(): initialize spin lock on device probe [ Upstream commit dcaeeb8ae84c5506ebc574732838264f3887738c ] The spin lock tx_handling_spinlock in struct m_can_classdev is not being initialized. This leads the following spinlock bad magic complaint from the kernel, eg. when trying to send CAN frames with cansend from can-utils: | BUG: spinlock bad magic on CPU#0, cansend/95 | lock: 0xff60000002ec1010, .magic: 00000000, .owner: /-1, .owner_cpu: 0 | CPU: 0 UID: 0 PID: 95 Comm: cansend Not tainted 6.15.0-rc3-00032-ga79be02bba5c #5 NONE | Hardware name: MachineWare SIM-V (DT) | Call Trace: | [] dump_backtrace+0x1c/0x24 | [] show_stack+0x28/0x34 | [] dump_stack_lvl+0x4a/0x68 | [] dump_stack+0x14/0x1c | [] spin_dump+0x62/0x6e | [] do_raw_spin_lock+0xd0/0x142 | [] _raw_spin_lock_irqsave+0x20/0x2c | [] m_can_start_xmit+0x90/0x34a | [] dev_hard_start_xmit+0xa6/0xee | [] sch_direct_xmit+0x114/0x292 | [] __dev_queue_xmit+0x3b0/0xaa8 | [] can_send+0xc6/0x242 | [] raw_sendmsg+0x1a8/0x36c | [] sock_write_iter+0x9a/0xee | [] vfs_write+0x184/0x3a6 | [] ksys_write+0xa0/0xc0 | [] __riscv_sys_write+0x14/0x1c | [] do_trap_ecall_u+0x168/0x212 | [] handle_exception+0x146/0x152 Initializing the spin lock in m_can_class_allocate_dev solves that problem. Fixes: 1fa80e23c150 ("can: m_can: Introduce a tx_fifo_in_flight counter") Signed-off-by: Antonios Salios Reviewed-by: Vincent Mailhol Link: https://patch.msgid.link/20250425111744.37604-2-antonios@mwa.re Reviewed-by: Markus Schneider-Pargmann Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/m_can/m_can.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 3c2c1db9866d..dbd4d8796f9b 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2372,6 +2372,7 @@ struct m_can_classdev *m_can_class_allocate_dev(struct device *dev, SET_NETDEV_DEV(net_dev, dev); m_can_of_parse_mram(class_dev, mram_config_vals); + spin_lock_init(&class_dev->tx_handling_spinlock); out: return class_dev; } From 8f24cc6a72eb5ac22f82a3e8dbc837f70d00f7e0 Mon Sep 17 00:00:00 2001 From: Kelsey Maes Date: Wed, 30 Apr 2025 09:15:01 -0700 Subject: [PATCH 26/30] can: mcp251xfd: fix TDC setting for low data bit rates [ Upstream commit 5e1663810e11c64956aa7e280cf74b2f3284d816 ] The TDC is currently hardcoded enabled. This means that even for lower CAN-FD data bitrates (with a DBRP (data bitrate prescaler) > 2) a TDC is configured. This leads to a bus-off condition. ISO 11898-1 section 11.3.3 says "Transmitter delay compensation" (TDC) is only applicable if DBRP is 1 or 2. To fix the problem, switch the driver to use the TDC calculation provided by the CAN driver framework (which respects ISO 11898-1 section 11.3.3). This has the positive side effect that userspace can control TDC as needed. Demonstration of the feature in action: | $ ip link set can0 up type can bitrate 125000 dbitrate 500000 fd on | $ ip -details link show can0 | 3: can0: mtu 72 qdisc pfifo_fast state UP mode DEFAULT group default qlen 10 | link/can promiscuity 0 allmulti 0 minmtu 0 maxmtu 0 | can state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0 | bitrate 125000 sample-point 0.875 | tq 50 prop-seg 69 phase-seg1 70 phase-seg2 20 sjw 10 brp 2 | mcp251xfd: tseg1 2..256 tseg2 1..128 sjw 1..128 brp 1..256 brp_inc 1 | dbitrate 500000 dsample-point 0.875 | dtq 125 dprop-seg 6 dphase-seg1 7 dphase-seg2 2 dsjw 1 dbrp 5 | mcp251xfd: dtseg1 1..32 dtseg2 1..16 dsjw 1..16 dbrp 1..256 dbrp_inc 1 | tdcv 0..63 tdco 0..63 | clock 40000000 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 tso_max_size 65536 tso_max_segs 65535 gro_max_size 65536 parentbus spi parentdev spi0.0 | $ ip link set can0 up type can bitrate 1000000 dbitrate 4000000 fd on | $ ip -details link show can0 | 3: can0: mtu 72 qdisc pfifo_fast state UP mode DEFAULT group default qlen 10 | link/can promiscuity 0 allmulti 0 minmtu 0 maxmtu 0 | can state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0 | bitrate 1000000 sample-point 0.750 | tq 25 prop-seg 14 phase-seg1 15 phase-seg2 10 sjw 5 brp 1 | mcp251xfd: tseg1 2..256 tseg2 1..128 sjw 1..128 brp 1..256 brp_inc 1 | dbitrate 4000000 dsample-point 0.700 | dtq 25 dprop-seg 3 dphase-seg1 3 dphase-seg2 3 dsjw 1 dbrp 1 | tdco 7 | mcp251xfd: dtseg1 1..32 dtseg2 1..16 dsjw 1..16 dbrp 1..256 dbrp_inc 1 | tdcv 0..63 tdco 0..63 | clock 40000000 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 tso_max_size 65536 tso_max_segs 65535 gro_max_size 65536 parentbus spi parentdev spi0.0 There has been some confusion about the MCP2518FD using a relative or absolute TDCO due to the datasheet specifying a range of [-64,63]. I have a custom board with a 40 MHz clock and an estimated loop delay of 100 to 216 ns. During testing at a data bit rate of 4 Mbit/s I found that using can_get_relative_tdco() resulted in bus-off errors. The final TDCO value was 1 which corresponds to a 10% SSP in an absolute configuration. This behavior is expected if the TDCO value is really absolute and not relative. Using priv->can.tdc.tdco instead results in a final TDCO of 8, setting the SSP at exactly 80%. This configuration works. The automatic, manual, and off TDC modes were tested at speeds up to, and including, 8 Mbit/s on real hardware and behave as expected. Fixes: 55e5b97f003e ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN") Reported-by: Kelsey Maes Closes: https://lore.kernel.org/all/C2121586-C87F-4B23-A933-845362C29CA1@vpprocess.com Reviewed-by: Vincent Mailhol Signed-off-by: Kelsey Maes Link: https://patch.msgid.link/20250430161501.79370-1-kelsey@vpprocess.com [mkl: add comment] Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- .../net/can/spi/mcp251xfd/mcp251xfd-core.c | 40 +++++++++++++++---- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index dd0b3fb42f1b..c30b04f8fc0d 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -75,6 +75,24 @@ static const struct can_bittiming_const mcp251xfd_data_bittiming_const = { .brp_inc = 1, }; +/* The datasheet of the mcp2518fd (DS20006027B) specifies a range of + * [-64,63] for TDCO, indicating a relative TDCO. + * + * Manual tests have shown, that using a relative TDCO configuration + * results in bus off, while an absolute configuration works. + * + * For TDCO use the max value (63) from the data sheet, but 0 as the + * minimum. + */ +static const struct can_tdc_const mcp251xfd_tdc_const = { + .tdcv_min = 0, + .tdcv_max = 63, + .tdco_min = 0, + .tdco_max = 63, + .tdcf_min = 0, + .tdcf_max = 0, +}; + static const char *__mcp251xfd_get_model_str(enum mcp251xfd_model model) { switch (model) { @@ -510,8 +528,7 @@ static int mcp251xfd_set_bittiming(const struct mcp251xfd_priv *priv) { const struct can_bittiming *bt = &priv->can.bittiming; const struct can_bittiming *dbt = &priv->can.data_bittiming; - u32 val = 0; - s8 tdco; + u32 tdcmod, val = 0; int err; /* CAN Control Register @@ -575,11 +592,16 @@ static int mcp251xfd_set_bittiming(const struct mcp251xfd_priv *priv) return err; /* Transmitter Delay Compensation */ - tdco = clamp_t(int, dbt->brp * (dbt->prop_seg + dbt->phase_seg1), - -64, 63); - val = FIELD_PREP(MCP251XFD_REG_TDC_TDCMOD_MASK, - MCP251XFD_REG_TDC_TDCMOD_AUTO) | - FIELD_PREP(MCP251XFD_REG_TDC_TDCO_MASK, tdco); + if (priv->can.ctrlmode & CAN_CTRLMODE_TDC_AUTO) + tdcmod = MCP251XFD_REG_TDC_TDCMOD_AUTO; + else if (priv->can.ctrlmode & CAN_CTRLMODE_TDC_MANUAL) + tdcmod = MCP251XFD_REG_TDC_TDCMOD_MANUAL; + else + tdcmod = MCP251XFD_REG_TDC_TDCMOD_DISABLED; + + val = FIELD_PREP(MCP251XFD_REG_TDC_TDCMOD_MASK, tdcmod) | + FIELD_PREP(MCP251XFD_REG_TDC_TDCV_MASK, priv->can.tdc.tdcv) | + FIELD_PREP(MCP251XFD_REG_TDC_TDCO_MASK, priv->can.tdc.tdco); return regmap_write(priv->map_reg, MCP251XFD_REG_TDC, val); } @@ -2083,10 +2105,12 @@ static int mcp251xfd_probe(struct spi_device *spi) priv->can.do_get_berr_counter = mcp251xfd_get_berr_counter; priv->can.bittiming_const = &mcp251xfd_bittiming_const; priv->can.data_bittiming_const = &mcp251xfd_data_bittiming_const; + priv->can.tdc_const = &mcp251xfd_tdc_const; priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_BERR_REPORTING | CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO | - CAN_CTRLMODE_CC_LEN8_DLC; + CAN_CTRLMODE_CC_LEN8_DLC | CAN_CTRLMODE_TDC_AUTO | + CAN_CTRLMODE_TDC_MANUAL; set_bit(MCP251XFD_FLAGS_DOWN, priv->flags); priv->ndev = ndev; priv->spi = spi; From c33927f3858c555e7240e076f33a43143c2104a8 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 29 Apr 2025 09:05:55 +0200 Subject: [PATCH 27/30] can: gw: fix RCU/BH usage in cgw_create_job() [ Upstream commit 511e64e13d8cc72853275832e3f372607466c18c ] As reported by Sebastian Andrzej Siewior the use of local_bh_disable() is only feasible in uni processor systems to update the modification rules. The usual use-case to update the modification rules is to update the data of the modifications but not the modification types (AND/OR/XOR/SET) or the checksum functions itself. To omit additional memory allocations to maintain fast modification switching times, the modification description space is doubled at gw-job creation time so that only the reference to the active modification description is changed under rcu protection. Rename cgw_job::mod to cf_mod and make it a RCU pointer. Allocate in cgw_create_job() and free it together with cgw_job in cgw_job_free_rcu(). Update all users to dereference cgw_job::cf_mod with a RCU accessor and if possible once. [bigeasy: Replace mod1/mod2 from the Oliver's original patch with dynamic allocation, use RCU annotation and accessor] Reported-by: Sebastian Andrzej Siewior Closes: https://lore.kernel.org/linux-can/20231031112349.y0aLoBrz@linutronix.de/ Fixes: dd895d7f21b2 ("can: cangw: introduce optional uid to reference created routing jobs") Tested-by: Oliver Hartkopp Signed-off-by: Oliver Hartkopp Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20250429070555.cs-7b_eZ@linutronix.de Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- net/can/gw.c | 151 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 91 insertions(+), 60 deletions(-) diff --git a/net/can/gw.c b/net/can/gw.c index 37528826935e..e65500c52bf5 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -130,7 +130,7 @@ struct cgw_job { u32 handled_frames; u32 dropped_frames; u32 deleted_frames; - struct cf_mod mod; + struct cf_mod __rcu *cf_mod; union { /* CAN frame data source */ struct net_device *dev; @@ -459,6 +459,7 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) struct cgw_job *gwj = (struct cgw_job *)data; struct canfd_frame *cf; struct sk_buff *nskb; + struct cf_mod *mod; int modidx = 0; /* process strictly Classic CAN or CAN FD frames */ @@ -506,7 +507,8 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) * When there is at least one modification function activated, * we need to copy the skb as we want to modify skb->data. */ - if (gwj->mod.modfunc[0]) + mod = rcu_dereference(gwj->cf_mod); + if (mod->modfunc[0]) nskb = skb_copy(skb, GFP_ATOMIC); else nskb = skb_clone(skb, GFP_ATOMIC); @@ -529,8 +531,8 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) cf = (struct canfd_frame *)nskb->data; /* perform preprocessed modification functions if there are any */ - while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx]) - (*gwj->mod.modfunc[modidx++])(cf, &gwj->mod); + while (modidx < MAX_MODFUNCTIONS && mod->modfunc[modidx]) + (*mod->modfunc[modidx++])(cf, mod); /* Has the CAN frame been modified? */ if (modidx) { @@ -546,11 +548,11 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) } /* check for checksum updates */ - if (gwj->mod.csumfunc.crc8) - (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8); + if (mod->csumfunc.crc8) + (*mod->csumfunc.crc8)(cf, &mod->csum.crc8); - if (gwj->mod.csumfunc.xor) - (*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor); + if (mod->csumfunc.xor) + (*mod->csumfunc.xor)(cf, &mod->csum.xor); } /* clear the skb timestamp if not configured the other way */ @@ -581,9 +583,20 @@ static void cgw_job_free_rcu(struct rcu_head *rcu_head) { struct cgw_job *gwj = container_of(rcu_head, struct cgw_job, rcu); + /* cgw_job::cf_mod is always accessed from the same cgw_job object within + * the same RCU read section. Once cgw_job is scheduled for removal, + * cf_mod can also be removed without mandating an additional grace period. + */ + kfree(rcu_access_pointer(gwj->cf_mod)); kmem_cache_free(cgw_cache, gwj); } +/* Return cgw_job::cf_mod with RTNL protected section */ +static struct cf_mod *cgw_job_cf_mod(struct cgw_job *gwj) +{ + return rcu_dereference_protected(gwj->cf_mod, rtnl_is_locked()); +} + static int cgw_notifier(struct notifier_block *nb, unsigned long msg, void *ptr) { @@ -616,6 +629,7 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, { struct rtcanmsg *rtcan; struct nlmsghdr *nlh; + struct cf_mod *mod; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtcan), flags); if (!nlh) @@ -650,82 +664,83 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, goto cancel; } + mod = cgw_job_cf_mod(gwj); if (gwj->flags & CGW_FLAGS_CAN_FD) { struct cgw_fdframe_mod mb; - if (gwj->mod.modtype.and) { - memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.and; + if (mod->modtype.and) { + memcpy(&mb.cf, &mod->modframe.and, sizeof(mb.cf)); + mb.modtype = mod->modtype.and; if (nla_put(skb, CGW_FDMOD_AND, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.or) { - memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.or; + if (mod->modtype.or) { + memcpy(&mb.cf, &mod->modframe.or, sizeof(mb.cf)); + mb.modtype = mod->modtype.or; if (nla_put(skb, CGW_FDMOD_OR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.xor) { - memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.xor; + if (mod->modtype.xor) { + memcpy(&mb.cf, &mod->modframe.xor, sizeof(mb.cf)); + mb.modtype = mod->modtype.xor; if (nla_put(skb, CGW_FDMOD_XOR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.set) { - memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.set; + if (mod->modtype.set) { + memcpy(&mb.cf, &mod->modframe.set, sizeof(mb.cf)); + mb.modtype = mod->modtype.set; if (nla_put(skb, CGW_FDMOD_SET, sizeof(mb), &mb) < 0) goto cancel; } } else { struct cgw_frame_mod mb; - if (gwj->mod.modtype.and) { - memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.and; + if (mod->modtype.and) { + memcpy(&mb.cf, &mod->modframe.and, sizeof(mb.cf)); + mb.modtype = mod->modtype.and; if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.or) { - memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.or; + if (mod->modtype.or) { + memcpy(&mb.cf, &mod->modframe.or, sizeof(mb.cf)); + mb.modtype = mod->modtype.or; if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.xor) { - memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.xor; + if (mod->modtype.xor) { + memcpy(&mb.cf, &mod->modframe.xor, sizeof(mb.cf)); + mb.modtype = mod->modtype.xor; if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.set) { - memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.set; + if (mod->modtype.set) { + memcpy(&mb.cf, &mod->modframe.set, sizeof(mb.cf)); + mb.modtype = mod->modtype.set; if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0) goto cancel; } } - if (gwj->mod.uid) { - if (nla_put_u32(skb, CGW_MOD_UID, gwj->mod.uid) < 0) + if (mod->uid) { + if (nla_put_u32(skb, CGW_MOD_UID, mod->uid) < 0) goto cancel; } - if (gwj->mod.csumfunc.crc8) { + if (mod->csumfunc.crc8) { if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN, - &gwj->mod.csum.crc8) < 0) + &mod->csum.crc8) < 0) goto cancel; } - if (gwj->mod.csumfunc.xor) { + if (mod->csumfunc.xor) { if (nla_put(skb, CGW_CS_XOR, CGW_CS_XOR_LEN, - &gwj->mod.csum.xor) < 0) + &mod->csum.xor) < 0) goto cancel; } @@ -1059,7 +1074,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct rtcanmsg *r; struct cgw_job *gwj; - struct cf_mod mod; + struct cf_mod *mod; struct can_can_gw ccgw; u8 limhops = 0; int err = 0; @@ -1078,37 +1093,48 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, if (r->gwtype != CGW_TYPE_CAN_CAN) return -EINVAL; - err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops); - if (err < 0) - return err; + mod = kmalloc(sizeof(*mod), GFP_KERNEL); + if (!mod) + return -ENOMEM; - if (mod.uid) { + err = cgw_parse_attr(nlh, mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops); + if (err < 0) + goto out_free_cf; + + if (mod->uid) { ASSERT_RTNL(); /* check for updating an existing job with identical uid */ hlist_for_each_entry(gwj, &net->can.cgw_list, list) { - if (gwj->mod.uid != mod.uid) + struct cf_mod *old_cf; + + old_cf = cgw_job_cf_mod(gwj); + if (old_cf->uid != mod->uid) continue; /* interfaces & filters must be identical */ - if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) - return -EINVAL; + if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) { + err = -EINVAL; + goto out_free_cf; + } - /* update modifications with disabled softirq & quit */ - local_bh_disable(); - memcpy(&gwj->mod, &mod, sizeof(mod)); - local_bh_enable(); + rcu_assign_pointer(gwj->cf_mod, mod); + kfree_rcu_mightsleep(old_cf); return 0; } } /* ifindex == 0 is not allowed for job creation */ - if (!ccgw.src_idx || !ccgw.dst_idx) - return -ENODEV; + if (!ccgw.src_idx || !ccgw.dst_idx) { + err = -ENODEV; + goto out_free_cf; + } gwj = kmem_cache_alloc(cgw_cache, GFP_KERNEL); - if (!gwj) - return -ENOMEM; + if (!gwj) { + err = -ENOMEM; + goto out_free_cf; + } gwj->handled_frames = 0; gwj->dropped_frames = 0; @@ -1118,7 +1144,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, gwj->limit_hops = limhops; /* insert already parsed information */ - memcpy(&gwj->mod, &mod, sizeof(mod)); + RCU_INIT_POINTER(gwj->cf_mod, mod); memcpy(&gwj->ccgw, &ccgw, sizeof(ccgw)); err = -ENODEV; @@ -1152,9 +1178,11 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, if (!err) hlist_add_head_rcu(&gwj->list, &net->can.cgw_list); out: - if (err) + if (err) { kmem_cache_free(cgw_cache, gwj); - +out_free_cf: + kfree(mod); + } return err; } @@ -1214,19 +1242,22 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, /* remove only the first matching entry */ hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) { + struct cf_mod *cf_mod; + if (gwj->flags != r->flags) continue; if (gwj->limit_hops != limhops) continue; + cf_mod = cgw_job_cf_mod(gwj); /* we have a match when uid is enabled and identical */ - if (gwj->mod.uid || mod.uid) { - if (gwj->mod.uid != mod.uid) + if (cf_mod->uid || mod.uid) { + if (cf_mod->uid != mod.uid) continue; } else { /* no uid => check for identical modifications */ - if (memcmp(&gwj->mod, &mod, sizeof(mod))) + if (memcmp(cf_mod, &mod, sizeof(mod))) continue; } From 4555c4a13a930ea7775aed9fe4f5444642176cf9 Mon Sep 17 00:00:00 2001 From: Michael-CY Lee Date: Mon, 5 May 2025 16:19:46 +0800 Subject: [PATCH 28/30] wifi: mac80211: fix the type of status_code for negotiated TID to Link Mapping [ Upstream commit e12a42f64fc3d74872b349eedd47f90c6676b78a ] The status code should be type of __le16. Fixes: 83e897a961b8 ("wifi: ieee80211: add definitions for negotiated TID to Link map") Fixes: 8f500fbc6c65 ("wifi: mac80211: process and save negotiated TID to Link mapping request") Signed-off-by: Michael-CY Lee Link: https://patch.msgid.link/20250505081946.3927214-1-michael-cy.lee@mediatek.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- include/linux/ieee80211.h | 2 +- net/mac80211/mlme.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 3750e56bfcbb..777f6aa8efa7 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1524,7 +1524,7 @@ struct ieee80211_mgmt { struct { u8 action_code; u8 dialog_token; - u8 status_code; + __le16 status_code; u8 variable[]; } __packed ttlm_res; struct { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ad0d040569dc..cc8c5d18b130 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7177,6 +7177,7 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.ttlm_res); int ttlm_max_len = 2 + 1 + sizeof(struct ieee80211_ttlm_elem) + 1 + 2 * 2 * IEEE80211_TTLM_NUM_TIDS; + u16 status_code; skb = dev_alloc_skb(local->tx_headroom + hdr_len + ttlm_max_len); if (!skb) @@ -7199,19 +7200,18 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, WARN_ON(1); fallthrough; case NEG_TTLM_RES_REJECT: - mgmt->u.action.u.ttlm_res.status_code = - WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING; + status_code = WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING; break; case NEG_TTLM_RES_ACCEPT: - mgmt->u.action.u.ttlm_res.status_code = WLAN_STATUS_SUCCESS; + status_code = WLAN_STATUS_SUCCESS; break; case NEG_TTLM_RES_SUGGEST_PREFERRED: - mgmt->u.action.u.ttlm_res.status_code = - WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED; + status_code = WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED; ieee80211_neg_ttlm_add_suggested_map(skb, neg_ttlm); break; } + mgmt->u.action.u.ttlm_res.status_code = cpu_to_le16(status_code); ieee80211_tx_skb(sdata, skb); } @@ -7377,7 +7377,7 @@ void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, * This can be better implemented in the future, to handle request * rejections. */ - if (mgmt->u.action.u.ttlm_res.status_code != WLAN_STATUS_SUCCESS) + if (le16_to_cpu(mgmt->u.action.u.ttlm_res.status_code) != WLAN_STATUS_SUCCESS) __ieee80211_disconnect(sdata); } From 62946989e66023a8eed84f7d3a36021bb9af8c40 Mon Sep 17 00:00:00 2001 From: Sergey Temerkhanov Date: Wed, 21 Aug 2024 15:09:55 +0200 Subject: [PATCH 29/30] ice: Initial support for E825C hardware in ice_adapter [ Upstream commit fdb7f54700b1c88e734323a62fea986d9ce5a9c6 ] Address E825C devices by PCI ID since dual IP core configurations need 1 ice_adapter for both devices. Signed-off-by: Sergey Temerkhanov Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Stable-dep-of: 0093cb194a75 ("ice: use DSN instead of PCI BDF for ice_adapter index") Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_adapter.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c index f3e195974a8e..01a08cfd0090 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.c +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c @@ -9,12 +9,14 @@ #include #include #include "ice_adapter.h" +#include "ice.h" static DEFINE_XARRAY(ice_adapters); static DEFINE_MUTEX(ice_adapters_mutex); /* PCI bus number is 8 bits. Slot is 5 bits. Domain can have the rest. */ #define INDEX_FIELD_DOMAIN GENMASK(BITS_PER_LONG - 1, 13) +#define INDEX_FIELD_DEV GENMASK(31, 16) #define INDEX_FIELD_BUS GENMASK(12, 5) #define INDEX_FIELD_SLOT GENMASK(4, 0) @@ -24,9 +26,17 @@ static unsigned long ice_adapter_index(const struct pci_dev *pdev) WARN_ON(domain > FIELD_MAX(INDEX_FIELD_DOMAIN)); - return FIELD_PREP(INDEX_FIELD_DOMAIN, domain) | - FIELD_PREP(INDEX_FIELD_BUS, pdev->bus->number) | - FIELD_PREP(INDEX_FIELD_SLOT, PCI_SLOT(pdev->devfn)); + switch (pdev->device) { + case ICE_DEV_ID_E825C_BACKPLANE: + case ICE_DEV_ID_E825C_QSFP: + case ICE_DEV_ID_E825C_SFP: + case ICE_DEV_ID_E825C_SGMII: + return FIELD_PREP(INDEX_FIELD_DEV, pdev->device); + default: + return FIELD_PREP(INDEX_FIELD_DOMAIN, domain) | + FIELD_PREP(INDEX_FIELD_BUS, pdev->bus->number) | + FIELD_PREP(INDEX_FIELD_SLOT, PCI_SLOT(pdev->devfn)); + } } static struct ice_adapter *ice_adapter_new(void) From 574686c80754ebd144c0a3f6531010e6554a646b Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Mon, 5 May 2025 09:19:38 -0700 Subject: [PATCH 30/30] ice: use DSN instead of PCI BDF for ice_adapter index [ Upstream commit 0093cb194a7511d1e68865fa35b763c72e44c2f0 ] Use Device Serial Number instead of PCI bus/device/function for the index of struct ice_adapter. Functions on the same physical device should point to the very same ice_adapter instance, but with two PFs, when at least one of them is PCI-e passed-through to a VM, it is no longer the case - PFs will get seemingly random PCI BDF values, and thus indices, what finally leds to each of them being on their own instance of ice_adapter. That causes them to don't attempt any synchronization of the PTP HW clock usage, or any other future resources. DSN works nicely in place of the index, as it is "immutable" in terms of virtualization. Fixes: 0e2bddf9e5f9 ("ice: add ice_adapter for shared data across PFs on the same NIC") Suggested-by: Jacob Keller Suggested-by: Jakub Kicinski Suggested-by: Jiri Pirko Reviewed-by: Aleksandr Loktionov Signed-off-by: Przemek Kitszel Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20250505161939.2083581-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_adapter.c | 47 ++++++++------------ drivers/net/ethernet/intel/ice/ice_adapter.h | 6 ++- 2 files changed, 22 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c index 01a08cfd0090..66e070095d1b 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.c +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only // SPDX-FileCopyrightText: Copyright Red Hat -#include #include #include #include @@ -14,32 +13,16 @@ static DEFINE_XARRAY(ice_adapters); static DEFINE_MUTEX(ice_adapters_mutex); -/* PCI bus number is 8 bits. Slot is 5 bits. Domain can have the rest. */ -#define INDEX_FIELD_DOMAIN GENMASK(BITS_PER_LONG - 1, 13) -#define INDEX_FIELD_DEV GENMASK(31, 16) -#define INDEX_FIELD_BUS GENMASK(12, 5) -#define INDEX_FIELD_SLOT GENMASK(4, 0) - -static unsigned long ice_adapter_index(const struct pci_dev *pdev) +static unsigned long ice_adapter_index(u64 dsn) { - unsigned int domain = pci_domain_nr(pdev->bus); - - WARN_ON(domain > FIELD_MAX(INDEX_FIELD_DOMAIN)); - - switch (pdev->device) { - case ICE_DEV_ID_E825C_BACKPLANE: - case ICE_DEV_ID_E825C_QSFP: - case ICE_DEV_ID_E825C_SFP: - case ICE_DEV_ID_E825C_SGMII: - return FIELD_PREP(INDEX_FIELD_DEV, pdev->device); - default: - return FIELD_PREP(INDEX_FIELD_DOMAIN, domain) | - FIELD_PREP(INDEX_FIELD_BUS, pdev->bus->number) | - FIELD_PREP(INDEX_FIELD_SLOT, PCI_SLOT(pdev->devfn)); - } +#if BITS_PER_LONG == 64 + return dsn; +#else + return (u32)dsn ^ (u32)(dsn >> 32); +#endif } -static struct ice_adapter *ice_adapter_new(void) +static struct ice_adapter *ice_adapter_new(u64 dsn) { struct ice_adapter *adapter; @@ -47,6 +30,7 @@ static struct ice_adapter *ice_adapter_new(void) if (!adapter) return NULL; + adapter->device_serial_number = dsn; spin_lock_init(&adapter->ptp_gltsyn_time_lock); refcount_set(&adapter->refcount, 1); @@ -77,23 +61,26 @@ static void ice_adapter_free(struct ice_adapter *adapter) * Return: Pointer to ice_adapter on success. * ERR_PTR() on error. -ENOMEM is the only possible error. */ -struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev) +struct ice_adapter *ice_adapter_get(struct pci_dev *pdev) { - unsigned long index = ice_adapter_index(pdev); + u64 dsn = pci_get_dsn(pdev); struct ice_adapter *adapter; + unsigned long index; int err; + index = ice_adapter_index(dsn); scoped_guard(mutex, &ice_adapters_mutex) { err = xa_insert(&ice_adapters, index, NULL, GFP_KERNEL); if (err == -EBUSY) { adapter = xa_load(&ice_adapters, index); refcount_inc(&adapter->refcount); + WARN_ON_ONCE(adapter->device_serial_number != dsn); return adapter; } if (err) return ERR_PTR(err); - adapter = ice_adapter_new(); + adapter = ice_adapter_new(dsn); if (!adapter) return ERR_PTR(-ENOMEM); xa_store(&ice_adapters, index, adapter, GFP_KERNEL); @@ -110,11 +97,13 @@ struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev) * * Context: Process, may sleep. */ -void ice_adapter_put(const struct pci_dev *pdev) +void ice_adapter_put(struct pci_dev *pdev) { - unsigned long index = ice_adapter_index(pdev); + u64 dsn = pci_get_dsn(pdev); struct ice_adapter *adapter; + unsigned long index; + index = ice_adapter_index(dsn); scoped_guard(mutex, &ice_adapters_mutex) { adapter = xa_load(&ice_adapters, index); if (WARN_ON(!adapter)) diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h index e233225848b3..ac15c0d2bc1a 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.h +++ b/drivers/net/ethernet/intel/ice/ice_adapter.h @@ -32,6 +32,7 @@ struct ice_port_list { * @refcount: Reference count. struct ice_pf objects hold the references. * @ctrl_pf: Control PF of the adapter * @ports: Ports list + * @device_serial_number: DSN cached for collision detection on 32bit systems */ struct ice_adapter { refcount_t refcount; @@ -40,9 +41,10 @@ struct ice_adapter { struct ice_pf *ctrl_pf; struct ice_port_list ports; + u64 device_serial_number; }; -struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev); -void ice_adapter_put(const struct pci_dev *pdev); +struct ice_adapter *ice_adapter_get(struct pci_dev *pdev); +void ice_adapter_put(struct pci_dev *pdev); #endif /* _ICE_ADAPTER_H */