Files
ack-tegra/net/ipv6/ip6_output.c
Greg Kroah-Hartman 8cb2595f93 Merge 6.12.35 into android16-6.12-lts
GKI (arm64) relevant 87 out of 414 changes, affecting 112 files +738/-352
  bdb71ee651 configfs: Do not override creating attribute file failure in populate_attrs() [1 file, +1/-1]
  ba789be63d io_uring: account drain memory to cgroup [1 file, +1/-1]
  c58b577cf7 io_uring/kbuf: account ring io_buffer_list memory [1 file, +1/-1]
  f78b38af35 jbd2: fix data-race and null-ptr-deref in jbd2_journal_dirty_metadata() [1 file, +3/-2]
  2429bb9fad media: v4l2-dev: fix error handling in __video_register_device() [1 file, +7/-7]
  5d8b057ed7 media: videobuf2: use sgtable-based scatterlist wrappers [1 file, +2/-2]
  b52dc88361 media: uvcvideo: Return the number of processed controls [1 file, +10/-1]
  6d2b12e7c5 media: uvcvideo: Send control events for partial succeeds [1 file, +9/-3]
  aac91ae06c media: uvcvideo: Fix deferred probing error [1 file, +19/-8]
  86d9837e46 arm64/mm: Close theoretical race where stale TLB entry remains valid [1 file, +5/-4]
  5538af3843 block: use plug request list tail for one-shot backmerge attempt [1 file, +13/-13]
  943801c380 block: Clear BIO_EMULATES_ZONE_APPEND flag on BIO completion [1 file, +1/-0]
  1c71f3cf5f cgroup,freezer: fix incomplete freezing when attaching tasks [1 file, +1/-2]
  a0890b7805 bus: firewall: Fix missing static inline annotations for stubs [1 file, +9/-6]
  5766da2237 ext4: inline: fix len overflow in ext4_prepare_inline_data [1 file, +1/-1]
  796632e6f8 ext4: fix calculation of credits for extent tree modification [1 file, +6/-5]
  4b36399711 ext4: ensure i_size is smaller than maxbytes [1 file, +2/-1]
  be5f3061a6 ext4: only dirty folios when data journaling regular files [1 file, +6/-1]
  a0b1c91ada Input: gpio-keys - fix possible concurrent access in gpio_keys_irq_timer() [1 file, +2/-0]
  fed611bd8c f2fs: fix to do sanity check on ino and xnid [1 file, +6/-0]
  aaa644e7ff f2fs: prevent kernel warning due to negative i_nlink from corrupted image [1 file, +9/-0]
  ee1b421c46 f2fs: fix to do sanity check on sit_bitmap_size [1 file, +8/-0]
  f16a797dce watchdog: fix watchdog may detect false positive of softlockup [1 file, +27/-14]
  02137179ff mm: fix ratelimit_pages update error in dirty_ratio_handler() [1 file, +1/-1]
  462eee6d42 firmware: arm_scmi: Ensure that the message-id supports fastchannel [2 files, +45/-33]
  e3cf1ef571 dm-verity: fix a memory leak if some arguments are specified multiple times [3 files, +24/-5]
  f2986bccf2 dm: lock limits when reading them [1 file, +7/-1]
  ec5f0b4412 ovl: Fix nested backing file paths [1 file, +2/-2]
  92776ca0cc remoteproc: core: Cleanup acquired resources when rproc_handle_resources() fails in rproc_attach() [1 file, +2/-3]
  f4ef928ca5 remoteproc: core: Release rproc->clean_table after rproc_attach() fails [1 file, +1/-0]
  68e58f5791 PCI: dwc: ep: Correct PBA offset in .set_msix() callback [1 file, +3/-2]
  b20701d594 PCI: Add ACS quirk for Loongson PCIe [1 file, +23/-0]
  be0cf75cbd PCI: Fix lock symmetry in pci_slot_unlock() [1 file, +2/-1]
  7b45d2401d clocksource: Fix the CPUs' choice in the watchdog per CPU verification [1 file, +1/-1]
  c05aba32a9 ACPICA: Avoid sequence overread in call to strncmp() [1 file, +1/-1]
  66613b13cd ACPI: Add missing prototype for non CONFIG_SUSPEND/CONFIG_X86 case [1 file, +8/-1]
  33cd650d38 pmdomain: core: Reset genpd->states to avoid freeing invalid data [1 file, +3/-1]
  f34e0c1556 platform-msi: Add msi_remove_device_irq_domain() in platform_device_msi_free_irqs_all() [1 file, +1/-0]
  c519f81e9c gpiolib: of: Add polarity quirk for s5m8767 [1 file, +9/-0]
  1f152ae557 PM: runtime: fix denying of auto suspend in pm_suspend_timer_fn() [1 file, +1/-1]
  6c1151d53c tipc: use kfree_sensitive() for aead cleanup [1 file, +1/-1]
  b0e647442c f2fs: use vmalloc instead of kvmalloc in .init_{,de}compress_ctx [2 files, +15/-13]
  2d834477bb bpf: Check rcu_read_lock_trace_held() in bpf_map_lookup_percpu_elem() [1 file, +2/-1]
  77ff6aec7c cpufreq: scmi: Skip SCMI devices that aren't used by the CPUs [1 file, +35/-1]
  0a8446058c tcp: always seek for minimal rtt in tcp_rcv_rtt_update() [1 file, +8/-14]
  f97085d365 tcp: remove zero TCP TS samples for autotuning [1 file, +5/-5]
  89b20c406e tcp: fix initial tp->rcvq_space.space value for passive TS enabled flows [1 file, +3/-3]
  84c156a351 tcp: add receive queue awareness in tcp_rcv_space_adjust() [2 files, +5/-3]
  3a9e74d158 ipv4/route: Use this_cpu_inc() for stats on PREEMPT_RT [1 file, +4/-0]
  5eb9c50e0c net: page_pool: Don't recycle into cache on PREEMPT_RT [1 file, +4/-0]
  8b0741b167 xfrm: validate assignment of maximal possible SEQ number [1 file, +42/-10]
  8fdf2f79eb bpf: Pass the same orig_call value to trampoline functions [1 file, +1/-1]
  f0023d7a2a f2fs: fix to bail out in get_new_segment() [2 files, +6/-1]
  448dc45eea bpf: Use proper type to calculate bpf_raw_tp_null_args.mask index [1 file, +2/-2]
  78f768e36c net: bridge: mcast: re-implement br_multicast_{enable, disable}_port functions [1 file, +69/-8]
  4b3383110b software node: Correct a OOB check in software_node_get_reference_args() [1 file, +1/-1]
  b7129ef57d sock: Correct error checking condition for (assign|release)_proto_idx() [1 file, +2/-2]
  a58f0a0e99 f2fs: fix to set atomic write status more clear [3 files, +12/-2]
  b8b4b8bb34 bpf, sockmap: Fix data lost during EAGAIN retries [1 file, +2/-1]
  7c41f73b64 fs/xattr.c: fix simple_xattr_list() [1 file, +1/-0]
  2e10dc9c2a io_uring/kbuf: don't truncate end buffer for multiple buffer peeks [1 file, +4/-1]
  1a4254ab06 io_uring: fix task leak issue in io_wq_create() [1 file, +3/-1]
  4220cc0b98 nvme: always punt polled uring_cmd end_io work to task_work [1 file, +7/-14]
  f9b97d466e net_sched: sch_sfq: reject invalid perturb period [1 file, +8/-2]
  2a3ad42a57 net: clear the dst when changing skb protocol [1 file, +13/-6]
  510a29d776 mm: close theoretical race where stale TLB entries could linger [1 file, +2/-0]
  57ec081869 sched_ext, sched/core: Don't call scx_group_set_weight() prematurely from sched_create_group() [3 files, +9/-2]
  3d828519bd atm: Revert atm_account_tx() if copy_from_iter_full() fails. [3 files, +8/-1]
  47f34289d1 arm64: Restrict pagetable teardown to avoid false warning [1 file, +2/-1]
  9cf5b2a3b7 mm/hugetlb: unshare page tables during VMA split, not before [5 files, +57/-16]
  dc5f0aef9e net: Fix checksum update for ILA adj-transport [4 files, +7/-7]
  2516299184 bpf: Fix L4 csum update on IPv6 in CHECKSUM_COMPLETE [3 files, +7/-2]
  50189d9c5e erofs: remove unused trace event erofs_destroy_inode [1 file, +0/-18]
  348e541fef ipv6: remove leftover ip6 cookie initializer [1 file, +0/-2]
  3c44ebad5a ipv6: replace ipcm6_init calls with ipcm6_init_sk [4 files, +3/-29]
  6b358b3adf io_uring/sqpoll: don't put task_struct on tctx setup failure [1 file, +1/-4]
  8873080b88 workqueue: Initialize wq_isolated_cpumask in workqueue_init_early() [1 file, +2/-1]
  ac462a75fd net: netmem: fix skb_ensure_writable with unreadable skbs [1 file, +0/-3]
  61b39e189d ptp: allow reading of currently dialed frequency to succeed on free-running clocks [1 file, +2/-1]
  397c1faf8f tcp: fix tcp_packet_delayed() for tcp_is_non_sack_preventing_reopen() behavior [1 file, +25/-12]
  0d3d91c350 tipc: fix null-ptr-deref when acquiring remote ip of ethernet bearer [1 file, +2/-2]
  31d50dfe9c tcp: fix passive TFO socket having invalid NAPI ID [1 file, +3/-0]
  0f8df5d6f2 ublk: santizize the arguments from userspace when adding a device [1 file, +3/-0]
  456019adaa perf: Fix sample vs do_exit() [2 files, +16/-8]
  7335c33d62 perf: Fix cgroup state vs ERROR [1 file, +30/-21]
  fd199366bf perf/core: Fix WARN in perf_cgroup_switch() [1 file, +20/-2]
  22f935bc86 arm64/ptrace: Fix stack-out-of-bounds read in regs_get_kernel_stack_nth() [1 file, +1/-1]

Changes in 6.12.35
	configfs: Do not override creating attribute file failure in populate_attrs()
	crypto: marvell/cesa - Do not chain submitted requests
	gfs2: move msleep to sleepable context
	crypto: qat - add shutdown handler to qat_c3xxx
	crypto: qat - add shutdown handler to qat_420xx
	crypto: qat - add shutdown handler to qat_4xxx
	crypto: qat - add shutdown handler to qat_c62x
	crypto: qat - add shutdown handler to qat_dh895xcc
	ASoC: qcom: sdm845: Add error handling in sdm845_slim_snd_hw_params()
	ASoC: meson: meson-card-utils: use of_property_present() for DT parsing
	ASoC: amd: sof_amd_sdw: Fix unlikely uninitialized variable use in create_sdw_dailinks()
	io_uring: account drain memory to cgroup
	io_uring/kbuf: account ring io_buffer_list memory
	powerpc/pseries/msi: Avoid reading PCI device registers in reduced power states
	s390/pci: Remove redundant bus removal and disable from zpci_release_device()
	s390/pci: Prevent self deletion in disable_slot()
	s390/pci: Allow re-add of a reserved but not yet removed device
	s390/pci: Serialize device addition and removal
	regulator: max20086: Fix MAX200086 chip id
	regulator: max20086: Change enable gpio to optional
	net/mlx5_core: Add error handling inmlx5_query_nic_vport_qkey_viol_cntr()
	net/mlx5: Add error handling in mlx5_query_nic_vport_node_guid()
	wifi: p54: prevent buffer-overflow in p54_rx_eeprom_readback()
	wifi: mt76: mt7925: fix host interrupt register initialization
	wifi: ath11k: fix rx completion meta data corruption
	wifi: rtw88: usb: Upload the firmware in bigger chunks
	wifi: ath11k: fix ring-buffer corruption
	NFSD: unregister filesystem in case genl_register_family() fails
	NFSD: fix race between nfsd registration and exports_proc
	NFSD: Implement FATTR4_CLONE_BLKSIZE attribute
	nfsd: nfsd4_spo_must_allow() must check this is a v4 compound request
	nfsd: Initialize ssc before laundromat_work to prevent NULL dereference
	SUNRPC: Prevent hang on NFS mount with xprtsec=[m]tls
	NFSv4: Don't check for OPEN feature support in v4.1
	fs/nfs/read: fix double-unlock bug in nfs_return_empty_folio()
	wifi: ath12k: fix ring-buffer corruption
	jbd2: fix data-race and null-ptr-deref in jbd2_journal_dirty_metadata()
	svcrdma: Unregister the device if svc_rdma_accept() fails
	wifi: rtw88: usb: Reduce control message timeout to 500 ms
	wifi: rtlwifi: disable ASPM for RTL8723BE with subsystem ID 11ad:1723
	media: ov8856: suppress probe deferral errors
	media: ov5675: suppress probe deferral errors
	media: imx335: Use correct register width for HNUM
	media: nxp: imx8-isi: better handle the m2m usage_count
	media: i2c: ds90ub913: Fix returned fmt from .set_fmt()
	media: ccs-pll: Start VT pre-PLL multiplier search from correct value
	media: ov2740: Move pm-runtime cleanup on probe-errors to proper place
	media: ccs-pll: Start OP pre-PLL multiplier search from correct value
	media: ccs-pll: Correct the upper limit of maximum op_pre_pll_clk_div
	media: ccs-pll: Check for too high VT PLL multiplier in dual PLL case
	media: cxusb: no longer judge rbuf when the write fails
	media: davinci: vpif: Fix memory leak in probe error path
	media: gspca: Add error handling for stv06xx_read_sensor()
	media: i2c: imx335: Fix frame size enumeration
	media: imagination: fix a potential memory leak in e5010_probe()
	media: intel/ipu6: Fix dma mask for non-secure mode
	media: ipu6: Remove workaround for Meteor Lake ES2
	media: mediatek: vcodec: Correct vsi_core framebuffer size
	media: omap3isp: use sgtable-based scatterlist wrappers
	media: v4l2-dev: fix error handling in __video_register_device()
	media: venus: Fix probe error handling
	media: videobuf2: use sgtable-based scatterlist wrappers
	media: vidtv: Terminating the subsequent process of initialization failure
	media: vivid: Change the siize of the composing
	media: imx-jpeg: Drop the first error frames
	media: imx-jpeg: Move mxc_jpeg_free_slot_data() ahead
	media: imx-jpeg: Reset slot data pointers when freed
	media: imx-jpeg: Cleanup after an allocation error
	media: uvcvideo: Return the number of processed controls
	media: uvcvideo: Send control events for partial succeeds
	media: uvcvideo: Fix deferred probing error
	arm64/mm: Close theoretical race where stale TLB entry remains valid
	ARM: 9447/1: arm/memremap: fix arch_memremap_can_ram_remap()
	ARM: omap: pmic-cpcap: do not mess around without CPCAP or OMAP4
	ASoC: codecs: wcd9375: Fix double free of regulator supplies
	ASoC: codecs: wcd937x: Drop unused buck_supply
	block: use plug request list tail for one-shot backmerge attempt
	block: Clear BIO_EMULATES_ZONE_APPEND flag on BIO completion
	bus: mhi: ep: Update read pointer only after buffer is written
	bus: mhi: host: Fix conflict between power_up and SYSERR
	can: kvaser_pciefd: refine error prone echo_skb_max handling logic
	can: tcan4x5x: fix power regulator retrieval during probe
	ceph: avoid kernel BUG for encrypted inode with unaligned file size
	ceph: set superblock s_magic for IMA fsmagic matching
	cgroup,freezer: fix incomplete freezing when attaching tasks
	bus: firewall: Fix missing static inline annotations for stubs
	ata: pata_via: Force PIO for ATAPI devices on VT6415/VT6330
	ata: ahci: Disallow LPM for ASUSPRO-D840SA motherboard
	ata: ahci: Disallow LPM for Asus B550-F motherboard
	bus: fsl-mc: do not add a device-link for the UAPI used DPMCP device
	bus: fsl-mc: fix GET/SET_TAILDROP command ids
	ext4: inline: fix len overflow in ext4_prepare_inline_data
	ext4: fix calculation of credits for extent tree modification
	ext4: factor out ext4_get_maxbytes()
	ext4: ensure i_size is smaller than maxbytes
	ext4: only dirty folios when data journaling regular files
	Input: ims-pcu - check record size in ims_pcu_flash_firmware()
	Input: gpio-keys - fix possible concurrent access in gpio_keys_irq_timer()
	f2fs: fix to do sanity check on ino and xnid
	f2fs: prevent kernel warning due to negative i_nlink from corrupted image
	f2fs: fix to do sanity check on sit_bitmap_size
	hwmon: (ftsteutates) Fix TOCTOU race in fts_read()
	NFC: nci: uart: Set tty->disc_data only in success path
	net/sched: fix use-after-free in taprio_dev_notifier
	net: ftgmac100: select FIXED_PHY
	iommu/vt-d: Restore context entry setup order for aliased devices
	fbdev: Fix do_register_framebuffer to prevent null-ptr-deref in fb_videomode_to_var
	EDAC/altera: Use correct write width with the INTTEST register
	fbdev: Fix fb_set_var to prevent null-ptr-deref in fb_videomode_to_var
	parisc/unaligned: Fix hex output to show 8 hex chars
	vgacon: Add check for vc_origin address range in vgacon_scroll()
	parisc: fix building with gcc-15
	clk: meson-g12a: add missing fclk_div2 to spicc
	ipc: fix to protect IPCS lookups using RCU
	watchdog: fix watchdog may detect false positive of softlockup
	RDMA/iwcm: Fix use-after-free of work objects after cm_id destruction
	mm: fix ratelimit_pages update error in dirty_ratio_handler()
	soc: qcom: pmic_glink_altmode: fix spurious DP hotplug events
	configfs-tsm-report: Fix NULL dereference of tsm_ops
	firmware: arm_scmi: Ensure that the message-id supports fastchannel
	mtd: rawnand: sunxi: Add randomizer configuration in sunxi_nfc_hw_ecc_write_chunk
	mtd: nand: sunxi: Add randomizer configuration before randomizer enable
	KVM: SVM: Clear current_vmcb during vCPU free for all *possible* CPUs
	KVM: VMX: Flush shadow VMCS on emergency reboot
	dm-mirror: fix a tiny race condition
	dm-verity: fix a memory leak if some arguments are specified multiple times
	mtd: rawnand: qcom: Fix read len for onfi param page
	ftrace: Fix UAF when lookup kallsym after ftrace disabled
	dm: lock limits when reading them
	phy: fsl-imx8mq-usb: fix phy_tx_vboost_level_from_property()
	net: ch9200: fix uninitialised access during mii_nway_restart
	KVM: s390: rename PROT_NONE to PROT_TYPE_DUMMY
	sysfb: Fix screen_info type check for VGA
	video: screen_info: Relocate framebuffers behind PCI bridges
	pwm: axi-pwmgen: fix missing separate external clock
	staging: iio: ad5933: Correct settling cycles encoding per datasheet
	mips: Add -std= flag specified in KBUILD_CFLAGS to vdso CFLAGS
	ovl: Fix nested backing file paths
	regulator: max14577: Add error check for max14577_read_reg()
	remoteproc: core: Cleanup acquired resources when rproc_handle_resources() fails in rproc_attach()
	remoteproc: core: Release rproc->clean_table after rproc_attach() fails
	remoteproc: k3-m4: Don't assert reset in detach routine
	cifs: reset connections for all channels when reconnect requested
	cifs: update dstaddr whenever channel iface is updated
	cifs: dns resolution is needed only for primary channel
	smb: client: add NULL check in automount_fullpath
	Drivers: hv: Allocate interrupt and monitor pages aligned to system page boundary
	uio_hv_generic: Use correct size for interrupt and monitor pages
	uio_hv_generic: Align ring size to system page
	PCI: cadence-ep: Correct PBA offset in .set_msix() callback
	PCI: dwc: ep: Correct PBA offset in .set_msix() callback
	PCI: Add ACS quirk for Loongson PCIe
	PCI: Fix lock symmetry in pci_slot_unlock()
	PCI: dw-rockchip: Remove PCIE_L0S_ENTRY check from rockchip_pcie_link_up()
	PCI: dw-rockchip: Fix PHY function call sequence in rockchip_pcie_phy_deinit()
	iio: accel: fxls8962af: Fix temperature scan element sign
	accel/ivpu: Improve buffer object logging
	accel/ivpu: Use firmware names from upstream repo
	accel/ivpu: Use dma_resv_lock() instead of a custom mutex
	accel/ivpu: Fix warning in ivpu_gem_bo_free()
	dummycon: Trigger redraw when switching consoles with deferred takeover
	mm/hugetlb: fix huge_pmd_unshare() vs GUP-fast race
	iio: imu: inv_icm42600: Fix temperature calculation
	iio: adc: ad7944: mask high bits on direct read
	iio: adc: ti-ads1298: Kconfig: add kfifo dependency to fix module build
	iio: adc: ad7606_spi: fix reg write value mask
	ACPICA: fix acpi operand cache leak in dswstate.c
	ASoC: amd: yc: Add quirk for Lenovo Yoga Pro 7 14ASP9
	clocksource: Fix the CPUs' choice in the watchdog per CPU verification
	power: supply: collie: Fix wakeup source leaks on device unbind
	mmc: Add quirk to disable DDR50 tuning
	ACPICA: Avoid sequence overread in call to strncmp()
	ASoC: tas2770: Power cycle amp on ISENSE/VSENSE change
	ASoC: intel/sdw_utils: Assign initial value in asoc_sdw_rt_amp_spk_rtd_init()
	ACPI: bus: Bail out if acpi_kobj registration fails
	ACPI: Add missing prototype for non CONFIG_SUSPEND/CONFIG_X86 case
	ACPICA: fix acpi parse and parseext cache leaks
	ACPICA: Apply pack(1) to union aml_resource
	ALSA: hda: cs35l41: Fix swapped l/r audio channels for Acer Helios laptops
	power: supply: bq27xxx: Retrieve again when busy
	pmdomain: core: Reset genpd->states to avoid freeing invalid data
	ACPICA: utilities: Fix overflow check in vsnprintf()
	platform-msi: Add msi_remove_device_irq_domain() in platform_device_msi_free_irqs_all()
	ASoC: tegra210_ahub: Add check to of_device_get_match_data()
	Make 'cc-option' work correctly for the -Wno-xyzzy pattern
	gpiolib: of: Add polarity quirk for s5m8767
	PM: runtime: fix denying of auto suspend in pm_suspend_timer_fn()
	power: supply: max17040: adjust thermal channel scaling
	ACPI: battery: negate current when discharging
	net: macb: Check return value of dma_set_mask_and_coherent()
	net: lan743x: Modify the EEPROM and OTP size for PCI1xxxx devices
	tipc: use kfree_sensitive() for aead cleanup
	f2fs: use vmalloc instead of kvmalloc in .init_{,de}compress_ctx
	bpf: Check rcu_read_lock_trace_held() in bpf_map_lookup_percpu_elem()
	Bluetooth: btusb: Add new VID/PID 13d3/3584 for MT7922
	i2c: designware: Invoke runtime suspend on quick slave re-registration
	wifi: mt76: mt7996: drop fragments with multicast or broadcast RA
	emulex/benet: correct command version selection in be_cmd_get_stats()
	Bluetooth: btusb: Add new VID/PID 13d3/3630 for MT7925
	wifi: mt76: mt76x2: Add support for LiteOn WN4516R,WN4519R
	wifi: mt76: mt7921: add 160 MHz AP for mt7922 device
	wifi: mt76: mt7925: introduce thermal protection
	wifi: mac80211: validate SCAN_FLAG_AP in scan request during MLO
	sctp: Do not wake readers in __sctp_write_space()
	libbpf/btf: Fix string handling to support multi-split BTF
	cpufreq: scmi: Skip SCMI devices that aren't used by the CPUs
	i2c: tegra: check msg length in SMBUS block read
	i2c: npcm: Add clock toggle recovery
	clk: qcom: gcc-x1e80100: Set FORCE MEM CORE for UFS clocks
	net: dlink: add synchronization for stats update
	wifi: ath12k: fix macro definition HAL_RX_MSDU_PKT_LENGTH_GET
	wifi: ath12k: fix a possible dead lock caused by ab->base_lock
	wifi: ath11k: Fix QMI memory reuse logic
	iommu/amd: Allow matching ACPI HID devices without matching UIDs
	wifi: rtw89: leave idle mode when setting WEP encryption for AP mode
	tcp: always seek for minimal rtt in tcp_rcv_rtt_update()
	tcp: remove zero TCP TS samples for autotuning
	tcp: fix initial tp->rcvq_space.space value for passive TS enabled flows
	tcp: add receive queue awareness in tcp_rcv_space_adjust()
	x86/sgx: Prevent attempts to reclaim poisoned pages
	ipv4/route: Use this_cpu_inc() for stats on PREEMPT_RT
	net: page_pool: Don't recycle into cache on PREEMPT_RT
	xfrm: validate assignment of maximal possible SEQ number
	net: atlantic: generate software timestamp just before the doorbell
	pinctrl: armada-37xx: propagate error from armada_37xx_pmx_set_by_name()
	pinctrl: armada-37xx: propagate error from armada_37xx_gpio_get_direction()
	bpf: Pass the same orig_call value to trampoline functions
	net: stmmac: generate software timestamp just before the doorbell
	pinctrl: armada-37xx: propagate error from armada_37xx_pmx_gpio_set_direction()
	libbpf: Check bpf_map_skeleton link for NULL
	pinctrl: armada-37xx: propagate error from armada_37xx_gpio_get()
	net: mlx4: add SOF_TIMESTAMPING_TX_SOFTWARE flag when getting ts info
	net: vertexcom: mse102x: Return code for mse102x_rx_pkt_spi
	wireless: purelifi: plfxlc: fix memory leak in plfxlc_usb_wreq_asyn()
	wifi: mac80211: do not offer a mesh path if forwarding is disabled
	clk: rockchip: rk3036: mark ddrphy as critical
	hid-asus: check ROG Ally MCU version and warn
	wifi: iwlwifi: mvm: fix beacon CCK flag
	f2fs: fix to bail out in get_new_segment()
	netfilter: nft_set_pipapo: clamp maximum map bucket size to INT_MAX
	libbpf: Add identical pointer detection to btf_dedup_is_equiv()
	scsi: lpfc: Fix lpfc_check_sli_ndlp() handling for GEN_REQUEST64 commands
	scsi: smartpqi: Add new PCI IDs
	iommu/amd: Ensure GA log notifier callbacks finish running before module unload
	wifi: iwlwifi: pcie: make sure to lock rxq->read
	wifi: rtw89: 8922a: fix TX fail with wrong VCO setting
	wifi: mac80211_hwsim: Prevent tsf from setting if beacon is disabled
	netdevsim: Mark NAPI ID on skb in nsim_rcv
	net/mlx5: HWS, Fix IP version decision
	bpf: Use proper type to calculate bpf_raw_tp_null_args.mask index
	wifi: mac80211: VLAN traffic in multicast path
	Revert "mac80211: Dynamically set CoDel parameters per station"
	wifi: iwlwifi: Add missing MODULE_FIRMWARE for Qu-c0-jf-b0
	net: bridge: mcast: update multicast contex when vlan state is changed
	net: bridge: mcast: re-implement br_multicast_{enable, disable}_port functions
	vxlan: Do not treat dst cache initialization errors as fatal
	bnxt_en: Remove unused field "ref_count" in struct bnxt_ulp
	wifi: ath12k: using msdu end descriptor to check for rx multicast packets
	net: ethernet: ti: am65-cpsw: handle -EPROBE_DEFER
	software node: Correct a OOB check in software_node_get_reference_args()
	isofs: fix Y2038 and Y2156 issues in Rock Ridge TF entry
	pinctrl: mcp23s08: Reset all pins to input at probe
	wifi: ath12k: fix failed to set mhi state error during reboot with hardware grouping
	scsi: lpfc: Use memcpy() for BIOS version
	sock: Correct error checking condition for (assign|release)_proto_idx()
	i40e: fix MMIO write access to an invalid page in i40e_clear_hw
	ixgbe: Fix unreachable retry logic in combined and byte I2C write functions
	RDMA/hns: initialize db in update_srq_db()
	ice: fix check for existing switch rule
	usbnet: asix AX88772: leave the carrier control to phylink
	f2fs: fix to set atomic write status more clear
	bpf, sockmap: Fix data lost during EAGAIN retries
	net: ethernet: cortina: Use TOE/TSO on all TCP
	octeontx2-pf: Add error log forcn10k_map_unmap_rq_policer()
	wifi: ath11k: determine PM policy based on machine model
	wifi: ath12k: fix link valid field initialization in the monitor Rx
	wifi: ath12k: fix incorrect CE addresses
	wifi: ath12k: Pass correct values of center freq1 and center freq2 for 160 MHz
	net/mlx5: HWS, Harden IP version definer checks
	fbcon: Make sure modelist not set on unregistered console
	watchdog: da9052_wdt: respect TWDMIN
	bus: fsl-mc: increase MC_CMD_COMPLETION_TIMEOUT_MS value
	ARM: OMAP2+: Fix l4ls clk domain handling in STANDBY
	tee: Prevent size calculation wraparound on 32-bit kernels
	Revert "bus: ti-sysc: Probe for l4_wkup and l4_cfg interconnect devices first"
	fs/xattr.c: fix simple_xattr_list()
	platform/x86/amd: pmc: Clear metrics table at start of cycle
	platform/x86/amd: pmf: Prevent amd_pmf_tee_deinit() from running twice
	platform/x86: dell_rbu: Fix list usage
	platform/x86: dell_rbu: Stop overwriting data buffer
	powerpc/vdso: Fix build of VDSO32 with pcrel
	powerpc/eeh: Fix missing PE bridge reconfiguration during VFIO EEH recovery
	io_uring/kbuf: don't truncate end buffer for multiple buffer peeks
	io_uring: fix task leak issue in io_wq_create()
	drivers/rapidio/rio_cm.c: prevent possible heap overwrite
	platform/loongarch: laptop: Get brightness setting from EC on probe
	platform/loongarch: laptop: Unregister generic_sub_drivers on exit
	platform/loongarch: laptop: Add backlight power control support
	LoongArch: vDSO: Correctly use asm parameters in syscall wrappers
	LoongArch: Avoid using $r0/$r1 as "mask" for csrxchg
	LoongArch: Fix panic caused by NULL-PMD in huge_pte_offset()
	jffs2: check that raw node were preallocated before writing summary
	jffs2: check jffs2_prealloc_raw_node_refs() result in few other places
	cifs: deal with the channel loading lag while picking channels
	cifs: serialize other channels when query server interfaces is pending
	cifs: do not disable interface polling on failure
	smb: improve directory cache reuse for readdir operations
	scsi: storvsc: Increase the timeouts to storvsc_timeout
	scsi: s390: zfcp: Ensure synchronous unit_add
	nvme: always punt polled uring_cmd end_io work to task_work
	net_sched: sch_sfq: reject invalid perturb period
	net: clear the dst when changing skb protocol
	mm: close theoretical race where stale TLB entries could linger
	udmabuf: use sgtable-based scatterlist wrappers
	x86/virt/tdx: Avoid indirect calls to TDX assembly functions
	selftests/x86: Add a test to detect infinite SIGTRAP handler loop
	ksmbd: fix null pointer dereference in destroy_previous_session
	platform/x86: ideapad-laptop: use usleep_range() for EC polling
	selinux: fix selinux_xfrm_alloc_user() to set correct ctx_len
	platform/x86/intel-uncore-freq: Fail module load when plat_info is NULL
	sched_ext, sched/core: Don't call scx_group_set_weight() prematurely from sched_create_group()
	atm: Revert atm_account_tx() if copy_from_iter_full() fails.
	wifi: rtw89: phy: add dummy C2H event handler for report of TAS power
	cpufreq/amd-pstate: Add missing NULL ptr check in amd_pstate_update
	Input: sparcspkr - avoid unannotated fall-through
	wifi: ath12k: Clear affinity hint before calling ath12k_pci_free_irq() in error path
	wifi: cfg80211: init wiphy_work before allocating rfkill fails
	arm64: Restrict pagetable teardown to avoid false warning
	ALSA: usb-audio: Rename ALSA kcontrol PCM and PCM1 for the KTMicro sound card
	ALSA: hda/intel: Add Thinkpad E15 to PM deny list
	ALSA: hda/realtek - Add mute LED support for HP Victus 16-s1xxx and HP Victus 15-fa1xxx
	ALSA: hda/realtek: enable headset mic on Latitude 5420 Rugged
	ALSA: hda/realtek: Fix built-in mic on ASUS VivoBook X513EA
	ALSA: hda/realtek: Add quirk for Asus GU605C
	iio: accel: fxls8962af: Fix temperature calculation
	mm/hugetlb: unshare page tables during VMA split, not before
	drm/amdgpu: read back register after written for VCN v4.0.5
	kbuild: rust: add rustc-min-version support function
	rust: compile libcore with edition 2024 for 1.87+
	net: Fix checksum update for ILA adj-transport
	bpf: Fix L4 csum update on IPv6 in CHECKSUM_COMPLETE
	erofs: remove unused trace event erofs_destroy_inode
	nfsd: use threads array as-is in netlink interface
	sunrpc: handle SVC_GARBAGE during svc auth processing as auth error
	drm/v3d: Avoid NULL pointer dereference in `v3d_job_update_stats()`
	Kunit to check the longest symbol length
	x86/tools: Drop duplicate unlikely() definition in insn_decoder_test.c
	ipv6: remove leftover ip6 cookie initializer
	ipv6: replace ipcm6_init calls with ipcm6_init_sk
	smb: fix secondary channel creation issue with kerberos by populating hostname when adding channels
	drm/msm/disp: Correct porch timing for SDM845
	drm/msm/dsi/dsi_phy_10nm: Fix missing initial VCO rate
	drm/msm: Fix CP_RESET_CONTEXT_STATE bitfield names
	drm/msm/a7xx: Call CP_RESET_CONTEXT_STATE
	drm/ssd130x: fix ssd132x_clear_screen() columns
	ionic: Prevent driver/fw getting out of sync on devcmd(s)
	drm/nouveau/bl: increase buffer size to avoid truncate warning
	drm/i915/pmu: Fix build error with GCOV and AutoFDO enabled
	hwmon: (occ) Rework attribute registration for stack usage
	hwmon: (occ) fix unaligned accesses
	hwmon: (ltc4282) avoid repeated register write
	pldmfw: Select CRC32 when PLDMFW is selected
	aoe: clean device rq_list in aoedev_downdev()
	io_uring/sqpoll: don't put task_struct on tctx setup failure
	net: ice: Perform accurate aRFS flow match
	ice: fix eswitch code memory leak in reset scenario
	e1000e: set fixed clock frequency indication for Nahum 11 and Nahum 13
	workqueue: Initialize wq_isolated_cpumask in workqueue_init_early()
	ksmbd: add free_transport ops in ksmbd connection
	net: netmem: fix skb_ensure_writable with unreadable skbs
	bnxt_en: Fix double invocation of bnxt_ulp_stop()/bnxt_ulp_start()
	eth: bnxt: fix out-of-range access of vnic_info array
	bnxt_en: Add a helper function to configure MRU and RSS
	bnxt_en: Update MRU and RSS table of RSS contexts on queue reset
	ptp: fix breakage after ptp_vclock_in_use() rework
	ptp: allow reading of currently dialed frequency to succeed on free-running clocks
	wifi: carl9170: do not ping device which has failed to load firmware
	mpls: Use rcu_dereference_rtnl() in mpls_route_input_rcu().
	atm: atmtcp: Free invalid length skb in atmtcp_c_send().
	tcp: fix tcp_packet_delayed() for tcp_is_non_sack_preventing_reopen() behavior
	tipc: fix null-ptr-deref when acquiring remote ip of ethernet bearer
	tcp: fix passive TFO socket having invalid NAPI ID
	eth: fbnic: avoid double free when failing to DMA-map FW msg
	net: lan743x: fix potential out-of-bounds write in lan743x_ptp_io_event_clock_get()
	ublk: santizize the arguments from userspace when adding a device
	drm/xe: Wire up device shutdown handler
	drm/xe/gt: Update handling of xe_force_wake_get return
	drm/xe/bmg: Update Wa_16023588340
	calipso: Fix null-ptr-deref in calipso_req_{set,del}attr().
	mlxbf_gige: return EPROBE_DEFER if PHY IRQ is not available
	net: atm: add lec_mutex
	net: atm: fix /proc/net/atm/lec handling
	EDAC/amd64: Correct number of UMCs for family 19h models 70h-7fh
	dt-bindings: i2c: nvidia,tegra20-i2c: Specify the required properties
	smb: Log an error when close_all_cached_dirs fails
	serial: sh-sci: Clean sci_ports[0] after at earlycon exit
	serial: sh-sci: Increment the runtime usage counter for the earlycon device
	smb: client: fix first command failure during re-negotiation
	smb: client: fix max_sge overflow in smb_extract_folioq_to_rdma()
	s390/pci: Fix __pcilg_mio_inuser() inline assembly
	perf: Fix sample vs do_exit()
	perf: Fix cgroup state vs ERROR
	perf/core: Fix WARN in perf_cgroup_switch()
	arm64/ptrace: Fix stack-out-of-bounds read in regs_get_kernel_stack_nth()
	scsi: elx: efct: Fix memory leak in efct_hw_parse_filter()
	RISC-V: KVM: Fix the size parameter check in SBI SFENCE calls
	RISC-V: KVM: Don't treat SBI HFENCE calls as NOPs
	gpio: pca953x: fix wrong error probe return value
	perf evsel: Missed close() when probing hybrid core PMUs
	perf test: Directory file descriptor leak
	gpio: mlxbf3: only get IRQ for device instance 0
	cifs: Remove duplicate fattr->cf_dtype assignment from wsl_to_fattr() function
	bpftool: Fix cgroup command to only show cgroup bpf programs
	Linux 6.12.35

Change-Id: Ida57d269272a624bedb979bfad0b3c5e7df7e846
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2025-07-10 16:01:38 +00:00

2057 lines
53 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* IPv6 output functions
* Linux INET6 implementation
*
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
* Based on linux/net/ipv4/ip_output.c
*
* Changes:
* A.N.Kuznetsov : airthmetics in fragmentation.
* extension headers are implemented.
* route changes now work.
* ip6_forward does not confuse sniffers.
* etc.
*
* H. von Brand : Added missing #include <linux/string.h>
* Imran Patel : frag id should be in NBO
* Kazunori MIYAZAWA @USAGI
* : add ip6_append_data and related functions
* for datagram xmit
*/
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
#include <net/sock.h>
#include <net/snmp.h>
#include <net/gso.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
struct inet6_dev *idev = ip6_dst_idev(dst);
unsigned int hh_len = LL_RESERVED_SPACE(dev);
const struct in6_addr *daddr, *nexthop;
struct ipv6hdr *hdr;
struct neighbour *neigh;
int ret;
/* Be paranoid, rather than too clever. */
if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
/* Make sure idev stays alive */
rcu_read_lock();
skb = skb_expand_head(skb, hh_len);
if (!skb) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
rcu_read_unlock();
return -ENOMEM;
}
rcu_read_unlock();
}
hdr = ipv6_hdr(skb);
daddr = &hdr->daddr;
if (ipv6_addr_is_multicast(daddr)) {
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
((mroute6_is_socket(net, skb) &&
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
/* Do not check for IFF_ALLMULTI; multicast routing
is not supported in any case.
*/
if (newskb)
NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
net, sk, newskb, NULL, newskb->dev,
dev_loopback_xmit);
if (hdr->hop_limit == 0) {
IP6_INC_STATS(net, idev,
IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
return 0;
}
}
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
!(dev->flags & IFF_LOOPBACK)) {
kfree_skb(skb);
return 0;
}
}
if (lwtunnel_xmit_redirect(dst->lwtstate)) {
int res = lwtunnel_xmit(skb);
if (res != LWTUNNEL_XMIT_CONTINUE)
return res;
}
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
rcu_read_lock();
nexthop = rt6_nexthop(dst_rt6_info(dst), daddr);
neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
if (unlikely(IS_ERR_OR_NULL(neigh))) {
if (unlikely(!neigh))
neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
if (IS_ERR(neigh)) {
rcu_read_unlock();
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
return -EINVAL;
}
}
sock_confirm_neigh(skb, neigh);
ret = neigh_output(neigh, skb, false);
rcu_read_unlock();
return ret;
}
static int
ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
struct sk_buff *skb, unsigned int mtu)
{
struct sk_buff *segs, *nskb;
netdev_features_t features;
int ret = 0;
/* Please see corresponding comment in ip_finish_output_gso
* describing the cases where GSO segment length exceeds the
* egress MTU.
*/
features = netif_skb_features(skb);
segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
if (IS_ERR_OR_NULL(segs)) {
kfree_skb(skb);
return -ENOMEM;
}
consume_skb(skb);
skb_list_walk_safe(segs, segs, nskb) {
int err;
skb_mark_not_on_list(segs);
/* Last GSO segment can be smaller than gso_size (and MTU).
* Adding a fragment header would produce an "atomic fragment",
* which is considered harmful (RFC-8021). Avoid that.
*/
err = segs->len > mtu ?
ip6_fragment(net, sk, segs, ip6_finish_output2) :
ip6_finish_output2(net, sk, segs);
if (err && ret == 0)
ret = err;
}
return ret;
}
static int ip6_finish_output_gso(struct net *net, struct sock *sk,
struct sk_buff *skb, unsigned int mtu)
{
if (!(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) &&
!skb_gso_validate_network_len(skb, mtu))
return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
return ip6_finish_output2(net, sk, skb);
}
static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
unsigned int mtu;
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
/* Policy lookup after SNAT yielded a new policy */
if (skb_dst(skb)->xfrm) {
IP6CB(skb)->flags |= IP6SKB_REROUTED;
return dst_output(net, sk, skb);
}
#endif
mtu = ip6_skb_dst_mtu(skb);
if (skb_is_gso(skb))
return ip6_finish_output_gso(net, sk, skb, mtu);
if (skb->len > mtu ||
(IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
return ip6_fragment(net, sk, skb, ip6_finish_output2);
return ip6_finish_output2(net, sk, skb);
}
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
int ret;
ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
switch (ret) {
case NET_XMIT_SUCCESS:
case NET_XMIT_CN:
return __ip6_finish_output(net, sk, skb) ? : ret;
default:
kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS);
return ret;
}
}
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;
if (unlikely(!idev || READ_ONCE(idev->cnf.disable_ipv6))) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED);
return 0;
}
return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
net, sk, skb, indev, dev,
ip6_finish_output,
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
EXPORT_SYMBOL(ip6_output);
bool ip6_autoflowlabel(struct net *net, const struct sock *sk)
{
if (!inet6_test_bit(AUTOFLOWLABEL_SET, sk))
return ip6_default_np_autolabel(net);
return inet6_test_bit(AUTOFLOWLABEL, sk);
}
/*
* xmit an sk_buff (used by TCP, SCTP and DCCP)
* Note : socket lock is not held for SYNACK packets, but might be modified
* by calls to skb_set_owner_w() and ipv6_local_error(),
* which are using proper atomic operations or spinlocks.
*/
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
__u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
struct net *net = sock_net(sk);
const struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *first_hop = &fl6->daddr;
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
struct inet6_dev *idev = ip6_dst_idev(dst);
struct hop_jumbo_hdr *hop_jumbo;
int hoplen = sizeof(*hop_jumbo);
unsigned int head_room;
struct ipv6hdr *hdr;
u8 proto = fl6->flowi6_proto;
int seg_len = skb->len;
int hlimit = -1;
u32 mtu;
head_room = sizeof(struct ipv6hdr) + hoplen + LL_RESERVED_SPACE(dev);
if (opt)
head_room += opt->opt_nflen + opt->opt_flen;
if (unlikely(head_room > skb_headroom(skb))) {
/* Make sure idev stays alive */
rcu_read_lock();
skb = skb_expand_head(skb, head_room);
if (!skb) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
rcu_read_unlock();
return -ENOBUFS;
}
rcu_read_unlock();
}
if (opt) {
seg_len += opt->opt_nflen + opt->opt_flen;
if (opt->opt_flen)
ipv6_push_frag_opts(skb, opt, &proto);
if (opt->opt_nflen)
ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
&fl6->saddr);
}
if (unlikely(seg_len > IPV6_MAXPLEN)) {
hop_jumbo = skb_push(skb, hoplen);
hop_jumbo->nexthdr = proto;
hop_jumbo->hdrlen = 0;
hop_jumbo->tlv_type = IPV6_TLV_JUMBO;
hop_jumbo->tlv_len = 4;
hop_jumbo->jumbo_payload_len = htonl(seg_len + hoplen);
proto = IPPROTO_HOPOPTS;
seg_len = 0;
IP6CB(skb)->flags |= IP6SKB_FAKEJUMBO;
}
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
hdr = ipv6_hdr(skb);
/*
* Fill in the IPv6 header
*/
if (np)
hlimit = READ_ONCE(np->hop_limit);
if (hlimit < 0)
hlimit = ip6_dst_hoplimit(dst);
ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
ip6_autoflowlabel(net, sk), fl6));
hdr->payload_len = htons(seg_len);
hdr->nexthdr = proto;
hdr->hop_limit = hlimit;
hdr->saddr = fl6->saddr;
hdr->daddr = *first_hop;
skb->protocol = htons(ETH_P_IPV6);
skb->priority = priority;
skb->mark = mark;
mtu = dst_mtu(dst);
if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
/* if egress device is enslaved to an L3 master device pass the
* skb to its handler for processing
*/
skb = l3mdev_ip6_out((struct sock *)sk, skb);
if (unlikely(!skb))
return 0;
/* hooks should never assume socket lock is held.
* we promote our socket to non const
*/
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
net, (struct sock *)sk, skb, NULL, dev,
dst_output);
}
skb->dev = dev;
/* ipv6_local_error() does not require socket lock,
* we promote our socket to non const
*/
ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
struct ip6_ra_chain *ra;
struct sock *last = NULL;
read_lock(&ip6_ra_lock);
for (ra = ip6_ra_chain; ra; ra = ra->next) {
struct sock *sk = ra->sk;
if (sk && ra->sel == sel &&
(!sk->sk_bound_dev_if ||
sk->sk_bound_dev_if == skb->dev->ifindex)) {
if (inet6_test_bit(RTALERT_ISOLATE, sk) &&
!net_eq(sock_net(sk), dev_net(skb->dev))) {
continue;
}
if (last) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
rawv6_rcv(last, skb2);
}
last = sk;
}
}
if (last) {
rawv6_rcv(last, skb);
read_unlock(&ip6_ra_lock);
return 1;
}
read_unlock(&ip6_ra_lock);
return 0;
}
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
struct ipv6hdr *hdr = ipv6_hdr(skb);
u8 nexthdr = hdr->nexthdr;
__be16 frag_off;
int offset;
if (ipv6_ext_hdr(nexthdr)) {
offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
if (offset < 0)
return 0;
} else
offset = sizeof(struct ipv6hdr);
if (nexthdr == IPPROTO_ICMPV6) {
struct icmp6hdr *icmp6;
if (!pskb_may_pull(skb, (skb_network_header(skb) +
offset + 1 - skb->data)))
return 0;
icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
switch (icmp6->icmp6_type) {
case NDISC_ROUTER_SOLICITATION:
case NDISC_ROUTER_ADVERTISEMENT:
case NDISC_NEIGHBOUR_SOLICITATION:
case NDISC_NEIGHBOUR_ADVERTISEMENT:
case NDISC_REDIRECT:
/* For reaction involving unicast neighbor discovery
* message destined to the proxied address, pass it to
* input function.
*/
return 1;
default:
break;
}
}
/*
* The proxying router can't forward traffic sent to a link-local
* address, so signal the sender and discard the packet. This
* behavior is clarified by the MIPv6 specification.
*/
if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
dst_link_failure(skb);
return -1;
}
return 0;
}
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
#ifdef CONFIG_NET_SWITCHDEV
if (skb->offload_l3_fwd_mark) {
consume_skb(skb);
return 0;
}
#endif
skb_clear_tstamp(skb);
return dst_output(net, sk, skb);
}
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
if (skb->len <= mtu)
return false;
/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
return true;
if (skb->ignore_df)
return false;
if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
return false;
return true;
}
int ip6_forward(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct inet6_skb_parm *opt = IP6CB(skb);
struct net *net = dev_net(dst->dev);
struct inet6_dev *idev;
SKB_DR(reason);
u32 mtu;
idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0)
goto error;
if (skb->pkt_type != PACKET_HOST)
goto drop;
if (unlikely(skb->sk))
goto drop;
if (skb_warn_if_lro(skb))
goto drop;
if (!READ_ONCE(net->ipv6.devconf_all->disable_policy) &&
(!idev || !READ_ONCE(idev->cnf.disable_policy)) &&
!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
goto drop;
}
skb_forward_csum(skb);
/*
* We DO NOT make any processing on
* RA packets, pushing them to user level AS IS
* without ane WARRANTY that application will be able
* to interpret them. The reason is that we
* cannot make anything clever here.
*
* We are not end-node, so that if packet contains
* AH/ESP, we cannot make anything.
* Defragmentation also would be mistake, RA packets
* cannot be fragmented, because there is no warranty
* that different fragments will go along one path. --ANK
*/
if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
return 0;
}
/*
* check and decrement ttl
*/
if (hdr->hop_limit <= 1) {
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
return -ETIMEDOUT;
}
/* XXX: idev->cnf.proxy_ndp? */
if (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) &&
pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
int proxied = ip6_forward_proxy_check(skb);
if (proxied > 0) {
/* It's tempting to decrease the hop limit
* here by 1, as we do at the end of the
* function too.
*
* But that would be incorrect, as proxying is
* not forwarding. The ip6_input function
* will handle this packet locally, and it
* depends on the hop limit being unchanged.
*
* One example is the NDP hop limit, that
* always has to stay 255, but other would be
* similar checks around RA packets, where the
* user can even change the desired limit.
*/
return ip6_input(skb);
} else if (proxied < 0) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
goto drop;
}
}
if (!xfrm6_route_forward(skb)) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
SKB_DR_SET(reason, XFRM_POLICY);
goto drop;
}
dst = skb_dst(skb);
/* IPv6 specs say nothing about it, but it is clear that we cannot
send redirects to source routed frames.
We don't send redirects to frames decapsulated from IPsec.
*/
if (IP6CB(skb)->iif == dst->dev->ifindex &&
opt->srcrt == 0 && !skb_sec_path(skb)) {
struct in6_addr *target = NULL;
struct inet_peer *peer;
struct rt6_info *rt;
/*
* incoming and outgoing devices are the same
* send a redirect.
*/
rt = dst_rt6_info(dst);
if (rt->rt6i_flags & RTF_GATEWAY)
target = &rt->rt6i_gateway;
else
target = &hdr->daddr;
rcu_read_lock();
peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr);
/* Limit redirects both by destination (here)
and by source (inside ndisc_send_redirect)
*/
if (inet_peer_xrlim_allow(peer, 1*HZ))
ndisc_send_redirect(skb, target);
rcu_read_unlock();
} else {
int addrtype = ipv6_addr_type(&hdr->saddr);
/* This check is security critical. */
if (addrtype == IPV6_ADDR_ANY ||
addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
goto error;
if (addrtype & IPV6_ADDR_LINKLOCAL) {
icmpv6_send(skb, ICMPV6_DEST_UNREACH,
ICMPV6_NOT_NEIGHBOUR, 0);
goto error;
}
}
__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
mtu = ip6_dst_mtu_maybe_forward(dst, true);
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
if (ip6_pkt_too_big(skb, mtu)) {
/* Again, force OUTPUT device used as source address */
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
__IP6_INC_STATS(net, ip6_dst_idev(dst),
IPSTATS_MIB_FRAGFAILS);
kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
return -EMSGSIZE;
}
if (skb_cow(skb, dst->dev->hard_header_len)) {
__IP6_INC_STATS(net, ip6_dst_idev(dst),
IPSTATS_MIB_OUTDISCARDS);
goto drop;
}
hdr = ipv6_hdr(skb);
/* Mangling hops number delayed to point after skb COW */
hdr->hop_limit--;
return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
net, NULL, skb, skb->dev, dst->dev,
ip6_forward_finish);
error:
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
SKB_DR_SET(reason, IP_INADDRERRORS);
drop:
kfree_skb_reason(skb, reason);
return -EINVAL;
}
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
to->pkt_type = from->pkt_type;
to->priority = from->priority;
to->protocol = from->protocol;
skb_dst_drop(to);
skb_dst_set(to, dst_clone(skb_dst(from)));
to->dev = from->dev;
to->mark = from->mark;
skb_copy_hash(to, from);
#ifdef CONFIG_NET_SCHED
to->tc_index = from->tc_index;
#endif
nf_copy(to, from);
skb_ext_copy(to, from);
skb_copy_secmark(to, from);
}
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
u8 nexthdr, __be32 frag_id,
struct ip6_fraglist_iter *iter)
{
unsigned int first_len;
struct frag_hdr *fh;
/* BUILD HEADER */
*prevhdr = NEXTHDR_FRAGMENT;
iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
if (!iter->tmp_hdr)
return -ENOMEM;
iter->frag = skb_shinfo(skb)->frag_list;
skb_frag_list_init(skb);
iter->offset = 0;
iter->hlen = hlen;
iter->frag_id = frag_id;
iter->nexthdr = nexthdr;
__skb_pull(skb, hlen);
fh = __skb_push(skb, sizeof(struct frag_hdr));
__skb_push(skb, hlen);
skb_reset_network_header(skb);
memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
fh->nexthdr = nexthdr;
fh->reserved = 0;
fh->frag_off = htons(IP6_MF);
fh->identification = frag_id;
first_len = skb_pagelen(skb);
skb->data_len = first_len - skb_headlen(skb);
skb->len = first_len;
ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);
void ip6_fraglist_prepare(struct sk_buff *skb,
struct ip6_fraglist_iter *iter)
{
struct sk_buff *frag = iter->frag;
unsigned int hlen = iter->hlen;
struct frag_hdr *fh;
frag->ip_summed = CHECKSUM_NONE;
skb_reset_transport_header(frag);
fh = __skb_push(frag, sizeof(struct frag_hdr));
__skb_push(frag, hlen);
skb_reset_network_header(frag);
memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
fh->nexthdr = iter->nexthdr;
fh->reserved = 0;
fh->frag_off = htons(iter->offset);
if (frag->next)
fh->frag_off |= htons(IP6_MF);
fh->identification = iter->frag_id;
ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);
void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
state->prevhdr = prevhdr;
state->nexthdr = nexthdr;
state->frag_id = frag_id;
state->hlen = hlen;
state->mtu = mtu;
state->left = skb->len - hlen; /* Space per frame */
state->ptr = hlen; /* Where to start from */
state->hroom = hdr_room;
state->troom = needed_tailroom;
state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
struct sk_buff *frag;
struct frag_hdr *fh;
unsigned int len;
len = state->left;
/* IF: it doesn't fit, use 'mtu' - the data space left */
if (len > state->mtu)
len = state->mtu;
/* IF: we are not sending up to and including the packet end
then align the next start on an eight byte boundary */
if (len < state->left)
len &= ~7;
/* Allocate buffer */
frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
state->hroom + state->troom, GFP_ATOMIC);
if (!frag)
return ERR_PTR(-ENOMEM);
/*
* Set up data on packet
*/
ip6_copy_metadata(frag, skb);
skb_reserve(frag, state->hroom);
skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
skb_reset_network_header(frag);
fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
frag->transport_header = (frag->network_header + state->hlen +
sizeof(struct frag_hdr));
/*
* Charge the memory for the fragment to any owner
* it might possess
*/
if (skb->sk)
skb_set_owner_w(frag, skb->sk);
/*
* Copy the packet header into the new buffer.
*/
skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
fragnexthdr_offset = skb_network_header(frag);
fragnexthdr_offset += prevhdr - skb_network_header(skb);
*fragnexthdr_offset = NEXTHDR_FRAGMENT;
/*
* Build fragment header.
*/
fh->nexthdr = state->nexthdr;
fh->reserved = 0;
fh->identification = state->frag_id;
/*
* Copy a block of the IP datagram.
*/
BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
len));
state->left -= len;
fh->frag_off = htons(state->offset);
if (state->left > 0)
fh->frag_off |= htons(IP6_MF);
ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
state->ptr += len;
state->offset += len;
return frag;
}
EXPORT_SYMBOL(ip6_frag_next);
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *))
{
struct sk_buff *frag;
struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
inet6_sk(skb->sk) : NULL;
u8 tstamp_type = skb->tstamp_type;
struct ip6_frag_state state;
unsigned int mtu, hlen, nexthdr_offset;
ktime_t tstamp = skb->tstamp;
int hroom, err = 0;
__be32 frag_id;
u8 *prevhdr, nexthdr = 0;
err = ip6_find_1stfragopt(skb, &prevhdr);
if (err < 0)
goto fail;
hlen = err;
nexthdr = *prevhdr;
nexthdr_offset = prevhdr - skb_network_header(skb);
mtu = ip6_skb_dst_mtu(skb);
/* We must not fragment if the socket is set to force MTU discovery
* or if the skb it not generated by a local socket.
*/
if (unlikely(!skb->ignore_df && skb->len > mtu))
goto fail_toobig;
if (IP6CB(skb)->frag_max_size) {
if (IP6CB(skb)->frag_max_size > mtu)
goto fail_toobig;
/* don't send fragments larger than what we received */
mtu = IP6CB(skb)->frag_max_size;
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
}
if (np) {
u32 frag_size = READ_ONCE(np->frag_size);
if (frag_size && frag_size < mtu)
mtu = frag_size;
}
if (mtu < hlen + sizeof(struct frag_hdr) + 8)
goto fail_toobig;
mtu -= hlen + sizeof(struct frag_hdr);
frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
&ipv6_hdr(skb)->saddr);
if (skb->ip_summed == CHECKSUM_PARTIAL &&
(err = skb_checksum_help(skb)))
goto fail;
prevhdr = skb_network_header(skb) + nexthdr_offset;
hroom = LL_RESERVED_SPACE(rt->dst.dev);
if (skb_has_frag_list(skb)) {
unsigned int first_len = skb_pagelen(skb);
struct ip6_fraglist_iter iter;
struct sk_buff *frag2;
if (first_len - hlen > mtu ||
((first_len - hlen) & 7) ||
skb_cloned(skb) ||
skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
goto slow_path;
skb_walk_frags(skb, frag) {
/* Correct geometry. */
if (frag->len > mtu ||
((frag->len & 7) && frag->next) ||
skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
goto slow_path_clean;
/* Partially cloned skb? */
if (skb_shared(frag))
goto slow_path_clean;
BUG_ON(frag->sk);
if (skb->sk) {
frag->sk = skb->sk;
frag->destructor = sock_wfree;
}
skb->truesize -= frag->truesize;
}
err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
&iter);
if (err < 0)
goto fail;
/* We prevent @rt from being freed. */
rcu_read_lock();
for (;;) {
/* Prepare header of the next frame,
* before previous one went down. */
if (iter.frag)
ip6_fraglist_prepare(skb, &iter);
skb_set_delivery_time(skb, tstamp, tstamp_type);
err = output(net, sk, skb);
if (!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGCREATES);
if (err || !iter.frag)
break;
skb = ip6_fraglist_next(&iter);
}
kfree(iter.tmp_hdr);
if (err == 0) {
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGOKS);
rcu_read_unlock();
return 0;
}
kfree_skb_list(iter.frag);
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGFAILS);
rcu_read_unlock();
return err;
slow_path_clean:
skb_walk_frags(skb, frag2) {
if (frag2 == frag)
break;
frag2->sk = NULL;
frag2->destructor = NULL;
skb->truesize += frag2->truesize;
}
}
slow_path:
/*
* Fragment the datagram.
*/
ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
&state);
/*
* Keep copying data until we run out.
*/
while (state.left > 0) {
frag = ip6_frag_next(skb, &state);
if (IS_ERR(frag)) {
err = PTR_ERR(frag);
goto fail;
}
/*
* Put this fragment into the sending queue.
*/
skb_set_delivery_time(frag, tstamp, tstamp_type);
err = output(net, sk, frag);
if (err)
goto fail;
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_FRAGCREATES);
}
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_FRAGOKS);
consume_skb(skb);
return err;
fail_toobig:
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
err = -EMSGSIZE;
fail:
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return err;
}
static inline int ip6_rt_check(const struct rt6key *rt_key,
const struct in6_addr *fl_addr,
const struct in6_addr *addr_cache)
{
return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
struct dst_entry *dst,
const struct flowi6 *fl6)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct rt6_info *rt;
if (!dst)
goto out;
if (dst->ops->family != AF_INET6) {
dst_release(dst);
return NULL;
}
rt = dst_rt6_info(dst);
/* Yes, checking route validity in not connected
* case is not very simple. Take into account,
* that we do not support routing by source, TOS,
* and MSG_DONTROUTE --ANK (980726)
*
* 1. ip6_rt_check(): If route was host route,
* check that cached destination is current.
* If it is network route, we still may
* check its validity using saved pointer
* to the last used address: daddr_cache.
* We do not want to save whole address now,
* (because main consumer of this service
* is tcp, which has not this problem),
* so that the last trick works only on connected
* sockets.
* 2. oif also should be the same.
*/
if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
(fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
dst_release(dst);
dst = NULL;
}
out:
return dst;
}
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
struct neighbour *n;
struct rt6_info *rt;
#endif
int err;
int flags = 0;
/* The correct way to handle this would be to do
* ip6_route_get_saddr, and then ip6_route_output; however,
* the route-specific preferred source forces the
* ip6_route_output call _before_ ip6_route_get_saddr.
*
* In source specific routing (no src=any default route),
* ip6_route_output will fail given src=any saddr, though, so
* that's why we try it again later.
*/
if (ipv6_addr_any(&fl6->saddr)) {
struct fib6_info *from;
struct rt6_info *rt;
*dst = ip6_route_output(net, sk, fl6);
rt = (*dst)->error ? NULL : dst_rt6_info(*dst);
rcu_read_lock();
from = rt ? rcu_dereference(rt->from) : NULL;
err = ip6_route_get_saddr(net, from, &fl6->daddr,
sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0,
fl6->flowi6_l3mdev,
&fl6->saddr);
rcu_read_unlock();
if (err)
goto out_err_release;
/* If we had an erroneous initial result, pretend it
* never existed and let the SA-enabled version take
* over.
*/
if ((*dst)->error) {
dst_release(*dst);
*dst = NULL;
}
if (fl6->flowi6_oif)
flags |= RT6_LOOKUP_F_IFACE;
}
if (!*dst)
*dst = ip6_route_output_flags(net, sk, fl6, flags);
err = (*dst)->error;
if (err)
goto out_err_release;
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
/*
* Here if the dst entry we've looked up
* has a neighbour entry that is in the INCOMPLETE
* state and the src address from the flow is
* marked as OPTIMISTIC, we release the found
* dst entry and replace it instead with the
* dst entry of the nexthop router
*/
rt = dst_rt6_info(*dst);
rcu_read_lock();
n = __ipv6_neigh_lookup_noref(rt->dst.dev,
rt6_nexthop(rt, &fl6->daddr));
err = n && !(READ_ONCE(n->nud_state) & NUD_VALID) ? -EINVAL : 0;
rcu_read_unlock();
if (err) {
struct inet6_ifaddr *ifp;
struct flowi6 fl_gw6;
int redirect;
ifp = ipv6_get_ifaddr(net, &fl6->saddr,
(*dst)->dev, 1);
redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
if (ifp)
in6_ifa_put(ifp);
if (redirect) {
/*
* We need to get the dst entry for the
* default router instead
*/
dst_release(*dst);
memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
*dst = ip6_route_output(net, sk, &fl_gw6);
err = (*dst)->error;
if (err)
goto out_err_release;
}
}
#endif
if (ipv6_addr_v4mapped(&fl6->saddr) &&
!(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
err = -EAFNOSUPPORT;
goto out_err_release;
}
return 0;
out_err_release:
dst_release(*dst);
*dst = NULL;
if (err == -ENETUNREACH)
IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
return err;
}
/**
* ip6_dst_lookup - perform route lookup on flow
* @net: Network namespace to perform lookup in
* @sk: socket which provides route info
* @dst: pointer to dst_entry * for result
* @fl6: flow to lookup
*
* This function performs a route lookup on the given flow.
*
* It returns zero on success, or a standard errno code on error.
*/
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
struct flowi6 *fl6)
{
*dst = NULL;
return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
/**
* ip6_dst_lookup_flow - perform route lookup on flow with ipsec
* @net: Network namespace to perform lookup in
* @sk: socket which provides route info
* @fl6: flow to lookup
* @final_dst: final destination address for ipsec lookup
*
* This function performs a route lookup on the given flow.
*
* It returns a valid dst pointer on success, or a pointer encoded
* error code.
*/
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
const struct in6_addr *final_dst)
{
struct dst_entry *dst = NULL;
int err;
err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
if (err)
return ERR_PTR(err);
if (final_dst)
fl6->daddr = *final_dst;
return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
/**
* ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
* @sk: socket which provides the dst cache and route info
* @fl6: flow to lookup
* @final_dst: final destination address for ipsec lookup
* @connected: whether @sk is connected or not
*
* This function performs a route lookup on the given flow with the
* possibility of using the cached route in the socket if it is valid.
* It will take the socket dst lock when operating on the dst cache.
* As a result, this function can only be used in process context.
*
* In addition, for a connected socket, cache the dst in the socket
* if the current cache is not valid.
*
* It returns a valid dst pointer on success, or a pointer encoded
* error code.
*/
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
const struct in6_addr *final_dst,
bool connected)
{
struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
dst = ip6_sk_dst_check(sk, dst, fl6);
if (dst)
return dst;
dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
if (connected && !IS_ERR(dst))
ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
gfp_t gfp)
{
return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}
static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
gfp_t gfp)
{
return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}
static void ip6_append_data_mtu(unsigned int *mtu,
int *maxfraglen,
unsigned int fragheaderlen,
struct sk_buff *skb,
struct rt6_info *rt,
unsigned int orig_mtu)
{
if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
if (!skb) {
/* first fragment, reserve header_len */
*mtu = orig_mtu - rt->dst.header_len;
} else {
/*
* this fragment is not first, the headers
* space is regarded as data space.
*/
*mtu = orig_mtu;
}
*maxfraglen = ((*mtu - fragheaderlen) & ~7)
+ fragheaderlen - sizeof(struct frag_hdr);
}
}
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
struct rt6_info *rt)
{
struct ipv6_pinfo *np = inet6_sk(sk);
unsigned int mtu, frag_size;
struct ipv6_txoptions *nopt, *opt = ipc6->opt;
/* callers pass dst together with a reference, set it first so
* ip6_cork_release() can put it down even in case of an error.
*/
cork->base.dst = &rt->dst;
/*
* setup for corking
*/
if (opt) {
if (WARN_ON(v6_cork->opt))
return -EINVAL;
nopt = v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
if (unlikely(!nopt))
return -ENOBUFS;
nopt->tot_len = sizeof(*opt);
nopt->opt_flen = opt->opt_flen;
nopt->opt_nflen = opt->opt_nflen;
nopt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation);
if (opt->dst0opt && !nopt->dst0opt)
return -ENOBUFS;
nopt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation);
if (opt->dst1opt && !nopt->dst1opt)
return -ENOBUFS;
nopt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation);
if (opt->hopopt && !nopt->hopopt)
return -ENOBUFS;
nopt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation);
if (opt->srcrt && !nopt->srcrt)
return -ENOBUFS;
/* need source address above miyazawa*/
}
v6_cork->hop_limit = ipc6->hlimit;
v6_cork->tclass = ipc6->tclass;
if (rt->dst.flags & DST_XFRM_TUNNEL)
mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
else
mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
frag_size = READ_ONCE(np->frag_size);
if (frag_size && frag_size < mtu)
mtu = frag_size;
cork->base.fragsize = mtu;
cork->base.gso_size = ipc6->gso_size;
cork->base.tx_flags = 0;
cork->base.mark = ipc6->sockc.mark;
sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
cork->base.length = 0;
cork->base.transmit_time = ipc6->sockc.transmit_time;
return 0;
}
static int __ip6_append_data(struct sock *sk,
struct sk_buff_head *queue,
struct inet_cork_full *cork_full,
struct inet6_cork *v6_cork,
struct page_frag *pfrag,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
void *from, size_t length, int transhdrlen,
unsigned int flags, struct ipcm6_cookie *ipc6)
{
struct sk_buff *skb, *skb_prev = NULL;
struct inet_cork *cork = &cork_full->base;
struct flowi6 *fl6 = &cork_full->fl.u.ip6;
unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
struct ubuf_info *uarg = NULL;
int exthdrlen = 0;
int dst_exthdrlen = 0;
int hh_len;
int copy;
int err;
int offset = 0;
bool zc = false;
u32 tskey = 0;
struct rt6_info *rt = dst_rt6_info(cork->dst);
bool paged, hold_tskey, extra_uref = false;
struct ipv6_txoptions *opt = v6_cork->opt;
int csummode = CHECKSUM_NONE;
unsigned int maxnonfragsize, headersize;
unsigned int wmem_alloc_delta = 0;
skb = skb_peek_tail(queue);
if (!skb) {
exthdrlen = opt ? opt->opt_flen : 0;
dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
}
paged = !!cork->gso_size;
mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
orig_mtu = mtu;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
(opt ? opt->opt_nflen : 0);
headersize = sizeof(struct ipv6hdr) +
(opt ? opt->opt_flen + opt->opt_nflen : 0) +
rt->rt6i_nfheader_len;
if (mtu <= fragheaderlen ||
((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
goto emsgsize;
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
sizeof(struct frag_hdr);
/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
* the first fragment
*/
if (headersize + transhdrlen > mtu)
goto emsgsize;
if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
(sk->sk_protocol == IPPROTO_UDP ||
sk->sk_protocol == IPPROTO_ICMPV6 ||
sk->sk_protocol == IPPROTO_RAW)) {
ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
sizeof(struct ipv6hdr));
goto emsgsize;
}
if (ip6_sk_ignore_df(sk))
maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
else
maxnonfragsize = mtu;
if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
return -EMSGSIZE;
}
/* CHECKSUM_PARTIAL only with no extension headers and when
* we are not going to fragment
*/
if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
headersize == sizeof(struct ipv6hdr) &&
length <= mtu - headersize &&
(!(flags & MSG_MORE) || cork->gso_size) &&
rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
csummode = CHECKSUM_PARTIAL;
if ((flags & MSG_ZEROCOPY) && length) {
struct msghdr *msg = from;
if (getfrag == ip_generic_getfrag && msg->msg_ubuf) {
if (skb_zcopy(skb) && msg->msg_ubuf != skb_zcopy(skb))
return -EINVAL;
/* Leave uarg NULL if can't zerocopy, callers should
* be able to handle it.
*/
if ((rt->dst.dev->features & NETIF_F_SG) &&
csummode == CHECKSUM_PARTIAL) {
paged = true;
zc = true;
uarg = msg->msg_ubuf;
}
} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
if (!uarg)
return -ENOBUFS;
extra_uref = !skb_zcopy(skb); /* only ref on new uarg */
if (rt->dst.dev->features & NETIF_F_SG &&
csummode == CHECKSUM_PARTIAL) {
paged = true;
zc = true;
} else {
uarg_to_msgzc(uarg)->zerocopy = 0;
skb_zcopy_set(skb, uarg, &extra_uref);
}
}
} else if ((flags & MSG_SPLICE_PAGES) && length) {
if (inet_test_bit(HDRINCL, sk))
return -EPERM;
if (rt->dst.dev->features & NETIF_F_SG &&
getfrag == ip_generic_getfrag)
/* We need an empty buffer to attach stuff to */
paged = true;
else
flags &= ~MSG_SPLICE_PAGES;
}
hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
if (hold_tskey)
tskey = atomic_inc_return(&sk->sk_tskey) - 1;
/*
* Let's try using as much space as possible.
* Use MTU if total length of the message fits into the MTU.
* Otherwise, we need to reserve fragment header and
* fragment alignment (= 8-15 octects, in total).
*
* Note that we may need to "move" the data from the tail
* of the buffer to the new fragment when we split
* the message.
*
* FIXME: It may be fragmented into multiple chunks
* at once if non-fragmentable extension headers
* are too large.
* --yoshfuji
*/
cork->length += length;
if (!skb)
goto alloc_new_skb;
while (length > 0) {
/* Check if the remaining data fits into current packet. */
copy = (cork->length <= mtu ? mtu : maxfraglen) - skb->len;
if (copy < length)
copy = maxfraglen - skb->len;
if (copy <= 0) {
char *data;
unsigned int datalen;
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen, alloc_extra;
unsigned int pagedlen;
alloc_new_skb:
/* There's no room in the current skb */
if (skb)
fraggap = skb->len - maxfraglen;
else
fraggap = 0;
/* update mtu and maxfraglen if necessary */
if (!skb || !skb_prev)
ip6_append_data_mtu(&mtu, &maxfraglen,
fragheaderlen, skb, rt,
orig_mtu);
skb_prev = skb;
/*
* If remaining data exceeds the mtu,
* we know we need more fragment(s).
*/
datalen = length + fraggap;
if (datalen > (cork->length <= mtu ? mtu : maxfraglen) - fragheaderlen)
datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
fraglen = datalen + fragheaderlen;
pagedlen = 0;
alloc_extra = hh_len;
alloc_extra += dst_exthdrlen;
alloc_extra += rt->dst.trailer_len;
/* We just reserve space for fragment header.
* Note: this may be overallocation if the message
* (without MSG_MORE) fits into the MTU.
*/
alloc_extra += sizeof(struct frag_hdr);
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
else if (!paged &&
(fraglen + alloc_extra < SKB_MAX_ALLOC ||
!(rt->dst.dev->features & NETIF_F_SG)))
alloclen = fraglen;
else {
alloclen = fragheaderlen + transhdrlen;
pagedlen = datalen - transhdrlen;
}
alloclen += alloc_extra;
if (datalen != length + fraggap) {
/*
* this is not the last fragment, the trailer
* space is regarded as data space.
*/
datalen += rt->dst.trailer_len;
}
fraglen = datalen + fragheaderlen;
copy = datalen - transhdrlen - fraggap - pagedlen;
/* [!] NOTE: copy may be negative if pagedlen>0
* because then the equation may reduces to -fraggap.
*/
if (copy < 0 && !(flags & MSG_SPLICE_PAGES)) {
err = -EINVAL;
goto error;
}
if (transhdrlen) {
skb = sock_alloc_send_skb(sk, alloclen,
(flags & MSG_DONTWAIT), &err);
} else {
skb = NULL;
if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
2 * sk->sk_sndbuf)
skb = alloc_skb(alloclen,
sk->sk_allocation);
if (unlikely(!skb))
err = -ENOBUFS;
}
if (!skb)
goto error;
/*
* Fill in the control structures
*/
skb->protocol = htons(ETH_P_IPV6);
skb->ip_summed = csummode;
skb->csum = 0;
/* reserve for fragmentation and ipsec header */
skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
dst_exthdrlen);
/*
* Find where to start putting bytes
*/
data = skb_put(skb, fraglen - pagedlen);
skb_set_network_header(skb, exthdrlen);
data += fragheaderlen;
skb->transport_header = (skb->network_header +
fragheaderlen);
if (fraggap) {
skb->csum = skb_copy_and_csum_bits(
skb_prev, maxfraglen,
data + transhdrlen, fraggap);
skb_prev->csum = csum_sub(skb_prev->csum,
skb->csum);
data += fraggap;
pskb_trim_unique(skb_prev, maxfraglen);
}
if (copy > 0 &&
getfrag(from, data + transhdrlen, offset,
copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
goto error;
} else if (flags & MSG_SPLICE_PAGES) {
copy = 0;
}
offset += copy;
length -= copy + transhdrlen;
transhdrlen = 0;
exthdrlen = 0;
dst_exthdrlen = 0;
/* Only the initial fragment is time stamped */
skb_shinfo(skb)->tx_flags = cork->tx_flags;
cork->tx_flags = 0;
skb_shinfo(skb)->tskey = tskey;
tskey = 0;
skb_zcopy_set(skb, uarg, &extra_uref);
if ((flags & MSG_CONFIRM) && !skb_prev)
skb_set_dst_pending_confirm(skb, 1);
/*
* Put the packet on the pending queue
*/
if (!skb->destructor) {
skb->destructor = sock_wfree;
skb->sk = sk;
wmem_alloc_delta += skb->truesize;
}
__skb_queue_tail(queue, skb);
continue;
}
if (copy > length)
copy = length;
if (!(rt->dst.dev->features&NETIF_F_SG) &&
skb_tailroom(skb) >= copy) {
unsigned int off;
off = skb->len;
if (getfrag(from, skb_put(skb, copy),
offset, copy, off, skb) < 0) {
__skb_trim(skb, off);
err = -EFAULT;
goto error;
}
} else if (flags & MSG_SPLICE_PAGES) {
struct msghdr *msg = from;
err = -EIO;
if (WARN_ON_ONCE(copy > msg->msg_iter.count))
goto error;
err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
sk->sk_allocation);
if (err < 0)
goto error;
copy = err;
wmem_alloc_delta += copy;
} else if (!zc) {
int i = skb_shinfo(skb)->nr_frags;
err = -ENOMEM;
if (!sk_page_frag_refill(sk, pfrag))
goto error;
skb_zcopy_downgrade_managed(skb);
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
err = -EMSGSIZE;
if (i == MAX_SKB_FRAGS)
goto error;
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
page_address(pfrag->page) + pfrag->offset,
offset, copy, skb->len, skb) < 0)
goto error_efault;
pfrag->offset += copy;
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
wmem_alloc_delta += copy;
} else {
err = skb_zerocopy_iter_dgram(skb, from, copy);
if (err < 0)
goto error;
}
offset += copy;
length -= copy;
}
if (wmem_alloc_delta)
refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
return 0;
error_efault:
err = -EFAULT;
error:
net_zcopy_put_abort(uarg, extra_uref);
cork->length -= length;
IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
if (hold_tskey)
atomic_dec(&sk->sk_tskey);
return err;
}
int ip6_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset, int len,
int odd, struct sk_buff *skb),
void *from, size_t length, int transhdrlen,
struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
struct rt6_info *rt, unsigned int flags)
{
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
int exthdrlen;
int err;
if (flags&MSG_PROBE)
return 0;
if (skb_queue_empty(&sk->sk_write_queue)) {
/*
* setup for corking
*/
dst_hold(&rt->dst);
err = ip6_setup_cork(sk, &inet->cork, &np->cork,
ipc6, rt);
if (err)
return err;
inet->cork.fl.u.ip6 = *fl6;
exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
length += exthdrlen;
transhdrlen += exthdrlen;
} else {
transhdrlen = 0;
}
return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork,
&np->cork, sk_page_frag(sk), getfrag,
from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork)
{
struct dst_entry *dst = cork->base.dst;
cork->base.dst = NULL;
skb_dst_set(skb, dst);
}
static void ip6_cork_release(struct inet_cork_full *cork,
struct inet6_cork *v6_cork)
{
if (v6_cork->opt) {
struct ipv6_txoptions *opt = v6_cork->opt;
kfree(opt->dst0opt);
kfree(opt->dst1opt);
kfree(opt->hopopt);
kfree(opt->srcrt);
kfree(opt);
v6_cork->opt = NULL;
}
if (cork->base.dst) {
dst_release(cork->base.dst);
cork->base.dst = NULL;
}
}
struct sk_buff *__ip6_make_skb(struct sock *sk,
struct sk_buff_head *queue,
struct inet_cork_full *cork,
struct inet6_cork *v6_cork)
{
struct sk_buff *skb, *tmp_skb;
struct sk_buff **tail_skb;
struct in6_addr *final_dst;
struct net *net = sock_net(sk);
struct ipv6hdr *hdr;
struct ipv6_txoptions *opt = v6_cork->opt;
struct rt6_info *rt = dst_rt6_info(cork->base.dst);
struct flowi6 *fl6 = &cork->fl.u.ip6;
unsigned char proto = fl6->flowi6_proto;
skb = __skb_dequeue(queue);
if (!skb)
goto out;
tail_skb = &(skb_shinfo(skb)->frag_list);
/* move skb->data to ip header from ext header */
if (skb->data < skb_network_header(skb))
__skb_pull(skb, skb_network_offset(skb));
while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
__skb_pull(tmp_skb, skb_network_header_len(skb));
*tail_skb = tmp_skb;
tail_skb = &(tmp_skb->next);
skb->len += tmp_skb->len;
skb->data_len += tmp_skb->len;
skb->truesize += tmp_skb->truesize;
tmp_skb->destructor = NULL;
tmp_skb->sk = NULL;
}
/* Allow local fragmentation. */
skb->ignore_df = ip6_sk_ignore_df(sk);
__skb_pull(skb, skb_network_header_len(skb));
final_dst = &fl6->daddr;
if (opt && opt->opt_flen)
ipv6_push_frag_opts(skb, opt, &proto);
if (opt && opt->opt_nflen)
ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
hdr = ipv6_hdr(skb);
ip6_flow_hdr(hdr, v6_cork->tclass,
ip6_make_flowlabel(net, skb, fl6->flowlabel,
ip6_autoflowlabel(net, sk), fl6));
hdr->hop_limit = v6_cork->hop_limit;
hdr->nexthdr = proto;
hdr->saddr = fl6->saddr;
hdr->daddr = *final_dst;
skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = cork->base.mark;
if (sk_is_tcp(sk))
skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC);
else
skb_set_delivery_type_by_clockid(skb, cork->base.transmit_time, sk->sk_clockid);
ip6_cork_steal_dst(skb, cork);
IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
if (proto == IPPROTO_ICMPV6) {
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
u8 icmp6_type;
if (sk->sk_socket->type == SOCK_RAW &&
!(fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH))
icmp6_type = fl6->fl6_icmp_type;
else
icmp6_type = icmp6_hdr(skb)->icmp6_type;
ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type);
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
}
ip6_cork_release(cork, v6_cork);
out:
return skb;
}
int ip6_send_skb(struct sk_buff *skb)
{
struct net *net = sock_net(skb->sk);
struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
int err;
rcu_read_lock();
err = ip6_local_out(net, skb->sk, skb);
if (err) {
if (err > 0)
err = net_xmit_errno(err);
if (err)
IP6_INC_STATS(net, rt->rt6i_idev,
IPSTATS_MIB_OUTDISCARDS);
}
rcu_read_unlock();
return err;
}
int ip6_push_pending_frames(struct sock *sk)
{
struct sk_buff *skb;
skb = ip6_finish_skb(sk);
if (!skb)
return 0;
return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
static void __ip6_flush_pending_frames(struct sock *sk,
struct sk_buff_head *queue,
struct inet_cork_full *cork,
struct inet6_cork *v6_cork)
{
struct sk_buff *skb;
while ((skb = __skb_dequeue_tail(queue)) != NULL) {
if (skb_dst(skb))
IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
}
ip6_cork_release(cork, v6_cork);
}
void ip6_flush_pending_frames(struct sock *sk)
{
__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
&inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
struct sk_buff *ip6_make_skb(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
void *from, size_t length, int transhdrlen,
struct ipcm6_cookie *ipc6, struct rt6_info *rt,
unsigned int flags, struct inet_cork_full *cork)
{
struct inet6_cork v6_cork;
struct sk_buff_head queue;
int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
int err;
if (flags & MSG_PROBE) {
dst_release(&rt->dst);
return NULL;
}
__skb_queue_head_init(&queue);
cork->base.flags = 0;
cork->base.addr = 0;
cork->base.opt = NULL;
v6_cork.opt = NULL;
err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt);
if (err) {
ip6_cork_release(cork, &v6_cork);
return ERR_PTR(err);
}
err = __ip6_append_data(sk, &queue, cork, &v6_cork,
&current->task_frag, getfrag, from,
length + exthdrlen, transhdrlen + exthdrlen,
flags, ipc6);
if (err) {
__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
return ERR_PTR(err);
}
return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}